From b4e12a07697e796d6a3edf5c1a299059a02ab08a Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Thu, 8 Oct 2020 14:19:18 +0200 Subject: [PATCH 001/161] added first version of cellxgene data format loader --- sfaira/data/databases/cellxgene_group.py | 29 +++++++++ sfaira/data/databases/cellxgene_loader.py | 75 +++++++++++++++++++++++ sfaira/data/databases/external.py | 1 + 3 files changed, 105 insertions(+) create mode 100644 sfaira/data/databases/cellxgene_group.py create mode 100644 sfaira/data/databases/cellxgene_loader.py create mode 100644 sfaira/data/databases/external.py diff --git a/sfaira/data/databases/cellxgene_group.py b/sfaira/data/databases/cellxgene_group.py new file mode 100644 index 000000000..6468ef844 --- /dev/null +++ b/sfaira/data/databases/cellxgene_group.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroupBase + +from .cellxgene_loader import Dataset + + +class DatasetGroupCellxgene(DatasetGroupBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None + ): + fn_ls = os.listdir(path) + fn_ls = [x for x in fn_ls if x in self.accepted_file_names] + datasets = [ + Dataset(path=path, fn=x, meta_path=meta_path) + for x in fn_ls + ] + keys = [x.id for x in datasets] + self.datasets = dict(zip(keys, datasets)) + + @property + def accepted_file_names(self): + return [ + "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad" + ] diff --git a/sfaira/data/databases/cellxgene_loader.py b/sfaira/data/databases/cellxgene_loader.py new file mode 100644 index 000000000..a8e1b840e --- /dev/null +++ b/sfaira/data/databases/cellxgene_loader.py @@ -0,0 +1,75 @@ +import numpy as np +import os +from typing import Union +from .external import DatasetBase +import anndata + +from .external import DatasetGroupBase + + +class Dataset(DatasetBase): + """ + This is a dataloader for downloaded h5ad from cellxgene. 
+ + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None], + fn: str, + meta_path: Union[str, None] = None, + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.fn = fn + self.species = str(fn).split("_")[2] + self.id = str(fn).split(".")[0] + self.organ = str(fn).split("_")[3] + self.sub_tissue = None + self.download_website = None # TODO + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + fn = os.path.join(self.path, self.fn) + adata = anndata.read(fn) + adata.X = adata.raw.X + + self.adata.uns["lab"] = adata.uns["contributors"]["name"] + self.adata.uns["year"] = None + self.adata.uns["doi"] = None # TODO + if len(np.unique(adata.obs["organism"].values)) > 1: + raise Warning("found multiple assay in data set %s" % self.fn) + self.adata.uns["protocol"] = adata.obs["assay"].values[0] + # Select tissue: blood is handled as a separate tissue in .obs + #if len(np.unique(adata.obs["tissue"].values)) > 1: + # raise Warning("found multiple tissue in data set %s" % self.fn) + #self.adata.uns["organ"] = adata.obs["tissue"].values[0] + self.adata.uns["organ"] = str(self.fn).split("_")[3] + if len(np.unique(adata.obs["organism"].values)) > 1: + raise Warning("found multiple organisms in data set %s" % self.fn) + self.adata.uns["animal"] = adata.obs["organism"].values[0] + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'raw' + + self.adata.obs["subtissue"] = self.sub_tissue + self.adata.obs["dev_stage"] = adata.obs["development_stage"].values + self.adata.obs["sex"] = adata.obs["sex"].values + self.adata.obs["ethnicity"] = adata.obs["ethnicity"].values + self.adata.obs["healthy"] = adata.obs["disease"].values == "normal" + self.adata.obs["state_exact"] = adata.obs["disease"].values + + 
self.adata.obs["cell_ontology_id"] = adata.obs["cell_type_ontology_term_id"].values.tolist() + self.adata.obs["cell_ontology_class"] = adata.obs["cell_type"].values.tolist() + self.adata.obs["cell_types_original"] = adata.obs["free_annotation"].values.tolist() + + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + diff --git a/sfaira/data/databases/external.py b/sfaira/data/databases/external.py new file mode 100644 index 000000000..9f4e3db68 --- /dev/null +++ b/sfaira/data/databases/external.py @@ -0,0 +1 @@ +from sfaira.data import DatasetBase, DatasetGroupBase From 27fa462fca88f8435658422ef98ef39c68200c48 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 12:17:48 +0100 Subject: [PATCH 002/161] refactored anndata field entries from data loaders to be named in separate consts object --- sfaira/api/__init__.py | 1 + sfaira/api/consts.py | 1 + sfaira/consts.py | 93 +++++++++++++++++++ sfaira/data/human/adipose/external.py | 1 + .../human_adipose_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/adrenalgland/external.py | 1 + ...man_adrenalgland_2020_microwell_han_001.py | 25 ++--- ...man_adrenalgland_2020_microwell_han_002.py | 25 ++--- ...man_adrenalgland_2020_microwell_han_003.py | 25 ++--- ...man_adrenalgland_2020_microwell_han_004.py | 25 ++--- ...man_adrenalgland_2020_microwell_han_005.py | 25 ++--- ...man_adrenalgland_2020_microwell_han_006.py | 25 ++--- sfaira/data/human/artery/external.py | 1 + .../human_artery_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/bladder/external.py | 1 + .../human_bladder_2020_microwell_han_001.py | 25 ++--- .../human_bladder_2020_microwell_han_002.py | 25 ++--- .../human_bladder_2020_microwell_han_003.py | 25 ++--- sfaira/data/human/blood/external.py | 1 + .../blood/human_blood_2018_10x_ica_001.py | 31 ++++--- .../human_blood_2019_10x_10xGenomics_001.py | 31 ++++--- .../human_blood_2020_microwell_han_001.py | 25 ++--- 
.../human_blood_2020_microwell_han_002.py | 25 ++--- .../human_blood_2020_microwell_han_003.py | 25 ++--- .../human_blood_2020_microwell_han_004.py | 25 ++--- .../human_blood_2020_microwell_han_005.py | 25 ++--- .../human_blood_2020_microwell_han_006.py | 25 ++--- .../human_blood_2020_microwell_han_007.py | 25 ++--- sfaira/data/human/bone/external.py | 1 + .../human/bone/human_bone_2018_10x_ica_001.py | 31 ++++--- .../bone/human_bone_2020_microwell_han_001.py | 25 ++--- .../bone/human_bone_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/brain/external.py | 1 + .../human_brain_2017_DroNcSeq_habib_001.py | 31 ++++--- .../human_brain_2020_microwell_han_001.py | 25 ++--- .../human_brain_2020_microwell_han_002.py | 25 ++--- .../human_brain_2020_microwell_han_003.py | 25 ++--- .../human_brain_2020_microwell_han_004.py | 25 ++--- .../human_brain_2020_microwell_han_005.py | 25 ++--- .../human_brain_2020_microwell_han_006.py | 25 ++--- sfaira/data/human/calvaria/external.py | 1 + .../human_calvaria_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/cervix/external.py | 1 + .../human_cervix_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/chorionicvillus/external.py | 1 + ..._chorionicvillus_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/colon/external.py | 1 + .../colon/human_colon_2019_10x_kinchen_001.py | 35 +++---- .../colon/human_colon_2019_10x_smilie_001.py | 31 ++++--- .../colon/human_colon_2019_10x_wang_001.py | 31 ++++--- .../colon/human_colon_2020_10x_james_001.py | 31 ++++--- .../human_colon_2020_microwell_han_001.py | 25 ++--- .../human_colon_2020_microwell_han_002.py | 25 ++--- .../human_colon_2020_microwell_han_003.py | 25 ++--- .../human_colon_2020_microwell_han_004.py | 25 ++--- sfaira/data/human/duodenum/external.py | 1 + .../human_duodenum_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/epityphlon/external.py | 1 + ...human_epityphlon_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/esophagus/external.py | 1 + 
.../human_esophagus_2019_10x_madissoon_001.py | 31 ++++--- .../human_esophagus_2020_microwell_han_001.py | 25 ++--- .../human_esophagus_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/eye/external.py | 1 + .../eye/human_eye_2019_10x_lukowski_001.py | 31 ++++--- .../human/eye/human_eye_2019_10x_menon_001.py | 31 ++++--- .../human/eye/human_eye_2019_10x_voigt_001.py | 31 ++++--- .../eye/human_eye_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/fallopiantube/external.py | 1 + ...an_fallopiantube_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/femalegonad/external.py | 1 + ...uman_femalegonad_2020_microwell_han_001.py | 25 ++--- ...uman_femalegonad_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/gallbladder/external.py | 1 + ...uman_gallbladder_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/heart/external.py | 1 + .../human_heart_2020_microwell_han_001.py | 25 ++--- .../human_heart_2020_microwell_han_002.py | 25 ++--- .../human_heart_2020_microwell_han_003.py | 25 ++--- .../human_heart_2020_microwell_han_004.py | 25 ++--- sfaira/data/human/hesc/external.py | 1 + .../hesc/human_hesc_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/ileum/external.py | 1 + .../ileum/human_ileum_2019_10x_martin_001.py | 31 ++++--- .../ileum/human_ileum_2019_10x_wang_001.py | 31 ++++--- .../human_ileum_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/jejunum/external.py | 1 + .../human_jejunum_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/kidney/external.py | 1 + .../human_kidney_2019_10xSn_lake_001.py | 31 ++++--- .../human_kidney_2019_10x_stewart_001.py | 31 ++++--- .../kidney/human_kidney_2020_10x_liao_001.py | 31 ++++--- .../human_kidney_2020_microwell_han_001.py | 25 ++--- .../human_kidney_2020_microwell_han_002.py | 25 ++--- .../human_kidney_2020_microwell_han_003.py | 25 ++--- .../human_kidney_2020_microwell_han_004.py | 25 ++--- .../human_kidney_2020_microwell_han_005.py | 25 ++--- .../human_kidney_2020_microwell_han_006.py | 
25 ++--- .../human_kidney_2020_microwell_han_007.py | 25 ++--- sfaira/data/human/liver/external.py | 1 + .../human_liver_2018_10x_macparland_001.py | 31 ++++--- .../liver/human_liver_2019_10x_popescu_001.py | 31 ++++--- .../human_liver_2019_10x_ramachandran_001.py | 31 ++++--- .../human_liver_2019_mCELSeq2_aizarani_001.py | 29 +++--- .../human_liver_2020_microwell_han_001.py | 25 ++--- .../human_liver_2020_microwell_han_002.py | 25 ++--- .../human_liver_2020_microwell_han_003.py | 25 ++--- .../human_liver_2020_microwell_han_004.py | 25 ++--- .../human_liver_2020_microwell_han_005.py | 25 ++--- sfaira/data/human/lung/external.py | 1 + .../lung/human_lung_2019_10x_braga_001.py | 29 +++--- .../lung/human_lung_2019_10x_braga_002.py | 29 +++--- .../lung/human_lung_2019_10x_madissoon_001.py | 29 +++--- .../lung/human_lung_2019_dropseq_braga_003.py | 29 +++--- .../lung/human_lung_2020_10x_habermann_001.py | 29 +++--- .../lung/human_lung_2020_10x_lukassen_001.py | 29 +++--- .../lung/human_lung_2020_10x_lukassen_002.py | 29 +++--- .../lung/human_lung_2020_10x_miller_001.py | 29 +++--- .../human_lung_2020_10x_travaglini_001.py | 31 ++++--- .../lung/human_lung_2020_microwell_han_001.py | 25 ++--- .../lung/human_lung_2020_microwell_han_002.py | 25 ++--- .../lung/human_lung_2020_microwell_han_003.py | 25 ++--- .../lung/human_lung_2020_microwell_han_004.py | 25 ++--- .../lung/human_lung_2020_microwell_han_005.py | 25 ++--- ...uman_lung_2020_smartseq2_travaglini_002.py | 31 ++++--- sfaira/data/human/malegonad/external.py | 1 + .../human_malegonad_2018_10x_guo_001.py | 31 ++++--- .../human_malegonad_2020_microwell_han_001.py | 25 ++--- .../human_malegonad_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/mixed/external.py | 1 + .../mixed/human_mixed_2019_10x_szabo_001.py | 29 +++--- sfaira/data/human/muscle/external.py | 1 + .../human_muscle_2020_microwell_han_001.py | 25 ++--- .../human_muscle_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/omentum/external.py | 1 + 
.../human_omentum_2020_microwell_han_001.py | 25 ++--- .../human_omentum_2020_microwell_han_002.py | 25 ++--- .../human_omentum_2020_microwell_han_003.py | 25 ++--- sfaira/data/human/pancreas/external.py | 1 + .../human_pancreas_2016_indrop_baron_001.py | 31 ++++--- ...pancreas_2016_smartseq2_segerstolpe_001.py | 33 +++---- .../human_pancreas_2017_smartseq2_enge_001.py | 31 ++++--- .../human_pancreas_2020_microwell_han_001.py | 25 ++--- .../human_pancreas_2020_microwell_han_002.py | 25 ++--- .../human_pancreas_2020_microwell_han_003.py | 25 ++--- .../human_pancreas_2020_microwell_han_004.py | 25 ++--- sfaira/data/human/placenta/external.py | 1 + .../human_placenta_2018_10x_ventotormo_001.py | 31 ++++--- ..._placenta_2018_smartseq2_ventotormo_001.py | 31 ++++--- .../human_placenta_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/pleura/external.py | 1 + .../human_pleura_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/prostate/external.py | 1 + .../human_prostate_2018_10x_henry_001.py | 31 ++++--- .../human_prostate_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/rectum/external.py | 1 + .../rectum/human_rectum_2019_10x_wang_001.py | 31 ++++--- .../human_rectum_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/rib/external.py | 1 + .../rib/human_rib_2020_microwell_han_001.py | 25 ++--- .../rib/human_rib_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/skin/external.py | 1 + .../skin/human_skin_2020_microwell_han_001.py | 25 ++--- .../skin/human_skin_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/spinalcord/external.py | 1 + ...human_spinalcord_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/spleen/external.py | 1 + .../human_spleen_2019_10x_madissoon_001.py | 31 ++++--- .../human_spleen_2020_microwell_han_001.py | 25 ++--- .../human_spleen_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/stomach/external.py | 1 + .../human_stomach_2020_microwell_han_001.py | 25 ++--- .../human_stomach_2020_microwell_han_002.py | 25 
++--- .../human_stomach_2020_microwell_han_003.py | 25 ++--- .../human_stomach_2020_microwell_han_004.py | 25 ++--- .../human_stomach_2020_microwell_han_005.py | 25 ++--- .../human_stomach_2020_microwell_han_006.py | 25 ++--- .../human_stomach_2020_microwell_han_007.py | 25 ++--- .../human_stomach_2020_microwell_han_008.py | 25 ++--- .../human_stomach_2020_microwell_han_009.py | 25 ++--- .../human_stomach_2020_microwell_han_010.py | 25 ++--- sfaira/data/human/thymus/external.py | 1 + .../thymus/human_thymus_2020_10x_park_001.py | 31 ++++--- .../human_thymus_2020_microwell_han_001.py | 25 ++--- .../human_thymus_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/thyroid/external.py | 1 + .../human_thyroid_2020_microwell_han_001.py | 25 ++--- .../human_thyroid_2020_microwell_han_002.py | 25 ++--- sfaira/data/human/trachea/external.py | 1 + .../human_trachea_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/ureter/external.py | 1 + .../human_ureter_2020_microwell_han_001.py | 25 ++--- sfaira/data/human/uterus/external.py | 1 + .../human_uterus_2020_microwell_han_001.py | 25 ++--- sfaira/data/mouse/bladder/external.py | 1 + .../mouse_bladder_2018_microwell_han_001.py | 33 +++---- .../mouse_bladder_2019_10x_pisco_001.py | 33 +++---- .../mouse_bladder_2019_smartseq2_pisco_001.py | 31 ++++--- sfaira/data/mouse/brain/external.py | 1 + .../mouse_brain_2018_microwell_han_001.py | 33 +++---- .../mouse_brain_2018_microwell_han_002.py | 33 +++---- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 33 +++---- .../mouse_brain_2019_smartseq2_pisco_001.py | 33 +++---- .../mouse_brain_2019_smartseq2_pisco_002.py | 33 +++---- sfaira/data/mouse/diaphragm/external.py | 1 + ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 31 ++++--- sfaira/data/mouse/fat/external.py | 1 + .../mouse/fat/mouse_fat_2019_10x_pisco_001.py | 33 +++---- .../fat/mouse_fat_2019_smartseq2_pisco_001.py | 33 +++---- .../fat/mouse_fat_2019_smartseq2_pisco_002.py | 33 +++---- 
.../fat/mouse_fat_2019_smartseq2_pisco_003.py | 33 +++---- .../fat/mouse_fat_2019_smartseq2_pisco_004.py | 33 +++---- sfaira/data/mouse/heart/external.py | 1 + .../heart/mouse_heart_2019_10x_pisco_001.py | 33 +++---- .../mouse_heart_2019_smartseq2_pisco_001.py | 33 +++---- .../mouse_heart_2019_smartseq2_pisco_002.py | 33 +++---- sfaira/data/mouse/kidney/external.py | 1 + .../mouse_kidney_2018_microwell_han_001.py | 33 +++---- .../mouse_kidney_2018_microwell_han_002.py | 33 +++---- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 33 +++---- .../mouse_kidney_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/large_intestine/external.py | 1 + ...ouse_large_intestine_2019_10x_pisco_001.py | 33 +++---- ...arge_intestine_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/limb_muscle/external.py | 1 + ...ouse_limb_muscle_2018_microwell_han_001.py | 33 +++---- .../mouse_limb_muscle_2019_10x_pisco_001.py | 33 +++---- ...se_limb_muscle_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/liver/external.py | 1 + .../mouse_liver_2018_microwell_han_001.py | 33 +++---- .../mouse_liver_2018_microwell_han_002.py | 33 +++---- .../liver/mouse_liver_2019_10x_pisco_001.py | 33 +++---- .../mouse_liver_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/lung/external.py | 1 + .../lung/mouse_lung_2018_microwell_han_001.py | 33 +++---- .../lung/mouse_lung_2018_microwell_han_002.py | 33 +++---- .../lung/mouse_lung_2018_microwell_han_003.py | 33 +++---- .../lung/mouse_lung_2019_10x_pisco_001.py | 33 +++---- .../mouse_lung_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/mammary_gland/external.py | 1 + ...se_mammary_gland_2018_microwell_han_001.py | 33 +++---- ...se_mammary_gland_2018_microwell_han_002.py | 33 +++---- ...se_mammary_gland_2018_microwell_han_003.py | 33 +++---- ...se_mammary_gland_2018_microwell_han_004.py | 33 +++---- .../mouse_mammary_gland_2019_10x_pisco_001.py | 33 +++---- ..._mammary_gland_2019_smartseq2_pisco_001.py | 33 +++---- 
sfaira/data/mouse/marrow/external.py | 1 + .../marrow/mouse_marrow_2018_microwell_001.py | 33 +++---- .../marrow/mouse_marrow_2019_10x_pisco_001.py | 33 +++---- .../mouse_marrow_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/ovary/external.py | 1 + .../mouse_ovary_2018_microwell_han_001.py | 33 +++---- .../mouse_ovary_2018_microwell_han_002.py | 33 +++---- sfaira/data/mouse/pancreas/external.py | 1 + .../mouse_pancreas_2018_microwell_han_001.py | 33 +++---- .../mouse_pancreas_2019_10x_pisco_001.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_001.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_002.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_003.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_004.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_005.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_006.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_007.py | 33 +++---- .../mouse_pancreas_2019_10x_thompson_008.py | 33 +++---- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 33 +++---- .../data/mouse/peripheral_blood/external.py | 1 + ...peripheral_blood_2018_microwell_han_001.py | 33 +++---- ...peripheral_blood_2018_microwell_han_002.py | 33 +++---- ...peripheral_blood_2018_microwell_han_003.py | 33 +++---- ...peripheral_blood_2018_microwell_han_004.py | 33 +++---- ...peripheral_blood_2018_microwell_han_005.py | 33 +++---- sfaira/data/mouse/placenta/external.py | 1 + .../mouse_placenta_2018_microwell_han_001.py | 33 +++---- .../mouse_placenta_2018_microwell_han_002.py | 33 +++---- sfaira/data/mouse/prostate/external.py | 1 + .../mouse_prostate_2018_microwell_han_001.py | 33 +++---- .../mouse_prostate_2018_microwell_han_002.py | 33 +++---- sfaira/data/mouse/rib/external.py | 1 + .../rib/mouse_rib_2018_microwell_han_001.py | 33 +++---- .../rib/mouse_rib_2018_microwell_han_002.py | 33 +++---- .../rib/mouse_rib_2018_microwell_han_003.py | 33 +++---- sfaira/data/mouse/skin/external.py | 1 + 
.../skin/mouse_skin_2019_10x_pisco_001.py | 33 +++---- .../mouse_skin_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/small_intestine/external.py | 1 + ..._small_intestine_2018_microwell_han_001.py | 33 +++---- ..._small_intestine_2018_microwell_han_002.py | 33 +++---- ..._small_intestine_2018_microwell_han_003.py | 33 +++---- sfaira/data/mouse/spleen/external.py | 1 + .../mouse_spleen_2018_microwell_han_001.py | 33 +++---- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 33 +++---- .../mouse_spleen_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/stomach/external.py | 1 + .../mouse_stomach_2018_microwell_han_001.py | 33 +++---- sfaira/data/mouse/testis/external.py | 1 + .../mouse_testis_2018_microwell_han_001.py | 33 +++---- .../mouse_testis_2018_microwell_han_002.py | 33 +++---- sfaira/data/mouse/thymus/external.py | 1 + .../mouse_thymus_2018_microwell_han_001.py | 33 +++---- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 33 +++---- .../mouse_thymus_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/tongue/external.py | 1 + .../tongue/mouse_tongue_2019_10x_pisco_001.py | 33 +++---- .../mouse_tongue_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/trachae/external.py | 1 + .../mouse_trachea_2019_10x_pisco_001.py | 33 +++---- .../mouse_trachea_2019_smartseq2_pisco_001.py | 33 +++---- sfaira/data/mouse/uterus/external.py | 1 + .../mouse_uterus_2018_microwell_han_001.py | 33 +++---- .../mouse_uterus_2018_microwell_han_002.py | 33 +++---- 310 files changed, 3707 insertions(+), 3305 deletions(-) create mode 100644 sfaira/api/consts.py create mode 100644 sfaira/consts.py diff --git a/sfaira/api/__init__.py b/sfaira/api/__init__.py index c61b4e96b..2caf3b54c 100644 --- a/sfaira/api/__init__.py +++ b/sfaira/api/__init__.py @@ -1,3 +1,4 @@ +from . import consts from . import data from . import genomes from . 
import models diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py new file mode 100644 index 000000000..45e9646bc --- /dev/null +++ b/sfaira/api/consts.py @@ -0,0 +1 @@ +from sfaira.const import ADATA_IDS \ No newline at end of file diff --git a/sfaira/consts.py b/sfaira/consts.py new file mode 100644 index 000000000..8cc2a8179 --- /dev/null +++ b/sfaira/consts.py @@ -0,0 +1,93 @@ + +class ADATA_IDS: + """ + Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. + """ + + def __init__(self): + self._animal = "animal" + self._cell_types_original = "cell_types_original" + self._cell_ontology_class = "cell_ontology_class" + self._cell_ontology_id = "cell_ontology_id" + self._doi = "doi" + self._gene_id_ensembl = "ensembl" + self._has_celltypes = "has_celltypes" + self._healthy = "healthy" + self._id = "id" + self._normalization = "normalization" + self._lab = "lab" + self._organ = "organ" + self._protocol = "protocol" + self._state_exact = "state_exact" + self._subtissue = "subtissue" + self._wget_download = "wget_download" + self._year = "year" + + @property + def animal(self): + return self._animal + + @property + def cell_types_original(self): + return self._cell_types_original + + @property + def cell_ontology_class(self): + return self._cell_ontology_class + + @property + def cell_ontology_id(self): + return self._cell_ontology_id + + @property + def doi(self): + return self._doi + + @property + def gene_id_ensembl(self): + return self._gene_id_ensembl + + @property + def has_celltypes(self): + return self._has_celltypes + + @property + def healthy(self): + return self._healthy + + @property + def id(self): + return self._id + + @property + def lab(self): + return self._lab + + @property + def normalization(self): + return self._normalization + + @property + def protocol(self): + return self._protocol + + @property + def organ(self): + return self._organ + + @property + def subtissue(self): + return 
self._subtissue + + @property + def state_exact(self): + return self._state_exact + + @property + def wget_download(self): + return self._wget_download + + @property + def year(self): + return self._year + diff --git a/sfaira/data/human/adipose/external.py b/sfaira/data/human/adipose/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/adipose/external.py +++ b/sfaira/data/human/adipose/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index a0e19c524..8e30b8219 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adipose/hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/external.py b/sfaira/data/human/adrenalgland/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/adrenalgland/external.py +++ b/sfaira/data/human/adrenalgland/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 094a44de0..9f2f2abc7 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + 
self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 600746da2..7fd2962ee 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index 1583829f8..b84a42735 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + 
self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 7b794fbd2..97a4e963a 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 2b56ea90e..86b16cb6e 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 439ac8d67..2818399b4 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + 
self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/artery/external.py b/sfaira/data/human/artery/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/artery/external.py +++ b/sfaira/data/human/artery/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 754d504a9..f45d4eadf 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/artery/hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/external.py b/sfaira/data/human/bladder/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/bladder/external.py +++ b/sfaira/data/human/bladder/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index 48ff8872e..f1b75c65e 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index e0a68f985..570145175 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + 
self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index afbf3049b..f13a35aa4 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + 
self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/external.py b/sfaira/data/human/blood/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/blood/external.py +++ b/sfaira/data/human/blood/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index ef4e3a3d3..92c02574f 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -61,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/ica_blood.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Regev' - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = None - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Regev' + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = None + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = None - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = None + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 30e6a101f..64b44f8c7 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -45,20 +46,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = '10x Genomics' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = None - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - 
self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = '10x Genomics' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = None + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = None - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = None + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 0aa599f3a..ca485bb3e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 
'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index a9066f16f..c621d8d69 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = 
"microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index ddd7ff8eb..90b9de279 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = 
"human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index 08ed4bc3e..f666730d8 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index b08a77faf..405fd7232 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' 
+ self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index e33fc2e47..2dfbf824e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index 2c07335e1..f6c9988d7 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bone/external.py b/sfaira/data/human/bone/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/bone/external.py +++ b/sfaira/data/human/bone/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index 5b8e68eec..27bad3280 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -61,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/ica_bone.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Regev' - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = None - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Regev' + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = 
None + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = None - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = None + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index d05f88665..8c1dc77c2 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + 
self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 93fcedda2..5729b893c 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = 
"microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/external.py b/sfaira/data/human/brain/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/brain/external.py +++ b/sfaira/data/human/brain/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index e4db82a37..d5f361d87 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -62,20 +63,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Regev" - self.adata.uns["year"] = 2017 - self.adata.uns["doi"] = "10.1038/nmeth.4407" - self.adata.uns["protocol"] = 'DroNcSeq' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Regev" + self.adata.uns[ADATA_IDS.year] = 2017 + self.adata.uns[ADATA_IDS.doi] = "10.1038/nmeth.4407" + self.adata.uns[ADATA_IDS.protocol] = 'DroNcSeq' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index f4462bee1..b663b1a56 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - 
self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index ec6ab470b..450e4ba7e 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 7a7c42f66..390605718 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + 
self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index bb3edc3ac..17ba2ad33 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index a43f47937..edbe29c8a 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index 74859be68..2eecc8d91 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + 
self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/calvaria/external.py b/sfaira/data/human/calvaria/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/calvaria/external.py +++ b/sfaira/data/human/calvaria/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 00900b80e..f103428b7 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/calvaria/hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/cervix/external.py b/sfaira/data/human/cervix/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/cervix/external.py +++ b/sfaira/data/human/cervix/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 88e2a25d5..6401d36fa 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/cervix/hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/chorionicvillus/external.py b/sfaira/data/human/chorionicvillus/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/chorionicvillus/external.py +++ b/sfaira/data/human/chorionicvillus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index 9c5065766..3d10a937e 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/chorionicvillus/hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 
- self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/external.py b/sfaira/data/human/colon/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/colon/external.py +++ b/sfaira/data/human/colon/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index dc0608fd5..7a607b32c 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -127,23 
+128,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/kinchenetal.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Simmons' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1016/j.cell.2018.08.067" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] - self.adata.obs["healthy"] = [line == 'normal' for line in + self.adata.uns[ADATA_IDS.lab] = 'Simmons' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.08.067" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS.healthy] = [line == 'normal' for line in self.adata.obs['donor_organism.diseases.ontology_label']] - self.adata.obs["state_exact"] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') - self.adata.obs["state_exact"] = self.adata.obs["state_exact"]\ + self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') + self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs[ADATA_IDS.state_exact]\ .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='Accession', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 3606b7ec5..4efa64678 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -86,20 +87,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Regev" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1016/j.cell.2019.06.029" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Regev" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2019.06.029" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - 
self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index 92c8528cd..f41aadf23 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -54,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Chen" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1084/jem.20191130" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index a7f8152a3..3a9ddbb5a 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -73,20 +74,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Teichmann" - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1038/s41590-020-0602-z" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Teichmann" + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41590-020-0602-z" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['cell_type'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['cell_type'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index 698fff04a..ac4db7ecf 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -71,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' 
+ self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index bf71d5056..46fbda2d6 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -71,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 36a8f821c..a928ff480 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -71,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = 
self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 2a71bfb62..0df344dcc 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -71,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/duodenum/external.py b/sfaira/data/human/duodenum/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/duodenum/external.py +++ b/sfaira/data/human/duodenum/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index d2e14855a..12dbae9cc 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/duodenum/hcl_AdultDuodenum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/epityphlon/external.py b/sfaira/data/human/epityphlon/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/epityphlon/external.py +++ b/sfaira/data/human/epityphlon/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index e33f5fed1..2dd5617a7 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/epityphlon/hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 
'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/external.py b/sfaira/data/human/esophagus/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/esophagus/external.py +++ b/sfaira/data/human/esophagus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 84d5d42ca..b00195eee 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import scipy.sparse @@ -64,21 +65,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Meyer" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1101/741405" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = 
self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Meyer" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['Celltypes'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Celltypes'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7413619', - new_index='ensembl') \ No newline at end of file + new_index=ADATA_IDS.gene_id_ensembl) \ No newline at end of file diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index b117fd396..4b2b9ee4a 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -68,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 87fc666ec..2c8e22d6c 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -68,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = 
'10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/eye/external.py b/sfaira/data/human/eye/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/eye/external.py +++ b/sfaira/data/human/eye/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index 7361ffc6d..e5551a126 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -61,20 +62,20 @@ def _load(self, 
fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = 'Wong' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.15252/embj.2018100811' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Wong' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.15252/embj.2018100811' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index bf8c624a4..20aff74d4 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -1,6 +1,7 @@ import os from typing 
import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -51,20 +52,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/eye/menon19.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Hafler' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1038/s41467-019-12780-8' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Hafler' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-12780-8' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 977603919..89803593e 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py 
+++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -55,20 +56,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns["lab"] = 'Mullins' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1073/pnas.1914143116' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' + self.adata.uns[ADATA_IDS.lab] = 'Mullins' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1073/pnas.1914143116' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py 
index f03e981c8..51c88c859 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -67,18 +68,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/eye/hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/fallopiantube/external.py b/sfaira/data/human/fallopiantube/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/fallopiantube/external.py +++ b/sfaira/data/human/fallopiantube/external.py @@ -1 +1,2 @@ from sfaira.data import 
DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 856a59805..eb635aee1 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/fallopiantube/hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/external.py b/sfaira/data/human/femalegonad/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/femalegonad/external.py +++ b/sfaira/data/human/femalegonad/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 518941a26..7566194d8 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 3643e5a9c..79da1d4ac 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/gallbladder/external.py b/sfaira/data/human/gallbladder/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/gallbladder/external.py +++ b/sfaira/data/human/gallbladder/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index ca768c108..dd3476d37 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/gallbladder/hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/external.py b/sfaira/data/human/heart/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/heart/external.py +++ b/sfaira/data/human/heart/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index 662c91f72..c38755a8d 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 4ba0b416c..94a9ac30f 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index b22792c45..59a34f92e 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = 
self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 130392ed9..e947faf65 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/hesc/external.py b/sfaira/data/human/hesc/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/hesc/external.py +++ b/sfaira/data/human/hesc/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index ead563c93..8eeaa4f45 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/hesc/hcl_HESC_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = 
"human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/external.py b/sfaira/data/human/ileum/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/ileum/external.py +++ b/sfaira/data/human/ileum/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 2ec29b13b..5d0874892 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -68,20 +69,20 @@ def _load(self, fn=None): .multiply(1/10000) self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - self.adata.uns["lab"] = "Kenigsberg" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1016/j.cell.2019.08.008" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Kenigsberg" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = 
"10.1016/j.cell.2019.08.008" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 95d1a1c2a..c1993453f 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -54,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Chen" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1084/jem.20191130" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 187f29aad..98b87b328 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -70,18 +71,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ileum/hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - 
self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/jejunum/external.py b/sfaira/data/human/jejunum/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/jejunum/external.py +++ b/sfaira/data/human/jejunum/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index f91f4cb54..ec836f332 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,17 +44,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/jejunum/hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - 
self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/external.py b/sfaira/data/human/kidney/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/kidney/external.py +++ b/sfaira/data/human/kidney/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index 8bc1ea72d..3e208b9a4 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import 
anndata import pandas as pd @@ -76,20 +77,20 @@ def _load(self, fn=None): annot = pd.read_csv(fn[1], index_col=0, dtype='category') self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - self.adata.uns["lab"] = 'Jain' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1038/s41467-019-10861-2' - self.adata.uns["protocol"] = '10xSn' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Jain' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-10861-2' + self.adata.uns[ADATA_IDS.protocol] = '10xSn' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index b0b6a4ec0..7ff976fcc 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -124,21 +125,21 @@ def _load(self, fn=None): self.adata = adult.concatenate(fetal) self.adata.X = np.expm1(self.adata.X) - self.adata.uns["lab"] = 'Clatworthy' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1126/science.aat5031' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' + self.adata.uns[ADATA_IDS.lab] = 'Clatworthy' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1126/science.aat5031' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' - self.adata.obs["cell_ontology_class"] = self.adata.obs["celltype"] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["celltype"] self.adata.obs["cell_ontology_id"] = None - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID', new_index=ADATA_IDS.gene_id_ensembl) 
diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index 8c2c79afe..1bd2c52e0 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd import scipy.io @@ -95,20 +96,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/GSE131685.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Mo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41597-019-0351-8' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Mo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41597-019-0351-8' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = None - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = None + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index ea7033528..6a5590593 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff 
--git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index 3fd1638e1..e4b0943cd 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py 
b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 5c41ce28e..9db23183b 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 3d9a04730..729555170 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 0fbe4b250..15c6c89d9 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ 
b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 6ef4147e2..b6a90b195 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -1,6 +1,7 @@ import os from typing import Union from 
.external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 498321158..4ac0e9ba2 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -93,17 +94,17 @@ def _load(self, 
fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/external.py b/sfaira/data/human/liver/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/liver/external.py +++ b/sfaira/data/human/liver/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 52e8dd7cb..e5b816254 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ 
b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -69,20 +70,20 @@ def _load(self, fn=None): celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - self.adata.uns["lab"] = 'McGilvray' - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = '10.1038/s41467-018-06318-7' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'McGilvray' + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-018-06318-7' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 1b4e83eea..32f0de473 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -69,20 +70,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Haniffa' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1038/s41586-019-1652-y' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Haniffa' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1652-y' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["cell.labels"] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["cell.labels"] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index 906badbfb..64617b392 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -71,20 +72,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/ramachandran.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Henderson' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1038/s41586-019-1631-3' - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Henderson' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1631-3' + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["annotation_lineage"] - self.adata.obs["healthy"] = [i == 'Uninjured' for i in self.adata.obs["condition"]] - self.adata.obs["state_exact"] = ['healthy' if i == 'Uninjured' else i for i in 
self.adata.obs["condition"]] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["annotation_lineage"] + self.adata.obs[ADATA_IDS.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] + self.adata.obs[ADATA_IDS.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 5c6316083..fc24a03db 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -89,20 +90,20 @@ def _load(self, fn=None): self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - self.adata.uns["lab"] = 'Gruen' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = '10.1038/s41586-019-1373-2' - self.adata.uns["protocol"] = 'mCEL-Seq2' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Gruen' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1373-2' + self.adata.uns[ADATA_IDS.protocol] = 'mCEL-Seq2' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = 
self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 43d2f97f9..c1a2edc26 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + 
self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index dbf7b742e..b6e8e437f 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index dcf1884d5..03f612568 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = 
self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index cf8d491a3..843a3743e 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 325c5990a..7a8359857 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/external.py b/sfaira/data/human/lung/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/lung/external.py +++ b/sfaira/data/human/lung/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 788fb0d7e..fba31f7a0 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns["lab"] = 'Teichmann' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1038/s41591-019-0468-5" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' + self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS.normalization] = 'norm' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 9f51dbc0e..79a597c52 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns["lab"] = 'Teichmann' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1038/s41591-019-0468-5" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' + self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index cf396e708..90f7952ca 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -67,21 +68,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Meyer' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1186/s13059-019-1906-x" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Meyer' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1186/s13059-019-1906-x" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index 809f442a5..a8533afdd 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -61,21 +62,21 @@ def _load(self, fn=None): self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - self.adata.uns["lab"] = 'Teichmann' - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1038/s41591-019-0468-5" - self.adata.uns["protocol"] = 'dropseq' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = [self.download_website, self.download_website_meta] - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + 
self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS.protocol] = 'dropseq' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = [self.download_website, self.download_website_meta] + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'uninvolved areas of tumour resection material' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 3a538150d..79b4b9365 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -106,21 +107,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/habermann_processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Kropski' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1101/753806" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - 
self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Kropski' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1101/753806" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = [i == 'Control' for i in self.adata.obs['Status']] + self.adata.obs[ADATA_IDS.healthy] = [i == 'Control' for i in self.adata.obs['Status']] self.adata.obs['state_exact'] = self.adata.obs['Diagnosis'].astype('category') - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index 93786b516..37448e9d9 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -56,21 +57,21 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = 'Eils' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1101/2020.03.13.991455" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Eils' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index bc37b39bb..88a1b8cbb 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -1,6 +1,7 @@ import os from typing import 
Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -61,21 +62,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = 'Eils' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1101/2020.03.13.991455" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Eils' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index 
2b20ac0d7..54ae54729 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -72,21 +73,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = 'Spence' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1016/j.devcel.2020.01.033" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Spence' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.devcel.2020.01.033" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['Cell_type'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Cell_type'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index 57a19ffc8..e088eb8a3 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import scipy.sparse import numpy as np @@ -113,22 +114,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ .multiply(1 / 10000) - self.adata.uns["lab"] = 'Krasnow' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1101/742320" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Krasnow' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs["cell_ontology_class"] = self.adata.obs["cell_ontology_class"].astype('category') + self.adata.obs[ADATA_IDS.cell_ontology_class] = ["_".join(i.split('_')[:-1]) 
for i in self.adata.obs['free_annotation']] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs[ADATA_IDS.cell_ontology_class].astype('category') self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 61b95de1f..9805e8aa0 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -94,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 7d44e00d6..96a2a1e9e 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -94,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 11d4dcd45..3cd8e4a48 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -94,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 79810cc98..c75e9ed28 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -94,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 5f69a87a3..631af8494 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -94,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index ec9e1a422..291e9649e 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import scipy.sparse import numpy as np @@ -100,22 +101,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ .multiply(1 / 1000000) - self.adata.uns["lab"] = 'Krasnow' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1101/742320" - self.adata.uns["protocol"] = 'smartseq2' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Krasnow' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS.protocol] = 'smartseq2' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs["cell_ontology_class"] = self.adata.obs["cell_ontology_class"].astype('category') + self.adata.obs[ADATA_IDS.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in 
self.adata.obs['free_annotation']] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs[ADATA_IDS.cell_ontology_class].astype('category') self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs["healthy"] = True + self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/external.py b/sfaira/data/human/malegonad/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/malegonad/external.py +++ b/sfaira/data/human/malegonad/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 3934d0812..a484700f2 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -58,20 +59,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Cairns" - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = "10.1038/s41422-018-0099-2" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + 
self.adata.uns[ADATA_IDS.lab] = "Cairns" + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41422-018-0099-2" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index f1f13bb2a..56934ea9a 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,18 +73,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - 
self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 5ff1da1cc..17359506d 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -72,18 +73,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/mixed/external.py b/sfaira/data/human/mixed/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/mixed/external.py +++ b/sfaira/data/human/mixed/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 273ff0df8..80bfb0f1d 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import tarfile import pandas as pd @@ -149,23 +150,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/mixed/GSE126030.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = "Sims" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1038/s41467-019-12464-3" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = 
self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Sims" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41467-019-12464-3" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.obs["subtissue"] = self.adata.obs["organ"] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) # If the subset_organs() method has been run before, subset to specified organs if "organsubset" in self.__dict__: diff --git a/sfaira/data/human/muscle/external.py b/sfaira/data/human/muscle/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/muscle/external.py +++ b/sfaira/data/human/muscle/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index 8d924dab4..5285a2236 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index d960599d1..9868bf9f7 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from 
.external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/external.py b/sfaira/data/human/omentum/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/omentum/external.py +++ b/sfaira/data/human/omentum/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 8e1a29ac2..2d5cbfdd1 
100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 2c1ea3cc6..ca75e6835 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ 
b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index 49763f028..c142f5802 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing 
import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/external.py b/sfaira/data/human/pancreas/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/pancreas/external.py +++ b/sfaira/data/human/pancreas/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py 
index 20a355e7a..213331765 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -61,19 +62,19 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Yanai" - self.adata.uns["year"] = 2016 - self.adata.uns["doi"] = "10.1016/j.cels.2016.08.011" - self.adata.uns["protocol"] = 'inDrop' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.uns[ADATA_IDS.lab] = "Yanai" + self.adata.uns[ADATA_IDS.year] = 2016 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cels.2016.08.011" + self.adata.uns[ADATA_IDS.protocol] = 'inDrop' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index b8fe46882..21e237c34 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import pandas as pd @@ -68,21 +69,21 @@ def _load(self, fn=None): # filter observations which are not cells (empty wells, low quality cells etc.) self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - self.adata.uns["lab"] = "Sandberg" - self.adata.uns["year"] = 2016 - self.adata.uns["doi"] = "10.1016/j.cmet.2016.08.020" - self.adata.uns["protocol"] = 'Smartseq2' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Sandberg" + self.adata.uns[ADATA_IDS.year] = 2016 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2016.08.020" + self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["healthy"] = [True if line == 'normal' else False for line in 
self.adata.obs['Characteristics[disease]']] - self.adata.obs["state_exact"] = self.adata.obs['Characteristics[disease]'].astype('category') - self.adata.obs["state_exact"] = self.adata.obs["state_exact"].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) + self.adata.obs[ADATA_IDS.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] + self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') + self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs[ADATA_IDS.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) - self.adata.obs["cell_ontology_class"] = self.adata.obs['Characteristics[cell type]'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index ffe8d252a..17f9b617e 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import tarfile import gzip @@ -128,20 +129,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/GSE81547.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = 2017 - self.adata.uns["doi"] = "10.1016/j.cell.2017.09.004" - self.adata.uns["protocol"] = 'Smartseq2' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - 
self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = 2017 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2017.09.004" + self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self.adata.obs["cell_ontology_class"] = self.adata.obs['celltype'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 87e1b7d35..a4d3e1c6a 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -82,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - 
self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 2fbfae543..8b8db477d 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -82,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - 
self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 4a853229d..a1972f90e 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -82,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - 
self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 9e56450cc..623a6bc42 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -82,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] 
= self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/placenta/external.py b/sfaira/data/human/placenta/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/placenta/external.py +++ b/sfaira/data/human/placenta/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 0c583030d..01211766e 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import pandas as pd import anndata @@ -82,30 +83,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns["lab"] = 'Teichmann' - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = '10.1038/s41586-018-0698-6' 
- self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs["cell_ontology_class"] = self.adata.obs['annotation'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', 
'-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 73b575c47..6fa0eca75 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import pandas as pd import anndata @@ -82,30 +83,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns["lab"] = 'Teichmann' - self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = '10.1038/s41586-018-0698-6' - self.adata.uns["protocol"] = "Smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[ADATA_IDS.protocol] = "Smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs["cell_ontology_class"] = self.adata.obs['annotation'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = 
self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index 6f83acd15..ea19ecc32 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -75,17 +76,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/placenta/hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' 
+ self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pleura/external.py b/sfaira/data/human/pleura/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/pleura/external.py +++ b/sfaira/data/human/pleura/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 77e0d70c2..420ff7854 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pleura/hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = 
self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/external.py b/sfaira/data/human/prostate/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/prostate/external.py +++ b/sfaira/data/human/prostate/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 289a4f875..503693f54 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -55,20 +56,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Strand" - 
self.adata.uns["year"] = 2018 - self.adata.uns["doi"] = "10.1016/j.celrep.2018.11.086" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Strand" + self.adata.uns[ADATA_IDS.year] = 2018 + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.celrep.2018.11.086" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 7d12b433e..9583d2c60 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -64,18 +65,18 @@ 
def _load(self, fn=None): fn = os.path.join(self.path, "human/prostate/hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/rectum/external.py b/sfaira/data/human/rectum/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/rectum/external.py +++ b/sfaira/data/human/rectum/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index 9fb9c9840..adc138c01 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ 
b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np import scipy.sparse @@ -54,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Chen" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1084/jem.20191130" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index 633d1e4d1..9869ddae5 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -59,17 +60,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rectum/hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/rib/external.py b/sfaira/data/human/rib/external.py index 9f4e3db68..a4d155b9a 100644 --- 
a/sfaira/data/human/rib/external.py +++ b/sfaira/data/human/rib/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 3a39c234b..b0501b60d 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index d7ed31090..23db36fdc 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/skin/external.py b/sfaira/data/human/skin/external.py index 9f4e3db68..a4d155b9a 
100644 --- a/sfaira/data/human/skin/external.py +++ b/sfaira/data/human/skin/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 735eda4ca..1e8e0d6fa 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -73,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', 
ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 0e023b2a8..6e8684d5e 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -73,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spinalcord/external.py 
b/sfaira/data/human/spinalcord/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/spinalcord/external.py +++ b/sfaira/data/human/spinalcord/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 7ee1262d1..69c2b56e5 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spinalcord/hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/external.py b/sfaira/data/human/spleen/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/spleen/external.py +++ b/sfaira/data/human/spleen/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 5c4063379..e825156ac 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import scipy.sparse @@ -73,22 +74,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns["lab"] = "Meyer" - self.adata.uns["year"] = 2019 - self.adata.uns["doi"] = "10.1101/741405" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = "Meyer" + self.adata.uns[ADATA_IDS.year] = 2019 + self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs['Celltypes'] + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Celltypes'] self.set_unkown_class_id(ids=["Unknown"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7463846', - new_index='ensembl') + new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index 88ee3ee3f..af67bf446 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -65,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = 
self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 35c6be5f0..3bba3ead9 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -65,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + 
self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/external.py b/sfaira/data/human/stomach/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/stomach/external.py +++ b/sfaira/data/human/stomach/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index e1e3831a6..733a55869 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index fc83d816e..03fc54bb9 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + 
self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index 7eb67e803..b6aaad868 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index cf2df4df6..cd9154bd9 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntetsine_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index 20e4e8cce..604ab45c0 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index d429383b6..4c8c810b9 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 68ed90bfd..cd9b06a1f 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index b98844e92..ac902dcc0 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 1e09a35f6..c122528ca 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index 58a2fb6dc..da4e8dd33 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/external.py b/sfaira/data/human/thymus/external.py index 9f4e3db68..a4d155b9a 
100644 --- a/sfaira/data/human/thymus/external.py +++ b/sfaira/data/human/thymus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index bd62d0f1a..ea58c2d62 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata import numpy as np @@ -88,20 +89,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns["lab"] = "Teichmann" - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = "10.1126/science.aay3224" - self.adata.uns["protocol"] = '10x' - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' + self.adata.uns[ADATA_IDS.lab] = "Teichmann" + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = "10.1126/science.aay3224" + self.adata.uns[ADATA_IDS.protocol] = '10x' + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' - self.adata.obs["cell_ontology_class"] = self.adata.obs['Anno_level_fig1'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = 'healthy' + self.adata.obs[ADATA_IDS.cell_ontology_class] = 
self.adata.obs['Anno_level_fig1'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 28bd582bb..9f9a7834b 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -57,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index ac84d199f..fd15c0948 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -57,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/external.py b/sfaira/data/human/thyroid/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/thyroid/external.py +++ b/sfaira/data/human/thyroid/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index d208b424e..67b3b48f9 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 043daf36c..4873c054c 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/trachea/external.py b/sfaira/data/human/trachea/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/trachea/external.py +++ b/sfaira/data/human/trachea/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index eee074bc4..25eaf80d0 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/trachea/hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ureter/external.py b/sfaira/data/human/ureter/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/ureter/external.py +++ b/sfaira/data/human/ureter/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index fd5ac933e..e6edef218 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ureter/hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = 
"microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/uterus/external.py b/sfaira/data/human/uterus/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/human/uterus/external.py +++ b/sfaira/data/human/uterus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 24ba226e8..f6d95e932 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -1,6 +1,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS import anndata @@ -43,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/uterus/hcl_AdultUterus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns["lab"] = 'Guo' - self.adata.uns["year"] = 2020 - self.adata.uns["doi"] = '10.1038/s41586-020-2157-4' - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "human" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - 
self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.year] = 2020 + self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "human" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/bladder/external.py b/sfaira/data/mouse/bladder/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/bladder/external.py +++ b/sfaira/data/mouse/bladder/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 39f29ba3e..198763139 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -60,21 +61,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = 
"microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index e19bf95b8..71894cef7 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py 
@@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index 8dd1c4dee..f52109a85 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,19 +51,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/external.py b/sfaira/data/mouse/brain/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/brain/external.py +++ b/sfaira/data/mouse/brain/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index e0cac5c68..86cfacb57 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -58,21 +59,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + 
self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index 6f4fa79d5..a3480641f 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -58,21 +59,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = 
self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 887670bd5..191ee0821 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -64,23 +65,23 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names 
self.adata.var = var - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') + self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) - self.adata.uns["lab"] = "Movahedi" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1038/s41593-019-0393-4" - self.adata.uns["protocol"] = "microwell" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Movahedi" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1038/s41593-019-0393-4" + self.adata.uns[ADATA_IDS.protocol] = "microwell" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index bcda28f45..14af822c6 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -3,6 +3,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = 
True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index 49cf49058..bee23b0a0 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -3,6 +3,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing 
cells"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/diaphragm/external.py b/sfaira/data/mouse/diaphragm/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/diaphragm/external.py +++ b/sfaira/data/mouse/diaphragm/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 4bfc20d5f..30a5efe33 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,19 +53,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # 
self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/external.py b/sfaira/data/mouse/fat/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/fat/external.py +++ b/sfaira/data/mouse/fat/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py index ddb71b954..a947c1f06 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - 
self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py index 6ae2a20c3..82fe8f8bd 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external 
import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py index 
d7519c67d..0a01b969c 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py index ec66c4f87..df0e95c86 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py index e93410ad9..febe77a28 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = 
self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/heart/external.py b/sfaira/data/mouse/heart/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/heart/external.py +++ b/sfaira/data/mouse/heart/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index a668f2a9e..4eb9be00d 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is 
already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index 7bee524f7..521050224 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = 
self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index 113a05a7a..5c44cd140 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, 
fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/external.py b/sfaira/data/mouse/kidney/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/kidney/external.py +++ b/sfaira/data/mouse/kidney/external.py @@ -1 +1,2 @@ from 
sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index 68f5e7223..1a090fb7c 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -44,21 +45,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index ebb4540ab..b558e86fd 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -75,21 +76,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + 
self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index 5e1e0c8c2..ada193dc3 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -3,6 +3,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -55,21 +56,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] 
= self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index 5d97568c8..229db3b93 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -54,21 +55,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - 
self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/large_intestine/external.py b/sfaira/data/mouse/large_intestine/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/large_intestine/external.py +++ 
b/sfaira/data/mouse/large_intestine/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py index 0b7010fb2..1f7487c0d 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # 
self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 4cb70806c..488655f32 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + 
self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/external.py b/sfaira/data/mouse/limb_muscle/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/limb_muscle/external.py +++ b/sfaira/data/mouse/limb_muscle/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py index a91050ac8..bcb567396 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -61,22 +62,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" 
- self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py index de9fd3590..f292f0f66 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index a1436d46a..fd8204d13 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = 
self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/external.py b/sfaira/data/mouse/liver/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/liver/external.py +++ b/sfaira/data/mouse/liver/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index 131417f26..657fbf2e2 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -62,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + 
self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 2688fc5ab..607c69b39 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -56,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = 
"microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 39ea195f0..8106da0d5 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ 
import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 724c7c01d..6475c5bfa 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] 
= True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/external.py b/sfaira/data/mouse/lung/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/lung/external.py +++ b/sfaira/data/mouse/lung/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 4194c434c..1b30d387f 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -76,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + 
self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 0a084a485..019cfd443 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -76,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - 
self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 3f1b21d44..03d8ff093 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS 
class Dataset(DatasetBase): @@ -76,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index a61851467..8408ff169 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 056973243..007a8849c 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/external.py b/sfaira/data/mouse/mammary_gland/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/mammary_gland/external.py +++ b/sfaira/data/mouse/mammary_gland/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py index 43c767b27..00009fb32 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -60,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = 
self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py index 13a01a6da..b6e77fe4a 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -60,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + 
self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py index 9142e5089..36ade749d 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -60,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py index 2615c658b..e1ef2e8b5 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -60,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - 
self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py index 13adf557a..2a0c5b2ab 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,20 +51,20 @@ def _load(self, fn=None): 
self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index e1c1436fb..d798eb413 100644 --- 
a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/external.py b/sfaira/data/mouse/marrow/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/marrow/external.py +++ b/sfaira/data/mouse/marrow/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py index df2092dc2..5559ca1e5 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -59,22 +60,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + 
self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs['Annotation'] - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs['Annotation'] + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py index e0ee5bf18..51b86576f 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + 
self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py index 31a9fa0e8..2249300ba 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - 
self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/ovary/external.py b/sfaira/data/mouse/ovary/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/ovary/external.py +++ b/sfaira/data/mouse/ovary/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git 
a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py index f28051a0d..552190784 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -56,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = 
self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py index 412b8ec49..c88d9d28d 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -56,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/external.py b/sfaira/data/mouse/pancreas/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/pancreas/external.py +++ b/sfaira/data/mouse/pancreas/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index 4510fc358..a7b376c79 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -66,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = 
celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 2ef395265..639aab148 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index d09459d5a..7f929aacd 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, 
"nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index 11498807b..bb5f8e14a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + 
self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 5b9b592c6..74c0a61c0 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + 
self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 423f0d36b..6863f1b80 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - 
self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 7641e4c21..15ee99997 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() 
self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index 55e065fd7..0e877fe98 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 5cb3af4ac..36149a200 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + 
self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index 6d4d40260..f2784693c 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -57,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns["lab"] = "Bhushan" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1016/j.cmet.2019.01.021" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = celltypes + self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = celltypes - self.adata.obs["healthy"] = False - self.adata.obs["state_exact"] = "diabetic" + self.adata.obs[ADATA_IDS.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS.healthy] = False + self.adata.obs[ADATA_IDS.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index b60993e8c..a03486980 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -54,20 +55,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + 
self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/external.py b/sfaira/data/mouse/peripheral_blood/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/peripheral_blood/external.py +++ b/sfaira/data/mouse/peripheral_blood/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py index 63838308d..b3a422119 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -65,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py index 5779e420f..0255e4ac5 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -65,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py index 806a2a8df..3c9368e7c 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -65,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + 
self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py index ba0300acc..5aa2cceac 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -65,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" 
- self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index 36db6d2a1..771e2825a 100644 --- 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -65,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/external.py b/sfaira/data/mouse/placenta/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/placenta/external.py +++ b/sfaira/data/mouse/placenta/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 61cd8bae3..58d0cea03 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -72,21 +73,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" 
+ self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index ad2401ac3..8d58805ae 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -72,21 +73,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - 
self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/prostate/external.py b/sfaira/data/mouse/prostate/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/prostate/external.py +++ b/sfaira/data/mouse/prostate/external.py @@ -1 
+1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index d9430a650..232cd5e44 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = 
self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 545793907..01070bd48 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + 
self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/rib/external.py b/sfaira/data/mouse/rib/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/rib/external.py +++ b/sfaira/data/mouse/rib/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index af132a49a..ac1d0e4f3 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,21 +68,21 @@ def _load(self, fn=None): self.adata = 
self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 78d0b13c3..81a102255 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() 
- self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 34245b902..86ccb2a50 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + 
self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/skin/external.py b/sfaira/data/mouse/skin/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/skin/external.py +++ b/sfaira/data/mouse/skin/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 397363db3..20a55f165 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - 
self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index aded7f7bf..739ccd78b 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def 
_load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/external.py b/sfaira/data/mouse/small_intestine/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/small_intestine/external.py +++ 
b/sfaira/data/mouse/small_intestine/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py index 17f2293f0..6cd027f9f 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py index c866f90e8..f8231ed8a 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - 
self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py index cb1dd0343..c8f718761 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -67,22 +68,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index 
for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/external.py 
b/sfaira/data/mouse/spleen/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/spleen/external.py +++ b/sfaira/data/mouse/spleen/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 306ddb2e3..bba3afb06 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -55,21 +56,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] 
= self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 7067a41b4..11ec52199 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + 
self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 3aa007671..8baef2938 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -52,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = 
self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/stomach/external.py b/sfaira/data/mouse/stomach/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/stomach/external.py +++ b/sfaira/data/mouse/stomach/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index e8f96901f..067e46ab6 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from 
typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -61,21 +62,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/testis/external.py b/sfaira/data/mouse/testis/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/testis/external.py +++ b/sfaira/data/mouse/testis/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py index 6647e52e2..392a77526 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py index 7e36fc4f3..8249fbd2c 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - 
self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/external.py b/sfaira/data/mouse/thymus/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/thymus/external.py +++ b/sfaira/data/mouse/thymus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py 
index 3cd6bec4b..ea5974713 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -50,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index 20f125a02..d109ad046 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -51,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index f80047914..858802079 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -51,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = 
"10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/tongue/external.py b/sfaira/data/mouse/tongue/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/tongue/external.py +++ b/sfaira/data/mouse/tongue/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 3f85122e0..d0193b8f0 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - 
self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index 44d5ba04b..e826771e1 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -2,6 +2,7 @@ import os from typing import Union from .external import 
DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/trachae/external.py b/sfaira/data/mouse/trachae/external.py index 9f4e3db68..a4d155b9a 
100644 --- a/sfaira/data/mouse/trachae/external.py +++ b/sfaira/data/mouse/trachae/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py index 9fe60b810..a6f099ff3 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py @@ -3,6 +3,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -54,21 +55,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "10x" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "10x" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py index 4db9eee1a..be56897d6 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py @@ -3,6 +3,7 @@ import os from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -53,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.year] = "2019" + self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue 
+ self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'norm' + # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/uterus/external.py b/sfaira/data/mouse/uterus/external.py index 9f4e3db68..a4d155b9a 100644 --- a/sfaira/data/mouse/uterus/external.py +++ b/sfaira/data/mouse/uterus/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index 27f4e33e0..e3ba57883 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = 
"microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index 508a91e38..568a0e9bd 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -4,6 +4,7 @@ import pandas from typing import Union from .external import DatasetBase +from .external import ADATA_IDS class Dataset(DatasetBase): @@ -63,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.year] = "2018" + self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS.organ] = self.organ + self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS.animal] = "mouse" + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[ADATA_IDS.healthy] = True 
+ self.adata.obs[ADATA_IDS.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) From 4b082e596609ad95facb7eb45057a76a24a0e16f Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 12:28:20 +0100 Subject: [PATCH 003/161] adapted adata field refectoring in data base classs --- sfaira/consts.py | 25 ++++++ sfaira/data/base.py | 169 ++++++++++++++++++++++++---------------- sfaira/data/external.py | 1 + 3 files changed, 126 insertions(+), 69 deletions(-) diff --git a/sfaira/consts.py b/sfaira/consts.py index 8cc2a8179..ce9404987 100644 --- a/sfaira/consts.py +++ b/sfaira/consts.py @@ -5,12 +5,17 @@ class ADATA_IDS: """ def __init__(self): + self._age = "age" self._animal = "animal" self._cell_types_original = "cell_types_original" self._cell_ontology_class = "cell_ontology_class" self._cell_ontology_id = "cell_ontology_id" + self._dev_stage = "dev_stage" self._doi = "doi" + self._dataset = "dataset" + self._dataset_group = "dataset_group" self._gene_id_ensembl = "ensembl" + self._gene_id_names = "names" self._has_celltypes = "has_celltypes" self._healthy = "healthy" self._id = "id" @@ -23,6 +28,10 @@ def __init__(self): self._wget_download = "wget_download" self._year = "year" + @property + def age(self): + return self._age + @property def animal(self): return self._animal @@ -39,6 +48,18 @@ def cell_ontology_class(self): def cell_ontology_id(self): return self._cell_ontology_id + @property + def dataset(self): + return self._dataset + + @property + def dataset_group(self): + return self._dataset_group + + @property + def dev_stage(self): + return self._dev_stage + @property def doi(self): return self._doi @@ -47,6 +68,10 @@ def doi(self): def gene_id_ensembl(self): return self._gene_id_ensembl + @property + def gene_id_names(self): + return self._gene_id_names + @property def 
has_celltypes(self): return self._has_celltypes diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 11ec1390f..06dd8f72d 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -11,6 +11,7 @@ import warnings from .external import SuperGenomeContainer +from .external import ADATA_IDS class DatasetBase(abc.ABC): @@ -94,12 +95,12 @@ def load( self._load(fn=fn) - if 'cell_ontology_id' not in self.adata.obs.columns: - self.adata.obs["cell_ontology_id"] = None + if ADATA_IDS.cell_ontology_id not in self.adata.obs.columns: + self.adata.obs[ADATA_IDS.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - self.adata.obs["cell_ontology_class"] = self.map_ontology_class( - raw_ids=self.adata.obs["cell_ontology_class"].values, + self.adata.obs[ADATA_IDS.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[ADATA_IDS.cell_ontology_class].values, celltype_version=celltype_version ) @@ -143,8 +144,8 @@ def load( self.adata.obs_names = obs_names self.adata.var_names = new_index_collapsed new_index = new_index_collapsed - self.adata.var["ensembl"] = new_index - self.adata.var.index = self.adata.var['ensembl'].values + self.adata.var[ADATA_IDS.gene_id_ensembl] = new_index + self.adata.var.index = self.adata.var[ADATA_IDS.gene_id_ensembl].values # Match feature space to a genomes provided with sfaira if match_to_reference: @@ -160,7 +161,7 @@ def load( raise ValueError("data type %s not recognized" % type(self.adata.X)) # Compute indices of genes to keep - data_ids = self.adata.var["ensembl"].values + data_ids = self.adata.var[ADATA_IDS.gene_id_ensembl].values idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] idx_feature_map = np.array([self.genome_container.ensembl.index(x) for x in data_ids[idx_feature_kept]]) @@ -187,60 +188,90 @@ def load( obs=self.adata.obs, obsm=self.adata.obsm, var=pd.DataFrame(data={'names': self.genome_container.names, - 'ensembl': 
self.genome_container.ensembl}, + ADATA_IDS.gene_id_ensembl: self.genome_container.ensembl}, index=self.genome_container.ensembl), uns=self.adata.uns ) self.adata.uns['mapped_features'] = match_to_reference - def _convert_and_set_var_names(self, symbol_col: str = None, ensembl_col: str = None, new_index: str = 'ensembl'): - + def _convert_and_set_var_names( + self, + symbol_col: str = None, + ensembl_col: str = None, + new_index: str = ADATA_IDS.gene_id_ensembl + ): if symbol_col and ensembl_col: if symbol_col == 'index': self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename({'index': 'names'}, axis='columns') + self.adata.var = self.adata.var.reset_index().rename( + {'index': ADATA_IDS.gene_id_names}, + axis='columns' + ) else: - self.adata.var = self.adata.var.rename({symbol_col: 'names'}, axis='columns') + self.adata.var = self.adata.var.rename( + {symbol_col: ADATA_IDS.gene_id_names}, + axis='columns' + ) if ensembl_col == 'index': self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename({'index': 'ensembl'}, axis='columns') + self.adata.var = self.adata.var.reset_index().rename( + {'index': ADATA_IDS.gene_id_ensembl}, + axis='columns' + ) else: - self.adata.var = self.adata.var.rename({ensembl_col: 'ensembl'}, axis='columns') + self.adata.var = self.adata.var.rename( + {ensembl_col: ADATA_IDS.gene_id_ensembl}, + axis='columns' + ) elif symbol_col: id_dict = self.genome_container.names_to_id_dict id_strip_dict = self.genome_container.strippednames_to_id_dict if symbol_col == 'index': self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename({'index': 'names'}, axis='columns') + self.adata.var = self.adata.var.reset_index().rename( + {'index': ADATA_IDS.gene_id_names}, + axis='columns' + ) else: - self.adata.var = self.adata.var.rename({symbol_col: 'names'}, axis='columns') + self.adata.var = self.adata.var.rename( + {symbol_col: ADATA_IDS.gene_id_names}, + 
axis='columns' + ) # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, # match it straight away, if it is not in there we try to match everything in front of the first period in # the gene name with a dictionary that was modified in the same way, if there is still no match we append na ensids = [] - for n in self.adata.var["names"]: + for n in self.adata.var[ADATA_IDS.gene_id_names]: if n in id_dict.keys(): ensids.append(id_dict[n]) elif n.split(".")[0] in id_strip_dict.keys(): ensids.append(id_strip_dict[n.split(".")[0]]) else: ensids.append('n/a') - self.adata.var['ensembl'] = ensids + self.adata.var[ADATA_IDS.gene_id_ensembl] = ensids elif ensembl_col: id_dict = self.genome_container.id_to_names_dict if ensembl_col == 'index': self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename({'index': 'ensembl'}, axis='columns') + self.adata.var = self.adata.var.reset_index().rename( + {'index': ADATA_IDS.gene_id_ensembl}, + axis='columns' + ) else: - self.adata.var = self.adata.var.rename({ensembl_col: 'ensembl'}, axis='columns') + self.adata.var = self.adata.var.rename( + {ensembl_col: ADATA_IDS.gene_id_names}, + axis='columns' + ) - self.adata.var['names'] = [id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var['ensembl']] + self.adata.var[ADATA_IDS.gene_id_names] = [ + id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' + for n in self.adata.var[ADATA_IDS.gene_id_ensembl] + ] else: raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' @@ -300,8 +331,8 @@ def load_tobacked( x_new = self.adata.X adata_backed.X[np.sort(idx), :] = x_new[np.argsort(idx), :] for k in adata_backed.obs.columns: - if k == "dataset": - adata_backed.obs.loc[np.sort(idx), "dataset"] = [self.id for i in range(len(idx))] + if k == ADATA_IDS.dataset: + adata_backed.obs.loc[np.sort(idx), 
ADATA_IDS.dataset] = [self.id for i in range(len(idx))] elif k in self.adata.obs.columns: adata_backed.obs.loc[np.sort(idx), k] = self.adata.obs[k].values[np.argsort(idx)] elif k in list(self.adata.uns.keys()): @@ -321,7 +352,7 @@ def load_tobacked( adata_backed._n_obs = adata_backed.X.shape[0] # not automatically updated after append adata_backed.obs = adata_backed.obs.append( # .obs was not broadcasted to the right shape! pandas.DataFrame(dict([ - (k, [self.id for i in range(len(idx))]) if k == "dataset" + (k, [self.id for i in range(len(idx))]) if k == ADATA_IDS.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns else (k, [self.adata.uns[k] for i in range(len(idx))]) if k in list(self.adata.uns.keys()) else (k, ["key_not_found" for i in range(len(idx))]) @@ -341,9 +372,9 @@ def set_unkown_class_id(self, ids: list): target_id = "unknown" ontology_classes = [ x if x not in ids else target_id - for x in self.adata.obs["cell_ontology_class"].tolist() + for x in self.adata.obs[ADATA_IDS.cell_ontology_class].tolist() ] - self.adata.obs["cell_ontology_class"] = ontology_classes + self.adata.obs[ADATA_IDS.cell_ontology_class] = ontology_classes def _set_genome(self, genome: str @@ -400,14 +431,14 @@ def write_meta( self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ "ncells": self.adata.n_obs, - "animal": self.adata.uns["animal"], - "organ": self.adata.uns["organ"], - "subtissue": self.adata.uns["subtissue"], - "id": self.adata.uns["id"], - "lab": self.adata.uns["lab"], - "year": self.adata.uns["year"], - "protocol": self.adata.uns["protocol"], - "counts": self.adata.uns["counts"] if 'counts' in self.adata.uns.keys() else None, + "animal": self.adata.uns[ADATA_IDS.animal], + "organ": self.adata.uns[ADATA_IDS.organ], + "subtissue": self.adata.uns[ADATA_IDS.subtissue], + "id": self.adata.uns[ADATA_IDS.id], + "lab": self.adata.uns[ADATA_IDS.lab], + "year": self.adata.uns[ADATA_IDS.year], 
+ "protocol": self.adata.uns[ADATA_IDS.protocol], + "counts": self.adata.uns[ADATA_IDS.normalization] if ADATA_IDS.normalization in self.adata.uns.keys() else None, "has_celltypes": self.has_celltypes }, index=range(1)) meta.to_csv(fn_meta) @@ -559,15 +590,15 @@ def adata(self): adata_ls = self.adata_ls # Save uns attributes that are fixed for entire data set to .obs to retain during concatenation: for adata in adata_ls: - adata.obs["lab"] = adata.uns["lab"] - adata.obs["year"] = adata.uns["year"] - adata.obs["protocol"] = adata.uns["protocol"] - adata.obs["subtissue"] = adata.uns["subtissue"] - if "counts" in adata.uns.keys(): - adata.obs["counts"] = adata.uns["counts"] - if "dev_stage" in adata.obs.columns: - adata.obs["dev_stage"] = adata.uns["dev_stage"] - adata.obs["has_celltypes"] = adata.uns["has_celltypes"] + adata.obs[ADATA_IDS.lab] = adata.uns[ADATA_IDS.lab] + adata.obs[ADATA_IDS.year] = adata.uns[ADATA_IDS.year] + adata.obs[ADATA_IDS.protocol] = adata.uns[ADATA_IDS.protocol] + adata.obs[ADATA_IDS.subtissue] = adata.uns[ADATA_IDS.subtissue] + if ADATA_IDS.normalization in adata.uns.keys(): + adata.obs[ADATA_IDS.normalization] = adata.uns[ADATA_IDS.normalization] + if ADATA_IDS.dev_stage in adata.obs.columns: + adata.obs[ADATA_IDS.dev_stage] = adata.uns[ADATA_IDS.dev_stage] + adata.obs[ADATA_IDS.has_celltypes] = adata.uns[ADATA_IDS.has_celltypes] # Workaround related to anndata bugs: # TODO remove this in future. 
for adata in adata_ls: # Fix 1: @@ -577,13 +608,13 @@ def adata(self): if adata.uns is not None: keys_to_keep = [ 'neighbors', - "lab", - "year", - "protocol", - "subtissue", - "counts", - "dev_stage", - "has_celltypes", + ADATA_IDS.lab, + ADATA_IDS.year, + ADATA_IDS.protocol, + ADATA_IDS.subtissue, + ADATA_IDS.normalization, + ADATA_IDS.dev_stage, + ADATA_IDS.has_celltypes, "mapped_features" ] for k in list(adata.uns.keys()): @@ -596,7 +627,7 @@ def adata(self): # To preserve gene names in .var, the target gene names are copied into var_names and are then copied # back into .var. for adata in adata_ls: - adata.var.index = adata.var["ensembl"].tolist() + adata.var.index = adata.var[ADATA_IDS.gene_id_ensembl].tolist() if len(adata_ls) > 1: # TODO: need to keep this? -> yes, still catching errors here (March 2020) # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. @@ -604,18 +635,18 @@ def adata(self): adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key='dataset', + batch_key=ADATA_IDS.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) except ValueError: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key='dataset', + batch_key=ADATA_IDS.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) - adata_concat.var["ensembl"] = adata_concat.var.index + adata_concat.var[ADATA_IDS.gene_id_ensembl] = adata_concat.var.index if len(set([a.uns['mapped_features'] for a in adata_ls])) == 1: adata_concat.uns['mapped_features'] = adata_ls[0].uns['mapped_features'] @@ -623,7 +654,7 @@ def adata(self): adata_concat.uns['mapped_features'] = False else: adata_concat = adata_ls[0] - adata_concat.obs['dataset'] = self.ids[0] + adata_concat.obs[ADATA_IDS.dataset] = self.ids[0] adata_concat.var_names_make_unique() return adata_concat @@ -644,7 +675,7 @@ def obs_concat(self, keys: Union[list, None] = None): 
(k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns else (k, ["nan" for i in range(self.datasets[x].adata.obs.shape[0])]) for k in keys - ] + [("dataset", [x for i in range(self.datasets[x].adata.obs.shape[0])])] + ] + [(ADATA_IDS.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat @@ -780,7 +811,7 @@ def load_all( self.adata = self.dataset_groups[i].adata.concatenate( *[x.adata for x in self.dataset_groups[1:] if x is not None], join="outer", - batch_key='dataset_group' + batch_key=ADATA_IDS.dataset_group ) def load_all_tobacked( @@ -832,17 +863,17 @@ def load_all_tobacked( if not as_dense: self.adata.X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse keys = [ - "lab", - "year", - "protocol", - "organ", - "subtissue", - "cell_ontology_class", - "state_exact", - "counts", - "dev_stage", - "has_celltypes", - "dataset" + ADATA_IDS.lab, + ADATA_IDS.year, + ADATA_IDS.protocol, + ADATA_IDS.organ, + ADATA_IDS.subtissue, + ADATA_IDS.cell_ontology_class, + ADATA_IDS.state_exact, + ADATA_IDS.normalization, + ADATA_IDS.dev_stage, + ADATA_IDS.has_celltypes, + ADATA_IDS.dataset ] if scatter_update: self.adata.obs = pandas.DataFrame({ diff --git a/sfaira/data/external.py b/sfaira/data/external.py index b7f8e0ee3..97f4c333b 100644 --- a/sfaira/data/external.py +++ b/sfaira/data/external.py @@ -1 +1,2 @@ from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.consts import ADATA_IDS From 8aa4ef6e4e10d63e93a1255098692d9b32d9480e Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 14:19:30 +0100 Subject: [PATCH 004/161] updated cellxgene data loader to use refactored constants for adata fields --- sfaira/consts.py | 148 ++++++++++++++++++++++ sfaira/data/databases/cellxgene_loader.py | 53 ++++---- sfaira/data/databases/external.py | 1 + 3 files changed, 177 insertions(+), 25 deletions(-) 
diff --git a/sfaira/consts.py b/sfaira/consts.py index ce9404987..1b4f980db 100644 --- a/sfaira/consts.py +++ b/sfaira/consts.py @@ -3,6 +3,30 @@ class ADATA_IDS: """ Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ + _age: str + _animal: str + _cell_types_original: str + _cell_ontology_class: str + _cell_ontology_id: str + _dev_stage: str + _doi: str + _dataset: str + _dataset_group: str + _ethnicity: str + _gene_id_ensembl: str + _gene_id_names: str + _has_celltypes: str + _healthy: str + _id: str + _normalization: str + _lab: str + _organ: str + _protocol: str + _sex: str + _state_exact: str + _subtissue: str + _wget_download: str + _year: str def __init__(self): self._age = "age" @@ -14,6 +38,7 @@ def __init__(self): self._doi = "doi" self._dataset = "dataset" self._dataset_group = "dataset_group" + self._ethnicity = "ethnicity" self._gene_id_ensembl = "ensembl" self._gene_id_names = "names" self._has_celltypes = "has_celltypes" @@ -23,6 +48,7 @@ def __init__(self): self._lab = "lab" self._organ = "organ" self._protocol = "protocol" + self._sex = "sex" self._state_exact = "state_exact" self._subtissue = "subtissue" self._wget_download = "wget_download" @@ -64,6 +90,10 @@ def dev_stage(self): def doi(self): return self._doi + @property + def ethnicity(self): + return self._ethnicity + @property def gene_id_ensembl(self): return self._gene_id_ensembl @@ -100,6 +130,10 @@ def protocol(self): def organ(self): return self._organ + @property + def sex(self): + return self._sex + @property def subtissue(self): return self._subtissue @@ -116,3 +150,117 @@ def wget_download(self): def year(self): return self._year + +class ADATA_IDS_CELLXGENE: + """ + Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene + objects. 
+ """ + _age: str + _animal: str + _author: str + _author_names: str + _cell_types_original: str + _cell_ontology_class: str + _cell_ontology_id: str + _dev_stage: str + _dataset: str + _dataset_group: str + _disease: str + _disease_state_healthy: str + _ethnicity: str + _gene_id_ensembl: str + _gene_id_names: str + _protocol: str + _sex: str + + def __init__(self): + self._age = "age" + self._animal = "organism" + self._author = "contributors" + self._author_names = "names" + self._cell_types_original = "free_annotation" + self._cell_ontology_class = "cell_type" + self._cell_ontology_id = "cell_type_ontology_term_id" + self._dataset = "dataset" + self._dataset_group = "dataset_group" + self._dev_stage = "development_stage" + self._disease = "disease" + self._disease_state_healthy = "normal" + self._ethnicity = "ethnicity" + self._gene_id_ensembl = "name" + self._gene_id_names = "ensembl" + self._protocol = "assay" + self._sex = "sex" + + @property + def age(self): + return self._age + + @property + def animal(self): + return self._animal + + @property + def author(self): + return self._author + + @property + def author_names(self): + return self._author_names + + @property + def cell_types_original(self): + return self._cell_types_original + + @property + def cell_ontology_class(self): + return self._cell_ontology_class + + @property + def cell_ontology_id(self): + return self._cell_ontology_id + + @property + def dataset(self): + return self._dataset + + @property + def dataset_group(self): + return self._dataset_group + + @property + def dev_stage(self): + return self._dev_stage + + @property + def disease(self): + return self._disease + + @property + def disease_state_healthy(self): + return self._disease_state_healthy + + @property + def ethnicity(self): + return self._ethnicity + + @property + def gene_id_ensembl(self): + return self._gene_id_ensembl + + @property + def gene_id_names(self): + return self._gene_id_names + + + @property + def protocol(self): + 
return self._protocol + + @property + def sex(self): + return self._sex + + + diff --git a/sfaira/data/databases/cellxgene_loader.py b/sfaira/data/databases/cellxgene_loader.py index a8e1b840e..2f99311f6 100644 --- a/sfaira/data/databases/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene_loader.py @@ -1,10 +1,9 @@ +import anndata import numpy as np import os from typing import Union from .external import DatasetBase -import anndata - -from .external import DatasetGroupBase +from .external import ADATA_IDS, ADATA_IDS_CELLXGENE class Dataset(DatasetBase): @@ -41,35 +40,39 @@ def _load(self, fn=None): adata = anndata.read(fn) adata.X = adata.raw.X - self.adata.uns["lab"] = adata.uns["contributors"]["name"] - self.adata.uns["year"] = None - self.adata.uns["doi"] = None # TODO - if len(np.unique(adata.obs["organism"].values)) > 1: + self.adata.uns[ADATA_IDS.lab] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] + self.adata.uns[ADATA_IDS.year] = None + self.adata.uns[ADATA_IDS.doi] = None # TODO + if len(np.unique(adata.obs[ADATA_IDS.animal].values)) > 1: raise Warning("found multiple assay in data set %s" % self.fn) - self.adata.uns["protocol"] = adata.obs["assay"].values[0] + self.adata.uns[ADATA_IDS.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] # Select tissue: blood is handled as a separate tissue in .obs #if len(np.unique(adata.obs["tissue"].values)) > 1: # raise Warning("found multiple tissue in data set %s" % self.fn) #self.adata.uns["organ"] = adata.obs["tissue"].values[0] - self.adata.uns["organ"] = str(self.fn).split("_")[3] - if len(np.unique(adata.obs["organism"].values)) > 1: + self.adata.uns[ADATA_IDS.organ] = str(self.fn).split("_")[3] + if len(np.unique(adata.obs[ADATA_IDS.animal].values)) > 1: raise Warning("found multiple organisms in data set %s" % self.fn) - self.adata.uns["animal"] = adata.obs["organism"].values[0] - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = 
self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' + self.adata.uns[ADATA_IDS.animal] = adata.obs[ADATA_IDS_CELLXGENE.animal].values[0] + self.adata.uns[ADATA_IDS.id] = self.id + self.adata.uns[ADATA_IDS.wget_download] = self.download_website + self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs["subtissue"] = self.sub_tissue - self.adata.obs["dev_stage"] = adata.obs["development_stage"].values - self.adata.obs["sex"] = adata.obs["sex"].values - self.adata.obs["ethnicity"] = adata.obs["ethnicity"].values - self.adata.obs["healthy"] = adata.obs["disease"].values == "normal" - self.adata.obs["state_exact"] = adata.obs["disease"].values + self.adata.obs[ADATA_IDS.subtissue] = self.sub_tissue + self.adata.obs[ADATA_IDS.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values + self.adata.obs[ADATA_IDS.sex] = adata.obs[ADATA_IDS_CELLXGENE.sex].values + self.adata.obs[ADATA_IDS.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values + self.adata.obs[ADATA_IDS.healthy] = adata.obs[ADATA_IDS_CELLXGENE.disease].values == ADATA_IDS_CELLXGENE.disease_state_healthy + self.adata.obs[ADATA_IDS.state_exact] = adata.obs[ADATA_IDS_CELLXGENE.disease].values - self.adata.obs["cell_ontology_id"] = adata.obs["cell_type_ontology_term_id"].values.tolist() - self.adata.obs["cell_ontology_class"] = adata.obs["cell_type"].values.tolist() - self.adata.obs["cell_types_original"] = adata.obs["free_annotation"].values.tolist() + self.adata.obs[ADATA_IDS.cell_ontology_id] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_id].values.tolist() + self.adata.obs[ADATA_IDS.cell_ontology_class] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS.cell_types_original] = adata.obs[ADATA_IDS_CELLXGENE.cell_types_original].values.tolist() - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index='ensembl') + self._convert_and_set_var_names( + symbol_col=ADATA_IDS_CELLXGENE.gene_id_names, + ensembl_col=ADATA_IDS_CELLXGENE.gene_id_ensembl, + new_index=ADATA_IDS_CELLXGENE.gene_id_ensembl + ) diff --git a/sfaira/data/databases/external.py b/sfaira/data/databases/external.py index 9f4e3db68..6d7d44bbf 100644 --- a/sfaira/data/databases/external.py +++ b/sfaira/data/databases/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS, ADATA_IDS_CELLXGENE From 2a145fdcb2f265f9895a84b77692b6bb8749ed02 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 14:29:44 +0100 Subject: [PATCH 005/161] updated missing refactored gene id fields in data loaders --- .../data/human/adipose/human_adipose_2020_microwell_han_001.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_001.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_002.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_003.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_004.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_005.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_006.py | 2 +- sfaira/data/human/artery/human_artery_2020_microwell_han_001.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_001.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_002.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_003.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_001.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_002.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_003.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_004.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_005.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_006.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_007.py | 2 +- 
sfaira/data/human/bone/human_bone_2020_microwell_han_001.py | 2 +- sfaira/data/human/bone/human_bone_2020_microwell_han_002.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_001.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_002.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_003.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_004.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_005.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_006.py | 2 +- .../human/calvaria/human_calvaria_2020_microwell_han_001.py | 2 +- sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py | 2 +- .../human_chorionicvillus_2020_microwell_han_001.py | 2 +- sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_001.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_002.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_003.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_004.py | 2 +- .../human/duodenum/human_duodenum_2020_microwell_han_001.py | 2 +- .../human/epityphlon/human_epityphlon_2020_microwell_han_001.py | 2 +- .../human/esophagus/human_esophagus_2020_microwell_han_001.py | 2 +- .../human/esophagus/human_esophagus_2020_microwell_han_002.py | 2 +- sfaira/data/human/eye/human_eye_2020_microwell_han_001.py | 2 +- .../fallopiantube/human_fallopiantube_2020_microwell_han_001.py | 2 +- .../femalegonad/human_femalegonad_2020_microwell_han_001.py | 2 +- .../femalegonad/human_femalegonad_2020_microwell_han_002.py | 2 +- .../gallbladder/human_gallbladder_2020_microwell_han_001.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_001.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_002.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_003.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_004.py | 2 +- 
sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py | 2 +- sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py | 2 +- .../data/human/jejunum/human_jejunum_2020_microwell_han_001.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_001.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_002.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_003.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_004.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_005.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_001.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_002.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_003.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_004.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_005.py | 2 +- .../human/malegonad/human_malegonad_2020_microwell_han_001.py | 2 +- .../human/malegonad/human_malegonad_2020_microwell_han_002.py | 2 +- sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py | 2 +- sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_001.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_002.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_003.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_001.py 
| 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_002.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_003.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_004.py | 2 +- .../human/placenta/human_placenta_2018_10x_ventotormo_001.py | 2 +- .../placenta/human_placenta_2018_smartseq2_ventotormo_001.py | 2 +- .../human/placenta/human_placenta_2020_microwell_han_001.py | 2 +- sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py | 2 +- .../human/prostate/human_prostate_2020_microwell_han_001.py | 2 +- sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py | 2 +- sfaira/data/human/rib/human_rib_2020_microwell_han_001.py | 2 +- sfaira/data/human/rib/human_rib_2020_microwell_han_002.py | 2 +- sfaira/data/human/skin/human_skin_2020_microwell_han_001.py | 2 +- sfaira/data/human/skin/human_skin_2020_microwell_han_002.py | 2 +- .../human/spinalcord/human_spinalcord_2020_microwell_han_001.py | 2 +- sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py | 2 +- sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_001.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_002.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_003.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_004.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_005.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_006.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_007.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_008.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_009.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_010.py | 2 +- sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py | 2 +- sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py | 2 +- 
.../data/human/thyroid/human_thyroid_2020_microwell_han_001.py | 2 +- .../data/human/thyroid/human_thyroid_2020_microwell_han_002.py | 2 +- .../data/human/trachea/human_trachea_2020_microwell_han_001.py | 2 +- sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py | 2 +- sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py | 2 +- .../data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py | 2 +- 110 files changed, 110 insertions(+), 110 deletions(-) diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 8e30b8219..9de2fdf75 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 9f2f2abc7..1c14752b0 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 7fd2962ee..b2f4f1110 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index b84a42735..f35148c4a 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 97a4e963a..d69eb2c07 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 86b16cb6e..8476743d6 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 2818399b4..745b38862 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index f45d4eadf..1dbfbce69 100644 --- 
a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index f1b75c65e..4c4562434 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 570145175..9767f3b5f 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index f13a35aa4..77c485d57 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index ca485bb3e..a92898424 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index c621d8d69..6b2940b68 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 90b9de279..f9fc685b9 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index f666730d8..c54ae255e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 405fd7232..8fa8c9a9a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index 2dfbf824e..a7bc6b690 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index f6c9988d7..ab033d684 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index 8c1dc77c2..8b7d9bebb 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -57,5 
+57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 5729b893c..fbc20f730 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index b663b1a56..a0bb0a12d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 450e4ba7e..120e5a2f0 100644 --- 
a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 390605718..1d334f4bb 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index 17ba2ad33..d67d77caa 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index edbe29c8a..c21f57b48 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index 2eecc8d91..8773fe541 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index f103428b7..c9b963260 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 6401d36fa..8da363801 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index 3d10a937e..e6cdc5c47 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index 7a607b32c..89b8b3241 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -147,4 +147,4 @@ def _load(self, fn=None): 
self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs[ADATA_IDS.state_exact]\ .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - self._convert_and_set_var_names(symbol_col='names', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index ac4db7ecf..4a2d3ae56 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 46fbda2d6..555decf66 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 
a928ff480..59cb160e7 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 0df344dcc..36f84e4bf 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 12dbae9cc..7802950a0 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) 
diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index 2dd5617a7..a467adeab 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 4b2b9ee4a..d69e823b5 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -82,4 +82,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 2c8e22d6c..0362fa385 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -82,4 +82,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index 51c88c859..89cae1dd7 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -81,5 +81,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index eb635aee1..10138616c 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 7566194d8..29e23e826 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ 
b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 79da1d4ac..0ae5a7cc6 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index dd3476d37..4bf294880 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index c38755a8d..90b917f60 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 94a9ac30f..9d0ec7791 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 59a34f92e..70f3370ae 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index e947faf65..1e4d0f55b 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index 8eeaa4f45..9bea6a980 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 98b87b328..5cc1b544a 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -84,5 +84,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index ec836f332..710bb6b30 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -57,4 +57,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index 1bd2c52e0..b2eec4be5 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -112,4 +112,4 @@ def _load(self, fn=None): self.adata.obs[ADATA_IDS.healthy] = True self.adata.obs[ADATA_IDS.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 6a5590593..18757a74a 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ 
-107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index e4b0943cd..3b6915dcd 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 9db23183b..859553d7f 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 729555170..1f1949771 
100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 15c6c89d9..112849f12 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index b6a90b195..e6e2cf1fd 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 4ac0e9ba2..9d6b6f1c7 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index c1a2edc26..353aca5bb 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index b6e8e437f..25c6ccf83 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 03f612568..42601d1b4 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index 843a3743e..cd2c95b29 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 7a8359857..4f2347383 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 9805e8aa0..41ac5376a 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 96a2a1e9e..52130956a 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 3cd8e4a48..e6522c3d8 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -108,5 +108,5 @@ def 
_load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index c75e9ed28..9d8782c6a 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 631af8494..5f364a81c 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index 56934ea9a..fa39456ff 100644 --- 
a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -86,5 +86,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 17359506d..74a837ed5 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -86,5 +86,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index 5285a2236..ddd1ec9e1 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) 
diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 9868bf9f7..9d7acada6 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 2d5cbfdd1..b8886f7f8 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index ca75e6835..8f14ba488 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index c142f5802..cd413eecd 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index a4d3e1c6a..87fef7d0a 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 8b8db477d..478bfcb4e 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -96,4 +96,4 @@ def 
_load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index a1972f90e..6a26abd06 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 623a6bc42..0718acdbb 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 
01211766e..fc6907aea 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -106,7 +106,7 @@ def _load(self, fn=None): self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 6fa0eca75..5c9d84a7f 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -106,7 +106,7 @@ def _load(self, fn=None): self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index ea19ecc32..7cbf9bc92 100644 --- 
a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -89,4 +89,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 420ff7854..7e87b61fa 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 9583d2c60..e4643c1f2 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -78,5 +78,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git 
a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index 9869ddae5..b43bedc49 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -73,4 +73,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index b0501b60d..1fbe46715 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index 23db36fdc..c60be9e12 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 1e8e0d6fa..638a1db5d 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -87,5 +87,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 6e8684d5e..205a5231b 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -87,5 +87,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 69c2b56e5..20c1e9bc9 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index af67bf446..84a84cbf8 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -79,4 +79,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 3bba3ead9..ce8806721 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -79,4 +79,4 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 733a55869..d13e95e29 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ 
b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index 03fc54bb9..404af4d2c 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index b6aaad868..efdded1e6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py 
b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index cd9154bd9..2a9a5cdb6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index 604ab45c0..de4bc04ce 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 4c8c810b9..b8473e20d 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index cd9b06a1f..13f6903ab 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index ac902dcc0..18e76408a 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index c122528ca..f1985a4e4 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 
'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index da4e8dd33..ce6b94e98 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 9f9a7834b..9da737f92 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -71,5 +71,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index fd15c0948..07f9bc445 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ 
b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -71,5 +71,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 67b3b48f9..bdba21705 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 4873c054c..0cf4a1b2d 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py 
b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index 25eaf80d0..3a1fd4e23 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index e6edef218..46efc85ac 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index f6d95e932..d0d047f5e 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, 
ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 191ee0821..7e844a29e 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -65,7 +65,7 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names self.adata.var = var - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) From d70f5240d6aebe1bbbe9e61f60610c3f404d88cd Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 18:16:37 +0100 Subject: [PATCH 006/161] refactored adata fields constant container classes to reflect core shared features --- sfaira/api/consts.py | 2 +- sfaira/consts/__init__.py | 1 + sfaira/{consts.py => consts/adata_fields.py} | 227 +++++++----------- sfaira/data/base.py | 138 +++++------ sfaira/data/databases/cellxgene_loader.py | 7 +- sfaira/data/databases/external.py | 3 +- sfaira/data/external.py | 2 +- sfaira/data/human/adipose/external.py | 2 +- .../human_adipose_2020_microwell_han_001.py | 4 +- sfaira/data/human/adrenalgland/external.py | 2 +- ...man_adrenalgland_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_002.py | 4 +- ...man_adrenalgland_2020_microwell_han_003.py | 4 +- ...man_adrenalgland_2020_microwell_han_004.py | 4 +- ...man_adrenalgland_2020_microwell_han_005.py | 4 +- ...man_adrenalgland_2020_microwell_han_006.py | 4 +- sfaira/data/human/artery/external.py | 2 +- .../human_artery_2020_microwell_han_001.py | 4 +- 
sfaira/data/human/bladder/external.py | 2 +- .../human_bladder_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_002.py | 4 +- .../human_bladder_2020_microwell_han_003.py | 4 +- sfaira/data/human/blood/external.py | 2 +- .../blood/human_blood_2018_10x_ica_001.py | 4 +- .../human_blood_2019_10x_10xGenomics_001.py | 4 +- .../human_blood_2020_microwell_han_001.py | 4 +- .../human_blood_2020_microwell_han_002.py | 4 +- .../human_blood_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_004.py | 4 +- .../human_blood_2020_microwell_han_005.py | 4 +- .../human_blood_2020_microwell_han_006.py | 4 +- .../human_blood_2020_microwell_han_007.py | 4 +- sfaira/data/human/bone/external.py | 2 +- .../human/bone/human_bone_2018_10x_ica_001.py | 4 +- .../bone/human_bone_2020_microwell_han_001.py | 4 +- .../bone/human_bone_2020_microwell_han_002.py | 4 +- sfaira/data/human/brain/external.py | 2 +- .../human_brain_2017_DroNcSeq_habib_001.py | 4 +- .../human_brain_2020_microwell_han_001.py | 4 +- .../human_brain_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_003.py | 4 +- .../human_brain_2020_microwell_han_004.py | 4 +- .../human_brain_2020_microwell_han_005.py | 4 +- .../human_brain_2020_microwell_han_006.py | 4 +- sfaira/data/human/calvaria/external.py | 2 +- .../human_calvaria_2020_microwell_han_001.py | 4 +- sfaira/data/human/cervix/external.py | 2 +- .../human_cervix_2020_microwell_han_001.py | 4 +- sfaira/data/human/chorionicvillus/external.py | 2 +- ..._chorionicvillus_2020_microwell_han_001.py | 4 +- sfaira/data/human/colon/external.py | 2 +- .../colon/human_colon_2019_10x_kinchen_001.py | 4 +- .../colon/human_colon_2019_10x_smilie_001.py | 4 +- .../colon/human_colon_2019_10x_wang_001.py | 4 +- .../colon/human_colon_2020_10x_james_001.py | 4 +- .../human_colon_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_002.py | 4 +- .../human_colon_2020_microwell_han_003.py | 4 +- 
.../human_colon_2020_microwell_han_004.py | 4 +- sfaira/data/human/duodenum/external.py | 2 +- .../human_duodenum_2020_microwell_han_001.py | 4 +- sfaira/data/human/epityphlon/external.py | 2 +- ...human_epityphlon_2020_microwell_han_001.py | 4 +- sfaira/data/human/esophagus/external.py | 2 +- .../human_esophagus_2019_10x_madissoon_001.py | 4 +- .../human_esophagus_2020_microwell_han_001.py | 4 +- .../human_esophagus_2020_microwell_han_002.py | 4 +- sfaira/data/human/eye/external.py | 2 +- .../eye/human_eye_2019_10x_lukowski_001.py | 4 +- .../human/eye/human_eye_2019_10x_menon_001.py | 4 +- .../human/eye/human_eye_2019_10x_voigt_001.py | 4 +- .../eye/human_eye_2020_microwell_han_001.py | 4 +- sfaira/data/human/fallopiantube/external.py | 2 +- ...an_fallopiantube_2020_microwell_han_001.py | 4 +- sfaira/data/human/femalegonad/external.py | 2 +- ...uman_femalegonad_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_002.py | 4 +- sfaira/data/human/gallbladder/external.py | 2 +- ...uman_gallbladder_2020_microwell_han_001.py | 4 +- sfaira/data/human/heart/external.py | 2 +- .../human_heart_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_002.py | 4 +- .../human_heart_2020_microwell_han_003.py | 4 +- .../human_heart_2020_microwell_han_004.py | 4 +- sfaira/data/human/hesc/external.py | 2 +- .../hesc/human_hesc_2020_microwell_han_001.py | 4 +- sfaira/data/human/ileum/external.py | 2 +- .../ileum/human_ileum_2019_10x_martin_001.py | 4 +- .../ileum/human_ileum_2019_10x_wang_001.py | 4 +- .../human_ileum_2020_microwell_han_001.py | 4 +- sfaira/data/human/jejunum/external.py | 2 +- .../human_jejunum_2020_microwell_han_001.py | 4 +- sfaira/data/human/kidney/external.py | 2 +- .../human_kidney_2019_10xSn_lake_001.py | 4 +- .../human_kidney_2019_10x_stewart_001.py | 4 +- .../kidney/human_kidney_2020_10x_liao_001.py | 4 +- .../human_kidney_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_002.py | 4 +- 
.../human_kidney_2020_microwell_han_003.py | 4 +- .../human_kidney_2020_microwell_han_004.py | 4 +- .../human_kidney_2020_microwell_han_005.py | 4 +- .../human_kidney_2020_microwell_han_006.py | 4 +- .../human_kidney_2020_microwell_han_007.py | 4 +- sfaira/data/human/liver/external.py | 2 +- .../human_liver_2018_10x_macparland_001.py | 4 +- .../liver/human_liver_2019_10x_popescu_001.py | 4 +- .../human_liver_2019_10x_ramachandran_001.py | 4 +- .../human_liver_2019_mCELSeq2_aizarani_001.py | 4 +- .../human_liver_2020_microwell_han_001.py | 4 +- .../human_liver_2020_microwell_han_002.py | 4 +- .../human_liver_2020_microwell_han_003.py | 4 +- .../human_liver_2020_microwell_han_004.py | 4 +- .../human_liver_2020_microwell_han_005.py | 4 +- sfaira/data/human/lung/external.py | 2 +- .../lung/human_lung_2019_10x_braga_001.py | 4 +- .../lung/human_lung_2019_10x_braga_002.py | 4 +- .../lung/human_lung_2019_10x_madissoon_001.py | 4 +- .../lung/human_lung_2019_dropseq_braga_003.py | 4 +- .../lung/human_lung_2020_10x_habermann_001.py | 4 +- .../lung/human_lung_2020_10x_lukassen_001.py | 4 +- .../lung/human_lung_2020_10x_lukassen_002.py | 4 +- .../lung/human_lung_2020_10x_miller_001.py | 4 +- .../human_lung_2020_10x_travaglini_001.py | 4 +- .../lung/human_lung_2020_microwell_han_001.py | 4 +- .../lung/human_lung_2020_microwell_han_002.py | 4 +- .../lung/human_lung_2020_microwell_han_003.py | 4 +- .../lung/human_lung_2020_microwell_han_004.py | 4 +- .../lung/human_lung_2020_microwell_han_005.py | 4 +- ...uman_lung_2020_smartseq2_travaglini_002.py | 4 +- sfaira/data/human/malegonad/external.py | 2 +- .../human_malegonad_2018_10x_guo_001.py | 4 +- .../human_malegonad_2020_microwell_han_001.py | 4 +- .../human_malegonad_2020_microwell_han_002.py | 4 +- sfaira/data/human/mixed/external.py | 2 +- .../mixed/human_mixed_2019_10x_szabo_001.py | 4 +- sfaira/data/human/muscle/external.py | 2 +- .../human_muscle_2020_microwell_han_001.py | 4 +- .../human_muscle_2020_microwell_han_002.py | 
4 +- sfaira/data/human/omentum/external.py | 2 +- .../human_omentum_2020_microwell_han_001.py | 4 +- .../human_omentum_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_003.py | 4 +- sfaira/data/human/pancreas/external.py | 2 +- .../human_pancreas_2016_indrop_baron_001.py | 4 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 4 +- .../human_pancreas_2017_smartseq2_enge_001.py | 4 +- .../human_pancreas_2020_microwell_han_001.py | 4 +- .../human_pancreas_2020_microwell_han_002.py | 4 +- .../human_pancreas_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_004.py | 4 +- sfaira/data/human/placenta/external.py | 2 +- .../human_placenta_2018_10x_ventotormo_001.py | 4 +- ..._placenta_2018_smartseq2_ventotormo_001.py | 4 +- .../human_placenta_2020_microwell_han_001.py | 4 +- sfaira/data/human/pleura/external.py | 2 +- .../human_pleura_2020_microwell_han_001.py | 4 +- sfaira/data/human/prostate/external.py | 2 +- .../human_prostate_2018_10x_henry_001.py | 4 +- .../human_prostate_2020_microwell_han_001.py | 4 +- sfaira/data/human/rectum/external.py | 2 +- .../rectum/human_rectum_2019_10x_wang_001.py | 4 +- .../human_rectum_2020_microwell_han_001.py | 4 +- sfaira/data/human/rib/external.py | 2 +- .../rib/human_rib_2020_microwell_han_001.py | 4 +- .../rib/human_rib_2020_microwell_han_002.py | 4 +- sfaira/data/human/skin/external.py | 2 +- .../skin/human_skin_2020_microwell_han_001.py | 4 +- .../skin/human_skin_2020_microwell_han_002.py | 4 +- sfaira/data/human/spinalcord/external.py | 2 +- ...human_spinalcord_2020_microwell_han_001.py | 4 +- sfaira/data/human/spleen/external.py | 2 +- .../human_spleen_2019_10x_madissoon_001.py | 4 +- .../human_spleen_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_002.py | 4 +- sfaira/data/human/stomach/external.py | 2 +- .../human_stomach_2020_microwell_han_001.py | 4 +- .../human_stomach_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_003.py | 4 +- 
.../human_stomach_2020_microwell_han_004.py | 4 +- .../human_stomach_2020_microwell_han_005.py | 4 +- .../human_stomach_2020_microwell_han_006.py | 4 +- .../human_stomach_2020_microwell_han_007.py | 4 +- .../human_stomach_2020_microwell_han_008.py | 4 +- .../human_stomach_2020_microwell_han_009.py | 4 +- .../human_stomach_2020_microwell_han_010.py | 4 +- sfaira/data/human/thymus/external.py | 2 +- .../thymus/human_thymus_2020_10x_park_001.py | 4 +- .../human_thymus_2020_microwell_han_001.py | 4 +- .../human_thymus_2020_microwell_han_002.py | 4 +- sfaira/data/human/thyroid/external.py | 2 +- .../human_thyroid_2020_microwell_han_001.py | 4 +- .../human_thyroid_2020_microwell_han_002.py | 4 +- sfaira/data/human/trachea/external.py | 2 +- .../human_trachea_2020_microwell_han_001.py | 4 +- sfaira/data/human/ureter/external.py | 2 +- .../human_ureter_2020_microwell_han_001.py | 4 +- sfaira/data/human/uterus/external.py | 2 +- .../human_uterus_2020_microwell_han_001.py | 4 +- sfaira/data/mouse/bladder/external.py | 2 +- .../mouse_bladder_2018_microwell_han_001.py | 4 +- .../mouse_bladder_2019_10x_pisco_001.py | 4 +- .../mouse_bladder_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/brain/external.py | 2 +- .../mouse_brain_2018_microwell_han_001.py | 4 +- .../mouse_brain_2018_microwell_han_002.py | 4 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 4 +- .../mouse_brain_2019_smartseq2_pisco_001.py | 4 +- .../mouse_brain_2019_smartseq2_pisco_002.py | 4 +- sfaira/data/mouse/diaphragm/external.py | 2 +- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/fat/external.py | 2 +- .../mouse/fat/mouse_fat_2019_10x_pisco_001.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_001.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_002.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_003.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_004.py | 4 +- sfaira/data/mouse/heart/external.py | 2 +- .../heart/mouse_heart_2019_10x_pisco_001.py | 4 +- 
.../mouse_heart_2019_smartseq2_pisco_001.py | 4 +- .../mouse_heart_2019_smartseq2_pisco_002.py | 4 +- sfaira/data/mouse/kidney/external.py | 2 +- .../mouse_kidney_2018_microwell_han_001.py | 4 +- .../mouse_kidney_2018_microwell_han_002.py | 4 +- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 4 +- .../mouse_kidney_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/large_intestine/external.py | 2 +- ...ouse_large_intestine_2019_10x_pisco_001.py | 4 +- ...arge_intestine_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/limb_muscle/external.py | 2 +- ...ouse_limb_muscle_2018_microwell_han_001.py | 4 +- .../mouse_limb_muscle_2019_10x_pisco_001.py | 4 +- ...se_limb_muscle_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/liver/external.py | 2 +- .../mouse_liver_2018_microwell_han_001.py | 4 +- .../mouse_liver_2018_microwell_han_002.py | 4 +- .../liver/mouse_liver_2019_10x_pisco_001.py | 4 +- .../mouse_liver_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/lung/external.py | 2 +- .../lung/mouse_lung_2018_microwell_han_001.py | 4 +- .../lung/mouse_lung_2018_microwell_han_002.py | 4 +- .../lung/mouse_lung_2018_microwell_han_003.py | 4 +- .../lung/mouse_lung_2019_10x_pisco_001.py | 4 +- .../mouse_lung_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/mammary_gland/external.py | 2 +- ...se_mammary_gland_2018_microwell_han_001.py | 4 +- ...se_mammary_gland_2018_microwell_han_002.py | 4 +- ...se_mammary_gland_2018_microwell_han_003.py | 4 +- ...se_mammary_gland_2018_microwell_han_004.py | 4 +- .../mouse_mammary_gland_2019_10x_pisco_001.py | 4 +- ..._mammary_gland_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/marrow/external.py | 2 +- .../marrow/mouse_marrow_2018_microwell_001.py | 4 +- .../marrow/mouse_marrow_2019_10x_pisco_001.py | 4 +- .../mouse_marrow_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/ovary/external.py | 2 +- .../mouse_ovary_2018_microwell_han_001.py | 4 +- .../mouse_ovary_2018_microwell_han_002.py | 4 +- 
sfaira/data/mouse/pancreas/external.py | 2 +- .../mouse_pancreas_2018_microwell_han_001.py | 4 +- .../mouse_pancreas_2019_10x_pisco_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_002.py | 4 +- .../mouse_pancreas_2019_10x_thompson_003.py | 4 +- .../mouse_pancreas_2019_10x_thompson_004.py | 4 +- .../mouse_pancreas_2019_10x_thompson_005.py | 4 +- .../mouse_pancreas_2019_10x_thompson_006.py | 4 +- .../mouse_pancreas_2019_10x_thompson_007.py | 4 +- .../mouse_pancreas_2019_10x_thompson_008.py | 4 +- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 4 +- .../data/mouse/peripheral_blood/external.py | 2 +- ...peripheral_blood_2018_microwell_han_001.py | 4 +- ...peripheral_blood_2018_microwell_han_002.py | 4 +- ...peripheral_blood_2018_microwell_han_003.py | 4 +- ...peripheral_blood_2018_microwell_han_004.py | 4 +- ...peripheral_blood_2018_microwell_han_005.py | 4 +- sfaira/data/mouse/placenta/external.py | 2 +- .../mouse_placenta_2018_microwell_han_001.py | 4 +- .../mouse_placenta_2018_microwell_han_002.py | 4 +- sfaira/data/mouse/prostate/external.py | 2 +- .../mouse_prostate_2018_microwell_han_001.py | 4 +- .../mouse_prostate_2018_microwell_han_002.py | 4 +- sfaira/data/mouse/rib/external.py | 2 +- .../rib/mouse_rib_2018_microwell_han_001.py | 4 +- .../rib/mouse_rib_2018_microwell_han_002.py | 4 +- .../rib/mouse_rib_2018_microwell_han_003.py | 4 +- sfaira/data/mouse/skin/external.py | 2 +- .../skin/mouse_skin_2019_10x_pisco_001.py | 4 +- .../mouse_skin_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/small_intestine/external.py | 2 +- ..._small_intestine_2018_microwell_han_001.py | 4 +- ..._small_intestine_2018_microwell_han_002.py | 4 +- ..._small_intestine_2018_microwell_han_003.py | 4 +- sfaira/data/mouse/spleen/external.py | 2 +- .../mouse_spleen_2018_microwell_han_001.py | 4 +- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 4 +- .../mouse_spleen_2019_smartseq2_pisco_001.py | 4 +- 
sfaira/data/mouse/stomach/external.py | 2 +- .../mouse_stomach_2018_microwell_han_001.py | 4 +- sfaira/data/mouse/testis/external.py | 2 +- .../mouse_testis_2018_microwell_han_001.py | 4 +- .../mouse_testis_2018_microwell_han_002.py | 4 +- sfaira/data/mouse/thymus/external.py | 2 +- .../mouse_thymus_2018_microwell_han_001.py | 4 +- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 4 +- .../mouse_thymus_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/tongue/external.py | 2 +- .../tongue/mouse_tongue_2019_10x_pisco_001.py | 4 +- .../mouse_tongue_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/trachae/external.py | 2 +- .../mouse_trachea_2019_10x_pisco_001.py | 4 +- .../mouse_trachea_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/uterus/external.py | 2 +- .../mouse_uterus_2018_microwell_han_001.py | 4 +- .../mouse_uterus_2018_microwell_han_002.py | 4 +- 314 files changed, 707 insertions(+), 759 deletions(-) create mode 100644 sfaira/consts/__init__.py rename sfaira/{consts.py => consts/adata_fields.py} (73%) diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py index 45e9646bc..8e2dcfe64 100644 --- a/sfaira/api/consts.py +++ b/sfaira/api/consts.py @@ -1 +1 @@ -from sfaira.const import ADATA_IDS \ No newline at end of file +from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE \ No newline at end of file diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py new file mode 100644 index 000000000..68c8acb49 --- /dev/null +++ b/sfaira/consts/__init__.py @@ -0,0 +1 @@ +from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA diff --git a/sfaira/consts.py b/sfaira/consts/adata_fields.py similarity index 73% rename from sfaira/consts.py rename to sfaira/consts/adata_fields.py index 1b4f980db..ccc84e12e 100644 --- a/sfaira/consts.py +++ b/sfaira/consts/adata_fields.py @@ -1,67 +1,36 @@ -class ADATA_IDS: +class ADATA_IDS_BASE: """ - Class of constant field names of anndata.AnnData object entries, such as .uns keys and 
.obs columns. + Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ - _age: str _animal: str + _author: str _cell_types_original: str _cell_ontology_class: str _cell_ontology_id: str - _dev_stage: str _doi: str _dataset: str _dataset_group: str - _ethnicity: str _gene_id_ensembl: str _gene_id_names: str _has_celltypes: str _healthy: str _id: str _normalization: str - _lab: str _organ: str _protocol: str - _sex: str - _state_exact: str _subtissue: str _wget_download: str _year: str - def __init__(self): - self._age = "age" - self._animal = "animal" - self._cell_types_original = "cell_types_original" - self._cell_ontology_class = "cell_ontology_class" - self._cell_ontology_id = "cell_ontology_id" - self._dev_stage = "dev_stage" - self._doi = "doi" - self._dataset = "dataset" - self._dataset_group = "dataset_group" - self._ethnicity = "ethnicity" - self._gene_id_ensembl = "ensembl" - self._gene_id_names = "names" - self._has_celltypes = "has_celltypes" - self._healthy = "healthy" - self._id = "id" - self._normalization = "normalization" - self._lab = "lab" - self._organ = "organ" - self._protocol = "protocol" - self._sex = "sex" - self._state_exact = "state_exact" - self._subtissue = "subtissue" - self._wget_download = "wget_download" - self._year = "year" - - @property - def age(self): - return self._age - @property def animal(self): return self._animal + @property + def author(self): + return self._author + @property def cell_types_original(self): return self._cell_types_original @@ -82,18 +51,10 @@ def dataset(self): def dataset_group(self): return self._dataset_group - @property - def dev_stage(self): - return self._dev_stage - @property def doi(self): return self._doi - @property - def ethnicity(self): - return self._ethnicity - @property def gene_id_ensembl(self): return self._gene_id_ensembl @@ -114,10 +75,6 @@ def healthy(self): def id(self): return self._id - @property - def lab(self): - return 
self._lab - @property def normalization(self): return self._normalization @@ -130,18 +87,10 @@ def protocol(self): def organ(self): return self._organ - @property - def sex(self): - return self._sex - @property def subtissue(self): return self._subtissue - @property - def state_exact(self): - return self._state_exact - @property def wget_download(self): return self._wget_download @@ -151,116 +100,114 @@ def year(self): return self._year -class ADATA_IDS_CELLXGENE: +class ADATA_IDS_EXTENDED(ADATA_IDS_BASE): """ - Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene - objects. + Base class with extended set of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ _age: str - _animal: str - _author: str - _author_names: str - _cell_types_original: str - _cell_ontology_class: str - _cell_ontology_id: str _dev_stage: str - _dataset: str - _dataset_group: str - _disease: str - _disease_state_healthy: str _ethnicity: str - _gene_id_ensembl: str - _gene_id_names: str - _protocol: str _sex: str - - def __init__(self): - self._age = "age" - self._animal = "organism" - self._author = "contributors" - self._author_names = "names" - self._cell_types_original = "free_annotation" - self._cell_ontology_class = "cell_type" - self._cell_ontology_id = "cell_type_ontology_term_id" - self._dataset = "dataset" - self._dataset_group = "dataset_group" - self._dev_stage = "development_stage" - self._disease = "disease" - self._disease_state_healthy = "normal" - self._ethnicity = "ethnicity" - self._gene_id_ensembl = "name" - self._gene_id_names = "ensembl" - self._protocol = "assay" - self._sex = "sex" + _state_exact: str @property def age(self): return self._age @property - def animal(self): - return self._animal - - @property - def author(self): - return self._author - - @property - def author_names(self): - return self._author_names + def dev_stage(self): + return self._dev_stage 
@property - def cell_types_original(self): - return self._cell_types_original + def ethnicity(self): + return self._ethnicity @property - def cell_ontology_class(self): - return self._cell_ontology_class + def sex(self): + return self._sex @property - def cell_ontology_id(self): - return self._cell_ontology_id + def state_exact(self): + return self._state_exact - @property - def dataset(self): - return self._dataset - @property - def dataset_group(self): - return self._dataset_group +class ADATA_IDS_SFAIRA(ADATA_IDS_EXTENDED): + """ + Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. + """ - @property - def dev_stage(self): - return self._dev_stage + def __init__(self): + self._animal = "animal" + self._cell_types_original = "cell_types_original" + self._cell_ontology_class = "cell_ontology_class" + self._cell_ontology_id = "cell_ontology_id" + self._doi = "doi" + self._dataset = "dataset" + self._dataset_group = "dataset_group" + self._gene_id_ensembl = "ensembl" + self._gene_id_names = "names" + self._has_celltypes = "has_celltypes" + self._healthy = "healthy" + self._id = "id" + self._normalization = "normalization" + self._lab = "lab" + self._organ = "organ" + self._protocol = "protocol" + self._subtissue = "subtissue" + self._wget_download = "wget_download" + self._year = "year" - @property - def disease(self): - return self._disease + self._age = "age" + self._dev_stage = "dev_stage" + self._ethnicity = "ethnicity" + self._sex = "sex" + self._state_exact = "state_exact" - @property - def disease_state_healthy(self): - return self._disease_state_healthy - @property - def ethnicity(self): - return self._ethnicity +class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): + """ + Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene + objects. 
+ """ + _author_names: str + _disease_state_healthy: str - @property - def gene_id_ensembl(self): - return self._gene_id_ensembl + def __init__(self): + self._animal = "organism" + self._cell_types_original = "free_annotation" + self._cell_ontology_class = "cell_type" + self._cell_ontology_id = "cell_type_ontology_term_id" + self._doi = "" # TODO + self._dataset = "dataset" + self._dataset_group = "dataset_group" + self._gene_id_ensembl = "" # TODO + self._gene_id_names = "" # TODO + self._has_celltypes = "" # TODO + self._healthy = None # is inferred from _disease + self._id = "" # TODO + self._normalization = None # is always "counts" + self._lab = "" # TODO + self._organ = "" # TODO + self._protocol = "assay" + self._subtissue = "" # TODO + self._wget_download = "" # TODO + self._year = "" # TODO - @property - def gene_id_names(self): - return self._gene_id_names + self._age = "age" + self._author = "contributors" + self._dev_stage = "development_stage" + self._ethnicity = "ethnicity" + self._sex = "sex" + self._state_exact = "disease" + # selected element entries used for parsing: + self._disease_state_healthy = "normal" + self._author_names = "names" @property - def protocol(self): - return self._protocol + def author_names(self): + return self._author_names @property - def sex(self): - return self._sex - - - + def disease_state_healthy(self): + return self._disease_state_healthy diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 06dd8f72d..145fa5b3e 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -11,7 +11,7 @@ import warnings from .external import SuperGenomeContainer -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class DatasetBase(abc.ABC): @@ -95,12 +95,12 @@ def load( self._load(fn=fn) - if ADATA_IDS.cell_ontology_id not in self.adata.obs.columns: - self.adata.obs[ADATA_IDS.cell_ontology_id] = None + if ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: + 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[ADATA_IDS.cell_ontology_class].values, + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, celltype_version=celltype_version ) @@ -144,8 +144,8 @@ def load( self.adata.obs_names = obs_names self.adata.var_names = new_index_collapsed new_index = new_index_collapsed - self.adata.var[ADATA_IDS.gene_id_ensembl] = new_index - self.adata.var.index = self.adata.var[ADATA_IDS.gene_id_ensembl].values + self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index + self.adata.var.index = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values # Match feature space to a genomes provided with sfaira if match_to_reference: @@ -161,7 +161,7 @@ def load( raise ValueError("data type %s not recognized" % type(self.adata.X)) # Compute indices of genes to keep - data_ids = self.adata.var[ADATA_IDS.gene_id_ensembl].values + data_ids = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] idx_feature_map = np.array([self.genome_container.ensembl.index(x) for x in data_ids[idx_feature_kept]]) @@ -188,7 +188,7 @@ def load( obs=self.adata.obs, obsm=self.adata.obsm, var=pd.DataFrame(data={'names': self.genome_container.names, - ADATA_IDS.gene_id_ensembl: self.genome_container.ensembl}, + ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, index=self.genome_container.ensembl), uns=self.adata.uns ) @@ -199,30 +199,30 @@ def _convert_and_set_var_names( self, symbol_col: str = None, ensembl_col: str = None, - new_index: str = ADATA_IDS.gene_id_ensembl + new_index: str = ADATA_IDS_SFAIRA.gene_id_ensembl ): if symbol_col and ensembl_col: if symbol_col == 'index': 
self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS.gene_id_names}, + {'index': ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {symbol_col: ADATA_IDS.gene_id_names}, + {symbol_col: ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) if ensembl_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS.gene_id_ensembl}, + {'index': ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {ensembl_col: ADATA_IDS.gene_id_ensembl}, + {ensembl_col: ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) @@ -232,12 +232,12 @@ def _convert_and_set_var_names( if symbol_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS.gene_id_names}, + {'index': ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {symbol_col: ADATA_IDS.gene_id_names}, + {symbol_col: ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) @@ -245,32 +245,32 @@ def _convert_and_set_var_names( # match it straight away, if it is not in there we try to match everything in front of the first period in # the gene name with a dictionary that was modified in the same way, if there is still no match we append na ensids = [] - for n in self.adata.var[ADATA_IDS.gene_id_names]: + for n in self.adata.var[ADATA_IDS_SFAIRA.gene_id_names]: if n in id_dict.keys(): ensids.append(id_dict[n]) elif n.split(".")[0] in id_strip_dict.keys(): ensids.append(id_strip_dict[n.split(".")[0]]) else: ensids.append('n/a') - self.adata.var[ADATA_IDS.gene_id_ensembl] = ensids + self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids elif ensembl_col: id_dict = self.genome_container.id_to_names_dict if ensembl_col == 'index': self.adata.var.index.name = 'index' self.adata.var = 
self.adata.var.reset_index().rename( - {'index': ADATA_IDS.gene_id_ensembl}, + {'index': ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {ensembl_col: ADATA_IDS.gene_id_names}, + {ensembl_col: ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) - self.adata.var[ADATA_IDS.gene_id_names] = [ + self.adata.var[ADATA_IDS_SFAIRA.gene_id_names] = [ id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var[ADATA_IDS.gene_id_ensembl] + for n in self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] ] else: @@ -331,8 +331,8 @@ def load_tobacked( x_new = self.adata.X adata_backed.X[np.sort(idx), :] = x_new[np.argsort(idx), :] for k in adata_backed.obs.columns: - if k == ADATA_IDS.dataset: - adata_backed.obs.loc[np.sort(idx), ADATA_IDS.dataset] = [self.id for i in range(len(idx))] + if k == ADATA_IDS_SFAIRA.dataset: + adata_backed.obs.loc[np.sort(idx), ADATA_IDS_SFAIRA.dataset] = [self.id for i in range(len(idx))] elif k in self.adata.obs.columns: adata_backed.obs.loc[np.sort(idx), k] = self.adata.obs[k].values[np.argsort(idx)] elif k in list(self.adata.uns.keys()): @@ -352,7 +352,7 @@ def load_tobacked( adata_backed._n_obs = adata_backed.X.shape[0] # not automatically updated after append adata_backed.obs = adata_backed.obs.append( # .obs was not broadcasted to the right shape! 
pandas.DataFrame(dict([ - (k, [self.id for i in range(len(idx))]) if k == ADATA_IDS.dataset + (k, [self.id for i in range(len(idx))]) if k == ADATA_IDS_SFAIRA.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns else (k, [self.adata.uns[k] for i in range(len(idx))]) if k in list(self.adata.uns.keys()) else (k, ["key_not_found" for i in range(len(idx))]) @@ -372,9 +372,9 @@ def set_unkown_class_id(self, ids: list): target_id = "unknown" ontology_classes = [ x if x not in ids else target_id - for x in self.adata.obs[ADATA_IDS.cell_ontology_class].tolist() + for x in self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].tolist() ] - self.adata.obs[ADATA_IDS.cell_ontology_class] = ontology_classes + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ontology_classes def _set_genome(self, genome: str @@ -431,14 +431,14 @@ def write_meta( self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ "ncells": self.adata.n_obs, - "animal": self.adata.uns[ADATA_IDS.animal], - "organ": self.adata.uns[ADATA_IDS.organ], - "subtissue": self.adata.uns[ADATA_IDS.subtissue], - "id": self.adata.uns[ADATA_IDS.id], - "lab": self.adata.uns[ADATA_IDS.lab], - "year": self.adata.uns[ADATA_IDS.year], - "protocol": self.adata.uns[ADATA_IDS.protocol], - "counts": self.adata.uns[ADATA_IDS.normalization] if ADATA_IDS.normalization in self.adata.uns.keys() else None, + "animal": self.adata.uns[ADATA_IDS_SFAIRA.animal], + "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], + "subtissue": self.adata.uns[ADATA_IDS_SFAIRA.subtissue], + "id": self.adata.uns[ADATA_IDS_SFAIRA.id], + "lab": self.adata.uns[ADATA_IDS_SFAIRA.author], + "year": self.adata.uns[ADATA_IDS_SFAIRA.year], + "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], + "counts": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, "has_celltypes": self.has_celltypes }, index=range(1)) 
meta.to_csv(fn_meta) @@ -590,15 +590,15 @@ def adata(self): adata_ls = self.adata_ls # Save uns attributes that are fixed for entire data set to .obs to retain during concatenation: for adata in adata_ls: - adata.obs[ADATA_IDS.lab] = adata.uns[ADATA_IDS.lab] - adata.obs[ADATA_IDS.year] = adata.uns[ADATA_IDS.year] - adata.obs[ADATA_IDS.protocol] = adata.uns[ADATA_IDS.protocol] - adata.obs[ADATA_IDS.subtissue] = adata.uns[ADATA_IDS.subtissue] - if ADATA_IDS.normalization in adata.uns.keys(): - adata.obs[ADATA_IDS.normalization] = adata.uns[ADATA_IDS.normalization] - if ADATA_IDS.dev_stage in adata.obs.columns: - adata.obs[ADATA_IDS.dev_stage] = adata.uns[ADATA_IDS.dev_stage] - adata.obs[ADATA_IDS.has_celltypes] = adata.uns[ADATA_IDS.has_celltypes] + adata.obs[ADATA_IDS_SFAIRA.author] = adata.uns[ADATA_IDS_SFAIRA.author] + adata.obs[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_SFAIRA.year] + adata.obs[ADATA_IDS_SFAIRA.protocol] = adata.uns[ADATA_IDS_SFAIRA.protocol] + adata.obs[ADATA_IDS_SFAIRA.subtissue] = adata.uns[ADATA_IDS_SFAIRA.subtissue] + if ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): + adata.obs[ADATA_IDS_SFAIRA.normalization] = adata.uns[ADATA_IDS_SFAIRA.normalization] + if ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: + adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.uns[ADATA_IDS_SFAIRA.dev_stage] + adata.obs[ADATA_IDS_SFAIRA.has_celltypes] = adata.uns[ADATA_IDS_SFAIRA.has_celltypes] # Workaround related to anndata bugs: # TODO remove this in future. 
for adata in adata_ls: # Fix 1: @@ -608,13 +608,13 @@ def adata(self): if adata.uns is not None: keys_to_keep = [ 'neighbors', - ADATA_IDS.lab, - ADATA_IDS.year, - ADATA_IDS.protocol, - ADATA_IDS.subtissue, - ADATA_IDS.normalization, - ADATA_IDS.dev_stage, - ADATA_IDS.has_celltypes, + ADATA_IDS_SFAIRA.author, + ADATA_IDS_SFAIRA.year, + ADATA_IDS_SFAIRA.protocol, + ADATA_IDS_SFAIRA.subtissue, + ADATA_IDS_SFAIRA.normalization, + ADATA_IDS_SFAIRA.dev_stage, + ADATA_IDS_SFAIRA.has_celltypes, "mapped_features" ] for k in list(adata.uns.keys()): @@ -627,7 +627,7 @@ def adata(self): # To preserve gene names in .var, the target gene names are copied into var_names and are then copied # back into .var. for adata in adata_ls: - adata.var.index = adata.var[ADATA_IDS.gene_id_ensembl].tolist() + adata.var.index = adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].tolist() if len(adata_ls) > 1: # TODO: need to keep this? -> yes, still catching errors here (March 2020) # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. 
@@ -635,18 +635,18 @@ def adata(self): adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=ADATA_IDS.dataset, + batch_key=ADATA_IDS_SFAIRA.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) except ValueError: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=ADATA_IDS.dataset, + batch_key=ADATA_IDS_SFAIRA.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) - adata_concat.var[ADATA_IDS.gene_id_ensembl] = adata_concat.var.index + adata_concat.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index if len(set([a.uns['mapped_features'] for a in adata_ls])) == 1: adata_concat.uns['mapped_features'] = adata_ls[0].uns['mapped_features'] @@ -654,7 +654,7 @@ def adata(self): adata_concat.uns['mapped_features'] = False else: adata_concat = adata_ls[0] - adata_concat.obs[ADATA_IDS.dataset] = self.ids[0] + adata_concat.obs[ADATA_IDS_SFAIRA.dataset] = self.ids[0] adata_concat.var_names_make_unique() return adata_concat @@ -675,7 +675,7 @@ def obs_concat(self, keys: Union[list, None] = None): (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns else (k, ["nan" for i in range(self.datasets[x].adata.obs.shape[0])]) for k in keys - ] + [(ADATA_IDS.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] + ] + [(ADATA_IDS_SFAIRA.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat @@ -811,7 +811,7 @@ def load_all( self.adata = self.dataset_groups[i].adata.concatenate( *[x.adata for x in self.dataset_groups[1:] if x is not None], join="outer", - batch_key=ADATA_IDS.dataset_group + batch_key=ADATA_IDS_SFAIRA.dataset_group ) def load_all_tobacked( @@ -863,17 +863,17 @@ def load_all_tobacked( if not as_dense: self.adata.X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse keys = [ - 
ADATA_IDS.lab, - ADATA_IDS.year, - ADATA_IDS.protocol, - ADATA_IDS.organ, - ADATA_IDS.subtissue, - ADATA_IDS.cell_ontology_class, - ADATA_IDS.state_exact, - ADATA_IDS.normalization, - ADATA_IDS.dev_stage, - ADATA_IDS.has_celltypes, - ADATA_IDS.dataset + ADATA_IDS_SFAIRA.author, + ADATA_IDS_SFAIRA.year, + ADATA_IDS_SFAIRA.protocol, + ADATA_IDS_SFAIRA.organ, + ADATA_IDS_SFAIRA.subtissue, + ADATA_IDS_SFAIRA.cell_ontology_class, + ADATA_IDS_SFAIRA.state_exact, + ADATA_IDS_SFAIRA.normalization, + ADATA_IDS_SFAIRA.dev_stage, + ADATA_IDS_SFAIRA.has_celltypes, + ADATA_IDS_SFAIRA.dataset ] if scatter_update: self.adata.obs = pandas.DataFrame({ diff --git a/sfaira/data/databases/cellxgene_loader.py b/sfaira/data/databases/cellxgene_loader.py index 2f99311f6..7472c6c49 100644 --- a/sfaira/data/databases/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene_loader.py @@ -24,6 +24,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.fn = fn + # TODO from meta: self.species = str(fn).split("_")[2] self.id = str(fn).split(".")[0] self.organ = str(fn).split("_")[3] @@ -40,9 +41,9 @@ def _load(self, fn=None): adata = anndata.read(fn) adata.X = adata.raw.X - self.adata.uns[ADATA_IDS.lab] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] - self.adata.uns[ADATA_IDS.year] = None - self.adata.uns[ADATA_IDS.doi] = None # TODO + self.adata.uns[ADATA_IDS.author] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] + self.adata.uns[ADATA_IDS.year] = adata.uns[ADATA_IDS_CELLXGENE.year] + self.adata.uns[ADATA_IDS.doi] = adata.uns[ADATA_IDS_CELLXGENE.doi] if len(np.unique(adata.obs[ADATA_IDS.animal].values)) > 1: raise Warning("found multiple assay in data set %s" % self.fn) self.adata.uns[ADATA_IDS.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] diff --git a/sfaira/data/databases/external.py b/sfaira/data/databases/external.py index 6d7d44bbf..8b1378917 100644 --- 
a/sfaira/data/databases/external.py +++ b/sfaira/data/databases/external.py @@ -1,2 +1 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS, ADATA_IDS_CELLXGENE + diff --git a/sfaira/data/external.py b/sfaira/data/external.py index 97f4c333b..55e607c98 100644 --- a/sfaira/data/external.py +++ b/sfaira/data/external.py @@ -1,2 +1,2 @@ from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adipose/external.py b/sfaira/data/human/adipose/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/adipose/external.py +++ b/sfaira/data/human/adipose/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 9de2fdf75..7ad5536f2 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adipose/hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/adrenalgland/external.py b/sfaira/data/human/adrenalgland/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/adrenalgland/external.py +++ b/sfaira/data/human/adrenalgland/external.py @@ 
-1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 1c14752b0..9b9dacb36 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index b2f4f1110..5c35b9b75 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index f35148c4a..bbeee8651 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index d69eb2c07..6f2f49311 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py 
b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 8476743d6..6df07e9bc 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 745b38862..faa871a63 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/artery/external.py b/sfaira/data/human/artery/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/artery/external.py +++ b/sfaira/data/human/artery/external.py @@ -1,2 +1,2 @@ from sfaira.data 
import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 1dbfbce69..6bdadaac1 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/artery/hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/bladder/external.py b/sfaira/data/human/bladder/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/bladder/external.py +++ b/sfaira/data/human/bladder/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index 4c4562434..2811f3d52 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 9767f3b5f..ba3bb5914 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 77c485d57..4b8cb989d 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git 
a/sfaira/data/human/blood/external.py b/sfaira/data/human/blood/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/blood/external.py +++ b/sfaira/data/human/blood/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index 92c02574f..ff15c9e93 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -62,7 +62,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/ica_blood.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Regev' + self.adata.uns[ADATA_IDS.author] = 'Regev' self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = None self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 64b44f8c7..db2dd4f1a 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -46,7 +46,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = '10x Genomics' + self.adata.uns[ADATA_IDS.author] = '10x Genomics' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = None 
self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index a92898424..5e53b4c14 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index 6b2940b68..cbdf7ef67 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index f9fc685b9..3169d1ffd 100644 --- 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index c54ae255e..1cbfcfcd2 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 8fa8c9a9a..e6d892389 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import 
ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index a7bc6b690..a55342f9e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index ab033d684..4702724b9 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 
2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/bone/external.py b/sfaira/data/human/bone/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/bone/external.py +++ b/sfaira/data/human/bone/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index 27bad3280..2f5271afd 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -62,7 +62,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/ica_bone.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Regev' + self.adata.uns[ADATA_IDS.author] = 'Regev' self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = None self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index 8b7d9bebb..d432f1479 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 
2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index fbc20f730..57032a6da 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/brain/external.py b/sfaira/data/human/brain/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/brain/external.py +++ b/sfaira/data/human/brain/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index d5f361d87..928ff5744 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ 
.multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Regev" + self.adata.uns[ADATA_IDS.author] = "Regev" self.adata.uns[ADATA_IDS.year] = 2017 self.adata.uns[ADATA_IDS.doi] = "10.1038/nmeth.4407" self.adata.uns[ADATA_IDS.protocol] = 'DroNcSeq' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index a0bb0a12d..28f7e2a86 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 120e5a2f0..5071dc165 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git 
a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 1d334f4bb..6e8b6b08f 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index d67d77caa..8973f8a59 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index c21f57b48..b0ae46267 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index 8773fe541..c2becfdf5 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/calvaria/external.py b/sfaira/data/human/calvaria/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/calvaria/external.py +++ b/sfaira/data/human/calvaria/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 
c9b963260..4f86c6e27 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/calvaria/hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/cervix/external.py b/sfaira/data/human/cervix/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/cervix/external.py +++ b/sfaira/data/human/cervix/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 8da363801..2cddd72b1 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/cervix/hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git 
a/sfaira/data/human/chorionicvillus/external.py b/sfaira/data/human/chorionicvillus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/chorionicvillus/external.py +++ b/sfaira/data/human/chorionicvillus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index e6cdc5c47..006762709 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/chorionicvillus/hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/colon/external.py b/sfaira/data/human/colon/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/colon/external.py +++ b/sfaira/data/human/colon/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index 89b8b3241..a4f5a01ef 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -1,7 
+1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -128,7 +128,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/kinchenetal.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Simmons' + self.adata.uns[ADATA_IDS.author] = 'Simmons' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.08.067" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 4efa64678..91d13e418 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -87,7 +87,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Regev" + self.adata.uns[ADATA_IDS.author] = "Regev" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2019.06.029" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index f41aadf23..e55124ed9 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -55,7 +55,7 @@ def _load(self, 
fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.author] = "Chen" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index 3a9ddbb5a..398e54066 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Teichmann" + self.adata.uns[ADATA_IDS.author] = "Teichmann" self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41590-020-0602-z" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index 4a2d3ae56..f75d1929e 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -72,7 +72,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' 
self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 555decf66..3e19e0c93 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -72,7 +72,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 59cb160e7..87e022665 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -72,7 +72,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 
36f84e4bf..4309ced25 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -72,7 +72,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/duodenum/external.py b/sfaira/data/human/duodenum/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/duodenum/external.py +++ b/sfaira/data/human/duodenum/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 7802950a0..1cb5fe30b 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/duodenum/hcl_AdultDuodenum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git 
a/sfaira/data/human/epityphlon/external.py b/sfaira/data/human/epityphlon/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/epityphlon/external.py +++ b/sfaira/data/human/epityphlon/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index a467adeab..d0280631c 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/epityphlon/hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/esophagus/external.py b/sfaira/data/human/esophagus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/esophagus/external.py +++ b/sfaira/data/human/esophagus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index b00195eee..91976a81c 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -1,7 +1,7 @@ 
import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import scipy.sparse @@ -65,7 +65,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Meyer" + self.adata.uns[ADATA_IDS.author] = "Meyer" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index d69e823b5..20bb945da 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -69,7 +69,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 0362fa385..60bca5620 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -69,7 +69,7 @@ def 
_load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/eye/external.py b/sfaira/data/human/eye/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/eye/external.py +++ b/sfaira/data/human/eye/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index e5551a126..af4036d6c 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = 'Wong' + self.adata.uns[ADATA_IDS.author] = 'Wong' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.15252/embj.2018100811' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index 20aff74d4..d2255098c 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS 
+from .external import ADATA_IDS_SFAIRA import anndata @@ -52,7 +52,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/eye/menon19.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Hafler' + self.adata.uns[ADATA_IDS.author] = 'Hafler' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-12780-8' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 89803593e..e80ef522b 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -56,7 +56,7 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.lab] = 'Mullins' + self.adata.uns[ADATA_IDS.author] = 'Mullins' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1073/pnas.1914143116' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index 89cae1dd7..b9dd6df7f 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -68,7 +68,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/eye/hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/fallopiantube/external.py b/sfaira/data/human/fallopiantube/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/fallopiantube/external.py +++ b/sfaira/data/human/fallopiantube/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 10138616c..22cd08a45 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/fallopiantube/hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/femalegonad/external.py b/sfaira/data/human/femalegonad/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/femalegonad/external.py +++ b/sfaira/data/human/femalegonad/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 29e23e826..2a228b857 100644 --- 
a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 0ae5a7cc6..b27286716 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/gallbladder/external.py b/sfaira/data/human/gallbladder/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/gallbladder/external.py +++ b/sfaira/data/human/gallbladder/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import 
ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 4bf294880..5b35f7b30 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/gallbladder/hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/heart/external.py b/sfaira/data/human/heart/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/heart/external.py +++ b/sfaira/data/human/heart/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index 90b917f60..f860dae11 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' 
self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 9d0ec7791..5699855b4 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 70f3370ae..823e7a4b9 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 
1e4d0f55b..9bd3f322f 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/hesc/external.py b/sfaira/data/human/hesc/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/hesc/external.py +++ b/sfaira/data/human/hesc/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index 9bea6a980..41c1ee8de 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/hesc/hcl_HESC_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/ileum/external.py b/sfaira/data/human/ileum/external.py index 
a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/ileum/external.py +++ b/sfaira/data/human/ileum/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 5d0874892..1eb5f2015 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -69,7 +69,7 @@ def _load(self, fn=None): .multiply(1/10000) self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - self.adata.uns[ADATA_IDS.lab] = "Kenigsberg" + self.adata.uns[ADATA_IDS.author] = "Kenigsberg" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2019.08.008" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index c1993453f..75cd14e3a 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -55,7 +55,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.author] = "Chen" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" 
self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 5cc1b544a..ecad424bb 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -71,7 +71,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ileum/hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/jejunum/external.py b/sfaira/data/human/jejunum/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/jejunum/external.py +++ b/sfaira/data/human/jejunum/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 710bb6b30..5e458b010 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/jejunum/hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' 
self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/external.py b/sfaira/data/human/kidney/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/kidney/external.py +++ b/sfaira/data/human/kidney/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index 3e208b9a4..c2fff3afc 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -77,7 +77,7 @@ def _load(self, fn=None): annot = pd.read_csv(fn[1], index_col=0, dtype='category') self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.lab] = 'Jain' + self.adata.uns[ADATA_IDS.author] = 'Jain' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-10861-2' self.adata.uns[ADATA_IDS.protocol] = '10xSn' diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index 7ff976fcc..403a29edb 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -125,7 +125,7 @@ def _load(self, fn=None): self.adata 
= adult.concatenate(fetal) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.lab] = 'Clatworthy' + self.adata.uns[ADATA_IDS.author] = 'Clatworthy' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1126/science.aat5031' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index b2eec4be5..bccc80567 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd import scipy.io @@ -96,7 +96,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/GSE131685.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Mo' + self.adata.uns[ADATA_IDS.author] = 'Mo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41597-019-0351-8' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 18757a74a..39ec9a7bf 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' 
self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index 3b6915dcd..16669aa63 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 859553d7f..860da838d 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 1f1949771..a9b28bc1c 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 112849f12..d846a9f35 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index e6e2cf1fd..302085a48 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external 
import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 9d6b6f1c7..fe5c99d29 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -94,7 +94,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/liver/external.py b/sfaira/data/human/liver/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/liver/external.py +++ b/sfaira/data/human/liver/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index e5b816254..1a53bc47f 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -1,7 +1,7 @@ import os from typing import Union from 
.external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -70,7 +70,7 @@ def _load(self, fn=None): celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.lab] = 'McGilvray' + self.adata.uns[ADATA_IDS.author] = 'McGilvray' self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-018-06318-7' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 32f0de473..624841b92 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -70,7 +70,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Haniffa' + self.adata.uns[ADATA_IDS.author] = 'Haniffa' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1652-y' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index 64617b392..adae74f19 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -72,7 +72,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, 
"human/liver/ramachandran.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Henderson' + self.adata.uns[ADATA_IDS.author] = 'Henderson' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1631-3' self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index fc24a03db..ccc41e021 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -90,7 +90,7 @@ def _load(self, fn=None): self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.lab] = 'Gruen' + self.adata.uns[ADATA_IDS.author] = 'Gruen' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1373-2' self.adata.uns[ADATA_IDS.protocol] = 'mCEL-Seq2' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 353aca5bb..c62b58df1 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' 
self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 25c6ccf83..d28abc786 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 42601d1b4..434632306 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index 
cd2c95b29..c79a13d86 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 4f2347383..1a098706c 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/external.py b/sfaira/data/human/lung/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/lung/external.py +++ b/sfaira/data/human/lung/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py 
b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index fba31f7a0..88fb112c2 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.author] = 'Teichmann' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 79a597c52..5af29652e 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.author] = 'Teichmann' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 90f7952ca..03c51288b 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS 
+from .external import ADATA_IDS_SFAIRA import anndata @@ -68,7 +68,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Meyer' + self.adata.uns[ADATA_IDS.author] = 'Meyer' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1186/s13059-019-1906-x" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index a8533afdd..ac01e3176 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.author] = 'Teichmann' self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" self.adata.uns[ADATA_IDS.protocol] = 'dropseq' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 79b4b9365..06da0317c 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -107,7 +107,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/habermann_processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Kropski' + 
self.adata.uns[ADATA_IDS.author] = 'Kropski' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1101/753806" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index 37448e9d9..a693dd817 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = 'Eils' + self.adata.uns[ADATA_IDS.author] = 'Eils' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index 88a1b8cbb..c40785d6a 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = 'Eils' + self.adata.uns[ADATA_IDS.author] = 'Eils' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git 
a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index 54ae54729..b3555b1de 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -73,7 +73,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = 'Spence' + self.adata.uns[ADATA_IDS.author] = 'Spence' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.devcel.2020.01.033" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index e088eb8a3..2e35bdd39 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import scipy.sparse import numpy as np @@ -114,7 +114,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ .multiply(1 / 10000) - self.adata.uns[ADATA_IDS.lab] = 'Krasnow' + self.adata.uns[ADATA_IDS.author] = 'Krasnow' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 41ac5376a..e7b447626 100644 --- 
a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -95,7 +95,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 52130956a..117487c58 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -95,7 +95,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index e6522c3d8..4698a543b 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -95,7 +95,7 
@@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 9d8782c6a..53c040180 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -95,7 +95,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 5f364a81c..9b4e99a04 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -95,7 +95,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' 
self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 291e9649e..8ef7a81bb 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import scipy.sparse import numpy as np @@ -101,7 +101,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ .multiply(1 / 1000000) - self.adata.uns[ADATA_IDS.lab] = 'Krasnow' + self.adata.uns[ADATA_IDS.author] = 'Krasnow' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" self.adata.uns[ADATA_IDS.protocol] = 'smartseq2' diff --git a/sfaira/data/human/malegonad/external.py b/sfaira/data/human/malegonad/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/malegonad/external.py +++ b/sfaira/data/human/malegonad/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index a484700f2..b483540f3 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Cairns" + self.adata.uns[ADATA_IDS.author] = "Cairns" self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41422-018-0099-2" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index fa39456ff..ac2c050ae 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 74a837ed5..683eca580 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -73,7 +73,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' 
self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/mixed/external.py b/sfaira/data/human/mixed/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/mixed/external.py +++ b/sfaira/data/human/mixed/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 80bfb0f1d..c06146fe9 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import tarfile import pandas as pd @@ -150,7 +150,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/mixed/GSE126030.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = "Sims" + self.adata.uns[ADATA_IDS.author] = "Sims" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1038/s41467-019-12464-3" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/human/muscle/external.py b/sfaira/data/human/muscle/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/muscle/external.py +++ b/sfaira/data/human/muscle/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index ddd1ec9e1..c5aebbc16 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 9d7acada6..a89a595fc 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/omentum/external.py b/sfaira/data/human/omentum/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/omentum/external.py +++ b/sfaira/data/human/omentum/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 
b8886f7f8..854e6e572 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 8f14ba488..1ffce6d62 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index cd413eecd..f7080fdc0 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import 
DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/pancreas/external.py b/sfaira/data/human/pancreas/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/pancreas/external.py +++ b/sfaira/data/human/pancreas/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index 213331765..1c37d4eb0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Yanai" + self.adata.uns[ADATA_IDS.author] = "Yanai" self.adata.uns[ADATA_IDS.year] = 2016 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cels.2016.08.011" self.adata.uns[ADATA_IDS.protocol] = 'inDrop' diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index 21e237c34..b724286b1 100644 --- 
a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import pandas as pd @@ -69,7 +69,7 @@ def _load(self, fn=None): # filter observations which are not cells (empty wells, low quality cells etc.) self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - self.adata.uns[ADATA_IDS.lab] = "Sandberg" + self.adata.uns[ADATA_IDS.author] = "Sandberg" self.adata.uns[ADATA_IDS.year] = 2016 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2016.08.020" self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index 17f9b617e..0e4b742ca 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import tarfile import gzip @@ -129,7 +129,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/GSE81547.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = 2017 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2017.09.004" self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 87fef7d0a..ce97af838 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 478bfcb4e..ffc6c4e32 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 6a26abd06..721687ae0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import 
anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 0718acdbb..221bcf3d5 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/placenta/external.py b/sfaira/data/human/placenta/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/placenta/external.py +++ b/sfaira/data/human/placenta/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index fc6907aea..e6dda5a6f 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -1,7 +1,7 @@ import os 
from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import pandas as pd import anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.author] = 'Teichmann' self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 5c9d84a7f..cedaa7c72 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import pandas as pd import anndata @@ -83,7 +83,7 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS.lab] = 'Teichmann' + self.adata.uns[ADATA_IDS.author] = 'Teichmann' self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' self.adata.uns[ADATA_IDS.protocol] = "Smartseq2" diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index 7cbf9bc92..165754ab9 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -76,7 +76,7 @@ def _load(self, 
fn=None): fn = os.path.join(self.path, "human/placenta/hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/pleura/external.py b/sfaira/data/human/pleura/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/pleura/external.py +++ b/sfaira/data/human/pleura/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 7e87b61fa..b581b182f 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pleura/hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/prostate/external.py b/sfaira/data/human/prostate/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/prostate/external.py +++ b/sfaira/data/human/prostate/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git 
a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 503693f54..8931a1f7b 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -56,7 +56,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Strand" + self.adata.uns[ADATA_IDS.author] = "Strand" self.adata.uns[ADATA_IDS.year] = 2018 self.adata.uns[ADATA_IDS.doi] = "10.1016/j.celrep.2018.11.086" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index e4643c1f2..d54a69417 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -65,7 +65,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/prostate/hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/rectum/external.py b/sfaira/data/human/rectum/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/rectum/external.py +++ 
b/sfaira/data/human/rectum/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index adc138c01..71dad32dd 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np import scipy.sparse @@ -55,7 +55,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Chen" + self.adata.uns[ADATA_IDS.author] = "Chen" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index b43bedc49..f3d9a6bd4 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -60,7 +60,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rectum/hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/rib/external.py 
b/sfaira/data/human/rib/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/rib/external.py +++ b/sfaira/data/human/rib/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 1fbe46715..14a6341ba 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index c60be9e12..c7fd63078 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git 
a/sfaira/data/human/skin/external.py b/sfaira/data/human/skin/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/skin/external.py +++ b/sfaira/data/human/skin/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 638a1db5d..6a60655f0 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -74,7 +74,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 205a5231b..e2658caa5 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -74,7 +74,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' 
self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/spinalcord/external.py b/sfaira/data/human/spinalcord/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/spinalcord/external.py +++ b/sfaira/data/human/spinalcord/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 20c1e9bc9..41f7dd4a6 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spinalcord/hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/spleen/external.py b/sfaira/data/human/spleen/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/spleen/external.py +++ b/sfaira/data/human/spleen/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index e825156ac..3fddbbba9 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ 
b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import scipy.sparse @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.lab] = "Meyer" + self.adata.uns[ADATA_IDS.author] = "Meyer" self.adata.uns[ADATA_IDS.year] = 2019 self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index 84a84cbf8..6ce8ad513 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -66,7 +66,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index ce8806721..39d6e459e 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import 
anndata @@ -66,7 +66,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/external.py b/sfaira/data/human/stomach/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/stomach/external.py +++ b/sfaira/data/human/stomach/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index d13e95e29..628e13254 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index 404af4d2c..01f98e71c 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from 
.external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index efdded1e6..fa0141b50 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index 2a9a5cdb6..3bd20a857 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntetsine_3.h5ad") self.adata = 
anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index de4bc04ce..992646eaf 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index b8473e20d..681f4adf4 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff 
--git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 13f6903ab..5da38a27a 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 18e76408a..7ae696af3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index f1985a4e4..408970e7b 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ 
b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index ce6b94e98..f194548a5 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/thymus/external.py b/sfaira/data/human/thymus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/thymus/external.py +++ b/sfaira/data/human/thymus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index 
ea58c2d62..fe532b4cc 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata import numpy as np @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.lab] = "Teichmann" + self.adata.uns[ADATA_IDS.author] = "Teichmann" self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = "10.1126/science.aay3224" self.adata.uns[ADATA_IDS.protocol] = '10x' diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 9da737f92..50ea0ad46 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -58,7 +58,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index 07f9bc445..a4efd187e 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from 
.external import ADATA_IDS_SFAIRA import anndata @@ -58,7 +58,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/thyroid/external.py b/sfaira/data/human/thyroid/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/thyroid/external.py +++ b/sfaira/data/human/thyroid/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index bdba21705..be2fd0d8a 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 0cf4a1b2d..d364560cd 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -1,7 +1,7 @@ 
import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/trachea/external.py b/sfaira/data/human/trachea/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/trachea/external.py +++ b/sfaira/data/human/trachea/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index 3a1fd4e23..3a6ef7519 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/trachea/hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/ureter/external.py b/sfaira/data/human/ureter/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/ureter/external.py +++ b/sfaira/data/human/ureter/external.py @@ -1,2 +1,2 @@ from 
sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 46efc85ac..3defbf60a 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ureter/hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/human/uterus/external.py b/sfaira/data/human/uterus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/human/uterus/external.py +++ b/sfaira/data/human/uterus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index d0d047f5e..641881669 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -1,7 +1,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA import anndata @@ -44,7 +44,7 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/uterus/hcl_AdultUterus_1.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns[ADATA_IDS.lab] = 'Guo' + self.adata.uns[ADATA_IDS.author] = 'Guo' self.adata.uns[ADATA_IDS.year] = 2020 self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/mouse/bladder/external.py b/sfaira/data/mouse/bladder/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/bladder/external.py +++ b/sfaira/data/mouse/bladder/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 198763139..4b7a6b5e2 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index 71894cef7..b3ddbfbf3 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index f52109a85..fe1309f01 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/brain/external.py b/sfaira/data/mouse/brain/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/brain/external.py +++ b/sfaira/data/mouse/brain/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index 86cfacb57..a7477c191 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index a3480641f..e85596a12 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 7e844a29e..bbe9b8e34 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -69,7 +69,7 @@ def _load(self, fn=None): self.adata.obs = obs assert 
np.all(self.adata.obs_names == self.adata.obs["cell"].values) - self.adata.uns[ADATA_IDS.lab] = "Movahedi" + self.adata.uns[ADATA_IDS.author] = "Movahedi" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1038/s41593-019-0393-4" self.adata.uns[ADATA_IDS.protocol] = "microwell" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index 14af822c6..a80d92f43 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index bee23b0a0..c1ab8935a 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/diaphragm/external.py 
b/sfaira/data/mouse/diaphragm/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/diaphragm/external.py +++ b/sfaira/data/mouse/diaphragm/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 30a5efe33..7a184c1f9 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/fat/external.py b/sfaira/data/mouse/fat/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/fat/external.py +++ b/sfaira/data/mouse/fat/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py index a947c1f06..b76321d32 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def 
_load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py index 82fe8f8bd..31b6189a7 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py index 0a01b969c..ee821fd89 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py 
b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py index df0e95c86..a5998c978 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py index febe77a28..b727b5365 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/heart/external.py b/sfaira/data/mouse/heart/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/heart/external.py +++ b/sfaira/data/mouse/heart/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py 
b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index 4eb9be00d..022b71553 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index 521050224..496832d0a 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index 5c44cd140..e8df358a5 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA 
class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/kidney/external.py b/sfaira/data/mouse/kidney/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/kidney/external.py +++ b/sfaira/data/mouse/kidney/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index 1a090fb7c..5fafa5267 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -45,7 +45,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index b558e86fd..ade327075 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing 
import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -76,7 +76,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index ada193dc3..1a7afe1ee 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -56,7 +56,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index 229db3b93..c2966e4fe 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -55,7 +55,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] 
= "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/large_intestine/external.py b/sfaira/data/mouse/large_intestine/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/large_intestine/external.py +++ b/sfaira/data/mouse/large_intestine/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py index 1f7487c0d..c2449b6a7 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 488655f32..0ff6b4522 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/limb_muscle/external.py b/sfaira/data/mouse/limb_muscle/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/limb_muscle/external.py +++ b/sfaira/data/mouse/limb_muscle/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py index bcb567396..c348f753f 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py index f292f0f66..81027a49a 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index fd8204d13..c36a3c4f4 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/liver/external.py b/sfaira/data/mouse/liver/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/liver/external.py +++ b/sfaira/data/mouse/liver/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index 657fbf2e2..62d52862b 100644 --- 
a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 607c69b39..7566ca230 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 8106da0d5..5f07d8ffa 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ 
import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 6475c5bfa..28f70a88c 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/lung/external.py b/sfaira/data/mouse/lung/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/lung/external.py +++ b/sfaira/data/mouse/lung/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 1b30d387f..4f2f6e323 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from 
.external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -77,7 +77,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 019cfd443..1ea4176c2 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -77,7 +77,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 03d8ff093..142304ae6 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -77,7 +77,7 @@ def 
_load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 8408ff169..127bda80c 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 007a8849c..96aa17622 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff 
--git a/sfaira/data/mouse/mammary_gland/external.py b/sfaira/data/mouse/mammary_gland/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/mammary_gland/external.py +++ b/sfaira/data/mouse/mammary_gland/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py index 00009fb32..01ca467f3 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py index b6e77fe4a..f83d7570a 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata = 
self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py index 36ade749d..8d198221c 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py index e1ef2e8b5..ad8d34841 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata = 
self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py index 2a0c5b2ab..03bf5a49e 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index d798eb413..ad02d815a 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = 
"2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/marrow/external.py b/sfaira/data/mouse/marrow/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/marrow/external.py +++ b/sfaira/data/mouse/marrow/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py index 5559ca1e5..ef1dcedc3 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py index 51b86576f..3bb45b84b 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = 
"Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py index 2249300ba..970256e11 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/ovary/external.py b/sfaira/data/mouse/ovary/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/ovary/external.py +++ b/sfaira/data/mouse/ovary/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py index 552190784..1595b143f 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() 
self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py index c88d9d28d..cf5ec19e5 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/pancreas/external.py b/sfaira/data/mouse/pancreas/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/pancreas/external.py +++ b/sfaira/data/mouse/pancreas/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index a7b376c79..075cb94d9 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external 
import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 639aab148..d15445430 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 7f929aacd..938320e1f 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - 
self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index bb5f8e14a..2c765e7c5 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 74c0a61c0..3594d9e61 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git 
a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 6863f1b80..d24636972 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 15ee99997..676c61173 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index 0e877fe98..bf4ae4823 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 36149a200..84ce5f079 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index f2784693c..4fddb888e 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class 
Dataset(DatasetBase): @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.lab] = "Bhushan" + self.adata.uns[ADATA_IDS.author] = "Bhushan" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index a03486980..889e5bda6 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -55,7 +55,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/peripheral_blood/external.py b/sfaira/data/mouse/peripheral_blood/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/peripheral_blood/external.py +++ b/sfaira/data/mouse/peripheral_blood/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py index b3a422119..126a0f662 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py index 0255e4ac5..cb92bc8d2 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py index 3c9368e7c..381a925be 100644 --- 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py index 5aa2cceac..1ab939bac 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py 
index 771e2825a..1641c618e 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/placenta/external.py b/sfaira/data/mouse/placenta/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/placenta/external.py +++ b/sfaira/data/mouse/placenta/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 58d0cea03..19848f06d 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -73,7 +73,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = 
"Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 8d58805ae..2dd2a070d 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -73,7 +73,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/prostate/external.py b/sfaira/data/mouse/prostate/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/prostate/external.py +++ b/sfaira/data/mouse/prostate/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index 232cd5e44..f20772195 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class 
Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 01070bd48..99ce454ee 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/rib/external.py b/sfaira/data/mouse/rib/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/rib/external.py +++ b/sfaira/data/mouse/rib/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index ac1d0e4f3..6b537abc3 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 81a102255..480301200 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 86ccb2a50..12552aa07 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external 
import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/skin/external.py b/sfaira/data/mouse/skin/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/skin/external.py +++ b/sfaira/data/mouse/skin/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 20a55f165..bdeec61ed 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index 739ccd78b..69f6e31e2 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import 
Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/small_intestine/external.py b/sfaira/data/mouse/small_intestine/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/small_intestine/external.py +++ b/sfaira/data/mouse/small_intestine/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py index 6cd027f9f..6f782d749 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py 
b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py index f8231ed8a..d9ef4a7c7 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py index c8f718761..2a2f2a23d 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/spleen/external.py 
b/sfaira/data/mouse/spleen/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/spleen/external.py +++ b/sfaira/data/mouse/spleen/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index bba3afb06..b6417b69a 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -56,7 +56,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 11ec52199..012e3eb46 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" 
self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 8baef2938..7fd4c9a51 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -53,7 +53,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/stomach/external.py b/sfaira/data/mouse/stomach/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/stomach/external.py +++ b/sfaira/data/mouse/stomach/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 067e46ab6..88f84e57a 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + 
self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/testis/external.py b/sfaira/data/mouse/testis/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/testis/external.py +++ b/sfaira/data/mouse/testis/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py index 392a77526..d60df3d93 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py index 8249fbd2c..8e4d7491c 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class 
Dataset(DatasetBase): @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/thymus/external.py b/sfaira/data/mouse/thymus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/thymus/external.py +++ b/sfaira/data/mouse/thymus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index ea5974713..d8419d46b 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -51,7 +51,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index d109ad046..95a8a61a7 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -52,7 +52,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index 858802079..d44fd4b66 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -52,7 +52,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/tongue/external.py b/sfaira/data/mouse/tongue/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/tongue/external.py +++ b/sfaira/data/mouse/tongue/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index d0193b8f0..778a40711 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index e826771e1..b16fbe013 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -2,7 +2,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/trachae/external.py b/sfaira/data/mouse/trachae/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/trachae/external.py +++ b/sfaira/data/mouse/trachae/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py index a6f099ff3..24da9e72c 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -55,7 +55,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "10x" diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py index be56897d6..75f98dce5 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -54,7 +54,7 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.lab] = "Quake" + self.adata.uns[ADATA_IDS.author] = "Quake" self.adata.uns[ADATA_IDS.year] = "2019" self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" self.adata.uns[ADATA_IDS.protocol] = "smartseq2" diff --git a/sfaira/data/mouse/uterus/external.py b/sfaira/data/mouse/uterus/external.py index a4d155b9a..cc51e6fda 100644 --- a/sfaira/data/mouse/uterus/external.py +++ b/sfaira/data/mouse/uterus/external.py @@ -1,2 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index e3ba57883..4af24dc90 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index 568a0e9bd..b1cb13f75 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -4,7 +4,7 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS +from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.lab] = "Guo" + self.adata.uns[ADATA_IDS.author] = "Guo" self.adata.uns[ADATA_IDS.year] = "2018" self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" From 4a280d0b57dac34c887172519801e528db0436aa Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 18:23:04 +0100 Subject: [PATCH 007/161] updated old usages of ADATA_IDS to ADATA_IDS_SFAIRA --- sfaira/data/databases/cellxgene_loader.py | 44 +++++++++---------- sfaira/data/databases/external.py | 3 +- .../human_adipose_2020_microwell_han_001.py | 24 
+++++----- ...man_adrenalgland_2020_microwell_han_001.py | 24 +++++----- ...man_adrenalgland_2020_microwell_han_002.py | 24 +++++----- ...man_adrenalgland_2020_microwell_han_003.py | 24 +++++----- ...man_adrenalgland_2020_microwell_han_004.py | 24 +++++----- ...man_adrenalgland_2020_microwell_han_005.py | 24 +++++----- ...man_adrenalgland_2020_microwell_han_006.py | 24 +++++----- .../human_artery_2020_microwell_han_001.py | 24 +++++----- .../human_bladder_2020_microwell_han_001.py | 24 +++++----- .../human_bladder_2020_microwell_han_002.py | 24 +++++----- .../human_bladder_2020_microwell_han_003.py | 24 +++++----- .../blood/human_blood_2018_10x_ica_001.py | 30 ++++++------- .../human_blood_2019_10x_10xGenomics_001.py | 30 ++++++------- .../human_blood_2020_microwell_han_001.py | 24 +++++----- .../human_blood_2020_microwell_han_002.py | 24 +++++----- .../human_blood_2020_microwell_han_003.py | 24 +++++----- .../human_blood_2020_microwell_han_004.py | 24 +++++----- .../human_blood_2020_microwell_han_005.py | 24 +++++----- .../human_blood_2020_microwell_han_006.py | 24 +++++----- .../human_blood_2020_microwell_han_007.py | 24 +++++----- .../human/bone/human_bone_2018_10x_ica_001.py | 30 ++++++------- .../bone/human_bone_2020_microwell_han_001.py | 24 +++++----- .../bone/human_bone_2020_microwell_han_002.py | 24 +++++----- .../human_brain_2017_DroNcSeq_habib_001.py | 30 ++++++------- .../human_brain_2020_microwell_han_001.py | 24 +++++----- .../human_brain_2020_microwell_han_002.py | 24 +++++----- .../human_brain_2020_microwell_han_003.py | 24 +++++----- .../human_brain_2020_microwell_han_004.py | 24 +++++----- .../human_brain_2020_microwell_han_005.py | 24 +++++----- .../human_brain_2020_microwell_han_006.py | 24 +++++----- .../human_calvaria_2020_microwell_han_001.py | 24 +++++----- .../human_cervix_2020_microwell_han_001.py | 24 +++++----- ..._chorionicvillus_2020_microwell_han_001.py | 24 +++++----- .../colon/human_colon_2019_10x_kinchen_001.py | 34 +++++++------- 
.../colon/human_colon_2019_10x_smilie_001.py | 30 ++++++------- .../colon/human_colon_2019_10x_wang_001.py | 30 ++++++------- .../colon/human_colon_2020_10x_james_001.py | 30 ++++++------- .../human_colon_2020_microwell_han_001.py | 24 +++++----- .../human_colon_2020_microwell_han_002.py | 24 +++++----- .../human_colon_2020_microwell_han_003.py | 24 +++++----- .../human_colon_2020_microwell_han_004.py | 24 +++++----- .../human_duodenum_2020_microwell_han_001.py | 24 +++++----- ...human_epityphlon_2020_microwell_han_001.py | 24 +++++----- .../human_esophagus_2019_10x_madissoon_001.py | 30 ++++++------- .../human_esophagus_2020_microwell_han_001.py | 24 +++++----- .../human_esophagus_2020_microwell_han_002.py | 24 +++++----- .../eye/human_eye_2019_10x_lukowski_001.py | 30 ++++++------- .../human/eye/human_eye_2019_10x_menon_001.py | 30 ++++++------- .../human/eye/human_eye_2019_10x_voigt_001.py | 30 ++++++------- .../eye/human_eye_2020_microwell_han_001.py | 24 +++++----- ...an_fallopiantube_2020_microwell_han_001.py | 24 +++++----- ...uman_femalegonad_2020_microwell_han_001.py | 24 +++++----- ...uman_femalegonad_2020_microwell_han_002.py | 24 +++++----- ...uman_gallbladder_2020_microwell_han_001.py | 24 +++++----- .../human_heart_2020_microwell_han_001.py | 24 +++++----- .../human_heart_2020_microwell_han_002.py | 24 +++++----- .../human_heart_2020_microwell_han_003.py | 24 +++++----- .../human_heart_2020_microwell_han_004.py | 24 +++++----- .../hesc/human_hesc_2020_microwell_han_001.py | 24 +++++----- .../ileum/human_ileum_2019_10x_martin_001.py | 30 ++++++------- .../ileum/human_ileum_2019_10x_wang_001.py | 30 ++++++------- .../human_ileum_2020_microwell_han_001.py | 24 +++++----- .../human_jejunum_2020_microwell_han_001.py | 24 +++++----- .../human_kidney_2019_10xSn_lake_001.py | 30 ++++++------- .../human_kidney_2019_10x_stewart_001.py | 30 ++++++------- .../kidney/human_kidney_2020_10x_liao_001.py | 30 ++++++------- .../human_kidney_2020_microwell_han_001.py | 
24 +++++----- .../human_kidney_2020_microwell_han_002.py | 24 +++++----- .../human_kidney_2020_microwell_han_003.py | 24 +++++----- .../human_kidney_2020_microwell_han_004.py | 24 +++++----- .../human_kidney_2020_microwell_han_005.py | 24 +++++----- .../human_kidney_2020_microwell_han_006.py | 24 +++++----- .../human_kidney_2020_microwell_han_007.py | 24 +++++----- .../human_liver_2018_10x_macparland_001.py | 30 ++++++------- .../liver/human_liver_2019_10x_popescu_001.py | 30 ++++++------- .../human_liver_2019_10x_ramachandran_001.py | 30 ++++++------- .../human_liver_2019_mCELSeq2_aizarani_001.py | 28 ++++++------ .../human_liver_2020_microwell_han_001.py | 24 +++++----- .../human_liver_2020_microwell_han_002.py | 24 +++++----- .../human_liver_2020_microwell_han_003.py | 24 +++++----- .../human_liver_2020_microwell_han_004.py | 24 +++++----- .../human_liver_2020_microwell_han_005.py | 24 +++++----- .../lung/human_lung_2019_10x_braga_001.py | 28 ++++++------ .../lung/human_lung_2019_10x_braga_002.py | 28 ++++++------ .../lung/human_lung_2019_10x_madissoon_001.py | 28 ++++++------ .../lung/human_lung_2019_dropseq_braga_003.py | 28 ++++++------ .../lung/human_lung_2020_10x_habermann_001.py | 28 ++++++------ .../lung/human_lung_2020_10x_lukassen_001.py | 28 ++++++------ .../lung/human_lung_2020_10x_lukassen_002.py | 28 ++++++------ .../lung/human_lung_2020_10x_miller_001.py | 28 ++++++------ .../human_lung_2020_10x_travaglini_001.py | 30 ++++++------- .../lung/human_lung_2020_microwell_han_001.py | 24 +++++----- .../lung/human_lung_2020_microwell_han_002.py | 24 +++++----- .../lung/human_lung_2020_microwell_han_003.py | 24 +++++----- .../lung/human_lung_2020_microwell_han_004.py | 24 +++++----- .../lung/human_lung_2020_microwell_han_005.py | 24 +++++----- ...uman_lung_2020_smartseq2_travaglini_002.py | 30 ++++++------- .../human_malegonad_2018_10x_guo_001.py | 30 ++++++------- .../human_malegonad_2020_microwell_han_001.py | 24 +++++----- 
.../human_malegonad_2020_microwell_han_002.py | 24 +++++----- .../mixed/human_mixed_2019_10x_szabo_001.py | 28 ++++++------ .../human_muscle_2020_microwell_han_001.py | 24 +++++----- .../human_muscle_2020_microwell_han_002.py | 24 +++++----- .../human_omentum_2020_microwell_han_001.py | 24 +++++----- .../human_omentum_2020_microwell_han_002.py | 24 +++++----- .../human_omentum_2020_microwell_han_003.py | 24 +++++----- .../human_pancreas_2016_indrop_baron_001.py | 30 ++++++------- ...pancreas_2016_smartseq2_segerstolpe_001.py | 32 +++++++------- .../human_pancreas_2017_smartseq2_enge_001.py | 30 ++++++------- .../human_pancreas_2020_microwell_han_001.py | 24 +++++----- .../human_pancreas_2020_microwell_han_002.py | 24 +++++----- .../human_pancreas_2020_microwell_han_003.py | 24 +++++----- .../human_pancreas_2020_microwell_han_004.py | 24 +++++----- .../human_placenta_2018_10x_ventotormo_001.py | 30 ++++++------- ..._placenta_2018_smartseq2_ventotormo_001.py | 30 ++++++------- .../human_placenta_2020_microwell_han_001.py | 24 +++++----- .../human_pleura_2020_microwell_han_001.py | 24 +++++----- .../human_prostate_2018_10x_henry_001.py | 30 ++++++------- .../human_prostate_2020_microwell_han_001.py | 24 +++++----- .../rectum/human_rectum_2019_10x_wang_001.py | 30 ++++++------- .../human_rectum_2020_microwell_han_001.py | 24 +++++----- .../rib/human_rib_2020_microwell_han_001.py | 24 +++++----- .../rib/human_rib_2020_microwell_han_002.py | 24 +++++----- .../skin/human_skin_2020_microwell_han_001.py | 24 +++++----- .../skin/human_skin_2020_microwell_han_002.py | 24 +++++----- ...human_spinalcord_2020_microwell_han_001.py | 24 +++++----- .../human_spleen_2019_10x_madissoon_001.py | 30 ++++++------- .../human_spleen_2020_microwell_han_001.py | 24 +++++----- .../human_spleen_2020_microwell_han_002.py | 24 +++++----- .../human_stomach_2020_microwell_han_001.py | 24 +++++----- .../human_stomach_2020_microwell_han_002.py | 24 +++++----- 
.../human_stomach_2020_microwell_han_003.py | 24 +++++----- .../human_stomach_2020_microwell_han_004.py | 24 +++++----- .../human_stomach_2020_microwell_han_005.py | 24 +++++----- .../human_stomach_2020_microwell_han_006.py | 24 +++++----- .../human_stomach_2020_microwell_han_007.py | 24 +++++----- .../human_stomach_2020_microwell_han_008.py | 24 +++++----- .../human_stomach_2020_microwell_han_009.py | 24 +++++----- .../human_stomach_2020_microwell_han_010.py | 24 +++++----- .../thymus/human_thymus_2020_10x_park_001.py | 30 ++++++------- .../human_thymus_2020_microwell_han_001.py | 24 +++++----- .../human_thymus_2020_microwell_han_002.py | 24 +++++----- .../human_thyroid_2020_microwell_han_001.py | 24 +++++----- .../human_thyroid_2020_microwell_han_002.py | 24 +++++----- .../human_trachea_2020_microwell_han_001.py | 24 +++++----- .../human_ureter_2020_microwell_han_001.py | 24 +++++----- .../human_uterus_2020_microwell_han_001.py | 24 +++++----- .../mouse_bladder_2018_microwell_han_001.py | 32 +++++++------- .../mouse_bladder_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_bladder_2019_smartseq2_pisco_001.py | 30 ++++++------- .../mouse_brain_2018_microwell_han_001.py | 32 +++++++------- .../mouse_brain_2018_microwell_han_002.py | 32 +++++++------- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 32 +++++++------- .../mouse_brain_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_brain_2019_smartseq2_pisco_002.py | 32 +++++++------- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 30 ++++++------- .../mouse/fat/mouse_fat_2019_10x_pisco_001.py | 32 +++++++------- .../fat/mouse_fat_2019_smartseq2_pisco_001.py | 32 +++++++------- .../fat/mouse_fat_2019_smartseq2_pisco_002.py | 32 +++++++------- .../fat/mouse_fat_2019_smartseq2_pisco_003.py | 32 +++++++------- .../fat/mouse_fat_2019_smartseq2_pisco_004.py | 32 +++++++------- .../heart/mouse_heart_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_heart_2019_smartseq2_pisco_001.py | 32 +++++++------- 
.../mouse_heart_2019_smartseq2_pisco_002.py | 32 +++++++------- .../mouse_kidney_2018_microwell_han_001.py | 32 +++++++------- .../mouse_kidney_2018_microwell_han_002.py | 32 +++++++------- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_kidney_2019_smartseq2_pisco_001.py | 32 +++++++------- ...ouse_large_intestine_2019_10x_pisco_001.py | 32 +++++++------- ...arge_intestine_2019_smartseq2_pisco_001.py | 32 +++++++------- ...ouse_limb_muscle_2018_microwell_han_001.py | 32 +++++++------- .../mouse_limb_muscle_2019_10x_pisco_001.py | 32 +++++++------- ...se_limb_muscle_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_liver_2018_microwell_han_001.py | 32 +++++++------- .../mouse_liver_2018_microwell_han_002.py | 32 +++++++------- .../liver/mouse_liver_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_liver_2019_smartseq2_pisco_001.py | 32 +++++++------- .../lung/mouse_lung_2018_microwell_han_001.py | 32 +++++++------- .../lung/mouse_lung_2018_microwell_han_002.py | 32 +++++++------- .../lung/mouse_lung_2018_microwell_han_003.py | 32 +++++++------- .../lung/mouse_lung_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_lung_2019_smartseq2_pisco_001.py | 32 +++++++------- ...se_mammary_gland_2018_microwell_han_001.py | 32 +++++++------- ...se_mammary_gland_2018_microwell_han_002.py | 32 +++++++------- ...se_mammary_gland_2018_microwell_han_003.py | 32 +++++++------- ...se_mammary_gland_2018_microwell_han_004.py | 32 +++++++------- .../mouse_mammary_gland_2019_10x_pisco_001.py | 32 +++++++------- ..._mammary_gland_2019_smartseq2_pisco_001.py | 32 +++++++------- .../marrow/mouse_marrow_2018_microwell_001.py | 32 +++++++------- .../marrow/mouse_marrow_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_marrow_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_ovary_2018_microwell_han_001.py | 32 +++++++------- .../mouse_ovary_2018_microwell_han_002.py | 32 +++++++------- .../mouse_pancreas_2018_microwell_han_001.py | 32 
+++++++------- .../mouse_pancreas_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_001.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_002.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_003.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_004.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_005.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_006.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_007.py | 32 +++++++------- .../mouse_pancreas_2019_10x_thompson_008.py | 32 +++++++------- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 32 +++++++------- ...peripheral_blood_2018_microwell_han_001.py | 32 +++++++------- ...peripheral_blood_2018_microwell_han_002.py | 32 +++++++------- ...peripheral_blood_2018_microwell_han_003.py | 32 +++++++------- ...peripheral_blood_2018_microwell_han_004.py | 32 +++++++------- ...peripheral_blood_2018_microwell_han_005.py | 32 +++++++------- .../mouse_placenta_2018_microwell_han_001.py | 32 +++++++------- .../mouse_placenta_2018_microwell_han_002.py | 32 +++++++------- .../mouse_prostate_2018_microwell_han_001.py | 32 +++++++------- .../mouse_prostate_2018_microwell_han_002.py | 32 +++++++------- .../rib/mouse_rib_2018_microwell_han_001.py | 32 +++++++------- .../rib/mouse_rib_2018_microwell_han_002.py | 32 +++++++------- .../rib/mouse_rib_2018_microwell_han_003.py | 32 +++++++------- .../skin/mouse_skin_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_skin_2019_smartseq2_pisco_001.py | 32 +++++++------- ..._small_intestine_2018_microwell_han_001.py | 32 +++++++------- ..._small_intestine_2018_microwell_han_002.py | 32 +++++++------- ..._small_intestine_2018_microwell_han_003.py | 32 +++++++------- .../mouse_spleen_2018_microwell_han_001.py | 32 +++++++------- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_spleen_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_stomach_2018_microwell_han_001.py | 
32 +++++++------- .../mouse_testis_2018_microwell_han_001.py | 32 +++++++------- .../mouse_testis_2018_microwell_han_002.py | 32 +++++++------- .../mouse_thymus_2018_microwell_han_001.py | 32 +++++++------- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_thymus_2019_smartseq2_pisco_001.py | 32 +++++++------- .../tongue/mouse_tongue_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_tongue_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_trachea_2019_10x_pisco_001.py | 32 +++++++------- .../mouse_trachea_2019_smartseq2_pisco_001.py | 32 +++++++------- .../mouse_uterus_2018_microwell_han_001.py | 32 +++++++------- .../mouse_uterus_2018_microwell_han_002.py | 32 +++++++------- 238 files changed, 3329 insertions(+), 3328 deletions(-) diff --git a/sfaira/data/databases/cellxgene_loader.py b/sfaira/data/databases/cellxgene_loader.py index 7472c6c49..bb8ac7488 100644 --- a/sfaira/data/databases/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene_loader.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS, ADATA_IDS_CELLXGENE +from .external import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE class Dataset(DatasetBase): @@ -41,35 +41,35 @@ def _load(self, fn=None): adata = anndata.read(fn) adata.X = adata.raw.X - self.adata.uns[ADATA_IDS.author] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] - self.adata.uns[ADATA_IDS.year] = adata.uns[ADATA_IDS_CELLXGENE.year] - self.adata.uns[ADATA_IDS.doi] = adata.uns[ADATA_IDS_CELLXGENE.doi] - if len(np.unique(adata.obs[ADATA_IDS.animal].values)) > 1: + self.adata.uns[ADATA_IDS_SFAIRA.author] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] + self.adata.uns[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_CELLXGENE.year] + self.adata.uns[ADATA_IDS_SFAIRA.doi] = adata.uns[ADATA_IDS_CELLXGENE.doi] + if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.animal].values)) > 1: raise Warning("found 
multiple assay in data set %s" % self.fn) - self.adata.uns[ADATA_IDS.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] # Select tissue: blood is handled as a separate tissue in .obs #if len(np.unique(adata.obs["tissue"].values)) > 1: # raise Warning("found multiple tissue in data set %s" % self.fn) #self.adata.uns["organ"] = adata.obs["tissue"].values[0] - self.adata.uns[ADATA_IDS.organ] = str(self.fn).split("_")[3] - if len(np.unique(adata.obs[ADATA_IDS.animal].values)) > 1: + self.adata.uns[ADATA_IDS_SFAIRA.organ] = str(self.fn).split("_")[3] + if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.animal].values)) > 1: raise Warning("found multiple organisms in data set %s" % self.fn) - self.adata.uns[ADATA_IDS.animal] = adata.obs[ADATA_IDS_CELLXGENE.animal].values[0] - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.animal] = adata.obs[ADATA_IDS_CELLXGENE.animal].values[0] + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.obs[ADATA_IDS.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values - self.adata.obs[ADATA_IDS.sex] = adata.obs[ADATA_IDS_CELLXGENE.sex].values - self.adata.obs[ADATA_IDS.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values - self.adata.obs[ADATA_IDS.healthy] = adata.obs[ADATA_IDS_CELLXGENE.disease].values == ADATA_IDS_CELLXGENE.disease_state_healthy - self.adata.obs[ADATA_IDS.state_exact] = adata.obs[ADATA_IDS_CELLXGENE.disease].values + 
self.adata.obs[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values + self.adata.obs[ADATA_IDS_SFAIRA.sex] = adata.obs[ADATA_IDS_CELLXGENE.sex].values + self.adata.obs[ADATA_IDS_SFAIRA.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = adata.obs[ADATA_IDS_CELLXGENE.disease].values == ADATA_IDS_CELLXGENE.disease_state_healthy + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = adata.obs[ADATA_IDS_CELLXGENE.disease].values - self.adata.obs[ADATA_IDS.cell_ontology_id] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_id].values.tolist() - self.adata.obs[ADATA_IDS.cell_ontology_class] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.cell_types_original] = adata.obs[ADATA_IDS_CELLXGENE.cell_types_original].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_id].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = adata.obs[ADATA_IDS_CELLXGENE.cell_types_original].values.tolist() self._convert_and_set_var_names( symbol_col=ADATA_IDS_CELLXGENE.gene_id_names, diff --git a/sfaira/data/databases/external.py b/sfaira/data/databases/external.py index 8b1378917..1437719c6 100644 --- a/sfaira/data/databases/external.py +++ b/sfaira/data/databases/external.py @@ -1 +1,2 @@ - +from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 7ad5536f2..797e89b0a 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ 
b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adipose/hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 9b9dacb36..fafaca273 100644 --- 
a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py 
b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 5c35b9b75..2e3b74baa 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index bbeee8651..ed5ab9d2a 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 6f2f49311..d7b45119e 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 6df07e9bc..c16c073ed 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index faa871a63..483a715df 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 6bdadaac1..b0719cbd4 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/artery/hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index 2811f3d52..01e560720 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index ba3bb5914..3dc3d15d5 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 4b8cb989d..cb35370e9 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bladder/hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index ff15c9e93..814fc8b27 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -62,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/ica_blood.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Regev' - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = None - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = None - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index db2dd4f1a..00c9c9de1 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -46,20 +46,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = '10x Genomics' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = None - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = '10x Genomics' + 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = None - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 5e53b4c14..da96a37c4 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = 
"human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index cbdf7ef67..a169af39e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 3169d1ffd..2a9d21da5 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index 1cbfcfcd2..b6b7998d2 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index e6d892389..0d283031f 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index a55342f9e..42ad29021 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index 4702724b9..88f8b86a5 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/blood/hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index 2f5271afd..aa826583b 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -62,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/ica_bone.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Regev' - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = None - 
self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = None - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index d432f1479..cd2ff1b70 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_1.h5ad") 
self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 57032a6da..36fafd970 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, 
"human/bone/hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index 928ff5744..49638cf96 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -63,20 +63,20 @@ def _load(self, fn=None): 
self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Regev" - self.adata.uns[ADATA_IDS.year] = 2017 - self.adata.uns[ADATA_IDS.doi] = "10.1038/nmeth.4407" - self.adata.uns[ADATA_IDS.protocol] = 'DroNcSeq' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Regev" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2017 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/nmeth.4407" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index 28f7e2a86..58b431a22 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 5071dc165..0eae3d776 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 6e8b6b08f..78c2b1015 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index 8973f8a59..e937b0026 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index b0ae46267..0a7353ba3 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index c2becfdf5..96c599c0b 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -76,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/brain/hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, 
ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 4f86c6e27..3191905b4 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/calvaria/hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 2cddd72b1..cfe091469 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/cervix/hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index 006762709..3c9d9c157 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/chorionicvillus/hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index a4f5a01ef..d7ee97c3c 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -128,23 +128,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/kinchenetal.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Simmons' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.08.067" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS.healthy] = [line == 'normal' for line in + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Simmons' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.08.067" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [line == 'normal' for line in self.adata.obs['donor_organism.diseases.ontology_label']] - self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') - self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs[ADATA_IDS.state_exact]\ + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[ADATA_IDS_SFAIRA.state_exact]\ .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 91d13e418..e1aea6674 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -87,20 +87,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Regev" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2019.06.029" - self.adata.uns[ADATA_IDS.protocol] = '10x' - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Regev" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.06.029" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index e55124ed9..a95aa5821 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -55,20 +55,20 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Chen" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py 
b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index 398e54066..c82964973 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -74,20 +74,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Teichmann" - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41590-020-0602-z" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Teichmann" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41590-020-0602-z" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['cell_type'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index f75d1929e..8fdcf216c 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -72,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 3e19e0c93..1a321bb85 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -72,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 87e022665..50ec658d3 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -72,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 4309ced25..c033cf2a2 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -72,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 1cb5fe30b..777283949 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/duodenum/hcl_AdultDuodenum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index d0280631c..0b6236add 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/epityphlon/hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 91976a81c..63cd946a1 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -65,21 +65,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Meyer" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Meyer" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/741405" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Celltypes'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7413619', - new_index=ADATA_IDS.gene_id_ensembl) \ No newline at end of file + new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) \ No newline at end of file diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 20bb945da..6ffd42e15 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -69,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 
+ self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 60bca5620..2bfdac39d 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -69,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] 
= 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index af4036d6c..2cf6960ec 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -62,20 +62,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = 'Wong' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.15252/embj.2018100811' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Wong' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.15252/embj.2018100811' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index d2255098c..f040aa9a7 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -52,20 +52,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/eye/menon19.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Hafler' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-12780-8' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - 
self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Hafler' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-12780-8' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index e80ef522b..9910963d7 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -56,20 +56,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.author] = 'Mullins' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1073/pnas.1914143116' - 
self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mullins' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1073/pnas.1914143116' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index b9dd6df7f..f8925f88c 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -68,18 +68,18 @@ def _load(self, fn=None): fn = 
os.path.join(self.path, "human/eye/hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 22cd08a45..45ef1270f 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ 
b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/fallopiantube/hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 2a228b857..1be28c8ab 100644 
--- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py 
b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index b27286716..fafdca4a0 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 5b35f7b30..eb199409c 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/gallbladder/hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index f860dae11..c4c0e7ff5 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 5699855b4..d4d7a8cee 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 823e7a4b9..79d668bfe 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, 
ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 9bd3f322f..188d88184 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index 41c1ee8de..af26edc9d 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/hesc/hcl_HESC_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 1eb5f2015..1d4ee7d4b 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -69,20 +69,20 @@ def _load(self, fn=None): .multiply(1/10000) self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - self.adata.uns[ADATA_IDS.author] = "Kenigsberg" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2019.08.008" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Kenigsberg" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.08.008" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 75cd14e3a..b22c1a004 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -55,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Chen" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index ecad424bb..0429f4b10 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -71,18 +71,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ileum/hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
'10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 5e458b010..75cbdaade 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -44,17 +44,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/jejunum/hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index c2fff3afc..418203a03 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -77,20 +77,20 @@ def _load(self, fn=None): annot = pd.read_csv(fn[1], index_col=0, dtype='category') self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.author] = 'Jain' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-019-10861-2' - self.adata.uns[ADATA_IDS.protocol] = '10xSn' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Jain' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-10861-2' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10xSn' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index 403a29edb..555910cd1 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -125,21 +125,21 @@ def _load(self, fn=None): self.adata = adult.concatenate(fetal) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.author] = 'Clatworthy' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1126/science.aat5031' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = 
"human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Clatworthy' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1126/science.aat5031' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["celltype"] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] self.adata.obs["cell_ontology_id"] = None - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index bccc80567..2030ee714 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -96,20 +96,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/GSE131685.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Mo' - 
self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41597-019-0351-8' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41597-019-0351-8' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = None - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 39ec9a7bf..a7e25142a 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index 
16669aa63..c3f78cd5e 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py 
b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 860da838d..16695ac76 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index a9b28bc1c..d354fa368 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index d846a9f35..2d081f2f0 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 302085a48..ce1c84950 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index fe5c99d29..16544a729 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -94,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 1a53bc47f..5a0272a6d 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -70,20 +70,20 @@ def _load(self, fn=None): celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.author] = 'McGilvray' - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41467-018-06318-7' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'McGilvray' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-018-06318-7' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = 
self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 624841b92..5cd1a0165 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -70,20 +70,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Haniffa' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1652-y' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Haniffa' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1652-y' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["cell.labels"] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index adae74f19..070736cab 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -72,20 +72,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/ramachandran.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Henderson' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-019-1631-3' - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Henderson' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1631-3' + 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["annotation_lineage"] - self.adata.obs[ADATA_IDS.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] - self.adata.obs[ADATA_IDS.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index ccc41e021..42e4ea49c 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -90,20 +90,20 @@ def _load(self, fn=None): self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS.author] = 'Gruen' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = 
'10.1038/s41586-019-1373-2' - self.adata.uns[ADATA_IDS.protocol] = 'mCEL-Seq2' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Gruen' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1373-2' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index c62b58df1..6d03203c3 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -73,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, 
"human/liver/hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index d28abc786..2037910ba 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -73,17 +73,17 @@ def _load(self, fn=None): fn = 
os.path.join(self.path, "human/liver/hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 434632306..43e5116b5 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -73,17 +73,17 @@ def _load(self, 
fn=None): fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index c79a13d86..b3e3cc849 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -73,17 +73,17 @@ 
def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 1a098706c..c5f0bfdcd 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -73,17 
+73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/liver/hcl_Liver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 88fb112c2..563ffcf50 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -64,21 +64,21 @@ def 
_load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.author] = 'Teichmann' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py 
b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 5af29652e..667005ef2 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -64,21 +64,21 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.author] = 'Teichmann' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 03c51288b..5f4c5012f 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -68,21 +68,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Meyer' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1186/s13059-019-1906-x" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Meyer' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1186/s13059-019-1906-x" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] 
self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index ac01e3176..0e08d16da 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -62,21 +62,21 @@ def _load(self, fn=None): self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - self.adata.uns[ADATA_IDS.author] = 'Teichmann' - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS.protocol] = 'dropseq' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = [self.download_website, self.download_website_meta] - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'dropseq' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = [self.download_website, self.download_website_meta] + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'uninvolved areas of tumour resection material' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 06da0317c..e68b6283c 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -107,21 +107,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/habermann_processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Kropski' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1101/753806" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Kropski' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
"10.1101/753806" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = [i == 'Control' for i in self.adata.obs['Status']] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [i == 'Control' for i in self.adata.obs['Status']] self.adata.obs['state_exact'] = self.adata.obs['Diagnosis'].astype('category') - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index a693dd817..f562a1a34 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -57,21 +57,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = 'Eils' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = 
"human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Eils' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index c40785d6a..fd6da33f1 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -62,21 +62,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = 'Eils' - self.adata.uns[ADATA_IDS.year] = 2020 
- self.adata.uns[ADATA_IDS.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Eils' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index b3555b1de..7e6c61a83 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -73,21 +73,21 @@ 
def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = 'Spence' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.devcel.2020.01.033" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Spence' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.devcel.2020.01.033" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Cell_type'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index 2e35bdd39..666cdea5d 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -114,22 +114,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ .multiply(1 / 10000) - self.adata.uns[ADATA_IDS.author] = 'Krasnow' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs[ADATA_IDS.cell_ontology_class].astype('category') + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in 
self.adata.obs['free_annotation']] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index e7b447626..bc4299812 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -95,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 117487c58..f548c278d 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -95,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = 
"human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 4698a543b..ba6ecd2da 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -95,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 53c040180..8474e6435 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -95,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 9b4e99a04..7af72e778 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -95,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/lung/hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 8ef7a81bb..e8f515208 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -101,22 +101,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ .multiply(1 / 1000000) - self.adata.uns[ADATA_IDS.author] = 'Krasnow' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1101/742320" - self.adata.uns[ADATA_IDS.protocol] = 'smartseq2' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'smartseq2' 
+ self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs[ADATA_IDS.cell_ontology_class].astype('category') + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs['state_exact'] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index b483540f3..170912480 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -59,20 +59,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Cairns" - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41422-018-0099-2" - 
self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Cairns" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41422-018-0099-2" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index ac2c050ae..1a1430759 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -73,18 +73,18 @@ 
def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 683eca580..343da82ca 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ 
b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -73,18 +73,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index c06146fe9..0f5275220 100644 --- 
a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -150,23 +150,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/mixed/GSE126030.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = "Sims" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41467-019-12464-3" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sims" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41467-019-12464-3" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs["subtissue"] = self.adata.obs["organ"] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession', new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) # If the subset_organs() method has been run 
before, subset to specified organs if "organsubset" in self.__dict__: diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index c5aebbc16..88e1abf7a 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index a89a595fc..ff1d80cab 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/muscle/hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, 
new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 854e6e572..1f6bd0727 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, 
ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 1ffce6d62..8ca0b7e01 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index f7080fdc0..6a9887e0b 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index 1c37d4eb0..c1cf35afd 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -62,19 +62,19 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Yanai" - self.adata.uns[ADATA_IDS.year] = 2016 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cels.2016.08.011" - self.adata.uns[ADATA_IDS.protocol] = 'inDrop' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Yanai" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2016 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cels.2016.08.011" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'inDrop' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index b724286b1..7f533cd97 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -69,21 +69,21 @@ def _load(self, fn=None): # filter observations which are not cells (empty wells, low quality cells etc.) 
self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - self.adata.uns[ADATA_IDS.author] = "Sandberg" - self.adata.uns[ADATA_IDS.year] = 2016 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2016.08.020" - self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sandberg" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2016 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2016.08.020" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] - self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') - self.adata.obs[ADATA_IDS.state_exact] = self.adata.obs[ADATA_IDS.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') + 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[ADATA_IDS_SFAIRA.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index 0e4b742ca..4f13c70c5 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -129,20 +129,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/GSE81547.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = 2017 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2017.09.004" - self.adata.uns[ADATA_IDS.protocol] = 'Smartseq2' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2017 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2017.09.004" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['celltype'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index ce97af838..681b7be90 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -83,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 
'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index ffc6c4e32..97a13d9f3 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -83,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' 
+ self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 721687ae0..157d1abe2 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -83,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - 
self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 221bcf3d5..20b79d86a 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -83,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - 
self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index e6dda5a6f..7385e1a45 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -83,30 +83,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS.author] = 'Teichmann' - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['annotation'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata = self.adata[:, 
~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index cedaa7c72..ed8505189 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -83,30 +83,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS.author] = 'Teichmann' - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[ADATA_IDS.protocol] = "Smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "Smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['annotation'] + 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index 165754ab9..c288a8439 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -76,17 +76,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/placenta/hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index b581b182f..5336f2163 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/pleura/hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - 
self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 8931a1f7b..29b39cdfd 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -56,20 +56,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Strand" - self.adata.uns[ADATA_IDS.year] = 2018 - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.celrep.2018.11.086" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Strand" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.celrep.2018.11.086" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index d54a69417..de42e8b13 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -65,18 +65,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/prostate/hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index 71dad32dd..b0b74bdd5 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -55,20 +55,20 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Chen" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py 
b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index f3d9a6bd4..b87ce3220 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -60,17 +60,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rectum/hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 14a6341ba..a50a31b5b 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index c7fd63078..c9a15e6c1 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/rib/hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 6a60655f0..fc618bc87 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -74,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff 
--git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index e2658caa5..f344e2577 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -74,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) 
diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 41f7dd4a6..4eeb530e6 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spinalcord/hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 3fddbbba9..2baff592e 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -74,22 +74,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS.author] = "Meyer" - self.adata.uns[ADATA_IDS.year] = 2019 - self.adata.uns[ADATA_IDS.doi] = "10.1101/741405" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Meyer" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/741405" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Celltypes'] + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] self.set_unkown_class_id(ids=["Unknown"]) - self.adata.obs[ADATA_IDS.healthy] = 
True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7463846', - new_index=ADATA_IDS.gene_id_ensembl) + new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index 6ce8ad513..79de48740 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -66,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 39d6e459e..e436db28c 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -66,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 628e13254..4c64c46aa 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index 01f98e71c..d121d66f1 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index fa0141b50..baa3de5b3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index 3bd20a857..dd0827648 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntetsine_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index 992646eaf..d3281b286 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 681f4adf4..6e23f365e 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 5da38a27a..6f1664440 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 7ae696af3..87843f10a 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 408970e7b..731399f04 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index f194548a5..b856bacf2 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - 
self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index fe532b4cc..4a9a5ac85 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -89,20 +89,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS.author] = "Teichmann" - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = "10.1126/science.aay3224" - self.adata.uns[ADATA_IDS.protocol] = '10x' - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - 
self.adata.uns[ADATA_IDS.normalization] = 'norm' + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Teichmann" + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1126/science.aay3224" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 'healthy' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 50ea0ad46..2a749b1ec 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -58,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index a4efd187e..07ca1245d 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -58,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index be2fd0d8a..97c3daffb 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index d364560cd..6c218e9f2 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - 
self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index 3a6ef7519..d25ebafab 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/trachea/hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - 
self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 3defbf60a..cb9f9d582 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/ureter/hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 641881669..7ddf1a88b 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -44,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human/uterus/hcl_AdultUterus_1.h5ad") 
self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS.author] = 'Guo' - self.adata.uns[ADATA_IDS.year] = 2020 - self.adata.uns[ADATA_IDS.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "human" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' + self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 4b7a6b5e2..ac243bb11 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -61,21 +61,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x 
in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index b3ddbfbf3..cc42fefb4 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index fe1309f01..e979b4aa6 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -51,19 +51,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index a7477c191..2e40f68d4 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -59,21 +59,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = 
self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index e85596a12..ae2a7c217 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -59,21 +59,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - 
self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index bbe9b8e34..2425a7614 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -65,23 +65,23 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names self.adata.var = var - self._convert_and_set_var_names(symbol_col=ADATA_IDS.gene_id_names, ensembl_col=ADATA_IDS.gene_id_ensembl, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) - self.adata.uns[ADATA_IDS.author] = "Movahedi" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1038/s41593-019-0393-4" - self.adata.uns[ADATA_IDS.protocol] = "microwell" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Movahedi" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41593-019-0393-4" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = 
"mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index a80d92f43..6cad9a8fb 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -54,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index c1ab8935a..44b27c9dc 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -54,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 7a184c1f9..635b54582 100644 --- 
a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -53,19 +53,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) 
diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py index b76321d32..7d03ceb14 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py index 31b6189a7..6fac381bc 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = 
"mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py index ee821fd89..74545adb0 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py @@ -51,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 
"healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py index a5998c978..b1f1ffa18 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py index b727b5365..d05a05985 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = 
"Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index 022b71553..91a38349b 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index 496832d0a..8c7317263 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index e8df358a5..d741ce9d0 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - 
self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index 5fafa5267..61afd036e 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -45,21 +45,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py 
b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index ade327075..79ce5f241 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -76,21 +76,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index 1a7afe1ee..31c75fe80 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -56,21 +56,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index c2966e4fe..5dd9f11b1 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -55,21 +55,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - 
self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py index c2449b6a7..8ca29e546 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = 
{} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 0ff6b4522..0d6ced71f 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is 
already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py index c348f753f..57f988119 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py @@ -62,22 +62,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py index 81027a49a..a83ef3883 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py @@ -51,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index c36a3c4f4..54e8cd275 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -51,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = 
{} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index 62d52862b..c27fbb977 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -63,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 7566ca230..456b8859a 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -57,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 5f07d8ffa..5a8eab646 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" 
- self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 28f70a88c..8fa24bf71 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -53,20 +53,20 @@ def 
_load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 4f2f6e323..6a38082d7 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -77,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 1ea4176c2..0aae7fe45 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -77,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 142304ae6..7dc52d140 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -77,22 +77,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py 
b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 127bda80c..d4e99c34c 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -53,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + 
self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 96aa17622..9a2ef1966 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -53,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py index 01ca467f3..959cd5008 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py @@ -61,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = 
self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py index f83d7570a..6de99ec52 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py @@ -61,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() 
self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py index 8d198221c..b23c0de7e 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py @@ -61,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py index ad8d34841..cd7bac286 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py @@ -61,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - 
self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py index 03bf5a49e..67cfc89c9 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py @@ -51,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - 
self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index ad02d815a..14db17f54 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -51,20 +51,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + 
self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py index ef1dcedc3..1a00f284b 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py @@ -60,22 +60,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs['Annotation'] - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs['Annotation'] + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py index 3bb45b84b..f9ed5dbdf 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py @@ -53,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # 
self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py index 970256e11..97ecaf91e 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py @@ -53,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - 
self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py index 1595b143f..792d94ef8 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py @@ -57,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py index cf5ec19e5..9ba1a47d1 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py @@ -57,21 +57,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index 075cb94d9..a66aa1d13 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -67,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index d15445430..1c39c2dd8 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + 
self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 938320e1f..0f02a7c0c 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index 2c765e7c5..866c001a2 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 3594d9e61..229ba1616 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - 
self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index d24636972..f1cc6da3f 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -58,21 +58,21 @@ def _load(self, fn=None): 
self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 676c61173..ea8b4a1fe 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - 
self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index bf4ae4823..3759216ad 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 84ce5f079..3a51d770a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = 
"Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index 4fddb888e..cb5876758 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -58,21 +58,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS.author] = "Bhushan" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - 
self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = celltypes + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS.healthy] = False - self.adata.obs[ADATA_IDS.state_exact] = "diabetic" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index 889e5bda6..d133975c8 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -55,20 +55,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py index 126a0f662..5ccedbcc9 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py @@ -66,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py index cb92bc8d2..6ff1477f4 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py @@ -66,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] 
= self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py index 381a925be..c3cf9e50f 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py @@ 
-66,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + 
self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py index 1ab939bac..ab5fb7b4f 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py @@ -66,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = 
"mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index 1641c618e..a41eb9b8d 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -66,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - 
self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 19848f06d..eff620438 100644 --- 
a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -73,21 +73,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = 
"healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 2dd2a070d..1cba7e492 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -73,21 +73,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # 
TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index f20772195..f90eca390 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -51,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = 
self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 99ce454ee..9fd5659b9 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -51,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index 6b537abc3..b460b4816 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -68,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 480301200..deac2063e 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -68,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - 
self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 12552aa07..26752d193 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -68,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index bdeec61ed..1322194cf 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index 69f6e31e2..f64f20391 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py index 6f782d749..47ad87f2b 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py @@ -68,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - 
self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py index d9ef4a7c7..d49957375 100644 --- 
a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py @@ -68,21 +68,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - 
self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py index 2a2f2a23d..ac2a65c08 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py @@ -68,22 +68,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index b6417b69a..59f41fc0e 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -56,21 +56,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = 
self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 012e3eb46..e7552ba2f 100644 --- 
a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 7fd4c9a51..d102df1d0 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -53,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 88f84e57a..bedeb0045 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -62,21 +62,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
"10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py index d60df3d93..8d462eae8 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py @@ -64,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = 
"microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py 
b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py index 8e4d7491c..8171d6461 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py @@ -64,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index d8419d46b..aab2095cf 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -51,21 +51,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index 95a8a61a7..68a89417f 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -52,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index d44fd4b66..0138e1c0b 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -52,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} 
self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', 
ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 778a40711..25f8363c4 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + 
self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index b16fbe013..af4d42e79 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -54,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py index 24da9e72c..35bb4a0f1 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py @@ -55,21 +55,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "10x" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = 
self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py index 75f98dce5..21fc0ae33 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py @@ -54,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS.author] = "Quake" - self.adata.uns[ADATA_IDS.year] = "2019" - self.adata.uns[ADATA_IDS.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS.protocol] = "smartseq2" - 
self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'norm' - # self.adata.obs[ADATA_IDS.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs[ADATA_IDS.cell_ontology_class].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py 
index 4af24dc90..664f1a835 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -64,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index b1cb13f75..eb7df0b36 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -64,21 +64,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS.author] = "Guo" - self.adata.uns[ADATA_IDS.year] = "2018" - self.adata.uns[ADATA_IDS.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS.organ] = self.organ - self.adata.uns[ADATA_IDS.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS.animal] = "mouse" - self.adata.uns[ADATA_IDS.id] = self.id - self.adata.uns[ADATA_IDS.wget_download] = self.download_website - self.adata.uns[ADATA_IDS.has_celltypes] = self.has_celltypes - self.adata.uns[ADATA_IDS.normalization] = 'raw' - self.adata.obs[ADATA_IDS.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS.healthy] = True - self.adata.obs[ADATA_IDS.state_exact] = "healthy" + self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) From 16e9aba2cd4bef67b1ebca07b16a6434faec6939 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 18:23:52 +0100 Subject: [PATCH 008/161] added constants based classes into api to improve interfacing to 3rd parties --- sfaira/api/consts.py | 2 +- sfaira/consts/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py index 8e2dcfe64..b552015f1 100644 --- a/sfaira/api/consts.py +++ b/sfaira/api/consts.py @@ -1 +1 @@ -from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE \ No newline at end of file +from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, ADATA_IDS_BASE, ADATA_IDS_EXTENDED diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 68c8acb49..b9fa9e2b1 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1 +1,2 @@ from
sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA +from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED From 60f5bce6ad41256b0ead15cf75d73f1abdc19f90 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:07:47 +0100 Subject: [PATCH 009/161] refactored lazy dataset properties and meta data objects lazy datasets now draw from either properties defined in constructor or available in a meta data file. meta data files are streamlined, both in loading and saving. --- sfaira/api/consts.py | 1 + sfaira/consts/__init__.py | 1 + sfaira/consts/adata_fields.py | 20 +- sfaira/consts/meta_data_files.py | 13 ++ sfaira/data/base.py | 203 +++++++++++++++--- sfaira/data/databases/__init__.py | 1 + sfaira/data/databases/cellxgene/__init__.py | 2 + .../{ => cellxgene}/cellxgene_group.py | 5 +- .../{ => cellxgene}/cellxgene_loader.py | 17 +- .../databases/{ => cellxgene}/external.py | 1 + sfaira/data/external.py | 2 +- .../human_adipose_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_002.py | 4 +- ...man_adrenalgland_2020_microwell_han_003.py | 4 +- ...man_adrenalgland_2020_microwell_han_004.py | 4 +- ...man_adrenalgland_2020_microwell_han_005.py | 4 +- ...man_adrenalgland_2020_microwell_han_006.py | 4 +- .../human_artery_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_002.py | 4 +- .../human_bladder_2020_microwell_han_003.py | 4 +- .../blood/human_blood_2018_10x_ica_001.py | 4 +- .../human_blood_2019_10x_10xGenomics_001.py | 4 +- .../human_blood_2020_microwell_han_001.py | 4 +- .../human_blood_2020_microwell_han_002.py | 4 +- .../human_blood_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_004.py | 4 +- .../human_blood_2020_microwell_han_005.py | 4 +- .../human_blood_2020_microwell_han_006.py | 4 +- .../human_blood_2020_microwell_han_007.py | 4 +-
.../human/bone/human_bone_2018_10x_ica_001.py | 4 +- .../bone/human_bone_2020_microwell_han_001.py | 4 +- .../bone/human_bone_2020_microwell_han_002.py | 4 +- .../human_brain_2017_DroNcSeq_habib_001.py | 4 +- .../human_brain_2020_microwell_han_001.py | 4 +- .../human_brain_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_003.py | 4 +- .../human_brain_2020_microwell_han_004.py | 4 +- .../human_brain_2020_microwell_han_005.py | 4 +- .../human_brain_2020_microwell_han_006.py | 4 +- .../human_calvaria_2020_microwell_han_001.py | 4 +- .../human_cervix_2020_microwell_han_001.py | 4 +- ..._chorionicvillus_2020_microwell_han_001.py | 4 +- .../colon/human_colon_2019_10x_kinchen_001.py | 4 +- .../colon/human_colon_2019_10x_smilie_001.py | 4 +- .../colon/human_colon_2019_10x_wang_001.py | 4 +- .../colon/human_colon_2020_10x_james_001.py | 4 +- .../human_colon_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_002.py | 4 +- .../human_colon_2020_microwell_han_003.py | 4 +- .../human_colon_2020_microwell_han_004.py | 4 +- .../human_duodenum_2020_microwell_han_001.py | 4 +- ...human_epityphlon_2020_microwell_han_001.py | 4 +- .../human_esophagus_2019_10x_madissoon_001.py | 4 +- .../human_esophagus_2020_microwell_han_001.py | 4 +- .../human_esophagus_2020_microwell_han_002.py | 4 +- .../eye/human_eye_2019_10x_lukowski_001.py | 4 +- .../human/eye/human_eye_2019_10x_menon_001.py | 4 +- .../human/eye/human_eye_2019_10x_voigt_001.py | 4 +- .../eye/human_eye_2020_microwell_han_001.py | 4 +- ...an_fallopiantube_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_002.py | 4 +- ...uman_gallbladder_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_002.py | 4 +- .../human_heart_2020_microwell_han_003.py | 4 +- .../human_heart_2020_microwell_han_004.py | 4 +- .../hesc/human_hesc_2020_microwell_han_001.py | 4 +- 
.../ileum/human_ileum_2019_10x_martin_001.py | 4 +- .../ileum/human_ileum_2019_10x_wang_001.py | 4 +- .../human_ileum_2020_microwell_han_001.py | 4 +- .../human_jejunum_2020_microwell_han_001.py | 4 +- .../human_kidney_2019_10xSn_lake_001.py | 4 +- .../human_kidney_2019_10x_stewart_001.py | 4 +- .../kidney/human_kidney_2020_10x_liao_001.py | 4 +- .../human_kidney_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_002.py | 4 +- .../human_kidney_2020_microwell_han_003.py | 4 +- .../human_kidney_2020_microwell_han_004.py | 4 +- .../human_kidney_2020_microwell_han_005.py | 4 +- .../human_kidney_2020_microwell_han_006.py | 4 +- .../human_kidney_2020_microwell_han_007.py | 4 +- .../human_liver_2018_10x_macparland_001.py | 4 +- .../liver/human_liver_2019_10x_popescu_001.py | 4 +- .../human_liver_2019_10x_ramachandran_001.py | 4 +- .../human_liver_2019_mCELSeq2_aizarani_001.py | 4 +- .../human_liver_2020_microwell_han_001.py | 4 +- .../human_liver_2020_microwell_han_002.py | 4 +- .../human_liver_2020_microwell_han_003.py | 4 +- .../human_liver_2020_microwell_han_004.py | 4 +- .../human_liver_2020_microwell_han_005.py | 4 +- .../lung/human_lung_2019_10x_braga_001.py | 4 +- .../lung/human_lung_2019_10x_braga_002.py | 4 +- .../lung/human_lung_2019_10x_madissoon_001.py | 4 +- .../lung/human_lung_2019_dropseq_braga_003.py | 4 +- .../lung/human_lung_2020_10x_habermann_001.py | 4 +- .../lung/human_lung_2020_10x_lukassen_001.py | 4 +- .../lung/human_lung_2020_10x_lukassen_002.py | 4 +- .../lung/human_lung_2020_10x_miller_001.py | 4 +- .../human_lung_2020_10x_travaglini_001.py | 4 +- .../lung/human_lung_2020_microwell_han_001.py | 4 +- .../lung/human_lung_2020_microwell_han_002.py | 4 +- .../lung/human_lung_2020_microwell_han_003.py | 4 +- .../lung/human_lung_2020_microwell_han_004.py | 4 +- .../lung/human_lung_2020_microwell_han_005.py | 4 +- ...uman_lung_2020_smartseq2_travaglini_002.py | 4 +- .../human_malegonad_2018_10x_guo_001.py | 4 +- 
.../human_malegonad_2020_microwell_han_001.py | 4 +- .../human_malegonad_2020_microwell_han_002.py | 4 +- .../mixed/human_mixed_2019_10x_szabo_001.py | 4 +- .../human_muscle_2020_microwell_han_001.py | 4 +- .../human_muscle_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_001.py | 4 +- .../human_omentum_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_003.py | 4 +- .../human_pancreas_2016_indrop_baron_001.py | 4 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 4 +- .../human_pancreas_2017_smartseq2_enge_001.py | 4 +- .../human_pancreas_2020_microwell_han_001.py | 4 +- .../human_pancreas_2020_microwell_han_002.py | 4 +- .../human_pancreas_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_004.py | 4 +- .../human_placenta_2018_10x_ventotormo_001.py | 4 +- ..._placenta_2018_smartseq2_ventotormo_001.py | 4 +- .../human_placenta_2020_microwell_han_001.py | 4 +- .../human_pleura_2020_microwell_han_001.py | 4 +- .../human_prostate_2018_10x_henry_001.py | 4 +- .../human_prostate_2020_microwell_han_001.py | 4 +- .../rectum/human_rectum_2019_10x_wang_001.py | 4 +- .../human_rectum_2020_microwell_han_001.py | 4 +- .../rib/human_rib_2020_microwell_han_001.py | 4 +- .../rib/human_rib_2020_microwell_han_002.py | 4 +- .../skin/human_skin_2020_microwell_han_001.py | 4 +- .../skin/human_skin_2020_microwell_han_002.py | 4 +- ...human_spinalcord_2020_microwell_han_001.py | 4 +- .../human_spleen_2019_10x_madissoon_001.py | 4 +- .../human_spleen_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_001.py | 4 +- .../human_stomach_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_003.py | 4 +- .../human_stomach_2020_microwell_han_004.py | 4 +- .../human_stomach_2020_microwell_han_005.py | 4 +- .../human_stomach_2020_microwell_han_006.py | 4 +- .../human_stomach_2020_microwell_han_007.py | 4 +- .../human_stomach_2020_microwell_han_008.py | 
4 +- .../human_stomach_2020_microwell_han_009.py | 4 +- .../human_stomach_2020_microwell_han_010.py | 4 +- .../thymus/human_thymus_2020_10x_park_001.py | 4 +- .../human_thymus_2020_microwell_han_001.py | 4 +- .../human_thymus_2020_microwell_han_002.py | 4 +- .../human_thyroid_2020_microwell_han_001.py | 4 +- .../human_thyroid_2020_microwell_han_002.py | 4 +- .../human_trachea_2020_microwell_han_001.py | 4 +- .../human_ureter_2020_microwell_han_001.py | 4 +- .../human_uterus_2020_microwell_han_001.py | 4 +- .../mouse_bladder_2018_microwell_han_001.py | 4 +- .../mouse_bladder_2019_10x_pisco_001.py | 4 +- .../mouse_bladder_2019_smartseq2_pisco_001.py | 4 +- .../mouse_brain_2018_microwell_han_001.py | 4 +- .../mouse_brain_2018_microwell_han_002.py | 4 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 4 +- .../mouse_brain_2019_smartseq2_pisco_001.py | 4 +- .../mouse_brain_2019_smartseq2_pisco_002.py | 4 +- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 4 +- .../mouse/fat/mouse_fat_2019_10x_pisco_001.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_001.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_002.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_003.py | 4 +- .../fat/mouse_fat_2019_smartseq2_pisco_004.py | 4 +- .../heart/mouse_heart_2019_10x_pisco_001.py | 4 +- .../mouse_heart_2019_smartseq2_pisco_001.py | 4 +- .../mouse_heart_2019_smartseq2_pisco_002.py | 4 +- .../mouse_kidney_2018_microwell_han_001.py | 4 +- .../mouse_kidney_2018_microwell_han_002.py | 4 +- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 4 +- .../mouse_kidney_2019_smartseq2_pisco_001.py | 4 +- ...ouse_large_intestine_2019_10x_pisco_001.py | 4 +- ...arge_intestine_2019_smartseq2_pisco_001.py | 4 +- ...ouse_limb_muscle_2018_microwell_han_001.py | 4 +- .../mouse_limb_muscle_2019_10x_pisco_001.py | 4 +- ...se_limb_muscle_2019_smartseq2_pisco_001.py | 4 +- .../mouse_liver_2018_microwell_han_001.py | 4 +- .../mouse_liver_2018_microwell_han_002.py | 4 +- .../liver/mouse_liver_2019_10x_pisco_001.py | 
4 +- .../mouse_liver_2019_smartseq2_pisco_001.py | 4 +- .../lung/mouse_lung_2018_microwell_han_001.py | 4 +- .../lung/mouse_lung_2018_microwell_han_002.py | 4 +- .../lung/mouse_lung_2018_microwell_han_003.py | 4 +- .../lung/mouse_lung_2019_10x_pisco_001.py | 4 +- .../mouse_lung_2019_smartseq2_pisco_001.py | 4 +- ...se_mammary_gland_2018_microwell_han_001.py | 4 +- ...se_mammary_gland_2018_microwell_han_002.py | 4 +- ...se_mammary_gland_2018_microwell_han_003.py | 4 +- ...se_mammary_gland_2018_microwell_han_004.py | 4 +- .../mouse_mammary_gland_2019_10x_pisco_001.py | 4 +- ..._mammary_gland_2019_smartseq2_pisco_001.py | 4 +- .../marrow/mouse_marrow_2018_microwell_001.py | 4 +- .../marrow/mouse_marrow_2019_10x_pisco_001.py | 4 +- .../mouse_marrow_2019_smartseq2_pisco_001.py | 4 +- .../mouse_ovary_2018_microwell_han_001.py | 4 +- .../mouse_ovary_2018_microwell_han_002.py | 4 +- .../mouse_pancreas_2018_microwell_han_001.py | 4 +- .../mouse_pancreas_2019_10x_pisco_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_002.py | 4 +- .../mouse_pancreas_2019_10x_thompson_003.py | 4 +- .../mouse_pancreas_2019_10x_thompson_004.py | 4 +- .../mouse_pancreas_2019_10x_thompson_005.py | 4 +- .../mouse_pancreas_2019_10x_thompson_006.py | 4 +- .../mouse_pancreas_2019_10x_thompson_007.py | 4 +- .../mouse_pancreas_2019_10x_thompson_008.py | 4 +- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 4 +- ...peripheral_blood_2018_microwell_han_001.py | 4 +- ...peripheral_blood_2018_microwell_han_002.py | 4 +- ...peripheral_blood_2018_microwell_han_003.py | 4 +- ...peripheral_blood_2018_microwell_han_004.py | 4 +- ...peripheral_blood_2018_microwell_han_005.py | 4 +- .../mouse_placenta_2018_microwell_han_001.py | 4 +- .../mouse_placenta_2018_microwell_han_002.py | 4 +- .../mouse_prostate_2018_microwell_han_001.py | 4 +- .../mouse_prostate_2018_microwell_han_002.py | 4 +- .../rib/mouse_rib_2018_microwell_han_001.py | 4 +- 
.../rib/mouse_rib_2018_microwell_han_002.py | 4 +- .../rib/mouse_rib_2018_microwell_han_003.py | 4 +- .../skin/mouse_skin_2019_10x_pisco_001.py | 4 +- .../mouse_skin_2019_smartseq2_pisco_001.py | 4 +- ..._small_intestine_2018_microwell_han_001.py | 4 +- ..._small_intestine_2018_microwell_han_002.py | 4 +- ..._small_intestine_2018_microwell_han_003.py | 4 +- .../mouse_spleen_2018_microwell_han_001.py | 4 +- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 4 +- .../mouse_spleen_2019_smartseq2_pisco_001.py | 4 +- .../mouse_stomach_2018_microwell_han_001.py | 4 +- .../mouse_testis_2018_microwell_han_001.py | 4 +- .../mouse_testis_2018_microwell_han_002.py | 4 +- .../mouse_thymus_2018_microwell_han_001.py | 4 +- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 4 +- .../mouse_thymus_2019_smartseq2_pisco_001.py | 4 +- .../tongue/mouse_tongue_2019_10x_pisco_001.py | 4 +- .../mouse_tongue_2019_smartseq2_pisco_001.py | 4 +- .../mouse_trachea_2019_10x_pisco_001.py | 4 +- .../mouse_trachea_2019_smartseq2_pisco_001.py | 4 +- .../mouse_uterus_2018_microwell_han_001.py | 4 +- .../mouse_uterus_2018_microwell_han_002.py | 4 +- 247 files changed, 678 insertions(+), 532 deletions(-) create mode 100644 sfaira/consts/meta_data_files.py create mode 100644 sfaira/data/databases/__init__.py create mode 100644 sfaira/data/databases/cellxgene/__init__.py rename sfaira/data/databases/{ => cellxgene}/cellxgene_group.py (82%) rename sfaira/data/databases/{ => cellxgene}/cellxgene_loader.py (86%) rename sfaira/data/databases/{ => cellxgene}/external.py (73%) diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py index b552015f1..00cc5cb17 100644 --- a/sfaira/api/consts.py +++ b/sfaira/api/consts.py @@ -1 +1,2 @@ from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, ADATA_IDS_BASE, ADATA_IDS_EXTENDED +from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index b9fa9e2b1..15d95e718 100644 --- a/sfaira/consts/__init__.py +++ 
b/sfaira/consts/__init__.py @@ -1,2 +1,3 @@ from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED +from sfaira.consts.meta_data_files import META_DATA_FIELDS diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index ccc84e12e..d94b3b51e 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -4,29 +4,33 @@ class ADATA_IDS_BASE: Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ _animal: str + _annotated: str _author: str _cell_types_original: str _cell_ontology_class: str _cell_ontology_id: str _doi: str + _download: str _dataset: str _dataset_group: str _gene_id_ensembl: str _gene_id_names: str - _has_celltypes: str _healthy: str _id: str _normalization: str _organ: str _protocol: str _subtissue: str - _wget_download: str _year: str @property def animal(self): return self._animal + @property + def annotated(self): + return self._annotated + @property def author(self): return self._author @@ -55,6 +59,10 @@ def dataset_group(self): def doi(self): return self._doi + @property + def download(self): + return self._download + @property def gene_id_ensembl(self): return self._gene_id_ensembl @@ -63,10 +71,6 @@ def gene_id_ensembl(self): def gene_id_names(self): return self._gene_id_names - @property - def has_celltypes(self): - return self._has_celltypes - @property def healthy(self): return self._healthy @@ -91,10 +95,6 @@ def organ(self): def subtissue(self): return self._subtissue - @property - def wget_download(self): - return self._wget_download - @property def year(self): return self._year diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py new file mode 100644 index 000000000..09f686a12 --- /dev/null +++ b/sfaira/consts/meta_data_files.py @@ -0,0 +1,13 @@ +META_DATA_FIELDS = [ + "annotated", + "author", + "doi", + "download", 
+ "id", + "ncells", + "normalization", + "organ", + "protocol", + "species", + "year", +] diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 145fa5b3e..f76653dc6 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -11,7 +11,7 @@ import warnings from .external import SuperGenomeContainer -from .external import ADATA_IDS_SFAIRA +from .external import ADATA_IDS_SFAIRA, META_DATA_FIELDS class DatasetBase(abc.ABC): @@ -19,38 +19,37 @@ class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict meta: Union[None, pandas.DataFrame] - download_website: Union[None, str] download_website_meta: Union[None, str] path: Union[None, str] id: Union[None, str] - download_website: Union[None, str] - organ: Union[None, str] - sub_tissue: Union[None, str] - has_celltypes: Union[None, bool] - species: Union[None, str] genome: Union[None, str] + _annotated: str + _author: str + _doi: str + _download: str + _id: str + _ncells: str + _normalization: str + _organ: str + _protocol: str + _species: str + _year: str + def __init__( self, path: Union[str, None] = None, meta_path: Union[str, None] = None, **kwargs ): - self.species = None self.adata = None self.download_website_meta = None - self.id = None - self.download_website = None - self.organ = None - self.sub_tissue = None - self.has_celltypes = None self.meta = None self.genome = None self.path = path self.meta_path = meta_path self._load_raw = None - @abc.abstractmethod def _load(self, fn): pass @@ -395,10 +394,6 @@ def _set_genome(self, self.genome_container = g - @property - def doi_cleaned_id(self): - return "_".join(self.id.split("_")[:-1]) - def load_meta(self, fn: Union[PathLike, str]): if fn is None: if self.meta_path is None: @@ -407,13 +402,141 @@ def load_meta(self, fn: Union[PathLike, str]): else: if isinstance(fn, str): fn = os.path.normpath(fn) - self.meta = pandas.read_csv(fn) + self.meta = pandas.read_csv(fn, usecols=META_DATA_FIELDS) @property - def ncells(self): - if self.meta 
is None: - self.load_meta(fn=None) - return int(self.meta["ncells"]) + def author(self): + if self._author is not None: + return self._author + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["author"] + + @author.setter + def author(self, x): + self._author = x + + @property + def doi(self): + if self._doi is not None: + return self._doi + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["doi"] + + @doi.setter + def doi(self, x): + self._doi = x + + @property + def download(self): + if self._download is not None: + return self._download + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["download"] + + @download.setter + def download(self, x): + self._download = x + + @property + def annotated(self): + if self._annotated is not None: + return self._annotated + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["annotated"] + + @annotated.setter + def annotated(self, x): + self._annotated = x + + @property + def id(self): + if self._id is not None: + return self._id + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["id"] + + @id.setter + def id(self, x): + self._id = x + + @property + def normalization(self): + if self._normalization is not None: + return self._normalization + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["normalization"] + + @normalization.setter + def normalization(self, x): + self._normalization = x + + @property + def organ(self): + if self._organ is not None: + return self._organ + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["organ"] + + @organ.setter + def organ(self, x): + self._organ = x + + @property + def protocol(self): + if self._protocol is not None: + return self._protocol + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["protocol"] + + @protocol.setter + def protocol(self, x): + self._protocol = x + + @property + def 
species(self): + if self._species is not None: + return self._species + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["species"] + + @species.setter + def species(self, x): + self._species = x + + @property + def year(self): + if self._year is not None: + return self._year + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta["year"] + + @year.setter + def year(self, x): + self._year = x + + @property + def doi_cleaned_id(self): + return "_".join(self.id.split("_")[:-1]) def write_meta( self, @@ -430,16 +553,17 @@ def write_meta( if self.adata is None: self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ - "ncells": self.adata.n_obs, "animal": self.adata.uns[ADATA_IDS_SFAIRA.animal], - "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], - "subtissue": self.adata.uns[ADATA_IDS_SFAIRA.subtissue], + "author": self.adata.uns[ADATA_IDS_SFAIRA.author], + "annotated": self.adata.uns[ADATA_IDS_SFAIRA.annotated], + "doi": self.adata.uns[ADATA_IDS_SFAIRA.doi], + "download": self.adata.uns[ADATA_IDS_SFAIRA.download], "id": self.adata.uns[ADATA_IDS_SFAIRA.id], - "lab": self.adata.uns[ADATA_IDS_SFAIRA.author], - "year": self.adata.uns[ADATA_IDS_SFAIRA.year], + "ncells": self.adata.n_obs, + "normalization": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, + "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], - "counts": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - "has_celltypes": self.has_celltypes + "year": self.adata.uns[ADATA_IDS_SFAIRA.year], }, index=range(1)) meta.to_csv(fn_meta) @@ -485,6 +609,15 @@ def map_ontology_class( for x in raw_ids ] + @property + def citation(self): + """ + Return all information necessary to cite data set. 
+ + :return: + """ + return [self.author, self.year, self.doi] + class DatasetGroupBase(abc.ABC): """ @@ -526,7 +659,7 @@ def load_all( :return: """ for i in self.ids: - if self.datasets[i].has_celltypes or not annotated_only: + if self.datasets[i].annotated or not annotated_only: self.datasets[i].load( celltype_version=self.format_type_version(celltype_version), remove_gene_version=remove_gene_version, @@ -561,7 +694,7 @@ def load_all_tobacked( keys.append(x) for i, id in enumerate(self.ids): # if this is for celltype prediction, only load the data with have celltype annotation - if self.datasets[id].has_celltypes or not annotated_only: + if self.datasets[id].annotated or not annotated_only: self.datasets[id].load_tobacked( adata_backed=adata_backed, genome=genome, @@ -598,7 +731,7 @@ def adata(self): adata.obs[ADATA_IDS_SFAIRA.normalization] = adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.uns[ADATA_IDS_SFAIRA.dev_stage] - adata.obs[ADATA_IDS_SFAIRA.has_celltypes] = adata.uns[ADATA_IDS_SFAIRA.has_celltypes] + adata.obs[ADATA_IDS_SFAIRA.annotated] = adata.uns[ADATA_IDS_SFAIRA.annotated] # Workaround related to anndata bugs: # TODO remove this in future. 
for adata in adata_ls: # Fix 1: @@ -614,7 +747,7 @@ def adata(self): ADATA_IDS_SFAIRA.subtissue, ADATA_IDS_SFAIRA.normalization, ADATA_IDS_SFAIRA.dev_stage, - ADATA_IDS_SFAIRA.has_celltypes, + ADATA_IDS_SFAIRA.annotated, "mapped_features" ] for k in list(adata.uns.keys()): @@ -872,7 +1005,7 @@ def load_all_tobacked( ADATA_IDS_SFAIRA.state_exact, ADATA_IDS_SFAIRA.normalization, ADATA_IDS_SFAIRA.dev_stage, - ADATA_IDS_SFAIRA.has_celltypes, + ADATA_IDS_SFAIRA.annotated, ADATA_IDS_SFAIRA.dataset ] if scatter_update: diff --git a/sfaira/data/databases/__init__.py b/sfaira/data/databases/__init__.py new file mode 100644 index 000000000..89402624a --- /dev/null +++ b/sfaira/data/databases/__init__.py @@ -0,0 +1 @@ +from sfaira.data.databases.cellxgene import DatasetCellxgene, DatasetGroupCellxgene diff --git a/sfaira/data/databases/cellxgene/__init__.py b/sfaira/data/databases/cellxgene/__init__.py new file mode 100644 index 000000000..ac116b424 --- /dev/null +++ b/sfaira/data/databases/cellxgene/__init__.py @@ -0,0 +1,2 @@ +from sfaira.data.databases.cellxgene.cellxgene_loader import DatasetCellxgene +from sfaira.data.databases.cellxgene.cellxgene_group import DatasetGroupCellxgene \ No newline at end of file diff --git a/sfaira/data/databases/cellxgene_group.py b/sfaira/data/databases/cellxgene/cellxgene_group.py similarity index 82% rename from sfaira/data/databases/cellxgene_group.py rename to sfaira/data/databases/cellxgene/cellxgene_group.py index 6468ef844..4fd786d51 100644 --- a/sfaira/data/databases/cellxgene_group.py +++ b/sfaira/data/databases/cellxgene/cellxgene_group.py @@ -1,9 +1,10 @@ +import pandas as pd import os from typing import Union from .external import DatasetGroupBase -from .cellxgene_loader import Dataset +from .cellxgene_loader import DatasetCellxgene class DatasetGroupCellxgene(DatasetGroupBase): @@ -16,7 +17,7 @@ def __init__( fn_ls = os.listdir(path) fn_ls = [x for x in fn_ls if x in self.accepted_file_names] datasets = [ - Dataset(path=path, 
fn=x, meta_path=meta_path) + DatasetCellxgene(path=path, fn=x, meta_path=meta_path) for x in fn_ls ] keys = [x.id for x in datasets] diff --git a/sfaira/data/databases/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py similarity index 86% rename from sfaira/data/databases/cellxgene_loader.py rename to sfaira/data/databases/cellxgene/cellxgene_loader.py index bb8ac7488..d506653d0 100644 --- a/sfaira/data/databases/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene/cellxgene_loader.py @@ -6,7 +6,7 @@ from .external import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE -class Dataset(DatasetBase): +class DatasetCellxgene(DatasetBase): """ This is a dataloader for downloaded h5ad from cellxgene. @@ -24,14 +24,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.fn = fn - # TODO from meta: - self.species = str(fn).split("_")[2] - self.id = str(fn).split(".")[0] - self.organ = str(fn).split("_")[3] - self.sub_tissue = None - self.download_website = None # TODO - self.has_celltypes = True - + self.load_meta() self.class_maps = { "0": {}, } @@ -56,11 +49,11 @@ def _load(self, fn=None): raise Warning("found multiple organisms in data set %s" % self.fn) self.adata.uns[ADATA_IDS_SFAIRA.animal] = adata.obs[ADATA_IDS_CELLXGENE.animal].values[0] self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.annotated self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.obs[ADATA_IDS_SFAIRA.subtissue] = adata.obs[ADATA_IDS_CELLXGENE.subtissue].values self.adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values self.adata.obs[ADATA_IDS_SFAIRA.sex] = 
adata.obs[ADATA_IDS_CELLXGENE.sex].values self.adata.obs[ADATA_IDS_SFAIRA.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values diff --git a/sfaira/data/databases/external.py b/sfaira/data/databases/cellxgene/external.py similarity index 73% rename from sfaira/data/databases/external.py rename to sfaira/data/databases/cellxgene/external.py index 1437719c6..5caf39357 100644 --- a/sfaira/data/databases/external.py +++ b/sfaira/data/databases/cellxgene/external.py @@ -1,2 +1,3 @@ from sfaira.data import DatasetBase, DatasetGroupBase from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE +from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/data/external.py b/sfaira/data/external.py index 55e607c98..3ef466858 100644 --- a/sfaira/data/external.py +++ b/sfaira/data/external.py @@ -1,2 +1,2 @@ from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.consts import ADATA_IDS_SFAIRA +from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 797e89b0a..bb4f114a1 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py 
b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index fafaca273..6565e189b 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 2e3b74baa..c5ef69cb4 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index ed5ab9d2a..58b85716b 100644 
--- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index d7b45119e..002d7a9ea 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index c16c073ed..9bde4e591 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ 
b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 483a715df..0c55b0674 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index b0719cbd4..4585e7132 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index 01e560720..c741d6356 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 3dc3d15d5..22d7d6da1 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index cb35370e9..5bf1e331f 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index 814fc8b27..b5151c1bb 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 00c9c9de1..2731d52a2 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -54,8 +54,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index da96a37c4..b6ed12462 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index a169af39e..670222385 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 2a9d21da5..7e458b379 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index b6b7998d2..b741d005e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 0d283031f..d4ab3e10c 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index 42ad29021..ebebd234c 100644 --- 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index 88f8b86a5..8f66d62f7 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index aa826583b..c6fdb32fa 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index cd2ff1b70..b4de3c370 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 36fafd970..56a645ea6 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = 
self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index 49638cf96..212f5d3e3 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -71,8 +71,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index 58b431a22..4b1eb916d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 0eae3d776..81a4e19c6 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 78c2b1015..6bad62df3 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index e937b0026..2e399aa26 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index 0a7353ba3..a54ebfb5c 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index 96c599c0b..cc470274a 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 3191905b4..2acfc4d77 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index cfe091469..73e3280b1 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index 3c9d9c157..f9dcd6e07 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index d7ee97c3c..665546536 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -136,8 +136,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index e1aea6674..4bd685fbf 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -95,8 +95,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index a95aa5821..0cba30892 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index c82964973..564759558 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -82,8 +82,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index 8fdcf216c..e6cf51fc1 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -80,8 +80,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage 
diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 1a321bb85..2fe4de69f 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -80,8 +80,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 50ec658d3..762222ffa 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -80,8 +80,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index c033cf2a2..5f508546d 100644 --- 
a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -80,8 +80,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 777283949..18f7b6682 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index 0b6236add..d2fd22c3e 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def 
_load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 63cd946a1..a923b843b 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -73,8 +73,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 6ffd42e15..8fe6b4ed5 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -77,8 +77,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] 
= "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 2bfdac39d..a2cd07343 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -77,8 +77,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index 2cf6960ec..c9b54aec6 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index f040aa9a7..44de32331 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -60,8 +60,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 9910963d7..bdc7b56ec 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -64,8 +64,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index f8925f88c..f1e0ac3cc 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 45ef1270f..3d6af5aca 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py 
b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 1be28c8ab..6cc78818b 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index fafdca4a0..f5ba480b9 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index eb199409c..244258326 100644 --- 
a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index c4c0e7ff5..fbc251514 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index d4d7a8cee..8eb85ffbd 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 79d668bfe..45568d6f8 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 188d88184..9acbb98bf 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index af26edc9d..77b9324d5 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 1d4ee7d4b..9e50888ce 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -77,8 +77,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index b22c1a004..9671e73b2 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 0429f4b10..5c2fe272a 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -79,8 +79,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git 
a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 75cbdaade..54aa2165d 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index 418203a03..254dcaf0f 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -85,8 +85,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index 555910cd1..61e39c00c 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -133,8 +133,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index 2030ee714..93534736b 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -104,8 +104,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index a7e25142a..b2cb54571 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -102,8 +102,8 @@ def _load(self, 
fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index c3f78cd5e..d174bfa5e 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 16695ac76..e77163b10 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index d354fa368..8879259bf 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 2d081f2f0..bd5e6fec8 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index ce1c84950..940814ec5 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 16544a729..25ae01e2d 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -102,8 +102,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git 
a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 5a0272a6d..19403f3df 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -78,8 +78,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 5cd1a0165..2e789fceb 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -78,8 +78,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index 
070736cab..ae339304b 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -80,8 +80,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 42e4ea49c..5d0773c5e 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -98,8 +98,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 6d03203c3..282799135 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 2037910ba..8ae7f12f3 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 43e5116b5..73d5fdfe5 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index b3e3cc849..1aa7f2002 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index c5f0bfdcd..b0097a22d 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 563ffcf50..3f5015f62 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 667005ef2..2238529ad 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 5f4c5012f..13bd19a70 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index 0e08d16da..afcbb8e04 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = [self.download_website, self.download_website_meta] - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = [self.download_website, self.download_website_meta] + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] diff --git 
a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index e68b6283c..8a5cb9bd9 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -115,8 +115,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index f562a1a34..02a36a0b5 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -65,8 +65,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index fd6da33f1..5ccd1132c 100644 --- 
a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index 7e6c61a83..33146cc53 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index 666cdea5d..ec70b910d 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -122,8 +122,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index bc4299812..9213ba0cb 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -103,8 +103,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index f548c278d..61cd4080c 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -103,8 +103,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = 
self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index ba6ecd2da..72c6e1dc1 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -103,8 +103,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 8474e6435..b39130ead 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -103,8 +103,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 7af72e778..904692b7b 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -103,8 +103,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index e8f515208..99e57a332 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -109,8 +109,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i 
in self.adata.obs['free_annotation']] diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 170912480..83de8d248 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -67,8 +67,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index 1a1430759..05d105b8e 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py 
b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 343da82ca..3533f6eb2 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 0f5275220..762a3970b 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -158,8 +158,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs["subtissue"] = self.adata.obs["organ"] diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index 88e1abf7a..70fd853ec 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index ff1d80cab..7c3d0d028 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 1f6bd0727..cc165c7d6 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 8ca0b7e01..72dcabf37 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index 6a9887e0b..f7c4c7c8f 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index c1cf35afd..5201c90e3 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index 7f533cd97..bdc03b1dc 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -77,8 +77,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index 4f13c70c5..b1dd43011 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -137,8 +137,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 681b7be90..e0f55ce0b 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 97a13d9f3..bfcdc28a6 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 157d1abe2..8607345a0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git 
a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 20b79d86a..2994104d3 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 7385e1a45..8eca32560 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py 
b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index ed8505189..fd36741f8 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -91,8 +91,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index c288a8439..e70044c26 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 5336f2163..a880ebb6a 100644 --- 
a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 29b39cdfd..21f280460 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -64,8 +64,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index de42e8b13..28f351d08 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -73,8 +73,8 @@ 
def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index b0b74bdd5..a294a07a0 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index b87ce3220..a2e450ef9 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -68,8 +68,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index a50a31b5b..a32a85f99 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index c9a15e6c1..aac37606d 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = 
self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index fc618bc87..7599c8e13 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -82,8 +82,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index f344e2577..15eaf4209 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -82,8 +82,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py 
b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 4eeb530e6..847198cb1 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 2baff592e..817de894b 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -82,8 +82,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index 79de48740..b1ea2dd2f 100644 --- 
a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index e436db28c..e05d0eb16 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 4c64c46aa..efdacf67d 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index d121d66f1..2a9edcd78 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index baa3de5b3..d037dbdde 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index dd0827648..7e3030fb3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index d3281b286..c1518688b 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 6e23f365e..809dff1b8 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 6f1664440..adfed6896 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff 
--git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 87843f10a..fbdd72452 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 731399f04..b00ef1487 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index b856bacf2..8db9c7220 100644 --- 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index 4a9a5ac85..5db839de1 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -97,8 +97,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 2a749b1ec..ca30b30ae 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -66,8 +66,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index 07ca1245d..d798d6cc8 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -66,8 +66,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 97c3daffb..24ebf7942 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 6c218e9f2..d133951b7 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index d25ebafab..b3313e97c 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index cb9f9d582..71b3c24e3 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 7ddf1a88b..39a89fd08 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns["dev_stage"] = self.dev_stage diff --git 
a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index ac243bb11..9eac4bd73 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -69,8 +69,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index cc42fefb4..0755d85a0 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index e979b4aa6..c486f3836 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index 2e40f68d4..288cff59c 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -67,8 +67,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index ae2a7c217..cf6a8f5b8 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -67,8 +67,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 2425a7614..a0957a4cd 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -77,8 +77,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index 6cad9a8fb..0332e7d3b 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index 44b27c9dc..ab83d4cc4 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is 
already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 635b54582..8afc16cbb 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py index 7d03ceb14..663c98dbd 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set 
self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py index 6fac381bc..e33b29373 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py index 74545adb0..cd3264d65 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py index b1f1ffa18..cbe4aa284 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py index d05a05985..997d45584 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index 91a38349b..e3a647312 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index 8c7317263..f95971952 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
+ self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index d741ce9d0..5516f5571 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index 61afd036e..df65fb66a 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -53,8 +53,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index 79ce5f241..48fa4410e 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -84,8 +84,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index 31c75fe80..d25c569a0 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -64,8 +64,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index 5dd9f11b1..ab39f2674 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py index 8ca29e546..f5a7ee324 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 0d6ced71f..ddaf686bf 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py index 57f988119..2a400ee85 100644 --- 
a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py index a83ef3883..e65915900 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index 54e8cd275..a2f402278 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index c27fbb977..f959b61cf 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -71,8 +71,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) 
diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 456b8859a..42ac84262 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -65,8 +65,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 5a8eab646..9619eb280 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 8fa24bf71..48318680b 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 6a38082d7..b4bf92442 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -85,8 +85,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 0aae7fe45..6a60e482c 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -85,8 +85,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 7dc52d140..561f3b2d1 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -85,8 +85,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index d4e99c34c..12ff8bc44 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 9a2ef1966..524c94f20 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set 
self.set_unkown_class_id(ids=["nan"]) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py index 959cd5008..c5e6ffe78 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py @@ -69,8 +69,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py index 6de99ec52..6da666a26 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py @@ -69,8 +69,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py index b23c0de7e..85998656e 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py @@ -69,8 +69,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py index cd7bac286..394d1021f 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py @@ -69,8 +69,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py index 67cfc89c9..ccac92cdd 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index 14db17f54..1caedb7c9 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id 
- self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py index 1a00f284b..b28dbf6d0 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py @@ -68,8 +68,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py index f9ed5dbdf..a4c4c4059 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py index 97ecaf91e..d4dfd0f27 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py index 792d94ef8..abd074ef1 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py @@ -65,8 +65,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py index 9ba1a47d1..c2fa97626 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py @@ -65,8 +65,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index a66aa1d13..7d7a28100 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -75,8 +75,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 1c39c2dd8..e64734484 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 0f02a7c0c..35bdc885f 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -66,8 +66,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index 866c001a2..1f01cc483 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -66,8 +66,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 229ba1616..f8f8aff52 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -66,8 +66,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index f1cc6da3f..430120946 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -66,8 +66,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index ea8b4a1fe..b32bad29e 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -66,8 +66,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index 3759216ad..73aea171a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -66,8 +66,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 3a51d770a..41473d5ed 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -66,8 +66,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index cb5876758..a03bd30e0 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -66,8 +66,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index d133975c8..d1e38e56a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py index 5ccedbcc9..6298be044 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py index 6ff1477f4..06758f8c2 100644 --- 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py index c3cf9e50f..0fcbf163e 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py index ab5fb7b4f..d76777112 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index a41eb9b8d..818e4dbbf 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -74,8 +74,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index eff620438..2a7b47cf2 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 1cba7e492..aaaef0b37 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -81,8 +81,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index f90eca390..fcf58776b 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 9fd5659b9..e653abe55 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index b460b4816..f0d318f0a 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index deac2063e..c0fbf01cf 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] 
= self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 26752d193..d36fb3387 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 1322194cf..d74f35962 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index f64f20391..8001a5f18 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py index 47ad87f2b..e8644c9bc 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = 
self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py index d49957375..d3cf9dded 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py index ac2a65c08..d5a497bcc 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py @@ -76,8 +76,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # 
TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 59f41fc0e..04d657f6a 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -64,8 +64,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index e7552ba2f..2fe0dae72 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index d102df1d0..b8dd1d444 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -61,8 +61,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index bedeb0045..5e0a1da52 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py index 8d462eae8..379f27a03 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py index 8171d6461..ee4284ba3 100644 --- 
a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index aab2095cf..607feaa03 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -59,8 +59,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index 68a89417f..eab745f4e 100644 --- 
a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -60,8 +60,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index 0138e1c0b..01f84ee40 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -60,8 +60,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py 
b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 25f8363c4..983c15fe4 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index af4d42e79..f8053c10b 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py index 35bb4a0f1..a9681665a 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py @@ -63,8 +63,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py index 21fc0ae33..ac0a83c94 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index 664f1a835..c29e86ecc 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index eb7df0b36..a837ad6f5 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -72,8 +72,8 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.wget_download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.has_celltypes] = self.has_celltypes + self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() From 1e32103fe02caea70432eaf21cc8c5d2125e37bb Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:13:41 +0100 Subject: [PATCH 010/161] renamed remaining instances of "animal" into "species" --- sfaira/consts/adata_fields.py | 10 +++++----- sfaira/data/base.py | 2 +- sfaira/data/databases/cellxgene/cellxgene_loader.py | 6 +++--- .../adipose/human_adipose_2020_microwell_han_001.py | 2 +- .../human_adrenalgland_2020_microwell_han_001.py | 2 +- .../human_adrenalgland_2020_microwell_han_002.py | 2 +- .../human_adrenalgland_2020_microwell_han_003.py | 2 +- .../human_adrenalgland_2020_microwell_han_004.py | 2 +- .../human_adrenalgland_2020_microwell_han_005.py | 2 +- .../human_adrenalgland_2020_microwell_han_006.py | 2 +- .../artery/human_artery_2020_microwell_han_001.py | 2 +- .../bladder/human_bladder_2020_microwell_han_001.py | 2 +- .../bladder/human_bladder_2020_microwell_han_002.py | 2 +- .../bladder/human_bladder_2020_microwell_han_003.py | 2 +- .../data/human/blood/human_blood_2018_10x_ica_001.py | 2 +- .../blood/human_blood_2019_10x_10xGenomics_001.py | 2 +- .../human/blood/human_blood_2020_microwell_han_001.py | 2 +- .../human/blood/human_blood_2020_microwell_han_002.py | 2 +- .../human/blood/human_blood_2020_microwell_han_003.py | 2 +- .../human/blood/human_blood_2020_microwell_han_004.py | 2 +- .../human/blood/human_blood_2020_microwell_han_005.py | 2 +- .../human/blood/human_blood_2020_microwell_han_006.py | 2 +- .../human/blood/human_blood_2020_microwell_han_007.py | 2 +- sfaira/data/human/bone/human_bone_2018_10x_ica_001.py | 2 +- .../human/bone/human_bone_2020_microwell_han_001.py | 2 +- 
.../human/bone/human_bone_2020_microwell_han_002.py | 2 +- .../human/brain/human_brain_2017_DroNcSeq_habib_001.py | 2 +- .../human/brain/human_brain_2020_microwell_han_001.py | 2 +- .../human/brain/human_brain_2020_microwell_han_002.py | 2 +- .../human/brain/human_brain_2020_microwell_han_003.py | 2 +- .../human/brain/human_brain_2020_microwell_han_004.py | 2 +- .../human/brain/human_brain_2020_microwell_han_005.py | 2 +- .../human/brain/human_brain_2020_microwell_han_006.py | 2 +- .../calvaria/human_calvaria_2020_microwell_han_001.py | 2 +- .../cervix/human_cervix_2020_microwell_han_001.py | 2 +- .../human_chorionicvillus_2020_microwell_han_001.py | 2 +- .../human/colon/human_colon_2019_10x_kinchen_001.py | 2 +- .../human/colon/human_colon_2019_10x_smilie_001.py | 2 +- .../data/human/colon/human_colon_2019_10x_wang_001.py | 2 +- .../data/human/colon/human_colon_2020_10x_james_001.py | 2 +- .../human/colon/human_colon_2020_microwell_han_001.py | 2 +- .../human/colon/human_colon_2020_microwell_han_002.py | 2 +- .../human/colon/human_colon_2020_microwell_han_003.py | 2 +- .../human/colon/human_colon_2020_microwell_han_004.py | 2 +- .../duodenum/human_duodenum_2020_microwell_han_001.py | 2 +- .../human_epityphlon_2020_microwell_han_001.py | 2 +- .../human_esophagus_2019_10x_madissoon_001.py | 2 +- .../human_esophagus_2020_microwell_han_001.py | 2 +- .../human_esophagus_2020_microwell_han_002.py | 2 +- .../data/human/eye/human_eye_2019_10x_lukowski_001.py | 2 +- sfaira/data/human/eye/human_eye_2019_10x_menon_001.py | 2 +- sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py | 2 +- .../data/human/eye/human_eye_2020_microwell_han_001.py | 2 +- .../human_fallopiantube_2020_microwell_han_001.py | 2 +- .../human_femalegonad_2020_microwell_han_001.py | 2 +- .../human_femalegonad_2020_microwell_han_002.py | 2 +- .../human_gallbladder_2020_microwell_han_001.py | 2 +- .../human/heart/human_heart_2020_microwell_han_001.py | 2 +- 
.../human/heart/human_heart_2020_microwell_han_002.py | 2 +- .../human/heart/human_heart_2020_microwell_han_003.py | 2 +- .../human/heart/human_heart_2020_microwell_han_004.py | 2 +- .../human/hesc/human_hesc_2020_microwell_han_001.py | 2 +- .../human/ileum/human_ileum_2019_10x_martin_001.py | 2 +- .../data/human/ileum/human_ileum_2019_10x_wang_001.py | 2 +- .../human/ileum/human_ileum_2020_microwell_han_001.py | 2 +- .../jejunum/human_jejunum_2020_microwell_han_001.py | 2 +- .../human/kidney/human_kidney_2019_10xSn_lake_001.py | 2 +- .../human/kidney/human_kidney_2019_10x_stewart_001.py | 2 +- .../human/kidney/human_kidney_2020_10x_liao_001.py | 2 +- .../kidney/human_kidney_2020_microwell_han_001.py | 2 +- .../kidney/human_kidney_2020_microwell_han_002.py | 2 +- .../kidney/human_kidney_2020_microwell_han_003.py | 2 +- .../kidney/human_kidney_2020_microwell_han_004.py | 2 +- .../kidney/human_kidney_2020_microwell_han_005.py | 2 +- .../kidney/human_kidney_2020_microwell_han_006.py | 2 +- .../kidney/human_kidney_2020_microwell_han_007.py | 2 +- .../human/liver/human_liver_2018_10x_macparland_001.py | 2 +- .../human/liver/human_liver_2019_10x_popescu_001.py | 2 +- .../liver/human_liver_2019_10x_ramachandran_001.py | 2 +- .../liver/human_liver_2019_mCELSeq2_aizarani_001.py | 2 +- .../human/liver/human_liver_2020_microwell_han_001.py | 2 +- .../human/liver/human_liver_2020_microwell_han_002.py | 2 +- .../human/liver/human_liver_2020_microwell_han_003.py | 2 +- .../human/liver/human_liver_2020_microwell_han_004.py | 2 +- .../human/liver/human_liver_2020_microwell_han_005.py | 2 +- .../data/human/lung/human_lung_2019_10x_braga_001.py | 2 +- .../data/human/lung/human_lung_2019_10x_braga_002.py | 2 +- .../human/lung/human_lung_2019_10x_madissoon_001.py | 2 +- .../human/lung/human_lung_2019_dropseq_braga_003.py | 2 +- .../human/lung/human_lung_2020_10x_habermann_001.py | 2 +- .../human/lung/human_lung_2020_10x_lukassen_001.py | 2 +- 
.../human/lung/human_lung_2020_10x_lukassen_002.py | 2 +- .../data/human/lung/human_lung_2020_10x_miller_001.py | 2 +- .../human/lung/human_lung_2020_10x_travaglini_001.py | 2 +- .../human/lung/human_lung_2020_microwell_han_001.py | 2 +- .../human/lung/human_lung_2020_microwell_han_002.py | 2 +- .../human/lung/human_lung_2020_microwell_han_003.py | 2 +- .../human/lung/human_lung_2020_microwell_han_004.py | 2 +- .../human/lung/human_lung_2020_microwell_han_005.py | 2 +- .../lung/human_lung_2020_smartseq2_travaglini_002.py | 2 +- .../malegonad/human_malegonad_2018_10x_guo_001.py | 2 +- .../human_malegonad_2020_microwell_han_001.py | 2 +- .../human_malegonad_2020_microwell_han_002.py | 2 +- .../data/human/mixed/human_mixed_2019_10x_szabo_001.py | 2 +- .../muscle/human_muscle_2020_microwell_han_001.py | 2 +- .../muscle/human_muscle_2020_microwell_han_002.py | 2 +- .../omentum/human_omentum_2020_microwell_han_001.py | 2 +- .../omentum/human_omentum_2020_microwell_han_002.py | 2 +- .../omentum/human_omentum_2020_microwell_han_003.py | 2 +- .../pancreas/human_pancreas_2016_indrop_baron_001.py | 2 +- .../human_pancreas_2016_smartseq2_segerstolpe_001.py | 2 +- .../pancreas/human_pancreas_2017_smartseq2_enge_001.py | 2 +- .../pancreas/human_pancreas_2020_microwell_han_001.py | 2 +- .../pancreas/human_pancreas_2020_microwell_han_002.py | 2 +- .../pancreas/human_pancreas_2020_microwell_han_003.py | 2 +- .../pancreas/human_pancreas_2020_microwell_han_004.py | 2 +- .../placenta/human_placenta_2018_10x_ventotormo_001.py | 2 +- .../human_placenta_2018_smartseq2_ventotormo_001.py | 2 +- .../placenta/human_placenta_2020_microwell_han_001.py | 2 +- .../pleura/human_pleura_2020_microwell_han_001.py | 2 +- .../prostate/human_prostate_2018_10x_henry_001.py | 2 +- .../prostate/human_prostate_2020_microwell_han_001.py | 2 +- .../human/rectum/human_rectum_2019_10x_wang_001.py | 2 +- .../rectum/human_rectum_2020_microwell_han_001.py | 2 +- 
.../data/human/rib/human_rib_2020_microwell_han_001.py | 2 +- .../data/human/rib/human_rib_2020_microwell_han_002.py | 2 +- .../human/skin/human_skin_2020_microwell_han_001.py | 2 +- .../human/skin/human_skin_2020_microwell_han_002.py | 2 +- .../human_spinalcord_2020_microwell_han_001.py | 2 +- .../spleen/human_spleen_2019_10x_madissoon_001.py | 2 +- .../spleen/human_spleen_2020_microwell_han_001.py | 2 +- .../spleen/human_spleen_2020_microwell_han_002.py | 2 +- .../stomach/human_stomach_2020_microwell_han_001.py | 2 +- .../stomach/human_stomach_2020_microwell_han_002.py | 2 +- .../stomach/human_stomach_2020_microwell_han_003.py | 2 +- .../stomach/human_stomach_2020_microwell_han_004.py | 2 +- .../stomach/human_stomach_2020_microwell_han_005.py | 2 +- .../stomach/human_stomach_2020_microwell_han_006.py | 2 +- .../stomach/human_stomach_2020_microwell_han_007.py | 2 +- .../stomach/human_stomach_2020_microwell_han_008.py | 2 +- .../stomach/human_stomach_2020_microwell_han_009.py | 2 +- .../stomach/human_stomach_2020_microwell_han_010.py | 2 +- .../human/thymus/human_thymus_2020_10x_park_001.py | 2 +- .../thymus/human_thymus_2020_microwell_han_001.py | 2 +- .../thymus/human_thymus_2020_microwell_han_002.py | 2 +- .../thyroid/human_thyroid_2020_microwell_han_001.py | 2 +- .../thyroid/human_thyroid_2020_microwell_han_002.py | 2 +- .../trachea/human_trachea_2020_microwell_han_001.py | 2 +- .../ureter/human_ureter_2020_microwell_han_001.py | 2 +- .../uterus/human_uterus_2020_microwell_han_001.py | 2 +- .../bladder/mouse_bladder_2018_microwell_han_001.py | 2 +- .../mouse/bladder/mouse_bladder_2019_10x_pisco_001.py | 2 +- .../bladder/mouse_bladder_2019_smartseq2_pisco_001.py | 2 +- .../mouse/brain/mouse_brain_2018_microwell_han_001.py | 2 +- .../mouse/brain/mouse_brain_2018_microwell_han_002.py | 2 +- .../brain/mouse_brain_2019_mouse_brain_atlas_temp.py | 2 +- .../brain/mouse_brain_2019_smartseq2_pisco_001.py | 2 +- .../brain/mouse_brain_2019_smartseq2_pisco_002.py | 2 +- 
.../mouse_diaphragm_2019_smartseq2_pisco_001.py | 2 +- sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py | 2 +- .../mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py | 2 +- .../mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py | 2 +- .../mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py | 2 +- .../mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py | 2 +- .../data/mouse/heart/mouse_heart_2019_10x_pisco_001.py | 2 +- .../heart/mouse_heart_2019_smartseq2_pisco_001.py | 2 +- .../heart/mouse_heart_2019_smartseq2_pisco_002.py | 2 +- .../kidney/mouse_kidney_2018_microwell_han_001.py | 2 +- .../kidney/mouse_kidney_2018_microwell_han_002.py | 2 +- .../mouse/kidney/mouse_kidney_2019_10x_pisco_001.py | 2 +- .../kidney/mouse_kidney_2019_smartseq2_pisco_001.py | 2 +- .../mouse_large_intestine_2019_10x_pisco_001.py | 2 +- .../mouse_large_intestine_2019_smartseq2_pisco_001.py | 2 +- .../mouse_limb_muscle_2018_microwell_han_001.py | 2 +- .../mouse_limb_muscle_2019_10x_pisco_001.py | 2 +- .../mouse_limb_muscle_2019_smartseq2_pisco_001.py | 2 +- .../mouse/liver/mouse_liver_2018_microwell_han_001.py | 2 +- .../mouse/liver/mouse_liver_2018_microwell_han_002.py | 2 +- .../data/mouse/liver/mouse_liver_2019_10x_pisco_001.py | 2 +- .../liver/mouse_liver_2019_smartseq2_pisco_001.py | 2 +- .../mouse/lung/mouse_lung_2018_microwell_han_001.py | 2 +- .../mouse/lung/mouse_lung_2018_microwell_han_002.py | 2 +- .../mouse/lung/mouse_lung_2018_microwell_han_003.py | 2 +- .../data/mouse/lung/mouse_lung_2019_10x_pisco_001.py | 2 +- .../mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py | 2 +- .../mouse_mammary_gland_2018_microwell_han_001.py | 2 +- .../mouse_mammary_gland_2018_microwell_han_002.py | 2 +- .../mouse_mammary_gland_2018_microwell_han_003.py | 2 +- .../mouse_mammary_gland_2018_microwell_han_004.py | 2 +- .../mouse_mammary_gland_2019_10x_pisco_001.py | 2 +- .../mouse_mammary_gland_2019_smartseq2_pisco_001.py | 2 +- .../mouse/marrow/mouse_marrow_2018_microwell_001.py | 2 +- 
.../mouse/marrow/mouse_marrow_2019_10x_pisco_001.py | 2 +- .../marrow/mouse_marrow_2019_smartseq2_pisco_001.py | 2 +- .../mouse/ovary/mouse_ovary_2018_microwell_han_001.py | 2 +- .../mouse/ovary/mouse_ovary_2018_microwell_han_002.py | 2 +- .../pancreas/mouse_pancreas_2018_microwell_han_001.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_pisco_001.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_001.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_002.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_003.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_004.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_005.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_006.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_007.py | 2 +- .../pancreas/mouse_pancreas_2019_10x_thompson_008.py | 2 +- .../mouse_pancreas_2019_smartseq2_pisco_001.py | 2 +- .../mouse_peripheral_blood_2018_microwell_han_001.py | 2 +- .../mouse_peripheral_blood_2018_microwell_han_002.py | 2 +- .../mouse_peripheral_blood_2018_microwell_han_003.py | 2 +- .../mouse_peripheral_blood_2018_microwell_han_004.py | 2 +- .../mouse_peripheral_blood_2018_microwell_han_005.py | 2 +- .../placenta/mouse_placenta_2018_microwell_han_001.py | 2 +- .../placenta/mouse_placenta_2018_microwell_han_002.py | 2 +- .../prostate/mouse_prostate_2018_microwell_han_001.py | 2 +- .../prostate/mouse_prostate_2018_microwell_han_002.py | 2 +- .../data/mouse/rib/mouse_rib_2018_microwell_han_001.py | 2 +- .../data/mouse/rib/mouse_rib_2018_microwell_han_002.py | 2 +- .../data/mouse/rib/mouse_rib_2018_microwell_han_003.py | 2 +- .../data/mouse/skin/mouse_skin_2019_10x_pisco_001.py | 2 +- .../mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py | 2 +- .../mouse_small_intestine_2018_microwell_han_001.py | 2 +- .../mouse_small_intestine_2018_microwell_han_002.py | 2 +- .../mouse_small_intestine_2018_microwell_han_003.py | 2 +- .../spleen/mouse_spleen_2018_microwell_han_001.py | 2 +- 
.../mouse/spleen/mouse_spleen_2019_10x_pisco_001.py | 2 +- .../spleen/mouse_spleen_2019_smartseq2_pisco_001.py | 2 +- .../stomach/mouse_stomach_2018_microwell_han_001.py | 2 +- .../testis/mouse_testis_2018_microwell_han_001.py | 2 +- .../testis/mouse_testis_2018_microwell_han_002.py | 2 +- .../thymus/mouse_thymus_2018_microwell_han_001.py | 2 +- .../mouse/thymus/mouse_thymus_2019_10x_pisco_001.py | 2 +- .../thymus/mouse_thymus_2019_smartseq2_pisco_001.py | 2 +- .../mouse/tongue/mouse_tongue_2019_10x_pisco_001.py | 2 +- .../tongue/mouse_tongue_2019_smartseq2_pisco_001.py | 2 +- .../mouse/trachae/mouse_trachea_2019_10x_pisco_001.py | 2 +- .../trachae/mouse_trachea_2019_smartseq2_pisco_001.py | 2 +- .../uterus/mouse_uterus_2018_microwell_han_001.py | 2 +- .../uterus/mouse_uterus_2018_microwell_han_002.py | 2 +- 239 files changed, 245 insertions(+), 245 deletions(-) diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index d94b3b51e..1ff804cd4 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -3,7 +3,6 @@ class ADATA_IDS_BASE: """ Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" - _animal: str _annotated: str _author: str _cell_types_original: str @@ -20,13 +19,10 @@ class ADATA_IDS_BASE: _normalization: str _organ: str _protocol: str + _species: str _subtissue: str _year: str - @property - def animal(self): - return self._animal - @property def annotated(self): return self._annotated @@ -91,6 +87,10 @@ def protocol(self): def organ(self): return self._organ + @property + def species(self): + return self._species + @property def subtissue(self): return self._subtissue diff --git a/sfaira/data/base.py b/sfaira/data/base.py index f76653dc6..8d31baa5d 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -553,7 +553,6 @@ def write_meta( if self.adata is None: self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ - "animal": self.adata.uns[ADATA_IDS_SFAIRA.animal], "author": self.adata.uns[ADATA_IDS_SFAIRA.author], "annotated": self.adata.uns[ADATA_IDS_SFAIRA.annotated], "doi": self.adata.uns[ADATA_IDS_SFAIRA.doi], @@ -563,6 +562,7 @@ def write_meta( "normalization": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], + "species": self.adata.uns[ADATA_IDS_SFAIRA.species], "year": self.adata.uns[ADATA_IDS_SFAIRA.year], }, index=range(1)) meta.to_csv(fn_meta) diff --git a/sfaira/data/databases/cellxgene/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py index d506653d0..f43346af7 100644 --- a/sfaira/data/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene/cellxgene_loader.py @@ -37,7 +37,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.author] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] self.adata.uns[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_CELLXGENE.year] self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
adata.uns[ADATA_IDS_CELLXGENE.doi] - if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.animal].values)) > 1: + if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.species].values)) > 1: raise Warning("found multiple assay in data set %s" % self.fn) self.adata.uns[ADATA_IDS_SFAIRA.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] # Select tissue: blood is handled as a separate tissue in .obs @@ -45,9 +45,9 @@ def _load(self, fn=None): # raise Warning("found multiple tissue in data set %s" % self.fn) #self.adata.uns["organ"] = adata.obs["tissue"].values[0] self.adata.uns[ADATA_IDS_SFAIRA.organ] = str(self.fn).split("_")[3] - if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.animal].values)) > 1: + if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.species].values)) > 1: raise Warning("found multiple organisms in data set %s" % self.fn) - self.adata.uns[ADATA_IDS_SFAIRA.animal] = adata.obs[ADATA_IDS_CELLXGENE.animal].values[0] + self.adata.uns[ADATA_IDS_SFAIRA.species] = adata.obs[ADATA_IDS_CELLXGENE.species].values[0] self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.annotated diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index bb4f114a1..35ee5f198 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 6565e189b..f7ff1aea5 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index c5ef69cb4..008f50462 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index 58b85716b..e4b975df6 100644 --- 
a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 002d7a9ea..4a8e189ce 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 9bde4e591..17c79d022 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 0c55b0674..4a0eb5e51 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 4585e7132..9b05fe785 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index c741d6356..b8969b60f 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 22d7d6da1..281512488 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 5bf1e331f..81fc62825 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ 
b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index b5151c1bb..b27a2bb0d 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 2731d52a2..4b0da8c61 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -52,7 +52,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] 
= self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index b6ed12462..9395c471d 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index 670222385..3ea9979f5 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 7e458b379..333cf7daa 
100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index b741d005e..fd54c0efa 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index d4ab3e10c..66faeca5c 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index ebebd234c..3f60960fb 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index 8f66d62f7..e54565913 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py 
b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index c6fdb32fa..dc5e2c45e 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index b4de3c370..e2bfc7cfe 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 56a645ea6..fa2ee77d1 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index 212f5d3e3..b2fc47640 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -69,7 +69,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index 4b1eb916d..2fa168dfc 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 81a4e19c6..430ad3f0d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 6bad62df3..8c9d115f5 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index 2e399aa26..c35ac3d57 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index a54ebfb5c..d31e0665d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index cc470274a..fc030ab12 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 2acfc4d77..62a347869 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 73e3280b1..c3cad3af5 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index f9dcd6e07..c4fe7b110 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ 
b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index 665546536..e5406c54a 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -134,7 +134,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 4bd685fbf..3c1f2a1d5 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -93,7 +93,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index 0cba30892..086bd76f1 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index 564759558..e40f0105f 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -80,7 +80,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index e6cf51fc1..cc0c3e9bb 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ 
b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -78,7 +78,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 2fe4de69f..d0e1eaead 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -78,7 +78,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 762222ffa..6d5225af4 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -78,7 +78,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 5f508546d..cab2ed356 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -78,7 +78,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 18f7b6682..31b38f35e 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index d2fd22c3e..0612889dd 100644 
--- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index a923b843b..563b423e1 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -71,7 +71,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 8fe6b4ed5..0e1a6b012 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -75,7 +75,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index a2cd07343..6d2242d04 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -75,7 +75,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index c9b54aec6..e01cee387 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index 44de32331..f9c5d497f 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index bdc7b56ec..0c0cb96c9 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index f1e0ac3cc..bb71f0c01 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 3d6af5aca..6961b28c9 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 6cc78818b..051be7563 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index f5ba480b9..98bb43dc8 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 244258326..75c1faa80 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index fbc251514..9e7ef8589 100644 --- 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 8eb85ffbd..6664c0bd7 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 45568d6f8..8c5058ac2 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = 
"human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 9acbb98bf..76d54f986 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index 77b9324d5..d7ee8d9c7 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 9e50888ce..6378752c5 
100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -75,7 +75,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 9671e73b2..792ad33b2 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 5c2fe272a..87a9ab8f9 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -77,7 +77,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 54aa2165d..6085adc05 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index 254dcaf0f..f4d630a4a 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -83,7 +83,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10xSn' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index 
61e39c00c..d37963426 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -131,7 +131,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index 93534736b..bd4590027 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -102,7 +102,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index b2cb54571..791af3883 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index d174bfa5e..f623c4c1c 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index e77163b10..7f3e36f62 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py 
b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 8879259bf..f66bc7db9 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index bd5e6fec8..68c304254 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 940814ec5..83c035039 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 25ae01e2d..8fce0aa76 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -100,7 +100,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 19403f3df..2e8cd9ba7 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -76,7 +76,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = 
self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 2e789fceb..0e2e55aad 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -76,7 +76,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index ae339304b..1c4c192ba 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -78,7 +78,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 5d0773c5e..656dde868 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -96,7 +96,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 282799135..27dc3c2ed 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 8ae7f12f3..9146e4339 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 73d5fdfe5..a48826384 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index 1aa7f2002..c749ebcc7 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index b0097a22d..8a4ecf706 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -79,7 +79,7 @@ 
def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 3f5015f62..81933a02f 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 2238529ad..b24891994 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 13bd19a70..62c0dd849 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index afcbb8e04..280ff991a 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'dropseq' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = [self.download_website, self.download_website_meta] self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 8a5cb9bd9..22326b4f6 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -113,7 +113,7 @@ 
def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index 02a36a0b5..a72e5caba 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index 5ccd1132c..3bae58de9 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index 33146cc53..a01e2cc95 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index ec70b910d..d1a3c8551 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -120,7 +120,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 9213ba0cb..627199ddf 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -101,7 +101,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 61cd4080c..5efd59b81 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -101,7 +101,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 72c6e1dc1..cfcf99f45 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -101,7 +101,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index b39130ead..96d16219d 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -101,7 +101,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 904692b7b..b65ea2a8c 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -101,7 +101,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 99e57a332..8147dc611 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -107,7 
+107,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'smartseq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 83de8d248..0ebc7036e 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -65,7 +65,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index 05d105b8e..372074230 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 3533f6eb2..74b0d6eb1 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 762a3970b..2559948d0 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -156,7 +156,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index 70fd853ec..c24351538 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 7c3d0d028..5f8f4c2d5 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index cc165c7d6..3275d67ea 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 72dcabf37..aeaf8842f 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index f7c4c7c8f..d2ff1416e 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py 
b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index 5201c90e3..b8f672703 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'inDrop' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index bdc03b1dc..33d382dd0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -75,7 +75,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index b1dd43011..d9cfa8d4b 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -135,7 +135,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index e0f55ce0b..63dfb2c74 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index bfcdc28a6..009adbdfa 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 8607345a0..3932c1255 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 2994104d3..2abd51705 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 8eca32560..72c453c24 100644 --- 
a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index fd36741f8..22d461ad2 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -89,7 +89,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "Smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index e70044c26..fde6c3d50 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index a880ebb6a..b50155990 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 21f280460..82a775f96 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 28f351d08..322236187 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -71,7 +71,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index a294a07a0..f92d2981a 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index a2e450ef9..e8f80e4d9 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -66,7 +66,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index a32a85f99..4de6aa18b 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index aac37606d..5eac488d3 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 7599c8e13..af794cb5c 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -80,7 +80,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 15eaf4209..0bcf79a5e 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -80,7 +80,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 847198cb1..e9a0e0561 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ 
b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 817de894b..631710bb9 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -80,7 +80,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index b1ea2dd2f..ca36b269a 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = 
"human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index e05d0eb16..08126ca29 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index efdacf67d..9b994d4bb 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index 2a9edcd78..a4f315103 100644 --- 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index d037dbdde..0ddfd91a3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index 7e3030fb3..d756c8490 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index c1518688b..61e958ae6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 809dff1b8..8895c9ff4 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py 
b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index adfed6896..368acf00a 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index fbdd72452..320653c45 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index b00ef1487..623a6ccd7 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index 8db9c7220..2e5fcdf2b 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index 5db839de1..2d2dce074 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -95,7 +95,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes 
diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index ca30b30ae..020dc1314 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index d798d6cc8..c48987fc9 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 24ebf7942..708a50506 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index d133951b7..db2477eea 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index b3313e97c..d28142b4b 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] 
= self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 71b3c24e3..170a2257c 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 39a89fd08..eec59d718 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -50,7 +50,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "human" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 9eac4bd73..95964f1e1 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index 0755d85a0..03bcf78c9 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index c486f3836..33199af3a 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index 288cff59c..fd33e496b 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -65,7 +65,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index cf6a8f5b8..74aaf387b 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -65,7 +65,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py 
b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index a0957a4cd..084e66ab0 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -75,7 +75,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index 0332e7d3b..6ca638f84 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index ab83d4cc4..15c487af3 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 8afc16cbb..02e25518e 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py index 663c98dbd..375b0e14a 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = 
self.has_celltypes diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py index e33b29373..a3482f3f9 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py index cd3264d65..a320624bd 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py index cbe4aa284..5a94230e4 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 
"smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py index 997d45584..299f40343 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index e3a647312..f80ec2445 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index f95971952..d7f35b3ab 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index 5516f5571..e1e2b287e 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index df65fb66a..f813d68ea 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -51,7 +51,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index 48fa4410e..033fa731e 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -82,7 +82,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index d25c569a0..c69d74681 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = 
self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index ab39f2674..ae9e88206 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py index f5a7ee324..17c8372db 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index ddaf686bf..766e6a5fa 100644 --- 
a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py index 2a400ee85..7dac35d7f 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py index e65915900..f538e283c 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index a2f402278..0515158ae 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index f959b61cf..edd19a92e 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -69,7 +69,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = 
self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 42ac84262..24f0a03aa 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 9619eb280..6131ea70c 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 48318680b..245ac729d 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py 
@@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index b4bf92442..1ca4e5964 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -83,7 +83,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 6a60e482c..2a8bea9df 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -83,7 +83,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 561f3b2d1..6d0f14686 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -83,7 +83,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 12ff8bc44..015eb167a 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 524c94f20..10c5e48a2 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py index c5e6ffe78..52d749e52 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py index 6da666a26..42674693a 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py index 85998656e..3ffe43c9b 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py index 394d1021f..2806abd02 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py @@ -67,7 +67,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py index ccac92cdd..ed91fbb7b 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index 1caedb7c9..012bcde2c 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py index 
b28dbf6d0..2a2704d75 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py @@ -66,7 +66,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py index a4c4c4059..9916020f8 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py index d4dfd0f27..cf4e02f27 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py index abd074ef1..31406e47b 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py index c2fa97626..a033284b0 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py @@ -63,7 +63,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index 7d7a28100..dd6814c75 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -73,7 +73,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index e64734484..19590fe66 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 35bdc885f..18c2574f7 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index 1f01cc483..dc7ba1914 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index f8f8aff52..307ca856e 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 430120946..90b0a7147 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index b32bad29e..18c413c0a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index 73aea171a..c3bb0281d 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 41473d5ed..570a9596d 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index a03bd30e0..ba12aa485 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -64,7 +64,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index d1e38e56a..d78535bfe 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py index 6298be044..0d716fc5b 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py index 06758f8c2..d467164e8 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py index 0fcbf163e..e735ba7b4 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py index 
d76777112..98332ad7d 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index 818e4dbbf..fece325ad 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -72,7 +72,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 2a7b47cf2..636fa1f15 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -79,7 +79,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index aaaef0b37..076cb182b 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -79,7 +79,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index fcf58776b..9ccac2ac9 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = 
self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index e653abe55..46ddc0a83 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index f0d318f0a..17ab250bc 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index c0fbf01cf..676c15725 100644 --- 
a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index d36fb3387..0268b6e81 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index d74f35962..262885440 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index 8001a5f18..b582e2011 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py index e8644c9bc..6dbb2d9c2 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py index d3cf9dded..2cadc4d9e 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py index d5a497bcc..4de9bb758 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py @@ -74,7 +74,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 04d657f6a..5e31c0eea 100644 --- 
a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 2fe0dae72..451baa207 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index b8dd1d444..9bde5f267 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -59,7 +59,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 5e0a1da52..56ab7c683 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -68,7 +68,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py index 379f27a03..31ba920d7 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git 
a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py index ee4284ba3..9cc44c15b 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index 607feaa03..125b2ad69 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -57,7 +57,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index eab745f4e..188d26cb5 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -58,7 +58,7 @@ def _load(self, fn=None): 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index 01f84ee40..c8f960e2a 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -58,7 +58,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 983c15fe4..2db82fdfb 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index f8053c10b..a168f163e 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py index a9681665a..df3dd8cb1 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py @@ -61,7 +61,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py index ac0a83c94..39b5c48c2 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py @@ -60,7 +60,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index c29e86ecc..5ffd95ce4 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index a837ad6f5..db2724715 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -70,7 +70,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.animal] = "mouse" + 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes From 77a6dff6ec99b0c488289b0753b1450c9c24d42f Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:21:08 +0100 Subject: [PATCH 011/161] allowed maps of meta data file nomenclature --- sfaira/api/consts.py | 2 +- sfaira/consts/__init__.py | 2 +- sfaira/consts/adata_fields.py | 5 ++ sfaira/consts/meta_data_files.py | 18 +++++ sfaira/data/base.py | 71 +++++++++++-------- .../databases/cellxgene/cellxgene_loader.py | 4 +- sfaira/data/databases/cellxgene/external.py | 2 +- 7 files changed, 68 insertions(+), 36 deletions(-) diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py index 00cc5cb17..82845022a 100644 --- a/sfaira/api/consts.py +++ b/sfaira/api/consts.py @@ -1,2 +1,2 @@ from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts import META_DATA_FIELDS +from sfaira.consts import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 15d95e718..c3143a03d 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,3 +1,3 @@ from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts.meta_data_files import META_DATA_FIELDS +from sfaira.consts.meta_data_files import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 1ff804cd4..3c777daec 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,3 +1,8 @@ +""" +The classes in this file are containers of field names and element entries that are used in streamlined adata objects +in sfaira and in associated data bases. 
+""" + class ADATA_IDS_BASE: """ diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 09f686a12..52a52d9c8 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -1,3 +1,7 @@ +""" +The classes contains constants related to sfaira streamlined meta data files. +""" + META_DATA_FIELDS = [ "annotated", "author", @@ -11,3 +15,17 @@ "species", "year", ] + +META_DATA_FIELDS_CELLXGENE = { + "annotated": "annotated", + "author": "author", + "doi": "doi", + "download": "download", + "id": "id", + "ncells": "ncells", + "normalization": "normalization", + "organ": "organ", + "protocol": "protocol", + "species": "species", + "year": "year", +} diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 8d31baa5d..d2ed32388 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -394,7 +394,7 @@ def _set_genome(self, self.genome_container = g - def load_meta(self, fn: Union[PathLike, str]): + def load_meta(self, fn: Union[PathLike, str], map=None): if fn is None: if self.meta_path is None: raise ValueError("provide either fn in load or path in constructor") @@ -402,7 +402,45 @@ def load_meta(self, fn: Union[PathLike, str]): else: if isinstance(fn, str): fn = os.path.normpath(fn) - self.meta = pandas.read_csv(fn, usecols=META_DATA_FIELDS) + if map is None: + usecols = META_DATA_FIELDS + newcols = META_DATA_FIELDS + else: + usecols = list(map.keys()) + newcols = list(map.values()) + tab = pandas.read_csv(fn, usecols=usecols) + tab.columns = newcols + self.meta = tab + + def write_meta( + self, + fn_meta: Union[None, str] = None, + fn_data: Union[None, str] = None, + dir_out: Union[None, str] = None, + ): + if fn_meta is None: + if self.path is None and dir_out is None: + raise ValueError("provide either fn in load or path in constructor") + if dir_out is None: + dir_out = self.meta_path + fn_meta = os.path.join(dir_out, self.doi_cleaned_id + "_meta.csv") + if self.adata is None: + self.load(fn=fn_data, 
remove_gene_version=False, match_to_reference=None) + meta = pandas.DataFrame({ + "author": self.adata.uns[ADATA_IDS_SFAIRA.author], + "annotated": self.adata.uns[ADATA_IDS_SFAIRA.annotated], + "doi": self.adata.uns[ADATA_IDS_SFAIRA.doi], + "download": self.adata.uns[ADATA_IDS_SFAIRA.download], + "id": self.adata.uns[ADATA_IDS_SFAIRA.id], + "ncells": self.adata.n_obs, + "normalization": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, + "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], + "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], + "species": self.adata.uns[ADATA_IDS_SFAIRA.species], + "year": self.adata.uns[ADATA_IDS_SFAIRA.year], + }, index=range(1)) + meta.to_csv(fn_meta) + @property def author(self): @@ -538,35 +576,6 @@ def year(self, x): def doi_cleaned_id(self): return "_".join(self.id.split("_")[:-1]) - def write_meta( - self, - fn_meta: Union[None, str] = None, - fn_data: Union[None, str] = None, - dir_out: Union[None, str] = None, - ): - if fn_meta is None: - if self.path is None and dir_out is None: - raise ValueError("provide either fn in load or path in constructor") - if dir_out is None: - dir_out = self.meta_path - fn_meta = os.path.join(dir_out, self.doi_cleaned_id + "_meta.csv") - if self.adata is None: - self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) - meta = pandas.DataFrame({ - "author": self.adata.uns[ADATA_IDS_SFAIRA.author], - "annotated": self.adata.uns[ADATA_IDS_SFAIRA.annotated], - "doi": self.adata.uns[ADATA_IDS_SFAIRA.doi], - "download": self.adata.uns[ADATA_IDS_SFAIRA.download], - "id": self.adata.uns[ADATA_IDS_SFAIRA.id], - "ncells": self.adata.n_obs, - "normalization": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], - "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], - "species": 
self.adata.uns[ADATA_IDS_SFAIRA.species], - "year": self.adata.uns[ADATA_IDS_SFAIRA.year], - }, index=range(1)) - meta.to_csv(fn_meta) - @property def available_type_versions(self): return np.array(list(self.class_maps.keys())) diff --git a/sfaira/data/databases/cellxgene/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py index f43346af7..5ff9ba7fc 100644 --- a/sfaira/data/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene/cellxgene_loader.py @@ -3,7 +3,7 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE +from .external import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, META_DATA_FIELDS_CELLXGENE class DatasetCellxgene(DatasetBase): @@ -24,7 +24,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.fn = fn - self.load_meta() + self.load_meta(map=META_DATA_FIELDS_CELLXGENE) self.class_maps = { "0": {}, } diff --git a/sfaira/data/databases/cellxgene/external.py b/sfaira/data/databases/cellxgene/external.py index 5caf39357..11aaeafb8 100644 --- a/sfaira/data/databases/cellxgene/external.py +++ b/sfaira/data/databases/cellxgene/external.py @@ -1,3 +1,3 @@ from sfaira.data import DatasetBase, DatasetGroupBase from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE -from sfaira.consts import META_DATA_FIELDS +from sfaira.consts import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE From 9f844c7594a849fdb9bcd0134d5408bad67d7c6c Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:24:44 +0100 Subject: [PATCH 012/161] moved meta data code in DatasetBase for readability --- sfaira/data/base.py | 113 +++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 55 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index d2ed32388..a4d2637b4 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -394,6 +394,64 @@ def _set_genome(self, 
self.genome_container = g + @property + def doi_cleaned_id(self): + return "_".join(self.id.split("_")[:-1]) + + @property + def available_type_versions(self): + return np.array(list(self.class_maps.keys())) + + def set_default_type_version(self): + """ + Choose most recent version. + + :return: Version key corresponding to most recent version. + """ + return self.available_type_versions[np.argmax([int(x) for x in self.available_type_versions])] + + def assert_celltype_version_key( + self, + celltype_version + ): + if celltype_version not in self.available_type_versions: + raise ValueError( + "required celltype version %s not found. available are: %s" % + (celltype_version, str(self.available_type_versions)) + ) + + def map_ontology_class( + self, + raw_ids, + celltype_version + ): + """ + + :param raw_ids: + :param class_maps: + :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :return: + """ + if celltype_version is None: + celltype_version = self.set_default_type_version() + self.assert_celltype_version_key(celltype_version=celltype_version) + return [ + self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() else x + for x in raw_ids + ] + + @property + def citation(self): + """ + Return all information necessary to cite data set. + + :return: + """ + return [self.author, self.year, self.doi] + + # Meta data handling code: Reading, writing and selected properties. Properties are either set in constructor + # (and saved in self._somename) or accessed in self.meta. + def load_meta(self, fn: Union[PathLike, str], map=None): if fn is None: if self.meta_path is None: @@ -572,61 +630,6 @@ def year(self): def year(self, x): self._year = x - @property - def doi_cleaned_id(self): - return "_".join(self.id.split("_")[:-1]) - - @property - def available_type_versions(self): - return np.array(list(self.class_maps.keys())) - - def set_default_type_version(self): - """ - Choose most recent version. 
- - :return: Version key corresponding to most recent version. - """ - return self.available_type_versions[np.argmax([int(x) for x in self.available_type_versions])] - - def assert_celltype_version_key( - self, - celltype_version - ): - if celltype_version not in self.available_type_versions: - raise ValueError( - "required celltype version %s not found. available are: %s" % - (celltype_version, str(self.available_type_versions)) - ) - - def map_ontology_class( - self, - raw_ids, - celltype_version - ): - """ - - :param raw_ids: - :param class_maps: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. - :return: - """ - if celltype_version is None: - celltype_version = self.set_default_type_version() - self.assert_celltype_version_key(celltype_version=celltype_version) - return [ - self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() else x - for x in raw_ids - ] - - @property - def citation(self): - """ - Return all information necessary to cite data set. 
- - :return: - """ - return [self.author, self.year, self.doi] - class DatasetGroupBase(abc.ABC): """ From d3e356ab038df78caa050c20e8569fa206a2448a Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:30:29 +0100 Subject: [PATCH 013/161] introduced meta_fn attribute of dataset class and depreceated 3rd party database via meta objects --- sfaira/api/consts.py | 2 +- sfaira/consts/__init__.py | 2 +- sfaira/consts/meta_data_files.py | 14 ---------- sfaira/data/base.py | 26 +++++++++---------- .../databases/cellxgene/cellxgene_loader.py | 1 - 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py index 82845022a..00cc5cb17 100644 --- a/sfaira/api/consts.py +++ b/sfaira/api/consts.py @@ -1,2 +1,2 @@ from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE +from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index c3143a03d..15d95e718 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,3 +1,3 @@ from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts.meta_data_files import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE +from sfaira.consts.meta_data_files import META_DATA_FIELDS diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 52a52d9c8..8478aba28 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -15,17 +15,3 @@ "species", "year", ] - -META_DATA_FIELDS_CELLXGENE = { - "annotated": "annotated", - "author": "author", - "doi": "doi", - "download": "download", - "id": "id", - "ncells": "ncells", - "normalization": "normalization", - "organ": "organ", - "protocol": "protocol", - "species": "species", - "year": "year", -} 
diff --git a/sfaira/data/base.py b/sfaira/data/base.py index a4d2637b4..a3d28daed 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -452,23 +452,22 @@ def citation(self): # Meta data handling code: Reading, writing and selected properties. Properties are either set in constructor # (and saved in self._somename) or accessed in self.meta. - def load_meta(self, fn: Union[PathLike, str], map=None): + @property + def meta_fn(self): + if self.meta_path is None: + return None + else: + return os.path.join(self.meta_path, self.doi_cleaned_id + "_meta.csv") + + def load_meta(self, fn: Union[PathLike, str]): if fn is None: - if self.meta_path is None: + if self.meta_fn is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.meta_path, self.doi_cleaned_id + "_meta.csv") + fn = self.meta_fn else: if isinstance(fn, str): fn = os.path.normpath(fn) - if map is None: - usecols = META_DATA_FIELDS - newcols = META_DATA_FIELDS - else: - usecols = list(map.keys()) - newcols = list(map.values()) - tab = pandas.read_csv(fn, usecols=usecols) - tab.columns = newcols - self.meta = tab + self.meta = pandas.read_csv(fn, usecols=META_DATA_FIELDS) def write_meta( self, @@ -481,7 +480,7 @@ def write_meta( raise ValueError("provide either fn in load or path in constructor") if dir_out is None: dir_out = self.meta_path - fn_meta = os.path.join(dir_out, self.doi_cleaned_id + "_meta.csv") + fn_meta = self.meta_fn if self.adata is None: self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ @@ -499,7 +498,6 @@ def write_meta( }, index=range(1)) meta.to_csv(fn_meta) - @property def author(self): if self._author is not None: diff --git a/sfaira/data/databases/cellxgene/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py index 5ff9ba7fc..ba9bd42df 100644 --- a/sfaira/data/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/databases/cellxgene/cellxgene_loader.py @@ -24,7 
+24,6 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.fn = fn - self.load_meta(map=META_DATA_FIELDS_CELLXGENE) self.class_maps = { "0": {}, } From 4f988a989a420b6bcd4bbad26629f99a0411273d Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 27 Oct 2020 19:46:44 +0100 Subject: [PATCH 014/161] added datsetgroup subsetting based on meta / lazy properties --- sfaira/data/base.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index a3d28daed..10d5bfd0f 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -862,6 +862,32 @@ def format_type_version(self, version): self.assert_celltype_version_key() return version + def subset(self, key, values): + """ + Subset list of adata objects based on match to values in key property. + + These keys are properties that are available in lazy model. + Subsetting happens on .datasets. + + :param key: Property to subset by. + :param values: Classes to overlap to. + :return: + """ + ids_del = [] + if not isinstance(values, list): + values = [values] + for x in self.ids: + try: + values_found = getattr(self.datasets[x], key) + if not isinstance(values_found, list): + values_found = [values_found] + if not np.any([xx in values for xx in values_found]): + ids_del.append(x) + except AttributeError: + raise ValueError(f"{key} not a valid property of data set object") + for x in ids_del: + del self.datasets[x] + class DatasetSuperGroup: """ @@ -1060,3 +1086,17 @@ def delete_backed(self): def load_cached_backed(self, fn: PathLike): self.adata = anndata.read(fn, backed='r') + + def subset(self, key, values): + """ + Subset list of adata objects based on match to values in key property. + + These keys are properties that are available in lazy model. + Subsetting happens on .datasets. + + :param key: Property to subset by. + :param values: Classes to overlap to. 
+ :return: + """ + for x in self.dataset_groups: + x.subset(key=key, values=values) From a415e5d926d7c9101d500cf9d39fa8e03198a970 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 10 Dec 2020 11:47:08 +0100 Subject: [PATCH 015/161] Master merge into dev (#26) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add plot_npc and plot_active_latent_units (#9) * add plot_npc and plot_active_latent_units * make sure handling of z and z_mean is consistent for VAE embeddings * clean up and documentation * formatting Co-authored-by: Martin König Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> * added data loader for interactive workflows with unprocessed data * made cell type loading optional in dataset .load() * enabled usage of type estimator on data without labels in prediction mode * recursively search custom model repo for weights files * sort model lookuptable alphabetically before writing it * make sure mode_path is set correctly in model_lookuptable when recursive weights loading is used * fix os.path.join usage in dataloaders * replace path handling through string concatenations with os.paths.join and f-strings * fix bug in lookup table writing * add mdoel file path to lookup table * reset index in model lookuptable before saving * add method to user interface for pushing local model weights to zenodo * fix bug in user interface * fix bux in summaries.py * use absolute model paths when model_lookuptable is used * fix bug in pretrained weights loading * fix bug in pretrained weights loading * automatically create an InteractiveDataset when loading data through the UI * fix bug inUI data loading * Explicitly cast indices and indptr of final backed file to int64. 
(#17) For the background on this: https://github.com/theislab/anndata/issues/453 * update human lung dataset doi * align mouse organ names with human organ names * fix typo in trachea organ naming in mouse * rename mouse ovary organ to femalegonad * rename mouse ovary organ to femalegonad * sort by model type in classwise f1 heatmap plot * another hacky solution to ensure a summary tab can be created when both vae and other models are loaded at once * allow custom metadata in zenodo submission * do not return doi but deposit url after depositing to zenodo sandbox as dois don't wrk on sandbox * updated model zoo description * recognise all .h5 and .data-0000... files as sfaira weights when constructing lookuptable * Update README.rst * Add selu activation and lecun_normal weight_init scheme for human VAEVAMP. (#19) * update sfaira erpo url and handle .h5 extension in model lookuptable id * add meta_data download information to all human dataloaders * updated docs * updated reference to README in docs * updated index * included reference to svensson et al data base in docs * fixed typo in docs * fixed typos in docs * restructured docs * fixed bug in reference roadmap in docs * updated data and model zoo description * added summary picture into index of docs * fixed typo in docs * updated summary panel * add badges to readme and docs index * updated summary panel (#20) * Doc updates (#21) * updated summary panel * fixed concept figure references * Doc updates (#22) * updated zoo panels * move from `import sfaira.api as sfaira` to `import sfaira` and from `import sfaira_extension.api as sfairae` to `import sfaira_extension` * add custom genomes to sfaira_extension * fix loading of custom topology versions from sfaira_extension * fix circular imports between sfaira_extension and sfaira * fix dataloader * fix celltype versioning through sfaira_extension * fix celltype versioning through sfaira_extension * formatting * Doc updates (#25) * added mention of download scripts 
into docs Co-authored-by: mk017 Co-authored-by: Martin König Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> Co-authored-by: Abdul Moeed --- README.rst | 29 +- docs/api/index.rst | 154 ++++++++++ docs/data.rst | 116 ++++++-- docs/ecosystem.rst | 61 ++++ docs/environment_brief.rst | 26 ++ docs/genomes.rst | 14 - docs/index.rst | 54 ++-- docs/models.rst | 32 ++- docs/news.rst | 1 + docs/ontologies.rst | 48 ---- docs/release-latest.rst | 6 + docs/release-notes.rst | 11 + docs/roadmap.rst | 20 ++ docs/training.rst | 8 - docs/tutorials.rst | 15 +- requirements.txt | 1 + resources/images/concept.jpeg | Bin 771343 -> 0 bytes resources/images/concept.png | Bin 0 -> 79259 bytes resources/images/data_zoo.png | Bin 0 -> 86510 bytes resources/images/model_zoo.png | Bin 0 -> 81462 bytes setup.py | 3 + sfaira/__init__.py | 22 +- sfaira/api/__init__.py | 6 - sfaira/api/data.py | 3 - sfaira/api/genomes.py | 1 - sfaira/api/models.py | 2 - sfaira/api/train.py | 2 - sfaira/api/ui.py | 1 - sfaira/data/__init__.py | 3 +- sfaira/data/base.py | 24 +- sfaira/data/human/adipose/human_adipose.py | 4 +- .../human_adipose_2020_microwell_han_001.py | 3 +- .../human/adrenalgland/human_adrenalgland.py | 4 +- ...man_adrenalgland_2020_microwell_han_001.py | 3 +- ...man_adrenalgland_2020_microwell_han_002.py | 3 +- ...man_adrenalgland_2020_microwell_han_003.py | 3 +- ...man_adrenalgland_2020_microwell_han_004.py | 3 +- ...man_adrenalgland_2020_microwell_han_005.py | 3 +- ...man_adrenalgland_2020_microwell_han_006.py | 3 +- sfaira/data/human/artery/human_artery.py | 4 +- .../human_artery_2020_microwell_han_001.py | 3 +- sfaira/data/human/bladder/human_bladder.py | 4 +- .../human_bladder_2020_microwell_han_001.py | 3 +- .../human_bladder_2020_microwell_han_002.py | 3 +- .../human_bladder_2020_microwell_han_003.py | 3 +- sfaira/data/human/blood/human_blood.py | 4 +- .../blood/human_blood_2018_10x_ica_001.py | 5 +- .../human_blood_2019_10x_10xGenomics_001.py | 3 +- 
.../human_blood_2020_microwell_han_001.py | 3 +- .../human_blood_2020_microwell_han_002.py | 3 +- .../human_blood_2020_microwell_han_003.py | 3 +- .../human_blood_2020_microwell_han_004.py | 3 +- .../human_blood_2020_microwell_han_005.py | 3 +- .../human_blood_2020_microwell_han_006.py | 3 +- .../human_blood_2020_microwell_han_007.py | 3 +- sfaira/data/human/bone/human_bone.py | 4 +- .../human/bone/human_bone_2018_10x_ica_001.py | 5 +- .../bone/human_bone_2020_microwell_han_001.py | 3 +- .../bone/human_bone_2020_microwell_han_002.py | 3 +- sfaira/data/human/brain/human_brain.py | 4 +- .../human_brain_2017_DroNcSeq_habib_001.py | 3 +- .../human_brain_2020_microwell_han_001.py | 3 +- .../human_brain_2020_microwell_han_002.py | 3 +- .../human_brain_2020_microwell_han_003.py | 3 +- .../human_brain_2020_microwell_han_004.py | 3 +- .../human_brain_2020_microwell_han_005.py | 3 +- .../human_brain_2020_microwell_han_006.py | 3 +- sfaira/data/human/calvaria/human_calvaria.py | 4 +- .../human_calvaria_2020_microwell_han_001.py | 3 +- sfaira/data/human/cervix/human_cervix.py | 4 +- .../human_cervix_2020_microwell_han_001.py | 3 +- .../chorionicvillus/human_chorionicvillus.py | 4 +- ..._chorionicvillus_2020_microwell_han_001.py | 3 +- sfaira/data/human/colon/human_colon.py | 4 +- .../colon/human_colon_2019_10x_kinchen_001.py | 9 +- .../colon/human_colon_2019_10x_smilie_001.py | 3 +- .../colon/human_colon_2019_10x_wang_001.py | 3 +- .../colon/human_colon_2020_10x_james_001.py | 3 +- .../human_colon_2020_microwell_han_001.py | 5 +- .../human_colon_2020_microwell_han_002.py | 5 +- .../human_colon_2020_microwell_han_003.py | 5 +- .../human_colon_2020_microwell_han_004.py | 5 +- sfaira/data/human/duodenum/human_duodenum.py | 4 +- .../human_duodenum_2020_microwell_han_001.py | 3 +- .../data/human/epityphlon/human_epityphlon.py | 4 +- ...human_epityphlon_2020_microwell_han_001.py | 3 +- .../data/human/esophagus/human_esophagus.py | 4 +- .../human_esophagus_2019_10x_madissoon_001.py | 
3 +- .../human_esophagus_2020_microwell_han_001.py | 5 +- .../human_esophagus_2020_microwell_han_002.py | 5 +- sfaira/data/human/eye/human_eye.py | 4 +- .../eye/human_eye_2019_10x_lukowski_001.py | 3 +- .../human/eye/human_eye_2019_10x_menon_001.py | 3 +- .../human/eye/human_eye_2019_10x_voigt_001.py | 3 +- .../eye/human_eye_2020_microwell_han_001.py | 5 +- .../fallopiantube/human_fallopiantube.py | 4 +- ...an_fallopiantube_2020_microwell_han_001.py | 3 +- .../human/femalegonad/human_femalegonad.py | 4 +- ...uman_femalegonad_2020_microwell_han_001.py | 3 +- ...uman_femalegonad_2020_microwell_han_002.py | 3 +- .../human/gallbladder/human_gallbladder.py | 4 +- ...uman_gallbladder_2020_microwell_han_001.py | 3 +- sfaira/data/human/heart/human_heart.py | 4 +- .../human_heart_2020_microwell_han_001.py | 3 +- .../human_heart_2020_microwell_han_002.py | 3 +- .../human_heart_2020_microwell_han_003.py | 3 +- .../human_heart_2020_microwell_han_004.py | 3 +- sfaira/data/human/hesc/human_hesc.py | 4 +- .../hesc/human_hesc_2020_microwell_han_001.py | 3 +- sfaira/data/human/ileum/human_ileum.py | 4 +- .../ileum/human_ileum_2019_10x_martin_001.py | 3 +- .../ileum/human_ileum_2019_10x_wang_001.py | 3 +- .../human_ileum_2020_microwell_han_001.py | 3 +- sfaira/data/human/jejunum/human_jejunum.py | 4 +- .../human_jejunum_2020_microwell_han_001.py | 3 +- sfaira/data/human/kidney/human_kidney.py | 4 +- .../human_kidney_2019_10xSn_lake_001.py | 4 +- .../human_kidney_2019_10x_stewart_001.py | 5 +- .../kidney/human_kidney_2020_10x_liao_001.py | 5 +- .../human_kidney_2020_microwell_han_001.py | 5 +- .../human_kidney_2020_microwell_han_002.py | 5 +- .../human_kidney_2020_microwell_han_003.py | 5 +- .../human_kidney_2020_microwell_han_004.py | 5 +- .../human_kidney_2020_microwell_han_005.py | 5 +- .../human_kidney_2020_microwell_han_006.py | 5 +- .../human_kidney_2020_microwell_han_007.py | 5 +- sfaira/data/human/liver/human_liver.py | 4 +- .../human_liver_2018_10x_macparland_001.py | 5 +- 
.../liver/human_liver_2019_10x_popescu_001.py | 3 +- .../human_liver_2019_10x_ramachandran_001.py | 3 +- .../human_liver_2019_mCELSeq2_aizarani_001.py | 4 +- .../human_liver_2020_microwell_han_001.py | 5 +- .../human_liver_2020_microwell_han_002.py | 5 +- .../human_liver_2020_microwell_han_003.py | 5 +- .../human_liver_2020_microwell_han_004.py | 5 +- .../human_liver_2020_microwell_han_005.py | 5 +- sfaira/data/human/lung/human_lung.py | 4 +- .../lung/human_lung_2019_10x_braga_001.py | 3 +- .../lung/human_lung_2019_10x_braga_002.py | 3 +- .../lung/human_lung_2019_10x_madissoon_001.py | 3 +- .../lung/human_lung_2019_dropseq_braga_003.py | 4 +- .../lung/human_lung_2020_10x_habermann_001.py | 10 +- .../lung/human_lung_2020_10x_lukassen_001.py | 3 +- .../lung/human_lung_2020_10x_lukassen_002.py | 3 +- .../lung/human_lung_2020_10x_miller_001.py | 3 +- .../human_lung_2020_10x_travaglini_001.py | 7 +- .../lung/human_lung_2020_microwell_han_001.py | 3 +- .../lung/human_lung_2020_microwell_han_002.py | 3 +- .../lung/human_lung_2020_microwell_han_003.py | 3 +- .../lung/human_lung_2020_microwell_han_004.py | 3 +- .../lung/human_lung_2020_microwell_han_005.py | 3 +- ...uman_lung_2020_smartseq2_travaglini_002.py | 7 +- .../data/human/malegonad/human_malegonad.py | 4 +- .../human_malegonad_2018_10x_guo_001.py | 3 +- .../human_malegonad_2020_microwell_han_001.py | 3 +- .../human_malegonad_2020_microwell_han_002.py | 3 +- sfaira/data/human/mixed/human_mixed.py | 4 +- .../mixed/human_mixed_2019_10x_szabo_001.py | 9 +- sfaira/data/human/muscle/human_muscle.py | 4 +- .../human_muscle_2020_microwell_han_001.py | 3 +- .../human_muscle_2020_microwell_han_002.py | 3 +- sfaira/data/human/omentum/human_omentum.py | 4 +- .../human_omentum_2020_microwell_han_001.py | 3 +- .../human_omentum_2020_microwell_han_002.py | 3 +- .../human_omentum_2020_microwell_han_003.py | 3 +- sfaira/data/human/pancreas/human_pancreas.py | 4 +- .../human_pancreas_2016_indrop_baron_001.py | 3 +- 
...pancreas_2016_smartseq2_segerstolpe_001.py | 4 +- .../human_pancreas_2017_smartseq2_enge_001.py | 6 +- .../human_pancreas_2020_microwell_han_001.py | 5 +- .../human_pancreas_2020_microwell_han_002.py | 5 +- .../human_pancreas_2020_microwell_han_003.py | 5 +- .../human_pancreas_2020_microwell_han_004.py | 5 +- sfaira/data/human/placenta/human_placenta.py | 4 +- .../human_placenta_2018_10x_ventotormo_001.py | 4 +- ..._placenta_2018_smartseq2_ventotormo_001.py | 4 +- .../human_placenta_2020_microwell_han_001.py | 5 +- sfaira/data/human/pleura/human_pleura.py | 4 +- .../human_pleura_2020_microwell_han_001.py | 3 +- sfaira/data/human/prostate/human_prostate.py | 4 +- .../human_prostate_2018_10x_henry_001.py | 3 +- .../human_prostate_2020_microwell_han_001.py | 3 +- sfaira/data/human/rectum/human_rectum.py | 4 +- .../rectum/human_rectum_2019_10x_wang_001.py | 2 +- .../human_rectum_2020_microwell_han_001.py | 3 +- sfaira/data/human/rib/human_rib.py | 4 +- .../rib/human_rib_2020_microwell_han_001.py | 3 +- .../rib/human_rib_2020_microwell_han_002.py | 3 +- sfaira/data/human/skin/human_skin.py | 4 +- .../skin/human_skin_2020_microwell_han_001.py | 3 +- .../skin/human_skin_2020_microwell_han_002.py | 3 +- .../data/human/spinalcord/human_spinalcord.py | 4 +- ...human_spinalcord_2020_microwell_han_001.py | 3 +- sfaira/data/human/spleen/human_spleen.py | 4 +- .../human_spleen_2019_10x_madissoon_001.py | 3 +- .../human_spleen_2020_microwell_han_001.py | 5 +- .../human_spleen_2020_microwell_han_002.py | 5 +- sfaira/data/human/stomach/human_stomach.py | 4 +- .../human_stomach_2020_microwell_han_001.py | 3 +- .../human_stomach_2020_microwell_han_002.py | 3 +- .../human_stomach_2020_microwell_han_003.py | 3 +- .../human_stomach_2020_microwell_han_004.py | 3 +- .../human_stomach_2020_microwell_han_005.py | 3 +- .../human_stomach_2020_microwell_han_006.py | 3 +- .../human_stomach_2020_microwell_han_007.py | 3 +- .../human_stomach_2020_microwell_han_008.py | 3 +- 
.../human_stomach_2020_microwell_han_009.py | 3 +- .../human_stomach_2020_microwell_han_010.py | 3 +- sfaira/data/human/thymus/human_thymus.py | 4 +- .../thymus/human_thymus_2020_10x_park_001.py | 3 +- .../human_thymus_2020_microwell_han_001.py | 3 +- .../human_thymus_2020_microwell_han_002.py | 3 +- sfaira/data/human/thyroid/human_thyroid.py | 4 +- .../human_thyroid_2020_microwell_han_001.py | 3 +- .../human_thyroid_2020_microwell_han_002.py | 3 +- sfaira/data/human/trachea/human_trachea.py | 4 +- .../human_trachea_2020_microwell_han_001.py | 3 +- sfaira/data/human/ureter/human_ureter.py | 4 +- .../human_ureter_2020_microwell_han_001.py | 3 +- sfaira/data/human/uterus/human_uterus.py | 4 +- .../human_uterus_2020_microwell_han_001.py | 3 +- sfaira/data/interactive/__init__.py | 1 + .../{mouse/fat => interactive}/external.py | 0 sfaira/data/interactive/loader.py | 44 +++ sfaira/data/mouse/__init__.py | 20 +- sfaira/data/mouse/adipose/__init__.py | 1 + .../{large_intestine => adipose}/external.py | 0 .../mouse_adipose.py} | 16 +- .../mouse_adipose_2019_10x_pisco_001.py} | 10 +- ...mouse_adipose_2019_smartseq2_pisco_001.py} | 10 +- ...mouse_adipose_2019_smartseq2_pisco_002.py} | 12 +- ...mouse_adipose_2019_smartseq2_pisco_003.py} | 10 +- .../mouse_adipose_2019_smartseq2_pisco_004.py | 71 +++++ sfaira/data/mouse/bladder/mouse_bladder.py | 4 +- .../mouse_bladder_2018_microwell_han_001.py | 4 +- .../mouse_bladder_2019_10x_pisco_001.py | 4 +- .../mouse_bladder_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/blood/__init__.py | 1 + .../mouse/{limb_muscle => blood}/external.py | 0 .../mouse_fat.py => blood/mouse_blood.py} | 16 +- .../mouse_blood_2018_microwell_han_001.py} | 10 +- .../mouse_blood_2018_microwell_han_002.py} | 10 +- .../mouse_blood_2018_microwell_han_003.py | 85 ++++++ .../mouse_blood_2018_microwell_han_004.py} | 10 +- .../mouse_blood_2018_microwell_han_005.py} | 10 +- sfaira/data/mouse/bone/__init__.py | 1 + .../mouse/{mammary_gland => bone}/external.py 
| 0 .../mouse_bone.py} | 12 +- .../mouse_bone_2018_microwell_001.py} | 8 +- .../mouse_bone_2019_10x_pisco_001.py} | 8 +- .../mouse_bone_2019_smartseq2_pisco_001.py} | 8 +- sfaira/data/mouse/brain/mouse_brain.py | 4 +- .../mouse_brain_2018_microwell_han_001.py | 4 +- .../mouse_brain_2018_microwell_han_002.py | 4 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 8 +- .../mouse_brain_2019_smartseq2_pisco_001.py | 4 +- .../mouse_brain_2019_smartseq2_pisco_002.py | 4 +- sfaira/data/mouse/colon/__init__.py | 1 + .../data/mouse/{marrow => colon}/external.py | 0 .../mouse_colon.py} | 10 +- .../mouse_colon_2019_10x_pisco_001.py} | 12 +- .../mouse_colon_2019_smartseq2_pisco_001.py | 72 +++++ .../data/mouse/diaphragm/mouse_diaphragm.py | 4 +- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/fat/__init__.py | 1 - sfaira/data/mouse/femalegonad/__init__.py | 1 + .../mouse/{ovary => femalegonad}/external.py | 0 .../mouse_femalegonad.py} | 10 +- ...use_femalegonad_2018_microwell_han_001.py} | 10 +- ...use_femalegonad_2018_microwell_han_002.py} | 10 +- sfaira/data/mouse/heart/mouse_heart.py | 4 +- .../heart/mouse_heart_2019_10x_pisco_001.py | 4 +- .../mouse_heart_2019_smartseq2_pisco_001.py | 4 +- .../mouse_heart_2019_smartseq2_pisco_002.py | 2 +- sfaira/data/mouse/ileum/__init__.py | 1 + .../{peripheral_blood => ileum}/external.py | 0 .../mouse_ileum.py} | 12 +- .../mouse_ileum_2018_microwell_han_001.py} | 10 +- .../mouse_ileum_2018_microwell_han_002.py} | 10 +- .../mouse_ileum_2018_microwell_han_003.py} | 10 +- sfaira/data/mouse/kidney/mouse_kidney.py | 4 +- .../mouse_kidney_2018_microwell_han_001.py | 4 +- .../mouse_kidney_2018_microwell_han_002.py | 4 +- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 4 +- .../mouse_kidney_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/large_intestine/__init__.py | 1 - ...arge_intestine_2019_smartseq2_pisco_001.py | 73 ----- sfaira/data/mouse/limb_muscle/__init__.py | 1 - 
...se_limb_muscle_2019_smartseq2_pisco_001.py | 70 ----- sfaira/data/mouse/liver/mouse_liver.py | 4 +- .../mouse_liver_2018_microwell_han_001.py | 4 +- .../mouse_liver_2018_microwell_han_002.py | 4 +- .../liver/mouse_liver_2019_10x_pisco_001.py | 4 +- .../mouse_liver_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/lung/mouse_lung.py | 4 +- .../lung/mouse_lung_2018_microwell_han_001.py | 4 +- .../lung/mouse_lung_2018_microwell_han_002.py | 4 +- .../lung/mouse_lung_2018_microwell_han_003.py | 4 +- .../lung/mouse_lung_2019_10x_pisco_001.py | 4 +- .../mouse_lung_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/malegonad/__init__.py | 1 + .../external.py | 0 .../mouse_malegonad.py} | 10 +- ...mouse_malegonad_2018_microwell_han_001.py} | 10 +- ...mouse_malegonad_2018_microwell_han_002.py} | 10 +- sfaira/data/mouse/mammary_gland/__init__.py | 1 - ..._mammary_gland_2019_smartseq2_pisco_001.py | 70 ----- sfaira/data/mouse/mammarygland/__init__.py | 1 + .../{testis => mammarygland}/external.py | 0 .../mouse_mammarygland.py} | 16 +- ...se_mammarygland_2018_microwell_han_001.py} | 11 +- ...se_mammarygland_2018_microwell_han_002.py} | 10 +- ...se_mammarygland_2018_microwell_han_003.py} | 10 +- ...se_mammarygland_2018_microwell_han_004.py} | 10 +- .../mouse_mammarygland_2019_10x_pisco_001.py} | 10 +- ...e_mammarygland_2019_smartseq2_pisco_001.py | 69 +++++ sfaira/data/mouse/marrow/__init__.py | 1 - sfaira/data/mouse/muscle/__init__.py | 1 + .../mouse/{trachae => muscle}/external.py | 0 .../mouse_muscle.py} | 12 +- .../mouse_muscle_2018_microwell_han_001.py} | 10 +- .../mouse_muscle_2019_10x_pisco_001.py} | 10 +- .../mouse_muscle_2019_smartseq2_pisco_001.py} | 10 +- sfaira/data/mouse/ovary/__init__.py | 1 - sfaira/data/mouse/pancreas/mouse_pancreas.py | 4 +- .../mouse_pancreas_2018_microwell_han_001.py | 4 +- .../mouse_pancreas_2019_10x_pisco_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_001.py | 4 +- .../mouse_pancreas_2019_10x_thompson_002.py | 4 +- 
.../mouse_pancreas_2019_10x_thompson_003.py | 4 +- .../mouse_pancreas_2019_10x_thompson_004.py | 4 +- .../mouse_pancreas_2019_10x_thompson_005.py | 4 +- .../mouse_pancreas_2019_10x_thompson_006.py | 4 +- .../mouse_pancreas_2019_10x_thompson_007.py | 4 +- .../mouse_pancreas_2019_10x_thompson_008.py | 4 +- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 4 +- .../data/mouse/peripheral_blood/__init__.py | 1 - ...peripheral_blood_2018_microwell_han_005.py | 86 ------ sfaira/data/mouse/placenta/mouse_placenta.py | 4 +- .../mouse_placenta_2018_microwell_han_001.py | 4 +- .../mouse_placenta_2018_microwell_han_002.py | 4 +- sfaira/data/mouse/prostate/mouse_prostate.py | 4 +- .../mouse_prostate_2018_microwell_han_001.py | 4 +- .../mouse_prostate_2018_microwell_han_002.py | 4 +- sfaira/data/mouse/rib/mouse_rib.py | 4 +- .../rib/mouse_rib_2018_microwell_han_001.py | 4 +- .../rib/mouse_rib_2018_microwell_han_002.py | 4 +- .../rib/mouse_rib_2018_microwell_han_003.py | 4 +- sfaira/data/mouse/skin/mouse_skin.py | 4 +- .../skin/mouse_skin_2019_10x_pisco_001.py | 4 +- .../mouse_skin_2019_smartseq2_pisco_001.py | 6 +- sfaira/data/mouse/small_intestine/__init__.py | 1 - sfaira/data/mouse/spleen/mouse_spleen.py | 4 +- .../mouse_spleen_2018_microwell_han_001.py | 4 +- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 4 +- .../mouse_spleen_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/stomach/mouse_stomach.py | 4 +- .../mouse_stomach_2018_microwell_han_001.py | 4 +- sfaira/data/mouse/testis/__init__.py | 1 - sfaira/data/mouse/thymus/mouse_thymus.py | 4 +- .../mouse_thymus_2018_microwell_han_001.py | 4 +- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 4 +- .../mouse_thymus_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/tongue/mouse_tongue.py | 4 +- .../tongue/mouse_tongue_2019_10x_pisco_001.py | 4 +- .../mouse_tongue_2019_smartseq2_pisco_001.py | 4 +- .../mouse/{trachae => trachea}/__init__.py | 0 sfaira/data/mouse/trachea/external.py | 1 + .../{trachae => 
trachea}/mouse_trachea.py | 4 +- .../mouse_trachea_2019_10x_pisco_001.py | 6 +- .../mouse_trachea_2019_smartseq2_pisco_001.py | 4 +- sfaira/data/mouse/uterus/mouse_uterus.py | 4 +- .../mouse_uterus_2018_microwell_han_001.py | 4 +- .../mouse_uterus_2018_microwell_han_002.py | 4 +- sfaira/data/utils/create_meta_mouse.py | 18 +- sfaira/data/utils/write_backed_human.py | 2 +- sfaira/data/utils/write_backed_mouse.py | 20 +- sfaira/estimators/external.py | 2 +- sfaira/estimators/keras.py | 90 +++--- sfaira/interface/__init__.py | 1 - sfaira/interface/external.py | 2 +- sfaira/interface/user_interface.py | 229 +++++++++++---- sfaira/models/celltype/external.py | 3 +- sfaira/models/embedding/external.py | 4 +- sfaira/preprocessing.py | 12 - sfaira/train/external.py | 2 +- sfaira/train/summaries.py | 268 +++++++++++++----- sfaira/train/train_model.py | 28 +- sfaira/unit_tests/external.py | 4 +- sfaira/unit_tests/test_models.py | 2 +- sfaira/versions/celltype_versions/__init__.py | 31 +- .../celltype_versions/mouse/__init__.py | 38 +-- .../mouse/{fat.py => adipose.py} | 12 +- .../mouse/{peripheral_blood.py => blood.py} | 12 +- .../mouse/{marrow.py => bone.py} | 12 +- .../mouse/{large_intestine.py => colon.py} | 12 +- .../mouse/{ovary.py => femalegonad.py} | 12 +- .../mouse/{small_intestine.py => ileum.py} | 12 +- .../mouse/{testis.py => malegonad.py} | 12 +- .../{mammary_gland.py => mammarygland.py} | 0 .../mouse/{limb_muscle.py => muscle.py} | 12 +- .../mouse/{trachae.py => trachea.py} | 12 +- .../genome_versions/class_interface.py | 25 +- .../genome_versions/human/genome_container.py | 3 +- .../genome_versions/mouse/genome_container.py | 1 + .../human/celltype/celltypemarker.py | 10 +- .../human/celltype/celltypemlp.py | 10 +- .../topology_versions/human/embedding/ae.py | 10 +- .../human/embedding/linear.py | 10 +- .../topology_versions/human/embedding/nmf.py | 10 +- .../topology_versions/human/embedding/vae.py | 10 +- .../human/embedding/vaeiaf.py | 10 +- 
.../human/embedding/vaevamp.py | 18 +- .../mouse/celltype/celltypemarker.py | 10 +- .../mouse/celltype/celltypemlp.py | 10 +- .../topology_versions/mouse/embedding/ae.py | 10 +- .../mouse/embedding/linear.py | 10 +- .../topology_versions/mouse/embedding/nmf.py | 10 +- .../topology_versions/mouse/embedding/vae.py | 10 +- .../mouse/embedding/vaeiaf.py | 12 +- .../mouse/embedding/vaevamp.py | 10 +- 421 files changed, 2327 insertions(+), 1486 deletions(-) create mode 100644 docs/api/index.rst create mode 100644 docs/ecosystem.rst create mode 100644 docs/environment_brief.rst delete mode 100644 docs/genomes.rst create mode 100644 docs/news.rst delete mode 100644 docs/ontologies.rst create mode 100644 docs/release-latest.rst create mode 100644 docs/release-notes.rst create mode 100644 docs/roadmap.rst delete mode 100644 docs/training.rst delete mode 100644 resources/images/concept.jpeg create mode 100644 resources/images/concept.png create mode 100644 resources/images/data_zoo.png create mode 100644 resources/images/model_zoo.png delete mode 100644 sfaira/api/data.py delete mode 100644 sfaira/api/genomes.py delete mode 100644 sfaira/api/models.py delete mode 100644 sfaira/api/train.py delete mode 100644 sfaira/api/ui.py create mode 100644 sfaira/data/interactive/__init__.py rename sfaira/data/{mouse/fat => interactive}/external.py (100%) create mode 100644 sfaira/data/interactive/loader.py create mode 100644 sfaira/data/mouse/adipose/__init__.py rename sfaira/data/mouse/{large_intestine => adipose}/external.py (100%) rename sfaira/data/mouse/{peripheral_blood/mouse_peripheral_blood.py => adipose/mouse_adipose.py} (54%) rename sfaira/data/mouse/{fat/mouse_fat_2019_10x_pisco_001.py => adipose/mouse_adipose_2019_10x_pisco_001.py} (88%) rename sfaira/data/mouse/{fat/mouse_fat_2019_smartseq2_pisco_004.py => adipose/mouse_adipose_2019_smartseq2_pisco_001.py} (88%) rename sfaira/data/mouse/{fat/mouse_fat_2019_smartseq2_pisco_001.py => 
adipose/mouse_adipose_2019_smartseq2_pisco_002.py} (88%) rename sfaira/data/mouse/{fat/mouse_fat_2019_smartseq2_pisco_003.py => adipose/mouse_adipose_2019_smartseq2_pisco_003.py} (88%) create mode 100644 sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py create mode 100644 sfaira/data/mouse/blood/__init__.py rename sfaira/data/mouse/{limb_muscle => blood}/external.py (100%) rename sfaira/data/mouse/{fat/mouse_fat.py => blood/mouse_blood.py} (59%) rename sfaira/data/mouse/{peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py => blood/mouse_blood_2018_microwell_han_001.py} (91%) rename sfaira/data/mouse/{peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py => blood/mouse_blood_2018_microwell_han_002.py} (91%) create mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py rename sfaira/data/mouse/{peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py => blood/mouse_blood_2018_microwell_han_004.py} (91%) rename sfaira/data/mouse/{peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py => blood/mouse_blood_2018_microwell_han_005.py} (91%) create mode 100644 sfaira/data/mouse/bone/__init__.py rename sfaira/data/mouse/{mammary_gland => bone}/external.py (100%) rename sfaira/data/mouse/{limb_muscle/mouse_limb_muscle.py => bone/mouse_bone.py} (59%) rename sfaira/data/mouse/{marrow/mouse_marrow_2018_microwell_001.py => bone/mouse_bone_2018_microwell_001.py} (91%) rename sfaira/data/mouse/{marrow/mouse_marrow_2019_10x_pisco_001.py => bone/mouse_bone_2019_10x_pisco_001.py} (90%) rename sfaira/data/mouse/{marrow/mouse_marrow_2019_smartseq2_pisco_001.py => bone/mouse_bone_2019_smartseq2_pisco_001.py} (90%) create mode 100644 sfaira/data/mouse/colon/__init__.py rename sfaira/data/mouse/{marrow => colon}/external.py (100%) rename sfaira/data/mouse/{large_intestine/mouse_large_intestine.py => colon/mouse_colon.py} (60%) rename 
sfaira/data/mouse/{large_intestine/mouse_large_intestine_2019_10x_pisco_001.py => colon/mouse_colon_2019_10x_pisco_001.py} (85%) create mode 100644 sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/fat/__init__.py create mode 100644 sfaira/data/mouse/femalegonad/__init__.py rename sfaira/data/mouse/{ovary => femalegonad}/external.py (100%) rename sfaira/data/mouse/{testis/mouse_testis.py => femalegonad/mouse_femalegonad.py} (61%) rename sfaira/data/mouse/{ovary/mouse_ovary_2018_microwell_han_001.py => femalegonad/mouse_femalegonad_2018_microwell_han_001.py} (90%) rename sfaira/data/mouse/{ovary/mouse_ovary_2018_microwell_han_002.py => femalegonad/mouse_femalegonad_2018_microwell_han_002.py} (90%) create mode 100644 sfaira/data/mouse/ileum/__init__.py rename sfaira/data/mouse/{peripheral_blood => ileum}/external.py (100%) rename sfaira/data/mouse/{small_intestine/mouse_small_intestine.py => ileum/mouse_ileum.py} (57%) rename sfaira/data/mouse/{small_intestine/mouse_small_intestine_2018_microwell_han_001.py => ileum/mouse_ileum_2018_microwell_han_001.py} (91%) rename sfaira/data/mouse/{small_intestine/mouse_small_intestine_2018_microwell_han_002.py => ileum/mouse_ileum_2018_microwell_han_002.py} (91%) rename sfaira/data/mouse/{small_intestine/mouse_small_intestine_2018_microwell_han_003.py => ileum/mouse_ileum_2018_microwell_han_003.py} (91%) delete mode 100644 sfaira/data/mouse/large_intestine/__init__.py delete mode 100644 sfaira/data/mouse/limb_muscle/__init__.py create mode 100644 sfaira/data/mouse/malegonad/__init__.py rename sfaira/data/mouse/{small_intestine => malegonad}/external.py (100%) rename sfaira/data/mouse/{ovary/mouse_ovary.py => malegonad/mouse_malegonad.py} (61%) rename sfaira/data/mouse/{testis/mouse_testis_2018_microwell_han_001.py => malegonad/mouse_malegonad_2018_microwell_han_001.py} (91%) rename sfaira/data/mouse/{testis/mouse_testis_2018_microwell_han_002.py => 
malegonad/mouse_malegonad_2018_microwell_han_002.py} (91%) delete mode 100644 sfaira/data/mouse/mammary_gland/__init__.py create mode 100644 sfaira/data/mouse/mammarygland/__init__.py rename sfaira/data/mouse/{testis => mammarygland}/external.py (100%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland.py => mammarygland/mouse_mammarygland.py} (57%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py => mammarygland/mouse_mammarygland_2018_microwell_han_001.py} (90%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py => mammarygland/mouse_mammarygland_2018_microwell_han_002.py} (90%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py => mammarygland/mouse_mammarygland_2018_microwell_han_003.py} (90%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py => mammarygland/mouse_mammarygland_2018_microwell_han_004.py} (90%) rename sfaira/data/mouse/{mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py => mammarygland/mouse_mammarygland_2019_10x_pisco_001.py} (87%) create mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/marrow/__init__.py create mode 100644 sfaira/data/mouse/muscle/__init__.py rename sfaira/data/mouse/{trachae => muscle}/external.py (100%) rename sfaira/data/mouse/{marrow/mouse_marrow.py => muscle/mouse_muscle.py} (61%) rename sfaira/data/mouse/{limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py => muscle/mouse_muscle_2018_microwell_han_001.py} (90%) rename sfaira/data/mouse/{limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py => muscle/mouse_muscle_2019_10x_pisco_001.py} (87%) rename sfaira/data/mouse/{fat/mouse_fat_2019_smartseq2_pisco_002.py => muscle/mouse_muscle_2019_smartseq2_pisco_001.py} (88%) delete mode 100644 sfaira/data/mouse/ovary/__init__.py delete mode 100644 sfaira/data/mouse/peripheral_blood/__init__.py delete 
mode 100644 sfaira/data/mouse/small_intestine/__init__.py delete mode 100644 sfaira/data/mouse/testis/__init__.py rename sfaira/data/mouse/{trachae => trachea}/__init__.py (100%) create mode 100644 sfaira/data/mouse/trachea/external.py rename sfaira/data/mouse/{trachae => trachea}/mouse_trachea.py (84%) rename sfaira/data/mouse/{trachae => trachea}/mouse_trachea_2019_10x_pisco_001.py (90%) rename sfaira/data/mouse/{trachae => trachea}/mouse_trachea_2019_smartseq2_pisco_001.py (93%) delete mode 100644 sfaira/preprocessing.py rename sfaira/versions/celltype_versions/mouse/{fat.py => adipose.py} (76%) rename sfaira/versions/celltype_versions/mouse/{peripheral_blood.py => blood.py} (60%) rename sfaira/versions/celltype_versions/mouse/{marrow.py => bone.py} (83%) rename sfaira/versions/celltype_versions/mouse/{large_intestine.py => colon.py} (67%) rename sfaira/versions/celltype_versions/mouse/{ovary.py => femalegonad.py} (67%) rename sfaira/versions/celltype_versions/mouse/{small_intestine.py => ileum.py} (64%) rename sfaira/versions/celltype_versions/mouse/{testis.py => malegonad.py} (66%) rename sfaira/versions/celltype_versions/mouse/{mammary_gland.py => mammarygland.py} (100%) rename sfaira/versions/celltype_versions/mouse/{limb_muscle.py => muscle.py} (72%) rename sfaira/versions/celltype_versions/mouse/{trachae.py => trachea.py} (77%) diff --git a/README.rst b/README.rst index ac3c3b500..cf758b0c7 100644 --- a/README.rst +++ b/README.rst @@ -1,14 +1,31 @@ -Managing single-cell data sets and neural networks used for analysis -===================================================================== +|Stars| |PyPI| |PyPIDownloads| -.. image:: https://github.com/theislab/sfaira/blob/master/resources/images/concept.jpeg - :width: 600px +.. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow + :target: https://github.com/theislab/sfaira/stargazers +.. 
|PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI + :target: https://pypi.org/project/sfaira +.. |PyPIDownloads| image:: https://pepy.tech/badge/sfaira + :target: https://pepy.tech/project/sfaira + + +sfaira - data and model repository for single-cell data +======================================================= + +.. image:: https://github.com/theislab/sfaira/blob/master/resources/images/concept.png + :width: 1000px :align: center sfaira_ is a model and a data repository in a single python package. -Its data API gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. -Its model API gives user streamlined access to pre-trained models and to common model architectures to ease usage of neural networks in common single-cell analysis workflows. +We provide an interactive overview of the current state of the zoos on sfaira-site_. + +Its data zoo gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. +Its model zoo gives user streamlined access to pre-trained models and to common model architectures to ease usage of neural networks in common single-cell analysis workflows: +A model zoo is a software infrastructure that improves user access to pre-trained models which are separately published, such as DCA_ or scArches_: +Instead of focussing on developing new models, we focus on making models easily accessible to users and distributable by developers. sfaira integrates into scanpy_ workflows. .. _scanpy: https://github.com/theislab/scanpy .. _sfaira: https://sfaira.readthedocs.io +.. _DCA: https://github.com/theislab/dca +.. _scArches: https://github.com/theislab/scarches +.. 
_sfaira-site: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 000000000..0230f0b1a --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,154 @@ +.. module:: sfaira +.. automodule:: sfaira + :noindex: + +API +=== + +Import sfaira as:: + + import sfaira + + + +Data: `data` +------------ + +.. module:: sfaira.data +.. currentmodule:: sfaira + +The sfaira data zoo API. + + +Pre-defined data set collections +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to curated subsets of the data zoo, e.g. all data sets from human lungs. + +.. autosummary:: + :toctree: . + + data.human + data.mouse + + +Functionalities for interactive data analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to functionalities you need to define your own data set collections based on the sfaira data zoo. + +.. autosummary:: + :toctree: . + + data.DatasetBase + data.DatasetGroupBase + data.DatasetSuperGroup + + +Functionalities for interactive data analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module gives you access to functionalities you need to load new data live into the data zoo to handle a raw data set in the context of zoo data sets. + +.. autosummary:: + :toctree: . + + data.DatasetInteractive + + +Genomes: `genomes` +------------------ + +.. module:: sfaira.genomes +.. currentmodule:: sfaira + +This sub-module gives you access to properties of the genome representations used in sfaira. + +.. autosummary:: + :toctree: . + + genomes.ExtractFeatureListEnsemble + + +Models: `models` +---------------- + +.. module:: sfaira.models +.. currentmodule:: sfaira + +The sfaira model zoo API for advanced use. +This API is structured by streamlined, task-specific APIs for specific analysis problems. +This API is targeted at developers, see also `ui` for a user centric wrapping API for this model zoo. 
+ + +Cell-type predictor models +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This sub-module handles models that predict cell types. + +.. autosummary:: + :toctree: . + + models.celltype + + +Embedding models +~~~~~~~~~~~~~~~~ + +This sub-module handles models that embed expression vectors (cells) into a latent space. + +.. autosummary:: + :toctree: . + + models.embedding + + +Train: `train` +-------------- + +.. module:: sfaira.train +.. currentmodule:: sfaira + +The interface for training sfaira compatible models. +This is a sub-module dedicated for developers to ease model training and deployment. + +Trainer classes +~~~~~~~~~~~~~~~ + +Trainer class wrap estimator classes (which wrap model classes) and handle grid-search specific tasks centred on model fits, +such as saving evaluation metrics and model weights. + +.. autosummary:: + :toctree: . + + train.TargetZoos + train.TrainModelCelltype + train.TrainModelEmbedding + + +Grid search summary classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Grid search summary classes allow a developer to easily interact with a finished grid search by loading and summarising results, +which were saved through Trainer classes. + +.. autosummary:: + :toctree: . + + train.GridsearchContainer + train.SummarizeGridsearchCelltype + train.SummarizeGridsearchEmbedding + +User interface: `ui` +-------------------- + +.. module:: sfaira.ui +.. currentmodule:: sfaira + +This sub-module gives users access to the model zoo, including model query from remote servers. +This API is designed to be used in analysis workflows and does not require any understanding of the way models are defined and stored. + +.. autosummary:: + :toctree: . + + ui.UserInterface diff --git a/docs/data.rst b/docs/data.rst index 1cefc1745..21ac5972c 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -1,22 +1,46 @@ Data ====== +.. 
image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png + :width: 600px + :align: center + Build data repository locally ------------------------------ -Build a repository structure: -1. Choose a directory to dedicate to the data base, called root in the following. -2. Make subfolders in root for each organism for which you want to build a data base. -3. Make subfolders for each organ whithin each organism for which you want to build a data base. +Build a repository structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 1. Choose a directory to dedicate to the data base, called root in the following. + 2. Make subfolders in root for each organism for which you want to build a data base. + 3. Make subfolders for each organ whithin each organism for which you want to build a data base. + +We maintain a couple of download scripts that automatise this process, which have to be executed in a shell once to download specific subsets of the full data zoo. +These scripts can be found in sfaira.data.download_scripts. + +Use 3rd party repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some organization provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +One example for such an organization is the cellxgene_ data portal. +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon! +Contact us for support of any other repositories. + +.. _cellxgene: https://cellxgene.cziscience.com/ + +Add data sets +~~~~~~~~~~~~~ -Add data sets: -4. For each species and organ combination, choose the data sets that you want to use. -5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data -using the described processing if this is required: This is usually done to speed up loading for file -formats that are difficult to access. + 4. 
For each species and organ combination, choose the data sets that you want to use. + 5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data + using the described processing if this is required: This is usually done to speed up loading for file + formats that are difficult to access. + +Data loaders +------------ Use data loaders on existing data repository --------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You only want to use data sets with existing data loaders and have adapted your directory structure as above? In that case, you can immediately start using the data loader functions, you just need to supply the root directory @@ -25,10 +49,8 @@ Depending on the functionalities you want to use, you need to create a directory can be easily done via the data set api itself, example python scripts are under benchmarks/data_preparation. This meta information is necessary to anticipate file sizes for backing merged adata objects for example. -TODO example. - Contribute data loaders ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. Each such class is in a separate file and inherits from a base class that contains most functionalities. 
Accordingly, the data loader class @@ -74,7 +96,7 @@ before it is loaded into memory: if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "human/eye/my_data.h5ad") defined file in streamlined directory structure + fn = os.path.join(self.path, "human", "eye", "my_data.h5ad") defined file in streamlined directory structure self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad self.adata.uns["lab"] = x # load the adata.uns with meta data @@ -108,13 +130,59 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. - -Handling ontologies in data loaders ------------------------------------ - -Each data loader has a versioned cell type annotation map, a dictionary. -This dictionary allows mapping of the cell type annotations that come with the raw form of the data set to the cell type -universe or ontology terms defined in sfaira, this is, however, only done upon loading of the data (.load()). -The outcome of this map is a new set of cell type labels that can be propagated to leave nodes of the ontology graph. -This dictionary requires a new entry for each new version of the corresponding cell type universe. - +Ontology management +------------------- + +Sfaira maintains versioned cell type universes and ontologies by species and organ. +A cell type universe is a list of the unique, most fine-grained cell type definitions available. +These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, +an ontology ID. +Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality +does not allow to distinguish between T cell subtypes. 
+To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again +defined by a cell type identifier. +Such a hierarchy can be written as a directed acyclic graph which has the cell type universe as its leaf nodes. +Intuitively, the cell type hierarchy graph depends on the cell type universe. +Accordingly, both are versioned together in sfaira: +Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an +incrementation in both of their versions. +These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the +file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available +even after updates. +This versioning without deprecation of the old objects allows sfaira to execute and train models that were designed +for older cell type universes and thus ensures reproducibility. + +Contribute cell types to ontologies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To contribute new cell types or change existing cell type universe entries, the cell type universe version has to be +incremented and the new entry can simply be added to the list or modified in the list. +We do not increment the universe version if a change does not influence the identity of a leaf node with respect to +the other types in the universe, i.e. if it simply changes the spelling of a cell type or if an ontology ID is added to +a type that previously did not have one. + +Contribute hierarchies to ontologies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set +of the leaf nodes (cell type universe) of the corresponding universe version.
+ + +Using ontologies to train cell type classifiers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cell type classifiers can be trained on data sets with different coarseness of cell type annotation using aggregate +cross-entropy as a loss and aggregate accuracy as a metric. +The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms +that correspond to intermediate nodes (rather than leaf nodes) are encountered in the label set. + +Genome management +----------------- + +We streamline feature spaces used by models by defining standardized gene sets that are used as model input. +Per default, sfaira works with the protein coding genes of a genome assembly right now. +A model topology version includes the genome it was trained for, which also defines the features of this model as genes. +As genome assemblies are updated, model topology versions can be updated and models retrained to reflect these changes. +Note that because protein coding genes do not change drastically between genome assemblies, +samples can be carried over to assemblies they were not aligned against by matching gene identifiers. +Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst new file mode 100644 index 000000000..aca80b451 --- /dev/null +++ b/docs/ecosystem.rst @@ -0,0 +1,61 @@ +Ecosystem +========= + +scanpy +------ + +scanpy_ provides an environment of tools that can be used to analyse single-cell data in python. +sfaira allows users to easily query third party data sets and models to complement these analysis workflows. + +.. _scanpy: https://github.com/theislab/scanpy + +Data zoo +-------- + +Data providers which streamline data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some organizations provide streamlined data objects that can be directly consumed by data zoos such as sfaira.
+Examples for such data providers are: + + - Human Cell Atlas data portal (HCA DCP_) + - cellxgene_ data portal + - Broad_ institute single cell data portal + - EBI_ single cell expression atlas + +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon, we are working on interfacing more such organisations! +Contact us for support of any other repositories. + +.. _DCP: https://data.humancellatlas.org/explore/ +.. _cellxgene: https://cellxgene.cziscience.com/ +.. _Broad: https://singlecell.broadinstitute.org/single_cell +.. _EBI: https://www.ebi.ac.uk/gxa/sc/home + + +Study-centric data set servers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many authors of data sets provide their data sets on servers: + + - GEO_ + - cloud storage servers + - manuscript supplements + +Our data zoo interface is able to represent these data sets such that they can be queried in a streamlined fashion, +together with many other data sets. + +.. _GEO: https://www.ncbi.nlm.nih.gov/geo/ + + +Single-cell study look-up tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Svensson_ et al. published a single-cell database_ in the form of a table in which each row contains a description of a study which published single-cell RNA-seq data. +Some of these data sets are already included in sfaira, +consider also our interactive website_ for a graphical user interface to our complete data zoo. +Note that this website can be used as a look-up table but sfaira also allows you to directly load and interact with these data sets. + +.. _Svensson: https://academic.oup.com/database/article/doi/10.1093/database/baaa073/6008692 +.. _database: https://www.nxn.se/single-cell-studies/gui +.. 
_website: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/environment_brief.rst b/docs/environment_brief.rst new file mode 100644 index 000000000..6cca3acde --- /dev/null +++ b/docs/environment_brief.rst @@ -0,0 +1,26 @@ +.. role:: small +.. role:: smaller + +sfaira fits into an environment of many other projects centred on making data and models accessible. + +Data zoo +~~~~~~~~ + +We focus on providing a python interface to interact with locally stored data set collections +without requiring dedicated data reading and annotation harmonisation scripts: +These code blocks are absorbed into our data zoo backend and can be conveniently triggered with short commands. + + +Model zoo +~~~~~~~~~ + +A large body of recent research has been devoted to improving models that learn representations of cells captured with single-cell RNA-seq. +These models include embedding models such as autoencoders and cell type prediction models. +Many of these models are implemented in software packages and can be deployed on new data sets. +In many of these cases, it also makes sense to use pre-trained models to leverage previously published modelling results. +We provide a single interface to interact with such pre-trained models which abstracts model settings into an API +so that users can easily switch between different pre-trained models. +Importantly, model execution is performed locally so that data does not have to be uploaded to external servers +and model storage is decentralized so that anybody can contribute models easily. +Users benefit from easy, streamlined access to models that can be used in analysis workflows, +developers benefit from being able to deploy models to a large community of users without having to set up a model zoo.
diff --git a/docs/genomes.rst b/docs/genomes.rst deleted file mode 100644 index d144348e5..000000000 --- a/docs/genomes.rst +++ /dev/null @@ -1,14 +0,0 @@ -Genomes -========== - -Introduction to sfaira genome assembly management -------------------------------------------------- - - -Contribute genome assemblies to sfaira --------------------------------------- - - -Use a model architecture on a new genome assembly --------------------------------------------------- - diff --git a/docs/index.rst b/docs/index.rst index 9c0c95392..c699c8c1d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,28 +1,46 @@ -.. You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +|Stars| |PyPI| |PyPIDownloads| +.. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow + :target: https://github.com/theislab/sfaira/stargazers +.. |PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI + :target: https://pypi.org/project/sfaira +.. |PyPIDownloads| image:: https://pepy.tech/badge/sfaira + :target: https://pepy.tech/project/sfaira +sfaira - data and model repository for single-cell data +======================================================= -Welcome to sfaira's documentation! -==================================== +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/concept.png + :width: 600px + :align: center + +sfaira_ is a model and a data repository in a single python package. +We provide an interactive overview of the current state of the zoos on sfaira-site_. + +.. _sfaira: https://sfaira.readthedocs.io +.. _sfaira-site: https://theislab.github.io/sfaira-site/index.html + +.. include:: environment_brief.rst + +News +---- + +.. include:: news.rst + +Latest additions +---------------- + +.. include:: release-latest.rst .. 
toctree:: - :maxdepth: 2 - :caption: Contents: + :maxdepth: 1 + :hidden: installation api/index tutorials - models data - ontologies - genomes - training - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + models + ecosystem + roadmap + release-notes diff --git a/docs/models.rst b/docs/models.rst index a8b3f44ed..89b6f9545 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -1,12 +1,24 @@ Models ====== -Introduction to sfaira model management ---------------------------------------- +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/model_zoo.png + :width: 600px + :align: center + +User interface +-------------- + +The user interface allows users to query model code and parameter estimates to run on local data. +It takes care of downloading model parameters from the relevant cloud storage, loading parameters into a model instance locally and performing the forward pass. +With the user interface, users only have to worry about which model they want to execute, but now how this is facilitated. + + +Model management +---------------- A sfaira model is a class that inherits from BasicModel which defines a tf.keras.models.Model in self.training_model. This training_model describes the full forward pass. Additionally, embedding models also have an attribute X, a -tf.keras.models.Model that desribes the partial forward pass into the embedding layer. +tf.keras.models.Model that describes the partial forward pass into the embedding layer. Such a model class, e.g. ModelX, is wrapped by an inheriting class ModelXVersioned, that handles properties of the model architecture. @@ -16,11 +28,19 @@ In particular, ModelXVersioned - has access to a map of a version ID to an architectural hyperparameter setting (Topologies), allowing this class to set depth, width, etc of the model directly based on the name of the yielded model. 
- has access to the feature space of the model, including its gene names, which are defined by the model topology in Topologies - Contribute models ------------------ +~~~~~~~~~~~~~~~~~ Models can be contributed and used in two ways - Full model code in sfaira repo - - Sfaira compatible model code in external package (to come) + - sfaira compatible model code in external package (to come) + +Training +-------- + +Estimator classes +~~~~~~~~~~~~~~~~~ + +We define estimator classes that have model instances as an attribute, that orchestrate all major aspects of model +fitting, such as a data loading, data streaming and model evaluation. \ No newline at end of file diff --git a/docs/news.rst b/docs/news.rst new file mode 100644 index 000000000..632cf17ac --- /dev/null +++ b/docs/news.rst @@ -0,0 +1 @@ +No news yet, stay tuned! diff --git a/docs/ontologies.rst b/docs/ontologies.rst deleted file mode 100644 index 0091c9a45..000000000 --- a/docs/ontologies.rst +++ /dev/null @@ -1,48 +0,0 @@ -Ontologies -========== - -Introduction to sfaira ontology management ------------------------------------------- - -Sfaira maintains versioned cell type universes and ontologies by species and organ. -A cell type universe is a list of the unique, most fine-grained cell type definitions available. -These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, -an ontology ID. -Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality -does not allow to distinguish between T cell subtypes. -To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again -defined by a cell type identifier. -Such a hierarchy can be writted as directed acyclic graph which has the cell type universe as its leave nodes. -Intuitively, the cell type hierarchy graph depends on the cell type universe. 
-Accordingly, both are versioned together in sfaira: -Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an -incrementation in both of their versions. -These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the -file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available -even after updates. -This versioning without depreceation of the old objects allows sfaira to execute and train models that were designed -for older cell type universes and thus ensures reproducibility. - -Contribute cell types to ontologies ------------------------------------ - -To contibute new cell types or change existing cell type universe entries, the cell type universe version has to be -incremented and the new entry can simply be added to the list or modified in the list. -We do not increment the universe version if a change does not influence the identity of a leave node with respect to -the other types in the universe, ie if it simply changes the spelling of a cell type or if an onology ID is added to -a type that previously did not have one. - -Contribute hierarchies to ontologies ------------------------------------- - -To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set -of the leave nodes (cell type universe) of the corresponding universe version. - - -Using ontologies to train cell type classifiers ------------------------------------------------ - -Cell type classifiers can be trained on data sets with different coarsity of cell type annotation using aggregate -cross-entropy as a loss and aggregate accuracy as a metric. -The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms -that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. 
diff --git a/docs/release-latest.rst b/docs/release-latest.rst new file mode 100644 index 000000000..913476eb3 --- /dev/null +++ b/docs/release-latest.rst @@ -0,0 +1,6 @@ +.. role:: small +.. role:: smaller + +0.2.1 :small:`2020-09-7` +~~~~~~~~~~~~~~~~~~~~~~~~ +Initial release with online documentation. diff --git a/docs/release-notes.rst b/docs/release-notes.rst new file mode 100644 index 000000000..7f8561271 --- /dev/null +++ b/docs/release-notes.rst @@ -0,0 +1,11 @@ +Release Notes +============= + +.. role:: small +.. role:: smaller + + +Version 0.2 +----------- + +.. include:: release-latest.rst diff --git a/docs/roadmap.rst b/docs/roadmap.rst new file mode 100644 index 000000000..69b1a3ee2 --- /dev/null +++ b/docs/roadmap.rst @@ -0,0 +1,20 @@ +Roadmap +======= + +Cell ontologies +~~~~~~~~~~~~~~~ +We are currently migrating our ontology to use the Cell Ontology_ as a backbone. +For details, read through this milestone_. + +.. _Ontology: http://www.obofoundry.org/ontology/cl.html +.. _milestone: https://github.com/theislab/sfaira/milestone/1 + + +Interface online data repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are preparing to interface online data repositories which provide streamlined data. +This allows users to build local data set collections more easily because these providers usually have a clear download interface, +consider the cellxgene_ data portal for example. +We aim to represent both these data set portals and data sets that have not been streamlined in such a fashion to provide a comprehensive collection of as many data sets as possible. + +.. 
_cellxgene: https://cellxgene.cziscience.com/ diff --git a/docs/training.rst b/docs/training.rst deleted file mode 100644 index f15189d9a..000000000 --- a/docs/training.rst +++ /dev/null @@ -1,8 +0,0 @@ -Training -========= - -Introduction to sfaira estimator classes ----------------------------------------- - -We define estimator classes that have model instances as an attribute, that orchestrate all major aspects of model -fitting, such as a data loading, data streaming and model evaluation. diff --git a/docs/tutorials.rst b/docs/tutorials.rst index bb1654138..a644fc8bf 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -1,4 +1,17 @@ Tutorials ========= -To come shortly! +Queries to data zoo +------------------- + +We provide a tutorial for queries to the data zoo through our Python API (dataloaders_) and for assembling meta data across the zoo (metadata_). + +.. _dataloaders: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/data_loaders.ipynb +.. _metadata: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/meta_data.ipynb + +Queries to model zoo +-------------------- + +We provide a tutorial for interacting with our model zoo through a Python API in a scanpy workflow through our `user interface`_. + +.. 
_user interface: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/user_interface.ipynb diff --git a/requirements.txt b/requirements.txt index 8c0d197f1..2ecab3f17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ sphinx sphinx-autodoc-typehints sphinx_rtd_theme tqdm +requests diff --git a/resources/images/concept.jpeg b/resources/images/concept.jpeg deleted file mode 100644 index 77b1822e023572d09646da8d5556391753943f99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 771343 zcmeFZcUTlnw=dd=ib^t&vw(s`$w6R5B}>j(K|pfOVHA}d1Ox?yAtP}l=bS_YB@=j;=2qotOl8?gOHa6qGOi8UZZ7>vw)vuMg8lOhf~v7uR21{j`O86o6a2AgJ*8 zv8kGds;t6OncqL^0_$@pC%a!4+dH_qXvjTc($hC!B3J+jz&~Pu0$_S>?&>71ruOvL znE(F&@A3cfaXk9_w*y1mzsCAb6CZd7dR*30_&fD)vf-jfBjWA`>WpmyN{N}W3bOSSZ6YS`5YVr z4+Yk_%>H)X>TmUbn-}Z?5I(VXb+UWz&7>yq>+An)akKCO*8u>J9i6;gtgT+UF-e2F zWWl81V9vwE#DAYp1OR^R`CqdhsQ=zeJc=LxJojb}03@4mI9%O7&nY$mKnZx|wVHpP zV;2R08-xJxt;XrO%k$sk;e($SEJ149&jkQteE_)i6THUI)lz`}cRL=@BLKKGi^Cn< z1ptCL062e-!<}W|aOar-fIkfY%?`L`;KmigYSJ`(JZ9j+4LtlCc(`_e5!~KOc)!bE zKOFpi0srEq%LG@hULyoMR9pux;Njz6xQKu0(l6q~^9S33i#IOayv-+l`Ih=~0%m72 z{?{?-S6CjEw2*5IAXx>>Tmr9NqoAasrnz&Mjr|^nppdYLsF?WUCo-~f@(PNYTG~3g z;Bw7hSXf%Uw6<||bNBG{^7eW2_FYhLNN8AW+=uvtkDn5gGBUHWb8^4r<(HO~S5#J2 z*VMMQwRd!Oee3={I5a#mIyOErxv+>>T3%UQTi@8*KR7%(K0%$Lf2|7-!2fGm;OAc} z`%mk-0j}%9#f$hC34W~$?}F#Ah2OY%={Dcxo6_n8&z)~E^S{1A_9!O3q~$7$fCiG> z%w^yj1*_n~oxNX6`@ORNcMA*rKUWWU6|O#zRAi@(d|ix)3ny?pudRidk4AtELE zRY-4={w_EFt&smN)V~VdKMD>U1n=UdO9TW2MBqPqvg>5@|JwyO0}3-VZW18E#{-E8 z{{{d7&d_1(X&3$+fA-)H4g6nC1Ejd0e|Y^5jsIUw<9~4MFDxg;4gBHtKfL~j*Z&1* zfD||Qhu8lF?(-jP`(vk);)ee4`X65ZgX{kSG(d_Q{=@740{8h3uK&SxQryTNUjM`E ze{lU@fCfl$qknk)U*JCf!Sz45PKq1*|C!fAbKgl}!kRQVD4I%48(goWYjHp%V3<)B zRg}YP_z4GG?v-_KV0W)JZN7ERj;|BeDaABMfT@Qfim+rui`=Zx9y91XHqO6D2t zrnTw8Ks4s!AhcB}8wYsffZe!J79B@_m;|2~m>1D&PqOHyaaX=#!eTesSz-agH zCt%1A)~T`3ITp-I{LGA+ew3o%oC0Jw1}g|{RnI{y-~hkqy)=@juvN$c`w+B_?R+27 
z8U3r*PWM0BR&c<=J+P7Y-x}TKc6$D;F*fW^Bgvml07>Z2Cj8lizl6yjO87$we<5@ELG8!Ev^h4pnHMT z?*dfdA>F{<-2%RL_BZDvtk|p}sfk4ew0D{>lj#~~DS-Ft{=Jv@;b&|imS?+nNef{^ zg0OM%&rO^3(D|pTheDGB59w@BHmj%{>BDYpU^WA_<6<@bTGP4D=F2n^BQHzH;%68* z+0lmdLV?2pWF1RBsQ!=+iJbZ9&5G$lX$A*#K^ksj_gv+dKrVo;w*R%4f%?JtI)YBA|1!(OdK)SZ~Ou|i-sT!$!lFm7Ol zIe?si!tmF8{eR~Bv!8$H-Jd%#=UJ&lC(j8QNA+z_0EM-HWN8o>G{QxQ7v+mu&47zQAq6@HHi(9KJv zyK?8w8eNM8OQK2aQw)g&r%9XE(R40E^6A0Vy?|NAVvj4f@v`ZOyyI%vP;fhq*0lE& zX=WhA1Xd({H!fG{BuyQ=+k+|5-$%I&oQBQ{pl)EHG{7cvDs|`D3w?t8_t1DH6K}TQ z-Sh|BS-LDaS2e+QnPKfS8%9*PFM5_`t0Yk!VEd+^`T0OWr>@&8`b`b}!qXZiK9DnJ^+u!|BM z`-{Xt_6@yxU0Naha@!WEH|sdpFxO}$X7{7sU(HD=cjt>&T4ICqa7wP}+GC;m~LbiF9UT>a_z!mN^`_jmkP``?+r9X0F1A0i2&f}`wK_KdJCiH8|s>_A3iz^Ig% zLblCVt=_=fCiCi8VjS?YbC)}8l|jGL`Lkt%Yb(QTDGuaOaqY7+zM8qShN-=h38`aB zv8mMfZY4^^!fB?RvAiv_eLqWz2-~rD8ONTg?g0_&FEhe6C7gcG@oS#HAC0axsFOPl zKfXfEF7KzrL99ESoMA`WJ0^6akgCH6+*aTBrN*wu_4IAmY=0)gYIn$5>$l9|yrsW)H+W&;#weVn{NTjf_u#<3)A^OpivV{+ z`8{Y*#b%~Omd%YBl`lpaRu80s)Q3AVmK!Zf9}|t%^$d1k61L zkg4JQmxEjdi|tn`1ARVfiO;IuS}ZxdvSR!(Xn)5;%WP@bta!pDY;ge0KpDtCxW#YT5 zhY0uR-RjEOqI)ha`9Mp>`K?jbq3kewUxoZ&7X0p3RB7006_)0*DjJUza}@_{Ywt=i zjHc~Tgjy9D66bkRxzzNHuJxn}RJ^7>F7^?v6-tg21F1lXKF-`pO|yNd)W-p~G*Ko) zzG8YkfLY{qc4?^HrQInC+3t@$Q(wPe5msVF`6KVxYkDW>^q$_a)7E|zSPxUCm^_jc z{!deDLE9dJL9mCnalmWHd44st9NE;5g6vCzCbq5+4uC;U1))FCieX4e5yWhaM1E3n zqEH5dNax}+$!{FNj89=U$1?(2Lr2N%NAx`IHB}!hj7b^hPcC{A#29Ni3+Ceh5?zVU z4%*=@Q=5X*nwzy}BLe|p2fQJW8??=XSM>vOeGbZ<}PQrfMG<2 zO!xWXJ2Lf$m&kMzz1kL2UoUXR<&yhOjdY?iny<#h)IA20X??rl=(SkEEWA9pZ zJG74Vyu_5g9Zqtt&WIqs3=2K5CEYR(cjbS=xJ}L35V*>q)+aC|bVx2kr!3xG>u@a6 zW$~%-R)q8YWk-8WOfpEiwWNX|B>;aNW$uxW&)DP}M=p3bnO7zFCxWay&Rk?bEucpkhO~tBpVK>(5>})V<65LymQn-nsWTIeva2;W=3@8O;lri zj}oLw#(REkKU;zt+R1Ea}OVNRchHR3sEJR#)Z)Cl4_swwb6Ik zA{O~0Yqb!mTqPO5vn)SXe@*SuA(aRgO4lwD98hlM_^HJ;n#+p1Prw8Ru!i0gEWNMw zP(O4HSc=n?yQNap-5#ORyjJO8j8}Fx{L$T+8I$NC|zr!0i(zC z?OY6ETH;^Vw1cEEGS=7L!`fK;48{$H?KWCE&xcGC|Hi(Lot=pbZyFm_N3pBb)wyY_ z$EtoM@5BME-AIx1%S4 zAXAqC{==agjoMtyLM}1~ 
zFa4(9Uo`wbe9(Idsz0RuC@%S~Pf4yMdXJ6q+;Q+L+fQEZsot`<0P$zr&#?g&6QDQE zj|Q9f46D7E2y7vUPjNt$ZEoq99j{Y5CXW=^#2B+?qwqY;ZGxY`^pSwk(OfW#T|@E~ zWX3^mbvU3LZ%W<7og}^BWgqh=is;fN7VHIR|6pw|#Hh5X_40l;qF?|<}ZvnYM@Kmo2FHz;FO6&Kxkud8~@SqweqsG<#?bpb(n%V>}XK<_}uTE#AJr&0!HoZQt(lun$=8 z^yljiYwvt3w&W`&tF3t-zGYo!?QzDkt^~h@a_IiynN;+dB=?eu*YKM|HX)lH{edIl zsa#XT89J-$-=myy0N&k3Ze&v%{3fb$V7I&)q2y+k`l7#FfJ7d+pdv6vRoDP+Ghss7 zW7IaBE6NK++PLiF?L2i%5Y_Y1o%PY%ir+c1@_cj0i^*CPGuwtbuzPMp`!y$Z{P_uH z06uH#$iYdMzZN~))1mDgqPQRX@;6FJ6Vd=@>@oD3<137DKB}iRY3>~yGU(1;XrV{< zo<`g41zu?LG}Y_sRM&nnwpMr0KkpUZx`}UF3A3QQ^A;-MSZ674LR}IORlYagB_X-q zrNDh=!xhXLNz_rO{3Mqk3!1DvS~cV!K6?>4lN(kq&F~TGWM9;7&l;6=`L^#p2Y!V^ zp`)fi3~i>@quCX1nnlBqofQ3u!pOk1hzntv4SvSyYcy3MyDG4ZmgDeOLCVSTQtGc% zj9yQB!gJ~gtn>WDkg{;!MMF*eHUX8AU0EJs<}cqV@Lh>qjcN?lJ)L%U8FDEexkj_{ z(KKpr9oE&&`C`YEm5<78Pvsnw zFbo(I|FE+#b>ehS%E0H6>E`fBapT)x4Mrkpc$u)RDt%vjO ze$lLzi^k?Z|9Xhv3b-TFm=xd$twtaBt9>0d=U+eJGfJB;S1E=F-7VH#&C;})Wf<;x zw6AY!Ag?vQZDAqGgk_2OIW{>*TCW5N!|>VX8^7X_ewj0QpD=xcz%GK$XH19mzV^}r z`$VdWXIpH~dS^XbFF|h7_U4B73LNSAaJa&7M%LJR(Kl}?G4AUAN3(Vp-&-aH3zFBS z+6*ogM_P;3gv|v^knm590m+G3k_pog})qtGi&OE7DwJ`Onai|=_| zq^K7vOSW}1inS_er;^a-YK&JL4;HN~tkI~dMW5Pv;sE_&*^IyjVjWu-NJZ-A1i{<;MXCcQS{^A|mg$nSs zMtse5v9`51^D(7& z(P%WX22CvE&D#p0vSGM4P|SU#d*D!h@`V4y&B-I-$S&{V38K+V-Nn`D9Y2wa1`LsGAE6$}OCQ|BxaRE-Et%$z@rU0iHoP2olP4M0 z{LBWy*+nYniSjNR;XT?WRf6(r4s#bVQcnqf31*aK8VRS@d^&QyCugS3<4wV1=5GXx zS!2z@!ZbMzUkDAUvg+$T$GuLllvDuYg=3no3;pz7*W}&1?Q@2{-zOAwC{BYdm_@V^ zY@nO#Qp0d!=)6mK#}AX9v?OjclM-IEo^WAdlS{8KZF=g4`OFdeB620zwo>FMSuiv`?IcjFPm8P!um9Fq4mQ{ z^+=<(>uGb!QkxA}xp5Ei%J{t1GvR&0h6cYxil_b_ak>-nWM%I`9lM$QbnlBbx+L!Gn)Zi7miN-|Y|QONdeg2ZQuiX^`Kw3l&toSH-yQgU>uYVFzs$yz zN&q#OB2M&G5O;~p#>(BbZX+aAp$gOfkXKoJ#x~|FbV9e=HL>vNrODGU)R~UQduMFH zU=8&3eg|m;o3*hT?Y+x-uLK$)1_wCcx|dSB6vaNIsz|EmIAvD1FS^cB#J|(d+-XzW z4MMtsWv#b|$W)yvx2C@@URM2Z{c6v9@tE0q>apaGGhJf0p{0P=k9pJSQWnVF7OZyH z?G636bEaTve7$7NpNx%2vgIM)Kkvk($I4#I=T{H~e3SNIK}oWC^QuG}CsKE-gfm2J 
zBM9W~+>~-I`D7Ipf6yQ9XJ3SBJX&px&#dg<*^Y@$c;$vjchq5F8-J$zbM%Kchx!M^ zu2k?Ul@v_^FS$lMP2N!Og08E5%}#@#)rD*)RGnVb`7&)M&Pu?Fu#a?^~^DUwp;yb)kNB0UEX| zp}){nVIwNyQTmDX8DA4{-;M;2Y5weusidFsUS}TMw#w&)K)r$e=g4bMvR*BdjCyv> zcGFp{0fcJ)2;^1Tt-sd{-b`tB~Y4kV|MI263~xVIDmQtj)`Uu4TX zl5UZ|p7fqgr9NVMD;o5<99i%N5PlY_vT~Z=5GBVFW2py<1B$NMJ7sHQ*Pi98Xt6Kus)K4m z6Nc~PhZ@UzEO{BZ6zFjs%&@m=Z?!;JunxTui9=2_huzGB=doqmtD_U`EV1{cu z>ZtemMy$rfEjLOc*}{t4?QPpN_03gm^DJ+9$G~D0NQSCBc3ea|+|BqtqXK-?Yduwa zKukzjO6?NRa(36EqcgxB*BWd)p0o5H-lQ}dN{j+(--4;m-kW+@mz5aE^o`n56rU}s;*hkB7x1cU!*T^{jBH8u+r`LdW5$ycj7Z44&pCU( z7pUBHir`5$IF_mkcM{~()XvvY|DajwQQshBns?YhrfzBwAWXjB)TIpF_8BRDO=$Vx z;=vt!tWVE3g0)X9=Ll2P*_oE-R+2|Uc0sek@lyk8FJRwvS9i+tRf}xKd@B0UJv9c8 zBt;m2WBk`96pn@ou{pI^`7mGj+eAh8)rg7j3 zWZIAO^%gAw_L7lfh#&EJ3bIOJ^rBa7lxn#Vo$K< zFI?@wgQNrgX?mJ+qkP#NY=GSGo~(Oh3xY+7SfeLiGH!IY)~dBp04Y^8Ur?Ra^dgD& z)l*Hml%lFHO#4(XUiczyOHka4oRJUO+n#vd9n;XERh8zrXYqXJA?I$)8mBKJIMgjg zX^TcUMck;zZkMZuunoh217gr^=b$_BP*Z0F2P~O2=&rdugDfRO=V7ZXTWHQsDY0Hbshbrq zjWfBSRChpb)5@ju=UjzcF)^+R7u2?1Vw#(}K*#{~X*}986ChVC&F=B9g^?#9hdQ6l za%)$>v*A@K{q<`ZVMozXoJJ;whMd${G91}o_3)!RnUuX}ww)08hclPR5I{&>J*M9wAOc3<+GbCu(iZnS(==&EeD=}JIvD;0;} z%kLH9`5Ljh1||>Ij(jndQlF4ktuW%==UKKN4I+B|Hi^fhL?W2jZgOmM=poWq8iU}` zm>bBG&KsLPJhkWmF26M%zd2#vqbET#RRKGn5ug*dYw@IAQg-khb1&ojXzLwKSN@xH z3ABOt=r3PkjZ$%LCXfoEy>X3o%D*J>C_ zcgne>g)ZyB{*rn9buZx`u%upN+Et_57+z0KRddgJS-Pe61tsDvFw?U0%6eSP9OSWA zUHfR`&7Z5IJhJRBx!iCnC%J^-g8qwr^HDp{TBzk=0X?IS;2|J!-rNk}XcX}&++U8Cm32mahU@8y zH_$70WL0y*O*tD>Ygl5%-YeZAkfkQdWjeo-Z&2~UAT{1M_vs4zPFgI5b3}e^JX@+L zfzTcSm?K(*Qrs6SWV5OyPP9*qPCzu#;~hQ|z!Yqz80wUd1u$%gH-z4$E#hAola$*@ z=J0R%Cbd}i6}6`OwtR#;nn2(i=kn3|*;qu9qA@IK_cRp@7+-_5H67AJzeD!m>ZrHr z5?2e-mbgn86F3*b&f>b}_L*#8M_^)G%bNlfxBN3vm-X56{&Q(OWR|b$A^{H+{-0?qThKBb)Gmk=+bar>=QHRK1HXIWp+)9ZMT| zpxu%q(CuJsbkx@#8ZBGl;v%%b&&5xQURz|(IYqb`t-Ln(MQ1qYXW!)7B>22F82WxZ z)Wwl=YlXe#)%;0^FG-cFBg02uy4Z}WiKY3k1Un2P$SzP&S`XlWP4pZNkS+3M?0{VG zD$v`L9IeQi@E$e0cvqEQsI%j=QRmsT`Cw(+Vr=F;_3SYV&jZmiu6)fdjqIq7Lwn)a 
zi6Rk0nAF%DmZ7e~p7g0qA|b7wEI!fdvlI9$&M^!ZV1K{_2!GUKs4c6EUmY}{gYeiw}(ZZXpYBpvwXfz;DhWZL8-P&3DhMD zR%2G8i3JCR$vxesyJA(-y5Y7r5?zLmwK9M72lZ5&PcAgpgcoehADw#cH$C3Ag_3)j zZMS>WHhd`V!qf2TW?Q2bYA_{4WiFhAO0W=l8WhTp?hChYDK|WJ2M$?85iI1Res}kZ z3$iyyQyG&ilT{m`ygjZk?s30tfuYYA!~=v6G>Qm?-OQ?Wl3RPoU&H(2=VlrU z;4z2r@E!ZHHkx5&4uUh{=1D!PyLY?Dkutup_T39!`6um@RUt)o^}Q0z-z^SUsN@&a zR;`~++e2hdmO&3n;oETU7H2pj$Evu{Wy8R@P zI*bQ+rmEOv%I-zE&>wNwM7Hizbeytm(8lBm6=r$I;oa=wi`MnuGhLMI3Lf|JedPaf zkjXk{!%Sz5^1PC6^=qe?7^5_i{g>K`%pS+CeaM6M7m9k{o;zu4t5|r96jp3n_M0}2 zx<&FbPI`_WCoXvp7J8!{m@`Vo4!M;vMI!1IwXS{Rpyt!9AcX8p%wl3w7xfY3{PT)S{P0te6)oyZN<9AB>ZJj}ez z9n&Ejv8ffwVYBb>ZMMF#WN7hVcZaz$I_K0_9V*PE-mOh!q&;6JI?8 zQ(?a=B8hc%hbt1Ff7D9MO_;q6crJ(Ku{ua9PP^b-*JAolxqY(bpKH75NMb5qqHj~_TIWO|_8SOC zXZ#o!VHdsLv=UPuw{tLmejnR8^*(0UDv)%T=otPKC4w;47W(AJ=_{aWVyw~oY5ppL6?XE@$EuwX$A;!1;#x}-1Y+v zV7OQOpxxkd>NY(#*Vb;cMc?uX3zC&a333AkReLY^I+R@QX6hEP7{K zFj{Xl3{4x|d+jb&SOTrz&RvEMpN)o{7R+}=x+_IKLFuZ0n0{zxRb?gSb}vwuEjG*Q z%@Cclsp42kP`hk{kOo`(N}~PK%eNOc9vrFGUW-yle~T}Wk3Zy$e>xn0u-mnJN?^@+ zG--N%$$HV&mqsJccKkE^($NpC`+Yhqn@zI)eo-s4mQ!S3R*T7BL$cMH#@v+ez4b{= zw}097zDzO1``Ch0%GqcujxqCax46}k^ysnuPz>%m1K~3+AkP3PgeYUGY z(}kY$^8zJOh2D@9uV}FoT`nQ19l9GfvB&WZ9ix*?U5^ii@0q2trV{A0#5`unyQDAw zjVbC1OL~6QWKLoZyT9TNt&BsEHN|}>1#|u5cG&eh$l$naG}@Clold~i`A(^3+6rf_c4;dcjlhLy9Y#5TsAj?Gl` z1a_R8k?2%J8%lPM8tRaJI4y=()S^oXYAYkd6!=+4(!O4X(Um+Ue6lg`91q5bIZ#cD zCzub!s_O*UP{m{3S55oerXUxrgNB;5-e6bFB0Y&Q9G*TEv9SD@I|wpqvcWTob$oyg zu8t+UsRH`m^lo{TwT^C=`kQ?h?z=yBH(1x)%T`HT|B^!H?y8)oSmyd<4C8 z;w2<(SE!6=o3{~oa)_6hp6AHb37p~h(6O{}{J z*$kF$7WG`4&qq0_bthW3*Jr9S_N7+8jvm^p#Sigr?|h?XrdI#PD08=h6ND|DpY_rU zhSft_l?k2D2HMr)2L*XdFv+R>bBLJPscZRd`JnyK{u57sl~s?}x4w_w-*L)8C2X94`}JHI?4p2+2kyQDNV z`Hwj-FS>3sCunuoDdS#o4w%fjRWY?KhVAt$V&mush_)v}LN#YbuRz%-2|gAyH|H8IWH-dKe%tZ~dV)C-qf zd8U>R=@%0EkNGrz=Uh+${;uW;-EXF1;?#lpmXT5$aK|v?$kBU4PTQ*lwq{ zqXLWveFI_?BMzddT!--v;WS!%TK8OK720-9U8P17*zWz{GwE}i=CrAMP;?ZI>Ccj& zq>>2aPaH5?ja45r+RU%M+LJJw`epy+JOgRS%OO`6W#QzJ 
zsRRQ^H6j3vUVK{vHTqRv?A!PwlcH>E^eBucF~6OyBANDQY})qiU=Hc|S3YCkVfdhb z&JBt__z2noipQoAwGFfJ^MeTv=C)IBD?Lm|I(00mPo~~%DVtny4kFQrt<@Dng_DY( z&WLMeTMN-jO@SVQ23gR38y(E6zHJw|+*=lGT3Q~hCAb)*dQa*RiFQD>T+tWe% z@#y%R59|cP`EOu3a-;QZt&<$6@OEKw&Yq>B;bi@@cR)#34L<&CW6cnj3`MCT1E%ph zRl9E-)QL$g-KyGGH$v$z+UBgeboekt*~)P?f4|;UGm=sazl#I39otcJx+6U9MJlU0 z^^j0xc)t!khNZDJ8GSNj;d{7d_#Bw39Id6(Yrs9qHbShxH;c^1)`40Ehlurv_VyzQKd)M>yM?Wy7 z6j6-KfEv`{hdKH~UwNH5#eo&K$VW`EG7*>X0l#$gH6)zEEA^yfvTc)cAWKC{HjheF z+vE*ftoITQ;E6wyjX^?N4D7B{=@dc5U)L(6)JP)39=+avb0l=I+YxeT>qyZM6naqy zInhF=^8SnVftcm`FsAv|7|Z2q=b!EQirPy>6AV~sQ;r0_p=OPbLMcmU{=3j8KD=A! zC+@pNaO?;?vtX5R;%sMg&d~I{z*J7RECE{Ewt8B&fXcBpEBYUV^TI3@f@N{fidrzdpUR)W{We|mYWE7u5`o*0Z^5z2M+ESn9G2fA7z*T4T z_LGz4pmm)+-pdi!QY;~y zw=#!I=C5rwe`nN=wzWm4NrzO4OCi{G_52HG^jPd*QxmYaD6&VeF;%~z>_&)Jpw zc1BgKHMRW4)nBBgV$CBZX{{#9JS6CyqIOB0E1U{&5>-1oco$!0Ly0tSyf(&P=sT6h z)BFPKetb6`8QenS#$Gm&y#B#&P-V*nI-@~G+FI47-LC86 zQ^;&V5QcN#q)Mx*h!RbFu2xV_R5i16wPiQPB=2usS6l9ujKAVd_X z?j`hTJ@ifaHqQ9!ZYB7#0+u@)O{~rf53(T1{c%JD)ClKKM8obovMFd&aCrcV!t}9$Va4u+hkW3X?)Zh{we2u z7RP3b+7|wGU-}y07#>+}EV_YiNcg~d!@tyPVb*&J%wneBON}2(G}B_EaSn7j(3NnY z>bYY`Dxs`6`1aN$cfo|sOw&St`fh;KI7dvb_04a-)LB}LvB6k(P(2T+m}riB(&5f4> zGUQp8n6=+M%26jb#Pd399wT{9q?nU!V(TBXi(K|A)k|!{lgYJzab|2?H@94}1HpqV znG8cWL7$U#kymg#DdC0*?H>7fZNz!%idDkZho{E$q46J>rrahC@^jHS870e}BEE1m6DJ{2^sSmZ#t#qj-5{S%oxgdU{s>ttrkF>e1 zh34&b8TaEaJR_<6Z6tN_7w^+~&PO$V)*-(g-Zs?4?{qe{==prpf;!<&i>6Z%+^g95%zSkqFo3x)f>shStK;OMyx{>65cCc@s) zHaJ(Xu;Q?0Aof|pExr!W<&u&y?vw zWI%2dM^yxtQwBkQ<1qBl{q;#m_g&-NHaLNw)Sg_B{m$xFeYn{V zPt%4E_nmy+(*ab^B8URCe|o)XVZ4Un%TdVRsC_9P2e2oZ?Q~_N@e{#tfT`j}B%RS0 z-dP@{07mDfe%iRfuBKz_GYeixV-O6-hRj!ZubOt}c)=7KL*AR!hKufGzj}_!V{U9j zW#a&53OEih8Un)*cf3tryCXeyM(qyIc^}tNx++R!bRY}GrlA2|RNeD?CV6{G*eBek z*qU5ygyIl^QXH}Ftg`<0nyZy)g{eSGm7B`t+|%AA+vcbF~N((qdDHa&WM@dAl-<{gyPK8cuVeHsR~#tnwjLH_mH?JRx4H4|o^s*GTJA4NRInaPt0$*lNiHu6w2<$4 zeUF8Q`h^`2?Lb_+?++uzjEt@KU2UeU^&6@-ZTXcO6@hN{b 
z6CHzAOw77wWQtE^ih)N}94|}y#|64C3w(4t22)AA4x<=<(yv(B%{kx>ab1umNQg8R#h$TpP`3{R#)f+T(yg=;!D{ zjPybSV+V!x^P*;}K(UMY=@a+o$q6{5*E(yGdEccH4DB*Cl@Cp@RmNCE)~~D3U$GG7 zk#3>iOyA=TJ%nE!znfVoP{^-%+c->b=k_Nj$q!?9axq3ohis^QuAven#hv3Z!&(UK z=XXY_(@AI=IC1}3kE{3O>dLtMO4J)K2dGu?SXJsILkjrBjrGEHV`x|T+>XdTyf5sW z5_}h6v~|<8Ex`h^a}F4+Frk=3-?Lquqq8~j84Ii6tgGTJ z^=6J>2&LiER(tR)phjpavRyX*ipjmI={A zVPC>^O6JS1w=cfaaRJ!4`euM_ly0im?xOo{q`lctV`qX#jQpLc>Tk=<9Q@;Im#L5h z2vFvqFCP%BoQ5 z(ctSzJRoR%-7kItjqiK2tY0l$=n){*kk`h1g^KUvfFS6;B$>U}yPpF%19d ztBC0P@AIKVv69Vj_$C7SX7ao^c(H~4ASAshWNO#^75sg7+>Y#AXJeEg1wzgd2ssbI{7=)g0O}+$ zj97LrA%Sf9+dwfap8ks_GOU~_9ehSDz;R!W79mNxP*mSvjA~?0@J5OV#s-NdQ>`Nu!aBINMG7hne|T^XeP+i;_BXhg zTmpr3&l4~pFb+NBGg;U(Y{T9__P_Nj5^g25Eyx>Sc_wg|a>b0gbiBD`wHi)}3A;ehn8h4#)YXFhEU&-C5pc&8Em+R|1{r+3+zn>puDso+Ty9&m6?18{Ic7?_BK zB=i9Gqq`>Ev~ zyvK4#7ab&@i3*jhI;nEBkEt$}Vvf#9&RTE)1v4@zm?GLSlj}!eKWk$^8w=ltT!Ie~ zuSsCmo@2+idj0hNjOn!k$|UVrQ;2iq@X~z-y%T!5i(`h!u2vJgJ=GvY>~V+Xu>C~r z>PczdS$n3vF8(EXYJBR;wdan_riAs9IlcKCNj6;Sj}UbsiPUSRrLNsbbN%Q-HzA3qO~?Js8Rt<59fKdbD`AVO(2%A8F*_<=@s zQNh^SKKbC!wVqG>c)_Vf!P@uJUBpfbYl65+{;Lmx3b$R?S1wtA zwiL13iY~f*``pG`;{IZpqQorJKPCDBN+ip0qfsDADW0VAe#18ne!{>nHE4=&C*;R6 zeyYf_cHu_4JvR}`K9OX(UHtZK|H z=#0rP8Tio1pzYYX+BrvcnC#En8QW0$)^JT@=?fL!r&2yfjVzdpZPPiZkq&Rdc0qo= z$N2-xu{qkvwD0?U-^b<%)}by(RyqvYPzo|KBO)XKKgP9=+^j&pbQrpG~13Rk1LCO z@64{~wS9=ZP@*RF=&HnvB}Ap9dk}NBO`qt?>{WXe#ZwQ;ORwKO9V}c)X_%@rnY!a% z<(q1c=nvIx6bYhbd70qxUSZ`Ok#Wy_vC?zd`>LRi?HyzvvJTl*0`(P>23iC~^a7M* zO<3F`+Tv2tNJsxCMfL+C=S=(rF*}Q3Ki$t5P|C&N08nr9F+IO|)U6@J5)oLyS(iE4 zHrzJcZPP=j8cSDJ8W3%gI}E+qv?MbM*$l@1;9N{$!9J=qJ(`7_U(ubM`+JMM@c+Z! 
zd&f1^t!cxdC`uItq(|up3Q`oLM7n^0p!5zmQ)o zOPRx>4=*$X0wB|T@DTiy3u*)l#w{G3s8E}LYfr#@Y{;`pqEk-Rwctn?N3?KD&XXR4 zzJF&?e-Y*{-i;XWw$W)oxvHk>oc8uQtVC$YOelR}CfG1Zu=o(pqacNn2G(g&~65BSvM>%mXaiNrL*-&1usDYTlEXK(|arq3DfZbbUIf-DL z6}L63tghnk4plAZ2XUtOrKP;2>TL3A0h3DJchAtbdGuZM@FY6KEzvxj$OUwpIO@^z z(T>qvr!8OGBMP7#Y&qs&F#TkY@6`I?Hi&0>&Wkur$cyoyoVr8up_DQwi-^+j%8^sS zs&xcwO0Le{tZkEP!zliNxJCecUmAeUp?Hyp`iKg?CCFbLKyu@Wj({y_t#%l%NDV;I z&c}5JkYMC8OAZ`m1wW)kWE8K>Mb7P04nejmk!PeRbu;`9WUFw+VcXl??DHxXcl11u&vi#0 z4per$_}0ap%dhb8j!wkKVfEsJ>3qbwWn?>}=xn%aW`SzKcRQToPU$&6NrL!uf9d!I zbT4OPp(MZ7lX0Ohn^(k1(4xoc+32~2gjn+j*#%ZT6pf_%GkKptBo~#-X}<2FE6=Qv z#ARqMC+xXVH&5OF7(w$36u7XYu{#N$%lj=v+t9e?0a3(o zMcZdVOFr6IuVYGV{YiNh7{t5V7#EQX${yT9TX=jVo~ubxo7^b>-Se+U(uQ}smPZPl z7B@_Ey#?~s^WzL)Z!vMGXYE^7RWmOZxz^PoO(u1iV!XU3d}~o))v6puhc*(*>AqLP zewiW8l>{q%Z<H)tZ)HUhv4f5s?@4wPR(pf}g!bE@23z>Z&7c zfd!&U_G-YXhbo_M?M`CJTHn2Yq#IJ%B)d@EacM;D_Muvvz)nekKDLC#|NB^8)7|bpapw41Q>! z>j#9FIyERs(;{n~@MZ?iD`Dc~>?QJfUej#!SjXL5_D~8x7bx^FGpVa-3_BI-9w4qgeXYn527dM^qU)d(LUyi! 
zQ3|w;X&qgvtI%j5-_*OOu0BRAc=kHd`luKgj;BHTmD*Q&?%swsF=uf@ z5-b|hyYeyVR2tTfz3{T&Ya@@0+m(qlRTncHasD>KRc4nN9LT&!I`j^z_}SRK-X}Pf zD%6JpL;J%C+tv1cDC#-Ng`}XQ zf1TttD$#vtJD3>2vD_$9c#|l*$@J+*bj*WV(0mXxp4heQWL|W1;J7n4-;Qfc)m}8Q z`pET}7Le@hq%3evG2)FoD_7<*D~M-L zw)Q4#k;FjC2;1ah=mu7zMn!E!_^oA&N`?~MAg#XL zLS7f*Wg2a|&?}<0Zr{6<26x9W%M>^6dl&T_*g={l$t^O=ifXOPDvb@ISBKBW_XU&k zC|+92T@6bATG1R*S~c2}t^@!Pj)IpK{uL3)-y$dZr(;Sc6g1CBgbOWIxh%?=3=7q} zHa4CPRh`;?GO*UaBSz`7j{|F-p`PnMN{FX8+XHj7WxvBQf;MR5M;=}K-gYZOhGz4i z3G%$*r^SOB>!QVM13ju3G*xTg9JQEB^S)^S^jb{xf9Y%8$HF z?mplr7ul88#^+C+jwF{A(j{F!I8d2pJHw@$8rh|rNtZgH65oI7>EXskW*lb_wrQkm zr_7p^-;a&a(2)+9Y><}^CwBBL$_A0rYSXZRXvjel&kO#Ndgx!$5d9nfBR)%W!}v$< zRVg`@@(l6w~pA8gA?R0kPW;YBM=Cow3iebvPoWi-<%{_yFmOf z?!CV%b#(2#izHRqW&19m5Ol^uuQ#RYSYGzf^T|Ni2TTf47z+8*Gb@`<9tg>&M0wm!dcjPM;%2Qn{s{_W zmB|2ICLwHGU%7X;VAKk3Vlnh$iGC#1)uG(k@WJt+4ssE8v)F^LeNuFxy#7nl@|B3E zPlg2gGnSnEuQGh=a)dsCT~}?er!XF$o$9sc(0x0v5|*T^(D{%c?FH)VPh+vS^&)q0 zPr46;rMU_tS}zPrMbaWRj!cwV@qB|-bui5mW!7}3^%E3%37gXuq5H(lP88m%=%U%h z;g)xAZ)!>k6@KQ~)DC*%Bt%K2a`@;Ddi;`NaZ#?q|OTbrs!O z3rRW2&NyC``ACEi!nN z6|x-gNy@8R{?1!aw?J+BXQ1QU zvWIa8d@l;-%%­~O9Jg2JB9HJs)kxnN7^dvni*5mh#ZQ_4H-7zMV<=&yzQ6MhW0 z$j>kM);D<&x)gHb!!P!{!0gFpvl2J>RU`S_hP~tc8ygnweW}9UveyqE=jFe-66sDT zFB!*OBU!jk>Fp$?7&=tSGaB&z*g~4>rY@O*=z4=PIPIj~u4)0-k~nrSJ}p0?^)1R@ zqS*cT*4RucA{^%INSmzeSr{lO(!a;-v+^k(x(0Q-cI6wx<-~l#Mupg%ogJ12ICR;2 z8at*H3fDE-D*Xjgg<_-$o6*%x7G92T`0dQEJ;ChhL2Ya~?3i7|x)CLn0#pF608FqF zeT21ftF64^&PyIf`66Cyy@gpf_ouPP+j<&vZm+x&kJJ3?W1X!jp5jJ~?ixkX#HdDI z1lD?^w8~JKjZ}T&G1o()`r7$lp!?IL(_b;jcGY!VmEj|@-mvh@SH}Bh`E#VuBSg&x zVIaMx-;Pl+uVbvsrU()tJd#LXR5{dcYQ}r7bOWxKz6!ft>;dk0r!g^k*80XPM`%&h zigaHQYbh08&PEPm+qh^6EfpQf;}LMRoX)-+9vX^B1J{f z30;(s(4r+l!2ist+Dy0bM$V@xW1PQD28X(QvMUe~7@@N!Z_br*g2Tn-CkcI)$qC|~IFYD-k4&>Awdkc)@0t?MS5iIC9?3)$8dW)A!61z^21J}1qQ4ajJxywY=2#FqMCE& zHp%FyJDy`qy>uQ#4zX*HcsSwz-IzLaaQJ-p<_HKqtfZ@#^2gkd}_$GIi5&wn62 zHLG3P!ldGm6X>5|CL6B68_oW$`LcqSg;~+~oS~ZC>{};9a+qHL@AELfVOvM!wPqR{ 
zy86^k!~#&(Fn8$VQveIe|8RP2Qc{O&a8i;L>p3e6-SUKS%8I->-t-t;ymN>d?i>m= zI&O4_W}@vrFGkXPXfyV=?l1NTIp2tkW5wzc6K(lD`wXYWc0D`zp=*#j;!AsXRb!_Q+nlcayCeOtP{{q>q%P3|Q(dmVda%sEu1)JJJN;TpmTt7yLE{!yuoETO$zzx&Wzsf8^$Yx@;+;gX?5*_BqsJ9h` zq=RWLnz=FBLqx}YI?nrruMViODf2$@Cm6tzD}X?PVuX#zZJ)HCQ^WYHN64)TY%?%L zb9f+Qd!wV74)y?`?%JxKC(?{2Rl0Kf{hfC)iZaf_0ls2;U3nha06gq{aNEfwk0X<@ zytdvUxww}1*UL8VLXUY(H-_`Kvnr*zX_FT3(<4epe2#{VQ0ixTdFrv%f+*ub2qipcbfsC6N;>i= z6D<7Dra`3@+8C>gg!B1i>doFBc{AjqI-@wtm1cSSO@xS6M>?=F2aGHxt1S`M%9TSF zt-~>4BROS>>T7G$$>0xfqH_~98Z4ajLjw1emt#A;vw<|cEbuJutp+~nY&%KW2YDoj zB<>$FCq{wid=*)Z(_Bzy$&}Nwfr5UIh&(eD5hEHc_n*78OGrjZi3b%6-S*oHR%I@y zGLya)^vkVZKu(^MKE9}OP2hEvixJRgCT& zPBk6JlziHUMp5w%*LFh;jv*0rx|2Ee9B1Ao2Z(rH3HCR9wV6!5%wRL9(od7uLLH1+ zxdYkdN20ZP+V@?`(>i!%x4efSt-Jj>*0E%Zk5CR_pAIB1Ztfcw16#O(UY-Ez{dI`!kci9U;e5V~$0q zikTP*Z6}QL=aG#D^Mr=0xg)tstoiH%ZH92)9(ER+txdk~X(qWJj?ZVF<0286?XkIu zl1M z$C~T25Yz#I*48O$Gf#!4H>=5R@GFSA-bMAVeS5_H5vIMcVsn@7%*Ms7=L4wCahKK) zlx}KpGRufL`CxlbQnP4ky1)DqQy#^t7(dOGVTM~4lAw$?&9jqrGk)m?1_xu$^Z7R) zG{B#pDP#PfA6Gh*ho$Ygp-K}sVG8r!5A}v&#yPV_Rwv5WjWXN>n?Yukv0XB0Hrrt8 zbvkLTp?f0UkHPm$+o&4D#C4z!aOqS&r_kNj`E14Oc{9V(aM6J~X2rwpJo_pV`lmBs z3T*nqTu9@6iI5$W)sY+3tF-)q-i8c3-k;F}25Y$}n4J4tk)A4m1rw`Dq_Umj!gRH3 zA@t%>&Sb`JOV6GOY~R&=dvyrk`19~Z?GP;F6%VhnYZH?0O^Mq6aabotohZ{HEa8PI-CHB&N5v#|6J4ySudtplhhAIj0?3|EE{N=qd%Zw5%sY4K19y5$eQG0hWlU8W zArEKBk}nA$ETO;f^+-`TH2yTAap1#Rc(G!3USfs&oHw!?dqaItbadoy$Z(nO^-PXd z0Bp-TdW7mwwfTr7*i1MsmmI2zeXqdZb)!=qwB{>p5}-gMbfl89Q3(AcOv#=mKrMG| zBX@gd$NNa*P?{Fq99Y`5Xv?N7f{T}Ls(E8}!xt2-%(0Y#sIv&XcRS{)^m3?X^ZS=0 zl{&SOjBOnq64T55&V{R@w>8@m6Z_X%@$6`lkVbK@yNzR7(~nE!-Kp6n?V2JUi-~Pg zsLp>hHkH)Q=8kpvQo7CGW)lb!>yb3uf;#>*BIEH-j(hF37q+DykgF4FTMYEUPX0K3 zzF5=NXE3Kx+8wa=h(69)>7DM#K*-YiF0~t^JbZZ7mR0V@6@DRUy;$ZW?iKp2ej7-e!JDdlkB_Rz&+7HmQ!{Ps>74k@ zT5ha2-|qK2@UQGYa{G6!Yrp4L#0$KvGYR3g5va{CnBdCIpRpQBEp#PoZFi&0)i^)| zi_Q^Z)7ws*42{Rt*!Zqh$cJHSx8(??-6TVC&7nJ;FEc+>%_K|mCrXtj)bS{&2+cqa zr0-xIZHx26Tmn7H=!nxfXdMDpB-2`(0Mu2R8|Kx$(i~eg^-Gi5wfa#m#E0Y(H@>ZV 
zQE$`exHw@I;|ls_gtHmij7gLxRr9nRs^rvt6`3yY;rf4W9{Mt}-<0wMyvBwfUJtrtnWgMX8nJDQkET1bR= z{Q|Wh7d>=a2ZD4Bh?m+;ZW$+Ls(iaFv)@Ke_e$s?bqi63DP zE$04CDDEukPMHH}AlE~H+X(f35{gPXqY)dkOAd!Uic2)y*s_8Az$)pY1wtxKt!s3I zB@^#^=CUio`RQ))(Gp)?65ENMZEBlWPlub)RSJ~m*36FKU?Z>liIX}X_w&uImxVIC z<{8yCyNm2TaHixf4Qo{;(@NH@Esit4FZ3W_Pq1KE*G{H6`yMk9VR5C<}?J!<_T#y#15nDd(%jh(GXd(o7g$_o@M zObRwLI|ep4gYNhj+U?c243A*_0eA74Bq;C7z`1upAr^%C|qN320QyGHR;Ar#{-@AbIg79Pn_+si81U zW4(V0`?`BP{HqOeuXLyg7}_EV_o-SXy!Bx=uPh=QiLD;aQj|M05up6^)1}D@Om^U9dd1UKU)qUz^%QGi` z5nGLCy5a%)l_inIpIeutnLU zo7;4T&K>inXU2`X$Qan)Sf&s(KhTT?ZV~a7C##8>>*glfLH%s{G-XZHGu#ptCvKoK zdtrDX+sM9@8+de_o=e8(X-BHYwa3eViKp);=D zznuj@eSoY{Nj+_De`?a^iliEpNOfH0j>Gn0&497A(%KUKHhKH3N(EbsZqvnq{twnbN6mF<~3^-j6Mg z!_%spnDjWA4?&gC84k5#V0~cLe&krIQT!qgkyo>`4Ewya_x`Luy#a&EybBp?N73+wxLu&0p%u+Ul4FHTKo^vhjvH z%T=a@SJK%G;9MWSp+Ds`vTr2FmR#g;e((F*f2TAs>&sC_{-vrzx1EamQ!oL)2{*=g z($T$W8XT{BGtTcwyyb##0}+U%(+C`5?H4Dw$)&d_ zvjK#?QB&&5Y=GO#6uJ-TNnX?!D`m5V(D61nRpM`#xL@{ta4U`PQ-TM*kYG1kE<*fE zQ{7xxdg_&9E!R$kM)e9Xn2XP~7k_-0K;2M>HckW3G>go0anXEa+EJzxbv1v{l#m4Pqco9Z4(=jHz)e)v1Y4+;N^$B@scYDC~! 
znmC}fbAvd>)}K}4BE11-p79^5#qpPq&cOI4*Q3Bvt8|;lookVEw>B;oHjWN6C~FWR zy6g*mhCej(ryu^vK6!mYgoRU%qW*?<#NR2|{v+kxKd$|6HMbu%uL^0LU zSYr>mBD1%JeMSOVlzQ6BeQaCaP3-w;*Gu4Z&%i(@t(xu^Nbq87f9=eN_G89)((6a> z&*xK4@Nxc&{gwZa zKU98H46Zgvlu3HE;@#!=o22l~AW^W8uqp3p_G#>lS7dAbWC0E&z!U0MNI>#Ru%&TleEQ32o+P{;(vFh*4;WJ2 zu=6Ka3>vqP5rcZsh;u8*{z&#zSx~C6Evh{76K8;)L~%)3rLkUo zl5|{D?k`>{)pj{a*^LrrYRHp1sowETH{LnSpvq~K!9ijCLrGaf*KzueKQn{Bhg}%F z3O~J#U>}9smMZt&3t6xEBC>@Cu4UlP#s0x~c`I)l#`s&P%<=oe@> z&Jn)DnRfy3MWo{mZRDia(-)v{&BuCJB8wd>?(@biIsEK3`1WG#HoPhu-aA$WkVbxh z9OnW^j>mfE06}*DMJd&J(-s`dn11dSxjisBjQ2T0o>OYzC7>BJ&*&f!fGHAUqjCBh zrzBb&g53d6q*vvi1H6$~W?%^NF;51+5BwMCjn7#6!3luenRkL?CLw=0=AXar$3-b* z?YGXG762ZxR0;pfQ5T>r-`^N7E|PEt@PXMH;AVS(?k!9FUkcuj~r@ZSGcGaxf+W2026h!goKoGt44O& ze1NBVGkE20B*$!z*xs%;`IIc`2v}|+6L|4CJ=VC?UqDjaiot%d`uSVuEnH08rHIHj zaHT~nm_|s>zO2jC;TLHC`-NIL!w%5o;-c}h&&x zNM%JV;MQHq#GAJV#ass*90wzQL#%>O{{zwoL=UwqaOR&g7+g+aG(i9 zKQ2Wwd$=#`1+QVaL+m6!TKc?@`4Q7CGsSGvx%$={)WZRKaz2b|RKlErCr)2(-tVvK!+|6-Rq8GA9<4Nn;7>)yz^vBy~mOiY5=fbb*?flFr0s&$MVjh|so9#|r zBPp?4hJXxe>;J{UJrx+U_l=4Wi+NpJSIOv;bIP_qNbEHG_1=S?y;{3NQz*MNDo%H$SdGtujTH)T8^S0{wtB7pAgUH3b?G4UY z<;8)Z==x8QpYNqJNJK@rPf-uBxVhr@%*y~T8Gey#sFyI;GDp2jXifZ~cogN3y?f>GL{&%Hdz8vA8(!G6}2eR%yBna%-is+~@1HHN4HKp?A2wsrZJ1>YAm}|^3Q7L{kr?o5eiDZ>YVc0Pb{E0p<(Nx$s8!t^0u1*H zY{OEqeK0GznRA~0^eHvJ#Y$cMihf~qH06f%f1KV(wVwhlx@>7-lYEi|6R;VvoiJseNW1cFOtdQ!Suac zdLa{oPRPDc|9Q@Fmd&@r znRe2#@n!U86GUUhxFfZtz6#K5>{TGX@WNO5mC3AG?JDVYJ7OK+a_Z=+uB2CJ)-On~ z;@at|?<4DfSg|?F_irmX|8e}kYQ})P2$7E z5*yr3#vLEag|59^*7t<88qa=Q*)ZY837kJL7FL0OU@U=dyz~FGEk|kQ%PbNgMgIOJ z{HxeP@pqVOfzkOp?8!yE`E6SvUVH}`Z?~butEXQBhSwx!oG(^r&nPUcY)!2w!sBcx znksE*G!X=vmuZ#05YbT30r;%T|JYK0A*#BNXO;LaT0_u4j$=wVeAXbL)3ej@@y&Ha zIAj4)f?U@HQm}+wzd#HKgiFKELaXZJ4;uW&a?DfCsSP6!ggYIRK}~t z`6`FY@`eqy@Pjc=TJ+@&CBIO5D)u|?7$pK66Qom%kPuXx=6dAIKJr-4QU?xjLl+aQ zYAf9Ubap2c1N{hRxP{ezkVHQD+!{Km%12mnrP@=0#09qIUZe5@`y4YiwHVaooT-rA zoH906>bfej_)NB*Yns@|`+QBRfN0M0&*d@%2^N8Aom+4#x5!b=ZqBmX!4I-nP}`nP 
zIQoaDH>9kVz__r(Bps?Vvi&TamBXftPxSMy@6JByB)oS&6m;^h*Gh>!^%mK}9+h=6 zPTqpiJm=(%pj+a#&Vd>+vHtrh%+D_O(PA<#5G78+3!aYwo|y7qpw8m$57Www-D2bf z=j#NA|I(D}CY;M-n--{oilVlJnmW$Evr0%fFlxPzuKOxmw9Km9vj&<+1tE7lb3#<| zgLcV{KTY%4xX$8&LXV;Wb&kGT@3 zuA6a)mDp8g3%9Y2Lj)ZPwAxv%%T{4Z)GZKj28x zxJo_f(i<*wDOI6O8{2&lsyCI*4xr=H@XRAskcqY><(dX}Z_aTAfI1}Y_y#!PDJ6nc zxC_H;@*cHb1?$17V;KO!K$85>lBK@g_wM3E3DwTQtT;;WxZP34Q=A<`R&9{PM|XX; zc)9Gt?_a+FGCUa(@tiOujt6_%sE^*Zy!q9>89h~6*wgdi$5mfENW^U7lDEv`|296y z-|&sDtc0oFXrEzGNpIJ^yWsis(fjJY&Lc^$LI4DgGBh#(mt(_NeS6OU%C4;pD*3E` zANb{uoX`Kvc_R8fd?0m5n%)X^y{_}hD5@4ho18M?j;{bQtlUK&B!h;)UN-X0XNKLC z&qrN{rJ;o{(~RS;u75mTEFC2HNAZv*au`&n?L0K_Y^>Z4C&2Y_+bxHgTC}U1-YY8s zj-_~a1Y^M2U_;)UFrrh`+C)$X0=U{x_OqVH^w6&s?}^>&5#hVfSCg9C-fEoaQt`63 zZ-W5RAe-&)2YZ3iU?@3EV$dbLfkRb3LGu?#?ajq@(wQ)|Nq~&Zzj>!lX_5f*9k?BO zOx^&nOA0`vo^-GnxoTZG=OH?F!f6g4cKZw$8Y@`QIW}A9p&&Z9w5254O~9jby+f`N zs$)cA{+7#Ptl!mqpi;r@dtsAJV>Ps;V-Y>u6+QlXRINmY(hsX?XO{# zv~hH$wvM@T--1c!JC}aOMPx_NGkXp4rI(NB0=3WzWT&xQhdQ1NW3;-Y86?b^+k0^u zExj+kyD3gF8syyFvy8O)P2v|_gG)^BXT0p)py1=>)04ihDJp;?2BT_-F~JUt~=p*&2cNk=r|}_-251Y_QKK8YTts6JN6U1$yZfd2v|WL zOn9iO)HNpE3gj<6kqo0%+C>$L5h7WON9d?R=5Fo=Pu$(DdOf`KBt{Hx=;cCI^}_Bp zO_W4x-^i*TIRBWH3q4y1nW*eD=-)T}1>&Di6K9Gve`RBbWI9TvGC}9%xm?%{**-dB z7E>lrWDU!&?OyQf@GixPpT8PuR1)@J>d>Wd_GEse%9lj>I{Nuu=korK>{Tt#Tg+*f zAJ-WkH~s=eDFp#WZ*d7=SXJ4lnh3zFq(F~W9M2P);D7hDuXHW82JJMrSW^Fb*cy?S zDr)|WjvAmF70KM)2(vxuw-H{@Z7GHao*nf!(0w`kXf#=cNEp20OTLc#%2IK1Kg=D$ za)CfDod6A|NBw)J9Y$T-Nq^!p10P)ld?TfAkHHzE(eOnI5|vlUdAT-kT3&~&KI#8W zw6tfgGQT6-S#IPu`e9I+Nk=km+WsP&juvxjG#}-Qmv*lx)gSi@QS~;HW1W{YLLNR; zTfo^@c@i^)tVXdFOFJf*#dMMQ#cn+Y7r8Ow1B320R{nsr}$8UsFeNk$=hpf}GS zVUTTJA3Ymo-WgvCxbo4( z`#b%VU`{x7`rZ{qK%6E>(1stI+ZSJ-JEjEEzXJx)uiWRsk_td0-XMJ4Rv%eaa~|>T z;Y9%QxB}(pq1(yIi5_jv%=37q>%U|`{dixNDz!@al$||FFG@&!`_UlD=E_N^q;T%Q zv9YS|*UHa$&ptItL1Qq2Z=cTq%^iNo2idWO%Qsh4 zI|+-R>kHmU+Hp82-U-oyLVj zFkOCr$j*l3I*bfVrxx9jay90D+>rJY^-j6M()CEKoesRaTI6BVF)N65E)_YGYM=BT zp{n+>BDm7-00=pQ&YXlKu36F!pse})v9?RSR3DmYt1+}OA96T4oG&}QN^-1op1vl> 
zyPM;V0Nygo$Z*Eoh=XvO*L-`dD}?o)Z&z zFyZm8)mvqmV*g5~o4?ON`GSnpcv$kL9XmD_UA*&7C;L)=;^hh((aOQzA2+)0(2s{* z5Btt^Dezx&D`Y8oX*N^0Nqhutpn z$kIEX`yy9&4b7-{n_bI`aGO9nj(lVHp6w3XZ`#tY{;Vy131+@P@_Bo!%xV?7OZKV8 z$FKq#hrb0x2959%{`$V*Vu==9_E9HBm zvx6Oy_B=!vQFHf?wKQ@G>(1<%%ljC6$%2&_be=~k6|kj4W2K3(@r%gdMSp2aOV9Xbedv7p4Its?H!T)Z&9v)kV$QqG zLN7gKQ({*-pd;ak=MHmFfMvVeIGVbPhELM3)kJ4SQ)#N&0}ywiP;r-jL(Zm44} zM`x=!n(`N6`b+7F*%vrXd+{IrCaz3 z5PYo5>-GUP<%gXVO5dnu7Bnax#uh=_)>NIO53j|%cnc8iblbt-lSe#ZUqn!0Ejp1m zs_JeHEv4%q`Bv3LhAgbtm~s;2x~kPRtS|c%PGTEc6Nj#pR27p2oB5Vcw=btZy(~J2 zS^gbu7;hFWx z+;+D<@)E-yt0(dvJ&=3!;Vlk|9{pE9xb#I`KQ?Umf~TbL-6g#c&IqXJvnMiPYfP#F z&{7Y?d-xkB4N{NKY8W$H>+9-yG;^00p;l6j77iyb!)9`~3HYVaBx;1e1{j(- zf<_=SY6WE9l`wC=`%1(JcUqFAYt76~=qas1jg;)cYD2X$r{#p@?3na`m8cH7hWv&I z($W!myiDkD`I!uN(Ghxec)Gma&B*N${g-$~AF~fGoJJRiQWi)sZ8d&LB)u*bi zR9UTc@QG8IQG`W5j(OglU5q(RJma2EHhW1ZAUa?grF1J`Lmknx7YwHXD4MslL${vR zdA?O8a5QJXK@*bZP}hpgrj*%=j)Loyma#Qdhwc)6T2+YFDEiWUK|#d*_rj2(nlYWp z`1sD$cm9T61+xs#4qrQH`<(^o6l75ON|RXYLa#PUD=$#UbXPYgt8r<0P&CyTDo5(P z{B-OLEpR>c!acKc_JA*P#V^i4PP9x~Vn|qgepTW=Tk33v2NYqfk-ZE!-q{f2%~woF55UR>Iu82ml=U zLsiuBB_AaA051=xhK~#F{Q}81`8VgQ(>k|n0vEGcoeN@oF^Ou2P+Fl2 zU!A?D9A8@wX^(&tYuRi>PU1;s77QcjI~KuaxERPIVn^gxi}N@&Rs6T>z&4NNJLEz> z`~@<shp^|1v-e;!$$ zx5xmbfqED(-e9#Cd%^VBo64hiIdu~xT>MuEdX+>}Xh(xJV;+(pX;Hl+U^T~SVBhq< zz`EgYxoEm)_1(z%#1!WIsJ$m$*4S{f3PaLHp=iv#$Q`t2YMblI96CL2w5wZkdKTN^?~Zp8=V zuGK~Z(cJUL;1_sQ`u-L4+y%@CzJAq21}6ybdHxaKSO$qfA^yM54U%NyQF9pF0{m>v z{d}*jV-8h%V1^ALHxa#EG2MdT-&HMt16(?TALY6Z&;m7|JJGydT6iPCb z77?h=N+q(rim!PY_=HrQJTr9?_qFWct+iwH`S@` z9k-3F_zq-LIC3(Rs@XA0jhycM)t6_Npy z#ByR1z6#4FDosu0^c~7K7)k? zZSG3wYim%^wDSeAw2-(qybh83sYj2zB9nEAx&$nRJgfG5uM zI~GN&hq#-WmgqyR-H>*bHEH5pH8T}`3NKaczO}p*^guM?Z>=9}0(qT#3AQM&@eKp> zS02QX|8SNDi%n*RBmI#k>r})Ld8Cb3OAT8d-1;V3JJ%>X=L6-|B1SxZ{Zry1TP| zjXVSF_7;x5Wl-85D~@qR>Z1;?x3`H_x|@1k4mrLqo!RN*->iue6|X9_2GojbC32f< z6WqARl=$JPK0)WyRaiM!??M|e1;5?Ssy|0fZOnXcWl3q{ojo@#C*Kq&u_MGl`t^+) zmaJ?ehpK^moZ%y1u6f`h9z$d=D^-DfV$TWDeTl(73|2zl=x;LSygFWd3hFmsA&_z? 
z^o2+tm17dy+@!g%wq;YsSrwB8J)T5SSCwr?x=CY)-AH5C)tsRfsE^x{%IGbP;p}*y znQZQfdu#(gigHdhA{zAX2Mphe7;V5*bm&OaVI50)obQx&X4-PbSH50IQy%GxXF2A( zDc$Oq9$)n@!dK)?zT*fKUMlMs$Ppe-D1Wp%cN2COz61$=*A;x2!MN`d@#ICyWy{z4 zvac}*wk-n;q8p;^Kr2mGvTj=`3I00E8?uid;o`TZ-WCnieY+L49l-o3*P$r$-LOiR% zrlOxO-TY9}_^=r3<|!Z$63X|vHY?1Z<_q)@Z&-kvV1_$eiAU_Q7TWyW9{0IK;6nCNHPWVDP5WhycgE9XCDMees_50-T~U{+54p+mJ}= zL)l^5!*DigdyBUXufmmY>x#E<#LkT&S&#bI)*g=`4kbDj(=}=nCKbb0ROLIaKJQ{+ zc7*0aD*8(cfY*ccu+Ej2QUC6wjf=ZDm)P`s27o&rmOcKAWKAg?zU+iTZnFc0pKJr< zlV%{F+rx(514QM1fmApVeRF{8(Y=Czk1K8gs(@8Mx{?4er3wpkXKN6Aoa$pXaEJ*J zR>51sqhaw*&OK_=gWS4=Ti=9ai`c{aP=UnBm_CxFn=SAl$lKtSNxRI4Qz{H5qOTAu zyHkBX{Ct@r74}T-;^eWPwNO5sn~z+7l)OFkuRi{1v7!S;fLnrc+qvL4qFr@19-K2u-hiktU7a=?Zjdx}~o_ zBgBxKtRW=J?iwvy=Z>V6)6 zxQo~|xN@<^YR3pMMA%=Bt0Xx21^QPJE*O}g%!TwbKp^WBHHEC) zJCA9IJi`G!HiO|X1vDJ5^3MUVfe(=4;{=4sj<18yxKywMz#|6!0xbw*(qb24I@nj{;Oj|zYXTG9M1X!8hl^nr1kGpwy0|?Elgjci zQ~#(k_;&GYRiJE<;{6nNv(b=EWscaMGPy-=KRg(lH^%b33zI)lS>;rr>TtY4A62#; z@5&s{Vl%`yzXxo^XK`t4!!Wj(uU6>E(xpyHqCWO||Jep@AL!HM%Pt1G?cj;tg$TsG z{CS+jRKqZnVbb06iJMlpqNIn}B2*%iWR~(Tm!I3?o_Cd6CQfn(S1Nwo8SJqep6R9> ziB_p*m+^P-=zZyaxp81#qc!#6q8p-s#d`J0TbIg^G_fbwRfZBKiLNn}pygRe?b&0km;8Pl;GCp;JGb=rRl>wC{d9H#5|3e>_p$bR62G=m8Mns;M>?_i<> zhzZ@b!F>N$$1I8Mek#zA|1H{-KOZ|)J+2(;=n8NNH@aui544I6WDPLgIP0uBB-x%T zPBoh!2Z~+RxOF90(;yY*uu;CB&vf*wrX932SUfXIi+mN9%{?c(~b(JxTb9jc3DUx+LKVIlq3=##%huZe50!n-3E1#p** zfe0`W(M@^*$E=-yzyn=GU(Dz5A@9z8GoBt9tEynr(Lp=y!m}F=#zkMJCmTu|Hx*y( zLgP|c!w;xb7VPXA6&81N`|4x1^hU}AGs-}5D_5tq4@^#5B zW;3krt^Fq~Yh97S?*9*a?;Y3Fwyh0^B0;1}Z$SZ3K#<-Es5DU!6%eU{NRy)UmVor$ z1O%mt6lsxOC3FM?qy|BR&^rk=KoZ}z&%O7(=j?mx{_b~w=e&3SksrxgB$-)j&N0V$ z#xtH_LGRD@c*L5~q48O^UF|*9JqmGtA0_|(&p^$iSbG9<`IhS9T(*jH!BKjm(+Mg$Zm5gQ|+y#F|Re8i1eI1f8(O1 zjDky)fsj^jn{c-`0>u$!$6J4cyVFq!f_7ztX#_!%i2T@<_!d4r7yI(rp<;=;ZKnlw zgYVvbU5rIn<0M}?u8DmEYLUa53O7D_&vvtvS*1RX%`DAhtd5~Njw^gcb#{uDt(F$V zMJRN!ceHD|`dG_onCUV>sEfoJO*hq~J0_-Nk=JYQD77j@$j{TTq|3lRAseuR^)?0& zvTEj8n`%?fINE_E_$~|jxk{V~K&q4RHZD(CLhEPykX+D@2Z$5!_n)BI!e6ASQ{M&3 
z#glw{kEFpX7S4901n`Vpd6(cpF~gh^gjb_zVZ@bS&UI$djtP$W%Q}jF$kpmvuv1HV zNCJIvliiB7Do~Bv)f+K#>oVCc>n-d~oWG)j`v9LQvZC&G{6+a0eQhPQTd4d-pwJ2s zWi4b!oa8zQjjY^F_2ThZ?22*FGfc8wC{)(rX+>=?H3o$2o-M46(gt5hXG+8+X4Heo z$O11j$3tB}fW}f{ z)BQTtocv^$w zw$(Nsn{mc^H@@vBh(^f;OL{;N|5&m^lDK$fW&AyE`gL;LjBpWP-4a_QhHJvHu=+J?*S@LHUh2ad5yEqQ+|*_a&IRDcF}vZ)#)0hwoo8RS1ncf1OGEk_A3`FS87#|2JfR_k3R%BWvGPRtBsXML8ng;4K02quFzUCURMxjwPNX zOcmYR^<>gmmv&`7!|-|Au%(PX_nR#dRZ`DD?CaXhCwb~*Dvt;yPNF+XobLtROX&6& zy}ar0Flb485G1{a+QjBNq|UMc{n$iSqNH@n2CYe3VyTEngJ8Ys25pX*j{Ga zr7_g4!Wp#VT$+0--BoSv9!vk!!(G3hS)?^kLXB;c;%8&{x@QEc1&mkQT5%ws&nVt#_S1cJ4aOfv6%G#&RWC9x~Qk-pcb>IJr}ZX z1APt9gN)2fjY3RT8ZGXI?+$oYgY?yzet-P$yapt{MWCyBL-Y6`9DX#k>^Q8)h065y zT1FBBo_v-Ag7=;TIh@Lw=S>lE*%rB2g` zo4RlT>ErlQ0WrK0N0YC83{)P#9H}Vc0a=oFL4QmAm#IcsW!iAmdB3FfHfusJd5|t` zFH?ynpcIdWV7^#T{RFN201{%PoOt2YTbD`Ytl}t>D8t5LuNQ3uQh*;N{c|y6$=+Zy ze6r|K)Hp$VK(_Qv@F1A-eU$`W*qRW5OPdYvWT3_teAIU zM3lV}+yyutv81-E`TxphF7K6dBx+oXAOtvtuP|@m`GYMLE4xsF@Ak$JUc__#o)|&m zF%bG(^*!@X1JV5d0sH*^nSULEgZ~CeFv=xGE4S6cV z*}i%g>iAh*_%}c=L-B8c4}b2wWQ|+wrTr5%LE=lty&H$x5UlZrATf9L)L3}G&SXIY z3EkfkBL+I|-9H2ru@XBT0XT&>K*IW!hdeWWIOpZ&s z5`(UJm#$H0uj>j=2xCXvvv;KceEfjm0=&ZzX|+lZI1v8^-y#N(@vtjc>F};t&%qv} zPqlT?n{5}P=5MM~OkI*Qu%yuDKg~FS9-53xE1R@tq;!{fNiUWZYXz+&zPEhGrb}X_ zE1}&53K0Hda=N+dEDw>9kI99;n_ZW3*LEGcmFY`>i2{k@p0U}Ty1g9D&8)N!la7x( zYRmHb1go24ZNgM3sbeH)DaoJsMj_38Fv&#iv@qx&v-l}pK*vD*m;bC ziS~S^+B25Tk0F&>r%~2k-wnGZ6ehgFU{}1)bt^=U_{)~$Z%xMLi$^TosL&!`8rEdz z-m_4E0oiRdI{swC3xLMD%uP6!J@POI36NGpI=&Fm5pUKRpBkupM^94W8>c?k0=_$ozjE3_Z&0bB zs_S7f0*}ZNHR^|`Y#SY4Q^cVCRp!>Z5QR=)YJWSzx_7SuU=QbPJ zyJyCr8&ehp5Nf$@6G(pX!2m?X5)0VNd&pZJL&u~Jkb{4-^Co|=^T_|O^Cf_t|0MwZ zhwOZd7492$6UVXEd=>p5F5t;!$pPLO?vIaCIN?#(XuDKXr#lUw(pOSGnQMr^sk1JhS1duqWHUK$(|l>U)M6wk z7pHd>x)VZGf7s=5-`^Mev}*2?Nn4A^;|~IOp@bQy2N^O(h9tFt+*+nj0H|ItKj1kA ziZ%~I!@+AOpSE5ZaddwmH64&DE*~xpexZ>LxlE<8>G5oF0rY&fI=-Diwa{MWJ*`z8 zH>y8sOufuZc`0=A!uK4Iu?{I`xdg3uE4b5~3D^+}fo%%00ViNp0$! 
zwm2KjOASh1>rZ~R5kwh(zESt%tsQAUr&}^HxwW+x)jm-gRQ@?VFXfJ3Q-O|!+bpVk z;;Mxw`LI%XCZleV%;PK`77r4yFUHmTuM}wN@9Xu@MP^Kd&T-Rq!N|r`zBHD7@w3^@ zOLmvIzD;6m&?--+^n@V_zt!OVq#bOg%3FA-e3FS&ySfh>#n+7 z=gdXHr8Vaoa0Uc>>K)#x&=YkK9SgUpKQ$ms%*vQWys5pcetcIbMkuJ2;8=u7>(b-# zX4Ol(g#r3m2^;52Hu}(Or&ARtr(3%XtG3F`H;PS>vgP2K5M87qb_LT^KsI66N{Phc zHmuK6#>kvx?8owtD-RtiL1tuJKJ?h6%Vgv3E)j+&&~_)Wo-D~m(IFeADus99sm5Tc zh+HE#oOrewKb zJWAKk_v~i98ez$XiY;DagMhZs_3l})Z5MXp+3h~PAO2ALo!!*Px7$DL9>+FSEX99# zZ1H-ZqU+$;1KHCp{KjDvlGPa?L$13VO&@=~1ViBY0a}9%I{p-_2KXFQyO0=*Y7z5u zpVB&b7^4&i9*K$yCNGE0M4aZ3%6?XiLHlyO4f^hFVXDH8JNI~=oZXM+>{C^rGZP{? zcoR~jctOVU$=bG{XU06bze!ePaD_kno^}qM)|iTv|NXIL`xUIfqD|a$GD@ZZ7T$NY zc*RpSy#7~FMs4Z)A6jW2agH-mKz8P|pcIHkA_ra=vl56`bb2C^`{rJfv}1odgV~RJ z6mx8G^gPU(Y%59HD&W&Q{$j%-?%sHxoGF2~*e?k`-t+Q1Cp{5)quPy`B#&$8T~mdq z`HSN6-r;qJGY-4%lpU0_Y!63VFe*#|EMHDtOU|iaeXPfV4UBtqpsy9;WB17oEM{FJ5 z>aSDG-MWw`+@%1`GHr?lRM%}@tw^?9o7%7}J%*vhkg z29&N`&^=p<^PI}MG>Sc&btnFv8qO&!^11wTisV$~dANF-xEH>VPt=JKU-AQ0Wm$&U zHr}2X&3D0G`CDR{2SYK^1wnn^9b{^2A2-c=SV->0T*KRub*fy3jVjFi$Wuwg92!)9 z42|NP&&_`wcUHAEMDIpyPr2qTESrirUca`1RUemutC#QS{RG_!KLe)*8jZwK!O!EO z?_drprZ45WagoGh!Y$81jsQUki;^rZ3^>I9YF^hndxa;YVmFvV2cyfx z<^{e}9P=|gGBLfuUlhT_@~SxRf`~3F)XQEOpbS!VEI_-iDtOcf1G0A#Z?`h>ODbJY zsKs$cli6p8DqRcxDVU+iytvUvLboL}JB<`Iw5VVUfs!!^K78E5tn*^gaN8XYoYmeP z;z(80v6Z#0l4oUJk5Ehw^+m8fFNJxC-&}mi{*1<+*4Wh=GnE_6Krx$?u0lyJbs->H zaN>s@>QXogZh@F?WN|f~3T(QyMqlborrHYQd}3&>S>vBPhnN!Usla6nTCh1j4d8)_ zZ(Ex<+SnSzMisw`H4$kg3FA7DL*%HaI3#XllUYsVa$1k6biBwbCvP&Pn?Nd+IhBh! 
zTUFKE5ZP<&;Se+Ql)J0kE0@E*ts+jyC0_JSJ<%K$dvmj*mqE?$`bYEWrS8LL9&Kb_ zsZKQ8AuR7Ph}JiB?6?9S50m49EJ{PoFUfrZ2^Z7kTMbN;+yWKflI70LetXmUNFyDL z7hu^cU03(IwZ)&J(uNmHS-X^<8=a?ud;8Tk41=q)8U{~-K!!||@C=WSf zq7AFSYskAkNhro6GPG=-3uDjU##ebjW1$o+ zmvAq3IvyR2HCjwtT`t)Dp3D^DUnzWP{|p^(DicW%#_ces8B2=*hx)r;Gf)|2DI|2HB+SL@ z)*J?q{W*Z)FCCMpnf*_%a{)P7LezxN1FSW9DBv4SX6)dDr0Gr$(}22RY3%GKFtz#8 zBA}aG*(cI==b@Sa*WzE68ubfC_Po)EhmViYpt(A}YOvDi#UlVcn6OwJUw!ZoW7U5K zI{5uFego$JPjDSceuI=9)uU{Da5-`Vz+P4_HHcBW13aZb_~!WddiE|2fKHU(!IuE0 ze5U>cagak>@VDT(KXab?lN(VrBb?y(3&)BBKS4->F1k!0H+gZ%d#=CF%sDXi0)x^|nCEC7p-eHl zp~c1@?FuWy3!grq+Bh&gqgP)pxv-t@RDi#Xy3+x2wvPKDK9i-*8nn+2RkJvUm&2g| z)9-CUU{N&JCkE-_K-VG+ip*4pp1g}pd$4i(Er?__V6$xbEzyZoj1&fFE*0A zQkedIVo;Lu8Cb6WIUppsgC06L0z%}2V;hlsZNhB|QZF>RjM?b>I$lyLy`J#TP z#4qlUafV-~y+$V|$3$|d3ACzEG{op8+T?os-f)7KM6z!`K%>Kyb2gRJdt04bxn8J2 z7;&;~z*d?)e?Q;eWyS1snQ~WGe`Zl&pt;8J>s{43Ms0)-ZmDpCcoi2gHydemb|t%l z9BGFl;czOAt#-fnjeV6C?28o(vtT$73p2mqoR_;=&7%m>X zn<_-d9$qQ^UhOFs9Lb)j@kGat)RMM7aRw=of|cB&ot8I!Ydg|XfJJVpm3!yi%)b8B zJ!OPuoh|>(;NIfo0(Fs6HI>$a(p&eo<(5ZD@)r`{rQA|rEG@mJ#zn8Ts!j?uhMkA^ zN@98l!A<}WL1G?i?uhY3i?*iUnQS7%fj{kgO*Xd^GK;r|ZqbgPMISmdXq9W0kI=wM z@j_w3@u69qb(!Re{RP+KN+sBhLA>$Th4-fCXWM6t&~*`)B06{NjwDzr8`^{(?DAQE z`|7H;5fgWr?BL?y_Uzs2m&Ae2kEY#~-3xz+PRF=V!NgI8%$l_Jl79KuXQj_Yds~4& z*d*L~zW;DYded1k+@*27$BI; z=)tzrtR^IKmz1|QggK1YH{pHK!y_wgsOlM#7m!e(l-XYs)71=0C63m(#pPP3n8nW( z-wAL$)xqDNz{_4OeAz6o`0b7tsG+eTGU_wm($Xu;6S5p;Sq@sArx=v74dOyxq^HDzWFnrP$0s zmpgsm;f;1=?L;1b8LU_rxB2{#mb)5=amMt!_|%);Zr5{?;$X5CTCEL>>t@8hDbLVc z2Zf2c`745_H1AQIyg^o`UF^?Tz5HY1VEX2qnC$t%d<;UrxxS`K=k$)6zB8|vc#^e< zeoE^zhK6taR6$AJif9*Yy+Y$Uok+%-xPJi%)RFAyB z@rr@2s>f#~o=`2mEFUS7{WQD;%?^Qv0pL?!0Kk{=kl8B3m>7d*cp(LtoNbgBU^$+w z04)pj{MAtI4O0DbW$xMXw|bR}k#BYWaPUgrsS zd}Q^*N6P`6UySF~01jQ7NX`pS=3cT2!5T7o&Y^G9Xhe))Loyl}HBZOFzBQt?V;v*o z8Iu=&*VXl51%eZsy}5@4w@tXMhh6s2Ip-n9UmIhb2o}hYp_(L1uD(W=^xV{_0};`< z(qZPhp>QqECPnGPT$-Z5?G^GkHE@Ea-t&Zjp04Z~-7?4)L+@6AdpYu2PcjVs| 
z*H2Be_Q#ODbk&yI&(Rzt(qrR3%>_7cyQwzVJ&g=<@7-N8U%MC_cK^hKO-}k`>AeHP z`iVhm8DomhBwwc5I(ZZu5U}$3bECO!yH5yp;rm**lXW}%=3Yf9;YZ{*i=isPNoQb9 z_J?=ZYFVZ{TKr2MCNT|}73FHKUM4sF2Rtm2lyaa{>-=2+vbKlslUZ;R0pTfm?_gpZ zkY~sKQPoHwp%?cPWQ929`bCS%auG=RmH@aQVhZ)F3z#+ZKQV%QDz2AfZS8mlAr0 zL3ukFe+LEaPsAGTjk(W2jVjZ0$DQ6a*SvK~y7Efl+R;7AlIpwQuo>c@#aZWan${6> znjV3R_33wsX9mF^yF4>w+kb*Us~iSDqVw&(-f)Dohpi~4FWne7*AV!8I1{x4sKtoY z;;v#EN9Ki_TjN1Rk7iTxc{z}Q2>KCCf|z6w>13Q zDO#h#_2`A&y9s03qZTfl7-qB`b;U1#HBtJDt?<j%Apmuze`iT8B2~;GOX! zur0#{erM;xW&S;rS9d978qhQ6I&0yU*fSv@F3jvoo1Y` z(y5*+cu}CskC}-i%?3b|VTmk3j*c8_UpskqJUK`E$vCy1En1_b7f*m)2lM8BX1Q+c zp^@kJ_8<1v7HwqD^fz;iYJq{;qvL~rf@qcaT+bl1 zhyLh#!_L2}EsX#m5Tg`i8zfXhQH#h`E7F<#B~uSi_32&|D8FE>RzW$mC9R;n_As@g zR9~Z^{ap-5a)=awsr5dNJMn)U+nR5xeeL93pt{=ewS<($^j)CRjk|NO*hhoZai)bwad-S-R=$Y>|jmkL9w8O0B*8O(ZSs?1Z=LEZsrn`@Y)K{a8!9apu_=S^?7!dq_yYL0On zURAt3>h32sy8q^3>DD_otsM1O@bCi?qoN#YCwRw{Sdex148g&(_Y=H% zF##J1qbl@xncUtqz6U;x(A zC<}=Et8Pn>Q2w@n=x@-(`5$8AfH|FdA~gvfE`RV#03SZ_6&!URAM4BgJ6-he zWV!smNIixB2B~1`Efb4hk7}E_d$WbQ}uMWX{f&qF@Uwgm= zEFeC>V1DB#h&mHZcS5}md1dj-`Q9G@^pZ1}d#n!p5`E%FOJq9X3$(h(3r}Ybh&^mz z(uiSl(49y)E((hF#REgijiWoAw;*0@4k!NcNT9zOCc&}|E_wPIZhv)1Ip=%y zt2?Rxt6?%PLVtCEes%daD9#;cfA2WG6%KDb*!ll-C!zm#Bf}3Xh-g_R@nKXH{Fc!d!@&K&T?KfZvkj_2Gaa03|GFQp}aDR=$P zew?Hm@I3f{&A1#WCdx%+{qd{cYZm^8BE4VN3;Z*@2LA>bTIwx}e-ASxfiUAfkYPi9 zM%^j-RiF{RD@8Io{EsgglC*O8ALS`BW-B%Eu_q`(a1X$ZAYAz?;3%2qtdzd`-%UkR zVJ9GB zf7w^;^J%dKDe&!eQh#j-e}<1IMi2krLdQ#lf9`{BAodYA5K7TR4Il7@LYd}!{1Qll!(D+!8`~GjhwSKAO`pb!*|K3V2x-huo z(D6BM7vd*CEt(%u<_|=%7vsW#Z1AZIfvINKXv+uaQshDdP}V;65JOT95W&br{jWbk zn-G9l^e(kUG71{5GoHobgy~!bz$P5u3gDpSkpAI;gB-p_NtY?kbIF{*a@`ymMsuAK z`c@mnM0J^hvV>Q6z8xO!-Nl&{e4iZ>_%d*{RZN)GihK|(qCNj9>iF4*#}}erj2MoN zZG8xJ&Eu&1epr0@U9(%nUv&NuJh)>o4{J=GNn>S(j!(k0Q_`i&9kSwNFDdzW=UXfZ zZ)ymh092t`+cM@{{@C$Kul7@~dupbxopHjG9ZQvEp;=dMeXkoG7CgzU#+19;7@k6M z0LamM<_KSMUYls4r@2Cz1noB#bz{c`fuKoXBJIv6O(BYmQ 
z&c?&-u~O>jxi_vnmo86=eeO$eY=b_faeFk@8~}GIT4$Bjrx7wbD^4tSH{a?oFKohS@}R$c$e5B2vtWrlxVnx{Jj3;x1#Ja6__fqy_2 zV(VX776Q0sv=Y!`b^zHYrmT9bBm(Zyzwa~uz7GClJ^a7=SNQxr?4{86PaIf>Z*~;h zx&iDY;Z&d{gjfbodmVKtrwKWZ|PD6pcJ@@>H(GVbsS9=E=vR?Nu`r z-YBZ{BY&poE6;Qif{`QaX_J%U{6W=Y4-;_w_?%@2@%Wkbo)pREn9|DYBEzR2hgmeI`?0?4Bkl!FdL_K5uOEk32vE2NJ1WWj? z^vDKeml|M$xT2*0JwSpc=Fn#SUn~CiulWBl0zL_5=x^=mxBhCl)csr&v*ugXKG4FJ z$<3s1LGF)vAS)|#=(#&H)AzBBDlJL#f`NgCj<^yCe9yt*SnRL}8#|c&e(eWsMMB|W zqGNPfA$b=&36Kt{97=+JXP$o9-h9VToSy#Z{|S0FV+1{#L|nvOz$B{hI~cJ} zHOI2V7F0%Fciq!!P2{Mv1odat^U^jz9yvm)knLMKdVV^};?--1%yIK)ZA8N>F3hBS zI-=_|8nd!rlCvk6yE+{NAenJ|6D3^Sf}4wPYAtP%Sz%(%WD)345k>w^oo!yU`j z$h9vEwa%50jqx~c=zZ%UYO4U*B{OIEz8h*~B?geCpF=Xv;vDsl^DC3>T(9w8mxu%B zT>*198m&S$7l5W%*+3epybgLIzmX?jNaf=sci$^^JUb=5Kt?={Lr6>qF$VMp#tn6} zfJX)q^Ex%P;fngt8?`x4i-J?3IqON^`I(#UN8m&W><<9Ynv<5D3_Mk-Y1sT-DeKZy`GU7V6tju2d~KtL7iib zsAj+Vj-TvmY5P58eC>ldKbe4%br(AC_?uYs6qXwFX?1heo4f*VzT(Qa%#Xj*)A)bD zCLacTEp_M2X2S8XR3+=h^gp+4$mEaod3wDw#fF{VmfXEFIno6DXjGTeU zKyntC3|nrEF6_C(2lTa&33e*H^H|Dr%e73n1WeVJuK`+F9{S30ymMHBf=w^b$g0Lx zX4n+I?ikSt>xCS(U%)$lX`y#;!|Bb}M!7Jx&N|54&wCyZy_PV$T}6g7a)t=zDIuq7 zcB-dP+p>i(Lq2}r@|$Ql@hCE%UQ1KfGHSv1OQ^2#gCkUHOJaabz zdXM^`ju@s3cka%8Zd2<9-(GG)Or7h5OhV2bNNY^X6x{2E!ljngaNP(CA=nop2XP7) zCT)M;okN69MNd!qC&-KK1plRjFoo{NNC^a&M3p0)kO)IJbkTixLBn^`*6WDKpCAtR zoiK&`f;69kJN_tF_708pKv0*f8hE5)k&x%l?SwvOao(&=!pyunnl>x{(fy^0drS2O zw~v$#KwKPHv8f)S#tbX5nT{Ciu&hGvf1wH&>E==D9rV_o!><%(YHOc$PnJnU-oT*k zndRFUh5;}A{*O9j)CPX#@jVG*%}}UIzpwy<2_pf zagNyF4k%;d);{LY_tn{D9eY_3R$dkjE`^t>L*!m$QFBCI*iaVfoA$I(oI~qcW9%zB zDcYpZZC@Nrtizv@%ZO*?%&k=5X3@GODzR8dM_q#ArHxg~fI@cd*BWytG2-*Zav~Ix zbnIr4~CtcR~?;m113oj&`cI{0@WANJ&K5uAHK2*wdaOv%(7LH$cyx`3=osk!DDt zmQ}J5jlk%Ywo?Uq(g5zp(<|I(5fp&Aja1>c`0k+498VitC{Y?D=d<`Y`>F{}l}*%l zE>L0d#nQ1e_v0){Ho}i+4E~EN?vX=UU0KDND{SIu{@Hh&^QCBI#-(7A1Mvj7(9uqK zHai}IWgvDv=KJRGScg+F25hPDdXljmoO4{!3oH9Dd{KU=E~Uf3$`IJ?(guYKYY?o(g7O()4Hi!WJmtp{NzJ-<)695FqnZ~ 
z;;o_4FzzYu+6jovuA}DDP(dA7-1Ao+R<>ndov5c<1K4me-s(704;8&ljYT`|^59?a&Nop};TIq|*qvCw) z7foRX(>R^O_n4LnFMt*=b?k1K^0KsDO^Hm&X4*l5F5o@wqS5nw;t-N*e}k#|jn=7q zkn7%%Na*(yd}%QfLoW;zxyA9E#Q=4Jct=7ay7%Gqoo8tjEKbW5Paju3UyQl@I7ydI zIo5a5!Ps2FIaJ6=<>V*m4PO7-Ew)=Dhpy4L338g+`Zkfld4YX>FDR8MmD|cWd*BTI z;+P#>6F~ex*J~j0OtY%gNFvH1NsfCm0bH)d+zlJz2p#wdYA+Mp(lwn{`g%nvGxEZe zN)#>lBO;{bvUmGR50A`P&4(vq&H{E$M)GILpM;Q&di_vYM5PDzDc6d+KP5~m`=17^ zh(f?akM1T9L$1#dCr}o4P)p1dCCS1-gTQCtNA(1qpj>ib!!0b>(9q@B;%Sv=_;V*A z^<}vvcCK8Kn}Jshm4h#X3*4fj(b=0TXg_7DS$%9*#h8b1wK28GVB&NbD5O0_WC0hs zphVKGcNLp<2>a@uO=pTLX=6|>v0qy@HQu(@p9e3yi|Sla+ha(gio2$%5^Q{P{ghI@ z(C>;na`uE9viTyrD$4#y{YwUsSmt?@SU|5Vo!WrKsn7(&Uhwo7PEN6d#`ip+I!q3T5W}3UHH}*7nH-abQ zIvGAAkQZ<{$^Kj}kr(djrrz#?hM18pIUP*dtZh0DeW@%ObzgsMhE^0n~#TP zObTe?k7Ay{QuZ!|zu!<%!kV?DF@*(a8UZbmUM26zi|ooT&QVwSQ4v7HP+Niy6f<*l+(at#B)x$SgGl^QwBh>@IbPIX&=&EUSr$axHzzEBKG%z_>Qd6T5bBKYJ*7og>sHQo)U z4DKj-<79o^o_}~}Chx4^mxHd6>LS5$dM-{=iokJlYDbn8wIi%GCbdU}&{WZu3C>@*v z*y;a_f#2UdCax#&T?s%~k0AVYV+cS3e&K3bteJ=ZQ%19Y?~nTH|96iO)H_E7dL$f- zQn~2ur#ZuPFQ9hrc_kM!c!s!&mk22N?16~nxj62n?!x;q1Yl@E;XKUztC zVR!YJ4>LVyyRX}d!lER4b`j#^Euz=oU=di8ktJfExk<$IcW<2R=F$M>mqcA(Q(}*VLdCb#R zXeh|-l~JYrEM@sDUGXgcjmGQwLxlZFvR&yJdq4;tFKqqtaLauGS6~Ilj!z-;LH?g3 zcCTrTHSq*eC*zFNZRd8FEG(zjsx0?KTY%Qrc8KD#XClw;_gB_M%>?kN=IxV~eFYE7 zAgTOyRH%1coE5`f^!x;Ilf!Dhn&5eqI6vvF^F^Pe5>vLu?t)V~Fv%Tj!O8<;;olN9 zi7)jw9wquqU|zvtKeVAgQjmxDguem^p|lFbTUo)r4_2uKfAe6Wm3W?8Y7)uabAmEb z2fZk4<2euAqmDiR+~LUPGicV0g2K5$XtI%oI>5#s zn5K!t8`m|7N(G-}qCnV^_~64bpxI7Fa$$917~#fMOEZ6d6kl&mW8GWY&PvFlCQw5P z=&cL=@CJ9K8jBS+M=5q1-X>CM6NJ<1tDCCeqTB4eF4{~V@Ua2;`u$AzbK{;E(;pau zf^i%mTtU}_pz;#Wr$x0JPzK4$Sw=V+dQ#~qxv`( z+{Q$*`ADy;zv!^1?2$;x4=vgW;C;Gk&`yP80Mc%1g4 z`{TuiB^`Rlg9#)j+*a@SY8DINou@-aE^_We5~u#OmD}hjBX8apZqTC6!kRfwMFcUq<`61wKrUNqYVeS&PC) zaIg|WHHi_a<*+X3?rmxKDGlI*y`R`8X(LW4{HS@q!NYn-h`8MQq^?nvH)VnPMo-KZ zpB&JvDupPx!2$1_tfqnAt8c7AU%+=YZkL!tX{pst{q=L<_VY6W%fDen)g^tA-mkr-9&P@_5OmEi6o{ zq1$;PjtFjH@^0#K!{5ckoOd+ZRTr(W;mkAbGe{{^?NfV3uf`iZW|Twxda||CB^D=) 
zR%UKqtjM^^F|Iiy$=X>OATG=-IDEzx#x}gwtbBT7E#pl3RhI(qN@1;jk1I47^`RC( z52|li-DjuEf%X=AqZx%5!#@ZyTq`YuWxBdpop0 z%={^EyBC{0O#&eLekL#BaeXdnKHPif&w>uRK(KbBbo|qX`YPQ0`KA(W^Rs!QqU`*! zQI(nD2^y5rYq$E=|D%EUccV1CGvI`%13|&*8K7${MIcm2>AXGGWnQ#IG;(f(aGiKa z$+5I>`aGIJScqjtqzLIVX34cTTIL&%zdSURObpoh3Bovm0k~>n=og_L0lfzRJ%S79 zu6EzaqHv!CcT~Oxsn+Ybj53@MK)Kg5d~{}9*S-KA{&9tyB7kk~6g8e*<+a6Gj6*%J zAi-_lOuL`RUW&M0%zj8Zz!g<*cB&@SN*$=Sl6>fKkW!Fuqml`y`fg&TF|2bwrX=rW zX{q8%S{j%n?$!k~A}+%qD`_e3BbMKAm|1>?VbGEoI{XBY z;CN1)I5l4CF*wL9zJE8<>AAY_vcQUy-)piIeBBMu{MAv1`Q12uVIH~nd=B@8e_v;W zjqXE8@G;ei3);44mK1O3_BdZ#Agr|5xYYS$r9@mAC#j*T;@mb_QqEm)ozaTmmKip< z>$&%!UbqT(%NxClk5PQ?S6{zR9*__TGESqZ(NCkIJu4``05Qi~;p%KK@EesAJJoB_ z{GS?6uW7y2c(7^vU5BivKMG%tOTm)P_E?;E@rOG;Ijk_#apJ8@?6`KSq~EP(DZaG@|vcPgj7Z>KT|NaiWC}Xm^TzGPO39KU2!5S9ej) zHc7fyEVgX)QV9A?>{x^b$Ovz^Ebo(?MCU2?zJJ)u$MUo;B1ua$_%vw*wFr>&B-@2ZN_Vg#v~t^SW+HQ7o{4a&vMOCJfqiYqX<-a z`3|X^PiPQ}`4-%U6|tCc9C=8*NDp2zZ^QH-A~3e`9*%H2N0xgyxPFu31&Pm<(@j|s z0`2qz_zMZ7-h*1?Wa}9?Kzd0cXZ`O!|7p_e`tED%anl7#4@3yLX|Ex15-H5lL zr+>}-7@NF66W_KatT!%J{mnyQOwf+B>tgqV;_Y4$TF2edo@{!j=Eqpwm3bx>7bof? 
zLlo)EYe^DPjrYzVT2EiE9J;A>;Uy#x&ec#PVimh(TIj34mW$Xl#2J&1+>XoZyS`|7 zS!scJ-}w{5?g!h5v&Rf8GtpNCJo+^lZ$b2*Fi zbkbnvKNtNWwEmgb!*PB&<;n*qLAn^^R!y$Cxu((g}fLxZd9qr-#D>psd_h~>1XZvQtN4C9p8t2m7bRh!EMXUWDDRe&V2kutaM!% z!rl0QT~uTQqr+4j2u{*Eocplo<1_RL6u_tMkHB&QQZ%+fOLru1S0$T{``7s7MyYXK zlLYxUT)NQmJnMRH&vmu==G2bEh2$-{aixoA8k8-Y`-@JbRArZ|Nlwm~`(aJG8pDHB zcs9(hj=YO<{3IigB=uH2CN=T?GF6|itd-Dgpvnbri+RJBSr;%hy%~UHVnzM5h=e6$ zl9(0A`dM%hzfrdZH`4x&uptn`V#=hn7wUB)foWC4{-9vP9%JGB9bM}x32fcjhtF}} z=3Qc-m;?Z8P`9kd)Sy%;A{}WF5h5ZYAiYFHqzQ<02t;}jh=70~9T5>B(xoNz-lX>ust`&jA*A^& z&voX^o_+SrH?!xP>y&T*;8NDgN?32+^}f&Z+|PaA#AEno9ij}imO#B}p+mOD)xrx{ zlrwGj%F-KFRi07`_xl%+NGG&_w`_l#Y!cF%dGSFG+f_yJL|%>8cj(fqvlwZA2cq+r z$raPzAqoEvxaq%j=lu=1$!HaDTQylhG&KxW`S;_#R8rwbd$26#UFfF%0>{Tq5p8^E znoV1p>FfrbqV@Y=j!YYP@K#ejF6x_AOt_p`?m+iau7&vq<^^+T0NV>uXUC(S%xx=B zKjp+3k^n|g$E6o+5~&46unU#O(~|kc+mNyu|m+}S&6rA#>9l>YA2-P!W;O5 z=;p(|^kWIs&0T9Ct(bz{haZ3{s%4`;CwiS^ zOd;%~PE|#@+W^`6W0#_q%q^e7+I`em7bTyVJ3oav?mhV%r0LydRvzXj*G6||={?X2 zU3RGRLx!N!EIvu!l)4grGXJeh?%;vr6XRva;L6*CXrCzJG;YdM*4-CdZx}T?>p}a}hSfQI4#JXV3gI9U?6tJikcOp>Xt?e5y zTevN8Z((?cIu-`!@T>{l%H>cK>)|_ktkHZvyp3%_lg@>%;5G;hBGS)yqnIgoO6qny zOMZSb>yDJz4O9D&FeEzfx^{X~Lr25goZsPKBw>3Q(YFv3oU7p>Z11+k6rI5V_=x2% zv{mG{eOwI#SEz2RsFHh_JVB^M*dk>2KV?(iaW7H?{CS9|&?I~9& zvY1uN+Lo^-F`~l6Cr%_<1kU4$#H~-26&TcyC4{{=hdNdSeA>kH*6!BJWP>=>MY%I4 zo|3;X9gP*!x&fS~bBAvdR}fiI80Mi&`5?uPSe)t~7;ChAkyku3F~Q;(S;Cc9bJ*sY z!_+di;T5)TtR%NURlHN#o}53`4&o(N3Y|oMd#y8CH@zS#a3^mBL z|0BYq4X99ke-DHaH-)2(D3JvejI1zbOF|`Z7{EO^ynvPmpmSqDJ3yOwnFd8LA;up8 z!4Q-`pmZIvdmfnK#gd3M0gON$&``mP3CG|BfpS1@d@L7@k@si7L&>?I{DW!g>AxRS zk(@ia0)UM1=j6!gI+;r`;K@&&0FjNQ12V(@L@^-4eS+!+Qw``TbXH+)u_AB!yaLL; z2p%Simy>H|1tj=p1(%c#K0W${4sKxd&`RTeWS?$-DO&Y6$lGu19T4o-+V7BdSMT9Z zZR7maM|EUWUCO#n4FU$>hF8zWY8(TN_NA5h+_Q!nac5+XbT4~+w^wryhyrlt%_o(M zSmsD1)oKvC2ie{qr?`@yp$kW7aumry_pxC_mDu)~f*U7G_W0eRL(eE&ULU8cWD>G!@N1wq()|YS@L1Ufjj?VAiCgwt_!8zKhPyYW=yNowD@}>bR<+7hg>g`X z@GOxFI$(Q+!w73h<0-}MYhE8F;u4=v%~@ZVek~E#!8{SvJb)6_5Nl5VjKhFiCPNiG 
zxE_q6i@Ql%zMp)do42uREky!_SM;LO_ln*%9%%pM^m9USiElTZX14-xm6Ebfvt=v(R(LtG(4;{3aS-mCQ%IqqTUofM zKT|2h7Ih2Gg#_k|$%@4$X-ggzND=-HiIR4&ZjT?U#6H}gd~QxlJQT+bVf~NQF*z-u zs_xc&H8+==&^mTQ&uhzIT|+U7xQDe3DW;rX^w)yA+hI+j3CJOLMlFp7GSmCT+~YTF z`o?<XFn!JLNs(ET9i-*$q6`dHzRu;O`z&5hwb+s)4^So4Ki8A9=+l zjYs5q?9q738vt+cceXK<-s4nSt~y%tyILTGwU>oU?d&#V{eNMA21e(xDE%%nY(n6o z$K3K(_f20F<=ZGNS|c?d@ZKse1L&W@08D1Yx&y$wq6tzmoA0&Plh<-elBM z=X_ou7`=OXJBo~=vFbF;D zn^Nb5<@#J$n8(-QsL$)45{F7vRH-Q}r-+$s$s9>t^X;_5E+(3Sc0jbz5;F8)(`;co zTFm2pWc=QPA0pCBiG1fimB@Y8c!F;?rCu4J$-|{b%YPiOjAoU*&gyh=pEdt7GiFa7 zKQIrTto9D4H}uFgz341vrv0!?SjtU5s^)g)r1_lymT5LLv9@aWH)wX+4H*9|R3^d( zawG)Ej|K?<*x8N&6B5Sytq$SgltpwevuB5gsYxBj*u9U*4tmG%ejOwlrG2|2`sQ!_2E|Qd{deK6xcihn`=oe?V9$2YupbJ|{|uZXO&yx9eU7 zCa{h|_RpqL&!PGue|=;UsC6}k=TML}c{@A+z~XiXxDxcQ{QN`Gf}RabiO!|f{02SP zSJ(r9*(DqB!5GOX`~Oa=+5P*EO=3Yy3Ffc}idPQN&mGv#9BRS4ZBhe3C3lAPh-?&0 z3~ozZjnIO+iYL7ERb{!mNG;wZ;@3`s{8TV_k$Zz)s?P%EUdgxEUJTx%01HTFQ=PsA zKB*piywm5E^_$J2Ljj;-3ksr1)xC3H(D*OPnQjTKq_R?r9E!DY&=+-=S z*k5cWPm!_WAdUH!l4P^$cyQ(B55Qp2q!yf_T3v*z@QsMOpPK1n-P5$W^CO{ zS_vY+!Y#%FI_^KTmDx-`Tjyp;^g^voeU4O_alSprzz>Y0y#lhCclOOKY1pL=(-R-d zr;_gVJ#^aBm@`lc;LY?UAekt7L*j&~4NU@DKS$fcdsJW4V%#=NX@CCb9k64&sGCM< z)56a-K*;LiIz#?g2Z<@F0>#*wuHv?QIVqH8H?@NUG^6|gi-+rWqy;m_T!KhhTRVgo zOfW-s%#&WcQU1#kz`yJNe+?8QlKEkP{p~SZZK*|AXzy4z5K{vg8s>WW9mL#2Av(gd zKj?2X#0hnDL0X)A21&grX8y;8>jO~zBya!=qh`^D2w}Q5Qi*M0>Jq|7HfQ$>(Ay$! 
zVs_wa1}QOri8<>5<#|IIZV6x^EJlHLF;n3*HOtEYP~nK8GAh(!At!(p(TL1L^fIhO zdaQ;&^IPJ1ZMFBwWNhuiHe|~{{9j%`@*^P1Du-20c)xBUNQl+jm=doTOnIG10g+|>VQ+3P;p0;sO6~OJAIH|>gLKGPbQUoVON7(UTR65RT&&fp zd=Vk1k%GQ&L_5hVgzDK2Pfmu=LF}w@9VD+*gdObHCOgwjPG7j5SYI6r%R+fkE9zX9 z#@8@RMSRWECBx1QO1OvZXEI+nt#W@AqU!-ndBqvDvB*ePwCr$WsxqTMa!-Jl#f-Co zpz?i~M1Aaep92gf{W=hY;1K(ZFn?N429QpKNG+(O9$v)Nk9H2{cdTAMwk(dicv2px zi3$`07QJ7rw00on$!7;q15o{_DYJHtm7i#Clg^f>>vC_v*xRuuA*%HeDqkJ9$!{aX z#FlHSU9a2%y0J$!n$d6};MXayj87n#0fr_;8phgB@qS-U5Mb*G{?xh-0X^ zN{*jm#M{_CYY|rGf)=0GxP2|m`OcIWdcVPXt{xN+6wqsV8yFCvo$bTeB^UHhfKOxL zme6@CaJgj5>z=+~?TZ>r%-rt%Iv7wgu0;ZFRE?P>tFed$FfkYsvb8j2j}`LF5)5<$ ziGzn;yTD!>qW6eEfXer_6_~PTe*hWYdB7MbpdWm>4ziQmR)xNf=abW)GPf@^`8s)B z@dzm`d0egL{Ez6pv{g={3B=AMS^Snr&0gxa?#xUBi!U=G%CDGB+mDL92sy-R(xV zh;8Uc3o#pk#M!Gm==1Z$$tSK=bpA~|9;X6oU9t=ZJa@XqrJTsv9TBi(>0 z4o|`q#Bn?T2=Pz8{A`jpSbUR^hD%+yz(i;~97C^7aw7pHEipmW(r}&95UXods_abq zp>zq;K$i)~0ic~9+Y{f^$1@%_%rRoh_9Gv%hezyy@Pu+Vinv=$m#HWSbA3rA>d3`lyFnBI zR{OP@+S9b{)H$-OBQ|sLGVPHXuyLY_6#p!IYjpeh&6#I_*`TFw0f?xB%fF-5pPS_9 z*NL3Fbs`};R%GBy&YLroFVydtrSEdo;3CY&vGw|ZCjsNIU1JVf zL^l;zEAgW7#G6-8Qz~!plc9Z`)6L%;%8Dw|6Qi`HQHQ#hZW(800bPqx2uGI)6_GK> z_kb(E?oJ5AbGe;%T~fQCjCXVumJsL*Qn^Ym0*Q7o2I zC=e9Vl;6GsE_Ng8H^VN%#d4~m_s2bS3Es{IwkJa4?}x2iQf($8uT_Yey) zH~SPV#Kga0&FpqB@WnNmGrpEI&Zyrw_}B#Ay|I>gu;BEEM~D$a%ON_u*CB*ULx5oU z6Tm%2e_<{Z%n0W%_*}No&opTa)A;sQ`hnLM*Dz4LYh3#vnx0xzhbZvQZ^y$GhqzMP zH6#imnJ!yC%!>E-6bX6k%Qag;@_+$5=9QuYKU6ID^@^eH^BPW`&ntwnvU}`swQ#>Vyl5q1MgH39Cqk~rA4f<$~ULXN0-(83Q!kqWBA%{T#^@N)&VWLuIt>nJr@Hj zg9gLZ7ixQ9ji%2PRH|(*YhUq~v1!ZfW42^=;-o7m5he)LB}942Hth z4)}7qBbvSgQ*Cw83`lFI5tQURTriw9{?yi(C7S5wf5T64JaxC%c(pr2$tNoJ=RlIm zuPw(EY6WEJt9>Tn1Q78&aCUT@w{6ILOFK<0$o5V)AJG1sX}5QJnQTzFsrfV>QWIas zZR~!k1mUD}B%EqX4FrOP!H=n+gYAhig2zr#d#EVIW3iTb#HGT0Z*#L)#qN4wbxf4I zm+PKQDTwDO=h#=cCGqUeUW*FD2;ZQ)qO#zV?-fbdLBFedbZCCji!D}ah1T}dj8bvE ze)P@6+~9@)jxheoxL<}2S)RvFfDDmQkR{U*fH~E*_#32N0SfR8XjPaQsX}^WG97&~ zowNTu-#*@bq9hdZxK!EyJmBUfM)b4YS!@juuNWw@9yJLVbZ*ML)h)Viv6oTwV|4>2 
zMr)hfH3AaD?*{F(=qNOnnhRNq@-C#+=K{&%&jI@O%u?_hILBfFwx6iw{y|2k#6P%W zuTfE{zucvbfjngdc|c_yP=e$T%^rl`+c`)xWMLskziPVm)#9xLMU!u=8T`E+#(|iY z`wem~+$Q~pbzJ|NkotGHL;eHo`PaZ61HW1=#|`_JsArm+t9Rp}Jm>z{9MA_b6kgGP zG?}HMig&>kSbKT#uLf7>d~aKY*wpB)Rnu@$Ue*JWC5OfO+SCL%l)r%;U2S`UZE5OL zJX$$8qJ5*i-hW8l?_Iw{2~r#~U+neG-@T{@o{truu)a5ZGQ{-MH}{SrF-n0_htiQ? zf}cdS&tv8Y4waBiSX6^d98ft~%>U-i*8v2I(uYi8v(qP7|zy;$V+#E2Q* zeAzTL&(O6wK9G@VAG2tM^YnQ`^(LJmM@7}$w2f@OX{t9=xvbl2@TrJC8-FvL zEC8!$s*02uj|ZtfFN;YT_d2#T$+}U2PCUpDpS&A79?FGo~bFBn>}d}=OWV1=Z4!;s*DNjo9Rv~Hu|z*vCOR3I-2P6 z?SUojaG!E&L%V-cu3)z+2uN6rl|-=VaxGq|JHxy~d6y$px|g>+=&fc}(6W^r*?kh* zuqQv&uYR+6`S5+=gH43R@gn;XuaDL9b1dl7nNjQ&NndLE2`=NCJsjsT>9wL}xA!h{ zxkj_&0$AWGkn?b98aMFpPbzL%F81!$q0nDzJ+}is`mWadsx#}gjU_UNzprl{?51%# z*F#Tg*E8I-BCD5I9!;&}3ivL*YI9ubr6(9WUA7piNWfa%`d$;Z+D+!3q3MTD)%MHx zNu+W58jU^C4Qm@~F3^bnmO&24eo+6w{)=*-2o@Z|gjBkQSkUz%fGdw+=!q4l-8@Z| zmE}Iq#V;GFyNEC_=;K0v6n#Wrjm$Y!J{6;hIus_>%Lg^YB>~g;sjlSY>uz)R(xK&g z2e0L)r@va)I`V_ge&MUEj(HJgAhb3wS^B;41UCck7n$A6XoO7+hh9xQxrse;kkVv4 z+4I>r5HkHrO^9%~%sU^kGE#*6B? z;(6kdvg1aLNW$3|R9HeHo9T+QV_!lY+fHRVS%4U;`DWyxTmE{#zzkm^2Gk@$8uP$H zCTvwR?^aao6zxqpc;ebjwmLlkm%^(zk2~=$3)i2f7mPdbXP#5qh)oojRuTzOl z_E7h!)@M}HCcL7m?Wmv#vZh8sX1NwF56g(F~eH!CI+|p*e_mEWaEKI zsn3e+t0gxIb8j9J#oyfn2mB8vJnA4=WpO|@GUWXu>8XyJb*Pffxup(Nl~DD*Y;-!Xq&1bK!@0fpu zzM;rbGwUSg&38wS@RGA*DYjKm5XMeG&IK-^e>GqsqX!fhm2Ij`DoK5xBBpe9hB3QVT9Wjv$d z_tX$`iVT4Fdf~Q$2!<)`*_x_;$o}5&T7FE?$!^x{-Slu=MUI)bGpT9yiIl@(A$#7U z+K0`ij@Vgqvqt`)L{XT$(yxFqelefBbckR{tZ>Je|R=TBmY^E$7kt(^xm)iYqj_PLyzw< zF~c=W2E8Wi$j)2sCrPbrhrHWhtM-D;J6y9xMfRy&CAGt=#`;9n2{moQ_;(23#hLIK5>w9@U4cU~seZ`PK}veJ$`hvyTBHB|^$O^cX6I6>2n;&l zVd1-on|yx$2H24as4Sm8~|W#bDIv|d04yy$`Xz~ z)^I%O%^4vemHtos+~z_wDzHY-X)39x1J#qHpH^m&O~x4F-nJzNRy>@v62$Hft27l? zd3k3UO4b_o_u)P`0JP!pV$Azvkt$qklkMwEc)%S;CTkw5U}v#<)G}4C>&)ODRu&%p3u-Xo&udK`jXdRF2KXM3@4}zL-p-0v(ONZ))CUKepU2Sd*|Ub`E; z>JWR3&uFcxfr)O$n{L55E@xfh$TVaDb#q|H(4=oZ#PhlKm)ZHTi#sTS*^W_j?V87? 
zo)`WU%2`caw1YQ@l`}k+v#L)KfZTyyW~ltw(ZpXjs_WDG5WDJ1B7Y?35o^Ch&MqPw zB=D(wnsw~kgxiCQNy?ZPWN6AtB2dsY+0rZlruhJKEheaciyk^nl-L&E(<-QmxW1c-(*5 zC?uej`}7{l0~NvQN~v6z-na|pn7i~4T2tJZsd2r2anTF+8{~&iHW@l##mf;4iqoB+ z#ZKWeyk~JHOEas`Zcs1>A{-^No_P*VXS37a=*i7g`f(HXoN;ja%5Kl#{nnJO<&rUw zvpneB06pRlM*^@g<~l`Q0JkX;%aQvC+{V(d^a@=d$E{Qz<-7jPTc0tS=xfBN?$)`o zttza)Jkav9?t3Nqj8uH>VyFIAzOaAmQ;G{U9fy{1sBx~~E}_PF+0dB*SS9e?r*m(1 zKt@}uti(6cXG;32!c@*$5)t#Xf}PZj7Kn4~(amQ;xJevY$%~F3j*_Xe>Xw@`-)g_w z3xaz9B1fJ2BM=-tg!X>Q=ob~BOcAkWDvGrLd$>a2T8 zdvf$sj}4g4{{TnnyCD_%Z^V_XcD9slwIX_b7CKGWa-z%3YBn?JsA(dZHZprCTrGa? z{*JU8k)zG))Rb1u$G)-}s}J*uM2~j|PvFNna?%nS`2v#PG4MnO!>&=F)OU4YZ0)ZK zKXUxV!7XWLJbj!UK(zD5vH;a>XRd#M) zW4`*|p8H?0WEclczyes20-e#|suUd0luh0gGZh!Zd%o|9?MmVfsDI`R(`}S>IN=&z zOQ&+NW%LGPz^>Y#1VIr>Q-(dF2vE}~%sZv?ju*=|*auFkFE@o=q-)%@W;-3%_{^l_ z9m9zz(7t$R1|ty0(k~Fg7X*2FtzRnb)}I@^%=bgN{X;L(figtRhAb_~xR6LgnEljQ zjOoS_DgseDj$dtgi;3WEnX@84sT`yx;{u%L%33+S_muYgw#6Y{(T7<~MRtJry~u7Z zp$XtVG|UkgD`_sUuLrxCZQzLn8cTaL(_6>_AFZpm+>izxr~C#T_@SL>aDTTUm&H3= zg6&_nmZ|9#p`|A}L|;x+9_)_VJ{6y02GOdl_=Fj6B*^ftM{<5`o`4hPFR@V zy2oJ>9p~bFrSm;6pRKdPywR`M_DQ{;ZpQdVdN@0^))ih4DixTCW_zS@)q@_e<%otn zdwaI`u37(yjkc!QyT+!o8a51?eJln%n_M6pK`!F~`Vkrrv0!JlnMDugsKWa_l-FX4 z#Xa5-jJp?7n`v_jBcLKffx@CJA@z9WI_;D13@Xc^WGq8jdDYV%&1duv7X{f+`TIKK zLAaShZ)6Mpt$|Wr+KG-uWx9?#YO!RqUzWjmR>HlV(dtjX4AI9klrOD5jLC6RNI5xJ zGE-v&mfdmdV`yvu9lHb8gsY577r`1y(y10pa#o^ZUPpQ&?*;f9oIcHy*(r@gvMcvT zvib&Vm5K4@_RALmlAfX$B;-j(vtW?Q*>y0zd#~w~-Y|Cf#Rz|7N0_WZ^u4Pd)=7Qa zs?lCwAy*ME`p?6SN0M6{zi8z;%$m0Hm9&+%3+K;>vk7S(Yk0c&<{16)H_DEx#6+Mv z-O8$KO!R9p4aoLTNWOpVhsK9Da+jA^5p)T0?Y@*v>NhxBfj1Nz+h*qj%WTWEqzFN>0+ zx{hy+oUPt7$4T&-Y9l0CZi+9c)UwqnV0N^Js;9&~2M1*m(!A~s?_ z!Y(si0Omi3ye3BGt2U}cX!`q0MosBVD1258Q__oq2i5-8-~%d^BoDQ`_N zR3CH1T4f!Uq-mypDSOgKuYA_s)?rDt zTDGbB2diB8R1gJ2jlnDxYXgX4A(G0*?B7XN=Y=%Fm`}1nY}AOSA7>8!5D$ix-63jX ze<|s~q0~b3pq4Jvgi^FmbWE*Rx}f%tHpK%}o=1$J&e(oo?`6XfbCbEdJK7xX&)+dy zZ8pxws)GVv)7mo>c91|{hQ2*E-EiuGld5RysT$Wyka$~0xymaSSHw^3P2~kduaQQP 
z*s}}=7H0_OHM;Z~1uNrhVxr=U!t_*uN{jDUi*F`QF;ClzFBS6EwxUvSDHhvn>%kp5 zY8(jg8Bh8f?BqAlEg%DNaCUJ_Il65~;>T`X`s~Xh!RcvY8_s_*&35vQGRuFObTCLJ z{RiLUzrGG#^S4c=jeZ$c^~D50eoevv$ap0m2Q0F@VHLnP4m1Jg>H&UdU;hJsXW(KzGxGnGzLg>Re2;M#3EFhT{@ya*uX*mIFWv{gBktA{#JzL`QBmfcdLT z0icB#-1Z8BF}YAlbZQ~;=TCL!9ccf*&7A(vYn*{W6qVnggggNL{*EpIIJD+tYc>Hs zf9v2-IPHou>yrP7_#cA3S?a&_r?HuCQvzj}NQ7j7mnDOr`$OI1MMfZSoS7sUM;zes zamN=)_3Z^dkezr)xB>9on_C2F*}s4>R|kX{m;WKm_(vV##_2PX?Eq((C8~JLGxBgH zCg2;SEn6WYt=<_Z5TP5h&JG*ek4l$}E<+ToM)!f4B!2b?ypwg_RD_r8rwKF~1Rlu| z^$;$3Df=KsOhaez1MfA>eu~*)Uh&PG84Zi0WD{=|K9}D6tI7Ppd|EYtPs}^H#!Y z(v$DGgK@}~)IA3gt%rkK6=s1aK*Fmlab*?If(F4II_YB>5renxolJ=Z(eWjWpE>A1 zOA)WGo$$k6a#~P&>w(n(h$sNNn6R;5+}uQhhEQaKtZr4;^RA@-{6SDfz7IejOaRS* zs-F=3?KFkCg*&X@zP;R^vMK)_`ZQEQ_#6|e`aqy)RF1;O4y8~Ki$yfRe(IlJ%^_|0 z|HD%d0u2adAat}Nmz_M?>>y%w9x*3Iy8_UF0XCEKFF~3m{Su-nO{Pc)=>6@YVHWPN zzc+j}QroZOo-NNmvKYM(Y{nBmDN1&;S#MAG3Ppdc7&e;Ky|JN zWsyyXr8@2T8NrqkmUQ^X@3#`Nj>5C7C~D)h6OZY;4AufJ69d24ofO^nvM^?pP1tLL zj0;wg1(6Sp6tPU_*3lf*yB+{e54vlo*K){t!r}^;Y_GjFO|4&Q##Bc&&9yaag z#M}f(fWZRm9B9yR8M|WVTRQKia+2u^E_N_SaCMw8@-Tjqukd;~)&3}@OY&`bVo=n) z3?d4_RL-U0&ucuox#=Pnap@kp>IToPzALdI?3<$ExC1RA88H=ujLCJGq#GBOO|Cw_ zcyMJ|?Gcjx9mZe2$Sww;-mNnjoF7INlo~~Q8nO0Hx$;S z@SDifR9c$774^wFF1CWT0TP+PPmFOaf<#b)MwvhnO`-DdRNOlIXRa252|#-GPA(j= zv4Wr;a}*&M4vV_igIiO>@?5!!%Fo*kYKfdsznUE14l)}0CMNja&){UgKZ#89%7( zeE#Gm*EA=$2yR9S7v3Zr^cw_zLF-5M_3Matag@cnMvp}h1DGbN$X+@?#S=b`uzwrT z`+4l6SK8I9gf00#oVD^sGM%tQsDa+wDf4G^o@l^LG zhQVdtc;Vc$mx%)2L>qY>b}u}4bH*psr*C<29}u_#oN2w8y9O^28?yjkQ$3Y>y|{-Isvw`pu3K6< zwc_$YKUz5Nq|3ypLUu3OS$PopeqggPlAa?mkp#08V$ z?CjZQ#LMZ^Jg+t~)q-C6t1cNyVG$7-r*~SLf0@!y=M$&YR~SCc#239|0v+&-DcKFb zujXZ$b98u-EpVhaye9@Bq!9fJ)@3JcDk}37M&rzB>XvK5FPPfc;$fI` z5e^Q`vz&n>pR8ZHbiZBDfiKD!-w!fcHw?t{HqeAHwC(|!2yy2wwiBAQt^kAd&aYIG z=&DF8TkF8vigZ#&k{m;U*+jl|RZgL$<;1ViLCf{a?M^Niy1Rj=mpkvn8wmR@7nBf4 z)Xjge!)3d9Gn^i`(|w)Bc-e{+CNr<*bnANZyLZyD`^4G!K)V1 z?EZ;pr7uq|PDf|<>=_Bsy_L8dpqqI(BB z*G=Z8&c@fAH@D*!KKS*`y~{^5$vDj#Q6Wt=4}|E_fK}LGi00c# 
zLWGJ_cs&t&gE~EkPF&Sktm){khC17tds`6&QL zhNkwv3lO-+tr5Z#NHN&dlsox`;Pg^^LSs-Nk6ZU0B)##ncmoYO}Qs; zb_=$sk%Tp6GU&A>^UnE|a+X8g8K>tyTp2ots%y&q3mcXgmj$t#gZx3$HP`n+$erSh zD9|cqfdkN%&WNX3{{}_o(k|ADAN>+vfe=&I?6MgB{^EDYU%s$7?Ej~i23S*o3Pgx= zH_cb`2pGb@90%YWP;uud`!k#YnF|)oSB~`oyMrSz5uOA4Ln>UxWzv=s>R$es{w7=s zY~KNBjsv^Ny3|52MGl<$XJh%6^G1GjXQYFiaE-Kth>;?*8U<6eVj+M6PpBtw9SgW6 z3DbI`9g{+n5%x~j53HPSLv2()$Q+`Gpgz~|1;32C>U%L2)LDtwPW!;hFpvqWgFHe! zSQFbSDo*yVd&@&mR>`e+YHpVbKf0#$`QQ=cQDV4ksK3Lk)BBJ3A^XW@3e}EoV6FJn zREufo$0NCp=XAvj743_2_~OcTgVJI8E%54AOfysSpPG{NU&Sm2^(E!QP8oe9{#3S8 zY{t(K2gq#BsG~iAg~_t5 zM{F7C2c7rJk4AH}`m8tvfK@puWmB=t)S=R}!->DiqZa-OIEKU+}*9^#r*_^A&+V3KMxb*45E2x^~;yf}UM@RfV{@jNE7Llaw zwNfxlIJ3>UbyJRysz z`hU6~R1(Av6B}@YsOq0R-o`qhq8`$LA-`9ce3Ad`fFw}Vxx|1302(u5XSv}qB?qtfZFt~qK5bEDPgX| zSG!l8UB{xaK=N*rfaTpOXS)0mQ&B{106Xm|HGH@W8HG=|rFm}L0Ym=|Q`f&)mHan4 zpZ~o6*ze*0y?|s+;}`JU|?JN-9<#%;8Wa{?}t-~Bz7_h zovF}UIDo?0DViDOjBP8MdrIahq`jX(c4o4ALt8NCC|cByUy`b7-$WJ_TRo%u0ue3w z-pGl3#?Xiwa0x-E$)MF24;(0x8oB1|*Nqa>-{q-*V!L~I~b7i8*Cr176DRd$@hBZ9uUb)55(K;27~3a1+Y*C|J!dSQ`}U0o$zzzNxL zjtVq=qi`u+B19L9627BXvrysKiFhS26=c*2yM$QE@y?|+jbTX28>k!E>{lu3LO9Gq z?|1hRCH4Ec`y-g8j7&11XR&#P(nGWdz7y6aKy!cvhys3g!ah3zUn@{18y5ta)J7|| z%{oUyXjWhrRAka(A1nhAKyW0jMoN^v(TKDakVMhIr*R+K2|bMD*X8^EMa#RhA>LMA zon)H?bVspMMuu>3E4*zR_uc!GVMSjgypr@o)h)lZU%jW>0CH`hlV?5%-?a4+4d56k zKM?HB*wr-l{g68e*_Sfk#wFyNl>=XwF`bg5({b(v&W=9FrME|V-9E2U-0_i?I+V?$ zby2Y|3wah|h))n2+jSir(7xDoUuC9BF`$=RG)}Iq>mBdMThr(!(T;I&BS1$XhP}FU zWIx3<@wUc^+fFq2KSv0C>`S{qA2g^L~O_A7q3-TRXxl;SNhpFbnP^uSV;h3 za2v|Ht$rlF-gd=}O<=@HU(}no1@GP6jLKHGeu;mz9NkcH~?&m~M@wujU&~hxZz>-j){=&(B`&n(j}2RjcM^;p_~kegxgfd zk1cpb@<+Vueeo0j>l(vcF6USygigBK$qZW@=;)cf7;7TfNZY@tRB)sa46}N7_sZI6 zYo|r~z7fJ_)8=Qil|t`J-|bLf8??~Z8{|df1yIyvFNH0ZpH2J{tN9V7Nl z%G*>*!M>oenmoCr+*KyNXigN}yR7uk+F(}Fv!W8?qNZ?_RQmNj;pGu4;U)0=7XmT8 zU3ebGa{H3j_14;f za@}jdG8&kQm2pANG1u8Ty5`x9x=yuAN+tT*<=J$%L&(G0|9gPp3D^TM-zFIZA5dJMfDuP%8cR*GC)kDdzL7+TehEid%?Te#v`RQhcrGw2!8oO9|xkW{I+u5||wrqV; 
zFgQhmcucMt!wkhj$5<$QlIqK6-MHb6(OY{Q`Ge@I0C^y0Qji)VhS_%*+vs&_F|HY| zwpX3dRK(e*7^aCa)UzE;k=4D)PsS+L9t*}RH}dSZc#YrBD>Z5yBJ@rRSFh1?CYa7K zAzlJ1eOTwswn(t}>p>pHXb zRP{{2Q>b#`G{z?LszXij$FJUIx+(eLS{F6#%$Ghn8%z7p)_9n?N}&&p`oI@duEXyU z1M^iqu*Ruk0&L+)-y42T9rY9;(@RYVTMeS#ee|XT@Pr6WE#Ml^k!J_lmo5J0uYeLw z9!+(#FhZ?3MnumtlPyetgQT{Eg>oLTdA;GrI&(NtkC(`l_)hkXomgn2#8u z(cs)gn_Ym!o$9h>ZxrHxk16VCm2fRw%DZimT=+DYcEb5I?aArP03fXW^J)LNeC2=P z^^3g!?0IA9^A{>_|K=Tui0F=Nx-Ykc!}D_(G03cM>DDbG#~d9mu|JBY-X$~Xn;5c- zR%sK8?tDMr$29AL%HPX6vuo~9v;UDbB5+M!^+37TX~IsnlIZgr1g8OFp#k7UGu?Jh zA>PQlU-yq^-eCaIfu5c*{o9^J|1OQK|6oV#f9ySlZ`(vMWZ0+i*`1Y=XaITe4bY_z z^4aq}jmS!R_6o$MDKkGSjh{|U@fU}_PC|HU(w~A^tzr6q87SE=+IzG+K^|Fi1##=B z-QeoaAu+X(m=v>)7aJQ}I(TW?SYJ%CXFQHv(Rr$vU|8mia}qWQ{`XnvV#bfc zhrMVB(@m>Xk%*S>-`$-<44DZjP63!Yj^|vQ9u$A|*S`dE7syEC@qHQ#cwm73)6XZ; zRE;^^@SZojD!Un95&gcHf9c2c%JSBZOlBmnW35vqDJYhY#@c>|NM+L9k}oDifrw3{ zrfuz-_%4z_m|ZNJ8D*F0nZn2eQgf@yfrDq|{hz0m-HRX2vLzP(gh(T2ntd|5l}<|qI3wo zL+GF&y+|h%0ci;(1X8%ub@n-TueJBxXW#R!d%t~t*FX5_oJnRGbH498#xtJr46EE3 z1EKoZ&mWyw%sY~`PtLclBEC_f*FpPSVUESsAy)v*g{?z)m*dAj98rHghb#c^h|4iD zr1ujZ9QLI3Q}Q+8OXt?tb`6iaclk4DFkS*B5vMIEmFGk%V`W%~CVH=En)JOrMINf% z&=q(C$i~Gdl8k|x2xL^Phl4Yg0#x4I>MI?WW0PYmxn>)k5L2gU?TFfSTEYYaD&ULF zCs$RZu~1HXb@|?Wm^QZNQ*BYry>#>~zuc&tFFrP^K~NDYAnnXZ z6?A~IX5!&7ndbJ@`N_DvheIZGmF(5&TGqEX-<*+&Rs>}x+vZT4cPrR^{92Vgl+IuF zb~!8UbL`yXJdIs8^nwQ1ZY60QiG6=+@!m+mTv1`E{^r*+OJ|q<5 zoPV@?ZLL-UJ6d-7PPDXi1{bzy()vzJq2H6U{u*dGN)c6f%CY`J5SEhn>9O41xg;zN zc0I##rXow{?bCfO$6eHOREgaSRS5Q-lwVu*W97E8tfsh9)0j|^GE>W@lP7HS0^qjy zZ~E}iMze5Vp;T-y7l?$kq;MsSvAb@W-wS;u^g%*r>j30GGOEssPYfnMOKYB$VX~51 zd)-nnbF^5QGVgVt_pUG7z+uDyodTX*#5ym1vuU36_(zRL1={e$D2kj&npYdu?1} zLk_qY*$im64`8R#b9{2ImJb%p4t;+v{)W9tK5#NHg010PJ3f^)x1e6!Y>a#s8s=hi zvcg$eO?x#hn5>f)_@wQkqF1Xsf?i$40mCaXlv>oEF<^21u4P2=x#umd?fe>T3h*-T zIotQvA7RY+Q;8WG3Bw6Vc5%#x5obl5kEZ&9Bo{Wmc~Gxd$pC@40(75akmiRka?Rnx zQoZ84M&Id>g2z)Mw@A#p$CNoV#J<_I^sVAXI>Ry=jZz%6E@X{d7q)%>( zi4{9;Ds$Ff`_%Z2^4Ntz0j|x#=ybJjSnVqGapGmtH8YqM{#bWVraiX;lf&bYi}!TT 
zDl=cqx(#BGszIg|KnS1lAq2(dP6H+lk{zC|cS?46YMdQHy>@}hn~kyVp+nq~T`3qIlUJ-)wIM77Z8*@pTJx}XNfxhcI-wVTY|WU&uE?# z{2u9LouQWzDXkRqqWIi9a{mjqQf6ysKkugLu?j6*Pq3=mi~|hfraBzcYHMc}Ey@0E zf}_B#=lVRv33-TkwUI9|_yaB_o<1Sbb)vNCxYlMQ&uw>u8%g`tGbmvG3pPhKyD<3x zlWB8#BH!!sq6yW_Gqw(jTodlD&(`__)D-6!S}kN=sPmWO1$SOlhgn*QzloAKa@l7- z&g~p4NJ~rY$8-}S)C9p>S__?%Ip%XhGqFYJY`q@;=~tFtW4jkWh%lF2*aq#Y0kdL8 z9m!=Gf3Mbx4_8}pp0!4Pp#e~lFS!;6-ktQ3S>v2@ww{HyzC*GskFQI4=Ei+mCVcMR zt9UhkUzsb`FaS}_VLFatM%W@l>Y4fg00k>NAhok*t7%%5g6FayzTqCDHIt>O)F@m8 zh^K*6QO$_*@1b5#6R2l#^JB1c8nR>E(h7@Jk+HJRr(IvHNvuY<=lQiq=u;U#Y ztI~;{9$G$XuY`dP>y5W*7y@OX@~}sgH)Q@<1nZ=u)sX>_zMXNMsk_#th60M>TCj|W z2bZ6Obw;qu#z1A^pTWmn$g=puKBv~T_*`BoX1<;jt%=kV4e;eUYAx&~?y(EBk88#! z&WR%%Cn5Tn`T?84;7^h*xg6oVx5M})T$Q?=f`$Nz#A#alDW-myo`;iMQbNgzuPlit zBz0cJsy_Gxz!DEb1h~{*2k1ZBT?3xF$RYqKtZV`phGzaV)G7uis)=8#YmX#5oUn8# zFY!<5?=5F{IBqj`!`;vwbW1}*Y36=_*BO$#B_&HPg%!CvIcW9%XaDIqWPg1o(EyKb z0&}1H=**ciVpwK5q98lhr76Kbh>Y(|QDjMe#2ohS9+4Y|vh4fRXM<})*+;#+?W7|b z=+k3sc8k74byG*JQNVN&(x_06V_0LgP7Uq8KUm-y9(akp!0H@Z5C4#b;444?uQ}Io z?8(cFi50%qFXm=uZ8k{`ESwx(Vh5m;gDF6*s&^<*Hi%|5v#hIlK~_`GWXJtnKpRlT z5n>UhKkkrBD%SG-vo6mFa&=F>b(SuZnVy-sp!23JbI?vgC{<`^ak|*_1(l5$qFWo+ z!bLF83?0T6Il;K}6R@UEnoJ88V&&gK(XN-~g!!tq*IJ+KvbCdq^k)N5DHaxms|=+s ze6++4$yLN5;uau1SbC&Z8|#EBHLv0DdQh(O(JEU6!NkIvcKEoC@JJ%efRa)VCU!QP z_Ac!UX1vY&C_QX6s+G{vmHOK35S@w1C6kFu41uija~L3@+Jp%Q6csG9Az|C;fUYc; z<~>Tr)U79?F*mdy-&T2aCfbf?F(2gr8K_35`4j&b4u<}O1nY|Eq+jO?A?)V(o$0Y_p5OWx$!nIR&C=R zlGw;{SzQbFEo~1ck*~}-fXu=>nQsu)uO>r4;i>QPEx!?Eb>2VW){oDq^;|H4J-*wx{F<)9`jUj;WzK%; z=Om84(UmE&c%3Nku>KQ2tm)=~+XFOF1#uy(J9vhc#G=UCUrQ|nsGELy47BwfCAFv< zGkzmW+QF)dpL_sk9!H1-Cpy@^GgU2NckgPixgZwz!ztuK%LCju@#ksMwz~)ws=tZ= zG3oX4tvhFGPykP=^jP{B%xpx_f3!CA&VF#9Dm68XRI}EQvn?V%rRUjUU*41AZp4u*1m0l!_cHeBr^gZEE-fRwK4EMj@=5CDluu;K zjAi)Ycg;fXr`GlP2rqkF7+He+=^-?ExTI#N}2xy_+YA7CzzP&KB2cNzT_5HF=epP?kfg&n3? 
z@M=@>`8N`VmHV|)Jd)4qv$@E)EpY}Fis4vf+nkmR#v~e2&f>e@reDT7fh%+!RMDrm zM3MG3w^%oLFV84zqnYjADT39tRnH%L`JAE*$@vCBiauhENWtnm1BYYZWra)M2+{nbDPIF=iU%W%ZGFmpBT1Qy2fQw zRoG|k)AIRCZU1qbvwm4eQ`s7YV`9P<*2#?c^6rTx4~Oajb7Wk{tIl&g+bxg#=C%tk z4bT!~gcpgJiNA5|!?I(;=kJ_b^PtVFfY^@*plA4~RH4`RUGLswo;sEPXsrx`oosq} z^bWY}^}wp%$ZA!Zeg^UAqG{#rOygQRt!WoR# zwEKF$s5SQfxFRc6mQul|CGPRkY*wOk!k6y;>P4S!71Jg8wfKxrcIqeudylhwBu7!@ zm*Vq{hkeW4_B1by( zrd`>}7MBtIfeK+?84;rID>gtj;zD9m;7m5UeT~3jpy5&}hPJ zbho~K8Lq;9p3+Z7A?1`*^=7_0T?w#t>Qu6Rg&5HPm`POyI$Qd%w#52Kv?W1by=Ua& z2G`wyOu%ukhJoG8%K2M$UNKx!24k$VO{#0WyoK zK;y-F8%jpK@bWNRP@T5uz0IJ^eB^q}vkO;V=%gWSpE5lkd7?WLK^}0Lk%{D^txI|L zX+;74pg9~WCn}U&s_n4sdf@GsUhs~AVsT8irGdZzYdF8SIJFRE$Y&m3S!vk12zj^`%JT{bx z`bz_zBh~~bellhdY;r8 zjCmKgX4Q!=!7`A~z2o*(WY#x zKMkI#rCKpal|O6x2YdS=O3K}}*%@A&M<+!!1$EV2FA1d3!F>@;Xks0WGniY*Edd> zvR5fqER|8{%wH09HtJHR^S6F`4ZpBp0D;0 ztI>;nD;779DiPNC%JOVs=BrFY^@N?li1q|Wc~Q|Vi>BSX`G#owlaiQ6-6y+iUJW=9z(nHNQ)NJ{oqfc&qhpd>q=ILak(1^O#wdN%rrewqIpa+lPof9 zl0nN?Y$=ld`MhX|gIj{sh*En79V{R3LGT%YTGZero{MJ~qU3E73%mXWWA7$gjcU#lz0uS_ltVh@)2fhGl zCzbz$X{Rlh2t}|B5jyL@*tWt5q=GR)b~|WEN~m*~z15l8_Wki(djK}P*VjYsu?$%| z;d_qCwxS^*-&pYyF=LblAuh=?tU6c|5W(h=E6nm!`DYl0Zu8uM*JPbgulx{p8W5la zI?4R^{G(|D+GBJ2K@iaEj=9e2&Fqvj%OnikSoW zJ$XEr3(H)l&hLGD%22o~&l34D+>6^(bT@N7o61I&10~%IE2OKc;f>2d+w7A@wEv}z zCI8k?uj**dMco;YN)kxdCW>dw;f;v!xvq8)&0ri<&1a-7)~Wa%6aAS7y!*U5>xj{9 zaOmR4`b+rqmD}ZSGm0#Ddb*l}&Hyg!q+kd0K`4rZ+pXXXtN`y)C%1?9Z=N)SGv^tx+LovkSge@zi>v_pqH3?(J!dYWvyNreZ9+Y0Ls?@o<>0J(=nq{k$LKm-^`vdAQwv%9e}L&{3VypUj=<} zXB7@3Ce?*tccbb%`j(WtP?eYNid*lpKC-4vUmef986dRvPXZJSUob$YWedp5-DzC7 zk9)tAfz@Vnm(9bIWm7a7SXaw$g5y`EX%45jLDiP)Pg4r0XDtfSQ~h(sZzey9(z(sV zbAA6~th2WeHmTNQb}K&wSmozhj=x=-9fpm5c!GARyC}E*4cJUG&MUarV9X<|dEylT*c;*0GveaU#Y|hb~(uGqV~3 zY*#gy%_wAzra=0G+X=@$$+!cMY8wFOrH-ts!7ZpGRQlRpxCoNxDjSL6l}Kc5gSv6m z%fnAXdgG1VR^pbNRty5cr3^1sJ+SZ8Ake|I0rFKF_NyL(6XD=7UQ;2@8!Zge5yeq= z79;LQvwk{_W+3L{HyR-5aU!bXPZ%d1YC($Cf*c1Yw_4~L9+O5)Q65u>gI#nacbBm%EJQL8{dzQtfn z(*BW%VW+BGp7|gqNXz$Od 
z$&Ll63lTq)%=NH!Pga^xENk~`Q@(Z?{W!5O$DZB%(J#f(4CLE_vSsMsy;M7&-kHd=5wwE_5g~a?^_;}24~|oc_sm8uJEU)XvFLoA{-I+o zj#PEMX#GMG#WQY}J0Pvt@;a2=!NGCPy=%nvh3IJbOfSQNcPl0ce7u(K;~24~egC84 zu5x5YImke&W%OH5Vyq@@K;=vl=^a4}K*}xvPZE%Q+!utUlMG=_ed=OU%T#{cyLXY2 zy50$TAC87*J}7;BEU1Y4(nIx!?E)Z!0aUS2(I-gLJ4)=49aYiB-YMkQmx2PV+tEkvxtmZ0_ntbTbSSa!S;P zbv-i%tp*|I$uhX%$Y--^>iyKQQRk24mG4Df>*Sg8-wX5@7xd5&XkW_*xSGwt+g>;U z+1LNs$!wY&%SsBpcK|xQ!cQ0mPV)f3B)&ZXQ%+UpqrPoWbr~ov1&G(4G=P-iNe)!0 z5R#HUZ6&ry^$oq7Nri+_BG4UZWD5H*oQwRhYD8{r7&kQu)c-G69KFQyh`ms-grkaQ zdkl!Y5#)=MR2U7OL8~HZ&ZxZK%QdG$!PGtPoLGC;!zt}IFBa)^h5cuaf&9wgO2qq` z;5EVf)u!iJxlV^utNbefim(3$9!UttxjZLByd1D!Z%!NIkF~f6mOhP&`^ciD`uVM{ zG2`GFl{YiiyrT2qQAk4~^&Ig{id40hGo+bRApW+B)k9d4{n5_jxRcxCJ%$=W(*aPX+@2J}0X((wSBP1nst>W@D^HoAnR)fXLz|$TXHZ0p67Mfl@KjkZp6I8T6Ow z&8sj7m9j{U*V6!B9huLFIRHrjfGOb;V9I6dkrhk$bqDa~Q&wOZdf7uwPy@kJd{!E! zgoxqTP9(EW60Q8l`AesteijJdeH2;MEbV^;z)2Z7_ZDZTzb($53iS{l338Dt%k^~b zMZYCDrP6eCz_eq2hzSrlwuwINDT zjk}Rvesxw&pr^tZBag@5s2tC(#ZZroqVuVTr~^PWXJo)YtR9mTd~)s7nEl0*Ri~LQ zt|^_yYHieB^@uo5vkT>hT_)Onv`@U~lWR6$)jE40r&`oyd(S=h$hrO#OB^E$?F}{D z!}Ur_$I>+VG?IcW4-4#>;GaYXt3Rio*j4`?0I~jS+Qy&p|AXOwwOqk@I0Q`=L6pLT zPP)X1CEgs=&iF>qntuE0JH{Y)i$oZ~ZAone*P8Op>k16BSEOALe+5_hNY0{N8Iuen zx}jx4YZ=p={3`0cCc_^bfHJZaboZ3Yo|ba_vqjP{S&|hny)+|sHr#;eMcm(MW@C@T z6GCDbM=X6p$723rssW#DNLIGW!vWn8BmiIEv=OZO^TeEgESG?4nMl`RCr`8xAfqY5 zB~kk$K5?C|A3eD@L($Urnpw$_$C~I}TX)uSaGH_ z`mi=qPgEsrH7DKSI&mY#qE;iYGuqYNfcH@KkL51BTkAtXdr2up`BoESN2M0vZQ1PB z0AjX3Q_$xBK=0cH^jNzQ*A(4=XW1pc$;)?l`#ag`@98vlVy0s+y3( za)PV*y!U4M5rbJR`xA;lrsw~mntRTeydPI@mYu8pDscqzmMm#JX{{C0&3RdDpT^34 zO}Z{;F$PQx4BAJ1w?EV!npxHoa;s`DSQnVEPk2i#lUL9gB4YMMsZCi8LQSaO^W*-g z&cpfMD|cBziF>5bVh;UIBy&ro@H-x0Y)cU<@j{7Q5X`ef2Oz~%mhvzx7-$VAh9z5m zKo3)=fufl^60;*seIsprGE@dYC}In5=|iAUHULM1y-swl4yZBdH`B- z={o@Zwj)4|J!}g@w3z{xXOBS0{&B$@Y3V#qRx6@e=zN zll4VVJ1-DNAd`X=_w*zx)kgjWb2YWhA0RijNBSRFEw5D@J84m$~5;<$a0ni0{^9k zyF~`n(;{@LABdQr5@^XO0V+o&PmNghZs>5CN9H%Gsa63|VT^qK-}fahdpL7WNF7c; 
zJDs+}35ZR$jNa=>b3Dac6&eMec=}))Aj6q{$d%0UQab#IZNgM0|E_=WbXNqNAgx`w z4rxlqwWWX8^4v4%O3v^@yAf|OSo{&gytffrt7tRptU7itGPdBx#phoXcNi)+b{Y%9 z08;<7NxzGy#j@PW_UJh~+yekir7v;%G0p9Bd!&URWY*&CcIZJOKq2zNjH*$TM>-SB zP0hUq%T0^kWV|ddPH88zT2BS2p|R7E)Lu~>6)mx$-RU9;u2m5QL)TL%%5&U#JD{n#FW4X!84THi~nN;&Es?2T(p0#}h1OUnZIY*442gP#$vO_SD zEmG7h3!pl~ACOio1yb=1Vf-ep((@rt}eerG)l9of+T@y zAB#3+Na!Q{4O2D;?p~`St?81w^E$=OJxN#A$0N3oSD~JU8UoYdK<(a!fW5cSA3Ggb zbpf3Ypj=MSLP}m@tR8lQQG(iub z^8^keIR|C1Pf^Ex-FT^bnO`k_^X0=A3|hB&Eg28fn}8w$RgP-`6aX*MG4tB)VXKQi zX<}<;=MG0OJT9!%FsSc?`t@lDnfl>6EXgOKFR$Jnt+)ZyJG#W7b{uoRKeK4JtQyB4 z&FA3sWICXO=LL`43B(h`V}#?!GsqHuROhB%GJwR#J@qD8b8gFXfK0za_#t+ZG7)@} z!5FR*O1PKP6X#DrS03%;=iw6MvTz5>uPf2oH$1k>IB4ck^z|o3VZyl56rqY*YJbY? z!n$4cg=gZOpd1Ak7@aZghLF3@SKmnvKuQ(NiLe1PlP?c1;uS)(>VvYT6iD%GjH&UY z`i2hU@!aDrC%P}{s_SyED#0HeLU6Yf?Mgmsp@O;?R-kEjZZUi=p?0Lu7#zAsgc?-F z%lm~yxvV+#>Wb7p+q-r@>u!fF-JL)m@lZo!I19Xs8^u)X#2UU{dsUDa$oufPcoEV& zsmA?Yhg($LG5=?ppl7D7=oeOk#JUfcKRLPUIjJQ_O63o#=(Tm7ZwNkoQGtG8?vg-+ zX;%_rRVRV(ec3*-qk8;C?5Z-H@knD)&6=tmPdDT!sVNME--tv2urAh>Xf@D`7{iQ$ zZR+%TR60mnYcZdNzSckT>3N&HSaH*Fqj5SIip#DA1H{5iLLvdZeJF2ic zEumlM4d?vv`W6H492(?fF{|)upCN3ub*M^Z&h=^M34+grd;UIK<6$=_pChVpNl|)V zCS1WHZd3eg#-ibprP!_MTS3cEgrNrb@Di$!^3gP=b5@3c-BEk)+B-D z5?yJaK8BJG-(iY)7Bk1Q5@!hz6KD`k#W)4jOiu;E!T)6j#2#qrKz)psnV9;wmJYUc zEI>P&`5MTmv$OiSidI&lZaDqW{7`)>LDfSnUncSU9wprRX-`vq>4$)eUXx6^c!e16 z)RlzK-VgkE!|an?zaVa1o(F1%*gDW*s#i&D4yN2_NU*BDRD7kVb!VtD6Esu09Fg_{3OL@Dv3V*$%VM; zY4HZ3d^RtkdDF`Ni@9e^K!N8kF9CTzI+6sj)XLsGF-RJ`TMa`wRxo4-TM4k*xW!Q^ z{$9}c-Z*^{gzO)4^mRXMvok6(b5iKS%7W*v&&sxzl4z2A!YP(0TFEmV!kb6b8dL54 zfoQzxpD^-&#+sdj3Q^g8GG{HRNDu5t*|nmuJ8^657X@n0@Qg*K1lNkeyD(g@=`2=` zGk}<~5yVNKstj{{QN*(~7fEW?Oe@yE`Z}R#cZU%-gmRpWPq=Tn7M-?KS2_0 zUBVFnX8}knJiiU#1J#frAPWG&uo6#FVu6kbxC2n2KS>h?OvB7y@h(&Zz?44=r7CLw z%{Ba}0lq#RLOxFYjOZaE8r3Nu>H%0H*p^rU9FPDdasm}T{ONzZfE~*NP|rp)^$6k- zW|0h;LBac=G>B$dm<4b+k_`E1;sGcUfF*u^_Wc6jc6Wc?6{HxtMFs8?V~-BNVBgkD z0f(a`Qt9*#Ky6EZdwPl~VvF?irof{zfCX8J64Rxo4bVRt(DQGufSQZhqX4(Xx|#@> 
zFNAzzV@z^J=5NjSx0hf%O{M}j7DCXYBATg;yKDc<6ZHJ|2YB_@^ZV=9=hyT5HU9p8 z8h-{M)0>_ZU;_6rWSc38!i1K?Zd$N62RaoXl$@=PCJjI*(S(Jw9FA zwYLOcBu$>C{Q*7UQ`4+|Y@j=PHuCy0g>QDpWCQL-b<~zr6bJg}&SSW!->U5^shvc( z5Bj+iRpSD?mt%Nq9>0_o4O+iNGJWF!q4z>?If8NVFn7FSoGRu*x!;Q|!p_m|_D9zt zl>swun@RiJ%@xqYWJZS3QbFyj15pYklpI=FB;li-^mM2kOZ%(xB9eslH7S_^)<)03Ja)v)6VU*_3FD@@wULuz#-&5hG7S zY{WHq;pIcHr{!p7xTcG_l2iRlS_ZEB?LEywb!+Fp!8}6IBX{<@KOBJ8_aw=Ex!-`0 z!;LHr-jW4!Z_{54i1$!Tm?hgHfJ%v?ID$6gDRJsTNKCfl;_^9KK&XfBHT*N6bjU!) z{Aj0+Uk286$w36x9R{rOX-Vx=5`bD59)OtQur3tlVaP6!@i2d$0^})~({R&Ngcp=V z0aD_;fJ+o?NOS?#kt8&Q8gc-N7y(3v=ZS!eaaA5k**yRar&GBlnx^-8^AH;tY8mJm zl6(=_jMyU&AvU%lWLqTB1`HI4Ztdj&m%W4_e)aVxOB=kzZ2xv22#O=*_ZM}G;SEqu zdYY^VI5A&M0fmgKz@-Ch`d^pv%NoDljlUkgzpU}g8oy@IUu#O$nl<4W(GI(pcK&OY z5-5{msUe>MZ8w!4wi>$aD4SSD2c5Yeek-v#nY{@7I*<8#j1 zn2%=-ZgYOHj{9M-&|mex?HL$;hv2?Ijsf_^mNDS=04WcA5(N7i&JO?=-*XQ@h%^Ae zkzbz1RjC7*PVZNs%)}oMvd#i<@XxpkYOoWSwDFUME(CnQoc`0zk^hUE17IuKSPYr! zh}qo-Q)Pe36a3CILjLr&5Wjg_K$EWhL7HDzV*bk}zuuFdA;KTW4#39;#*X)IQ3U{x z=3iI8K$eF5C2Rq09)SKB5P<(?Q14v8Bd|f9cm))(>qDMID>S&+f11M~h_aG3vR9bO zD*7McK=ogul74_ijBPvsC9d5?jN3zqt$r{eb(~aRYsIVWDqZTryc9X5nn_>pCWg1D zI(P;#AYls35W1esn7*6%oUB=Zf7w~#-2<;3SO((SNA?QKr1YtA#! 
zDYGB$PUU#Yj7`**>;65O3qS$PmL((v;k0I;Tg#{2YQWrSqkLbrJgl#p&}q@_v2&6d zSpO+c&97MJ=S~DP@vkY$XQT1+ ztfWD?S(fFMN%+-)E<-ZIG%0gr#VQEMTo{VEoG870M*E8K>j*%IUsKP)+s)cTaN%R{ znY8;I%~>LK?X%iJ>O=g*5Iy8>hpZbPvOb;IUb*o@X{{&d7Q+yg4X=p_-R`^8WU2Ww z<-%y4Hc4uHbIsBchDdcvOc5GPAAH<>9k=AGER=)DO#5Ds?)TDDb8HgSYOT;b-BX?N z!~cjXAJAaw&!x5gnH1T7bS&C~-hZ*w6_7S!eGPF02D<+2G=oI=c9!=KxDfLu9at_Q z=q=w4{|aFnX{qN0(tTHNCn_hr3ec?Fk}7b0;n(IGGVYR%u6WOW&3^vY zmxjP29l|?@O&2O!QFKm_5}>2e3Y5#o@VW!{?NWmF`*74wIdUyfWBpcyKu`c{udier z&LE-`Ddgi0w7Zx6ex^+w6qhpP?REFldw z10-3y(I*RY3pFT*aMZO1qwb4qsA|cjKVN$_cy^XaN36Ok=3y|0jA&6)PS$6!lmHfe z_)|+kJwg<5fCXh;Cs#OSu1LlezF1fg-nRQJ3TgRLAHR$$MYZ1EBidl!GRNJx<0!)| z5w|&Bpd9N873!RP=?OmKX@nbSiWM(@IkFRw@8>qy%tK1RqaH7O^Rj*(oioUY@^Bz< z`5M10f1BA-Xg+o=x|wfVXO6~LyAX7=!pdVthWnW9H;2Rx>ia{ucB*;m1{JrlGBRAp z_p?d--zV<;H`y=n2(ft)xxYe@>|N;rmbRGJh!NS0k^P_mkQszjVPZAbgIIdPJs%vkI8y0pfl-^4sbHe>$6{Q|ZDJJuqT)4J?tR2c?jZ{*=mmJg^m!u|6@e<% zAM~A$lzFF(!#m?NOY$y&YQC1xd_32r?P$c5)Bw**>_ba~OQQ3yG z5JZk+F_`e^xi&Raen*OfrTwce=`2|eTBn|I05Y4S#vCn#*kFQb(S+yw-4J5fzCsvi zZ-C0{jrHDj0rbxfKt(@+QY)e*rE`OP1WL<+kSDQ{tNxu&A%O=0xlmA|0$FXi?wGWr_WpcfM?ngV%XNWyT zeZvT68bHh9q>M)&x!j zEnSEi3A)!qzPQ^oy$(gp0MUvy^jZL@VOf(7tb%Bkfk{ypcBzLE1i8Q7kl)tlpxLVU z-Osdtw*|chFs^n9X zEBgEJuepwHD8d*w)lkY~7f~#p;=Noik#I@?sqhT~F^;*gXF4i(i=m}`#=9NN4Bu*k zOK-&}U#pO9Mv-hA(;?wNom6{!Pw*-Ps}3M?rFqfVjM($(LV>f>7Y-rDZab~*YyHnk z_Ec6z9CVEhx7(U8_-UxwzM|*(f)&;f`Jqew>ROG1+DpIU!uz~80T!SY{~U63w?XKF zuC$bMaCnx2PG!lhEI|C*M{Boj7&3)M4e!peuC92-Kv~x?OdCh5&h>%}XzniLPxrEw zraZ6iWtj3brJZ37Rp%i_X82<+qfTfspqx$}?v0}?^av9kM8m6S>r)R^LWIalxSkfv zbIW5C%*HLS$PZPvaM+#!)*wUVJs{ zz#>|CV>=RGZispfxIK&=qT3I{XbwN}{O~*PSKtbGHPTfmX8i!v4kklB`Kr05hzC*h zJA|?jZ>VL%4!}GzF!LrkHUOM8L;VmS)Ax5oU`)GFY(T@INKY?+r3==s)5=jXN^7h6 z5o=v$kH}r?ovGZMz42TGXi@piSNKW8({{Czu=&v6np65I z29OPNR^ZaH`ZpiWTG-u*a19MM*)v^I2HGw7y@wUy_7Nq-8iR*E?n{Unod$R%I(z9K z#l!!}XE-F#Azhc~g8RPyz0F0&WXQwDx)d8f0o#n4>%0+-sH+$bP@4+dWPOfdGn%z) zu7g;<&;3A4%BZgqZ zF-%VQ^@=K9U2cACJbeC@ftJx;L{Oth&C%1en?bsRwKgO<*wxBDugavko5@HeZV{EN 
zi&p@RG#3G?l2*InK0~V0zIN#LJAaR>RTjA_pC+O(S)y6@eS|kz=}YTR!=!N=tp=kTejo`<q*Z_KI-Yb6RWhk{Y3n`= zRzR=v$W_2I)oy8lv)xCs9nO!Z##UoOq_5?i7Z0sBdtgnzcB{vLJBD)huoM-gTsH$L>O`yl`WiaxH^zrx;fU5cz z{;iM=q4WH-bKGle*4RN*a|(UDjJ(xti=yD*iS3hqszKB}qY(*_LiNM&?&^>*8Eaw) z{)!UcuqjEi?tR;h#3$|E@6^{}#m|mDgJ&*~8VY1*zztu$@AMbci>|Dc&#o~o7u~Fi zwQTq3HmkCnJIQMD`3Xm$|5=?W3Hy(?3tr@&;Sr7kb2GXLaRKp8 z^;9|~4nQBFQp4Rr`IAe05#EH5OW1*uv>%!KzI>qlwy>W59TokDZBkc0|AZea)du(3 zrb_=y6T*Ko)%{;PCT+xDwQ7Gh{~wUZ-_DR|l`6<%Jg-&ib%vSuhkzH_~{xNUBmB8mn9~}pH0fz&EK4mWxlWY7A8qNo)C*@dWP4ppUmx^ zw&&WFs(Wa=*!BL+`%Ul0J=1yvgLv{$6uUf%&7gp&)#Pks7C)Lq)oH1}6Dv@f@(5FH zssxeUlx<^`-{_@wBiL*2uRO!Ewrc7LPMJNE5%e$YdWFHUYoyu@DkEEZLXs|@@t;*w zRIK`;(9hUH3)RBB$BQzt=)2J!z0mx7YVm>Q3J*thRWF5w+&C4s10`(%DJmhfC6EICx#y+7KBCwgZFBt>;JdqyV|(pOPJOgNSBRm;yjVQ!}NKn*paPcLag`ZBH8XIsoeE zV2D5h`VTumn=(Yl4ZsSEKN)Xt5X2oYZfN=#X0hjI()v7Lg&#Z9L}OqNUBcxM*8Rh4j^2;cHXZZXw$}gk*oM?Z|DZeJ#|2I~j&S6zrT)S?u zUnZ{nwZ#XSQIWkj+hFglR4uxY*7vgQ<^qZ8f@QVkFDKe;s;}uc_^-XQ+*&da3QC?6 zNei4B>ld#m30CnjdJ!dlBup7=tO4>nS|Z>gnP-@7=MJLHmiTl?X@kuNrHOkVFu3zs zP*^&POPEhvqWFw3JH3Gzi}Ju4!qFg@%VgQf#U>}uErMqLr}I#qMDU@4eXI!G9C)ro zhwL7qn=5Z(EngNz*F0~S@m$gWTbG1)9{NY$0rCeQWoQM1`{}jxqCX*K#Gqt-@b5Qi ze+~R!9@a0n^Uw6ffq-Oh`)7ax{kH(+j~>?V0v3R+1}?K}HaY-FR|A~fayA71M-0Ld z6>OKM980-xuq@?UL^Q<#$~#?XAc;J44j95((~MN!3MjJ!=4CbO{At>+bCfJNH)P6h z=>RlU0pKk1xh^Cfvgn)Vr}EG7QYm?hfNqagL5@KXg6R{sR`wxe66vxjQe2=;Ll&*- zd5irv0@>f2#*T4?id zIeY!W&K0L-G{zSEGVy-6kIcD}DgHn=fMt6vzQGYyY@BC6KrsOXpWNne zdis{}qD8`T=Y2UC&}79`r3~b+rg_=f%h}IGa56btl|AcGtO9dHW*Q*0>lc<1a5V78 z@EBoAPp>1^=hW?3ZINITqekY;K-RQuK*}JrUJNEUqR#gNfh$%&p?B_rd7Ky_Cid&m zorkH5T`vFxTESIm0lrZO6D5{?g5B(AJn8Het_gN3>%o54PPy@jde%oRbcb5YM+^ZV z{{VS|m+E3cv+d1|1SjJ+{8`6g(KX2eSqGqmi;E6cVK*5J1GL8nO?hX){0@lYsu!GQ z)y}Q4N5);dY0jIt(KdZEb@w)_@9GkdX&B6d=mZ9uP;?LsY4esJWR^VGMMcy?v}uuq zfVCt-^=aC6j14Mu)2FBLE7sr2avu|Nr8NN}SDyJQc`^IOW0wzcou-3jdzgG6Cv;SW z5L$$n!mP{b`&Zm#I3iN%oOl$mlKkd1&Akq8JXS{C&?h3)&0P33=9B~N@lGv*wIyfZI|ub z`NwGy5m%ZEzAt9YWRcw((R6=l&pV1+1xo2hw#Or8#JjFh6eFndgkvy6{?gm$!gr$s 
z)j!k^glQ&;#jCGmJ=hkNqY?jmSN6~BgI|LWfY}k)`(*9Pz&5C?Zf}K|`OJdlYwEt? z_@Y~Ywz0ANe%#zRgu_w&_>kNNy&heCkvC)TlvBtfod}eq`n;+0myhv#aDr?r+!&@y zc@OizRWzr!Oo`MCWRyAdPdn!o*S?!)zTDgr1U}OvM>7cj0tHUUL=3DK8WbP>M2vGT zc|^{3XWRti#o7>4hquu~)#Om3H%Wo4K@7##KzWEyN%|G#N!oW?TL#2(`jl?FMpp_% zM99N)0qw!#&DgKXtXy*JPHY;XPYJ7EPkc$I8!S%SE2`nwVr+lhOHcV{4Dly^2Yk@~ zFPK#LsXS2y;U$VTt3&!8^}-BoE1vdzNr}BiYyh-X^3=4gLWE;1Z8&K5yywt?)9m$h zj%%|Ube28OK3&n}PMF(`k+4iOWDcy)4-Cm3bCn#b7laRt(U%uF)GKNB%S3V9%h1mY z8Qif)jz~_sE<>M2A8DND+l!lpu2#%I`Ae%)0wGwzp+}wE6&lX-))b;UNMh*del*ag z)nr6rH(J}v;j963>P20CdJ~2;=8H%6@B9z;-aD?THCrDKqM%YD0-{oa0@4JOs+0sp zKtK$jg3_WQT|`AXNkjyc7L}$TgkGdX>AhDG>Ai;DNhl$Z;%_^5&fM?JnLBgN+`03+ z_jB(bCirIWWbb#sWvypD>sfuf)5u9@44_3JEnL=J)g}Et&_&-mHcOFz`06VWwV}80 z#9T@px-jKzrt`FuVAeAwMP@TSz2;HWV zrr3_XrpqI`7_$}Y9qU-~lJqX6t5`eKPn@dF#Tx$`6gqy3R0_M?6m#&Z4-1}OyP^1k zO%diu6M8nKW1|Th%<=Kr3AL;Z-96H>QDsT5bBdh#*wyYfAy?a{cV6#>BMj-i@oMxz zB3t!~hr;c-7|O^;fs?b%jP#+z=Dzm=ksFcw?&%&fI&uBr42O>9KG@BwzZ0j}4x;w_ z-8hBGqV<9Q9Cy%|DQBvh)$XvFSzq>wA}Rh^2%X`TFvWn0#=SZ58fK3Ph&i2SpAXOe zeu2gD#LbS1cTUH54Ei=nImFGW<_5ir@vm7MC}q1urBh-d)pd)6wld{oTt_NL2x%UB5dR8kPT`7q#MXU5_{2yhZeO}eMnV6m`Dh| z(a#BHOW4iHiOrCT7kT4F>r-P8R6zP_avc;~t_7C#-Oc7I4ti6ma#}og;YRVdr{}26lq2&i?#u~CcuxFAu5DzeV{!}dHSGOcN zRvqmqj~&y3y~%~tOMXa4Ycg)aXbvk1HX~`~*gc#2&EvU#r=v@6==1s294KViW7j>_JLA*O_DG*jK#8=-hnZT%Gj{{-7c^ zV}Q2mX6nN+Jkp?;VkmaSaDiJLgp}yxGC23&fTquo{WZv<U4=e^#3o`DjAbPxkGQx6Jg!$g-uQ`g@aP4;9Vf>fr&&)~O@j?5pH0 z?luqfRVPcb5aWz?z#EDiv`*fBs(t_PiZ%@xa%KihP?{spiLc%ETa zi_@9I%~KT`bor->iL#0@v>St)d636(SS zw=FCSO^Q;Sk-bpMOvs8bV6Ka2F0UYS5`*SzPo`u)ld|u+|8~#mDXK#$?b5fW0?aoC zO~za|l~&az3kQ5oK!T(=mC9Jw?lVg{pE^8E-#215$h3{qC96zxPGbW1Y;Y>;6|bRf zZr;0{cpc~(0K^d__Iv$Q$m)o#c7U*^eIXDWpQJsK*LxZ5j+VH8`VCUn5JM?kNAIH= z&}GOs@T->Y6OW@NtUt!5>D|94QaLE*+?1#4ohn^A8RxAdSu%Fp90+Vx&&CB39-iF5 z4py5uV)r?k$`$E{CC}mV;72EsN?Jfk`fC&cV8_4oi*sO(4(N!?Qnk!z+dYC#b>nVY zM3)1>)g~QgpvwEgO+YgKm-XS7_fsGsl|oI^6Z;x^V$my?k|P|<8~(i$_N;kKdu$Ik zEhfm`=mt4z5wg!|FXQw+a{@hc`V2Io(>+BF)c);41{=u 
z<8Me)s@baWy(3pW0k7*KU4OTeIjHN#=1sbssq$(~-UlH*{c?{jR+t$u(GE)z~eG8%m;**a&gw?A;XlnYE*7+8!1 zEP2!fkRI&F#!S<2$kVD+&C$9TO@Lp-&{Yg$f%TY@(<$}fPNiRq6%!_KHj0A8SoQ#0+PgC|DQy=pE zj4>Y|R(dUV(G_LUp{P`VbQy&ubWZRxTPQ=Q`@(;Kf|BVcg{Dnti~#IYBMu{fqUj|) zhe^TkNl*oJr!x5e4*Flw1sT#Xdq{}!mk*D!R}-(t#&`8TphPvIGEL?>R*^(rigQE% zIH)Ie%r*%{TDHgM&R8;RZ@iga`Ucho?K1sUmkHo%efne3IWdu|))$1{Xd*?@=v@;$ zP8C-kk!Z4}NR}UElASf<#oVaD#Y!&94EaqdQ2_?|k)&>1DgZ+SI;9N3B=oVP zS5YCloSn?VTM29svK=NAvBmBORHXsblBWQmE8rEm?*~YI0Y*3sr%O~D0U#SN6aMe8 z|DGKZ_-_vj{pSx09-5vO$3th(q06J|=;WCR_v`SwCO8q-3!si019YC~=2mpyVmq*L z_KX6OOS(`gdN+@%+2Rg>$#$rn#m=9=^#aLA8#5mDq}=K$UbY9~^tCsiVC%SiKsf4+ zJ)$rzf8$kQ#*SA+A^po#wE?hReiI=dpy+x8&-Sm0hMM6<3eSsCFYAcYB-6X9@8h_> ze;q$XNJ+94=?WQeWk#j1$5i3m4kZR?cNfbtw}dsj*yKK`Z95l1qnKHX0gOwW5wGG z(p2>@O8nGY{0Cj-quD8Ufhydc%#~!u#);wn>8rgEn(Tqy@xZ z4=ZazWkoK0UjT|qLl^E8@j8x{ z>T}mW|3Ilk$wK>BHDGfrcQUJ}Zj650;Z$4{7I{yJd1$C3AIz$IXVl+7CA>Qsr$Mm6 z3>cRnZoP!G6mLO7_s2a7PHj#ZOwF4t-A!>eRzov%8W(aI82mY*@;_H-*XYRcBU+c1 z!~q|{pDsq$xJeeBR)N1B-w&73D~Wzcnx>gQqE=jErshQ;C)SD?kDF3>>MX^&skZoK znWJ{=W#dF8Cz$8)$1M}9k`Ev~LB&&SEc!&nV9x{x;%-rNm%NdTk3fjpiR*{s8yFG% zRE~ZHNC50;A^auk)Eip-nia`LRpuklvqx1gJcwj}pxhp&_~+9#>MfvHm|ZXH#)W?? 
zuJE6XL>g&I)Nd#d2Uc3WUGae4eipuO(}%RuouLYr9fqH|3>#-nUnnF+`drth0QItKCx3u?4xIs9 zmBfMm!VQ^~PE;##zS+xea)0GCz0@)ZNFoK2qR~LtAN(anTrwnZhA6fZK~E}Io`%d4 zCC~#8Xh0dlz+Zg1zw*v>rVu|S)NsU8`6ENi@jJ}y)oaCw`t(8%?blFVhhZosPDQCd z85@BjYEyKTY1%WAixOgtQ#pTNR0 zXfy&CAW>C0ziN7}1beW)d)f<5^8qNH3p=A!!~ z`;XrL=04ajzSuwGy#i)H`v_%SD&A3!Pfn2EF?m|ib9JCNHeTnDjTMES^3HKoECI|2 z*pYY1yufllK)-H@ChqOFkv@`D2=n-P-vXLd?Vk3ajA-w4u_v_aiL{lS&C_!0x4k|s z>4LUlg?pTcQ>bCc#m3^x_xk{p%1*b@a3ZBW+uh%HyOn>4gWyIU2E zfFa$SH|gm)=A!f+z}L|vkhlz%nVtT{Q@bg z>b%bYdxU#;opQxN3Bp+=X9qj{Z0Jy7Pf$*+W%srH;Yx`clZ*m^k8zzB=Y#qWN!#9i zdnI)Yqy9-2J}J4GPjU%Eobb^iIyVGwpV77zFDF}WXt`i^3>7Ji>JaOpVKaNuo7;Dq zP8S!2-ha)Llmz<3{F#y4Cg{IVTI7%M_TSd?AB10XIh_r35IoyJV$1=#SW5I&=VzNoq zTOOGTx^VjHeS?hW6260n=T)Tu2h(QokoEESY1W}7W^3TYGOwRT7UMrXP($7!B@@~k z!|ke%7Wg!}-;*_KKka%P`Tz_#Ja_A%OOh)RQz#*_r_ZfP3+Ky{m1|DI=fQgpem>d+ zS5r_I!Tr@L)dKG%*HBb)D8nozRhv+)4JWg_sP=zWxY4|lh${qi@wfT+8IKO%8sCD9 z1>xNpfT5IyZK_I7*jFV^2BxN`uGvdW@woerm%{bKzdk6 z=NXjmpBV|kWz!9>0c#wvAch$T`Bj_w5b^!hZG!g@%bpJC{^IadYvOFd;r#EL(6->4gk3K2D!l* z)qCcWl=Me=f~)imdUAYICG=N8*MItQ^-qpd|Bm?a#dS_}3lJ8rDl}970ebTk(7>Dc zo=wKmF9p)J>HlxzbHxl`E7?ScC?|GR9V2HM7!NO}nQH}hrD~kS)_lgrpsn@gO?{M0 ztA*yPdPN@YrP#ebZ_m~smcGJLpPigR+h!R$5`r%suh*3p zfl_jXn-_zaF!sPg_)pKH{YU&+6P9UxlUCnnCGZZJ_A@Fn_sH1Ub&I&ZZ^mvL7S|rX z6}KXzLp?0EXNq^8j=$j*kiDF1zZPaX{t$2gqrG37M&RI_@htqg@Kby#cQM4s*bAVQy6eb=%(# z4Dp47^0Q8zW{#1 zc8SA2Z?(wfLnPpDk%!k!6~O&CpD1D;oN46TOyq*QYCFW7=19pdpm0V|Dx?PcsE(5Ud~V zl>f0^X6!-DZt{uQ=6#j>fVoV;y*|l?9jO?U!H`gi$6}>NY%j(j%36IlwdC5u4)7q3 z%JagCJwGELE40f}iWWuRUF54>!x`s7Y@_)OZvhGV|JN7))A|CTMDG_6?b*f!@C+kU zTvEy`%bgw1N1f4>nbqCZQk9)12cDR!>ijkGA`7!|=3Cf<)H(`R1gy5_rjRhR>gyk* zQ*ADnEG0lbl*xGQ9sS#5($eBr@p1w5-yZ5of9D&0fl7b0zdh80|Mn>NI>5y+zw@_; zy7b?i@!Shs2F%#8zd7y2{?3dyNibCZzrOglHTZu{UpP%POt9U7D^zxmX>~4rwi~UP zRBLY@9YnuY`?=C*b{1|39Z@mzmUQ4W!jMrle_2XE&&?`#L|N%QNg^5lB=6WQl6K4LIhc zxvB%WRG95Vs28}Ff<6L>8QdO2=zWU4ad>3quqsByjpVl9_S^9qwLNzb%@nB32PEq_ zmA}wkpmSZ>@(#|Mh!FIxN#N$rT7!TJu#9S#+#%R 
z@_;*5#yqSKZkHnklnH8T-p+NHpPp}$%)3@4QE3iX6|z8JZQ*bM0|laql4vKoPo>Py z6H%Y#Xd^=dK>E&;Ti^!R>l?^*PDh#T$`Tkw4TKu*LcE3$;(V_10kk-cZ1kiBAOpq& z2O2pAXMJwuXm3@OKqzV|=Pe*b5hhc;kmFU7X47p%x7;T*N+|-sWUb7KstZJ$9A#N8 z#mgR|J2Zn3G`5p(ItGIzH@wylGm13w;qcUoalBrs4>#}mpDFj|5*_}}GVo4;v|4NML z2@@^W6mO2&ghN05kN=E`JfnIK?@^(1?^TUi3&Xk6^`*c;dLY(xH_gtv_`_t}VI5v> zYtUl8UWfPq2O5L;!O!M&5?>qOoHSfj`(AkLqstTm;x#Gfqa`689 zz|XR@(6n}x8l8A5au!Krp7Kaw=V=;FAMH+pVr1xFVYpdKPh;s^>x$BMOIFd5!H{Mu zK!*2Bxq6>BuH_bEsp{cFDqyBXv_pmR%jEMTvTY6@avy6J>`1DD3a6;k?#%+1`{&O{1 zq!s+dlFFF~`Hgex!L9rE_JD3h`2$pdSlHfD_4mrU-wn1v#R1g+3JrJv&Eqpb*brks z0*gcDAhI1x4Y1(I4pN9buxkXYFa0QN2z@o-TgZsnAE9nLX ztQ1*+`x^8X`YjPn=Dk-xKx2@vslZw4WEc=PGMPdH(!X05mY4T-nSod?H4XuQu491I zZ|8KvHQGMFojvl)^fTcBXjCnpunEkbiM_cckmY%52JmlTi|PWe|6s3=vD58Rac{s49!w%NJ;#?4{t@Foyx>qKJfyPHWC(mhJiqBd{n)}`M9 z=S1mV0(tE1mcG9p{M_vbkDl6Lv-jI-?$->jxsfCttAjnwvuJa_6_HBovMfCFrK4|L1_<`vKcOTE+w%Uj)2q>n^j zpq+_<%f+4cq|AP^ z566TBM&A2k9d@Ie5QP)wrOKp9HmAvYO zO2f0Xs<=nWZ7vd5fKcum$&H=Hi9+{AILEr5UOd~rzPx=yY1YjZd5`WQJ4HO+6cVwa z>uf7YJGV<*0^)5v;nc(0!r+EJE!psbTSKz%f-2(P?paTvhqLgIz~Dw(PiV0+v8d^^ z32eJt<0J=A^<#d5gFYO8x^#laU!KK`dqKG3Z7#1s`Himz$k+EQIa7VOh}`{!q$O0zkVEpu$nWNc6&LxVMhuwK?j3GG-X7Q=BO0Qrcd=ZtxOhXS-=47ul?Q|5 zAOq7q9M!weJsX3LGeH_MlmH2dVUnTIa4n4Qb4w>HvWPX z5O0dlW8BlODrCRzoLFw_?^;XIV9DKO~hS;F(9mWVDw2F z|Es8N$hgZ9vRs|p575$9Fg)u@Hd>LXV5)2D6|8q~q?x{Q74{}aS$8p1;>obsysOPl z`01-jm{(mU3!wC#Ov%ek-8VADefLL^fNC(=uOYNqru^B?0)RX?UwPeh)b&N?5C`u^>nNm93o>gNa% z;w5Cu_=QhhWE(okqi>)Vx$j!6-pG46#f%TVAb~cWbA}#v5XXLYc-*!iqJ`Nu+TOaQ zSSlKM&zdB)=#qOPO#=3CS9S;Q9+JIb-1X?Ddw?b^ncKPhi_;tt>zB5r*=U+mDYS0E)qUJ-Vpe=Zi;m{arxE-U*7BWv!;Vl_Irk$9ei>o#h8lV2m|4fF-mh zoOwH!#fYqs1dr;Go-45GLL5z?LX81Mkx)<9r@F*`HdxJ8rtM7;Vj8B*=R8H5dWxvk zGBv=d4Mjni2Fq4rNF7;MEkJux!nTE z;_0uw>Y;M0=cEMGlw`vf&F3#|X>zkwHt-mvwr9VVmt|`Kv|$-LUtBny_U9#Le&Zqq zK0x-vLso`jO?$#=$82az3=NOGF{Imc?Ip>!<&`^=0dqXXC6|PsiW?ix9P#o)$a_@R zZF)Ex)a~ncd3VocFLMWyG54Ia4k{X3@AcfRZCKy`19Q3!5Ln_f={W*t>lJr-G<7xA 
z;A?#%b0RJ*tEUP=E~m&^cNOw9rnFOv)r4F|X#F%Ww18p#zcT9jTehIzbIfmNcDhvk z+F^fL>N!H^ft}k^LUbrB zc-g*^AG^WA8Y=?ZXeT?@f1!$DqH$`5&4t*NPAp!dRbX{sqY>mK@LLackvEY1>6C>!vCuV+p||_cao+y_ z<^}}+(GBoTNcw$oKn9{oNEKo@T?1Ne1DKW%C09+!Sxuv;-aRl4a1a*Q5DuLT2p;+% zWJy(=zP-Nj0sy�q6(3Eda#7ymodksmraDzRPa@d6Dm%k%^KOX}~j;ptWcHkGSDALjPv2Ee`SJASga~hkRKR{~WJIppXbsUWB z#5_b*JBzmNUX}BF>Shm~GyB+rx{sbuTozpETljbWE>Ko$ehTxVioY&&%0gYFcY5d8 zV?U|h2Ex88rH|kzE|~A&B#G{gap*?V$_`&TyQh)8j{RRR9LwQRMXX%f)9i10D(^8 zG0e2tP5Rn1N)!DtxTSwKdKH5AzG&3bJbXP9&k~^GK0ZLow$(*zS+!**5^ z4tcqxR^AZV&j+q<2@>qM!!|Dyl>&X>j-+*V2e-11{NuwnBUni)I<+L#{#l>QPgEYl zu|!u|YPzXT!je#AdAGHr&dvBq?G4+}9N}YFKB@?YtX*N~IOTn1(J}Y3v4PrwZ!o#@ z+bA&>h_N9;v6i@Q+I1e%0B3ZFxovrJ89tI>1}1$-9&Q1r9m?IQpWvl)lCchpDABy5 zO8X?EDe@n-4qea^U0~a%W*iiDBC#aY#<&0KsP&zLf|&akG6BNTKSsHoofvX4>3&xI zLivV+`)5;`Vd?KSZ>=E96O;y%+zB5`@}0C93v#iJG5@VcQ*ICf*L}1uXvM(lrdTo@((l9NV$bOhyego|NK-h?}mdX#Sw|!Mhis7>;nE2`&b}e(4~)H=<4wU86~u00kAN>l4o{$2E~D zq`T}lDd(8V4*vnJl>BW`083whCd+)2-d5Z>LV0AtxvJcrTD6c<)kBJ#t?qh%ch^z+ zVmSH9Knw^7y#7Vn%Rlgc*uSmer56z3$z^mhz@1hf9(#ua2e3H_!JbXUTkg0mB~S zKfEt<&k39-w-E+vQt?El@<-7+{r&rFvCx1EJKO-5^saX&b9xuzkViVHFF&LL%0k%u z<~agQ6a+3Db$|3KpaJ%x3Q6QyRU!sA9Ef!FO|6^Q_+BRx4AE8rpdsMiw)1q33d`!& za%kzDuVEY=;LN1zlU0iEXr{=9ZQMD1pxc!1ufli6aN8fD|%Yd%vwD=1)W6<*H53(dZwn5%kr z*$4O5vhUv{z(`HTNCD zHOJ!oELDs5H`{YJ!R9{#>qLGe5NfGK$#;$FOGkQgXuK7+`^Gcz^a1#L>AXJ<90Xod zJ{6P<3vgVAJr=_?u5wN@HzE#~V6G9R6AuE4I-lY-WMydiv0j+3l|A){A`{HBska`{ z`g#5L5fzJN!&ZQ3k3|>VFlbI0-aRZi2Rnr7hq0($X?4!hdy-&Ye7s!0i*xm?_j7@@ zV^twN3QjVn`T81K6)*Ps;pGYy!4i;UaY6bshQwbDOAkbY^Dr-dpgvzJVh253mhB0- zpLc4=tlQ*!3$0eAdPsqQX@Cs7PicozM^spVO=D}IW#jDo*LRS@*g$CDqeitx5GsJY zJ`JzO240#XGJOj5N)DEEo-pTJFucyG791(CO$-b}KXg9kP-Ttv&9>@t`mSDzN~i}asCa*=6F5X^&6WEuX~5g#zGS~)-C7pKR|w#{5^{8S>+Bc zVUH(-)d3lMcKFm|q-1yfY)Q~?lcL4Y+?3o&u4fUs6rmdF+zMwR_=b{6zU6Xs;ERoX zz0U$eR)stxzLx$;J{*0yZf!V=)EI$lPf#21L{%{=i%EA;e^ik{PT@!K{W^6i4&2(E z>=F)p$|ppQJKoxal|oby$~jWv(zkZe18y0A z6vlWAO7Au)y&fT+bt4pWH1$re`^Q`n5h)Re2R~myB8qW3>W6gnbgu1BNjEZJv={G< 
zV0Gduh-d;n<^m1tx{81l-vP(4{psW9P9~*=MHR1V-H+uZ&WxQxC7NWPoL&PU2|OA)7@B} z^>8TL7}~W5GI6&SY&?QYT`Yk8$^@!U7{EXUsfv}72@dXFdYNq_MLub;)p|iORH_TY z+=GYI4lB%&u^>5lwg-PjY}ZmC_`VX6qXAgkFb zE>^&vI!$B*Jj_Efg5RT5vgW^>5V`HBvnkWCL)dK>Uo?DLT|-j_h8dWNywS57&)JvI z%f(tIbpK|@&eTI?bWMbb6(TU&6dtU~iRLFs%P+q&J8xlDY7V&-31%iTRfUtiDV84f z>KiMn&Y*#nXvPk6O}FF$!H3}jAq;B66({NpeDaKKOAj?U#z-&F&Lve$*PxH79wMF| z)xjh6MM_x8^mNefE|6R+)i_PI0ZWQNSuI*-BKYd% zi#AdSUvz{{MMl2>B&-mCEJ)*dkFffqZ;C&I&cG4c0?pNlR58XczvqVje?0a#*J@yl zBf_YDuvGwSH1z|Pgf|XofKQtK*BYNI+5sW4U0zZLu#;N=8{yz1Fzrlf?hgV0uT%S+f(qR38%A-7DqF4o~67OXb3P$-$>AInjnIjp^0KZ zKs3e)T=Wg|P~f=t^f~c0q~(o3^9j+V1E{Rdzp1H&W9diF$an);2|(bhGt@iP!!jR; zf_YEut&7rM+HuMO&wl?OP^xD$K!$?rN!wrrNmlLilxrGfb87v<7+Q58aWP)7$MxD- z*@qE;p`nDYW^X3oxB&dd7pq}tsgdT9Z}hpr1yaxe^Y8cu4IS8Q_Y(Lp#in$#uKr9b zoSKi~C9I$iGdTgw+TR>f`Xz#AsHre!rSi|K zY?4i4`UA3jh9jv1pI`|v=~MKV5m#lSCfvt5;4Ln@CYk-$ zU#GLl=?4vBu)HZhKpxxFG8A{trAxos{(qqY-E%;l?s@ht<(=m{%&yHxZ1ZQTV}}Ba z+rc!z>UCB7Av%}p+qE(DSp(+8rQdd>iJa!X65tm9lwjK5Sv_+eK;tY-b_YsK0v8Y) z$AQO`n?+q76*~!d8NVxZ4&nBgreBW*W_>Z7b|C8~{?ZKDcs9&TD?=|x{cnd~Oukif zsUosHdK!#;P7$PfOjGQ58%}<-a7si)bt%D>4F@R6^aB`Oa^>_86hc-+YQrx<^O2_q z7v0FMQETWZSagY{dDx^?NiH*f-tR`OCBNRGR3+2Mp2Gg*XU#Hdy%jAp@mD(-2fgNe zj~Hv6e*-G)aPQvGZVYF`@U(Dp%(JhJc9f=OQ7 z)iO+ixMeK>$}o^kvUM7p8{jRZQcK zBLFA*R34dy5LM441J*pnKljewg{<IkdZo!TS(?-UR-;+yuv$hwu1!k}AfaomAR60-oYbGV}p8+dXiiN><$bVkaXNyKXkx*ziTsb8D41FmC`dgWV9 zrMR(Xt9y3kvoSaP4XLf7&zW>oLHa(e?$ZDtjORAaZyd-1)ghk@0kBR^-Uj;CK@Wf4 zeA?PeflqR0R}0~xWerdzVg%S3?`SfSWY_&Hf<4+@uPYc_68KzAOON8gpbomr{sO?` zs@Z(R2{8W|igj*$af}~-Hv$1EFfZS+5}Mz?GkmP_7y#JbusEtI4hS?K7Yum;zAxTu zz#QR-KIo(5>P;)hyGPdzPr2}uqI}|!!ml5PrIUR@#V)2*C&`sl;6qDA<44Kudm3-X3L-5J;_~#GyxUC(%&O}bBNl$C`ufALRzUZdI8}dR;`!P^CvPCt_ zj$;+{LiQq4RFwtUe#*%VKkzz-e$=CCc5SPz6OHHVO@qlvus=+5UQhS|GWHH(=(~y3 zRDbasi2(`=-1C5pjw^hn9LZK)G}{I3$sRUa3mRF4*^isiP6U9X@}@t)0{J}?!0P`5 zGwokjv#-j7mO<5a$FILU88mc0%)-d-p}XIUqnAz9VdGbaK7uh* z!e`|v6vu#OYn7x{(zh4Sq1J%l%SC%E=d%-3lc}tcSTl%PLQz0xQtPP@wzcm|lhW5a 
zL9K!t7dlv!>f(6os|B3s$35QJ&voZI3%$6QWF2N4HY>;YAWM4kTO!@^>V-z6Cq>XB z{aP=IuND9D15;wkm~O`*Awmze!x*MfjxRkbj3Zz)J{nmTR%c+m07j2hqk&IUAkbPb9IR0a4DT{hO_xZ%U4=x zuIdTt)Xc$-v=mG6qF1flBXVEf4ZWBAlzcR!rSXykgSAlS{5le5kl*)EYFiC@E53hC zAx8YjtOegAV`f0Z^aJd`S|WV-El>;`g7j}n%#z?%Rj6tm0{8gKGwk{Qhx__3`}<$M zb8qf6B8FvZVQg2*9VX}H8o=H#sY4%uo}*qO zXL`;!Dm{|oZw^m4WxCDK@l|o(wa^9gFI&yJ{RCVs8`|WME3TSk@n!xVZ~8s@`V|1< zdI~GI;5ygms2BC|k;_1XQBvtA-Fnq06m_ZraapZ$E@K!O@ezrEf8%)LeSUw-{{N9$JHA|nGW$CR@6Fm zKZ8Xi@(kp(*+6tF`ore?5aOy9Oo^`D5hGJ0bq18@IQ+60-rEe~Nbd|UeYNL=GwQB5 zFuBTO|GC?#Zy>p)3IJs$j2;OQa_inz4&RpGsyZlCmlcXLnT|cqC3ZL&^lV>{tBBi2 zE4$Q`g#9VN%TH#47^C7(KV$pK#J9SSYvWO;b71(^$gru;PRspQ(HbnkoCnP!D)iTmBpRo{%%5T~+v6mIAm z^@rzEx>gOoFE#6aT3)l|oP>;snteHco2t@uGZ^0B~IAFwxaM2S@g!wWE>-2%6PY%?zFIE=M{ zJ__k`la}M7F~zBfA#6i5d%oWshe8K{D5UL)BYrD-?xV^E%tPp-P(?BeK1Bamu_q~1 z?~zg62>4=$e!ZYpdS6S+VtnN8I#CiRP2fBI1Eg5@)hb~T{FdaXeOmj*@SPHyty6B9 z{5J=p$nLe4qQt@;3-u7Iw{3c!kFR$(P@A8$x^BdR6T9 zFciWmhgu*v!q+X+IHhtBH**A5HqYM)J$X{eeFy^eK;Q#kSLo$UHj;Q1=f8jCc)Rp^ zCw7paw!GlX=5vlv9jJKi9>sNdbKV>@2wO=XLs-2)E_W?TEvUJiwGei+C} zT`50R5V54Y4Ht6~QGWNFSLga4M=Zn7X3(EEia#F~w--Ae5m69PqYD65jJ%pZk*z<# zTfe*r{;BQX{?a^a*v_jd%M5Uk@4eAZWX|z)e}&>&h>2g^3a7oZ=r`zR zsXvAAKWUR#wS4)W)nm_%@7KZeKAC&vE?D-QTN{U3N(erJrUkyQ>2`~ZJV8X)S3Z(l)_Ei!OTK&g3gSet9Zu&q`u?;`Z)XU4mRxd5CAE1pu?j-T0m!Hw5NL<9lMp*N-iP#`n zAEcBftzk+RJ{2kTo5rP>2E;w#@5^2i5WjyH0Fk(@g*nzF`gHzdkM1v4d`oL0V;yYx z4ZqcO_*RL@PEh58OVlB6uP`;qpNguXj(R$MK)y1$kmbyXux+{xe=p_a7VMQJ2Zbvq zcd?a@5vT2eC#|R&v?w(scmN}j0M6c0bLg?`N$!;bv(;xVg(zq4p^{?;aM2Z_Q+KFJ z8Lvyd?>x@eQo*c)Nzui#2z@j2T2E*GzWmpxBQ+%1U!Da0v((}D{a)h6?>L@a7Pfmr zo90T7TyUKS_TOKG$$k|SWOr`>D5LzOI^d@XHTS_p=1MRRG=i1FeFa;JoU>HQg}Gx{4K9Nx8W- znrB_DWlH&45j)LOFNQipwp!s(uFV`eAM@RWlK84#$SN-P+#&vh%pdPQ_fT5tBkI*F z8~cgA@VVqJfPEYdRw&3n@>Vx*TD*e5cTNp181f94D^1nGkFeZ8dVey#^eFksJDs;T z1WHA@cOg@-!{p#PWxvIEq0cc*uEGqbsX1W_)3Z<3QiA3exI0j^MCfU|aVYUt9@%HN z$n>WE!IGx^-`k@kz$#3j{o6BJjo!^Q)jSJTjAUuo4z5vJOkW|3FQ``P)NskwUf27# 
zDl4y^P}tD+g`V`lO&v?*PYVPhOtz!@i=XZjeC^U|yKr?S`7Ba$2|dhMcf`XAFB;$@ zp4cedHRQn2P<}JNyg=^QeI`$L;YoK@pic1~TQ4AdoNtvOaeSt4yJhV#PDOS3`6^jV zbOYC~OKu~%k>#@OPsWfp8>7~(g(ViP9c3e$b7GEaLzZ(G#{Mr2ZimFx&o=n*7t0{{vr$F{4NIPUh|V7cs;4A$9X}5Uu6i zxk~UgB2(4s2~@`%+({xd zkwh~!N>@%eeO|oAqCSL5ck_G2;KQsJ|oC^3S3I8T7eII`QgN^sQeX@jfnG z60s+&Yk>i(PR;2%OpTvpV1S@ zmwHjHll%z;$9MwlWp)JzK3Il*2z#!kJS~E@n1*`rwtO~L&5M=2myQ4ar=*wa+&Aws z)PRh-{#>@`0{RsSjAyG zWuDeqho}{jX@mu=;<4{9E+-8$Z4+$tcOtL2 z*7?YSasl=T)y!tk^N76SvCVzqHrEAAE;ASwOC*+F6g^%Gb2DkiGQ;f9$Ea7x;V{2K zZ+@sGSI}&?*@MUj-_GuvsmM*?7yv*}694Z>fU|K_?hnVWDk81S%Y(jt9dd7z|Hge2 z33usaXjNr*j3fsVA@vZ}f-?iEdQUzp8nZmBjd5pG5i$>urN8O?tlFO&ogZH`> zcI~o_uHCW!e6-~y;Sl2oVKo+x_!@>rkhgXdY&h!OreeY?oD|g++DujOLk~w^` ziU!r@!NWiv&b37F$_QF^`MCFo$KA!bU`>HGjL(4m^%Puo`gQr~wgV4*Qgz>DO52xz zDKP3b4!Cb*)NQcu-(CRh;IDaNVhn|*Yu){OW2-Vi(StYlrr&Ca3rrG?GyljA*d1s> z91wcELqFma+N$_YiJ4wny0~RU4vU;au>!7Y^~aQT5D6p;T_z$+FgJo9E}Z9Y@Hi?{^!TY-nD%tL&zh3x%XQ_G&@tE~M8og9 zDEMhrRyBew+B&@cs=|e6(tvR6OUIh@inN3>kA}K*9l2+9X(D0mx+l(tfUfoCJrYe9 zw=OSPl8UAo0b)R2r6fwV={6Qjw$db2zR&MkZBU=~j(_k}CEpd+;;GypR z{^1d&q9$eErlM3rDUmR`im}ABF;YyTkZj353`w$15sDZ~MOh}Ylx4;)RCYp^8M`bq z*@jtqj;^h`uIqPQzx%oG-~a!-p1ape<2&cfIN#4X-*Z08`}6)hr+TWZ8YkGV%n4W6 z701(01`%4oDlFR`v5OpT5y<77)crga!qEg#S%ufvMQeKeBD-Iu$hqQ+S<|flEwDp39w0dC}D&<)^D;&zdeyxRGj?0F~n%aR~9z(0o>AcxWJxYjUUMAbB9mW=IGOfRU!k|+! 
z9hp8g0T*4q^fKRGzV+OTSY)YLbG_=~nXVwfEOsxWrG%3*Ga2gtcmm1}Jc0@HIu2aw zm(@YOM1IYuIJv%n^t!a(2-JP&=C(NtwuhRrI>aYh%Tu|sQOM`{!6sK3)|{o_bbn1M z9r{W_9np$E_Xlh5f70aquYC9aXUtUo&Ku#Py25+Xr6Zjn61oQgz8zF7OH6(`-fbO_ zw`3|jzjd}ZT17#0Q~E>pxP5gHL#oeD9a{wx_TAXC6zBFj>zvv1M{0IrX2}}(VqN}D z9`0(#uG|i!YKQDvEB0qw!?f8=S4bLrCmmhI0`pp@hKdr{lg``R;aVqkv!D2iV~)A< zCJMEGki2yciDRwuhb7%(H^aHN9~Cz(KYq$D!`2BRS*B&~KhS?iatm!vNLf;#R5|!c zedNetsXZ4qm*`|X8ki$A^wW*|`LJWgpnL&!4bZgi5OU#{2^;AFmCNpT&X~8#&vnGv zxd<2C*3l0{)tL=gi-=T1e9nVK{@(;<_)&xZzXIIA5tR-6*;mG+E(8SCw1JYVuVAt0 ziV}@tl34#z?0(xf9(NqEi}7hg@C2v(>*BnW{M+(($vJ<1c^Tw4i{q(;>ktS;c-1R7 z=Z9;z?X_wZVR9M)k=QNFjMAi)l;_`R%QEGVRi+LP??)mPPl4pO)+qV{EZlP>jgqB* z@~yLSS2DaJC{n$LFdPDzQ~#%3M{JT%D;E79z4_htr%&`>joxPQ z1%f$%DnPv_<+=EzBB;KhZC8DFx2D|lu{4|Rk68`55houn7@qoJjEc?3DGce|&-!q8 zD{HmwDXss4v3L$SH3M=s3ivP$D+GdWrmTz4svLb|KuHgtxLqVmGobEY>SPhKs^E7pqd>$cgV5t0(} z85gc`@D&yA=jfRCwt$eXINp6#q0g=ufA0_@sR1Z8Hhe>FC{StGSAK%H&vL}3Z&Uwc zTeJ3Q9Mk7Laqv#LxIp=YZ0DzYRGC0~rT|$^xzcG=InesPt z{o}2B!h^*a@ImEDvvVp3^wUim*ewl?pE>6&?wRM9>kpn9#Td{p+u(ij^@9pvEp@1l~3=yRs-ijmBbl_ zeE%`Wsn<74Ho|(4E_1dY=8=d>`eA?&0xa zehU3H2l@pVP;+ORW!ZJ69#v^EfLwB(yTr!C@QoGEuwJfDQ^TXFT2lx!mnv6V^Cr&| zMSjy6@xa<$pUg9>BJXrWg{1C$n?V0yL(t3>Ro(K*h2D|`)7V^UW-&i2sct!}+ZY>3*KL3n7N z9K-i%qffe-ax51gO@=G$*O&ciR=@pHqY(~% z&mrxDvndJ6hS~P5ZKWP+d}f$BB_zyr+v&L$uIQ=mH@AmztyuQ#>1Rgf=vd|!%1iNg z9S#X{QcK{8Ua==iXUqT6toRZ3{kv`P4Ho>r^5%)Jq32)BM}Rcv)zdTG9rklb@B6^r z3h-R#YYP20f2U?8r9RBws3bS9O)jxzVMn6K8WC|T*zAY)B0YnhnKki-V@sqOG;elZ z)^fy6(UG^8g2b|%k!Ld)Xn7M}&76+lM6)HwBUK^qN* z1@e5j>`6un(!FcC_o@P7635L8_0E+tMh;^Wp2iYwes-yZ>c*gcKBfi0mK*q_7Hc2l zD#?x4I9pGu`1Aoswslq6F9?^klnv)AiwyMfAuMU`#Q`-k<-<5(}WE-kL=A;l&oz6X)NAspNtXx z4BN?R>HMN&;yL?`&w%PZ7M<}P3=o-H7&U5bWgS|*bl)|a+i@|-(LC`!$H4wNX~n%c z`w>5hn22veYz;u`dMH$%qKDKrJPIThLi9?NH@@dio*BcgUwWQ*u+(ryhX{!IeR8W@ zCJ|0PM`tu)W~WSwHomr{Y*0PII1@(O?v9W+3yLs(G;aSxHJyJallyf*y{i^2?g5Y)uqT{aWoK+mU1s+y;S0n_QORBk91GTW}I7S5V8~{9>b4eZ( zHNUktOTpeX%4w(2p5c2*pX=35LJ{-Q& zR6{IQG{Iq(QSC+xm)icJ`Z*c+}Iel{)I##XUO~mk}W=jmV_!P 
zxQnCM+01@{MEJq=R=dn)hcG{YC*u)4;|m0S2v6UbKL<*`FlaI$D#>?E>0yZ;2zD!0 zk0RLy=Tts7f7pO6QrZu zefJ&Nr;UByf8GswoWB2-1W9z7WK&j4-k8U~*}WUchVbCi?Cy(kh_mavEqS3LYbg{S9u))4D-vmK_;Nx0<=rKewp` zJ_Bx&d5JhpEr@ka@vziscIXoI=8^Tb6;;%W4N=nGqr(*zp~&6hr*Y#SNDn`Nt?B1y z>eG~_L$kY0ddlssI`?!u+mmA zWec{;bTAJ*wnOO!Yb0NGblG0B_GC*+#>4QjKTT!_lDWt_^M|&x4-P1Jf^lDwmy?Zs9 z%cww%9^KnDooU#@ba&pnrlRHDMV;$?&IzL08wwqILrcV`-y`%OEpZbQ3&&hBM%g=- z9`9Of*1K?1m$PZJ$IRJj2aA^lG1KImTB{C5)aItw!Tki(PxOft)68+1u4)tCRej4LQ`T@L8B`Kgtc&gcq7?2!}4Xy>8Ba zXzI-)cc@!CEb<8@u~+!nR&gumyDC$f@4W}DYZU~ThiKPC*96&GsFxI+`WW+C_uS%< zH;-3ECe>_mW7G_K9#4C_<>@M@pr?-(Tfth=1B={Qz1^SNmV zbe<2SK*FQ-HCKa@9ay#@0c_~g{n#<*-$~F(TfRX4f(2axc-!yJVP|StvU%CRU+m&+ z{#J?kC%=QE=5Gk$%&~vVat0hnHrlkrpR5W3zljR!O%%rxZmDTfnV$++9|P(jtrPe+s(a(690e0&ROBnN!x$TJsVrQ|1%7{G%{ zl(*c_+G6&eG$Xkk?oW6+1^#V~=;)BXEqs3uLNw6mR1;($MDpg-Bza7W20!Mi^K15w zaz6HiP18PR*X$`$B?0G#R>Ftnh3iCwx~DzhSbm_ZG}q3*cIna?#!ohnCL{$~)HFle z#@(NO9Bh+os#`X`MiSXOcjjfSxj5LZB1G*n5RpA>&b{XgeRy_55*X-y~A^0e;MSuu1u1^Na9v{ zDWFjuuq{}$&z#7W$T6|CTgV8)PE*tZHQpY5UI5>S@u8WXEuu%`H_$D&#U3k&QdZWy zuF$N3&bw6xSBsn%}>>++D&OyAFP_ zlJ^kxNompEQ^%9}HbI_0-v{CJgAA>|)h+N~8WZ~-dj)7VkKg~Z zE$DNP4#`pc<08O1XW@AXEtTfbLpY`D!EL~jqFlU=aHyd9uVbfO#O!h)Bl)Sf(wwL|C?N? 
zf6^cJAFGE~GDvBRsIA0uTuqccuInJze!5%veP^8UTE9UqKSK(KIOjVrbxan+VjZr+ zCJcS!YRm|7(M=CN2a@J0NC#`OTbDO2__isY& ze@eHjjU$9&$*c3u)w~|59hw8sc6iIQ2h*H%K5>kV5Sh({vS<(KfBJxR%^&Kmxv{qd&Ht2 zo&WHm#;oVHy1-fYD=O=cM4cZB}_ zjN^L|vao{l^cN7dRtmBT#)r8`i@zGXYeLbz_}6vE9RySFh30)4_3#i$if z4dX{I1jZT_of@q_yjFF|#)sR5iAeObyh}eo9lsH(!$f6ju@kO8EC>(Uf4p)ATQc|= z{fyv4cv*iPL_VUuLi@ZuvE>@dN^jLuZ|o+yI*#lBcxqzUQuB8>YQh6GEgGrHOM~>x zcArUCiLtw4Ss2Bj+VXnRBNY+K>-{Q49ygwxnsz0>h?XrBwX+JMtaC357cxnl{iG^8)JR9OcY)}F4{lfn@OFdT zVjfU@2nN1^`S|PSuaUh;ui!Z}v)Dc#YhCnC!1B$eA((U%_Gv7w9!|!3q_Q!4zkWNs zz^SzVBkTh`yJ1En=P@CQME0exw35Kz9)?j#7{KVEA&9<@XPUu!RLp0lp_3TY z22QgF*tOMeXwP}q9T)(R{j~##5Y&M#E!c`>kO@vdhwWEfxsk79hyOet;}%psmP#!7 zdAN&jSH=r24SpEC(1M0+E~D}Jxa=^Jj@AI8!Sg88BiZt}@2{pFDY!3k|I+!H>@b>+ zmi(^sw^!eFwv`>m(#_ewcaCBUk%CF16%C)GzCc1#8P+_#tn!~{b0S1<$@05qFoA(n z6!a1;cV#XqV_zrV31kR;olK-I8U(5;7HA4gdY3Pl%p2H>`EM@1op=D1ko&&1V(u@x zBD65zY~g9GBFzq3|2!GsvhkbCuX97jT2hJ?N|qzy(F)j#S(*$O#L9}gfv1)*!4zs7 z!v0~IoNPgXum`oI$}e5vLop4o4>#M z+8O%uqtkjYdRCb!-^hn;jQ&Mr-(P+EoV5Z0BED;05g?%oJv;o%#C|WpOOZsN#{_m# z6qY{!F2JvsdEYL;Uj%3xNwfvs+gG~((*DEcZ}RpA3&l8+Xb0L`fA2mHC*PolvN2ik z6%qZm`6PnP=l)|t{7P%kudP6UlE50p%ObxMpbU6?9|HmQf8R{M!2YYwwz4BL-#61Q z^sRJ0^7Wfze-+>suu-)D0b-f*j+?NJ328qs(C@E)?Yy$9HP1!Q;+XR2-#f2JJGKfo zJ8Jvgp7y3{0qr>l_OxNp9`$oO{Fkl!n@i-^y`hr*b9eh47~r?$!GU-X7-`)X2)zh{ z6?R>`ce1OSd`ALCVs6C!vZ}OKgmQ`&bQ4&e{AE_cH(6_>9&98{lAg0u3zMmGGANom zI8^v+pV>y~rd@TjzIpiErg^R#9zEg#<;xKodD(VVY{Ax&Q+)Mhn%Q0feZX*wt)h8?@jJg2GTvXVyuv;_Y4$iPsJfr^?XI zlusgGp~wuGV753tlBYrUaZcOb{&$71?obg1^XNyQvz&(x8jbI`!yTPd8`) z91lKNZi}*D8Trbo@;LGNGfe$p^^WYg@8LZz-awX$>~8MAw;Idi=sp^S%>%n?V4<2f;X2INV2klvi9xEQt8r zTy?iTIXcM6Zm@d}%|U-PRw#9Xgq;)VxrrT)N(3f6_`zpuY6PbE9TgD|(E4*1{h1vx zDwdv4T_aS=uK-nuTRPGMf@)Z)En)}d@Gux|Wgbkcio1DP*Z(HS%8cj6vR)c#KojtTj>7U}E-KOZHc|+sR|K#>y?(rd-ytjIT zMA^NzgMKi*oTt7Gk{TUW%EX=RQ`d&>57${2#lxq3+GW^lY&fSS5QB^5@Alqvp=2o6 zG8r=<2Hm`8DO>v!sPCcRfQ){A;Q~l|IHDWGzWvM`X2Fp1aM%aWS7I~-^uLKv!Okfh zDvEdPvLxS!coNb-Vmo!)uw#5#oH-p0jEl@2Va$6Ik-(ebfM*;`c~&?TOU>~zv6Gv- 
z3H{`>Clmb=Rbj$r$E)FIE`S3h_eF*s<1iCuUL>a6F)_NVw~J>Jz6WSB_6p~}KuCi9 z%qVcu-)^K^0xw~fBb(WS{cLan_L5W%W=s*(hcoB}I@N?b^FVF+%(74|d$k#sHnFN2 zO1}m^hMrsBL7I6-0$Dhx0FnVuZD9MGcCwbhp*R!*qCB@f>A;iw(+VgtgTPitFb_0- z`zdk3Y0U&YmtPM44>tfOzhsMGn`BLapg^7~^PVY;+6d<+ zjiF(50axH`PXEIlYMKB}ogZ%jMg>6@UecIA7Pdbg%ht()(yj3U{c{R9=6XPoI@tE( zEhg=?1{7dF-UIGOksAknFV7$D@uc7Z!07YCEg*i7xa_eX#QEbb@`wN}f3aYILGtSkv+`JDKWl-b`G>s$C;&e`kvc2EN!}yl zUj3{kfP5N?^I~_Ac#vkwrszyTkncAF6voOUObPg`0%&aq8;ImnSXbu&Z5(i}6|Fp3 zd8>n^SUJA@@VpZr*b6~Q{GZys%+o_L&FLPWS#V5Bfq08haZL_59b>2U9P>9cx=25}UA2Wql z!8z!G(Fc*6#yfMBbttcc@oai;@f_jq4BU#uS-?AXE8xE4^rD(42S8$K|ztR1UPdl7`6v(@Ob^KW}_-mQ7M znb1*+EyEXFdRc2p6Vm&TQr zbPUWr3-Y_$$oEKH7oqt#&i*S}rhlWh(EF8gwO?xs9ZkZ%QG)k5j-L7BTbg1A0X!2E z&H%ewok9zUxdPmM)P>F@j1t)MUC3w#m&q5%xGDVxJ{AjI0NJ9p4GyV4EqfYhw2tmk4ZH-|c zWrjZeQ+49{pvu~c<?O`YwQwOL8NuSdR{p! zg25iwR1G$LUtp<+5aPkk>Wu*hLvjR`K~Z3D$I;FE1xaI;VA?~03Y7$X3@6_^2wxV? zCe2h}X-1R%>xum5FnbHy>@)M%DbysN@@p3(XOfz_1czVDimC+8dHzCu4m zqAN-#cC`q%?5YufoK%MloLuX3i5^th4G+ol4(CP}GK>N{8V_xM8(*N}x^y>#S23V` z>cpRf)k2xa=&{enh!Q0o$wB+idtzT9KA5P5JgU3t(VNJ9kk>7Z>OKJAxluozTZzTV zJLod8-v0DX#GjOEA|3||@KzMt?-)>UfKx`4@Wf^%H1`{I*x#w~|C`SFBN+ULYtR1{ zW_Q$kxMWZ?fvH?N8AlI-&v);HWy_9DEcGnhMaxvO`P+*&9H>^cuY@gJAH#CKI?26) zlsWlpY!wxc(WFHrr}%_cYViA^>1k#z8}Sal6VSm|u`MdKGV0D_>m=T1JvSNiGV9BE zrn5aiH+Uqv(duZ_yqeRIxDk!j9+wtRoo@_f?+SIupEx-01qvu^Jj;ypH`E7(;R_Bn33YC0MWnJ9`$mU_nA&PM}Kg1M92a%4Q0$FcIe zVptxVg}6&EDuuTjxqpH90IGqq;OwVg0p`O@J4&xY8BwsU1eJM6E^q8apv%pvW-0VS z!ktsoHk{?ez2?{oPa5nP-NM0ryhy&+&a}37hev*)<6>}Mz1K7Dd@p-X>xwMyv$-t} zOe2Z~NCM*yu~rRVKE}SjO1^_=(%|Bam{Z%$z~hR^>Ju%-yU+^ zPJG&OFhhbz|7G)ySGNS+_C4NIqq^5Q-3WG<0Hg08kUMj%pp;e{)}|c7)H6Px+}*;# ze%^kdX=@GO+!`3Qk@;vN{eLSNdi#H*H3MAKdkKEC}s$0f5j=)@u zD&y5|@m^)uGL{6bQ@GtPT;fa>JK0vEA;myWO477S``lcRNm;lNt}1f{MODB zQnA-aYd=n?3I+{ks1(X4Hrlx%c39%=j@PoHolp}LVirxglEmpz*WtKj%*`vT{!sAc zJDy2i>(;;|yRx57*VMD$o2f2yy~ zBF#T_P?yZT-ehpSP34gab2#nw0x~qpWz7lf%*(w8jA_`KLR#R5S>LikROj`3{v*Az zHQeoS+85RNmr-qn{kiZ8v4)zW+p>&+ay*s~ChPO( 
z7QA1qIXC3OJbh&ITi-PQvNDRFYW=VI5P$ia3FW6pouCE<}ZI7U@%LyAEms)M58y?ZGs@38Um!N&ry$=hn;sA@cS$r`mY znG&eeIJbvs`}RV0FHbz1<~Cj^o4HU>n&}ZhDa6UN%X2?W|AbOGUMDL$cbq#{+mCmd zH;xfdX)c*dB?jB2m_U1bo{Hq$oc6TVnDks2Hy=53bD{n4NNGF$Wz{?9#2UOb4CHT1 zT?e3E^bO3uZ>XhHKxU&DdI$Y5ABYrlBTm-4N*vo7hI_5#V~5`9p};m=N1BvM$Bu15 z1wUlofLGz{b?p8qA-3L*G?wSlFOXDe zf{=>-#23gxZ_fkRb4v%h0S|rjjHGTO9akBL4Zg9EL%P8C_Bymw%nLVhr1Nd;CJ!8W zG$=o0L&}Jxg3VOo_k9b2nvF7~SCrB%kNw=ewWxpn3Y=5!eBq&MH#(IKN{z}m#Z9xJA@Qx=4dZN)T`tdBJT>4;Vno?%Xr z8?c*gYr3dWwK2Dm(}lo(wpwH|P;cGcVg-d1&-Erp*qCJOovIC!B&k@v{-71L#}r9Ii0N$Yo(Ic5OGvwNNd$0v~|%%g{-n!Zi7!v zDr{m9poV|_BWxm@hayykVsD#2Xm4-a~tEz(DA4rtm@lQ7%tkSvlqj` zP^2R(pGho+uI+T?xM_J&>p4s_!D^Yr%}6O7tYGsuVZ+IKTC#%?W6IVNdfr|(po zj8o72$HajF6bO4^Z6%sKE6Or=he+$AWwG$?*X-E-$r7K&Oh=hPSFVm ziSj0!j*4(eCWX)o2N9D?3#Syqh~gEOY;XzAX_e&{u3Hsy-ivq&o?;I6)T*52{aP4D zh6=kWi_#4BS94ThcWd=nyN$x^j9|`<%mvP5_I`F}A7lGpX0eJ}VLvP4mOrs%n?5y4 zeSzG&4@zH;n+N@&cCYwMFmN-9fP(Zw%Wb0E%z!hj5wfTRl_;eK5yCe-5XR&~QEZVjfaIS!fnQ`Ru$2%@ z#WaxdvD6NqVWOE=R}kfafrB&4BCcpAFBp7iDqBzpM;g1_j-@JM+46oGL%@4+c7>M> zuA0UI9YBd7R60|QjwUzWb~5jwd+9$EWZfHL=x5xuGF(m)l`H@QqWO>C8xWb#Y1{z$V z`-F}-{))CHZ%QKiS(x6$laOF(lk&15KND+lN`9976d^06G&fqNFoEk{od|DsrlK%k zITz}lZxi~nj@mKy6p4Mm8odLvf!+)Emp@bKD|PQFl{xb{r$H$-XZXJB{-}msb?O;Xzf*i}UxS3)gEO z4dV7Ky{^u5wHJqzZs$KAAgzt6p6fjw z>q5C~T!Z9PH}H-6zC^US5b!3xF<)dC*8zW}=%OCA)SCU%B5A%4$y(dA^9opE|Jb** zvU`kREXd`QM(D>e?ZHWs%`VKlVp>im>tnV{m@7dT*)`ACPAb$b=Vq&u81~-pUuKT? 
z!CIfRyV_#nP27TfGCzDQN335rfw)THuAT{c`&0OdAsRcyk-gndz4705GQPRdD0tOa zIKpz_K@j!zQc!ad+mmptRqg20i3cWDUAsZ@uVbaJ1dgovuILvlMrB5{Z%Op3>F%X$ z7=95IwM;2_UW^po7s(Pvig9VCC`so&8j7mn9=%|;>dLggxAoILrTGtgFa0E?B_89T z;l~m8@=v^Wuj6)YLT#Z5A=-dXd(DKXVYE3ogbr^&81)?|g?FD2`?=Nl9KQGLt1|0QyFl*{u4l7na@7-`+*JJoY z2bXP2VB1a%$_#jkN&>~q|EAD8M7I13#Mb+;yf5#kmt}CDOC6og_+uNYt|39Cy2TKR z?+#lTbGkRhOakT7J05!qkPlp|>Vfa9hPWJx-QQ<=%u{H$n%#Dm@G5wMNj4|h{HYT5 zHd~DNenb_+mox8iU%Q|FIc{d`H-rX_6in5n^g067aDCFHEmg{izR7uawE`1&+~2I~ zxOKHgs=)l?QwT0y-d$1=O6vj34}j8hCaoe`(_JB*L{)mRS=0znT*A?VZb-prfwKd2)!6~LN<#IOftIcLABS)qTd?zhm zBDiU8(s!?~j~i$2VgA&vWPggd1)~zrmT{K_RQU!+4;v_U5ovqj!^IJNM*-Krij;Ll zpGXS^melRxu7zvJ&ofBEmrQzEyYxO7iel{&X#%U~hg8%R8h?H2!b9iGPs+P(t*-_S zMZbr@-U4vQbtGW(Hn2O_s;2(r<0sR172RFk+dQp&{h)2Xvf1Cy%H6nwCH_E3KhG&^KRtP%mY&jULElLaO zEh<7!yB}BMW53sDT>>;7`&O?4H2q0O|6!@Yx%IOkjHH^jU`D*%-*bHoZ`)xnx2R>m zfX|zeV~168H=J{Ri?56*Y2yCxA&~3%&$!{Bjo1kII<^u$o)VFtMc1yGu-`d4*>|kg zlV8v6{BhhZ+q1X?lk01|IwK3q8oT%N7mpV{f3B$8wiUtwG_OzdAw+Lh8uOGV8GFks z=!5irN5|*)YOw|th@S~yH%Ulb=$w+xbOC%i%aV3 zBcazivp-;jSt&|l7Osv@u3im1cF8W!pNjnuV|0ySsWdk_iKQ}h@~D}lP1BdQT#cR8 zcjO9hjN-DAT|H;*)PqanjBr^9g!u09NYkO?2QmT+Xs55n*^xdTQ1geeYI#qUQVo10 z7(2#&54K^hVon`w>9u)rzR~eOev3w($tr)cqR1L2y~qbD1D*YSGSr&TL5cMq9#f>>Nes?*nFY#a39GcI_ABKsm!81KF z$mXOZ8^M~{lf@C+c=s6T3&bg_?A0WnO*DYG79k>-d=#GYG#ia=txB>fPpS?TI=$Q3 zKH-)c<^PdV^qfg&#S=GSbO7`Dt9jCvvB(c5S&wKB%_tVH%ewbP)IRb;jj4ZkkO?e( zfiNyHbre6IV=A`pSlZU(nLed{A*_;bnRA41Iw;?#fnw%cSwV!gVGz#`^_U&%(+}R? 
zc8hCDa?*qIwGzG3**ObiK>JvT+5p0`W+k@b^J~S223ZeUlO|K1@4tRk_WI=={6h=s zwF9tQT*kBXngw{`wQ}2g%^Xz`8fnAaAC@~?T#*K?SvL|idQRjT+%>Jq*$(hgshk?@ zr`et3DkoHv^$vEes%(&DGP~X z0H_p4%qK*a2!Vr>tABy74L-Lnqv|bN6-GhigDB-j68+pFFtN?+$bsL^)Bma8F~=A` zfhf-LJdoVG3qEHRy)gfWw939uWkzFNVkaogXFfi>QlU>%93NV;+m@1`(o=V5 zI;HNW`9xn3E9#@Kdd4U?s65iDVe8c_1#_NmarC*w*bzLNYw})Md%$PNxqqzk?gpQ8 z=B|T4Ig$z9OYE{rx_YSLv)e4c%L||b-B_memGE}TFElUYb(WA|beZB9OV<6;Xv8$A zhPRpSKFESnP=Qk=$YhPp=tvKfo75AKelbSakmSHk6{U}zgK6c2kmbS(xFF9*wW7-n zYhA?*1Mk90lI9}SB400_@iXu(wG}N$AlSMUy}zg0d(ONjw{nm8ZEX>5=FuTZs1kLw zD{cj)@T+F^SFZx2zG~Dv9aE&d%q=X{)kDyws{wRup9D$agx3y!4q6VExa8W5BDYl6 zC+5{yE5?pHLz1;^u#<>2r3i}5EzDEwCfjG9_rI%GxP%RZ9%Gs_mpkvC;#Pa zX*=c%>-*hwX6xb3b93s|4ybyz2u+|dNJ*eLRDp+l1;=P$OQpk`I-Mz`JB@DP7UTsR zibc?)@JyF3#7T7|`h@FN31n!Aw|%PPi}eCgzfI#@W_6n)5Q7O z5A04a@kowm9&RIRXBU$bcBsr(?#0H~a89n5#Zet`Y6^qB>iICWhdAS*i$HN}*BSnEE3nq}s&&m}<#pTJ|)jWq2zs1S6oZ!ES`)Ta_%ar?Zjs7&|i zpHSB~Iq8Uq5KO7~Fg5i-`yER74ZT_I#S)p^P%(GZnfk((!*1bnr&1QSOt~;_idB>96!$16v+#6D_NHU&){E4sa7BeJ5_njtYL_XjEtF#) z#)?Lg(|UMRgthEl_Uc9z%UsjR8Ny&&E$=1AeJ@J*u|hsV);vC3QxI;F5DNCZ4;KfC^eLTNFA8wOyw-& zLZLma?b1yREW9^2Iy!~(=e(*f+E=+-@|HMi6tm?j`X#pJ)o9q8{-K;`(krADdV zR4Fz?)9d6;-|Xg-rXPFydX9P8Zg1ck%hjH^o*AZ?2~FnSp7zJM{EAaa1jYklk$Zx$GFEx%+EkiW*cR(kYagzt0s%l?Wj++ASd zLtuSELSipJ6~AkoBS2a4Y&p6kqN%4`a>r|&VdUY&^XG#fNE_yE(mNn25MXsvhfCa5 zoOP$WBLbY@cHRuv0K$XIn)t;IXH6I$El%X_VXEGQ@_D#fqfsWx33< z4R18JT3Jc1J50F2ipeh(I!z@eTi9A|NKCsdt?x#&@=0FnxJR3dTFILx?wmM3O3K4l z_d`8!CxXh&g%T8+tI6q?h{s~3@1vK(SPYv?Y z+xG|x1fBr#sRh#vIQHn4gE(M_-yv?M2m2(`y-|nU_EmFAUlN9`9yGRmJ$PjZZwg^} zeSY71DlZOrnO_yWIwpQkhf_9#chk{<0CWOcOk+(ieIo_AiS8;nSUIaK72~aKS@C+} zWTHT0^Xqa4JGLk&(#+zO`vURnATh5dzoIYEu1xpopU-kK;H^Kc)7Q}+9esQ4MT>GS z*ERlSR5$&K7kR>4m) z;pBJFilS>?h@i}roIG5L_Wmf=h)j%S?}JDmI|rgL9Y*t}D1;KXn~eExZ-3Vt)@<0w zcbltDrz>_ur~Q>w2XQnh>Dz$84*n+&%r5p- zOxn!GzWL=NzbG32E=y&Ke`4!D-@$wMG?a_8JPG=~o+V8HPg$Tob&>5a#Rhb?;HZ{uPXCs# z@$Y`;JP#DjyVZD>D|*zNY!3T|J0i&jxDo#)cJW8;>jzJi-H%+6fcOE;xQI=srZGB6 
z_3Ifw5x?b*fHdX~{7MIz&;Of;%5VJ=H+Y|!fCj#xULwGE4iX3%L$CmVL3Q<&`2ukR zn(46!{{wR%d*=7e_<}irGjALF0_n#sqinyuI?0^^{1E>NTKi)g2RPvWhW`;D7XHqG zb&^9z^P>!fQ4lkr;lqj+W` z-BOd9_$Uq;bnQ}30g@bcy_WI5cI3+9B?HdG2;d>C?R6h*W*$iOa1$J7J#*dB_F&Sj zXElT&nK1Dho~Jg7N!-(%r8{x=gV3otc^C6kEe^|uj@gCqmrKc=YU=5H%??>R@4%kj z8p*1qAqEXY8}DE?*k)8|FpJcuTe#D2JDCyvLgq|&@rZZaV`wuE7ZsT)SJ``#^%sD1 zS@d1;c^@6%Ym*!f^aSx7=k)PqLiq!Q8x0Q#azpKMAq||FclxDEMRd7m38=P%@05)T z?Bt9)#2VZg&4!`dVb5P|FR`qGza&)pIP5Zmu1EFjh#ReHPr8)6g-0K zI>OdT1OXbf5O!Jp)3_*ou%eHbUQDgjH-MID>}Vy-y)qjLX*7Ddi@(^xVx+A4y>8=Y zfsV8)e8e)g0XyJxz*s87Wa6ifu%rYNT#Y}>;1pU320}B!7p{epuhh)@T$#U(rW!%=kS&ba@$1F-Wy;{aT>Quwk&$(;hfr5a$89Amb}hOn@!ZnvQEPw-yE z6^$~0?5va0J@s>zVW#3*wM|DSHb8v6eH+kX;M7kzgbeCacx|Au<-o^t=6dCq)|_w* z(N>e$nwz6?c!EE^|ccd{BI$zE)~0?g>VUhFH){oM8Ldk)z-4 z*E%3%e`x6bTBOR&6MTiuXkqrEriK@`Yi2MpLFzn${OPXnzys^RXcB+aikoe$vlK3V4 z#y+e$JQ~6<{Fwmx7t={&KPy+w=?00%wr};zs4aGiJ}E<-Eq$xj2e6qBq*#?`Rd%DV z=&ZbBiIEe2uhe7#2>yD$XyKa^Ls0CZSV<)pF?O``=Sl*Z4d+GRFQ;ZTpOo#bOZV1e&e*VhpwPpF$+7El!%MEJ z^?I&{h(mOX-MejmE*>fT4T2B&;6zq@aG*%o&rY~cSC?eU@t>Ulg8mmrUCD{Y!DP0y z_c_OMG;=TGcE~P*6?H{LTJ?YWhq#TEj2%ib#dZP{T~UwWwekjhD5d+CH)t-nMQogU zNjz(GsPWq4w9}{}&tm|f1iX=;+90T;Q4L+=0HxR~Y9N$#QbAT?q91k)jnDEzO}?f* zrX!7!5Rb?~(L+yUmZh^j3SPUk$~?ZF2)+10ZBP+;@*o+707i9$HML zXChN)6EwzR?tFnbE&k+F?1f7v;NI^OCw$`Xz+q=K3U}AKC|I1DC;)KE1!4%Zf{taJ z_ZY0))H9waQy5w$xM@#~1TOkUdMf9lp8y6r^USihxU+`@C8}an>A2w@o9;UWZ5Ob$ z;UnAe8X=0tNIIu7QFa8Hs($kyOvaib={wDDlB4~(B#&~ZE6oOlGk5h#mNK-`ch(Hx z_kMvm7u|=q_KchNP!zU|_lXtE%Ex1-;qL444@+4=uUQ6|@oaj)DcXjv*CgzHpEsSi zj*fB9svAHqt>|y4CrtU0&q`fc!py>bkDv>8Z^BOiW*eXnJNlMWS89pe0!u+uOM?=61}$Q-W|^{dulvOE zqq5H_){Y;w)$e~M)`cv0F67dr#?b4mTgg7TpWIJ&(K;{wKkU7CSX0}&HyQ*5MVf$g z0)n8@MCmP{(v>DnstQW4f&>Tz0a1{SfS?pXdWrN-L^??C9YXI3HIU++uCsPo>+F5@ zKHojhd7k^+?~iDh;v~#D#+dK;wf9FKmX33Xc<72OJ%0009s7b%x*1wgki<7L*ZuZH zO(CElhImM-@T=%z{&y=*qprCMaOBPbnC1~y^Zxr*iHe5eM_LqBQJ`X9?{HVYJJC?_ z!w3G}#L-J?b99b8j$03I2xV5?eh^21Tma2s$g? 
z$*p&u?bU_Mpzu$T0&56b*g33K@%rE+)y%s&8b)-}CNx3=b)3{}(>J-sbkU91o?u-T zJgS<2e$FZ#hqmz_5Bch9LHKd3Up8PQfVVpMdVZ~Uu-)}W!s2xXG~038V%3;RiWA+k0B*6gUlPCHU87_t%a zAqzfxTeI%q%qzAu5+dnZBlo~lOj^VhhY!y0{383@^snW(BysC8FTa|sAtmLPWA#El2_z#w%5ofXL2n){Q=~*N?{2FVx7^ zfyfsujr@ll`_Q#BGcRx3UtY@Fy6^6$Nb3)1JNC3h0B@a$B=K!vqYoh$vXrO(6J!Al z+|GhE%Lxck9RLXkuL0C^fYzwbvVt5Re;u;fHFqKo^81Jp(L)aU({3D}T9td9&$&^M z*L=-2_Xo{WhFBVVtnWP2YZyIhj#<-cQJOKHd&a?jk9ibj{>_RjuvL|tgbWv;>>p8_ zW>Wz5^k`7wmM@w~4>6v~Nde3XVJ*2$lb+vwvq8M=Kdx)Nq zAT7K6aCH)JkUZQtd~%AHzERj3S^HoF-iGT(KjWZpYh{TqM5b&wZxCkXrum{WgZlfyuN zubUHCiJF8ry+c5+%4s@rx`^@5ws=bHMVHcG&ri;N1Dl*$s$iBbyJu=cea(;s(X()<%zTIyA>a`LrvYwe{*Lx(3xI6-?a3tDVX zF7y_r{D=+M7^@dM^eeCeBT5>LAOs4+ z4^jmB08jz2*k3)hB!mFl=|P9&1DX)zo)!$?@*lqg6ZkdGMUDcC`Y)*#i3O0Sv5-~O zGIx>Vrp(mF<-+G>No5t*Z5Jw~#}|Vl-;k3t%T=^*3!qCUy@h5s*2?ee#5iNSKDp@t zo~;LXKrEP6fW(vAgYbBaWU!q2{N~XRO#O+_`!2P!?H$;i$Xmto!^ta~2R^$;QCN96 zsi~#hF>RH8LVs;qu4CBp7z(*HTzb;|+2lN=UMzS2WGh{@EtMXv8U}NMRdTWoE!RDD z-6MIMdF)NomAT>WYdyAJ^&R=f5J+Pc;tXX7MC};fHt#)078_08oq8vi#Atuxfj0rU z&5sTnWUQ)4KrzR?R@;80?+2PU%7!wbR9_RXGytFJ^cQl8I-+k{~v2uCt1~+Z7QJmh^UQmpl8>K0%68^%)tF zaR&~@-}XjDg_uM%rm!TebjD(M*c0gJ>p?OpuitX?0nJjIqUemVrYMWhLVI^XiBO(a z!Kh>1=Mfpx+K(yEXg^VmDuyh?DF8VP{o25Sg8T}{g#D)jr@^$}ZhZ>sxRUTHm1*7u zKwiIdl3*A(`G5`(=Zl>N@99@VlW+0r<#1nl*pKP)&H9rP<$qeJwxDp;fi*H{eslYw z`p1si;1;^00xOUFoDeei^V^mp?HC&rrCPqHRiGkZA(v4EsZ#0sWH|9gfQd~x#9gc;d59xwifsF4~6=Lf5CN# zm;aqIjB(-g4}DwC{VKVBo1OpYSNR9W$XhL>fc7cb{gCaUpCBcBoMGdi(p<|ye^iDc z%KzBAMdu<;_^+w0x=O?h$g%2P9w0D__6P1m=Lj~9FttmdmN`9bRKh}ly50x{KXySL zNUhOB6mx;jlYapS8Cy3j_c)^n*q@-Rm+^o11qfhc|4tY2Z@lOHm(i?%gN{w<2u`C3 zp75x9$J81%nBja?r&E&+@4nG{`Tk)NW(nO6MnN`ZetRZ2a%G*a_g{ZF zmg2JMzi?;*;BbK^!Ocx`=+F&#-AKKVT&?+%YW{~$Cypem4tzI#*x8M~{>}aRUl*;e z3euSE#Gr+$vOw|?@gDX1BFKZFq?~B96w71~_5)hJ2V{z=iWG|?U4Q58&xzVtuKy@| zO=W#2TMqC_BOL_Wv&- zIR9*i_Ek=IAqCtfLp2vtfql@H2Fou~Deh1+q`uju#Qcmr$n;QEZ}?z_war}Hs4k?f z7j!XhTBe=#jN;jpx1_wUI)PA5%X#E0I294R3D|Qe*Y`{<1zl)gran41*0xYF9yuGo 
zInudClt)9}x|~Z%hrW3*T-=pNJ2|5)_R%PBGF*%NMcC%wx=IxbpdevPP5xwKa$3d$ z<0fhto6kuh<8KzZbrvS_NnrA}}NOTNg#pU)zk3vi;U;1Qs*jcOnI_4PPSxi%j|h zOVhDV&WphE8U)8tYygb?2#dMW@`9uD)0{-=H1I?5>-@Cz% zHDSR2?o%}&TI*vTTwLRQHF{5;MU7{lp}3MGb?LpXvgldHi^md2c`IgJG5p6Vfl)t( z4JTi}-tljL!2qnIsm?|C&@4}1>fSM-0cD1kzy1JS-EJ`*?gTI6e$VGzB-mt7Y*>+# zuMIS7YcUX(Ah;J|6XJ3i*X$CV)m_bXb7KhpLtjUhL~!Kd_N z@q!)+LQje^H}~XiA3sIA(EAx_Cmn7B%Bqs3-_&o6XIn>#GQ1NX@?IybGhf5^Ts}4& zb33Ul{nANTdffcmub)z^tt_6Tt66g)Ggcd9peD|tu>Rv^mCldN5#5ouc#@1Y!S3?k zE)c^|lXKl_jFqJ!7tN31%cqfP(3g|uccnQpJzbU_2o$}ssd3Ltwk<+=uX;va8!oG( zWz@eIt3D5sNsg_711^b=% zgPKQA9ghaJio1DgOC!RJm#DqZG|;rjI43Q_spr;i&hf{4eCAYRvA({4ZjD;j3n7g* zYNC>C^1o7rp=!(%|B4mS&7WZQ_5Q((_=0w{v6+KplZm2K36>WKt5#16nK{1S5meqF3Kuz zyfdr#j5r_T<^QP1R`2(J)w_Ko@P{I4o>+?zV`^<>!rsYwZ&Gza)0G5#S#tlEWM8W6 zDquDK@CU4@@yZ`CGu+ER9ry!?<^=RMMY!&%0L@SDTm5Q-N|u`yc74${TiN4DAOJca zpUcV!kQ*`sY%AM3VwE>M*88O2eds*C+fLY1hE^iyU1+gR5iKPwMg2==O0nl;t32`0 zV?^OP1=-_$3l;wBpH1~^EE7*SE5Tw2O_h_xw3Adfboc zxJy4lL(-9&RyahOqQ22uRDeGpRuW~HLioI3x||YCDqe<9q>Y*Gl!y_mQ4z~2L! 
zwvZZA_@-v)%(!`~zb-BZvVCp+(6VDJj2q+9T;#*Hsh&M}Ncp0cW#OqLHO|q%YGt%4KO94R{nf2CUbF2jSN*U&hwmixj#2bzhS+N0DL!NJ1+|0wTR&;xi z)G2cx;JzGiMOPd#dbdOW*;Culxp*?eSi{41F?JWs6}{l!xY~b-yunUa5zOb{x+yx> zak`;Vb&Eaj8g33S2;O&l#CSwC6Q70Wn>}Jk<=ZGG0V90&sT>wOKjE?m_#}46y5a=W zg8UY+D*X$6=>r@m|uyy9ZC&7e-5RL0M#Z7jwwmsQ}sC;Y!j=k%LXs z1;E>hQitrOQ&|*)7iLx`@gmN_%S92gW4Lm%{W$W z{LRAyaC5{>TOtU(^Xc5^8||uD$dqj}gedm_7K5`klV5G6;@1ja?Hc?Hf>-hG!yi-} zi`QRwB72EJ$6QlBz4k5LHu=9(Lj$c0MpNl)lEk7|O;s$eF^o@tWf`YrdYkrCGyF7K zc5IUIuKwTw!9lxvu)@@%w)|IrB(+wCB#?O)F{ z>R^nv;&~Ib-R66qO3lBBuWF>)^F42MQSG412G~S<`~*$#x9N38JTx9Eij#k`|J>1M zAy)LXpC=9-f+uHW`u!Ql!(wLu{hLS*hCJ2+7f)*s8ZG zI3S`^Vl7u5P`jr~f19llC|~3{bL&M^c10AOtp&KmPtyb$*fnu(k++zEZoAhw^_z9- z$evd?+Td7?yRWKg4RpzO zqs*B5{mV(<+CWbjo_09H{D2&Ho_KbYuTLHW-z52U`wz2QCK8VC4PWfL0^LO{8gTmX z&fD*Z-*wOX*5Rb{Pok7-lYI{CxnUcCb|`GaR* z8t8oJ%ibN?>z6lVqi!h+-VAGICi9$85i4Kdfz8aM*TvOGX^7Kh!GA#7jK%N-#4lNEnmn!AMpmOu*eNnG>e>%b(gt- zfxkk6g~jW-XP8hbh)dI5x?4H%R|0Pj%Q<%BrXL8T)`zcx2rg$!+n;fK`leRE9Px_f zBB-fDJ_44w=rW&R3}eZSknMF}8C=l3Q&-dCjJ%b*^+j^@RhqHc?0V?)W2U(8 zm9Mo{W8SOiA9nIHBf7!w^q>|%zO!)7PY{KmTZ)W2k8$f)!c>>Ry^yQm@0L>S9^Bx8Dg0yp)5~n|IWoHT(5y{+xeQ8 zpWp@6+I2{pKvD3)C#iE9rB$`k$=7()&+l}lOKzF~`>*f+tn&T+I89%JIdXxHkiYG0 zb;6TqTppd8CN!_a4U_{B$pke-x5y;mN3w!h4ILVsXipUGlT}j>{vDAAbF*M6mWgUr2!S@_?315=NklcCr1FZ7i$TqUB;bh2z)gN z#b35g-qr8OBZ?9L=<`Uo-M0N*$m6eX4>_;Er=Dxp2@XL_SQv4=)uIq|bzsxOK3gPk zwmE6*_qzxQ35|KgcC+Z~Zm1*1E?^Nyl^CHV8)fu+YQ0$l-_BuhqtXPkLiZbpp8Pe& zsPL^q7f*rrr9(x_wbkzzuDqtK11sjA7hfZvUu|U!+9q$7obn^{cEPRg`o|xUZ} za1MRj!^Sel913iPK25Od7IcfgeAAZ(rSK*EfcZ2qpak3CW#je4^(Yao?j&bJ!=A4@ ze9HKk0`IWvGGW?W+ZMGzFx4IAqMNdhBC9MF$Se%|+-zkfLBUVPP7CD`^B+m&9LPrd z<%E8T;2GX7&a8^mZI*K<=exZ@!uBScS|)iFV(sU@V=TT)b$_{NUex@~7s4Pe_P#Is z_0!eS0EJ%Q+W(?#NLC@2>oxPCkcAxtN}e%B#Br3K#gjS{Ht@MC6)C`q|2leT=Q*S0 zX9(r0N?3T;QMta{X1u4S=z4qlB8@%436t|$5azP*agNenulsBTT5~eNX~W;*uU&5`)jRc(ZFq z%#1_uW@XqDU=M-@U?)>cqPV37y!k4Wi5NJULpTI*8CD>ARp1-);4fW8`k%%DyGK1!WSdVtYKx0R;vA^^cPl=!eYm!uB(Jt@ 
zrig7opS;!0`;-b922^e>5B+bR@ds}xUc~pq*On0P7F72iE<>O%kJD)e0L1nmqi%N; zS@_Q+)K8&~@o_z-#BulsSA!vEXmpEM?IDwRQd#YxyUa99TyG zl`8Jvuyy{TLssM*77j3$!Qu!iDRP`vm#-&(nOd*_{F^D6pFop8wS#ssUcA3JyD!bf zCk5YV zjXZT}thx!y>MIYTN=@RWj(=Kj+2ZvOb{#W=Y?Zxg^?4)JJnC(RQglJeSBuA9;VDIP zE~E*3uWoO4-MGdVvckM0H1jOXfI2dm@!6ATrCL_`e?2Kb&zmcoJ5QlFS$ZjYro%_u$GStBCQQHE9jgkBfSX! z?aC@!MA~uj6~Y2AnQS@w?q$>mZnn-b$gxwx6w%gk6FGCkV%HVMXgMa*9W}yaVrL2} z(Nz&Jp+(Kxz6&WT0>hS#`5K+zvbJ$gykejIIZv^FD&IA;jFNliFdsp3_~?p5K~0L& zRuO$-bxqvcMIGs-J-2broHGHpVV;Y5aZ|dc3J>d-YOIfc2!Nt!$U`lzLcw=t| zb%0_fVK=clskF&1jL?iF2SA;ZNDw}G(|sxJ9KUI9|K}S|llK3b2>)+iKL`G| z;K!~}@gBD3QSCg3v3|RwyiwE8%VMX`-m)BUI4mRBh~u1R`|;hD6q7wD%B+q{`!0?& zxL(Ri`8e`29r7`Q6;26kCwg@O3&krJey_6o2@;M5%PcB{h;^EsHV*7sep2AWVlZ(i zzllLXVVgkN@ZhfliyngV=N}9y_y)H48oh@ino_T$4Mr*u?*w^=c3d7aiMC(YTakEf z{)6PI*0P)gO$Er$VG%wh`KWGRNRISNR^u&CQeT zVZ_T0r{r(8BW6E)vEO07dm}+W%-P$$;23a!`cwdsF8er1JSR489uca$nR;!2_iKr% z^y01Zn48n{PRy!Sz48JFtiO-OU!xMy<1!^{4!}TS5og)l?ULaing=yQ&jY0d>Iz;>e?0w#=wFOT>&_OaemB^=}cW?dgFI2&$*Ju~WFXF;?B3eCf11W_hn ztay|>cneaB7x}V|opjpfR{05na!teOlh*_ew!|SeDzsZMCkcExZ2p7yws1I>4?R*@ls&# zH`4P~lV`==Ef0!Y814id;mqo9u{8!ft= z2b(-|o#_5NY5m;d43Ak(tmh(hrk9&H@`GN_#_ z_1gxNX`UD{FaPYYNP@WG&HF0dI`R2np#k!>(ndz3JT@n$nfL>IVGG=&WA@g;)bqJD6Tv-DFrhe;UyvSf;+0 zu28&m19$CU-pxo(6}{N5a5h5RTxI0*Yh#iZE?kepeXKus;VyxW5mk+>(3fM)3Ks^q z9xL)DkfpwCqtv{1vE_78R`o06#O$Lf`^$00vu$Nct|8KNA$J8X(%N5N8i*IR>)db9 z%uCS__kP0IQLbQsmBqyA)eNVj)85%gURip(?L0?#9E~pNfWMVgZ2yFIr{S!_)IQ?I z+7xtBzISy(ce$#b-t%jP2?1TEwW**Vbq42Ss_d%VmDy9+jptN_T%aBf~YuuNRdY7=yl@#dJEQ1qGI+#ChCYDPI?@~(xPV(!#;LN zhE2M(a49ITArw(vumG9ZBx(Q{;@GD-oRHsydCl!cHjAzK-uMy-FQ9mxJ=z1m9gAs} zi^&b7W_$~FeR-}i6Duzm2a_1!O`4c)=X(^X-cl@+e4@R$ihuvE?{tBj_>S>Vy9sf^ z=L06Zi2Dgrm3NP8 z@(fn{^4bmFC;p5mvqfofMl67RhZ#RRf9HDV@tVK0gA9;TLRzSL zIg`ECX@i7UPX*gS+*5l)K1UuI(%6AjU5vQfZb4gLHLRVKw?5y@`-w(OCXS8$@wxe+ zgFb(0%SnqtCiGDZOy9f2yj*cHrBuZ@oSuhqU`r zAb~0=zSr!5a_Bd|r<${tt5&TrgLxMYtVNr^9K&Si+49o2G0D*7b9cVH3Vz8G;N7Z& 
zP7B6ALgf|He^1z!OX$irFa2^)fIiZEOq!>{Py4^%I0VsRqvB6?wupqtT&oQ^hLX!h zmM>SUVZ8l#rG>e3t=Z57i7*Ubd}aB&>lfO2Rt%qDpEpQ^32fy()SpkURBbcJ-FVQm z^CJZs-(PY0VW7}+uBc-we`j0H8zXo~p~A>QnJvImGk5n6Z$ zxgOvLe}p_BwM?F6972uMYk#qO5#H+UOC}7N@MhFrb6CBdBVwl+5AYcm{e(3#w>I{L zAPaqWqJXljRr_sB8vS(B)&A;k)vG@WSX{jclcLPpJM(ATH5MGjA@ops^XC<@PLeuh zex)kVB~K%Z@SOf7&J5&(m`l9Ii#l%uZsan9r~_y;u#$^-x@HXPLO3%>L^vRm5Zy1hg{%Wv98r4>@l@8~T-AZ)i zm%<3zzWKlcV!t7#2fbs*p=2wZ?x7GIn}q)mHkfUZ69P@@teN}?%1e=JYG(L4jCxOB z(t{_#5Q|U@4d~Gt*RdCm+RCQ7xo5bexF0==5-N{xlG}6lF){+GiTlK}gP+R=17xx@ z*zc{zTo5LS&Wmvm z_zXzaC_7wbTr<&(IVkXJPI+`h(x^xwa$vq$Y;;fH#6S48Yyfz+jl9T@DHEoOl}7Kh zne#MP-PUH)7_5HH8=c5?{bXs9;(PdGR5hTFA@-X*uSlP>^`ZVKC5djRd*Gq_@=KV( zTV;{Bpfhv|+Y#6Q$?pw7=8$!5K9 zk}EX1XRX&t$@xelug#l0@=bW(yT&sZ!f}{qAKkb~$K=h3XSXQd#>1-9b-DL`ysDm@ zcIweTSdz8We{%@PFbH%*t$eL+y`gKdH?|8{ z7yX^qS)DTr?O#qlk7G<loI{bKs>J6_&T88)}H+#tTsXcK^#`B0I2$PP~_gOv&yH zwO}@gnyKwx5Q<7!oKr5zEZ5Wnf9&21t&{`x)FsLFGuGU z?{M7{Uxu3LJIg-S=o=f)Jg;fo{;p|JW&N<{`dE10;+3Moj-we(!>@y4brH``brKQ= zSW>kzZCMsMUQBT~%%`!!Gy$X31D)}Sq>R$|BhzuyrG!3GsD-uF?zb(0Sp~UXA{hH3 z3+p~<x4y?W1Lx9|@j6K%cW++!aZ^!G%Uk+%>@0Mkd9wIYagN`4 z!xoDK;rN*2*)o(V6@N36!VN>F%h6M8@2^3l8M{F6$qo>1>|5x&D{=2b{I`98V#RIPaVAMpLB;eoPO;*W$O3FheL=% zpj`qqMugk2Z1`Y;MMJHosmaigJeMoxK$^V4^SN=Te#CXzTyZa#mX+?Cn4B9WhA-Zg zBImuBkfe>gj~bUCRih9O0H$Hdz&av+?Qw0YX#>A3UqwtP zMa^<%-^{ZEcFs4$K1UI_`=;^wm~-J9&AaM$T#x-@I>X^}&ud)(!1Q}4W@i7RrmS18 zSxwC6Y8r;f9ru?JQ^s)<=W}!3WUR70-h2cU;mu5P%FoWLvZ-u#6WNck&mvL0sh4Sf zC1HASg$H57V$&-t1A8Y0G_C2H&j{LPX5@#H^rbV(`vvYRh@SxJ2Y^ifU5IE&=ii^U zr|Sghx@ib3SN-b#bO2h@jzjiv&ORXS@7Yx80lJiFxmUkBv7VCvQnBw_{fDXKy504F z_>_D*6aO;XQJMDwVj|;*P_;%`Mid-5p zDj8x!T7|nCmE}i{1|KEWqsvUOswlEP&rCOthRtWo6&z=fdKenqI{E!@lIpcs9@Q%$ zhRj^yfiybGFx+C_8yl@l4l)-mS1e}-xLX}QJG9iDS|3{6XU~Z}v~?PJsZb@-BfYIr zNdUh%Vx3%>-J2Dtlh%XsD`Gm?&KVi9Tr8C0id15h_c!zJLzYvGHFsCn3!(BX;8bP` zbdiq=Q=;aShFi3SgK}mq+}RG#{eB8{LtB33SZUf2;1q^>znx;Ae}Mj<@!h|UcV3CK zoxC0`bF;~ih0Hg|!m9`-K5$4iWcOwKhxd2u z=ERBVM_LpBMHN47?AmN9<~Ko82NmqZ^WfN5u?yeGfK-eInuVZUMfRxgzP;j9@=?cp 
zx+ewjt%|MqC4XjgWW@sfQu}SE>arwGz66DD5zX$YsyAe;eaDJ*$X7(RdHttwN)wZu zcZ$=fA+wL+NA|eMMsn;TZg(q>|oTmb);SeufG) zPfXJ)te`lr_=Qg2-h4IN*qhIqotU#};@1|RKb19-HORairWF570r`V7eNP&QHtP!k zGvQUy-%-{>10o)q8LSS{p%;uFyQqNO&^NUxcygtfPsNcAJpx;}xaJp0KS8z54E%({ zG6;6t3fq39RfoHuXmM_M@un!R$(*yH_Op0W=cDP&>C}o92iz^Z;jNSBgTo@nJzZ7@ zs|V?-EEoY)Rj2~LrLsg(>16Meg7O#Xz5difclTaZwkYR-7nWMY&Q#{QvO&)%#u;w4 z3i8O=DIT^2jy4s}%ok@*^9qq?iJdC~A9m%u@>B)Y+?@oyo)D6sUr;=}P;>Ca(%RVg zk=;w3^+}KOP7aQNQW@cA^$q4hXF;aGUHy$HV=rxWq3@G@yBMJ@wp9t=rLn15x_Xh2)LOspn zus&BQZfi%NlznxjlqilNZ$gIHpj^VJQ(oLQT_4pA-I-B1G5-d{HnT*%_y$VVA|dlG z8buUXYB-Cy*i^s&l$fCMl7Eah&fF$Mfgig2o$0*7yTa)V?#@8fTi<5W+fFR}6v_L; z2uwpn3YfrU3G`i-YzGZ>Cn{#MJk`h0R@BsPG(4g7}@U+Lkft za}tlE|7-M?^BkO!HF_duIl_`f%D)Qts$?Iap4ca9xr z8kMyhLfRy4vOq89(0Ts9ya~MUxpgFV*+(2U0t73N^AkXCh`^G1lz(dYkD0rFLNEPa zz5GAX&NFtv6o)DztiGKd#V@Q_5AST*gpI`GGx$8XDPR#f^82m+Ucc%dLLOxG6xhd{ znPK(yBoKWMPJkP3c0uto#g*eO7$FaSix@gIEw96g&zJRAII5=4MxAa6nz?h%L6 z_6fjgf?SAGs`zVZlW|0NF<^jj`U#q{B?ZnJ((jknJlK9@E<7Ze$p1W=_^jr~n-zYd z4`(TLkHss?cp3h0h)M3kajgHrJ4^BXqX*`4$lDMFSk23wzS-T>3dH~6&pseFKE;O8 z;CbNecp=o)6DRzG95Xxmw1s`Aj?_!=Nylxl#)%h^L34T&)0>1U7FpS(H^s{*JtWYZk21(e4!5y0$o z?07<17n`fx!G}jySBpA#dD-XTUg;oV|4)b{e|{`AgcIvlB$OqQToLD&sa1CeP;L9b zx;qTl#@CxR&cYroh>-aAD@a71lyjIIE)5B@j_$B{0zdGX#g8a1Dfy_oy)@@ufs4wG zR>=Q+cL0+tJx&kacQ{9+0@9z){LkL{17f{RJDdzJikd1zad6Bvr}9M!UCw(fA?>2x z|K#~YyiyHM#5k&v?@u2CjLv}DYo8G7euA2+fD}=nJ0A?P2%)u2<%(K3Ze@dRRn(B+ zs1D3b?A0J)R}`oCk29rK(^<|m*f*X-5>g;`9J)`z7;WS5e;4J3NJxhStH zegF)1pglA;a9bEpzu4j5jY84{n`ZHX6_`Nnu373-)vm43J~bE2yWxo||WV@=mIr=`>CG^eM-u8=NT=O^k5h5nNm^GXHV2=;r0&U5)Ya zD}&RlrH@u2frdDR__R_bgXMdPIfL&bCIo$2=pn*YYc zqC15v`#*;NaqdR_dDa4fqXfp2SAU*Af10(5;GBbhJvu-@qd&_Jf4(_DIQAc=PdUjc z{ND`C#ghK%|5gMc@WO-){B4f+wW9^zXEr2bj1`R@#y%M8i(yTPoKk8!`FKG#MiE@_ zZw%&t|Kt8!*Rg>(3}=AZpqn3|Lq0DE%yF4r-JRr$x!Wvt_gw#)-z``D5EdyOaY(bv z_`C*)k@QnXlCnk3AXbXS$v zk%LA14xoNk$jCwTX-_OJXty_9LUHJNN}?;@ZgFbLj@)KxT0-^5EXVbiF}O`#LEa&j z|2fm>UwuvTbMSwMD5mMOoQ&Praf$FbtlwAXmDsfXnkjnYRT1}d3YrU)m$YO)B~N0- 
z7l3NaaE~UcqC0E2SJGsbretEZdO!1d2gf^gPI#T=7ylb*a8SLA(~&mg`sK;kSFi0J z9>0Z*$GENoGKVLYywJHL(ZL;9HxgvOZNuGpIq@V`@Kmrv5-@dpo8kJ&$5I*4yTNjh z?6y>@lCtvBcqPL!eM+va*0EiJy70l?RYpv}vch;@%B&Z~SHP=WBecRX%^2 z60n{&sY8O|q1#UrHf@u#sR=6;DQ+YkJ7b1(!No=@Vx8oKo`K)gS zWK{w3Z-d??D$COo>xGa9dSl8KV076cxEao{W?~qVy{V&`+vISO#=#vHs+nNU%zd{RdeTL`Iaj-kcPbokwWo>y;nx%p}u)!o0jO;Wvf0>;ljVf#61>}rw zns5!Ev5GhOWWDESm1&^O$M;nts0o>6ry(~K?pr2Px*VJ}0nT9l_HA8V>kDEysyhU* z$Xp}&1w{OAkO7YWCFJ7-{42-9<*zv&jWvjv-@-o50hQgK5_j>8z=GUY4QLsTwEtk? zF7kC2-dZi^KV$n#f>}>NtT+kHn(kScqcgEE7}K#n=W*+d>X?9KZv38V8$4L% z{p5S1FuJ17EURnDpZrV``<E#}sfEMN+}1zxxb7BJi_bDGu`bSIHAAg?m> z8Ef@t!saZxoHdb%%bt!Z6JS?zeFcQe%f1M)@Cjd(TNF>q-?X@mO)P8?4xdb!n+ail zxUA@wlAC!!#@Py+yqcGCU2i#uskb-GTOh)8ICe>O6nRlWXv31W;$w=8IiD=hCE~VN z!F5N8`TXh|GE1oq1?% zCf;LzJ(Us@D8=pW?i=g-v}P`R?!pS@6WwMlt>w1Ez43v#a&6{L%D8WyyxaUipXdh* z7+<#-HnsLWDuU0`zixf9w#b{ePM(9OM`E2&lrEQ`0vf|YQ!jYWlQqi~mVhE@hQp(t zQoS@XGCVRIoHg&b5<+%MZOfHW-#YGq@uc}P_G4Qbm8W`Ush!8k)N<-9lxW!gpkuKL zFwng@S1$JuquDkqS!FJuTYYBY1HFsmN7AqBbu7C zPi)^&=Nr2=uesWGqdhrT%aTX!h@U@LUC$r>OzufeRBcHEIX|t!MmVttle)RcImC@r z?zAsuHy$8AhRVP6{64bJT|e}WGv+HY{}@NfVZHgY_S*P{w>$` zIv?EF4Dwu)Wk@pflc43!F=Cy(U3`~boBsRwkJ|ex-i21|98#?_d`LbRl^+m~hl@JJ z!3h_~HpKlcES33>2<;v4#cdpTH<@s^$X$K|k9;V2Y;OtaKj}M)2a*dk?*jxR<|ioQ zOR?sW)lX2=94&lI0KR(FbFMtO461p5^88PbHV0($eS9bJNA9LC>P(m7-u7vf_q9AA zgGLS5Rxm#V&ZCbk&fP@XX`j%0?BXbLh@lD9$cgBvZF#8I-eX{cnA-4W(ekw?4I}pI z_Q8kn1knoz?!ult){FSv5P2%@ zaA0Fewr28{Jq?X7LTb&DUDfGl2KW!|OjweGlHk~Z%)2fbZGY#JtTLD1k?V+h|g&ucP6Otegy zYfbxuAKFjWwGsp_=H`2>TfD3s8yhxot?6I4>XNdIpgx}QyIs&hPTOM{BpX~9x?5F?J6z(F+5NTk*7cbyNB@#ujs~sX=f>O zA6H!BqEqDr`-Cqu`UDMSspy7A@S*p$wi7=+$ubbU$GWU`lGr}CCl3V-Ene_b>4Jk7 zN+4SwS9THy8e)LJ{>S!#&Yz(3eQzU&dUL#{h#$rM!`CeL$cBD`HYY_0Aj1g=rT}jK zDAEnj3K_fS250)OC`UgB4D>SArT8_Af(EeJDt>3lho?}r4E1rlp}4mr3m@Xky!jIZ z&)mZ|ivVF&Mf>>3siO}=%MV;sM%-{UMX7)%qJn;>3D1kF6Pc(otExutd|o(mG@CR_ z`kso>$iJl!o&(X{v?#;SG)Xkdp_gjKeq2dm)^rkOnYa;n4nDyZJ=-p?hzNEr2Nduy 
z(Xm1AOHQ|r&MBC6-DZo#nVvOQ+mJrElF5)0+~)Uu>>Uv~pZMCfb)Hz{s}2lInn9lv zQ&U3b%GXg5N3m@_wmJvz)N3pIZ}%CQ1HPT*p$d=mF^X}3vl6)+YGKLGS~pCaZ*50U zweh^vyQ$=?M&2m8K3FRf3^{)&Lc1|~y3^orwK2-^yTLxmL)ZCb1EtmH-p|*3sFw`P z+On^>+CHGem7OYloPv>Ha*vYF1Pev?rtk>kAIug#{-EL&eDX5-p>P7x;T zdfP%84RpJr!91_<&4gxTy^-6>2x+w7S|eo+sqN=-3f1kljP}&9sYw+^O}8he^5~o9&kt zdpNV6Rm7XgeeE(UUGww&wR>DhBard}MuSi!wst>?0qQ$X9xQ%0b5mlds9!lIL0TAW z>l_e`za-uMnQ^%Qvx`zJmuTC&v}DzLlm7#R8`#Ek6!uZT6fiC=MOL^wOZO`iFEP~& z^OFS;w!aXLwm|b<-wN}e=IWQeL=h-_zoiZd2S1_b7CLT*lwPR^;=&cFXx#hY(7ZsD9J!RM2S<1EidNBAskt z(nmj!%f*W0FOh_GObOLnKQ8r#j?XUL3ON=>8`dqjFykdgV^OHkPkww@SBdn(({E7O9KAq$Zj{+ENt$@% z>_mLGZ67hc2;Z$e5ouN67_O&B$wpk-F_4tFA71)hjqE0McupWSg+e;fb1ev<>06#S z9Kp*?ncCzQ&+MKq?${7X&1PQ#mNQno_88SB?~q^q{b_spyuJ6-=V!k0k_5cllH!mv z)BiehV)Z&`MZS>NO-wrLU2ctbwLiOqSLzkCLQKI1RpScOjfay`%u%Q0*r^8ZP+174 zS*vVh9}p~;y>eedS5Tw9;IVuAOr-khXsch*)vFYFE>e^V>$(<%;2E*)M#0|-> z*Pl4a|C*$%*3%m_s`e)RWzzkPp8WR9LH{b^SIgc@u?b-yCvFhhZutGTlT8$VKwENO z@fv_EW{Cd4h~UfkvG3s?#~h6*@?}TX*qvFnx;G2E?{?rQT@Re2Ft40RS3AL!evqF( zXuxzlR{YDocvfkKTTc)2eN`4N$=43~Hkt@{x0 zI0|vLDgFQFB+|;QN{_mzWm8jWz}gI7)^l=PGs14rDE>t`kF`9i2VLs0#}#vX=WWy+*Sz#$o+I084Pk;Xc0kY)=Bf`D)IZ!ez0+X-S|t9>tsP&AKixd ziwpp5pbLwGC8GzWpQZV*2zP=VX}kE3;&8bfsT;<{w{C|6!BpRnuY7Ex$|qnCQK4C^ zKKJ{ZT+UApmXg$@oNy~ifgZ=Dsz}chDepzYYzMf)pYYVB==)MEoq5ICgmQ_R6-D#6 za=g&|ke!ox*XygP#+hdxl$BQx`ZRO zdLKv1o4a(AeSXP?8IpB8YFHcp{?zl}se?oA z> ztykacd;RX`_k5rGzF)sTn3<0`ouALS9OtpTkN07=%vuy8WVKt^>}r&_`cxE$()&xp zPw370P&Y4D1>h-ml`%EX#-$|SJ-E~Nb~4YZdSoIgt{X38Y7^#*?<(!|vTuygX=^x-Oczz@@JF1ZJ?*%C``kZc<01h?`mk zo!IN>-#5Jv5r6*t*m<8;W>5n{_a!#3bvk?LsDfp2K}qm6 z-}5pCYBW(^v)g1?Y_R5}qnVdn`729e-g+3Tx?g7jz%C=kGo7!Y;`H&^J0n7dEeH@y zn!ry~l|myR4`{k?XDj(A@pk`;!>7KS7<4{5=jAJdYYda-mcl-5d7lhg3+7{HLP4H+ zAZ&ey2_Q^pq{~W9@y}I#8&-BijmM`k(8&$`*29*pM8H*a%ok4V?w+32(X;(j5i#p* zMa1a-T-5`JRN5NMQ5;^JgvTc^gkkH-+vgEQ=A^U3s}~d3#KrpL^WYadKKAD=Lc0x6 z5~MKxkCfM$1=(45IzKTT5toG2tlCKHdenUi(sk|jINFMlMQOfN~PKlIA(k_?1LzI+Q^F~;a$ zrSPF%J{>t_O;)->{$1HY&Hea_UJWJ#tBFm 
z;p57T*I=(p<+7}c@9{cQX|4F#{du?85x%Mq1Z(KiUwe)34eNXjh}s>F4$bYV!_e6Q=$(QAShuKN$f%I0 zf2@T=w^)~*Fpb6@%CmrnS?)7J)rvYSE6%~0{2a2u*^i2Z8AKpJ3g?h;)v@B9(D-1jsUL$F`YA2T^-BPzD`c9nA1Aa z50=}-eANjiqhe`ti@hH`&JoTG6=n@--JC1qR%r2!8gxH&jpjnK(P`JdXVQGn0~Ky; zQWjCKl39-1iLdr76+=ntFkRm#>olO}jc~mHML4n1o{)S>WhA7;^T1iKVb{BAJ-UZ> zuMUlLQu8>lV|5*ATmrNyV8?Et_Gm*%_r!__OaV9d4!lb>D~8RMjp7cXm{kvvf}dv0 z@BzaISvj_5FlF%Wlv)K7Yt-a%EoCT);SfrjydOf=YyzIkv``9vc*%>vr3sAW>XYIk z8y0!!um^}aJ?oh88}$a3rS7J0zrI+Qe`F70!jK$*bn<(AX)1G-^)2qlAkwd!&~|E_3DO zrTX^bb1E#SqeK?h8I(4=EH_uct$EzfOdjEWEM4+H*ltYY>N z!Q03@^XNBF(j)UA09c$0Y8fkh3fRXF}R3qRr~X%7O6Us`WK__Ph~V z7tWpH*^zk+)b`MR8mnRR;K}2rm!a(HuPt;d8t(`0J|65`m86!qfA1P#3i=Z*c0_Py z;FW)c1A85ZlL1X6MQIT5bBPTQ=KN+X_jFrFhiNjpq=;o%bsTUo*?Uey=)#O6qr?Nl?V~@W*SyO(+7ND=Qvmcr z?qlB6a{aBzDfNeZl~xQH{kWQ12SNrOAeMRY{UMGG{@7hFh-NYK+09oC^CIZ$D#H?1 zOp<1g-C-wpk&^IXnG@*leHo9lwj6rd_H?p>O1t7$h;UdwM~X!r8sZ(GXxo;+{S6P^ zgbBO&_;+`IfWB5pv%3O+GDHYvGO9?^QJy25NcB%i>&RI1QrT+|?*f?Lz89QnWYK0Xj4#_ z=K35U+LgXJZ~kzX{U1~DZwJY5xuoglmQk<4Lzl{$>aNX%mee9E0sb<7r};QyW^zOh zF+c$+R-7e-zTC8$S8N7w+}@yCb?hWs?1AfHPldCTCki622*oB#0z6QpMMvnQ7Q$Yv zdylZ^P1T$JN}IhaX`kFuQ-BiE^jjSJgVxM4(tDK=RxCZU3$>GBnUN1RU3vhK**}^^ znj07kCU0>Oc=IxR{;kIRoNZ+3**8JvfTxFpyD|h(JI=hcGQ&l5uZS0HIa-bgd2^&}+^W_^ZicHMKHs2anmZ))ft$oKZX6|#n{XK%)MbrNTX z7s+ZlukmJ~^5DnA2s$~oY(p(*YnH7OcZC}!k;+HXMst$*^$FS8<;Hc$m-nLHTIzFV z@bfs-_n;&RP=Bw?}>R4KoP}Towy)=9$lhBxT=?g)Ngjo&o0nEoz8#mD!NC=%bUrtR!_FoL=f_>fwPu}j;7V32s3l= z3K^)mp>FapfI6jC z=S~aPpSTkhpZCnzX3c%Iu&A-xekb7U2BC_CN@g=tw545!VUKkO0=qLko{hDQOY;R; z>A78Jk?yP4B&}x?E5X*{_;;@B`WT>vKDKL77k%nTB&kA2-6yS1*W8^D72(5 zqG$YDMZquXPyi`xkcp&HN|!U0+09p&A5p6iUfc<#D%6=4xc0w$h%TB#LI)15`uR(y zD>Nqb<>ylt$?lM6p7I@dXC$`*}MC9=Q;~G zVs|-#Ix`*&xWbVpg_h@CCbNOvGcfBu6AGIzQ*`doBbXnT8%FUT66!$i{F8XizxxjK zbL)S`rUt1(U0GFk{DM%2L+K1-qFaP(py_hJMw2_?R`diO_g^pw2U}!^@NGx zera$VUEVsp$i7=Iw14E1uI~A2I3HF13{i~5 zmSP@|zgpM3TVadavnoGOWub#=MbZBfz4_G){7=9Bmur*7{@xEOM+M4etX0C#LD)WIE~GMGW!)WmDrX)df1wo5)G88 
zjg2wq101B{yF)XAU+hQG(Ty{>zQS^JHQqL0^g%QTKJR=E{mB3zy_%fcRz4ybhzP27t?2rW@%}ZDF6`>> zvrJizdoU>Lt2MUgV|gpLaabB_<inYDa=;n^>GUyz8eRM-Vg2M-Fj{9 z?%QcCH#@Q_$(rmdd)E2MD)Y#Oq6rmZT>OpBHLIE`w9cK)?l>PDn3v6@z|oTo&f6W?MtFp3 z%g-R=t918b+9E|WSQcYt)r0tMuo-UJCIcM5>|`&Zop-6$fUUYUia!@26X&9On~m?% z&~4f96qF2C83tgvEYa-!2s0>-@GO~Evr8;^=23{cw`s*S59XLq#ZR0&AZvwS)n5Vg zyw$8A+Zx5)!r$(c6`7w~2&oeqv++I^@<=WmvI4Tp-mL49Pqs*_C0Dy-y_elTG=CL# zF$Tf(mcbbgZ5k#i5AF>bdG3mJl69jojL@6b z4WuG;Cv?;odo$J~SbHgzIV0A&{yp#cOYwD_mj%`YSh`8PCM1s(ZH_7wSAr2o-Ybz0 zn#o18Q3ftUhlOTzHjyur7;M>nvYj$8@OGxi5Z&>P_^trVki+RD!mD?2}`WJ{@s z`Jk4XYDr$pRo`1Jm-r(UTssYRkgnCpD@v}>7?oVhOF`&*WT|0`uzu^y^TJuH83*&z zH*#^mWp?~*guQ7l6*U6*hBtE*7jU4SI==l;{yCNJ8^Ha?*fU|f})NRnMY-+XOe zBuQ+uN9L#ABN=!vPH%UEIwIMKL5UOgR6g2?v?oTzfkZIJR}X#AOto%k(98}JZn+GYdU%}`3_5ZztwNm-;0QPY=0+)><7#X6l}sY^@k6N+yQ^uUVmq{|M)k$1hSOR zG?hCVmOJ76sPxsz<9-WA>n&hnu!Y8@2hKKMDkuM&6wz<$(U#k&6NHu7^u%0a^B(AC|BEhU89+FZW4XEMF7riUagD+Pe9qf6aO| zCk#37){COvUAIil1q^v#~dD-AhJJgW)bavlm< z?l2$n664gvvC24F473FFlh^zr`eL_^SlkMdzAC8oyh%*>({g2Lf?&~>-7h)W!DP)9H_+{YXn~3yFtr- z!J!}R@LUYT4J$V>3D@NF7!fT~K9qz(YHO>U+fek_I@|=IZUVSy?`%uHGVLNgqWEZW z<7oH(-Ol=lfmg}#e`+r#GAV*x_+wn;f8{;x=eGZp8Hnz(sxVIT#EN3>J2o~}WXc%_ zr&K)csVMruMkU;Wj+bK>NjgOHihdS3`F+W0nqfBQ4hD4}M7z;H9QJe*i&%jsadPfb z>>z~_rhx3-gOrr^lUyu=$(N4PJ=&bOE0=%FTbB7P<^yg&c`xZDA?TuPP8?Tjd;5p8 zvN_T4drNl0t6`kCT`*8UV>H`DO4YG|{u@?NcmX?OdAvGmEIWi++p)>POGdP6nRkelmU?9``VHv@TB*F5?E_E z#bRL>t@>CiDDwk>01aXGt35K$6w(LkEw~d}&UfTj&o9w++Ft)ecOy?q`Z6)f^n*l| zBIaYi?`OysId@`YKjO}f#%epxcUEt3VIWEHByyVOg0su;KK@pV;f`*2A^q-;geboI zLqyz1LR%ZEDN@n9KBnzbHbcfR7f33mu~D`Livm{JRpcr zq!-I@^1L*I?P#rG1@4L?PiC|bPgO+`$=BZO-Vi9k;WsGixoBOg_z{t1*e&SxF+wJs zk5+g(Bv3PCM5AojlSXm^mVd2b8>$h@PIRO6Xcw}E!)|ofp9A4k&x2#1I>uAND(rWv z9C`jyGu6m&6F`h8acw*336n#c0|F+J)%o^o#EMB<5zJHHU~uY)UJh!ws3b+H{&4YB zE#zGOn0x?bH|B$#XS4KOm^A`N|4HiTp;xVo?X(ZfeLjT2h~owWh^xn?I&LaJI3)Grf~rT372%gqkmmTbF@)DMCJYqGx7g4;NK?a6U+LDosJjli0}rlzqb 
zwMSq*+e8i9BJ|NbCV)=!~pRDh<75r8dPL@m$!dS!473f9+gcQ@(vhMM>+Urkpsf}r2vVdv#yh4f2CotM%NdRy@>rN)IvjUkk$G2BXgR+U$zb$E zrb2Cg#L_2xiY-!}Cb{4<<|(HF1#--%!0#^#Bq*wj=wIDILQsn@eTIym>*(*J1sQJ~6}V-3 zFv(4Va0vD9%~~&sEO2z~`ceDcP!g-uKn9%vh^XM2-!PWWf2%m+_)Xvcvjt5(DS@h# zu-t^sR!T&_mxT4%jp_gOk|M`(fMEjl^{hZ*vnX(;O-s{JBQA9$B7qtm?tq7{Zw7YB zgsedVwG;T9&n)8B8Gv+2>5v?UGN6Z_kPN7A_rcb*BN@1|!6qu-cz!IDlH+(L%p=h? zShwJ{%Av%@frL4w#mwDfT?2W+dwFo%Tz0vdP$mettAQF+HY4OS#JXy% zXwz6DQt4=RTU;4OWOvmz-ee(~sLOYl6+@q+E#2d|sN6cX7P@d1cV07rD)I|%6Xp)< zbDnoBxK}aH5X6QGEI*O%?h;L=>usvZ&IeL4{zJFNP8w-nh~YhPS;c)j3oDl zCY$xVWg{Zr8t$GG16%gd8QaBxtA-zTyiF1J6`9|6t)~)WCJ!ltKs?ldYW;=DI%j8P zAB5zTt$e5>Am4qfuJ{UL&>pWj@tG^YwRh2~%ERFTp}#`j&ZtR*y?MF!%KV7#cGHEb z;QQV(7kSfKV#(NcITZ}a*DfDX5?r5j_%=f&=CH!!fNH#AXi6!1pue+PYk@Ih(B!du zQrwGA2}RYbVWIU`;3B5kPcOZX7T1t0hP`&vRh^bMV(AfFrbiVty1KtL6DC@C!x*il zrO5-O=Ms+Utm|rNYnHik1N;iVI!D_tD$SB`<1<4hNe5O>C!>hC3hB(?H$;iI{Y}W1 z8miy+s;gNZu&vzt*XH=CcM-ipwPSc|mHEMU9W1485 zEXl`eJwK_8ZNG8za`L^+OS}d4w1X=ZqX$%<-F5}0mGox_vlgig)B*E}bQcjS%G&k?xtu(OW7NidWP1VML^I*0gJ#OF8 z(KgQngvF{XZqH}PYYzals8)Qmw&lmk$`J*L>kk&ubJY}~{2x9D&^CS5*Z%42!oTGj zGW)kxgMYRaFft@RLPZ6dc^Yr3t6`mC#I-CNoH0LZn~*LrlsT3e&-C&wcQ6p2VG2b= zUcA>jf2FCo7FsRnZbyE4-2rM))-~$A)bg8>@~Z94 zJmO~7VXO;Rx41b~SAukAR#TWCm2m>pPasc6=sI0roH`3gZo(f(KaklG=f0Acop^hE?jPzxPfR%dO%7^DuYX;9dJB_T{n>wwRiCx{5pb~)>vs&T>2vfP|t5Fy| zYG0S@4xwy7i)(9M*;UA)+d8t}O8%)odh?l1wM+DikUQ^w@T#Uc2Qo}Dnd(*jUXRGboOV@RqP=}HFCrH7rSZE{X z@c12zyEWo~B4={k13#HDpKw(|kyXxah;nA|%~EwmgRct%_kF+G_JVQhfbvRqwn_5q z5VmTFfou?WX8yF_`m2d|`it_UAtLi2r-@IEk`;K2-)5nk(o{RdGN@Pwwx2GgnOO`b_urO zrA2EJ^w6W0)#7MI*#$GNf5@@}S^V|!{#QQ3KDYm8EMWSqH!jLA9(KcRq}t~_w`*KG zeVIvnzt066{s^I(vQ-#II0&O2!R{iB5q1uS24e-MP=^VlF$;wy9uU&U+&HM^b5CU2$0lAi>AmZCb2AOf-nLJVG^8lbc&J`yN+ zjp8Ay#zq$kS<9)k53(_i(O&NgsV+)|cuWCtx}i7Iv4W)N!gv_rS{7a?cNe_;qRh62 z81c9!7KbNZ79dc1#J642RAJ1FbJZRMtueUkuy|>AkDERAx`}SW6xnF#lkx_Y(_iqezegx0RvFl+J7 zcej@@EQ|KOeSsmU3)Xw@pLqKb;;J@d3k<E4r>dj|nF(;-)7!wxF1*3i_u2VK_o^~NJ_ 
z51M4%6F4t6P}p(phT{;-)nLK{xkRWKLRTd0bp>fl4L*xOZv>wwKdkcIcA#+6zKSi( znxAT9paKmj#JKUdQaeKPnB{^dbS_o22fn3zvi)L*S?7ggPzuXORHngOTw#^>Sp_!Y zX+(McvUt(@TMK_wxbBvOd$OU+X%#d3Zr^TFT6UNjet91iDAbrWhUx6MxUS>7#Kje2 zTsy2z_w;NjVA|bJm0XlWMR;Y*93#ZUj3ljsx)XVo+6w6SWDuZ@>aza=HT^SK`#pk- z36boFAT@)kN}k~o4I8$*cL&(c9*8||n=5hIr<6Gg7KX_c6!K&B@jYwP) zn>wmTH84cIk?3E)d7WBANkoqg>4Q)@h&v4xECH(V&ST)ehp$(xUqmm=!a&CKFmxKz zMMp(R!pQV+s*KvQ!3?mBVC)Iht)gJvVKCMLfA$9L0y)P{{lDD12@jJx++=srq7M2SP5b&K=fFlq? zlSVw8G}M;Q!p4TN1ZrCXc|EiT8BBS!u83}^|N8KPusj7A7>#cigr;IOVYxt;gB@D& zTLwFJtOqqTjGk@?1u%q#uXk7b*JgKqY=-;ND&eoKP``9$^Sf64q5VH~g@XNX$3j!a ze{F-Cp8KT@_UAUTe{Iw7z0GAXnUQEKGEm?W;x-lv-Nr;c&-gx3U}V8Wg+}1=s6fYW zXx|t=pD(K^fw)3N6@a;hl5b;Qq$IB&hAzy*UD1E6BLy8QVA2PpX$+>H39N=s3O}_s zXj_a#TU1a{XXcn@jL;3mlaLqii5mNuqpOj+o*hN1G=VLe*R?OE<0Qfgw2d)lnPFey3cju3x)>zgF zg)OXnABRJ~y}yQBq}$JZC2*sEyuTygmd=lT;Vel&9sk;wS|>^ljA!51Wns{I{Oc?M zMZUme4#TFu)Q%ejOQ1+Qgp=uKzOx0xPimW1AnNJRYhhQ*F?s z{V>u9es=wYp_Q62GE%fS)Fl%x5qvupS63K>y&_{K|UhPm+$6|x9+e*a%FM>Lr{q{HK-7SMGV;e3KpVO0vhH|W!8Xm5(50IHtfLBT98WQ zKI}pr6O9?w0GT7!s~b5Vf9K5jM}PB6%51X$@Ss;&jtUKaDE0)=R$wiSD&&8)nMjD` zH^6y-@x@KuYoOla@}LaBR+&q|rn|hs&=5snngO!)0Gt8cu`_|PLw0?=3_Z{5>Oy@R zov?ODV2^Cck1_8*Ljar>-w5fa8hC-vHWJ+dpD^?LOMHNH4h;D)8(^qqU2A2hBm3x6 zMyXD)-&}O0$=lG+-}e^+7B7k@wc-|fLh|Dsx(fIzzXA&Y@v4MUBU0S}`)~qCrNx~A zS9+jpS`KDjgbu1k!#(sgGg!kZ6N*h;O@9KBSH;Jc&k|-nRu(Z}%xH!39`J?+S4+~7 z#ic>uY8(ds7itKTlT}9*CTr%C%<-5REB{M@m7Gu986{#jyJCK7 z1gZveRG=53AQX$U?`U98o1r6`>la)5=HF-vIeoE7)jzp$cWNkkd%*Jsk{J}-{kX*| zax$=MuG3CnNEUVs0K9-&pi-*MY6wMm?KnbVw{Bs}+%qh@Pt~;L1yiiyWwD|TCnPN> z-&Zk+qtM5|BGF_m54-i_t{v|b#9InFhzd!E6Wb#c4~lQuqMS7g72vuh+ep^}mNIbj zW9UfW*zN}iRYFxlrmKx1zPR|!E}3x22Yba{n={Nl^^{gPDH%%^>@d;Q;49kHf2Z;- zXK4Pr6{9H6u{JGN*fixCS%_#mv-GCuF46wkJI;jbts-0S9>bx<=IX8Ytqus4BXx)^ zgV3NwAq?Y`i>`*H>n_cVO}tzoImgN@khMY&39w4|9#rk^WCg4 zpO;((GKoQq2&bZLaRhc4A;-P4pG$#{l&4YLxMZrbC93&e^ogVp`C7(Xh9cA45U|Y3fA`w{qX*W&50Iwq&jVqZEb>&306Xxp{OrH}H}OR%i|Y_o`!O)QigYMj 
zvSy4)S=G>a$1XZb33~h}+@_J{C;uz95d%!K8%#^wdqHNw%Bqs0#B?#NRgF4My>L{8 zZq5HQq!(J73Y@q~Bw$hS8w{dX>~M=GrluxW=>|SS$Z>EWrh37V+c*RWr#FGo{uXF5 zV9h|kzXMwtqDFOne;-LR9{D$C$ggM0#%CadDy}Ou;3v%2MPjTU*G1&1qs4nhl^4Kh z%QG3#UHby&x25YO%L}6|n`b24ZaZ5ujhZ}pkGhH;OPb=DZX5r%hX~D*R zo{bgtEgJIUY68Pt$)&$8(ywec^U6^te@ZHP{b}L4PM1qGAa?}(pU;a&b@0_@h{k{D zT2$or0;kolk=Xz0XS&ZF{~3?rCJ=FC^2gbRuCgMe??Q16{TZQ*t+s$-D<-z6B=Q2kmsx?25;OnOVoq>+u zGs37}+9y+!2FDf4*=Rj{FY`7IKBfCSGw}t$WSf!1>ue&S@rRQf(yqtejPsCxmoGK{ z7O_|FrnsM_!RgJ{ByY@%ZxxEAgd+v<4DR^$WT{ul)@xBc#`zhem%86cWV-EQF|B`Y z|Kx$hlp?ngIBt@OtW?h~H44!lSecnJxEpf;;-@F}MWvjc-p0uLB#OUy>kSKcA{ZMmZ<=m7>Ch>s~kH=(Laq*61;nmD3R)kj-POM&RC zCl=D2x?W9$(?ZK}XOG1B#-&X-*I7L?QL(<6;4HvwuC#aovg6fvwaS$hJZIh@%%|&Z zu!~me!=-JM(r5~ve9#Kxk!c*AV4G##VQe}%S5$g$Aj(5eI%{4t zz|n29moF1*lbZYhl{(X)(Db})Piv~yBKi^W(FnJ3Q${c9bYx_H)S_Db28rE6h7BgW zZ_tXrl3+KT(NL`Xp|bHtQR6>SE53W{zEnEiV95J3z2M8cH2v!|f`YAGKVn+GL0oM2 z=0`#zyMC9k@yoycxv#Tddbq0hm9E_XiP#Y+y!sz=J(&{6ezu1!Qyo zE51g6_T&Z6OA{{9)d^DD_fXgLgvzM2{ammix_kWk-Ya!X6zW!>Dkm8%q_ z6kXG)0=UIZIX**Fx()h|Gi=Dj@?WupAhA5%24tAG$5gsL>R*3?g+Ds9_nc|k*oGDJ zUzKFPet#3UZ`Or~#x|SL?8bEkE|Lurrwgi3KU`}BGJQjF0XVd-73wK0Aco3eg{|#% zT3PV~mVzo_j^c9^wy0@P1}m5&-`fCKToOSq3QWz-(STI*V0{1jR%7&dWa8P$@uz{C ze?3#}!Tx(+3!B0$Vw5p*Ka|k=#z6D6g#(r!m_=5UeE5{^b?7{U&dTG}R_b4`HE#U~ zMi0u1>ij!oN7NXA_xzpVLaT-V!}{ff!`y`-vH~7g8gI#>9Z|Cq%9wdIT!9KKSOFgQ z_0wcIzbb{@?5%s0ZP7_|QQTIBO@lY=g5&&mXh1Uf*@l8UAW+DXT4b82t;5lZ|szWK#p-RJ#BiTqp{1qJT zajbeT5tJj7aM;S(eTFGKLM*)%oAx48LR1~vv4QZeYJ3xbD`oji440vo-)s^^R|q3P z)Gi+)$19dh^ZnR}7%89pON)F(jV=UIPX= zmAv7kn`%?wBNI8NyRX-P7L|2}^D9Edr zJ*pY@B&>lZ&kR8@!J8!zH9JRCOq-Hn4GDq=@oUY%BDoI7hUQP3NgN_pJ{SK_MN zZSHtG-SGzJ{j5!l+dd*+56y*8carSf9g;{K8U>}Tt<%M>7Y-B!$H0V+n~RIbXC9;D zb-pdY42)3vAbi6bQy;oNeH@xA?I%%>a|>M?<8qCfn~@=2oG4Oqu)ZeI%;Cg4vEDnj zgn5SIC*NqrvfE&x0n-Otg_k%aB+GitW1JOhrH_a;Y;jxKOEV5r5|_r4R>RqRD{ z$1m(k)|Zh(r#}W+28m2(zKDpNPV0`6`|+kvv<}RwDJ{uwap|)1P7K;TX*XI#UeIPG z%Ii!haxz56xu@5RbXGQV4?9Bj8x1_3xgOc}QsfyGfo?ed9$vWwOLcCnU?{x`4pWu! 
z%wtE4>CAmv$t?7bV$PtC;>I4&IXAgUcI)ki)SLk63~IloRy=ci`5A&l zIj((+sr{XS_4RN6V`%*w!|VThL(3o>X-#}H=3UBqph>~Q-6-mKuZUP{Ps6+)I2sVq zHA6hK?Qd>kjfkAUoBW9EzVErq1eLa>t#AoyhhwXmw%@jY@roiBzZa6%Y9N%}h=gig z4c$Fg{E#w{hO!6?7>>UAX#Cho15=f~-&pa!9%f%(T^nX-I1=u$zpGSUiN+vgDSD#^ z(Pul_U)09zs+O}7Jm{-Gt*U}-A>L`3!O_d~ER2}1KA2vf&dJzU(15NGA@2h1+;9YO;QI>I1>h$(th_1ma?4`( z&S19VFVoJ>`)Yi6y^?KZ@vykl~*H+&%I?@V+FF8Rs@6>mLN!AO)IZPSP7v4AnL$jJ6lQ#Kve|&2bjdp-}MY*LyY42M!c(7(IE*^I&v& ziAEmQuv*{kPM!Dm>bG@?`3{*8OKZS;z<>c z2x9qMl@yGj;A8^L(H6w@WvXKC{W>Z7_fVTHS5arjLZViK2BCCF1ko&It=eBsG{>~> zJ;W{Kk$B!v($z4Lp>BSDPUl3X&O???jCVJR-8@j56>J0U)Z__tzLg*Tj4fmJyTW6q zkG)i4m{+AsRD6xr9ETO4ZE&-$z|NzYgOLIul*>=CyD0GmOd)w>W%W{238~q**|u8G z*U|3-hbFQ)UAOPaD0c3rG&-ibgs!o2Iz>`+33+>Accs*nf*NhZ-EAYZgl*NmSt zjVMz%5-d##a=E?lH2-{W1O8I#$AWh$*;`t5=!4oWvG5Wk_fEBB5Hw(_)_V6e(Un1F z=@<>;i|pE|nXIO=y|eVL!n>^4Cdiq?)cwRQ`J(pncWXE==ZTqtjR`Odd<6qkBNJbnMl z;mNW04J>!#_Y*gV*K9I8YZ1iJVaI#xeM37owrMb&;cyjOWOB9L?%epqdyqh4p77hI z`=i#UEDZ-$ z28^ox`SUCLR`%uQv!2TrPwP|LV(6hX8fR_gfW8$SZJ*zw0jewU4Ok2jSa9M+l8a&^ zE+d7hnnH=igd+d!EABzT3fBig_B6`?O>6A6D14lYvEG{PWe zF}rDLr>}Fx;Bib;f8*Pf?AQHFlgsSk)~x%+W^P0DBo749ws2@HW{eOL0(b&OW3(Ub zmK6SA;0dWY?tbTvf+gmm`;OQggS4=EJp<)bR}*-9#K)HBed>V9@0LI6ekCpGLU;cr z*UTyj;X?ZHts246I!zj|)H9V$dczI|ODe+hR@_c>cB;4cMh>Y{2QFUA=&Uf}8llxj|+i&r#Df+a2O z+JZNzDDqvCt%VGD(`!QI8?#-nvPZ>yAgG`T3;fP_eJ}JZ8T5{8^^v^2kOOl5;2JxW^u3(jXQZ|X{SUUpy~9d2zSO{>dRmpUL;?(14uVIeYH64b1vKIYeijC6*smkmiauZX2}D zuamiXjsqc1?`i);U-BrOVN(-d>kMC^(e@pe%=w(&i6R(pYFpZ`3(hahzuvzXIk5c% zXN0L5`2hFP&yd6m)s%(Plc%5@>IbwPNcvWS8u|^J+goaEZ~4kxy&961nfqjy$J;Dp z=ZFlO1Ih2f*xDLnOb8S)@&URy)t+TtOVqT1+Zws%-7a67HBg1Fa(tz9bWR~fPT zTV#%g+-5ipj^9$7u+=o3dYP_6{t0zX&hTx|XQw48+U?7%-WFcUe4bfM51l1P&Bb(G z-D>Z*bzHmSDkmI$kU5*qs@HA%P^h0VlV7R4RL13%t=D@YyJe#eJftl+ILmoLCH{bB!?# zZDiYW3y&Mt8mFtlf`Db%4GY%i%qnQePJ1CHGTPABQ zIqAw|@k08^JKVto{l4+s9b`$XEVz7G{D8thND|73O_fIf)13}l;+_gw^gX9^;p6RL zPosM+6|Cu%S)JrOPm~9)(c)vs{%cmva0=(rFpM2Nd@UdNp7}uLPq;zD6){Q6kT@BQ 
zuKC6_dNcOOTgNi?Pg!OREk?;5({VWR)|TT8FCpDC51Po?VQoujsVY+E(~=A`v|0&e zhrToNVXDQ3q}n(F8O-;_Xd!aEHG`mnJAKPBs;3Wmf|we4&B(FZ3A#`dt_;krWfkb|riGgtZG)6t?ZUgoE1NNP}R5of!hGJ z4u4M(v$dm!O3Ojzb9EFwSrBvXqe|ms+5qg~FKGE8;Bo#6&MlJ6%|OF-J`qU>M1 z3u8!tJ>qFtzm989Y=jN7C(KDNWRU-QEs+6CgQtQj1q5=3Vnl9 zks)9x-oog`IH3j*dj(l7ht}W}UtkP0BVrdpzTdny5`c#;2SF+Pnp;EK!H`|q7y>9@%g%YCAU*_Dyax+0v3Nngi0_;mY>-s8o`KU(#O%qWIkD{4)6~f}sefV`hNf*wl10#fh z&=xuy*f}#Eyz7Fl2>@|$_vT8uuOYrIdS(_`$aB0YmxunpG}biaTAdZB%^i)- zRAKOapqV)t?H7kwfqyrkgE#W6U%kTdkni1nrbLO);ou%Le6#@XuNoJB?a3m#n}GBh zzy|NZw(8T)DAJ$)${P7ogZ^LiOn#?T;jKbx@xbIqtb~46;6o=!J1X-O(9_gQc_TKo z|0S%5gi(#+-s64$HxNq03ZYhe+K9`#LtZ>l#EAhD5ws>%Q3{v%tQx2l2~_^VF_Gu5 z;6@b%kb_rRV8%O<4f_=U@AQM9WR#YY7wqqy_sOdB>~lpO4?H}SA@v?pkiI_CVN$IS z$58OKD_uv{4=#oQ2wxlrIsoj%awaQzrv4nIC1PR7j|Hwr(&=p} z5V^=8e>}1?_ogglsyT%2@w%Ke{bfG~$Ofn90G0bsBci$-b&t1sBY_0#3 z=k!xF*bqJ-AP*3(N}dp-F8;y;<{$Iq{?)X;2xo+Kk65uB13T~ZO5Z~0-%>#UZ)za6 z5&uFd7(Ca2dj^^MYZsW?guY3CVtnMK19}UV^UPDN3J|0VOa#nhn^<++&IvA*$=4Uk zie&+qeuZf8dDZ+Gqs!H_bgjSi{#iT{JSA7cVPV5{PyYPeUxxBuO#heu!8vj;=3mn~ ze%W`Brk{gDvtzew4Rh=T)cS5=dz?R#_o`zLWz3k88!r9pT$EFGWjvb%tUhp-YRX)a{wK1sto7?!veN4LTjPCV_W~N1I#MEWM{LhbL3Z zyPD8+(bLbUCZu|#pR`BfPmpF^qgC;+y7;rvTtY=_j{6fq(Zdw)qWm*UBrfA`6vC%1 zTf2n~1W62gZhBY?ut%5T)*&pIBR3r7MTsB1N8qXyzlBsWkd<;di3d)oE@u`M-=rg# zS(DEPHUqy&{D#vW92GR<4dP2;Q1g|G*77q(#9*~9;@F1oMMirg`&QbADx$NK)N5P2 zHtW+khah}JkIXraUoTuIAH2dHxOrvQHcKxf;bCS5Nl}*Bb*0M`gAs{M;3m`gaQC%m zwHV8SUh_&Tqb>D0CsVWgiJX84CSZ<8PP?fAntt#1x&GG?U_li(92Jd29K(&P6=NRR zJ!Fp(`V}Cx2$o<3@5v}%fC9in}wRIv0XGI?lG(Y*VTu~f@^2#)N8-8 zUhvvA>RhDy=u@gR5g9g-r=&bhUGPH_Aw%#dqkW%4vy4lL8)!$j5Tn?6hENyYCuwS_ zK>+!oq&4a{Ui6=N(!Ws2FUcm{Y@rgLfhYZForRt*~PJHoyX1*8o zqv6V8W+u=C7Q_$*B`z`%@3#OJ?g!#PQ5gDl%H#9iJZWxwxQSZReXvXHBk6PWo?&j7 zYY&WkBOW>sVzS?nHt#$Ld%k0ka1a_>X|Q$SWn=K(>PtD7M5b)oJV2Uv@cRJgzDECH z6Rm9&2MxtC=N3jJNRwWbRM7)>;)ouqx6DZfJxO|2Jx*y84mZ;iSG;j}nWCGP5gjwO zo0xRR)0%X~_)V&4+=ESgkfQG?{4MVm|23RkGVH<|sNmCyxB9=1ZtRanjN!sox*bJ% 
zoS(lN@+(?(jSZt$3(r^0dC}~!|FDYlQi9Zbp$=W<(Bb;l?>M^HHvRWi?h&d9<>pB_ z$9D2-BQ?N6JjMd7YhBZ@qga3z!s&$oMup(DPPQ0}L?!z?DOmjYNR-0|PR+#lWN1RK z$(@(zlFx>wgg-%db4HI01zW#e^6t>R^^}b*MtQp+6C1JS0uS?J8MD`Hm!9BwyYrC8 zzexQ-f@z?)MI}IlBHozeurhe&8Tyv?cCn8;(+gLdauBrKE~m89#9GbyLBTO z>LB1^_{v*(3KT&o{G2%xxRD~U$JPWXA(=>UP(S@Ug)xy*&kvM!Eby{8aD3J<( zAkT>bF=A|tH(t-&a=8kVH84wbnJz&CLtrzT<@%RuoFM&W@zXU!vodFWqnu%KYR}yHWQg85{9zYF0p2T~;MlLdv7;xlft1Vfnh7 zO%#Vc#j@t9g0m$n8*z1P4c|OWt3y;glU$tg6`3Z9zj(e(BgZY<-CImzlg zG)X1x{HG-2HITrXempik6SR-O+__Rk~^ znRtsvj;^U{_Vb?8UHtSu;4LdDHjck*}qc+lCg z!S#9B;UxlG1c{=z;6^TzTAZ+)1 zFMn&qV^T*mUL(|&q$zAlG&KXc6}cAWo74BOlk0wm;tF>)yR;V~6$=#3Z&%hXywWa6 zZN{ZZA-fGL6IQj)>3;<^mQ}l)5qoU?H)& zo{S4#&{yv7_$r{;agVn4il9IW`pVQ{8TiE03m10aJa7`caaPLv1yvHj9WI*ZvhYNq zr?(YjYSa|IK<@lv$J?@b+C{Lykm3N35`WvB3LEUDyV)PZ?8@GBy6!na&-0D##!A)U zfN^EzNQDX>K^bRpac#Ueh=?N|CZg()hN`R=JZ>tFb6ijO@HZs*LH=C?#vBX^v>6 z9>&j#GWaCuiC707`yhl{&{|JRUB?u#2>~bx(TI7SH?2s*)w)fjGkTJx)8b3 zEPeC-^Pm&M#xAfe8PR$-DKK^GZNXB}X9z}B7OLiEVS zWTE-!)lyU0+$Dnx*$E4SXhkc;2>%^A+13W%=jm2r%uFMhBSqIinF5&8EW7QwwZnDZ zR%u~k74%DVS{lzLx)UJ)F80n1+gZVrWQ(_u5xBo7}J+w*akz)Fr;2c$vb&p-LHcXaEpdkf{2@ls`%` zDo~>?irEnpQ;9Y?fxp5CyYp;HM#reXM-^4?)ELL(~7ZJ)~!hIrg`C=9&=8Ec{jFFyrIAGN>4zN$}_5mzgff8_jQcMhiuvc#RiKijnS?AN;Ho$E)> zrp}opYh@Ztjy&l)ubpD>V7vONATIvJ<&7lHaM~k|WE5MvHsO{IhWU^@d z)R2^J=oyfO=qQpwYht~BTF2BBRBvgz#9*EMEqrJ78xLo6m&EMnEQ(P^=GI?Aunt?| z&T}zRzuPT_d_5#lgyNZZI`%ZL(8`8x7^NTwu30tS3{kP=5-AoK;jOcC>u_5ebnO5! z*~DOg+LR5Df57vALLb6)(?NH=mPX_CR$Gfb0jE*FG8G2AC=dsKp}(#(enK&G#nbX? 
z`H9WIt?)BP!{frd)-3BP$?TT$b{mGr&BE2ebW0p0IMIE80}ef91VG3VfGD{xdI*jN zfCY^)VCOZiz5R&sd6`8WFE-TMk=;OTzJKM^ap9FJc8%^n!j+TE+93Dgu=I1Z zfhuY>mn(rFbFPo$1ldW1=c4`HU-*@)@Qx@zZ@?kWe4-Oplji+b&_pcDJ%DZ2LRY`CxBBRexiNXZ<2&YXSsu z>)XFQ&;p$F_uqegl+eT@d2KtDd9=hiFmX8UXiDzFrWp0WSAtpF=gZL%aQy@8{oA* zM6|D!H&}I+VF$i;-+HB)8j52w*aWIp}*ATcS|Fnz|bGOQJ=O1&6;XznGa}W`F3tm zlf5zJ!-PNc#8lH4&yr5IrH1LQ(#1#7&o4=dl&eNc_DYm>Gb8(L+b(sIkj?q0R$Rfb z<0#cBV1t@_K*ypRCwg49{W))wJ|tyPtf?k(IcvQ6X~{rT(Z`NOM~ zFa9?N`a;S}F^U%9VO|M!`4DL*(TuYkOFX4AM{%Q$ zzyv;@lLH}WZIFccv(AZT&EvN!u8w!EM2PHmrUPQOL+TUT-O_We+5X`fPJbUD4;P_P z;urS?zF{a3faX1RK$XrTw^DeoUSaK~e!nPB{t~CQC{r#M0WKeCxi)`Ue_%0|vVG;% z2|8@if1$FTBSfl!sf&)Kg-|k{Xe42cnE_LUI%uCNuu5ebzqxy?V5wW5|MqPVf8cs% zz<~at*Z-mX?mF{aqfHS{H^+OSHB3aKht%`dslA~G*!r@KifEeX^V zcnBg>bBjD)YM8jc8eimoq)5!{qIhbkr=}Ix1rN!%NN*IZA1d?+2;;*8^m5;hfJILG zi~>#59JEOEM$qRr0%k%J!oGF|+t;WS$vrz$(?8=%y`NZl&adZTuhfX9o{!*2Bo(+S zu>5#i@mN=@=30ush};qokhlD$Q5Udusz9+^T&h$*UnIv#L6z6Nis+9cbGH)ny2&za zsYhR7W^S61Wqf_yMf8bEuI~wNyB{0Y2{q8F>cm8(SJqIIYGYnQYFGN6cy}FdZqxa_ zOv9adGCv6qn?;LC+0aZqqiI$9*NCSGURuq9^*5w3o%bE5$Ngo#M06u6BVpuLULg%` zu^jZS6;&d99s)g-G`F1Oy78mppyIGreIWEOKtJ~aP4pAQlun+%UerLY9b`1*a^W;a zcWc!4OKmiiA+R@KRO~7p=pfMM$N%!X-Nv8FgVSJ~(XFjeD$)Ey8Tuc`Q#RffwH7tH zEeViEtgqef_r%OqZt5SG+9P+!pCL+ymV?y3C9W#O5=_37Ulw>w$Lbh=8(%--)D%ZQ z@*Q?aJO?u^Z^Hd6v=CHk2hlF=0qjlv}dy|qnON;cQj|uMfcw2N9j~sD2nNc%85bLv0JJ zsy2sH9=B?uS{)+U#I;T0IFo7LXn-JN442K5GfB{+cW|0#9ucQwTC$8JOkK~8Aitnl}caqFu<#_bqr~cv0-~xo4vWKpoR$AV8^{AxC=@wVvZ4I6qGJ8 zb>^04YqxZfx2SF38wAiTfIs|qbc1aX3gxk8rp-RGaC+Vtop-X&VWYS|LW9P^$H*kPKIcuiZpRPD8ZE z-BGBxI|IxWc%xWe%m9)lQtX{r$guwWlt1DogyB&^ZuC~g+l^FlvQRpFcG()EyV*XZ zsE`0O9gbt2a@HE*6iTT#{b9Th+ctF?+cSgDcpuMyMnr~pGVeFp;)?zBRc`w?L-(Mr zqAJeZ=XIPw?SG@G6no~AM$CLoo&dn!f%rALHE$kjiNiY;nuXx&owGJ}i(ai=q`mcZ!jgRkQgRr?aca7ZaQjTKu>JPcw21smDpX;8a~3 zv*>UxYA=M3Fg$m*<7IV1T5ok6Ltuu#cC@a;#N{*%Il8pV9N+3La&OfPNtAl;Xg)6O z+NbA@{wx!L*fjS6pL6(dj#emC0}+R`^iwJmvIDsatgW6;A4<2Yv%3vajX8N;SD(4@ zmklbDvXfRwkAM7VnJz^mx 
zN=ml3;&Z+1wp5%yZaM5hXGXg(8oA>%(3Z@fT4U@?I%D#EQ|r)Nc5{+wMhCp(iE28B zDuym9#@)0#%)Sl{W^5=0b9{0Y7H$T*eq|o|r>zSt(H$xI%1BcArv}h(=Px^h(-`4H z-U*s3dhsuk@K3M5I*HvD{kXl*Ftq%E!Kvjj(&BQV>1s==W9)c6e=1pdJPO2im- z8YFSl?pEfJ*;*>FI$GTI6J#ORxlbp$X2gRDCT*`sb_=dM@h|LeWx2C?o!Rq9qSIbq zwltzmRWVvtdu$pz=nK6T8kF$r)4-Re)4M%z9py^8mob(NrnerwU#|EdrL_?6xr8rz z2tWc7smvpnoj`WEt-Y;?vRHGRr|5Dj;k(vj@#q2@2ruJXG>N(HzE{n6_L!i9L%8x7 zj@8O-%4c|^ir3LKN-;p!r?aLDA3wkNu98t^4E=k<+B&PBAQd|ESHYrus(QSKkTF=Z z+=7*B)t@C29@}d0%=?c;u5+wy@G-}sXHMA21Gj4x48wk zpzoyam|88}>HqAOd0~B^)aQB>6AbX*RKd6gAhczWz-Mo9d5A0zp-k@}>CDGjH6!KKLG#iLrX>3Wh zw2Wse7iI12 zRh|+Z5Y|F5s?vf{fOic5R>i`CeGam=+`X9dFEU?#fBV=R>^hqI0I&$Lp?_8w*-}dt z${V3`AeJww`$7|bbym6V^u;kWERlZs+wsZSz48FUl|%`>faYe>VCf~s3!BdEU1{yS z7m+Gbk=^Fp43A<+Zc@48M(QrhP!*y=0aJsotv{GK}Ajln55PtJ3# zhLy$cs0AZ?hEPQ_ecstl)uN!8X`_nD#QWa8X*1F~(pE2{r=wr7T?DU+m!3Kj>Y;dY z&rQ{Eb{Zz;%UuHNqIm`&eAci-d?*`aSidT|UQjm4^RoEnu=3@I0_s!F$N0O5zOnn9 zcUq{{BK41WB@3_iGsdg?#v}ppj`ysajwWFGXMqPeX#i|5vBgIF6eCmWyh;5!_II$= zFW=w&cJ=$w5?90iR;SSoZ|gWw0=};Aq_Ly1CpIY(nVPQY$eV}j_5GhxD{M2bu%@c0Xg#{K5+9=4%8|&J>LmQh zQ3X}o`K*52V!Y&9y-?dwIODsTi5NT?2N8ATmQl9jBKZ}=Hu}x8A7+dJ?3olm=Sdou z@CIL1T+u36dv;xIPl=Ee_*aZdh#DC86Wn*CsIs%fi`N6VT|y{b@8 z|4eWzqi#w4l-d=1wqVok2mGoorvqGrIxeJg&Zbcqh`akTuiKfIbGqOH1#B&z$0Cp5 zLKw%QjbyH3XU76H)|kh{yw}wXoIW{_n?QmyHH-@%w9gL?Q0u333?;``b_ zmAvanP~&GA-KnF2H(u}E&OWbxyak%=DEYWMWXbABqK-6Dem*t(LU*R!(`xljhH+PG z9$N#SHo2{$R$Z#y&8E^~`i9v2dfN<@MHcct^VfLQih3_EFWfYmkBBE~@TpF=$mE=# zpd5eh{JBx5wGRB$0$}&G0_NrSyw^4Ptu7*#Np3a^3m;uJgAX1aiB148Gz9q;Q1eyV z0$W?d=io$Vs9;FjO9$oQ1b})A4L`(ul4YPB(;clk!Z7xSqPFCMl)uI5lqth6SwLpy zJq}{7Fj}$%dkTgXR}=~|wwh+ivj)ytT@U66JSB+bI9o!&mC)ktq$1-$L>69j-$>w> z7ZmiCP3)`eRkSBNMil1GRa#Z`wN8$b-R$3#&%c+-3>3Ss9F~eW7aKO%1F$aszJxPz zwN;)*dSbNUPf)Y$AUr`DoUfFH*_4_L(+wUCTRyxb-~Dd;Zt_VQMpu>`phCpM<|$!l z(fwI$xad*a(5K8F+qt7RIUr&hA)gCcvh-CAv^QN=Cq5Xx-{=3MuNKA(DI_x2o*+!o%I|C{cjpZxKCH~G-B1J6&Y#ojcj0%CxWH?&iKq>IP( zABdyUwKQTMVej0F%ok1J8RA#if(=6MeC};?yBrXUYPZ8aA6+U#)hC+8ik*&&GNH1h 
zSXx1TqTbq=Sis7NOL+ELx3>~eUmAvHmL&}dmGVewOPb>zRXtkkDiCX@ycmT(*i?6( z-|!c0HQ8EpRH4!rQ#qXIp$f|gX_c0_W7vdnM&L+)kbTT+p5;;m6jezq=lrj+nnQx3 zzQlK4vsq`ACwa3)mC9e?Aj>6U^nyfU*xSLVh|P{gb8CJ}sDQaqb^k-W8TdX|V^jqi z9xTXCsZd~$ewfgvwC_V7<;pWEoTmH6u1XMJC;Bz_*cz`)YzmeiX=tE3Zu{k&FaOfx z;(Jlq+S+RB+)>Au8mr-wXri_za%-J_K5m2aFS3s!-w(VT7-5lDHH=*3enaxueDv*o zjq2C$f?{AHHRu8Ah?TsJd$m^mQIxGFHR%OhXPdD!Bcim2@QAxJy>1;-!EH~~+uM=0%@|6(b`-wdCK9o`89qj$8%}wu`U#zKoB8{!0Q=`mR%pCfL zFzpZ>Ab*uia6SQTX%1+SAbK5C`g|VzVclk?kqTpR@Z?*~*wK|J5yRlean&ZY%kffn z5*@xWTMe>8sJz%d$;uq?R>9hSKkUgW)nZbJ*;w?4H1jWydhPTKNV!&s3qbusy76)w z?IuvVh_)e@1n$)%@r*Yu$U>fyf_;5Ttkz$tc{CeCFhNT!q&Cd7)n7;K6Fp~_Ze6PG zBDuPp$HcpqeB|`XIfN;{Q0nNE*~n4zZpkO6v09%tV9nvrSG27pWr|@kw7cOIg2%Am zf^%JQdmSaof9`v=L(FNVR4=foq8==nEAY$81c4+My=UAwA#4*Gauv-pQ>xO7+?p?SDVoa5A&4YXu@egb~a88mmFZIwn)t?|CK8E2I=#bCX zKJiYqWu4QRQE$*g+F3}94CwZD72YKomx|O0$vI>tTsDjGkcLEKpN$_jxEp<|9~K~J z)0iqU_csY4TVoL-%DAz=Nj=X0qey?=!1bo7eo)Kpk$7)wztAyZ1x5;v>v@lto)u)K zgN*SG;qU7m7YC?-B#CpnlAMZ=Im_r<(NU%^Ki!e&*lGRr>1ZhmQs|)q&}n>4sY{xK zDjK5Q%BwGCKHykeGh~`OWq1qf(JTR+mlpTW)j#DeB*rM>PmrXw)O;Pve^Yc(EMUKt zF%s^Ac7c=?_FTA1*avwTV#VCg`qmw)azw)#omylj?YyGC;T?+>qeF2Dz-kB;{t4pB zU%(V2?~BruQC8>-bv&+iH7@$@Ppx%nR>VneE>2oWL=!4r7@Ob{hdYsl&DpIdyZjzr!wrB@-kHoIYUB#4&71n!Wm-Z7)s?s=XoUo zj_aQt)-pFay+ppC>ase@>-$nPzhg_i`BROmmr;wBC9e$ZhEKOC@U$Gfg(C;pBhL9KAK)}8N{Nz9gN`nCNtXOja!MDcv(1 zOKj2995U)&{&Q9PC-bVT^j~tVKbTp8eoOT3`1LvpH4rrw!ZuQo4fafRXL(QwOM#3w zmD-2)<-QS}r+i#(`{am!Fm>bg$F5_3Uh+J6s4T~;w5e3Sx+!O-9R3j;-boXBf9$Lk z1!-9W7S(G$QePgYaNjJHUO1whp}HHin|+L=G&0iPI4*}mpC%q3ubagyU(r_3h}TU8T`EHKLo3NDowKp`Zt4tF zoogQRS__YMP_KYf6w}WcpApLYjZ7)#E~!ej;FyQaYAYKXLk4!rdR83y0%bOCc5pby z)x1w#faC$;y`N2i%$ve|isG=v9X|VDK9W7Hv?>w=*{cJHiJI(f%nK{ zy3AODFWrXkfYHi!xZ&Z+d!7StZ996s%$P6$@6@8P9|*WqQpKcYh;CC{q-c~+B_&RF zDZbt@ml;Ai&KL)ca3-?=7lBaMgPxq|6}@?7zirElffe|N0}M{QqwQ^~Pp0G9^;|HZ`p-;0T>hcBMAg#ko+ z?ISIP6pdPYwjW&mBe&@ICe~h&spo(1>bDu=MUqdg?pmU62E0_>e_0-{%~Yk#HI8)% z^p`b8)ITi2lxz7yjOr3``=-&uj7!|XfDw`uk3W$K;GN8ldx8iB|B_H~Zfy!q+O$ZL 
z7*V`y@4M{$DoI>KVNHl+u#*||h<@m}zn-6vXmr;*K=3x@M zEPLV^tjW`F596Kgf)hZ#?SC{?!|##3>(R2;%tG$UZ>eURG*C7-*;L0-T;)CnaVB@* z-T!8tQ}c0GfGImshi`Ye{4Q0zvUI%qb7(fSBK4hXfe&9p{FqA0Rb;J$;jA9 zF&+FP*t{65)QAa}+H}mSy7^5+)rtcY!0MveleS2#CKE1`M}ZvlkZMN=huc=j$ZN^a z@n@Ty2In^+3DNM2YiCKgOF+Hf2H7UPK@i(NFuB$;$2E+*y>XJ98s}|o_jQIa(Z-Wq z?&(uPz4v75PcEpwMEq~=Apali9J7Vc+AdMZMbZl@Xar~u{V`o~33p515Q@3h<|d{g zeL_QTKI5oF%eT98jX)beUReSJF;M{dJqC)@VhliAly58qSk{1999r}&ctOe_g$3Wk z?Z-Y>;Ntw#iWuD)YBQH$d|v!6)~M??MS$v*_97cgv$#j$as1=_!4TY)!DWOkI;$#R zS#HrfTvLnU?G=d=YQgQEg@KzG=T6Ds-KI>Vjq!c!c*U|8-_iNb_c(6zf%@?zjzv$X zZ{T%e{^CZ+tQgk=d zF4;_@A`vAW#;7XH^CVCNkBE7(aSixym@Yp0&@IlBd8|EaKpfft2B@kckH=L$FpCOU z08qa_-FI&0j>a-fu(c~}L6Bl%^3lP}3+p7ucEA9ou7ZzRi%gkg0nae_2Hq*aRwk}m zRKFlTgX1m%n~FBXCXN4&-D@9GUHM4%%_UH%N-eSp$V0l$1q)Fs8>z zC}?E8!qPPF4v}KRsO`wp2(kD%qB#8U98TqDigK(=Gg?jMfGk!;Q@8H|?ulN#|mnIN1T7U42)KI@HF8slc!c1|qNQ6iKHvWIcZ0%C%!4dU_ z{=cDrymV*iUz_Ov4^0Fl0v7L6N%#%ep8^4ifSHcT7f3X&c70khmv`QugySnz3WaG;( zJ87kABfxD@_=45I&8lIffhso~O&&h^o_fc&kqwm<*uTtNE)O1kA>hf&W|~^Fyu{XE z6d3pq2>`rOkg=kB{32VrV_etu^*V-s>^Vx*8+EYi+~ z(WaXCUnVD=3UI^S`>_tZTdz*%7r>!M*A7Q<{FoT3o4Jc+O=iJo)7GUwL3!#$L1YsP z*xVAE^@yv0nDHOWG7kWAL&@A@!^V*LX{Yrx>v3ED69wDjw2D1`TvRJjkDg<1T5C)_ zA{~;3eJ}ol^413HKjo z09JAtX6`F?u2PA%Z0nkP?o!C#_<=6-8Usn88icg^=*pGv=aU?HAk?hNaTaC_#w#s3 z>UMj6zkhC=G;`fd35NB`9tN_zVx#F@3OPH9$-YGX1o7?KnoRlSa7F`5)aHfU5S9rk zNZ#`8j5*regx+8HGDWVs<_${U0a3E51t_6MJYf~0LW*J8B&z4qUGftHhWF=zq#&Pe z)W~ryYAU-*sj9yBdAZ8uvZiv`v? 
zVC0uWevH!D9iEu7E6{Fn@Hw?>IsIsREAoInU>es2yRqk5$O{jppXs|d^%`pRHLhQM zvY8mQd=S>HnYJA-L3|s3r}pMq zYgj7#h$3jLJa{R3xO?UZA4utCD)3>tc>kr|3`TcNli@?gXz{F4DJNrWd$~~qY8t>= zx6f@PH)Sc$X~Ig#4{++`=K9_^EU9P*s3jgRh;#BF_ z5;O0MzWHORrSZ;Niz%!pyfi z8g_kub+bBRhltk4VFBja$v)3s>BZ#d<-y%9nbw*{eR|N&#-}!p81EG;eKz6B<0VNE zm%7As6+Xyt?OaCl2dQF#_ymdWg?Unnb~h|8#Zu;|&TH`aAn>QIz=K#JdNie*xl1Abn|- z0@057KH?F$*=s=OAF3doUvWE0Qm&ngb7P(>B8lU%pL zUj_%VF!<0Qn?jY-G0CVY!wo`H`qvhBJ9dXlTGFGZKa^Z$iha7&F3XfX-%cK*Gu6Iq z`5G~h-`nJ&#K_(Ih`TnB65IxX+ccFuihrf4g6wm>RT>u#h`=m7M$_tQ)CXeP0xoQ3 zaW{Z?0NhPowbB{iI?YB_b(bGhD@Ji0HBws4>KyO7*Fl$E{O)6-kucCk5>0a7wu&pG zIkKFr(WlqrS?QYF)rD_DlJB5&xLzHa+WEGgpCHPoDe=Qo*5>cNCL;Ycj!f!fjcpJ( zek>81ye*a~r_LH?W1f2ve_JnWZa74KvIT@k%U&39+!P8Us}KyCq8&OkHjCF}UR}Xq zhw-922gxEb{biIn{M|?YAIbsDdjI!Ojz*P>LTM(~0xL=P$Zb;*CeBpexAk!(ZMM$a zB|@;dL+YdKhmQ6Hv-OEp8byP>*&IrE2?-&@K&nm5m2~o6_!}twk$Ui|udE7mhly(g ziwN@TjCo2R$Y2~JhZjIA!5FdVL$QeQ0R*LjwCb-B6jK8n9n{8G=ZdOHlx$}mp&FRI z^n2Cux8ctRu&Nx%!*%PgoqmYdS^g4<%}u%_b=z)UKQdw-=9 zQ;ts3!({U})F<}jTD!eA;x%_#`4nXzhsp?}yB17)`KaG!12(DN0+ED-xU$@sQ+&21 z^2H!%w3QH9?R~eD1x3H+bM5?&Z|W}ZPY?@KN!s!4{L6Cw%8RV`!K_}vjK{Ced8Vf3 z`sa969*@)Bdcf$IPFzqRbOY}@$bkT)`aAm3kK@I}EVV$%U8DWR=dRW8FGc&LB=A!e zm1R9GNzZ33OuY+D)Hz$(O(~9cf55F=9rKU_Y+=c)0}EEUD<<^#G`wqJwMo~<8+jY+ zisXOT5%`+yrOLYj0zN(ozF1=4{=uk1^Kr<@nFi{$Uy z@e723>=I`mvN(TD2mCg{$b3I;n7cX1w{b+8`m%1)bC_t;TCzh^_$KVhPDTiWN82;D zj>sV`W339Zwr79kL;gqWt6%n4|IwfSE=Bf#i{jen4(@1gKhWQD(gBtidohG9(2pQL z0@`A=-~<&p0-qq1d)icCJIgNM*s$L9hjgcbzVqjpqBEeqg^P)L#^o3Amti?AS9uJ#S(Q`dXc13(YMk)nFao6=X~ol9I#>vEea}! 
z{HpfEg0^wYiQuvgw~zHTQujg+`1{Sk!r+6!qqlCa4vGKHs_l1r^Y6z~;AUhAs7$P@ zz+^8xt{ig?)+sei{WG>Zvfk% zdmRl3oCH(_3CC9v_>8>$p}oK+-BujrFn;WLKDj>ZfLvH-hM*f%i(sft$?K)r5 zrf(}lvLi)P2D=f{J7DWMgA2M4^2D;GkKj-y&)?#?f(l^f)9bbuRRZD1pDl*xU3BKH z&(-$l*L#J0Dw7zyu@YN-Ag_NW1^kb^&+>EhzouFz{~>iQ;bVEOIb(g6eqfx`7EKZX z%;QWI*=~%-yv+02ekG6<^7ePWsLtbvS~Pi5y+Z!O!ZyjP>p~VNv1f-LX{`lkFE31D2 z-!0+CQC`X5;Ox(~Wvs?6=rLMOI|(iUXo49z$%ejrt0kI4nD-TZ6?c)!yCVx8JcSZ44IR-=)Y* zyaO_GF$vKj3cRuqWaMg!PD9FOz82WQ8e|txlZKa=2BUm6HA~1#>EJAw?DOXyysci^ zeMx}=Bl_dZ@?Yzp=uh1QckD9~P2DF>Df8eU2A!rXB*bIvkJE79hr7iR5mUu)*NTKX zC0?Zd1UafBp?PDc#pePFNL#@+pu(lVIxfnHbSz}OooaXYyrj1O#Q9!t6f!CHy_wH+ z_&Ofg1=cc&F#rsZLja^C0N4{@4MAR{)H)q!Jr%>f(Wqf?*NL7@#%ZKLiwOPM*c4Rk*w4S`#pLTED%ab80;o}r&uLg zb_ehtTsCbJDLv0RRs#FmdB1}ejWm0XcVvO=zJ2M_b-hO9d8}ci30py^LKlzfky)&_oy{r$7$7MG^X$IOfx@9y)K< z+F#Yx`oE&DeuYx~{R7GG(q-j;Qb&JD%&n@Zt$UgHwZ1o#LoqvyCB#}RvB+Bd3$Ld3 z#^J2u6j*F^-0H1k&Cq-2T?LJddq+>{Z4}=Cv06E|?%WxD{|Be2-}?LitginpP3wNdH*(nqM0NOl2)4p?ojv5bg8H9WGv z)aQtrcDoADDEj**ss=CTY^k_c|J7cMXqDz~s_{lbOeGK>X-;zy>|%dGV+-{eyay~J_{2XDq7CA$$qI8d zg}tohKnekG>UTH%8g75dc)4Qz)7azUQ9FKFX1z)IcAayLXwFW>M=?rMu!RTeNI4c+ zkGduIw%Bm}fn)I?+?a{YUFyK$R`)3m=Ca3W!b|aZ#6&NaM4LTeDu_d9Bs0q}_Gx^3 zUUR_&ou4pAOYH@^nqxDu?$Q7^#G!0gsZ%ef#Iv$Hp6P{6D$vyVc_(yCFOXBcz(BP|_I3qqb;K+KUf%X;BNMJwkS1J<0MZSoh?-?-QO6&P)VNajAeAw@ zbIoF>E%yl@yk9OLZ+kbvNROF52AY!bO+_$-6!d~SrqJg$=0S;%L4ESRce!Qu#CC{H zF2PKK9OZ2UD`f%Aeb(EZMg-21@pX^SudIAf2+cJrEGx4gc(^V2W}Z0{lf9_YfOqd< zPNa=eEoDlql!G3-YPL^Shx^)n8Qw3LV07#EpCGalhNq+1@h8um56kZUZh7*dJ~Y|M4E~a={=$M-a?VydqNEm;+?L& z);?>mz0N-SeBZhI-g}<&2g#h1JW-N4e{+m?yoItv_i z9CGUzAARB>&9@V)ei_> zCGSeg3(BkF1*jY2g!$ec%?bmRZ8)X@!6ZYQ<0*&hnhFrfjjbrT7jC>mA*{&FJTm|M z0zeU`$Xt+E?OHb6Ue|Q`)RZ%IWV;K6d78~8*^;_i9yA`54m=wl`BEG??k8+*MyY)6 zv7uvh&uyz#0^_j~Ub=bmN+Gr;CTuUS3FjQH>KCX~@d$NRmuY^Zb}paSamEEu>Uv%n z;M*&^g)m>)(Y?RXL9Z1BQrUF*B)OLAj#=D{bM~6HWzm23Aj@W#|IULzXpOC4H#)rk zdbhQ1Uii!N@cUo#?jzW2Z2{Y~f+suYbB~|6GO7*pmyOSS3k)odR+TfcZd09VOAK?3Dkban0Etam5q3;KM#a~M6&Npe$Uz}V!2)7U274o(Qm$`2D^tzv 
z5lu47Cjr7~&b$MNi{WVV2wFtFJWlg5#d!L_X&t4Fg>$XYv%1UgH|YDAWGY34qBEfW zz@7iK7hq#GKC2dklUA-6cJh$URp!qTTiUoxwn+KaW!-d{$+YHTb=fr9A38&axA)9`)kfuZ$l56l}?bdXJDVVB`11r;crbENi^09~20% z&dmN$e{J`43V?L@VZ{&7CFG&Wh|`wPtU*;kKHZvD6n_lcpquNMX+Lxihfcr3MIKR} z=|M05uK8tXT7_?OM4SNB!Yr4V-|c|hul(VX{g0Ye4g_`#T!eSmA$DYqnQqUo_d`0% zB;OGle}H=AhwqH${tQ@Vy|a%1t3pPdsvb-}O-YnLj9n6j#HRt?1Fh1c?xcSg)%jaf z{`+TFbOh?0g~9bfrC@@n8;^hW6PiP3Rt2V{laTmw-N$omI|ET;s z%c_CMtLOO$51A>(iN=MJ2l7!5y3~)W7r{hd`Cm$id;^_WlKAjSF7zYH14|x)lEUxQ z?@#jfi?CnV>|e-KI6bTXs`G(*Vg6HRHV3OB@_oO!1=v{&8ajf8CF|=WKjset?qQcN zol6BFMPU@gKE1MzSD*2YcL1#*L$T(>16iB^&G1`jQMFVZ7zb%eEDLJE>GOv0itTmY zWFx9wql!P;YMP?5D9gClTfKE*&Gua^i^j>v2N7F^3p6ALg$`1)DLJ0kaAl1N8H3(} zPX!V($iR?*m)uY}Nyy7`4XU~0PxigOIh-gTEanY?e|lVS9iaPKXpI@O+0ZD`qok|6 zmgLq|@}pi?sHzQo6$xPy=@OWfR#i2oZ8~?KmR9NZmz$v6;(sSp?zd&{|Jrc5p??tq zlMFPk2;Y|3RKJTEAJ4#OEbdR4O}rN1lqo(p5$<-^mHg7Q5ZCk;i0~92ftA-R@4F~= z5!dcLlm*Wq)!uw;*H^&aLhdVX56`=`=FXqjGm>w+kzU@)q1hgMXp?qba!E#8Xb$S{ zO^WxS2x&^Z$C#g6!c(Wx>(2K{-Y3;Q**zF^h*Ul)kzp1AtyPj`ScpmLYf{&nji$^% zv*h!LV<4_2ifOYLh9e#G_T1x@l12cdNnYyyuUoMGM1Mz)Osd9JyqOO z5FVqGDvi6%`uJD;&n>PQe0q|gj3rB-p_W?YRSjwOiPiPFCi7WKKIKE|-B}br;6A_$ zusq3kr;ZnGiJWMqNfuG9C?`B5&OjB_Ux zt&(Rj^8w+6v$IogP=YL8hpwhRgf-rg!VAXa0rG33@3+C^TxX7$_+DZM7WHNOmgejp zH8#X-mYPlN#>}Tt5wDHnCwq0^F=fNg;gd>zrs_XH`~byqG_I__#sNhPGk@Zn%swD@ zx`equ$xq?SPt+wZRPm>b;q}kZy5BJu{tmSVFsDGcKFW6stO1Mpoo^a0h!(L$SlYl~GfrHSz*=_nDHqMw>RK)B*%? 
z;zR{q#M)OxH|iy_VDD`4GJnWw#0CBN)227*WKjnUGms1UBC0_z&xc*9^3xSRN&eiz z8@fTTPi(l_SR8Zy**GSp4 z&H12KNm}`&qnBMd5>{3mr_9HMxy5gdk6u0w?ktF?wSVTk6Mxsc`@LJ*-m1f+wj~LV z`-KZ|Oge7m3>%BJ*Bj>?^VCZvWQ$C;hVMM{Z+>^r4ObduIoxUCpte2t{YqCZGoQ^_ z3jHtQ&)y!)y+hQO)h)hX>lkfwXl-GR8BB**Fm}Ds#rt!>zN=**+TS zNk}%F;>2nYvg7oHB`MXNcF#iC zF3HyP;OF_|tSN&X-@p8*#dSSJoI+9CL8dvmuhPgprsqMZHm1XNW{YZ+`K_IfevWo6 zvuvSz0ijDN8=uioIiF_%#P-R=ONRPp`e07I2Er1j#x*S3>GgxxPPj6TS+0NTF4XXX zW_!L-fve&>Y7^D6a(X-rPwP$I4!b0D?MXDr6`k|K9vl_Ol$U%qqS7NT`5B!C$`v@4 zFeA>Qt207_I2uPieaH5u=`V(hANEj9ERI~CNZ-G9H`X3DV8z-*%yc->-Ya_c+_v0~ zif+orvDgFLB8qhWylg6;V=yt!%=+eBc!FG$3NdCuRVG6E@k%Kg$u$(Nd}yoj5q7jA zC!C2y?gHE>b3NlSL0cv&vv85wsa+lFwVAvEsh-xQ87*PF?JN6{i&q@+Vvb zbJ(KCambGwPAB9IE@{iSJ4TeDj4uTclP>dVpL3f{-!H3(O_R37-gM~d~ zKaU}WrOzh7En4h9TJ_4p$(I&-(^N1>|=olfm~L z>)5O82V?|nDNAHyl(&l>sA)<)+JC>$8NvE9oUhXL7YOv9Kw35af~EfIETX`FMd&Ai ziR9w=h zsXH1D0vShn<<6(ExfUWWt^)JTV^qtj>(&s*)UQehNdw<%k%B_aUuAibK^sJ~oVe*p z&TKU06d}lydB*zHY)-hfleCtg^XEUn!x7w1gcow& zyueLN&y)9@>J-&r^hEr8!KRAn473OVP?YgnlNX7Y;q>uYY7>S{5^y4gdnQKS6aNWQOx`AxoU7PYbK3dMyyE4~vP!2AuQOId zorW=krv~4f)^6JIj+p~V@d&dYps-4%w|uO1sRE9+#wNmg6Qq0vj0^p@B`Nja)v{f1 zY~gx`E3Nss3N3!hauTykXOW}XKYy&znoSd#Rqi5~3@Vo{bgk>Te*~X}u20QGFi$?r zGmlQ8(@2hsdqM82B@=7DouzYd!~Xkc8I144V;;}Dx}^=DRx}i0=S&jRWLu&Tcg<fhS)(E{^si*4d|3~^)_gh~t)Vg>-6oT2PI4b8uMIVodIv>Cyr`{IXJsC2opi$m zJEH*HWnkwguk5GG(eHnNNVUHOMNtDMsZPtqTW{StYDvBa<-GGbNLciw+oZon;GOLm zgAwl&3jm|^4a9D$)N$N2oDgM8{R4ynj`c-6#o|ZzDxwjigN!-z?sCXni?NFdj$bVY z@=8*YO?8cJf^#uB+()O{WGL~lCUglR*mjF!yw2t3h+Y6MTM;rvI&?;DlGYJp!^)A3 z3OvcwT0zmQK}^qliW=#N*gQJ`^|wz1E@kaNPeH)Vt5gpeB9`?97&5_M36O?dES*#hDsC(#q6UGrP2kY1>+JMx`V;0j)Uzu z2Fu4*qt}`z7)qdbet^u-hg_5FKR^zE&g#AiG8AnAKL@brp*X44eeQAFuZ?k^!;^a2 zE*-^bw4AISP3=2iiuHkE<35+E!i=dNH$9{v=E04n2CK@ zm$X?|P2m+rxu*gBtjX7TRyaREH1$*r$Yb7qSuxer7<-6pk?-*|!;x$N^~F^mQ|_*z z-GY_EP{KH05O>W+ZA8&(VNLn#j@A8PDd z3rs0S-L>X7jhz+QjWP*)O5_A67gy1b=$YA zTJ{H1t-JPl&p%~x6rtM>0HkgqZPd0|>*$Oee<2$2U4Il|>62_|?Q>!7yFaYGnyYq9 
zwx}8+xELB+hIGJ!rLDGPBmM*UQh#$n?J+*E5iBKOw*2Ot_C3m@0(& z%MBqdiTo+Tu{8W&xdD7aC47l%Lfy_QHCt})qsuJ49WF08WLQOiTWJyq5g-8p8Yh>u zls{X<{+j}s0stL;U7Va4{S<@1Vu&g|eu#BiqaUEmm8am1Wauo$BJD5ff16e{c#EEf zG%CahCqUD@cZEdB_U9DmV{ijbbgwyXrbb`0_!7**Or|eNn52bOc>dM=qcca{RUh&0 zT_w~mM^scMbDb$e^l5jZguOuXm~P^glP@yVnXSV|qJzr%MMn>;(!^a>3=>JABv#Hg-+5^$5L6xGrzPIon z1KxSxxJElUMI4?KidB>{2+J97c0XG^8V@8;ygAgmhbS>?eDBa@mDLKy77P0vJ)m~J z2rax?#!2!SA3Qd39QfQVV6h9boo9Ly(cBZ=gn-sWK#tUJTbisuONNVBPvzsiCud+< zB{4HxSo^C8Ap^_|d`xMkEU>-K!|`f}!*}bNr;4!RFS?+E7bVk`Y3q@*<&RUi^b4z6 zEMk?<9$8-i48#H&(@5p6y6T9;9dTo51m&Ew1?V_(Iy!(p(v=c)0xPR1tMZl3Rnagv zdDO=Jl35pam!kF3QE(z!m`H*jXky_BFOE2`3vW-fNq6E*9U_{yzMS`axo+!Rw>OJ^ z_}5gzKLXvq>}&h2LAMzJf0_Jk#Ij;d|KGyOB4-nde@jBUvGK1@$oq}%^k4P-UzWJ{ zv4;0gBKDqAFQ&QWv@ICi>q{M727;D9C`kQEJP|DxJUj>7OE6D}&YKJ~M}HHQN9OzU z9{oq~`ycF+HyHpko9OIb=*9q$1Yvosg}Q+&gO

`2n&)ACYK}0XcU~@RF%5fod)= zavXu1vw>y-A$s)0!L$`Q0jzW2eA=yuqZK}@6F@6s`SU+{2-#PJj6rEU?eoIruQ3B> z#7-5ligyKIg;;VaVTn1p1!H`9SIc<@I_^&OqYx)SyUigDlRU$Lv#Xm_&k<{HU(O@u z4R0sxZvY7L&%fyO&!6%dRibxLJlYSo+$2JIYo(aoMgk3nDHxNo{0eCx7E60zkRvR|xq@OC&!-YzyUPoud) z>^o)*y8Nc6#^36}XolOx+z2s}=$*Lrr5NOG05T@-+{60_?njJNtDNKTTEC@d+Qr&h z^3?4cTUs*OOeT*pk*D!2qS(7>R{8nPmSN(6nE9l_m$|Oj^Mr+C$px$4YahVS>OQ$k zswWxyf*0h;(>%@%Clo1e^J{awB(3yAs7CEz5VF zOF|{RyWRA50j*EZmIy5H0)QQ!gfzaZTbgVT&3@Te9rC0l=~UZc$G&5G-~W6f|D8V( zxPg#*-porb?{n5hR&#~rzTH_dJG|BD7r9gwiJZ6|TaK21MjSE0l^M|%M%bPuIi zBaSI7phuizXw6T6cx-H~DU}C^ESou$5&be@VQc?NxP7>=_`%Gh=NH~yAamEwBb**8 zk#(G53@CYKx~ad`HkaHcvuPaJS{|vm_(G)?k!-E+d2cpY8&*v9_R#MaQ_Wf z@n?m?UytR}%|aT`=RF@_*HIiedP&V#U+0#jGv%|&J;+bX4R4-J(4WGiaU?TKA;k0e zEWQ%bY!}FJ4{1jOJO7kSc44}vc^PIuw^{!N%-Y@v7 z!Q*T)7r80Pu*2>y5I2ov(L2=hk6$BEPJhGq;Q~b~dBKqr5+ahJmM6;GrLbGEw&46l zVQyivv2h%iq99yyLGU*{y&_pg~Y zx+Ux$I}`Uz5cTyqwIH{vDY;72o$tgjtz+Na=>g_kixoiRoIQrpbLY}Ow3PudDoXo< z@3Mvx?^a8Gwc&`#vd}!n9H!25fN8aG)s|j8ga}HUK-H|(v`7z)Zy41UMc)@1J4FXj ztE5~r5C--(4F|A75f}gq=nZp5(Cr>u?1IZGBjm;uC{FeZ>fkw&1m5>}6+ozU6ma4I zJE6ch<=)MICV5u|r8>kWlp&=X$|dROhE{Y*qeIL;i6d6F0Euv(qQ;21+oDLK^ zQZfrwb>6y|(35KtX&AJCs0`LQ0d;x$C)GmLc?Zuv4 zQqz9x#*&}&64$CQcxJ^QZ_eY|RyF@8#ks53LfgI0A;hq554&6IQbdL&C^>2~Zx3i*EVZVZqLlonNovMO zfRs>KwHt|_6C6}cG4sIi$z{Sshj&2Y{~N#wMQU3Io~o*aX^7h zW8Np0l6fz>q%r$}@U1E#kiGBOm8Z^Lbl57zl9FDG&IonaoW!L2Tg4mn?&o}A$Z6i? 
zTW9+-?r!^9eUw9-qAbZByJc}8`AlC0A2i4GJVK*5c=DP~;r!b+E@m!A4r%+z5>Fm1 z=25?>xu~_`>P_o5lW?{IZr__aezi;AB=ZAWIK-ptH>k6csYE+h;lgGw^6ey;xT8~N z%MpOJ)Dl9akR6CbXP|(Y*hj zC5=Sq7<~s$ow;G#1J*-rAgu1}#svhO%%-q)-UMbe=n3&2rjHSUTN6JHBjhd#`n^)M zolU!*1hMc{ZmD2G%x72FF&^b}PQ+cud|nrfKC)y%`?>7C~Hfx7A>F+<3=3@3t<;zd6=y z{!D~3XeD^JkOyogx{t(j*tq9xTsqRn$wlA|bH7`9vu)=mSI0WU%L}z#f8CLm zZNJOV=?M(Up8)fM8tC?YU|lw6lp=f0Rr4&4QJ6Ue{NxPiix*bd>sIibx9|_pmwv=# z+Tj&D!|xns?jFyG54IYwe=QvnFC_FW%aMPJfMN4fE}UaF)QvwXMnj>gP|5{@^)!;k zHCJ8bd9}Z^#3PDX2LfdF#p%c>*Pagy7`)hqpeKJ;7jvt=Q|O5iD|oe}4!_il;>LD= zm__Px)Tss*k(YUW+bS&i&@+c=X>PAL>^ z5SAvV!m7Zf2#`z9lCMg9TGchS$Z~^kNne!M;}K=>`nV$}boi5~|KKhViT0~zU(@0r zivoi6?mz!Y<|n!EQ&#_T40XE^P!j;7DPUs$*D<;)v@tEv@FM*DvhYH;#-g*VtdacM z$=rC()46DdAb*Y|fVMKz{Z)*`c`+S~H)5T&4foID#d|16i$x~GI8(0}5-tHj9I!>N z0J*(mK2I92`>Wuf$s1)&DOEQ8)9(;@kuZYiO=x|ssI7JR)~AM&e6_R9x55F3bhr&^8E zc~vSJdb?p}=5AunX#n7;2!H)gln%h?V-`b5c@N)b=tJ*5EAiTalX&U@P)lJ5vA$BA z7*dKvnX^#-jKF$<4{2n9`xUG9&{zo{in*vKaN8_>d%`{%emZ|}zOkwu?eFNZ}$#pOscFR^6z#6{># z4)HpoM_-UY4w*Cf0Xh~V$YMLaNlS3f6UpLh5noO8O1{8#6yXfT~S-|I68gNVyz2LkuevW&qp=BN#1Qp zJjyw~1#no>lo4BgfVWf>tF)@i)EV05Mxz@a*xc=|F`NFxbFw4SKUta|u?)vGx+Y8= zC!^pASLoHB~cjl84!PKC$?!Kc?XH?o+VxM7*2*7mhnE(vHBN zhXvfN>G0xpm2iInGyMb+e5mz}(_cnkjDmJ1t5|M4)_A34^3+7{eTJg#gg*TqwOq%O zg0zQ&>G~o1pT@4gu#a&v^rbjpz%hnEjl&)pv_XAK%sGbIOni^srQEMMX@mDJBl5fk z6t3So0nsf{M@BFc@q-{(iz4J47U$G=-`U3Jq>oQ==#bWe;X#w)r(sfDwrweO(5+1 z<@HH2riI#@tRP}w#Sv_)&J#F zIt}R)CXf?I^V3Hm^%2)(>z4{Oxit|H#LM2D?g1gyCSP3RREIX!rw2{w+=LZ7+ZPnK zl_jLp`0XaT>EB;?rSPOhJ6AEhj(LIh$Dd2 zpF$DBQLD_B;CGW)GDx@vQ0J*-1SS(rz7aA=jvk5pqWi4>VtG;QD8;Wk$u;XkRnSc2 zolEZ{f$XDsJ4>MJ=)_yqp%}5w$Pt2QcAX(JlMxGHD`Kmnql`e2xRN^yW%3?K6~u0Z zh=!fUs1D2rsMVy7a@-ME)Jgut9|QvIaVeLeGkKo20E!cKYdZ;0U_2t9+SRUwV24pa z7w|zb;Sji%TYhrUN^t0t(;_|XwnWL>PmhVgcoHHDruheGmtp{V9gSZAiX-XS>>}LV z4z2QGe*O)8SQIy`XzSElpP=r2n^UkJsRUSOqK)=`3eD$Ns750>$({ zq14YGV=^oYRXELOUc;bBs(O1};cxQq{@}OxdxG(Q^)>@NR^E}>f9B#nn=cKVSs!0Z z$inI#wIw)}ZI2V((7bq#3|yC;c%s*08c&QHouY?X`VohJ&FaTg3LB%F-#J^uhIEV{ 
z@j{^L1~H%UH<)h;#1i!pJ>m-pl0k3De6l|zs`YXX1MBwX5a`h=y1S;n8hRJVGjB>e zafUA$%-rq;^zQ#|YSy29clg|9i1@~nk6>64VmVuMKhN{YEOg3ySn36MdK;?xVrV}P z#DIgfAn@@(FrV5xfW6xitCW-ET1yOpTWL2Q0j<^Bh<)V1`oPk|n+-jN^ZH3|4ux8V zU(1aEcVd!8v^ktc;0?Cu@uP9m!FHXdt;J*j2tZrzjd*v;vD<<@Ir>>1kBWWjpDN#v zDPShNcr3aj-lCd>41zLvN@LTM-o^MoW+ZhR=fj)5kUo4AHWfm7a(-Z)->HhqTDVz= zp(In~oU+sn#q`JE51>a^p925tUu7ZMjGqy$?$CodV$9gTZNERY{Qg%u=zsOe@bmo` zw&OM;et@`xp~sZ^h~*2Ip^`rw=_OlQPXS&511dpe2>OI$Z#T?y;+O}~M%Ry6o89^W zQmO%bFaB&T{@-~F@&ofPLZsTK!fz?q9;-d;k^XA0+ju zo@_#jrfSmye}KR!PNi80-Kaci)EJD`DOK|P5{da(X6oRn1Hb>IG=tP+P48;-tgOJC z0*0Tfrl9g!RKvCQZ$v_bub@YTy6=@--uckGo(&&mp!4BJ3#bM^qagwcO zC`yuK>%2Gw3glKsJjTYwWUfhX?MA-Nkc7J(o|TZZFSYm~;qaK~jJM?lRTFaoigGAP-P=NnG(P;^(_*_9Jyjd`5z z)}TY=9o$O<_`)7afa~ecrUzgU|I}>%Mh^akpZ9ME$$qD%wwkJ@o@1q|kz(h4l<>T= zOr0g}(ph@Da-LA1I(C>I_Qw6$X2Z)t0o>u=IRsoSb{dZ&^swH3$aVo+L(Z3{78S>J zdOVUK!{$TQ(PBIYxT zkwA!R-{XU|D5bKR7X*pn_1!R2Wn{7f`*&|0$J+Df6=Bcan2~asNCOEL`-W<%(bNY8 zLstTI0xIQNV|lH{9qC)68?G(rkZD2W-&xh8cvcc!5(JVfor|^VhBYSj{JJUUB-Vm= zBsTL6pMC3GmSqSeK4B|*C^;b$cA0%7ny~hUJf;Ku1UB}}g_tlA+Qk)^IkUcg`~#$X z1g=Y!DjKqWF|eni#fqQ?;%3E5FEKUjC3$h5GXCO=Smxkt$D*|fD4WD;FyjE?MW zT|g&@YL(V4sAoZ~G@kYgH7!Qvf=$$93S@sq#XQtsBPR~#Aw!_s{J=Vb83WdD?Veoj z$R#6&o~LXYGKJ+0RW}c~hkE!In@-G27B0s#?ps5VDR_W z0vL%(!?r)c_u-b!e9Gm5nSFFIrvupD2r9RaQYv(!8 zG9%e0E6Pf=m>2HhQeMgx;wG9_o0~AWc_rt{8MN;6Bxx+}M`mojlIxE=6IpDA_kpB% znw-P9KRoK96?!_>_RfnL48wgw^+5@Hh~wqq7XK>)FEDu!U4_`L@%8EeQy+6*y)`Efh_jtb1$)5IL8nYAX46+l-vZ*sn-GN_*niEF09b zlOI0Un95cnZ!eZ%31Yt&+!*ovED!bzohxsBnb;PK1@=-5#ra@i@$-~v>Vw+h-#vM7 zEKlR}=6!V>w_A0P9BaOJj?C8JvTEU!#Yszc+lcgCh8uI{O#%u~(WBIXnW+n7__z&6 zoeU}uw=JjffNOT-A~FdHG2X0!=kP2$zmYsbnOHx9oe z8qlorAF3>*GH6Y1*{Dbz+PF#rjv@!UP)&oKS4J7G!EL02_Hcdi&aejXmv1Y3WGInb zt+In*?j+s>>$X)R7ud;};QazOja<#KfjGVh*0YIHk56KhckUfwRI$haC&`E1NhWN< z&xzQmPIAv%QA#rguW+m_TB=3@KAHE6I_$){_>39uZGH&Ps)XFxGT;GNmIuKa+efp` zGPK-Z8}_aiS;@$03@K<;A0`T*tle#5C>*kXE^zSMu75>)ej5w?4FvSZgJYok_kY2T zeoIY!0f0`Lr>G3zDJGX(iQcurK7ACd)NsNlXP}OQcfkZ 
z(#1rI_v-#Pg{K_h3GgV4WO|>)GhHd^A+|#zi6*%w=Wmw*QmmxVW7&s6x zNzlQYuwL3OoTTwye^&2e0VQ3!f_NlSBdFFVvgXC3TpHfr-FL>Wfo3ca$a_IV52HvJ zf-%GtDb3iU}VqV#mb7UdG01*$2+}3;8`^57^<}hVdl!eGHzL z^8JnS@*g~dI|W1F+b-&e5pAG*>T?-7=<=k-SrOIR1&=A0r;Y^1P=_DPfs&kzVIIvY z!(}}O15K-XU<#8QHEm4N?;gdged@MOe-1bivTJ&)fkOJbqz! zmPBqA#R_z$_i>mL(qi=Ssx46zaEaBPwhaPe5Av4`h6TTF8YVWYJ}OebiggbN)JvuYlhf#IP^9Ks&;^=`Cm~2|F?rQ+K%<>mOMGiqlvC? zJ@jG|V&5`t`s*a39!(8>s{0oBPMMIqdFV0I&Pf6tj^$IwUH)tcmMT67=;s`tim$J^ z2%u=qbCZjy9qGL3T9uX+NR-NcYbNn)vj^+5WA77@q19Z@d?+Fprcoww4+yy>5L@)Zy9=a(?;k$h^S z^Noz(KQs)GG)mq?TKzi)-oKLf0KE4#B2gCqohFYo!_B~*bUQ~2#JR)?Elqo2@0<$q#`{wr!uGMfM$fLbl6 zx7S4wUOns~`TZB_LrHE`PCDEs8*@TO1&g zsE3~F$Ix7S$Lv%v>XLG1eX2!Z4UL~ zm@Mm6icD?t6W@2m`e2a_Au5}6CX0nrHgR0<@ji_~i5{!0Sn?lZ7O*ka`52^HRRNf33ak8dAy33xJ*^%BlA9xf zhEW%=Ycg`o{qfatOqVF?8spA^anOk%7*e9+IY~bnnAo0KWkF@+dK%-d2|u&Eph}%2 zaoo9)6ckX3Z~;(@m)hTB5}v5c_wPZL z-+K)H|1=!@<>dVRiTaPW4^{VD0D^@A*7bY(+f17-CFu|0x(p2J& zL{B-xG@=CDJRx7Z-;3v|cTDnvKMLupsF2?ouT`O>*^jylmVKhA#7(=MQ{OWRte{gD z8s)Bpwisp=kE1ZI`c6&yd`7papwLET4ae>~fHPeh13v9O)+_+&3Ckgy;PwRY@mm zb86)SCjJtk_ru>Re^oJgNf(yEWOy55fcV>Y>8M3RxOn+LfQEm6Rs4Q?2Nnc5Tg{47 zQoESSUDDKfU!i7QLO&2O@sbVsUV>gZsZ74KvBYzg zYby#Bmdd*~nYbL4p~U&q=OOalomC2;5^6<|5bQCY(1j7#w_>@^%TEEfIpt6fq2pX-u6QHx|(detR?q)Y^`j&o0Y7>@+@fI5jD$xnLNcA-P`bgtR@I z^dw`mXEI{eHNHAK#@3x<9Zj;!5`}tMmvr*A~ z&L@nGTE55ks)M)bfNx5}LwHYf{kG`d$V+RMwF3uCndW?f#En777FUk9e+@9ybxHoq zB=}EJ!hQEB4o~Tksmu0*}{#-s882KlQQuhwaS!T5I~hnD+gm6A%CG)^*m@ z=rw3lW%(LYNw;f6lN8i%LvG-6SAgMhxpo2@fOg5s_K*x$W17&6d3*;W_~%p!m8`Ll zD)@mCF3melZ+nXh4{d=kRNo$UzfxTpGyj%3$1!kLlc1sO{aAhE)$)f_-lc+y#_N;x z&R$1?JF)~0b*b1CYsNW%$MvSkKyuWOzf0c{KPwPVj`Ou^s_(UyC{WM2v zQctIc^_k$K8z`wf-NESOiP62?mAlE0FMSQ14Zg;yd`Tf(V(dG1eyj(QAP#x za^3#52KXzn?jP*`Cno3$BE94CSDG5Ws^80+7W0u2VHG%2ig?`*a%MA@p#yr zU=zDpixg?>XQW#1gr!)d>V-`ETqW*L6gL7hNMQ>GZO0;>{C!t?HCv7*3JsopZ(lPm z%wO>1eBoCs!)QhxCHIKzrV8>F9vG_d$djM=6-S($9`oVI!y)Egg?h7jDzF%&u-6R7 
zTVH-R-W}f|WZhXPz(n_2lKY$!(i88Dqm#!+e+V;EnqZ5JWbdjv!5cTbMECiuewEgoQS2a_Lzk+0k=46Uy$?Bd4t>&0aXzui+`?Q8_e6?an6Yriuf zc;FRsI<1#*Y<%tZ@kWUz1eR-~$_L%NpRt(6h|_o!jH{t?B5!_)`!1_-=J`y;nWsK$ z!Ko(Smt$FqwM9Pk@0wXFFm$eYi(upY^SO#~WCdo~T{qkU<8E=k?1&NvH5wb!CT1`- zYq$hg45;h6%avu|eLF3s*wi#%%}00oBo-GBv6NIj9^x zx|-->w4Bc|Fl*M(as27!%W9SE9(%hrHubDdla!H}C%0clsnoJ}IaQVxWiE?7{A~5= z-N@Kw$^o@m5z=rFUn3CZrmaxv_>hOuQ7w3}7{=@hYb;W+ojG~YqaMpJxne7<++NW< zOCLq%+U3B4Zr<|KV;|R9<=`yadADIm*(O2WTYPPmVylOJxl%ynM%#8XHCle?T0X47 zKnjZpl?POyw{rDYscAYgt&f|?D-;_}Z3Pbw6YiwD+6)O+-$B>{8C{oxjLRd9H0Cc- zQqc(CjgBxa>?QaNpY>(co&US`a8%Yel@E5yQ|U;V&-1tYHK4BRwoR@b_>Em9W+`NL#H zrM|%u#sKr#T?Kj|@hl!7J&7*5_cX)(jz}ByRt-2yF62z=sH)xBmAj7um+1`^m&r(I zBu}JYJOMxaSY4%r9|AI=Quv*=X!YF+lW%l#oJ)gE-Ei=r+ynjPh7?Q(d;a;p(?rh2 z8ikL+*Asr5n*86`*=t{(-J(yUlDBYretggIzIwCEH>U18D|nW*wN{iMqV+}tu`2E4 zOfR`}Ivy#HqXe%X`z+2~Wjr*71Ry@O=7*zrlw7~dynOP0p2qIZzUYb`dE{E0mMk*`$b5*)ZV3i)0F)BK22@EhK3M;|9#>D2p27S^4~p37d) z>5U+`>7kZPv^By>c+b(e(^&1qP5W;JpqUAp*s=Baxs)tr5(IPU3mgZchU(TM5ijIV zHR)Fv9sIk60o#zX^nr1J!4P2h~MFL9pk^0V>j|$2L*9Dx$n*>R}!_EbtJ(C&y z#9xW)&20$8m3JRm8mZCKl4u`vl@hTb{X*rKd%d7LwSCD_;%GyzCC{@Z5AO)a4)u)e zok-2Wk_vlRui8(jFd1$NU!y(dpm@%#c4wYe6&`lceYhw`pi7Q8JLEol{TVq?O2Gmh zCS%089&IqFt7PPS9TK~GdTDK=Yp5hy(W4@YqD4E>_Y*;l)D0daC+R7K?cS{%{#c!Y zSO|+dlD{Br^WXz?>n5@zH+Q^aMP__-z8tZ2f1zhlCBF&)a;k0TwBdLqr=U$k9x@k> zrz99*6kWt^$Bo-hk6IK?GWTFio_G2@rPEgmMpjG96_mR0)?tP>}}a52P0I%G3;Vwdc-oZU&H4~EF)+R-f0-VRwjs4fSH zV;fS(I`2Ldv7KtWin2I~t(F7>HO$mgD*Vodd5)aGvI<~ECkz6qDh z6`^Pmqjl?6n;sz{vSMAEHpbz@!=qHv6%>KNQb1`I#5s60k2X;(!o!O zPLzDU@Bj|se#*vqNuq-`N+)!Qp}x)*=eVCTcuxzUW*Fk{UzanJuJ$QDkYNo{dVceZ z`$o4iX#|JoU7TQxAWcq#pkJT#N}>3XLS6gW80C&u`|aB`ZVTZOZB+1=o;UFDbixg6 zr~Ad2<=$8F7zNDZsH)TX;yvXa0?&&f5f9#A?sIzkjU0pOy7mEUSl;&c`q^23j(P z+qC_QGxvQL2*ZXza}&*p)>iMor_1%cwU*(z1BEPw z$SHunU#v}tM9?EJ)rcc0DJ*;>j(B_W#!n^;{N(ig;}<_bux~@w+yi^6=16{1rYPn< zmYEuA-p^q=1HDbrj|6S` zoz^7xs8+P7tjx!BjUD~&-PVmytvwYDk4-zn9o;-t94dF&|LS%Q3O&T#^H@pund1)- zi8K832k_ASe+0+`+dPLrUrUg93 
zYQ%QB22m`NUjCY~$E4%8?s{d!#lfB*7a{^F@o0UZyaxLv)@SEgyh@+E&MA54GB-(n z8_oMjFFWNu-)~Jw2O(%p zSTi77Uod3>-Mj<+-rEH4oyJDv-5XY5=R@lQh(o`2y&OBfZ7u@59<n)>Ul*= zaRPX^?UK88YX!0O$@takTOnPs#nYs(1^kLi2V^=(hYHsQ|#jX|BNeQ z^v6WK-}F>~s44+yaGM97Die(_Ov{FL1qWAKtQ~)A%1nSDFiBqohhRd3S*{Kvw=%mXb7es^9)ireUy;EjB zao*#-@cqK!+_Kr8SHMDV&C%h7_TLyIs@EnyXrYk%;01&_w0Hw%6)nNlhI2`c#TAKG z%`pWGS(Pc7`&2AmbIWS58ZX0X=*WO(<@T2M<<$@q<~lJKZWY14?C0vR<1arzddKp0 zaB3M^?Ed&Vk-p!s$U&?cKP8j4)jsMVJtx8b8ky8Mw}$C=7$_V$l94;@M{UUNQPb0k zpau}6rc~;<_0oZ&J9G>ls;_lVk-t0DaQTRK3$Ea51ejNf*2EsAOFikEJIE2D+u&gW zs@qe0qWk}gz4wl4s@>CugPe5`2x6Jkr4?m zCFQfDVWrzTQuHUA!=%5UBK_?%_n)F$zcuT9RsN6o@3%Y$6MxIWx`=ZhI;BIIheKJ6 zwxOKVJA~oqwZf`uDIIhH(+-ZhOz39j&@^3w95V|$ks7u@kt-z5X{RuWeGl0VKGt3l zAn;~5Qj~ik1hx=JepFoVZe6+!IQL8;Oo^(%Ks@EiNNXU%x3VQ%(WI#tkZ|eIh5#J( z-YiVePVUq9?@5;|i{u(dEzvLC1?5MU-@Mtu*pfXd7pU6AzF5kw2y&1i0t{K_NMAuN zz7e@VlB+OSG6@Nzs* z`@AYQWwX@HA8G9q2gCQA98&!>M`EOhYguHvGy!j$e_VR{Z^!-F@gK7+LB#y zsm^vgK_50n7?S7RoyTeXaEGy}D&fy|S;ETx|te@oB#kj0S%i1_<9VV@;v1l(4Q}XDgYTx8u(87DP(6KfF;}umH>F;mBn8G zd$XDkMQfv&=7JM80Rq^cryrvPr7y+m?He61p9~wvMVSyXX4h0#mC}#ocqG33^pW+2 zIfFr>A*06;c6d9_Q5r8fhRCl?_RTgeSn<}w1s$s>A7@I&>M{hb_e?&z%Yk44V)kJD zp1as*wb8L249;ss^DdI@M}ktnf^Vv#n@WAaBVT5tV?0xlC*1pIp0FiP7@dBJp`3|p zK|hq)#nfuBPTR50Y=rTJWuN-VzW)Frc>2YzzXVo*F$=_?C7J?IBAfn8O$zgG+CXkg zvTCz+bWDZpD<$%^NbN?yrDw-B)~=wuVX10u6?oG)u?wi~sSmCeTLFH<9bO`?0Y5|E zh@TOZJm7N}#EqgWBB_;Fb~!omOZ@3C4W(-(pEN)>^UTQ>3G|N9+Z8>}eKYNEAD4?y zZ|6fse5ZciYjS`@;51DGjiiSke7uB^B>L6WBJK0Oz(!bG6kT%Mib95kXKS|j_~FeC z*=diV&val=I~#J>3`acMX}y76cjad0xOqwQ4v8%IxSvE~qhJRH=Q@orCji!^N`u_D ziI7Cm-Eps@cdl%m`uXlNSmMJa*}(1Vx6($=H!pfGxN&dl?w<+)lUD}X8jh3_65x8EY; z)CTm~fqXoHGF=s;1W1EMpR<}jTAeRc_kPkP{HDgC!_%7VJuUy-t(a=j>#lIakWJC> z$*nzS2^a3w?T<;C=Ui3aU4K)YDe&=UdAUVZ()do#v5;@}XK+fdD&&ynsH>^Ilc_(B zrQc@Gf2EprC0Nm5Gd$7WkjOuSjHUCed_I14^Qqp1Td;?vgyMD_!3}4g z8fDh47!KiCg*j{W+>h~R75hLu-wD1Yi`BV(&InKfc&Bd_=bF!W46l2mXqgZ>d-A4n zHDU2t-gZU&S>EA?6dd2&`uJpVGy@7zu$pWyMX^J6Y~}(E8RwBp%QBI)4A1vvxWNn)NBYW&>G@r6%El 
zH;olVJ1jHh+p~XY1-!eiKo52MihY3Gmujas@q-UoEE-nPTZadss{oepVaw!Pt>=*>D)Wi3B^)l-Xe=r2~yWLqMwBT1AiN-KP*1=|=njGQFno05II>$7*3+ zF#D3}(KG}=Ebp^4Cp!kqKcG{hcWI_QP?iaJW^o>2FVfny!tGn-wHKKf@rzL-UnE!z zgcG#do_M7nT;~|!Xu;&|Z?9GYk*Tu;~L*st(Lcc zi0SLei39)(7hBviFVA%P7sVgmLgg|+YHGfL9wC>US|A{zIg``pp&hrRcLno~@in9e z(B!il$3vdMSDVtNL>u_w0XvirKKsS#W9Hr`A8K7={}e7l=nqLDV%K?j9#ik*S{xV} zzPGOo_i^v1Q*hI0Y7!c-!o$4+gxcw`ZWde%DX^04=LtyzL<^{r26DO!1A>3tY^tg{ z=Bu33)@Iqrg6J654h@)Akr|~Pqo}Mloa-&e@QtnVeb#|9t#aq4GT6qQxYWbj719s* z3n-%x7To}wGa_yR%!r&zhP%=0P3cjC*-UeN4XUEnZ`o5UH6OUq+JRu6w~%3s5q{2k*Tt9Z$6@O+F>#EifPwS82$wVxw#48 zHf%P_EKm1&EDqRKZLXibmVK|ihbJ_2+~LykgdJ0ST_vCW_-7*lD=A367twV3G?vh1 zin38HaZPtH5Dze8^AuK{n2f0U7I;WtyRl6#*q>xUMtvNInGm*bzv^*)q1+ndof?-d zJj+aZC;HW|vE91059)^E)UQTi|YevURvXK+rCF!D` zr#B|F-wrz-Q@U5(Bgj^Rf0DfU)a&+PF}Kz?)uYuZQ9i_57~1^6!M<3gE{}!b*6(3U zhs2@^ZzuN-xh@p-Qh@cmD*}5XCsyXwLGSM~=L}|akA9^=2r#1JhaT|w?(KyFPH=jJ zO1EcnydInw0q2EXSegJWZhk>Lp3g}heKHrap^f6O+ zVBam;Z{1-2iFFRA2P!O_pTIl<5uW11SEQ=JW$zq8;$r`J+ISW$`0&>PlVO>|9A_o@IpZeQ8a0WCZT% zpWQ>)|1m%p_qtCDY_k+1(@VV)PJv7ZRCh+wt(a$~RupA&F)@9UsvN?+4=yu_a0bT* z-U%bBTMei*7NDxe5)|7{gsn{g%qT0!4#WCez>VXD6RjM!dczsKb=-6>V}Dq=CuRr5 zwbW*!a??)KxDD@C>^*?V~uag<}SQ{#y;v2Q6k~c;joEu)`3U<4Uro zCpr}^V-~EZt|YfityOn01F3<2$^|endUwdh`d(!jz~OyT_Q)uy%_XFxR7|G;;=@Qh zqT7DMX0)LxE9$iJ7$K^g%KZOVeoy{FO2WxgEoL*BXRb#eI!7H-eML2v8w|n& zA6@AVekC@vE1jTm?>(8nS+zlG=;+8J^cU16#c$)1ZX4}ef|?ANA5d1HRn<=1OpxCN zv*;)f*MBIFmn6gm9C}o!`U)QDmlPHwAi|i=)bp`H3ULp}3SZ&_$~KK7%ie`nV|kcY z=Q~}w{N`(;UhR(k+?tGDoH|4mCJ)T68?0LE#t*;QJN&vreZkFh7acJ4GvJerw46}V z6TJXs(q5zINS6-TbDkXD!UhCj9PM}+*Zj!T?C8Cu$x<$~R^8%qAQJ*u!jKFI31R14 z8&qpNxu(MFnzW5?Oa$1brJ*-D^J{U#cZP?D{fMJhEibfLB7SRypGyD%GF$eqw|4&H z&|X{nJV0~@tOKS{2No`W`4ga<{2kf+U$kZT)&JiFJ;os2z~Q*joeMxF!^9U;?*|+y zADY5tdrupE&n>vH1r}6)9C#KZN0y`9w1dlBuOj3poJ=$8cMnY%T@evn?|sab^6Z2- zX-+ZW$V@2ISZ2ZW*;HvyD8k)Kr7*XQ?0+*u_Q#{x;f?k}C1&T&&5|_D>n0%oQPp;I zgwULfGA8!X-HuM1DvAup{nN(|%!;T1WSy+90~7F^Z+Os*?9}=-(=5)-)y=R?7~cxA 
z1l3{_U_+6)-M@KCl(p`2%>gEL1-e-V5Lz9eG5p+-gB6tuh?s`G1QQoF_0SYIeT<_@`?r}i~Y*yn^#T;j2@F~yAH8FPyi-uoq*|hK71QQ zBDAq^^}666QOi!;==MVmg)a^kU` z(VWAxV8dD2aQZ=^wL%*d?A&M$i0nLKkWbeU-)XQIC~<|BDWM^>1=Mp0ezM){`eqUQ zN#FaBYrP61^U{NLQi7<#^wCV$mlMmKxk4{p2JaN`^gLBUjVg{jeP91hO>E1$+Orev z0o)q<*Ll>tWaP)tBk$#GfvCuD<(3B}!oWAI)ag?A;YQntw*oi?C7eb?&DEN?Ahv*O z%!4HuRvgxmBR1X2sb8$!GA_7}ZSq$1N*U7)hSswdqJ9__rVy?d5%Oo#BIguW#DoXe>#+OoJPP6*6gR$kn%lXa z_6D`S@#Z5X0fKS(u9T4`eC3ED^SphE#sSm=?5#2SnysOQz7rC8)1oC6CaQsS1-%Ul z`i|^zzJ*+>PZ_kP_o>Y~tdE+MH#C?7U^?HydV+UkP-O#b&dc<4OHmRMx(z9PhsgkV z9?o5-0obsf9hj9@-vF)AbU8O&iVplX7Pq|DNndH1>eeled_7}7K<)NVy`M^;qJrLG zPMD%5k9yW0-wM8Ox_qhtIfEeckN^cR7C;wbi=gG_r?!cLG##LO3>jJ5v52i|$1-BB z)_cA)rrF-UjC59D#&B7|Y5=S2lh7T{o$Lhxpo~%A9EQapx7OdT8p70jC^}24zd&N6 zx-ICkA9Z}pmH++O579$d}bse^I6x&^r>J|a>Bs?tnR^qu9B17%@uq=bm=t0>l zRd-ayb`ScU6}|wVnd7Mb8kx=55Xc|{w;Q(}hx3(YRGYH-0Zw+iPK|8tbW8D?4mn+a zVhiB7Ii@@n-?B;oQBAx?(Jcy0%O`1NmX+i6cTNLY@cSq)a0i!8hby0y%S+Edjnd3JmNAn?)QjAQ@`wJ4H-kN3xJ^wz+Q7eCOSkh1!8 zGDfl3CANT6v<#%74&vIuT%|jnRA%}|1ZvGCkF+L|oH)9XO0R(wcw*Msd>u6Hx`fFy&iDJ=^ zuo9Rke35kFu45fJsBrFy?%Ql?st-7n4Si=5h5*KuSbIqVM++>f>ZzUowYq*g;bT6p zw*aE&eR$`U;P8jF$C>f4=SoE-uJ-l?NAY{Pj;~0*^hd`qKV!0^7NnofDyN=hxTDXI z61H+wgUkLfUST9{M*n$T=j_1s=YIPTzc*KPAVkhS2i(zPyT!4WV#_muvF2^1SjuhJ z#G0+{Xku=Y(EFd8qb7+lYhYVrDz)D20&gH$kal!pT2hn`f4Djar0kn7e4owE% z{oE^JSXR2+q0#wS3M5}$AOwV~Fjj3g_4hPyEqpn9uN0fT$&=FIg5=Z?{&d&Mwb#=l zMkc@nttB~XI%D^wv^M&Rpy|m3ZOl#HQ_67lw~ou*>;rdL04-SW_umv@AS3tR6=DBJ zL;L5{oxc{=Mg>e)#&vm#w}cm7W?`OnX4pw-c15136U>`;9K1QxP8bW9M@}23P=5q8 z6cRg+nClD}?j+5Z06rH$y~1zH>&>PNjdh#H%=!0s5qTj3(6`aOyghDW=Ln=w4pPz1 zjZ2GN-OC(Qh9^^bh`d7TRbo`-HE&qmg(@*S>_)q0DfR-@usF8~dMM-^c_P(aRp%|M z3%kkC=7EMKw1w@J6L>?$;<=5cjal>*UB66UJZLy=+*fF^?Weg9M66U%?5W#Z=IPic zNItO9mN5<4I;m7=d{(L&oBFWD@AKJ{ZyWDxcT$gGsx|m20xo=c@^t~9>bK*#-R^6A zC|i2)*e<&71_cKoR|NiW1cy5b#^S;^`XsU9tH0tq=&{#s1gs3xB(P46>J!pQaCFOvpF1F)1ixrKq=EeF&p$VT zUJrPuk>x5TTpb17)P0;(Mce8Wpt*6(qSw0I@?0z+rwD9ihNQa>Qk(9J?B9J5XWA&G 
zuWlo`9sY(flAzEGW`{_-_)iKEProQxRYTl2*jS%Sl1@6|w{cGRp3kF0Mlbx?;^tm( z6jGUq{*6#B=9KJ-R~RkHk?}AQvZ%oggL^Ax3F@=;D%cBjX+Xjq!a{!EY~F{F3^D|F zfBrIUP^$m5gN%RNu^z>oe*>zAI9n_oQ+F{B9?5fZ(EWb(IW-xzr(M55441)?jH)Yu zVN(=jiJGqY{UkG8-q8@B-*rep`{DYMst~Se-=pZ|7Z+78`}3lHpgz@U zrVP|)ObYVhricrjort$t<(8^XEn%p1c{Ahi^1O4GSm+6sIfCjT0}yWc_E1s9?x$gk z#817p68e%Jip6o~zX8PiOUnD}L4R^0xIP1+ASy>0m2lsfML{~Gt zKI(L5<_Uj-;`_Su&BRXGwy=5lYxgSxt;J(sGdP6(9E5{QVs|{FpkYd zUDPzLi5cXndq{&8?`m(K)MQmTD$1}f|0Fe0Z|A(gBN<;W?wfX8J8u)2pF<=OhiU;M zhXgb4Lbp5ST}N+OZa=l2c$~$N;ra%jjR~U*lk@4Eo}$ZQQm>CCd_!EXzP$C$T%=ko zTVOg6w^~$R$Cpa9wr=~}%p&X67JB%$S>z375W`nqBCgGIq+#1?uvo)l`fyy&lQ!c_ z&1Z2+(S=RzEUaQymSJyR1hn3o-f?c0Yd2dc&?`@LnLM9kQHIfHs4Vn-4654GRuk9R+9YMO;3iD7+*Ugcn|fO zf`jTR%jddw3io!M>HS!E>toHG56b|*AImZBiawEMeG8)Z++DVq7XJM`53GQ#1zGF% zfrfDx)iBSR0@o-%Z-JD2vjpOb?l)ACT?lwRveV3bpHHb%y3T@~?PrFCWPxt=GfRcZ zVJn8xZ^$ErDmouBDz~x(J~LFlr#NbO`pS!D4q?t?3}2Zqt>jj^)_|oFd|zL&disej z+F(cD2CNYs>mT+nejSZ^CJC$fIaOW+MxJvtiQkbehO&@)V(4cEs1noe4tKDx9UR;x zZ=5};lx&b^cFX_rgpMV(vA7P(AV?nNv$wA6!dm2RQIta^9UpLfYbR&6byJcdU+g;W%^XTo?%{7v!FX1#P#XSM4^j5hW* z))A?v+vXo7hsPZZHbVUT7L@PF9zWV>5MJ`$_f4SL3g$m^&y9<~!%~yzTNssm3!3@eSN2useNg^XX^B1lJai zJ-K%>v#ypg%J#gv{$sT}$Tn;^AbsRf>hSQ{KY_C?IqW{4EPh!+`t;fJ#~3vVK+_a% z7(28X6GuNZ=$LlBf7i_rzi-l(v7iP#6*yzW)BIlahr*Zd-hL6Wd<5a=iZ*&v!8cH5amEz*6Yb`HdZHNC2G}uy`rS5Cr&XXTw=V`{X~4c-$X_%?&BwUKA+X0#pL?b z#ep3&U0*Qg?{mnhCD#fo&Et29T_-roW~QR@yLJJbv<6l5J6Z}6qG6D8GBcQi8Q`tR z)`ubC9Q3Q6>_EFsaR5k>$wM$w07JkIi)Pqv)8a4Cm4pU50Ze`NTPV`!2l@0EBc`59 z&gLe}Rrx#Xy$?flm$@qspw*o&i*NRPjkkzz*#} z4~^nU$fa|6o8PSQ%F-Yu-iZ> zaSqueH*0!u1i%NTfcwh=o#7)&(-`HEn`6kxU<+LkqTsK;7g(A^*tDQ`*no?Pb*J<0 z0;vDi-CpG~)sfaX`a`d(d)*qq}j8acz3DpfoLSmS2d;eI>i zkE)|8MeoHsKa6xeN_hPK>bYY(f|!!OA%}XPLT{|BJXaE&i*;hR^c+QlIDS?-i9uvMS^Swb!z zXoZLm$-X~(uec3cU+&QIN~BJbHosx9seni@t?0e+zX+=RJFfO^x2`S|NWa_aa?<4F z1Y45fDV7hviJ`L&6|7oMCeKW$xI5U^CUkd?djvRMV(ev~dmGA@1V7XuLu8KCaHklM zU&1B6J_bBDmPC|W`XAi-pke%2U%SoWnPYN~BqHnhpoj9*Xwd96F_GTysSUjSHi%>g 
z28V8;hs-F&Ywj_o+CLEs*eg_-8II#*8AFsVp*7+TUt(BFe2CLY2cP)8OqAb!lluV0 z#~XlHq9DT2&9!U+boJ*^jNhx%1mpp~c#E6_=7P-+e_z(TxQe{m>?P1)*viGA?#TTBxx@m?Y3n*iBK>_Uf1di#AV#8WO(My1N zfWA9Cb#N+~x6u_`s^7TUWKvPT8--kq>=Hpv%}52S0>iRP0EG|EcLiCtJ8_-kV08|t zg|ga3y{o__yvfM+{O5BhdaV^R-MlDxOcxXqVoc$hLhVB2W3ZE@59sDX#mM`1o>j3$ zSOPELa5*0m3CsoPW0VuF`822JCt-W;0P%3$%=&yti$qaRocCHPcyJtPL>Y|*k>3uG z&Jao+)+Z$!GUzqBBEP|}tFllBY@|wra7O%{y+LCQSLl|A0n+-QG!{gtS0%ZT<=%4= z$zIZ4R10oX0QrzR1?#UDHY6jb?T!{YPl^pZ$_&&{DUm=DJ2|%(3KLQXpA>wtmZQiF4%o)Yjf;rV12GI6LVksR}%}dsJdv?6~sj z%mHr0LZy)NF2Ct6eD6iN&DUyEFSZo{NHnPE0O#UB+`e_~xUtbwPjl9CSS~ZBU(R#7 z2LQuT-OZa0g4Ah?QKuNuh0P@~RJYDu)Clv8N{&_e_6HZo9Zp+jGxD|46VU0~6M(F^ zdFI0}5cK;@LE20{Q9A^dxXp`hM;H6XJ(8G9P|V{`ox?dHUq=pjraj`$xt1n~Ri$B5FJW zA51xqa4VH8ep+<~8Y`UPM`H`a8qImok5O$(B~51JOaRDBBg+N3W52SJ5Dcv_30{;f z6$pNft)_@2HHalQqPfZ^rFtu#+hHdws;`B(p361FKZl4`T)MFG5Jo6UMT(UJ=>_B? z3a8SH(9u-xjannJ206Kn%Xdhbshy_!61g@wMK-RpMlL9aAlDaQ^m7o&#T@(5zR|1b4Wh0Ap>*|~;*38uLl2(M|1?S5@*cDrMWE#1h(8wcZ~9(R9{hIy>-Jd0dt z)L*qd9=%pZe7|?ZPDn;sbFnsjU!sO4Q#*ik06+5=a5ufvzc#pgbifEt*cw6-R~A!) 
z?5G!vH-CXH=3Ex#B^qL@7Zo6vm(1oYm-j9v0$dFiJ&rkBdP*k@;XORxF|e~5^f;*F z2O!vLJh14mwOBKLe_Ggvcy0TjsG_tLm8;%~rYzJ$^=U~B94;9q^A#H(^{F)5(nUh3 zuC4cvlA9F29raTw_Z&!`DUCmec8a@UmJap&xpox!MEq3Ioqnn`k+E$n#;0E4 z=CBoZ{$soBM@T8>tzbh8?g#gbQ!|R)T}?HreAVARDL7NiltP|C?-mn4S)lXGAeQZw zI|b(WWaPz$Od%io(Y!6Ftj1-E6vA@2vh$;^OU~5*sOu|r{K%;j-fOLHOA4&?HZjhU z5!Kk;jQTs;ov{;lMisJufh^~OqO2)uu`NHRHO>++GmB6Cs-k>^jSZ9E&1(R6Xmck6 zFa>EZ#Qy(ubokq8n%YDL>OJWP{Ip0PPa3~F41xPP#fP0+%K;Yq#qqyDIqQX+>3`0~ zgUQo}RR+4IkE_jUyqX@wZ-bT?b-h1x2kAT5jl}Z>{e>JZBJZ4JX?0d zJK0905mWFl&?nbXzsS4^H=**b!+G!AC5u}?JRBkKb{K1B8yIP+5~-dsRaKm4EzPgL z!DbzowXxW4pSKUdkhf?3TPZ)wUFOoM>QS*x-I^ugy(c~@dZFv)@Pig(Fs24xV?s}} zzp7d)Zg^LU1Jd*Q{w-!ltsTh|#$1ikBB94X+t|@XvvO1RC2PZz6DIII7WHTNxI*w< zV^pzb;W&g{TMFB>$=0qgsdAzE0Qw!9%}c0?q6!m*@u=9@!8$FR!lR%3LJ`EZ7;&9% zS3$SrI-3-{lf#l>d8w(DB$dx2)+ZBKeJt_XeP%~^IPM+>Spo)Ju0ntlS4AFCOx~Zq zsF1c`2r#@{_J1=P{=1duKf5nF`{R(~jt`#Livi^l7Rcu8&lB- zQfIi;Y5@=bzx4|faYSw@q7GJRGQF!k62Cx+FOfLdvVFAH;Q;>D&HclcNE}dHW~J|r ze#Id!=fh*%X9ugw?`$~!%y+)8ciz-C_ySNdF4tbq`*dBde9XT(x!UaOos^&yM2vydIi&`Hll^S|GvC9L!pHpYk;qCe^GXCm(a zxooDcFvl%+(p&3YJ5xVnW{1Tq#~!b(so-He%lxg^1(`W0#?EgRUFgL}E4w7qbxjbOAc!VbKCyLefV6~+tK&CVcK zej{)J_Dl2eD7vPdC}!P#x}9;o7Lv=gIlr%nR5dyhY^RGD<5)K%d^5%J;I#hEj zmS9SD@r;<-;X(trI>7pwbq0!*6tW4Bz$pmyA^v>W#-9#@{c+eI7vzs8du$l?gfsXI#V2a?L#sn~Ha#V?t9PKEZqfhQr&-(;r} zxvB-$>Vv5nQp^zYrnR_!RrLHlXpqow*hA@Dcqb!|q{T;>6~< ze;%eQkL(fflc2F0iQ2i9^a0amih6s2h6~paBj`&TrVhLKE#_}JjsY^p&>GUC?PhoG zaodg(U4ilLxpN;xJ2em6#A5z~Fs9$77ytT}cN;Lo`SX--~(%>3_ek_sEa`AW?+Ngo*#cD-Q`P2hH1G?;;s(ix!(V8C;n%fS4;6 z3&~x@k*~i%2PcG9m$Eo6WK3h5+``icsi_xfU54HUVItn>Wk+1dYbc7I&EKc2fk-ikj)ls`t}|Dm}86%O8! zu|ZCG1M?LL2-9oRU)v=(BEJ{8{{jUAt?t&r=M*Sa*#0eVITg`Nove?I1#P`*ID$A8 zF|Q$=kKW6f0Onb4Ytj@;h`=bPJ5?&TkUgGs<{0%Gw;hd#A?04SbNU@HvxEl2f5i!lPUj;AI>oL{SM*(-`L8`j$z0L88i~zmz09A@det6 z=E#Pr?ofGW^p@afz)I{DcIN`j!J=JFni};8y1Jl1W3%`<_}^ck4E<4{{6n0O|BFS= zKUbjqz6}3u%c~T8T*q9$ceuPkq6Jt#j?ZsVJAvnU9jf*_)$w21asr*NPj90NEtEZ? 
zM+lqE=JN-DiLGt>@b6Z(;Nu}U>=rKpM|%v=4104i*gEE4AaT`b zjG7c4Ru1^_V|^k*(M0Ah|C74p&?6>@ixh3D9XWZ%^SSkK^b9Rzp{;4c{o3R9D%Z`* zm@&bd0rySuTHtp(799Yk0n-}n)*k3rl0`12ZSI;tbgN^nyz1BIgpCMCu6ooX$9Q*J zfjSMw8#06x(9oTp(9t7Qg;$(aqO`?hd&j$JzN68Mnm>H7grtyR>~?$0 z6qymItAv0=iWQlycKQj^Q5+pk25K%4pv=VICZX3>gNY}8fg1UfsMl=fhxl)%+!u;4 zVQbJ&Ic6(!F6m3xG`?8h6|@I5R{!jdATtdc4Z?fI)~VxdJdcs3Z>L=s@~sXUj`py2 zA$*H}5i)lVm{$@8Xn{S< zbRFQa3qeqh6@R~;`5%Yh^-HjSFDYS0UpWIf5>ub{hbSTfaG2z4y}J|UH|k!Vcwuug zRgZ=B!qI1jUzy&jS-=2DQhMDD;L{+0;xbeb$OQs|ID>2+(A9j{w&WP}{hoVh#YQ`sH`a4FU~8q87JmoD|#RyF3q3@h`k;CNBTxN&7LU z!H$;dixDN~H>Ib=YB&{|erWaeIaz8|Ni94IaVP+ZTvVB?zu6Fv^UY9TU#dDFy&D6Z zECb}Fplf=037XxP#L5|fM@@5z-aEy=sZcCGOOzTaHI?c3++O8)@70UvTRXPXK^7~} zW)6W?{3gTFxw*<1Hq@44xWZ!Gx_z!HEG4kt9 z(@FQXTi#mWz`uhfVVYDo@)nxev;52HKK3G7K2MSdKSZxb?xlm+t=HYbg~L>?*?V|} zV=j<$d{d4>d8M`i>eL?@ZGIa~i(=J041<0D412=tno+Za0@GQEElhXMNb_ua3qX&$ zS=NWp$JkE&>~w#;&X(bqdR5947b=h9cDd+$Q(BZ>u!HAF0j-2XvktHuX?oQ{AYU*UiP<<@7h0gs(Si=b{$BlIcF%PbBU;>_7lhQi#vwrt7GHcenZJ zQ8H_;|7o=OUk?NSc?|kLdhD;^KNg37EDnF4NB?H~zjSdZ^9^zt;P;;*I?=-C=_xC_ z^I#Fc@9PD*jNZ8Q3uFYMz9nG7DqsRG)--y8#z?vF?fN86#eN;^qsZW1(VrU(n{l*Y ziUieWw0-7by(+WMsCms&fD15$i`hY^b$Tr>Q+Pus6^JK3cc?u%#6k1rZKU^t*)~JC z;3N(%xKnU(+6NQTmt*(wYXiAQoWs^jK6lbP+tp1#A)jynNWESFn44CH0R*!Fz;W+w zLkC%zsM0fwep*y9BqNr6$-qC5ZeOY82JTSmQ+4aCFs))!4cRsVP<+b>$POE}4g+SW#Rup+Eqt?q;(>xeQ$MIc>^`9Nm2$)D>Du9WTkm z_$qP!6Fh;=<=~@*?|$a-4DDyqyp*!L*9ZnY)tZ6`J-Rrl|$oh%WGY@&}&{{ z^z9(rlK%QxJdCUZ;dQmYnxWwP#$}kSK_`Qi>*3EJ9QSn*KPVT;GT+EZJQ_Tqzipa7 z;W8Y7nzp=_%(Z57mwCt_i(Q7K4O|r2VSB+%_|zX{oOXi1=V?`A8szg1^d>0n z2P|OhyS&_NHRGXM7K{Ure}GJWn?bG8hcOSbocC%M-qG!f(M)OFsyTXj8N{!%<*Re( zQA!)2W`i86B4mEb@Tk*KkpykYWu2N@t|2K@kDpk6V)(^7!9wPI;%w{7zm$Z!rb z>`J{2#C_rkJ#CdC=lP(cP)Eb$j%`Hd9l8FerI7J@;%v%>PSS|+^jmq5ZEIl)bx_w-J^E)Fh3m%h7w-!Sv*a?NXk=}jK?pIU#1pk{Ldb9RFUU9c^3EAIYLuY&#+!+v1WxE}iPLP0f<=MXS`7Qp zhtO~BhGpHMtS-q*o$twSO0v_CeA*?>DP;1-UA-(kA)qzbf}&vyZ!nGGQ?XYm`%*Tj zwC`A979`w$u_;2}XAJ26p~|mbea?*Izb`DPS+oySu}N++#{YxO*#w-O?x48L*Ao}U 
z&i7zm<@HdSum%u5cm=|XK)FJ^>^@*O`T9?xot8dJ*XO0M9o@{ z8p7^Zzmwjipg8@LRB#vMtI!&LL{pQ$Vg>R?uonaU%N@p zm=YBWexmO@II-5*+oa=o&;}_UyU9DE|eOI>J2+ z_UkwjHvlbC6QF*roT5I}I19w-@gonIZtpR1H~TXAt87Td-eyn8Ka~=t&DH=!M@M8Mu~m zK6LfM*4h^Az1jxG6zIlIArH8j>1;?!dMIQ+r{Tz7?P~apqr&_F?P~(1Jx99ARzp+`7P=I!p1@4iD8ATZ1)pz+7c(~l&6gpDkZ zMtEg(1sTTit0Z(`z0aJDila7ru`~}_toZ(^hU8tqo`A@}EoL56#81VB^v$oZ6I)4e zYV%9b8)~ztZs6?zTnjX^;RHF`&B512v zRIjjt871F(y6txIgg~yNtjeNb(;GJ2vML}wKs6j9rxaI3BUDM)iccJm5#%fKE$2AY zQPA`2u#5&-f_-!@)jjQNrQ6B=v^!PhEZxTYH%pHQ*DkU>cH1fZ3Of`6MyL*>Tf_vb zs*T$ti}s8!8XUTO^D&3uzAnhG;R10nqK+Z|xOr>sOKSwXL7a-!++D@1&RJz#Q;Z2@ zOa=z|XfyhJ$IW)5A&*-ko@d$aqRd_!QomweH%J4%)U%sy`#S*=HOA%zqVhi`RnIqi zB<(-cI)8+nExzaSYJWp_AM^n_Jn(j`jlGMaYjR}sxb65gQIvP3^i}ZF#mUffWPMU0 z*#x@?LCBz>^~E{kz7E&(a9TC zv?yWSv0!@6pz=-b1=-u-K964Lo4;TlgOw#wlJ7yqL`yH}fz@f-5VXH7} zGB@cO*+0jbfQc*W{_?SQ!AY@;=S=&@mXgvF%-0$WS0k4|Du}a`+Xx_AY1Y!#)^;hW zv#Byy=-t(WfW*33ti?Ia)iJ8sY>{*mM7WM<|FL_R&n(8#Sq^M_3sE4>$4 z<{3>rJJX49pFTQY^|bS)y=7V5H;E@ahTM?FZzaOv7X&T~H1?QMlw5xh)n_KLgu*s= zxz^0srq7wJjuhEQfZ;0$1b14OBHA=zd3H6~H=CxMw|2-66JX_f5|{mM zUf5O%6gm)|fv!st@K*_*QFX$Bp2%{!^_jBZ7l+>7I|X;ADF)6qgwXQnd}QO9ko@dX z^3Ah33G`!cVQM<3qy)9v0w0?Ma}%DEe8^6p;1`=A7Y5}v-VHiE6>f(DSnX;5;JyS! 
zwZI$9Bg)hEmuc_l{A9i@6ZzRKyE_Nml?F;XR(`5ig!CNaI0Hjl5@u@@MlrL1#OA4n zl>5vN7Qd>&imjD_Q>SJt<2cS2fPLS5z7eeI@P=#KX>FgPp-ntJRA)Ea`5^pi!kt(F z@qNb{M@l=A(3U?h$(D4K>3*mtsfW}}_E(x!{~>+5GCo1$>x*i}!LT#XRJUdD5%PAE zJ#+wBfsp>tie#BKC`?IG3_K=rSCB)*(R)n1&Y}fc>fdByVH{*4)a@>`{ZOJNHsC?C zpM2668IBt&Norhe5YB;3DhwbL-b&(gAjK=*6K!!dh}{_a94J;O&e?^o{iz8I_0_8f z6R4qtUBhz2_fJKIq|#SxS~zng zHukIdBpB~?UTU=@XMeB;V6($i!;e3quib;PZA}60O5o*_viU?Ni#c2bf2-Pf(KH8J z(vMl==kU2_q7C;VYP8lzm1XV9+P`tQf@s`BST}5xUm%dM_)KBNE@AN;9gY?QLiWfi4?TB=Xz6sk?{?vW86Af2!UhH@kf)F4Q~KHh4p3XmxvlGz!dFxJcyc=;|+!PC_UIO@yv9 z0x-5UUq+Fuf2sP)aEbkx)s?V~^s~3r_)HqjPyPZ4gRVcmemvj-dOee{5L9w}IOXHm zm3A=~0~q5_%7bpmYvnm`8DNIK>+=7x_ulbv_3Pg7C?NzRN^~PZ2+@g%G9=L=B1MQO zlPEz%?+l|v?<6FIksydN(ff>EqqiVB(V0<)F_U-Q_kQ+y_P+Oi&UyB`_j{i6exCCO zlNr{^vexf+mG5<3-!BfVtH%RZO|;z~S5Z&T^e{;0BbV&wQ+#b#WiAr$Z`<1 zrokz40D(H3UQpqb4tj{6aI3vcTj0^{Z0~>wqwcpRUqv*unz)xheu_eE5K!CPK5i*> zw06}6oV+Lm;GCXeUkLkGt1^yj*nSUL|L+q%XT55rjD z7qJ&mIE&Up^jV!X`~5nE^vTN4tMQrNW7jwo;T?+1*Z2EWQ3Kh;x1@8I(+;VIE_-`y z5gybc`}(Bn!p^Utuh}1FGN9%=Iv_w6#3E=X0XEg9%KTz7VVW6hA2phCCLyeNOu&=8 z!3V>AAa?m^ZnnK**k0UpR&8X?Ynj?ko8nEvEend3h(_murP#M8C>&e+H4+GW54Ern z!i2DHd@8HVYkl$3tyrhNZgA$$02#O+&bkpv|6BuSkp1L=v?7WruTjkbj(2s_Z=7*{qudSl3hhHe?Q553+aKrQc zt?xr(L;RnOXmys>!7iIiC6!wi%(>i@VN_5u{A##_YTpS?jcbB28oj(Bwlr+(>0nt} zOGr|?YAK)mt@?O8)PTMJIt3L*O@#ukk@lOcr|t`4;K&wF;f@wN{nclT_+KD(R&Ekdg`elqA`jVceKTd?Vmqb zN;L((lCx;01S^6fo?hdAKK0bYMU-K^@%$PL>Qw+ZT%$r;7h}A~q$w?4Vmmk1*4nfs_dR#tRNzi|{t+rc zg#nLhg;1i!k>~8PTWp;@YCUqPT;7XSDufY2f~mG&pA1ls!zxxz6E6eFLQK`JPZKc; zNMX|eYkJ55R2j7AigqUVNZQPZ?;2Cb%Fjq`K;EfafgsTJ08k9}KJ}oAJU(h}C(M8* z4`25{MA>n?07m}So@$Q+^UX?Sx_7q0gpMh2xOghM95V|Du97wgK5CAz@J1W-gFKYq+GP>kH zR?z=f?uKLd^+#7Ei->@vvz#jYAf)6Pkto zXc*loQ$IKyu4c#D>@zz$Ox7WAq|C~RMcsGWlUY_k6`nCyx@G&WQNR#ge)(_y$b5wjG;bF;_jgJZsBDjFiT-$ZTY1c1qE+QeXj5#|)Vfxt@xjocL*`&5zS z9V2H>I+Z2*$0M~6Vf zg#7KS)aQV+%KhoAgg=}WFfM;M*6z<2dI?#91pYAK54a8^X#?C^e|0(ItMn{q;`(>+ z>QFoA@h{LA&}r?2IZXoop1(Q{HNYu+4uYJ;M*5E-_I`ni0AY%G#PMZ-i|elsgk{R! 
zzW{||=2?gU?zr*`WDGPT0EpGDF658LVpMH1ulgepe{(;ee|xZhz##B^NPl}nWdHW? z|Gu4ny}bVWcK+M4{_oZK_cl8K%l(b@p#77XyM(ED_m@_U1<@=+Fp4|&|IJXm$_0k- zMgNDPIi%itf!rnhW*B<)NaH!AeqzzZl`Wr%U!bZHN8%T|eRANGjhFTgE6P{;$I>8M z0?5QuGB75lJ z0Pdjd)tIdEwWTXf=V>w>J0$q}f8)h;KTOD8k1Bgq8nmUyo+=mbf0v%jVr2__IEt-k zdW>TWL%nisvNN)%p_rv#`rz-r!^?`_!DhA7#jlz1RYfjiDcQU$Wr?_ z(|({z6OuaTSdRCfneNvppMIrwKz^yzY2K@W#gYnrJ3ZU%B z)q**s189~8uc*Es1-fF+{<$9W=hsF4|4Ald<_$TG&LRQLCwKO5{7q}g3LL=W|3I4f z?-MD~?#9I;cD3MuX?}JLjDt0JYj!}71yrsAX$>4R6rgg07>QVN!hwmwK;c>=k`TOL z7*LLhPywR>+3=9PaliWSU7_y%DSRiqXScIs4*(U}%|}3`G%Osk$%{OZHC$Z=`D>kC z#eJ^AGB3P7@eH@{vcHXaK3dcc$b|NY#4TJ&U)7v>s7e@J1&{%&-iDQ^dU;Q~OTCmQ zhVQw-p*R2T*5ONnDK5&$E4^aE+##Z2%ZBpOW%c>JSz zhk}plyoB;9wrlS=RNxdoWZU>F`xY=kQoD*Afw!sv&#-=1GfzXpUQNk+5LB9XWXVTl z!ra{Y1sc&Lt&$!eYvX9gcIG@gj>cp=!>fXD?ut)3x_Kw(k_1!_%`BrVzY&kvjE|i$s3-p&9OpF9Q9YξN&-o8U=yr1`c zi{ao1KSTwiK{hEE7PLH)#ZL5R&WE85`P;->gld5N|{4(aQF zMJT2UNyC(aPv7B=zQ$UP=&v`PvWKaC{Fq>nIP(C zH}vH_?@v7K^>y-SbL7%mqMN_Z*SZbG%x@b?bXVvw4u?6tJj-$*UzOw+oA3+7wDF4f>}#Y{m__ZwCEi8W6C<2& zd%+wy%%?x%ar2Khn#l2yAG*7mfwOiztv(bm5UC4Iry`6`p1IhdcWt}{o0crrX&2#= zK#>7tg&nm6R6?P@KtR_`@Xi^Oo!$`^j>CY$lcZAR0~-NVSzABeqy?2NDFMnRVh#t<9rivUz3=l|f;1KZHw z5*N)GF)U-xJ^Z=Ae4~9()bZGl{0@gEq2k2&GroAL146^q=23kvZ;e@N{f5A&%SMk# zp!pPRCiojr4jgVdk%%B&UAqAdzq*JiO;$|ydcl!m`yO%3x(6qgQ>WZ=1nh8=@4G@D{ihbBU0)dc1#?upNz$w3S{sA?JqgGKxPtm z`m(LKB-^ZK$p~irLpypiR^Ov>-HAq)Y|px*e_~oi=$_J}`JOjr1~MqGZTg{0@f_G)_4^IqlI!xyGVwj-|jVe#%VzslWHU>);u^Z&+*pr2K+r zfQra0{)NpJwDfB6$BT5--+T?vo~mp=$>&$Wy!C6y^a@kQX}qPXm?w0i3XAonb(k$( zWB}NSVO^CE3t;u`Ua-S9d|-B8yjN-S@RLAeCh7dj>V2hRi_ZPsjuFEd4A}pom+Jy7 z%p3;^T^3_zN^H4I`Bm#ib|g*H_E`Z&H7c|(u}D@gY!H9J&*DMQB$!#L!yxhi`IGbWbEwvyo;m52bo1@!ujq%^x+^zT&2vQ>&vz=@ zQ1}3Tln%kTn(Qw);0^|_Q(T*TTc14S6OwQzQMuhkd1DtjS6t$a#Vm#-BflVp*j!eR zvNDN_-X_TH!jD`kK^PO!-JQcqY%tz&!O~x#{1FctL;i!xOv;&xKZO_ned(kEm!oD1I(|2xm6Gmo=FI~R&!|sT>M*!$DFn{3x zKEBhhU33u^p>loH{~CVkwz0m2vtUkg8jE#+g9%tRaMjuabaY?k?aAPL5mkRPiFX*T z@J!0_^OWYRu#cDI&Q%jCrTuzA&KUO7mnx7`g)z7~*Jj<5yB9JrHfc-Iai-Zj{DZpw 
zx4fWjwa6g2j`c!=;Kk&Vw)6$U&Etrg!6ZQ!X;pJgC2r1>mBU^fX7a#KsPRJL3q?wT z`*-TNYxfNVRo2teRu|A=#P{`J$ZHbo+TL{YneEDZS=Z(}jZ_(9y*u8pES3PAQYt_d z9fPkTPRjsvQB%JSFNfiQq2}80))+WGrS<$&htEK^xP5$kA%EXcW0}=g=MVejm{+HN zG7|M^i(!F_I7|zVqh{4>xuyHW;&Gef`VB<7zHjvz=$t-Tr+*Jx9@vR>^X@qz-_my6 zs0frbO7GshpgQrk6dI-XhHe2s0ACy}AR0SM3S{-F+C(lQ@ER(D;vue=d!Xk#4j&~o zrb}ZX`HJzr4ZeDR{p%oO;yV(D7eyPCF2w-H8*H05Be%c$B3-_6bSg0EyN}_(>|gA8 zGj)QH9jAI+vt`iX$;^yi)sOVVJ9*y~y_@*P0b~N32vDyDB_oeWP+TSh8wrT^B~DoO zcL=+p1NUNb(%JFwDWOB#DuU2NaWk8DDn z8Qlsg(>sSKiVp9T=Dxx)85n1l1z5Wf|BIw9e>p5E`y{NT{YGB~_&p~FlW4V*3)zpy zFrN2XLWC>~1n_f{Li1^2#Z}IEZA{$WJ`E-D*A!{2OqYD@;f8}S5a0lmZ?*gL)nSiX zb?Ow2tZ&;Zk1D;jemG%MMG8M*seeZjebGLrBIDWBdiLP zel`>f$WUY@eMVW{)aoo_IljET+L?29N{Vo!WX?NpsZ<;hE<=`~3Q8q7F*Ztn2=2Uu zxrneymv1~VY@Mo;AiF9xku631tY}UrHEmVj8s`Wis}i|RpU^G^%gvyx_XE#6gyq?7 zc8+lhmXE(u6IUv*vZdkCW>?rr1?{Y&=&?In9N2=kr_0DNH&YE;N7T~!n=$qx`A1(yd6$6VYA70`=*KeEt9NeZ)M${c{pXddR>?R0 zN7N!#gxfD(-aadBfjU!?Ap#O%^ec?dV2vy_bZYL(7DZ`14ucf?b8Co`4^UTgmONY+2(pTS(HQZkTiU z7t0bw`>t10*rXT-WrytVhqr`&XIY94GN$;5$_LafK5OCyKj!gmC1NFy2zSFE37O#6 z&=;MTM*OdPxy{6{e5*mOER{#ti{_t%l3{3x?x+s!_8hF@qFQ{bYvxPALBvW?0dq7n zKr4FD0sM+!xS+lV}!fFmd zHEnZ4<t`=!X;h-BtGl*UcT6=mOK`#Jqe^0S*d62A*m1*Q9nBY|JQ&-6 z^lp}nbD!^Qt?QG^^np}5P`z*N!sPe;sq!zk!4ISU9{!o z)d{-FlQrgT>2{*+I}zcNjFn6t?KxLiB7z-m=ZZrvBvcPv^qzO+P0ML#2}_h*vE|!) 
z&ol8=m5&j82So7H$*_x?5L`kkhJ`vXS`+fQz5r8DnTEZyCB;efE3Gk_`l*sDdc)kU zTFU~-TEkBJit?y|R03B!FC?xRQMO$)|L*(sVI>8zlIMe|iY|q-*vph8x!v4N0PE$4 zg#)l@vjmY#6_WM)IN|Xc!5-0R*Ni#X0!CIpcECQm|M4SLK&gW6w%%(MW_>cX;YB5% z4~YooUYk>3swwgD0)Q`5%mDi|`3r;svy|YaM`{{Ghz#H{z;iAD3Rj2J4S&iESp3`W z>OVgGbYsn5%Y=e89yR!r)gZ}I^L+r z<^=x2c=dt9RyCDs>y z(wMK1p0a=LQzktwgidxWkYW~OEAW2vQF%Mh%8^@ad##>AsR3VZ7Y$Z!n=e^fQz%dx zR+73h&H)1QE%Kuk!j-xU9KGEIbdWNU*HJ)>@B7nvy-=895jGJtc@7m*OFii{^Q1Q6 zmTQW!?b(eVe)HR+2WSjWG23H2GcK!l_g!VMznP?l(uQUyyAa9eW~+6H%IHbQyace96X*FCtTCd{na-NnE zlQ2uAz!Xc)I4Mf*uz%|e&QS~RJ4e_OIhW(e_%eBiKAR>87eO`lz61Lu=+4T{H4vpG zF!W}*b9}WDajv)L~-x(LHOJZm$d#k<~5`i;M;wVuM6Ws$bY^ zG55Gln@o^EmomXQ*LHoPTuXEW`9<|YOcl;u3R90C;@Cb`vRcP z`vr_B6jX(=>pvFI{RMi?<+yYZOaLk@UDrkd6h%gYl+>ik;;%^h=BXk(x(&CT^UjLX zuf14Y`1vCL`+#$3`PE`ou3)E*55eO^Y7zr$jZwRSc2ZL+Xg{ctO)+d)#F^0Q@%iD z8^rfpLG13~;P{08n$+|D!lWVDS1{cdwYfu-#n1tluEpK*(M|S-ihZdcH(OaRE9grn zg8BJk=|N_{Pb^pK@t$g9Q(BC<>^0k}^z6_4_{<)8dpEI3hnJDtNpTbbfcNFHxPf34gSp_ZrPP@pPS6yw=yoQBdmkL!mdx6wdQx9l+@1%mXvuIdG&3J^nrG z<_u$uZm z{TIFG?ue*=WH;~iF=Kbe*5Z9+X!sS{Mr&#TcaDyMaUd!<=L$?=Aazh(P<-uny=Im= zYIXdEJ0^C&QnhP&Qgvoy*osv@y$6d$`I3C`le#jbl9pweU4KZ=gCJ=gb%@qHY4cvEegFdRyAOk&e>px`&#f8ccTd<&9O^{ zE1r>n0HGoAqk9T=Hgs#LVn`bRZMZxxNxml>J#yk&b*dp0WK_Tf=k$0;1O9?(?Yt88 z$;t}IS5>d1-aaCI9tC%k@Aq9p2`pDbz45SSAL?MYcl$7gNx__QczS(Vm4}B?8RXIa zn(C$(Tg&W3QVTEdlL|0<>4r~0i?>qLZI!fpbV$QF0KP@Pud1pl4-_jh*Log(=C(TR z7iv||95_c;o#LONTloL_*z<2q?_8%VlvG<{v|H&9hm;|MPR&YZZSrKrbW3kd2jTrfArSKlp?K zTec|l+F#Um#v@K*qJrH}{u=Z770vGe`sF7xI6iezkA-NVF;uUXtDC)HP%>lmgT7Q< zxEqNuzY(NB9q$UGhHK*$793xve)8Ix-x_Ti-h%WpX&?|U5K|qqQI(r+^X&?Jh5p$O zU~;F_fQ8D1>s6SZGnJa~N}ZM-j7wI`dGIa4I*Y6)*C8u<>*V3zu&Mu+TmB%ZD=r|8sSs1@qzbA)0QDIG1|l%&dVlXj z(5pWWZW}V=_**<@B(ayP=3}I_KGlvCnVrorZY0c%tsuNYpp zc)d0^eCMjT1-jx1zp?z=?~+cfRqt7z(J{1+j{y&xE?M|z%=~-GgihMhubt1l_<~2y z!!rCOi81vLTo-f*NUE2dfExYKd$CV-iB`#S4tFgDq{&&9u1qI_`sGd_>L6z%L%o9U zJcPo=pJOEBibHvi*4DC@wp6lOkp9<)nj_#GI1-lz=!lND>V?&QQ<=eJvW!qJ{ 
zX$E2&U&zKUb2~{zOZ`N@`xKz!M?|X7&z$lPR0vme6&1Ui-kX1_Ev8d$&Yp5UQ#^cv zu1}AB$;Yr9A9XM736_+bJyn`rRheX*rQ#8qD4qF%+=*s|ywWBU9PLj@>O;%nVT(w- z_5oq*+QRr|VpHjucB<|WE8qOQTCBoPbR@oHo=2wx0d_~ytTDCF&t!4ia{ajQq{-+( z-$LZ(^+$txa;t!9PdBuzzY%o-E;v?UuhZ(8%qBHEB_LIl*1g2)_oqkT^8#HA)JFP7 zH^$d$nxl}fXU-$Z5CHx2-C#4$gr}K9zlu zqRmpuTaI(E`|&4gPjD&}c$SmMw(9+;Sq77*PuHJ1Cn|7mrJtWmqNHn2WK%K0Fh z;wG{>U<~L>wrKIOo&B24%^`QU%H$XE$6xF10>H)>}J-rl>WlwQ$?x5W3bMgK+5EFWf2Eoo32ORBrv>Ox0w&t%=-@GN_ zZ*(0soGMJQXRSeuBMI06u!;@Nzr@yp%K{!av+cUi{umlHK4V6XWeY=ddwBo~Yc3G+ zvm|c;2g}rlAX@>4P3L_I5D3IN2|#|VaA#uLsXb1jqs%K;4Kw_#B6)^A$W&Wei&L&? z{-Sf-J-ao%5HqF`rE^$<1~#MM>Lb&&L1``Q1*ya#nbVxN+UyQFFwr8-b!aC3&fVa=c_gyEEvn4+XN=LmU%N42G- zx{nrrDAlI(h!d^(P&^`_O=oegy3#eh(HZ6p7-=~Dg5!kUf_#k1B{MHn>(kF~55+X^ z@8v#O{_w4`uJLR@Bh_;F8I!LZ6d4Ccg_3QKw8o7=*B9=Gf01O>e+E(p<^PXW)9;5k z)W)rQjqSDa(v8X%C`!Q{HYrQ{5?l6;!_R8*XtXr8@Pbe-zziFrZI(?I0XDXkzu{Lj zlRDJUKPm9mLQI73l5~OsA1l<0K?JGxFA)JPS$xU)w!H-VlXOmNn;UcJ_uVR1Y4`TSOp7XI78H-X z;XCj|(&E%@w-$HC^7iS^#QH)+%^FZ|Kn-^bQ!MC(%$D+?L%tZ>%`&2%jZFU7(N&p# z(rDuH*$86m1g-f>?vp4D9w!M6{Tw@$E3m2jmM=zpb2wPGVqPg}>zMxMVJ$TK$O884t zraD(;a5c|X;+?o!nj*)E*}1q@Oqjn!(i7aR7OHn*RuRu)=elaOInF+dq}?~u=G#YA z04@}aZv!O3k!)h-)fu;P?XKmQp8G!T@hEAz`#smNKtiX84>uFIeaOEX#_6V*|LA5z zd7}JWkG%(WnPB!L7@1d%ESJX$lOFpR6)7w3V5@#q|C{ZS0eZ1VAC(MnOL7W$?9t{-Ym9xL3S8ikWPk)Wk| z-^>}W+@>2lq5DU%is|>-eV?R0y#0LnW@gF#R=Nx)nnM&4V~q?nyNcH-dO4Z$dq9J! 
z)UvqXC>sCxh-t?1L4ed+>PhXNs9Z#s<0kp%5RoLYbxyI_F!4>XqOqN#s zjwvkfAy@jGVUGq?xeZ&8^s`>K8fzh;a#t=TZ|Ih533}Ij+O?%M+8Qt-FbgN+qRN%roO_l;)UY+iPouDVBg*{l=VL6q;$Hwm5e|vFF@7 zRFT2iwhxJQR192VKh@dWm1$6g04<1$Aijhkho9|0vN*!+vuD@hzfUU{P*x{ey*q0j zd#$_hYW*7l`0QK13V`YusMEJ$CUoxLVsoJ3r>vg5ae32H-dj134vkE9O<2xknR~yY zqAn)u^Y?_7R<*A_DE}T)QsJ6R;~gGxAfhTnI!2BBW|a`3n35$Q_tP6ytx>Cv224C| z1l@Ka{tr6*X9WEo>su4P&k=jLom)6tXkW*G9zSLWF@QjBy+mzgc-Sm}gN1?1Os8yB`AM^>@JnkZ9Q9L;UbZvrbvjIzx?4W@FQO!IZpCt6O*% znOW}fwP`?x5I2YXz6Yxt8GeLM%5OPznwpuMm@#_6kGFgiau)1T7kh)zn(x+!_3i}DbsS9F7rXN))L6qFSiRp8JCcqJ9vd)Fn>XFNk?0Nm2#`bnyhQ3j za33UA)>bT+-VCF4K$+d)|TM{JPE+Ree^7t8UNZszV>7471x zm0?b#CR5`ho&)aAcn2)ZXS@|?dPmaP$V?eeb5&c25ILeY6^&G?q&^c__jxwR$b-UmE?*BuQ!DF?y&ve-N#;9 z3$z3i?JkOQUyOM%*+<@#eW9&bT;PD~ok%`(rpJ%o?|2?XZ-ZtW6oV_yZ&;OoEz?bC z$Gm@`Z1iraL@BA)0l1PBWcg9^v}23E(1;4(teKH=Yy+Znaz;M>N8I#M*_CL%$5TPK zGVb8|A)iPTRf)rmGdvaV%(dKlZa-f&V4*%hWhmXk^0z_HldkqIAb8jBkS+}8LY|cv z{Mccny<<=D%BD}nMcadd+evfm8A$Rv`S0v=DsbXQe`b8t!dOU-tv^pqS!HtjT5kM| zQh3fC*5+g9PS>O#w0*>Me|mh{JS)0^EkeAo_+H*&FWdJS&BoVST%PZ_z4~y`)(w7F z0KUiL{x76X1d~L)j(1^O{%SK<)Py+eQ@fZZvzv2_Vx4_p)FLbk=plqD`wz_0)T`s{ zZV$Nu@#(~r&U2SNEl1wl4-3;&Vd4r5uIp#~fTs6T*uC6}3+}sT}q{eZ5Dr6QqFpxa4(ytlV$t zsKKi(A#`nM_k?GU;%l|-4)OqH>v@g9m1&)Yb0Nok-|nx}CG5%1F+d?Nk0N^p-Ku3x z#i#C85x(+K0x527nZMBGihoJd>fdW*{d=EL{2KhPgz||>JRNhJ1>4H|{6tAlO+0BG z*5!B8S4%oj4&Y8|{28Ehv~bj>%RjFff;6h1QYN;Wmz4L>4K3zxpO-#|yi3-_iUqeI z!btr1jfEZ*x7UKVu0Q}JK-y8G=vs}Hyv z>j*focu1=t*aV1!9jh8qS+;22p`vnA-ZPbDWzm`}b^~M^>)K06T}l}?oL8TGpIA$F zT03#OVsGydbp2EG8U4p|AZj&()lY#T{~%KIkDyBbvfq^(6x4`SXNdg`+u%E(^rT7$ zb}=)qV2*gyvZbwZ2@aJue$pn=b4gXp_loOVH~s_;=uy|=NA{DQIY@h&UgRWM#fZ^G z+=bKtv)k91P-;pRghH=pDjUuN#lFT-zP>%)_TpJaAK%kza|LvFx17;ZV$`HGTtURv zE1>-Q5lmO>4d>;phL;m^%7p|0ch+NUTWrHOMR0X`GQBQbNqzfmv;e z(tN+~-v3bkkb(CaP`BgaJ2~J!Z)#8e&w!m!NwgTEYXYbeKQ{&17uBZ>XXUW+ZKz{K zVa%pz7GkyTCE;gaCXoHJ11Q~JaVqNrJ>B#`bNACBd>pglFOX|W&z``iNYV1s)jxxH 
z|IF*Hfzg+xKAc4+qqPw077V>!$~pk*xk8@)8QO!(5r8UUCqS)o&3}YXDB??c3X)nrX%BG&`S(5qO#nCW?^&MzYd`x7{_y`yGV%YlqkA(xKEprwX?))< zUHUFnf-4tf-nkpukE*m=vi+DmV_lVgN1K_&O8FK=5R^joykQAkctDAe4_9--2yjGl zAGxT!I3?>?+%nU(wpBhDxn|IVHu`Z1>~Wp-U^^C>9Wq!ng55F|wC9Zkb2$dcv% z0T})x{QIwdPyK7?zmStdQxZ`m!5-yaRCB|Ho8Ko~^Ik8NRY|I|!T zk6i$|kwsb-s6!;+@_r$ z$v(etfYP;99(P(0XRA8V@u;lA|Bd0#lr7D<30Zs3d-#a#VDJ>j^J=%bqHnj{9$I=@ znjjjLH@1->D)Mm)kN~$hmLMfUxQ9s|i7Py|OGA;oF&v2J9)vZl{Bq@KFcr#MC*kS|t{=44+hhqNyDJ5Y1 z;oql^50ohYd=TRwoH$v#CeXYl{0mf!8ERmt-~Bh&_c!`(|F2|<|Lm&&Z+`th&=Kd~ zUxkE!SY7{L{EY0^@P8dQ1_rt%U&V1$zMUf~HYKAX+-ts-1t69~N+>Bv`ypDtKyG8G z&Exs=^RUoE>2GNc2XXtyW#dd_%iw0oT%HKIJ>(X(vqT(F_qncgg_)VPY+~(>rrOW* z0>F96Y@a}m>~_4KN6xSWouT1=d0yX*mI?$>Qymlp!q(FHZg34aDG39VsmyPP$|8Vps%eAbY;-9?>A|dG8&m-={`U^aa<(l!H+V4 zz(*&ybw%_{%Ju{5O#T(PeGWxm#Luz@{@90Vd;bmO5n^@ z>aVPT$0FA<_g>br;IL&E=3SaI{C;6Tu%g~zkoY~}z8}jo!*6DEWdz5FeSCPQ5qucf zKuw4>*n@v-fA}YTbS@Qtj)_20ozi9py^=ch9j>(+^T=S|2Mq={Xotw%6lKzhYTgnmooFl*{I^sAKeIM9mS zneqFH@(u^V|971x?$+ac!sPoW`l)XV{iq%VZEUJVn0cGB5|&a+ugK4qJ&f zTW6c8HmU6Zv~^NU{6zOJp(%)oo2NURcAQc0c6w+}*FmyA6xnIF(@hlwxQXZgp_}+E zA7b5O+mgf4?lqIRE3xefS3>uUXMc9)(j|8VH1f9JytzX77y@$i8a|B0QokL0)7<68M=351r6pI)%gRD{7qQ8 zwskCK0(k*?5NMP?4Lriu)oGSAn|vb;>_ z5i1J6bxD;qTXFinJze*Wuj`tu#}}U?3cT!|M3R~fO98HJm*RS9tuRW2q}p68nDqpV z<7!1VB?NA&8NuK_I8S`pNAe8~5t@FL zAZ_o`NLQcdx|=athn-!h3L^v`if;g+kdk4 zy)~1eWx4A};6QE@N#V}~m&5IZ&Fq0jzw9sG+z!OYp(p^VBeBHF*TJZOSYa0 z-ZGs_zd8SGY*Tzc$MM*%6^9~BN#ai~qCPs*>t;92N5!!j_Z;@#IHxUfYbqWn!hduv z6Cl@O2PO2I7&Zaamp~VypIu98MGcmx=x0gpG{2PT$ECoL=mP10vwX|<$mU|5Qx5?C zXI5NAW!M6Y+^b)&VR%30&gg3~k*)8;qUyrYm8w?@x!Bt^fWgd86-BricBSo{JueT{ z*zR=sQF=Dvp+;r|Ia!ru9C7%=LUn(zisFzzj~AeY#q*LYVvE#smCOFE(UHPK2A+6} zSnuMh#jqwK^Spay!nFinR}Xi#_a-KUq-MWp>0Y`@yC9sYY07 z_XZ~&Cnj2sV>7o-O~9kFNnRsb8f_uz_%8NNU`Mf!*G}8)YtEGBt(#PPr0E>9M6vB_ zThDrWG%qd92PjZFwIsxdv9lA?$2YGmDUvRz22#+&Ue2TXW_)=j!08L5bI@q7>-9QXjO0}6AV%ubD9Bdw2>J7Q2USfH3g#k zwC7d3w#+>9{l`^!gVwT!%DD=v$r=-l9x`d>-7od$fWC!0&&|@T&s@i`=7zdGQzBF@ 
zr)_HhOrbN~(tdhsU8t;6M}ma*Jch<}ls_9sAA~(m|WH&tzT2|ct zL%)JGqWr_JV^{e`Wow4khn6W5bU1*n9-s&R13*LY%mSQUXX7$Wz`<}~wCTQ*doh(XW` zFL2{4tp_eR-brgEb%dBvFM;D~p1E8%R55+|ut$B=-cma{@3x&tbk<(uD=WXf{Y(by zMb8chJzN{x@Hr%Na@JUze*H(M!$UcP$Fb^-SNK0HH?>QdT?7VV2b2^Rm1RtD%2~6v zl@7bwls2*7W{l5bi@S4Qg%~GIVNrne0ajBLZ#6$9X045>txf)kH7@Nhvw6gY))=K$ zcwnZ7t!lnzriklHn>W2_E4Hnp2Iq`^c6H z%oBbAt%=7tbq+@vXA^ISV+z}iNEk2N3M=0-5ffYHwzp1pi79`s!3Okz zTvSwAiiAMv(|bnKvDz=LjAZ-_P$@(n)(;t|ioxZTBG!*Y!gkMB?G>lA%vKl%Rw#^3 zJuRynIu}n_c)=#rx8$xQz{yB2bCdYCp58IL?Lvd{^h{=_(wg}jjmRYze$(6lABR4~ z9el?E>@^=&D5e5>Zpwr!@6|M&%QbnHgbS)%+z$7@D@$uQ$wY#PwiUINzZ)d|3>ttP zOQY#6pttbwI$!Jp`UMWtF7cWdf-}g<@(OF-g}uC1N}1toZO7*^c~)^LUTQ9|69CSv z=pSzHG)~5*J1sAkpf52ZLM+wIv!*DG3E4a+<8Dbr5l*nEzblG?1Yd+uvO7Y zvXfQ8`#*=TUYK8yEPv1pZ=*G;Z2eXrq|j&A>yDg6H2UzC_CI^V1k0=G`6%{a>f>40$0WN`~CTY$x0W*pQd%r-M4Iuxhm%l(faQaX9 zNB3~Ld*3jVgB-60qTT&Ft8!L6QlRGzKI*03(-GRVFw(C6qYJt!mDpd*g39g5W3*V`~^~KBA zFwwZi=#5i{Mgw;E9~-4=+y&9F0Ph5vv$F2-7{K&#%~SIcoYX=RYjR#*8e8S!>mQ)%XQUPAI*XwXUM^yAil+^Yk!o!Csu+?q;n{k7D4P zM466i{=DY{L1ff+=B+Qtvtj(m{VRj*!)ANpnd^WO-<{;}jt5r@+YE>zNn zq_whrkxcr3*n7{grq*>`IEa8CMNq0jP*4yAq$2{MNE4MNAT=t|6$C|kLXloV5m1yO zMNmpanzV%8dliu0dqN3;6wh$Av({YY%(?d3`}@vy<_`@SjN}S~!Sg=v{oI9Ol>40r zIFBT}9Br;SB&AbcGW1aV>jKZG#$jDosQHK$GRxs;@jY;q<^1BkryYKGagc>RnszDU zq+%PFTt^Q!V?b&uXpt97Z#TN5;M#2a2V{pP&sVr)*Zu9=1__bbam6=ntt$*Z!(JW@ z`4C~vs#-qkxhGkZ)|B9Uu6?e*IH(aDemR^Ot(*QkLD6s6@%(WLMDeS3NB{iX|0^x! 
zei1qM$Jylvg}4@p4fZuxWv0we(xeE|dRXZ2DA6JG?jVi%SNJwyLJ$q;bMxSitMJ`J z8WItT%_5lS0CLhc$REE6{r#KKpx-Y7aGU&o5q`f2zt@CcT^OScZz+lb`Ui9*S!y3$ z>{9K@d?f#~+?r>;j@RVvxa#u0Tj@_jtv{u2|HBggr|-?}iybm)l8Nwv>YM-jYkx(S z-#ixlBjMn;j}!mrUdQxf_&+iNUi(Sk1MHKEY%7rfmiZ~2G)s&IJ>UBcQexJ6U&AUH zI6cwSjgeEzX8^tJ#YnSmx#PV?1hV!)+(Tuuf|vOEE_E{q=IbdK zFfVdE4K(xwzUMZ&!3`4~x4doR&ahcIAyN~00j!~Vf}{?Mc9dkp{R8$rT4}D_I%LWdC120r}d-Z(a0J` z>m;WJo2oByfo!k#J>Psr%NiT=y^Hm#JLEIP*hMP{h`lLTm926OTeyj>`k?6^%aFUq z;N2U6lJ>R59E7{yW|_D2OzT*?S58Ws$SkJQ+^S*@WO9WX5U=8wXPH>=4+q3fugi?c zcQK#X8o|a>B@W$hE10l8e-tZ!E{Y-oghwdjJF$|DkJ0x>w``)}i;07N@5NV+SAT{~ zHv83M%;zn@r>y}QFRzZGbHBm}VC;08OK9o4aEmUf<1~7Lx@#j)$f`%<(?=NJ&zJ{Pb`Z=#AXAlHyk#lPI@OpZwadfz_0G z-V&hj*3b~hdf_xVh}Fw(1F$#*HoUXI9#;x^mxR=hc{`g-xp><0m|sULV!>${8c3EK zR04Z0aeJ6*b9HAV(b6kR3O+q?KRe1_x%XNt5^(z(oo|ZQ#3VG`X7k{%HbiKoT%CR| zefV}Z@BUXTPoj$(MTmJDi}EC0X#}HRF*FA^bqQ0^0^}xm25DgSzDT*8G%HQe(7O_U zUi>pnLYY4#81@kQ&5`R;j#8@M*$E&M#yXCPJ|>iAhc8H366cI^C*?h;NS=Mxe)a4u zs>1#YBy7tNfv1&*)p$*LYq^AKCCVKLDi-lFSc!g_yYte zx#mUR&g053%ON_b1RH_0-OrboJ4-)pYET4b2yEYv_ok=!_k!0SUPR1=hM0kCT;>cB zTg8pAm!tjga++yO`Tzl7PptQ#ZC$9puTLo=nhZ2x_3wpm_Goqt%D;-H*yj}6teC20 z$JG(Yr;niU&04_X^-H|4uXgnn36q2K`RTfZ%StP#K@>tm!9l48FaPSxbY15Kvc+0P@jhvls9m@Rk>aCdla7SDlV&CMB)PH&95J4@*A3iNg3o_` z@f9qlz#OA-4WDa;cbT6)S;Fn2R#SdsT`;Of4ZJs^*bX?HnAG@M;!Ci0_2^UXPZ)de z`It+&#}6xnad=4x+B8Z%dgn&VzXKRr*QX;=WXj%0OST?08CMOlK(@EaRnGMnU(~wK zCM}$rea;TF+kH$)wht!cdjm5D-9VhO(g+{8%rAKZviYt$c@OyoMn{Avv~opw8aC^- zx_oMTIiY((_}n6et+8P$vecR5x~>AHzi%%teuT{GSZ{g89P9CJ>ePeYx%V6lSc=7WD}!^Y z%JXwqyr1+Gf9fj7y@-2b6IUgKgu3~G7SgDEh3BTnOPbj$4Af=_f>P_sTH1C{`4`-8 zX--sK!uUAj2BVN>tMSc$_Y+!Qm>*U-i{O5BgUYcV}_5^sT$?mE&b*^>yw8~ z?aL41ZS^hk+*am6GsVE|bw|qnoWqtUFAq169P^#_0%Ams@Tx8lUA0pO?`@} z_rXaw5`)m&)HC-2zsI_{JzIz7h0dWH2)D05HRo zQ%UzpLi=Yjk5e-+DT5#Kp};^q0-7Y8QjHE1cAKKaUKq3MmACwcaIu`yndjI}dqsa} zK1Z{u>qVWVlst1*i)V`cGH%@;+SN*x6~zj&d;iewI^B*v>@v{@`{wz8nDp7Qo)d$G 
z$`hMa9QELEBsFpfNlnzj>(jj4+aHuJynef^qC%Y@Ria5B7n>zy{g9*lSvW6HyUxrIOGhbJAH`mC?55UlsCJp#GVY8b(`Qe4@04V4Oj_5^7~t4 zpw-!jZvBN_c4P{7LGTCR$@Qh)F?ko+nc+Zq@7erp;)2M}Mr`0Y%by7@|Ll=<33^Jw zoRsYR+z+tK8d!!-U0vq_lipN&Gx`bVV4>l>n5lPmaz1a|K5O_?d<3v$AF}28lFr*V z+oJqcmCyLU6x&)4B`BXVdTV~YdX?VzX-&^3i%-Y2jMhB!G9$DXwH_GkXsLdR(H8Kh z@)McaKG{0k4LQh=UYOy0{bEmL17e8Rqv9Z2SFq9Xg0MYnK7oXweOf5gs*=aP~ z3{x?PynyDiX}Zl?@5Nd!Ws zxEM{*Dnqf?kU)k*D*_B@5qqzU4C%IHnkw zG~fi|ZN+rT5&L!sj9q?0SdTh7V|7}wB9Ui*hn!Eaz*jWMy5#zbV-oI=Shv`8-e@T@ zB~TwDi#C+{vJ=Gcx|*~;^R&~JwGc_oq?qrrR{q;CpAwKyp&#FeRWsXY`y?*)Y#HPB zgNFfZQeX@#NbfqBPa0Ub{pO}qi_U6R!2JDECR|86^wX)zFnU;@Ji5COzgp)qD|--c zO{Ofn3!_H$0^wEI2c#sTsZUStyu`vj>H8O;fEpB@B=8>dxINdPy&ef(a~9r|7vx4n zwQdC15v>GcNmuHAfW`s;Bm5(72s(Tb7(^HW4O1EJHjqaJK=!k-rQ1iGAO3W>6yA8= zVR}#U_79MMK0qyX+K2$+r~q->-yXIHtY1_I(JQuZ0j@1j)?-G;&giiz$^PrSoLCM#lkAR3U{Pf=*)FeusjA)s#@ zCtLuMa)8eDZ;wL$_L7w0)PKKHzh9}}@2TJa`u?^g{a&a3|60+fF>Ug2m(!hpTErYe zampoR@EI+o^83J=>qw9Q%6@Y+f%pDaX!A?vK^e%9g&xEP&1U-SgJ;)!ZivSf=n;J( z)B%V>#Mi@sta*9O?o7|sHs~z-LEJL19KRx8$~E5}Cp19iEo6|+sVW4Ko^8SkEm{aD zlvLIWo5HwCtVgKx;2nJG?4^Jz$%%rb0e|^8dO{G4F%y-6k!?YT0Nb7jm06ORbge4@ zig`dPzxHNm%wne>JlMW7R9Yx$TI$1KHE?#qj?jMx0Ny@*zJPMUA~(>pBKH-yUtaQ~ z+w<+@xs|k7wI|^HY|@n^ zy;7`AFX@?d7v7F^_XlWOY)6V>5MVw0YPpw51i4?r3r9lJ8Rl6r=&wdV(j4%ndAo~~mr zUOy`A#g!vDlnft_(I1auiO{|>8FYFNv<}AWcz8x|2A!@r;kS@TGT2uhbI|C=u`16ZDV43*-1J(aYvw*1pFa&IZ^lnspeEB_ zPB#qBwC55==JBI35n(oFndnJ}aVqm(<_{cWbuE26)QX4MiCN_4Es28Dmaab3AGhq%<*9-9S zo_!2wd{aO*fy;Y!HI^cn<%7lsBgOHn=N|b}`M3i{RI`4B%fuUj56eBg-G@vM&Aa0m z6^FCtF_)?!4CHpRg;838NNFD*r>AcHW6nt$Ja1F=?-FKFi3c{oy>NZjg>J=)zW;q+ zjpF0-k=WgU&g(oyo5Qc?<6%MmR9>nlpn#PMK@t9J3SyM1 z8K8vjf^v|LJ3(1$jF`ftHt&oN$ZgD*UzVtR(zZ3$v0zOvoQ>zhAnxMtRkaNvXKLl& zRE#a;GoWu!sM2=sQzo9c$a@hCbtiu3Fn&ZSr;8@f+$cGhqB zP(a;rsSJ8@4ckorVy)!u=jMgC&)8f)Ke!v1A@p>p%g zr|R=KQjZJGWVL=}@vsi$fEg{JPoZslPkFYsc;|)}?E7e)N59f4Zu{bFaiNHjT0Vk6 z7eVa>t^WZEJ(y;wxkTm}63whm+{*qsed_*KcV(LH9+=tTdK3D~bhvac^IFxA{O~*1 zHTHz>0MeMq($+&!*P^U2Iw7S;LxbsTkX1U4QF=S8(>}1~1it>fIFPkwEKT3vYeq@9 
zn)j+uBQHiJ2XL&D0)`hGMMp8YN*`x*Rz&!|x^3D#=d_MW(~9o5*z8poZY}hhOy9`G zvUvqsPOgsh+a>QldWNuJ-o?j3-|qp1jNK9a#o!0%(U?l6laTvyJK1Lzd_r>ftJ$P1 zCsbp-%Qa!8QM&2rX3fVbx300I4oB1!aU!S>->w`FoFW#cG^66*4c z*104BgqIe`KH}C!zEg0S`+%i77~8xTdDiWQ^#|Mv;6{H6-!HEU$4qak;S`6#zX14< z0e@!}B*pMkY~Ju|XVw3K^Zxs+5Pw zY9fj8hL6=@2)xdN<#D9%V7B8E6Aqb(i*LRyWjill_RU5+{PUHk>as&WtrA9H(50ohtjgs zv=>HSdjevZMwjFHX1aMIn_X)5WuK23Tez0S-f%!@-1p0G6oosapT)<|ZUU3oTU{!3 zcBanLYX+wR;4JEtN<*0_0d{a%dKlS0Oa77A#FK0epxW;O&=#xi=H-vX(KIoFs5fHs zX(l{}#M?kl0U=zoKwk_rsq+B-co`o`4rL++{Qhx% zzdFC)JHP*B{a$zepQ_4f7dC;#=x?fjV$Ch3?I`#TJ+Sy7@HW#N0!K(r_j&XVFh!g&3bGI0VZ39?%lQvjK1H^H^MK>h+dG%8H5Y(~yt`h&+g3_A}R;89rWp z6oV zzGpwlPk5$u_^}w)$HKE!nJV?p!KZ1~p_=yNThVi8rH7=1SHw2oX2;4`E%(h}{8Se7 zs0)D>k8`{{E@yq*cigzdiF+}7{&C#R_pcD|S2XoBd3S{8w%L8JVG(uFkBK+%rtVi# zEbL@Y$yhY=xEa_05}eP#GJTNzkvDq(fK|HxkwFZl@UatoKf^?nIiV+DV)kIR=irKa z4@V8k=qk!@joH5j^1svPqS3*{8&B1}Ct**BS{U03z?gDmniY1=Rz#{ZGSW5SoO*Bf z>AdyhwKNCjpi?hKCx3vLd_A&!H8F+h5zjT6JSZO*pH2?3Az&d@X`x?vV@_lwl>M_- z{r@!!gwg*19Effky`2wllUOEKmObO)`~fn732geMON|;X3dkaa6v1nqfDk!xJqp6vb3|bj zZUpEi`%jsSxbK*3H~=A_C3=9-DFCY+U6?|Z{i`kLOwF^ovcAWtR@>>-10ld>c2Ii7 z0$>YzOs5S|bP(0C&T~NOc6vZ_ClX2A1ruZ!*R8JI?CNTM9k1Ok@KM^ip<&&yCZeTV zwp;RvK1nx&TRVdRPVf0*zR5d&T=P}%)j@62F!1~9gZm?M3pPbIL>`uus+_x;XQcm) zX}7%q$+#ke;vO_;VS3ZseI<30ke+*AQP}$*vfKX)hP1!Rjp{eR)PMDU6o7Qz=LhIK zlmR|^GMz*xPaxF6cldw=?E0kiql>~b;PC^pDvApr1+znr&bS{O_yY)n24#-cphIh1 z3_b@SO@;|)@(;*{DnNvlkMr9)s5A|T!Ytxt&R+vDG;TASJkL`S^R@dDq=nhzYqVmj z&_8Y;oLR9m8l8h4v5*0@3yfRU+C{yyG#iuj+Dzd}d}>QKzj;1yA>@BQivTuiw~mzrvYbXE1I?Bf{lT~ghJlLj zYJK)|Qgec<%%gT1>*?Z4w_kSljCerqq&ngvSVe#i zCF{VYVFVW)TTa8eU;XpAY?1BaTp)AExoA(U|hZsR@6iGS%e` z1oY4$HPdrq4e4}5?PBq@zC(6w5icr?;tCA7oBb4x(DXb`i%2HVidJw5IxL~VN=g%r znQS~wAD(1%YxiQMqWHTSO*9{8`*FW{b-jVI+tu#L?txC$IipR7jWXeVYl~O5f zx?{hUIOb$)G?r#I?ajwY?IlMx>Ct5VRblwmGp}F!8WY7idt^`g zgxH+!gu9MdNXnzikj5uV?@RaJj2`HxADl|&dp~s@(7T_G(OQbW6dcqtBk|>3AQNw( ze2^|O5kOeC!y%a0c#x-^UNS6qwh%DV5I;;CE?JNz9;f8oqmvv#<^3JY?ayqt|E|Y> 
zD`S&nPFAUa?02IR-N--t4%dC?y5*gGphixlA1zIHR6^-Vorq6AKxUvd`~O4Y!k=sG z&qv{Uh{N{bdDKOKY6~HW1lDfVoW`XA>+#t+DB&4E@BAi6*^GWPeJEgtAO$Z02YkGb z0#Q&>0(>FW5rSg}*ksz9>uYrXp0luUG8P=|D=>S;1bZp}Np3Rd-Ne?{jl+62VF$-5 z3x(&!8c2?-k2e(59c`ZlvuuS5^l=(d2$VBPQ7k|>Vb}5Lv%Qg{jpORkiA~fa*ABj4 z2|l=XU$gx}lrJjrn=d^c30Uq{jPQ>XiWHjP@jT}DT8{oAQ)>NK)#DAH(JpYAEabE` zWS=nzuv3q(mBlFota;rtfU$uNWKsH!-Y(~jBiuOx2^G5i3OsD#r&26H(?Zv3s zq)U>g{Fq*yO?RaBR%cxthqoYuz2D_CRD_GLrUxuLzl6DcqQIx@wTbdp1&fMgn`Xw^ zSe~$Xuy$gmkJ=Vpw2I>BFWTRHsK%Na?-l>qpou=9&LW{5d7+e)1!J?CRI_t|UrX}1 z*LT)|+BAknZeNpJWS}egt`1w|{?A>1efHn4z}IpiMO@p7mZr zK>J*k-ScQJZ4RNlcK)9vYHzY3vnmZyCtjo%NhSvx1Y9=ogI{RvX?xCe;&bLN`IeO-gdUw?|+c zD@V>*nG@y~?Osp&zE(0SQ=EMC*ydWdkDqS>!2;ffd=DSp1^AIA$_#jIf-*k8l`K<+ z>B)a!O;v;KTAVRe3*utqnB-&pj)hv7EQM|WUD!n@fQR_A3%Q)XhYH}$tlWe*qc|`H zsxHiHN#kyxJHi)?Y+Eh!58h&^zuqHXCtgf~Ui8%jJ)Rdi*MUChS4J07Dmq8>4D3e> zSd^NRxNs*4wsI^JZC@_=$zJAD8qQLtVm)(k%B^;kmHj!!P}EkSu(I~UmdqNb z@@cxx=@{F{Dcv~?U1s?Vctt%odFKFOM{%w{D=bf+6P8Itt#vH1=w|GyJ|ZwW^(O{_ zI^39eWA;F$`c{5OqA}imHZu9kNZI;5XFh%fty@LeFKoiZ4L6?JQ}#K{a(m!DU{T41 zYg~`GtX}tzTXfQ~X;-jDcLcf$a@H-OsxZ?{P)a-sT7vg5Ew4%E+);b{Dxm#>EZqX* z0v$0Q3nxaWOq1Z>feSm5p}7X2+-Iyj2kW^^!nfyQPDZYbrC#W%GuF^eb`_v;_PSIZ zu>r&acZbIuBNinr{T5WWxn&}L&ymeqa#EAghFq$wY9nNB)g`{^_=)+TkCIhpiV4fwbSgT^mdVwEupw| z^7|h6&H|2g2lqbD(lI%HdJ1*;;c+op*&8Hxtc<8XCyZcTyZ!u7i?5>*d|)y}NXy;X zw3q}5QiU0D{v^w@&Om9Sse3mo!pZ4R(K7JG6rLA;J{61OmC#TtmR(K-wY=PpT$#4X zUg93#1Rf5#iA(vr*89)P!~cjI1uLedf^ZjKacjh7?^~%4jgHQn2gcT-TEN}?lypas za?4vd0`EQ-na<@Ytbt@1Tz5>|LIv$zfknq`Vl`R+gC!#HZ_gaQp?pT7h z*R4|jkDF;Q1MFYBj-x0@Nce*AEb?d;SUlfrLgAiFeg zOyV+nKwGUD-0jEw6-;pc_->{KG~-E2ktE$yYZ09eob@|~S$F39y?G&CwaSiIXaJOE z=`-J)Y>ZKn`^1i2fT%<_)0g4;#<*&#IddDIO< z1evBvF?63HdalNR*I8To zm(BgwK-6V?{;c1|%VvSseOu(qB4AJe`TL!F1VJQveHY#IXZOo}4)TzrTJ(vGVFSmhRuO2X%wVV_&4tKxiG) zM232|p098zzp0N`+iB%cDq0Xc8lB^*E_CGK8$p(l%yi{<^-{Mk!A)Y50hSvwx3+4h`IiXSB|@5z82Qk%KuEG zM74hYDW~Zj6`EKTZWZJ!6b+FVZv)sV@|+uSM>33ASxuUk=t#vaem{(n(Cwo;N@{2s 
z)G4kQe2#y~R$g2Hd|IA!SyF80d}M#Yye5o!@6?hMWOJNaJu7Tw(CCHzR4bnuvwZ4Z7x z#%1!|j!e7;)16dm;f?e$O0uGY;SZ1{Ac1~0ya2$~O9-bNqF(&mm@3@Cw9%cHTXu0y zfV*Td(C={Ds+hnFZ_SN>uji!ir1;CFk893_AU}D=tk;=6Z($I=BUC}b$j~&Q#=Ts1 z=_&gM6#)h79vCN)6$@#a@QK#ElXO-@X-fBUZn{VJB?_U4jCf_<_@~#_p^u2SN605U zMK`w;%rn1DuN_$TT+PWg_^zE0z10Qcli3iq+5s#vC7}MUIxEoO4qswdZ&Q}ZHJl7juT<=s+z&0W?;1xG9 z@$n>@%0p7zR^WI{#H){F{d8>tnT{)Mf&3uk`8_z0KK!C()TwYdk;sK!~pX7 z!ovad`VJF`qm)vq6|aZ^w~(o@i^<{A6(urB*sg-}iEnoKA7_Cww)~tr(e^G8TSiyjNA14tQBv=qD^`LN~9V_pzdH-EUfe_gPQ5rOJ2Mo zJ)D=U&wsblynlvGK;W74wH^kLZz^VV1{wk5U)^z%NvO|VU0J%A!@2YBQ{YoxD}%_#WaU{QbN^NnStTOUKNOZJ<3*m&EeRrXGm9cB@vfZp5` zXuZ~_mH{a<7fCeZlu0|Jtd$sX#7=@k$dS^2XyN~6^Vv&HN>wHhQ}w*wGR22s{E~tSaO=~0N&E2epGN?OcXptJcaaflJrUY(5tlzQ>$K@)=zQw~}~9)$xUeYPRn$RN8Nq$Mi}$?!cVy zk)`)qHBParZY85~1f_c85Ov_9G6ROJ&H zT6v1C(>5~VOSI!%*r$_QkHZRtCez=c&ZVVHL(;| z0w|+cg6ED+Q_lh_YrgK6{*MLI&!%0J(2*_0^ZEfn6!(7nf*k~PphZwI*V)4Cf(d# zH1i2AuK;eV6*Csa=0u)8fz&3~*5h3ptrT}}pXX1jqr->J(=?NybE3u4g$2DX8RnJd zPWNwyA4|LV3VvEvkBLt1+(LR*!9HgxdQ1)oWw$Kg58@l}`4EoQl*T-%$)ufs*YYLo zo4Drj{QJvJAeH)+r|KXdfnYMoR~u`U%u{SueYCw1SK|_dn2ncJYm=J^+8mYMjwG0RuAaHzAaC#~3dC|B1jdcj zWX+j0D?;a9jf=Fw-6df;SBTX#J-3NZXtgFE72oKs@A1i}$Oy4NcwOw^` zs#V$fRo0?}f8f7`6&n{LYvHQ*%K!*CFMqC z(G7++$xgYqU1<3pbe?>;b_xt52dxQHs`fkYlQa~ShO9kANpUpBkij7=RNukd=0lt{ zTl%Bwhhf&=(-w~6z5A#ZZbY-m4GHCeDC&TS@6>-jF8xo?gZ~UCC!qms%^nev(o#ET zK27DYD~&?rbS|`oX-RgyyA~VssW8!KwMAF1j%F++y%`x&bpuIt5Vm2g)x&9^a+#(t z?BLnv?Ln9*QF8c$xsMo&DJOWVAl}PVDX43rw+sd+%Hhw?O^3@EtW`aaHFdyNM(MiU zb8(G&X=HFmvR#h5~Rl2K9fBPc#nvae5} z`kXFaMRbeHPhyGIrXsq>_l@;oPlFd-XSML4!hX+lI1ubc>uE>+ItgFV@1q&xf`q=`jB)Px0h&>wx_tO#=XRZ`G=F(wSx+3)h={`Pim@@6y#euo?08(K^?QDU6ic`O`p@ACavPMtsm+OnHeD(tc2i{Y5;Dv4Mv2T4Btm#t`miSiWOQ z4KNsQ)aiaPcC#^WWxygc?Y&KfD8&J1eTEGhe^2Qc01YMI1myc|5PVhj924*f8a@}0 z`G!SG_u*eJ#R|(3{s32_x!1ovRyc_@pJxEvvmTTr>z!_!D6)&Y5EZN24u0JzR0~p8 z-J1)rAW;m$uKJ1(=Mtoo!>`mdJUxzFSU92-)YBcPJ$I^3;$2YN1@yTZ_1W5I4c-M+ z0%4D*I(E*nMPuBUG>i?5Ug3nakYx}X=-H{Y6x*zsNez>?gsWF@QOnmKJS)pu#v6UU 
z3oq4F7NlJNH0wScjsju28%@`}_jAe`{8lQqPL0D%AZrvW4U|$=i0YBMA6F6jM)xVkVN7;?;$iWr2MI=?xgV(Ad2DC*ZvtE{D>|f z0c$zb*Jq}e?OJ4GRd?auCy9qKDofU*opO(T=2;K5=cW(2_K`<`aRdNP7g|Nni3LGU zlex_VB5Z>my?A`?``GQunQOB#->-6#c~5Y6dr?ge9gG@UO+wyCHro}*{f z-c$zXp(%hG6%X(lzP1B-7THLn>YCEe6+YFg^A&;O$??V~A{2GRK_+p&EL9<$Gb5os zZW9kH%2G6yBI4V$iNU@USb;g?S}dPMg8opB{l@%55s@?Gtuwvno{MTuIcFb%Kp@hY zD*Pa(s2`^@XD)NCb?aLjM2cS6aUZ@~+B{~G+j|^70NRYzM zH_Kl{RbTA>@RoJq;XUiGqK_UuNVGkXE%=~qS#C|Zm1r|Va;ZxpPH~UMv8{m$xMaS^ zL`f8df@TQ*KFlc~k`E9oc|U$>Nj-$)ZcY3(x=)`ZOv7hD*ZB94;c!O;Gv3E?utp)} z(np6pDM<@Xs#0vnUDjJ{-aIg7GpLL-0(c)r=(2;cJ`E&M(m^$Zt2S$@4^u*JzD z9rsy#yaQ&P5VQ#u!XM1)Mk<|Cvx9`QQRyr_k8DqX3sPL0vChk2Y{fktF0DN~r)uTI z9(!+s6yv_@%zrPzbt0g$jZ*jcrVvm3)Arw ztM?v7xWCV^KPLJ7p#$8T!0u?RFtgk3H;A#WH(RcsK8a7(!}^C?8?mQT*SeHhmu10RWL9&z7+sce7SF?zS`aA%h#7GT zyWPZeil{cSFe2R^^F5BLDq&GIHIW9H&!fF=RWZ9g25(JSLQogFy4&TReccpSe_iVl zU86AC@luJ>`nfKQRaTZv&;HOP8C_IAFvE;Lu3L;-61ObQgwK?*5Vl@SCzSS+Qc9 ze)GbWB74x_l*?l71zO3n%ZZHPY}0$fC`U-l);9cCmUqas4+>Yv-!%jWno@Lkj3ooT`hn%cN(D*FkVe3_}ihH zKS4W`i!RYeB57!VaZ=@ujngkca$5-quB;d(CWfl!#_10D`pi#n#_#XsD#q%^aF4LdcoPoLI2cw{)O=(C*jLY}E5a+Afr z6cLg1u>SSSNaJ#2s4t zIQ!!dcx`N`Ha;P{#klouZ0FPPzON16b91+ugXn*Nm<7uG+O0$YxnM*493sRlt!eUE z4pT*g4}$-+z%nOH(N&qJ^@%IhVvWg*9{!N$rhM)z9gVz*hYYu^m# z8x9l)%u4|gQj;cd3nKftWo7^^Vufb9{24f5RVCwA^*@+Hf&M=t1q;umPdp>vKr+F< zX`}aaOvZ_ybTN-|0u5NgU6^78e$U3hvX8Xtw-O@&U35De21)X+ zAV+)SBL2tcKmSI=IGwkDcW@g}EP(lOM#?--Hd-kJfIx= zNh==u4HFR7ne@^FV(ibjjEQt|QoYrI6)iy*-p&Ac^Z{3L^)bLIp8n!OOeoYOH|)Y! 
z6>I%epkm~9^v<0!FyUnPAE((=znxR;8%!`?F|=6m28U%D7_@jjS>*ft)yPI*LOgE& z0;;wE!s6+33u_hXHM?mGkr_CWJR30NL7!W4nc@}hff5s^Ul|oSr2pO)BTko znCOknmIh0wJKuftBc&w7EwHGIo%s}_fvA_GtPf_G!j9gGzOi}l7*q7@bG?uTQ$8C7 z`zADso+_dneiDGB0spq8s%)!rZqwRmh&wxNZs(x~kN-8gtUf^sf z;=#IDjgQuh#c;pcY*}zkj7U-EVpwly-MM($-g4nCm?V(`D}oFY^VOM|kR2*rKjfOz zI)C^fM4MJLQ@r!MfN&`bMg6%)_>ssDzRplP8VIpW5NVU}@ z4|yoD_xoPY#$Q)2FDt2x->_2wL)kxgb@g0v75UTR0zet zzzN%ns&*`1W#*@rvF=r8NZ1gh+(5)Yc5W(Y{{UGv{s3u+z@YXO;->3t>cMF!`Qjolo>NquI(xx!Iq)Ye9XBjm2z4E>^NLfW-CG`K9< z_`qV=c|Tf1O{9APDHYYqU-B-kHdJLrcopi;LxC+1$4crByQk6}wG0RrQFgjgwq%sv z%CPZYyCam`oIZBYEPtueBi~cC;kKb>RcuGxMD(t*#X^fUSW*!3JWs(|VIOpW2n431 zO5Er`sq|A*t0PCvxc8tGQ)lHjtcZIfG93(|iwXchh~u##7y`DgcY*?mQx0I<&}Z;ega#_}oNV#aAHnUC(L_z!H9sr|a$+0!YM zgl@k_ha)#^(r{k=unzO?D)*<&IliZ(PEpfN_VQZC(gJg|^h05JILYTHHz*65Gkmy? zF$8p-O<+2rPXB(idE-H>>BpqY*^e4Bk_42SmNOpG;*foxD5!qH*MgdxbVEv8gB3LA z(2OfM^0~pKp^Jf=Y3eVXg+547@C{1QAHKyo0xNr2?jIQCXNFh7Wv!uGr|oa5<}tj=NpeP@5y9>GY|bbqzsNq>*f_57Y2lTR!Bkni^n?Vc1HQFE z9ulJfai8sWlfWHNhFG{nw@${PD_+0(Ir;98vvqk%06jt~>X=Z#yCt-AjRQfEsD{ZI z3TbHGuLfzy_*@iS^l^DE}~S9H?xHG#d=W%Zt)dM?@!l>$OAn=I<++V^oFqO3sK~gdTxUg zFW$FSXECpdTWMpKarRh^N*{)UXlr?mq?eMaT7+A5@Duv!S9o4l@O|D=B1Gy8W(=~c zhfMMnYr%Rx=QW%t-~2}NVM>q^`@w6eGDRmc)WIe(vbyr(o7W5Ry8H?4(nnCt^vq}@ zkdr5GbAYofN4mN{IkWBlJoD+s8b_i@htO^vWv)e<(*is!9bg_r`(FOkbHtdgRFZu? 
z!A0cD8yh2q;CLyj0IOjCac}H9M)Mn=@qQ`n_li@u_bP0@`H z*vVp0kzL->ZdbISq|L%t1m$$AJ-ua?PvDygE-#>z-z8Qg{%m7*M}|hT_O;o{8Na zb)v6GdehB4g5&On0n7mkzG5$*WPRwaaLj2(Kp*w??vQBN^><#Q&lGxN-ex~+1w3iB zFb56L@G0@yh!I7h9gccBXR*ZD5a}%sEeYj4W!*B<5!4A^jluoMbq0u94XUg>P3!&F zdzdeo;&d~QFm-mo9HyW~I74?1F%~1jIIQ#C!r_3LmD@~f*^60|(3N4b+??I%;Kk5X zd?lYgV)JZNKGij$I-DW{4A9CAxQj5&ehAHR#BDoxxts~q`d{pQc_7sP+V)r?g-Mbk zMwV2zgk+ncvL#82H6|e>WEaC&k|j$>N`)dzV#>Y`*^_AJj^-M* zIA_Fi&MW6%xXL}J{9O3Xrm{ao1^ac3izGMA$1{Gr!;MXFYRsK6xae`g1C7gCkR3Dc z1NPr(C%i%?JZqo6l_KHtX#Ks}z-E&W9`oK7*D*u62TN4s?;>M+VH=GsT{rDtLv z33un<=F5|m?5)n~i0!$>r>(N*qWLx+xUda=X0yL`#q2w_>_(rg$`8nxQ-||N2ea-Z zg6-mD<@t-XqdHHL)WF8%<|F0QR+jkpyF>ize!W~9$#pq86Y_zW^n`jvxwoqrH{IGA zwmcoBS1ckUZ$FktT{R9+(Xe0mMtRU*_;l@ny*>E;&xH=B@$%y*d?vnLGudF#IM6!r(+w*&%Dp>^H z1Z`sA)Xed(WpYtrlM?h@U2X31tLabrWTWry8F<|bArC&V>F8j7No!4BH`r|7opuF< zgeH{;VkPixNO*!?j_jue^F0b#83c2|3BW43@`zoLY(SO&9-!wYi`zXzqAtH!bz^m- z!tLIRlFX9M=OHE}zhW=VJrN5{V(C$KIb~ol!zIMXv7nBkBZ~k_iJgx?M|0{2WD_N~ z%d9Fy@Y|)c2W=EH1W)lXQy62A>E_Uw8EP`lxJ|1=1X*M85TS-lOl+hC0~y&AlDfBl zQ8NfJGwy(0*@K{kd(**Q_U=+AbkDN!)_C*3dJrfZ*s!OXMo=}P0v@&$)5JrK#cxym zW!rZ*L%^DRh#oFY#fZO)Ho%Xid}Pe8apjbE`EvGb(B|1=zV>gZaaI-t==BI#e8DSi z{wXhMN-Gc<6)^++VIMH!_n}PWDiMLMJT@{DJs{{rg*mAngEcNMI#o&7w;pc_m zy`(Ob_HQ-!@%p$6)BD-M{=D~8!@?$bC{KDY>AiBiIajCo9Y)$^rqdUXd}*ArW%7HA zf))p^O{@mlyYsZL_H}Ctp81wnkVn5)MeP{Rx)Tqnoal;opD8o(a_ly!3JBNc`%qvl z^tFX#k1ZhchL*ft3k;&O;#ng+w_WDY|ni7?I(E88!WBw ze`LiW-gR0wvh(7B{Vq!wj4aOn^$-4~%N5G6;n`1%ylM4@HLh7Ssc=_)5*r9(MXA`Z>f659UjG=`J$ACh z1~FDUtKoZy4tbE)$&(c3OE|29fPNCQ|6o#oPrXF{NR095*Wl?{sRDA~NrnN0<=W0` zG!Cp}j;Dd^qz@>s!Ga1%pJXbN;yW@7{e2j`0wX&c{%MlzA=%T)e@4#I9ehv`Y65m9 zGlS|if?g@ymGH5)%Q9}w82_4ZxPNcwpRD)J)Le&NQ}=W5Yb{Gp{(#ae^3|A9vPv5A z(x18W@wUHe^Ry@<^hTozS#&xibX=EYh#9j@&>==Xt2XCmeBXXrV5hrrzAc+d z+qo1C>Rd2!1$=Nm)7#wz*{|vg42!Q-Z}7)_iaG>90=HxRONWE-rjyYjxalXus)eO) z3}?p#N<0u_Cv>Y}+p7HjpjA5(jIK{!{pO_v{&K-)PuGHT^+Vww=|5(!K=|i-;*eH; zP08zYz-PwyB7it}c3P%$b5fVa54sDbJVII&80A==*~1Zc5JL7K_zxcQHtfXaiBkMg 
z;LS)-;k&>Dh62E0|7chIe^iw^s+4>%d3RZA{r=|ZjZv{=4o=!g#wK|1H+a%NSCy*Z zbj)6~wu-gieZ~QkvA8Y27d;ep!kW^=MetmSN!yE(&b9vp_i3psX52OsVx!C=;(d9^ z8smIQ6UAY3H-dzv!8t)^M9J(*W6n@tiw9%Anw`CK#&NFnC5=-S-F%WqWeSKOEq*8M zUSaoDWAX*m8BFa9}yN#v4=N4%qxoKEJ033JxCvW6fUW8J>| z`HM5uRPP;A&$XuE;-hWemdEC>$1Cin9RTq$GBk%(h}C9UpJLp3m|+#^L$?#x+&ov6 zPfe|G@&r3vEseGNgKhH7BJJF7u}7-IKV~{pBBW@=GZf_TpI`Mf*`S->|@;B&e zj)#GLreF1(p8`fBzcRnaA+@r5K5A~vqD=%)PuN(UlVG*2PR^7SKj~Hbe34s^F8gIKwS_J{C8^N zVdp5fz?z;l3EN$-qUji(POeApoK6B}Mi{ZRnL?L2=4s1bi1 z|K6<36iqhkRIk_7#cY|Vb5#ebTdT|jR}u>1`fZfnTP{K2DbD7#6XOvk(9qR>wZ17d zbeX~cdQTmM8Hr3bwH*^GIA_xO@wR5K_T;nORQ8)xHi)m9I_g~- zDQ=3pHu8%Qpf{lzlrPVJKr)6is4{-B{)opL19x1_$rEcZFVjYYd&{-`fyP2Wm+1&% zI(*X0Sed5zh;`^)+mkDq|FW391Q}MgGRA{vs?0H!*2b;OaGY7nJ3kOfl3`GVok88D zrn>8xU7!C7tS4rT4)#VE@;vES={i*_emw7;=7pJqdZX2lWQEW85ZtM${cimdANnpg z$v1Q@@m~b_?{)1M|HiA3&2(BT9GXW!L?f^DOl#u`Y}uM38)ofZ;-Uoz>+7{6`>4lL zH0Kc^pt$G;$pNU|Kxb$icZbF_?r`zMt6r!);$V>PL}I5S3jFqSnKjv#!PGCIDFnOdGd4MIR#3K zEcto~bi^jb#(i&cZ{ojKZ#-i@iV+O?iY>@3ca*F1wTrKg(HVP*WR5915pU#6A3o=| z@BY}{PKu+lI)Q$z`N_9)i6a#*tyK%3_k9;KM4S|_PM)g4Asv8jKpn*o*3BIt575ET zWK?0|B(drKIw+lekUC-d_R?WzM1m{Z1mb>(=nF%O%FyM)W=1z51x$wnoYsEpOL5QU zqJfWTNuN=gc|_FNG+#AL5YU<+k5VKEtFJ9Sl92k`ilb@oPw-zNa2MUF3+&QJSwV5l zmm3!PwRq1qWZ|cD;j>TqP7}3+n%h@L4Xz=WJQ#-j;dlQLRny-DV@1zQzjgZtl>BCgh>J7 z51C%Lv5k`h!#;hDR+WR@1Ym0In>nF}PAPo$nbcVH%#}X$3Yb#`*DIH&V8Mcf z;vhLBTM55)k7%7HJa(B>GEZ@y|Yf|`u-d^C+_urVyLf2*hacA_Mvy=`e+*%3V< z>O^7eFEgA+uCz?qq2vh%o!G`X4|#ugaBLcJC<&2#wcW7Qm~A+cd_eh15du)C{`{dQbhJHFs$`yA zh(i`Qg#pyJop~WT<$m1Ih+M}L;|~AzJz=E}*k`P!E4`9eiYGPuD0YvgmV3{tr#5)V zxR%XLpJvy)ePZ30g{e}HSPXbw%}XF53;=YBu4*(>F$qV|yVdCiAWBE7@|29b zV1P>L;tL_(4p?QmxMHAl&~}%i)>GjJM~&$BDlqat<{^m^KjN*UVnF4rc=xoCMsA*_ zOXeBY&O2f3h_K$z=+XA;_Je*f($YF~aFn9aX0oC2rLNK+XG8Q5 z|F(M1sX0X%S-^^gdwfx=(5?d;Yx03o+ z!@R7AT0xRD$h!wAzb9 z#L~2DB4&%>>(qmZ^g-u z)3h?J1cn{Mp&q(*1kU9ZzAs2>+sO*qhM*Pt7EZD#kFzY~B~O1Ya%vNiBRJt45mV8f zrrp-BZ~2(*4gogy<}F;|6!N!~#@85;Uy>=YqnfnZ8{iFqo^?P9 
z(|#>BX8c`vt114ahPPFQF_P}y-G`84vG*_I*Fuvd$TuTX?ApmM+jlQUPgzhH2Kaxr z$b!sLtf#`1H8bxXH26k;No;v(euTIQi*)WWya6yb-6`Em7r&X*KS@*@K)d-AJGJ>M zI)#iKq;6Z<`a>RjcPTZs$WvAgfBHivh?C zH(=KM7QIKAZF+4#MYq%4_eO2m9gfmRPwpry zeYm|O=EPO~e7K?QK)$B`!}D$IV#~WHj}oL-+9#s9Nj428@!O%YJ~@hGx(e@&MI&Q$ zUcJAQ0%TsvU#o^d08h#2g&(3^PPkG20>Qc@4k)QKRg%CLfhe&x49mh$PDYKA@4mS{ ztwOp~9hPE?Fvdt;NSb%7Cf9P$HRhQ{%4uX6c2oR5lEe;LMIk>X3ZETb=sR%bgK+qe zu(irXMo7@6Tm~pIjV50sY1YwBzOJ>YO*CsulX$V#N>;WeI-S9d1~fc$-pn;6+TrCR zP;%4RxQlf>a%0I4cCEgM$2By4_}=mPn%2^VIAFOhdyP4Ua))GSLqMqNSj9LLnjUPo zQGz}{43l;8zuH*crohP1E?FsIZVE+~1Wd&T90a>D#&pBPr=5Zqd6F_vK`EQJlhv-a9v7o^7 z7SC#hx~dKn5^D59`Jp``#lEMf^$S?XZjGinoK<9~x$`>0DylX5L6rU0g>WW6!-X|M z6|q;AkE=qJIIvzP{W^vRqgy(8%QuWZXBkW9cf$6W(YK*1#d%{w$}vNyURJAB8y`Je z9y%YWS?Tw_Q}q+dm3)R|Q(twcPp-x3z(Ri_O1=nRrlWi%)P&ojShVz@6;#!}uSyXm zTSBXS#DYGYdU~E#(gs=sn*P-)qI7bnFx+-!{bn zaZ1?`?>Ys0jIk=J=vuJ!3U7#&SyLCWSp*NGnfIXc;7%Nu9E*1vpo<=Yt}2utHJ zyq?LfA@L4=I~*lhCD2XLOfy?-dFU`3JRccjl)LqiW$jzy?zeFI8h~J&`DL~ExoR*> zV*akpm|-=da1DrXK|*j_61{nHd2J2Id7C09$H`14=!NGgSC&A?!}#ll>pUddijm%a z_5-50$+NM>P8q5#CQG7CHhKOzM%o#XFG~u*W>)O9~c_8G|Ns0_e-c@Tn>xF9(X*goAx;2SD=6i!8V+vO*$YcbDi= zppjT>Ht6LZ)UE9M#ht?aitYvId+c-r)_0Hg@JG;3s|3MJCegN-11Bm8I{I3-wb@_B z47_6&l8)?xe$})f8NOHmpsQ$5;VH>YrRL3+UW333kFzZiXV1VNy9ouVsB6BrQ%rcO ze9lE`ahWg60WH#-%_(u4HWSQ`#S4(ZMM%41&4k+;Wu@#XVFz!A z_U8AZK0WlB+3m0OI~2Yj8ZmUMD#~-Xc53f?|DZQ@k{2-W<*}-F)D((4U|SU(pH9M9 zt`-?280hWS$|lchUC?;POY$?Hu;~cLBL;`R#sLuJR^(hCS@P-Y!8hy-QXG=rR;A2* z)$r%6R9P^h{s>k4EB8jX;Z*PRS2zd+ZG|o$Q9x=In*pNl-3=^F$6$f;=kD~UkikW< zKd69u9Vhqnlyw(SpMnlA6{Ttb7CyqRbil_s#?1W?U2-bQoWT9qSoQ}bjEJ?%V?1qd zpmo{uOG&WCwcWf356Bkg-DazyZr`W7Jq1rQF%tKVYFhvpY#_SF1H9_xzHDfOfZ+iM@PV!y@n(-_3VVASgs`cl5VGkx|}PR21P zTzDbM+H@}B&YLa467&I5>Qz!U-n1^UI3%ScioxOG$Kt&lgE`gDCdAGNOfw{vauKj$ z6us%_=rPH{g8aPm(c2zjItLtMH5y#rpTX1>7h*4=A29dEI{sXd{_>o20^ z$oGh^ny4#8EG;;^w`?`wc;zj+#Gza3qvg+zS<`K{SoZl#0ow@AuTreWY?^ia$9u#j zUe(>zPA|SCS^X?^TSbeJ#v;iFKj4vD&0Au{FN8dc*3G&@>EM{J!OANS)gT^RyRZo?M>4mU;T6KEVzLx5g5F8gkNOH%zCSs454~pf 
zIG>vsCM5arjk+?e_0*~tD%S`7C$3^%F;gkLp_>-w@#M>t3x$2kypuUHm*cj*rzA53 zm}3sy*2vlNP}sh_1syU3=LsK+g8Q0}%9|}n5kClJ1y>n|bg(M?fVfnhAlavwQ!Wr1 zf+kz~?QjAD=N&JYTxyj`Q3(+7-@XqaZ%iN_BPI`!oH8z+eNf&LGMp>Z?^b|vshwkPbb_mxx8s`Kgwozd~e9Zv3;Y5FVuv=A{50NL! z?LsHo)5k8yiayMnQny@jQH3{D#bUli(~0l;2~S=z@JZc};QSPH=62}ICl@q4 z%HG7QXt)NZ1#LdZ7LLUe1v|W4&_xcDJnrkY=qgTU7@k9w3vFL%GQ2|L@8}IE8JFvp za=&u=6k?5UV3((Of^Xl_Jkh-G?ky3FU}af^r{D?Pmh>_2{Y^UnZoBJmm7T5yf?-fE8~|oU5%khx;h~MemOcas}D5<(>lPzcDPn zXv~4w+W*4|iDW|kX+rk5PDtFUG@1k`pouHOeOq?mk1n2lB}(@?lTPy(smVO=eJ{@ z6H-x~RNJKYSK9pt6v@xc4R)x87DP60J~HVj0}SyNhq0T-k@kl5CTpt*dtEp zeH|ZsvsGcI{QAb%MHc31?*U+QsTpa3g3ni*^B}hNfe@#Y1qu84y6^&!^&f=op14eA zn4G7C@*f4T|XeW~w zDW%{i+4;?nIBg}nW)OV5$=8^pNxZdHLcJ*I5BI$rpC%DkmSXS6>3kSr`7|3*elkBw z7I%dg1pYTX_`u-MgLf61!tD# zj7{C{>m9}X`!=qac22i^N5u4q2cPe9$H{;H^}o;B|JZ1$(xm2Yisf)t?_R#WRBZCD zv|m#4=I9yAgDo6_hpLZ6DO+fkVyb>A%DBetiQoUVxF5M!8TK%7i?W}qxrf!cEZD2| ztZxiZgkRGd(Yzhk>OJ!JW=IRI?FVFCy+RDAy}_=OYF_!?UzsMwxTL>xlmJhsUHvrl zYGiq%Zd80YnzYRqg7g-|U;O}!N#;J%Gf{r8{$`_%nE z7&i0nVk{Je?1jUIexkh}95hXnW)C#ZeF|UDDv0TUO77jw_38g&m`nmR_)6k-a0;!b z{4@F1hG?7<^J~7o1Idy$jjJ8*qs#RW@Baye8TO;+pN8=1E0$}qlJ5k8{ry>lzWGyg z_cT;uO9puwbT1w1{3j%*hJk+FYDT$Rg-_)fY4+wVg2c*7pYeb$xjUEdc?xI<5r*5A6N8gdUT;jAthE*~bf1tFi zX)ZW$?aUIPLkTqR`v3=`g5rO84H+~7Y}ZB7fZXE;gj=)!;rIp-;SF`t*-03S?ido= zXjy?3N>Mg@LF3t7G{NoHL;TCH;2%rakg9~eD=?f-7(m=d?kr{lFYir2CNKPQ8q8|Z z?E};Wl}f2Oq&MQCEv8-Sm!J8|A(lV+0ZINs?(p$Oey`f}d(w$aLjL|-rv7q>o@_rL z2+Qph;wJW$?*f(WJyyBu*B|=JLGG^d()=3N)c$=j{(Uk2Ju`kgegAf5*mxx%sVOL! 
z1J`(%yUO*iN5@+tlOA3?ClehXeV?2<$&{68k7O&5xN)n*C%ZiKXxfX5FD&QIG$Yi^ z4rH<9+A3j$jGDF0#Wz9gj87?lfV=;dvHR04<{F~;;3LWB80J*5- zQXSBnA5TM`5jMTR^ zkw_27(99TO#m$bFmo7E#db5i$fF2=wP@|px(`;MSOc(p_0QVqATE>D{@!xpv(*J?g zpQQ>+%A#xnaZc|3B?Q+jsT>Y4V?}=(PXrK#T@d;9newfj-|H-DFy8 zNqf7B*c8TeKtJgA2|b+f z8`O;s_Fy0neP;mb>)SjyKknS|C$z&bfG8l|oVE|9=3j+#+g0t3z;^Xq&DLtAqeI|_ z0iK}#6Hj1@8v9#CtjYTNuwH%=ji|;|S>Pa^JIZz7@I81nYX@^2seQt!7Q=(o{&fFE zwA~}oR#{@+BMm)I#x@EfSi^Og$v%lF*MvNk`x$Wu>X<>s-8LIHSAIJZGP^Qd0|o^J>sgLAVmhBP@kQ{ zaJ@$^p(wN(cmJ8JMXu#Dp;g_9Bqr!SFS&&(cD&{j3iAgxSgpbPv4fskGpn%%w2ji) z=nRVvh}g7V)YL4T7j>rGUoq5ei#j#)z|!e~;T#`JO=I^^IdZUl`b1Xl7?h6m(W9({ zqu_<%^)F}7Uo{aGeR167TjLvgcCQrZXK;O){!ED>MJYzqG`+`LucKML;9a~e10@LdRQ#DWAj zjUGUlb=Sm_)1Mp8At|z;hHiIX{SS!$XEcFpS3l$Gwux`y_mXjOEnL(qYsQYVv%k?~ zz~q17S(Y;D(L1Nj!I;G;{Ul->1AelDzm);ez5QsX=va7<$di+2rb@l8mgK9p1bufb ze7@i{h94y_MB@k>Ms{@P#(Mz^gfL%;ITgrCPRs&}LhiS4$fh16MHi2WicjgjXYX*C zgG_}lJTjE#=s)R)=g*E=4yyO&LVduHG&KqPHC-QKoP6n+_P2OQk6fyKXLyXT;BtC3 zjv&*J#4v0>{gSti=j=l{otyRFb(Wp?PWrF5v?&P3g)_S@?GxivCGZDNy;YENf|}R+ z+KXz2&NGugDQ2@TRdXzwy0M=8Z^(7Cd@U@qhE-mCaN-)R2!bAJ$`gXR-%-OTp#wof zc;VOWPHe_cM9=TL0OCIZTw{^6d9pgha-oX91Et{DSH|?#A>+clt4M*)5zD1v)#h!v zFY1Tr_O#{{;>w4{F2vQG93nKmiwSvm{XDxc)E4;Vsfw5!T0zJD~7xFZQwJhTAsMSEC6nl41 zGVp5P=R+t;$6{=G9C8!m4s=O;I<@!i`8|Pju`0{={@jC_A-as@GaMP+_*He7Gt=o=5IGCDW%d~!}%;JSH-E(yZrQUPM)WOS)PNJjeRx684|o*jt@7I5zKKaU>bY&59i#B^d z|AH!FOt7MbcopwzYPXvlB{^Md4*N0`_XDEbu^Jze5AWT5m&>dU>U%sh@1I0z``TOE zl|0H5j#RGK+~&4DirYBT_-+yDv0!rI^<*V~_k}LO1IB|If?0X)6y zp1*+fRbMciq)B|YG3oWY#a;8Kt_}_&A3EQ*Hb4B}!Bstw>l(MH`dw;pSnqQ$oPB6u z$)nS3SjX9g|5S{X2EWD9r@e9`#Wl!ar&BLU_uHKns#&>dJ8Z@Hdc-6wuuv+5wyzl9 zmm@1Pa#1{w+yBcIt@!;UT-n9&&v3x zD`GekQg%ayU(YVf7Fh~#M#LNlDvNB2Zi-7o4``tmUWqLO2ZQ8w0boE^V}RT}4*X+= zhH01H3emn^{1bqUq$mSPA44sIxG(P~g01>$0LTyb{yI~|o&sZP6&RTSlvTLjK7edq z{w-(vkM09nO63DN*%{mC+m>99TgKh!;YJW}QX}L@TK=jVP09&*A2RL7liSW{XP>VX z4M;1=K-UcDnEYcw1+tuz*{7=04w15?*W0)py+UpF^ar84C4W-p@FS!AXSapZGp z8X1XG;{Fb+$&Xqxpz2Mi(cI^ir|I8Tev}$)`LgN2wUI(7#xbO$hC%{mb1XEGD^$&< 
z^F(e4?Un#FC8f)Y0Q!D|5`&ZEex_mEx$Nav??g9EmOez)-s*aLiDsL?BGI|2!Zkm} zO~T-(8>~5N{f_6A$J16$s2yvh4Kx?&Q9LS{%C!Ms{~u@I7rACHu@H5G?1*lPpIjre z$kW2^Lx^jG<20?}VDxe?cGJGU!0_oG;FZ7o%-??gulF~8K#qV$N#^(h*ieE8?0%Ki zy2HeVW!YEG+3zIk5PNy07e*C>AOr)Kak;hqYSb)G3u5-1-ia~jIi*j>;kVnPM^l!| z`@x3wU;fTtkKs3u5kGqW8z9nZ|F)kH&@rdyMbDdunLm+v-kcMl`{TI&~Rep(IK_jkpfUvy=VAHZbi18}Mf0iy^yLRnYMerXyrfcLtvdQcQ}ECj)ME{GO)Ir|4_AZ>|3n1PS=?Wcv`CruNzL8OWa1wSkpOMARE zHIH7EgCi5qOM_C3gsmR3clb%nH@#>76W8MJeFpqG^%HjHW+!5qiUoXa9txl-v3is3 zyW39GBU{tx!URfqq9_$~!VSMq)C&NedMIETn&SYR&X2tYh|cYkyV&o$--}6mx4cij zukzmv{Db`EuPf-EZhuMXh(o`(d(EVm`kQz$3fRmr0ia({9Mop~>Q`K!4}MyUItotO6pc|Rl8JE-W8jm_K})m5B|xcOlx zZYtAg)q@|97LDa%urRh%`W}K@fuQAVYH%KKAKLul#L{gI}E};7k zCV+O{Z=RS^{CvBj3%PEjxrwPd`S&%dzX@#jpvHed*mUuY9wa%FOm zCEni$Ry@&`xkirUbMF{EIh>9-#GbTnYObl7PwdI%ob;r`IqgReobCpjQjVt^t5^)7 zIMU4tvS3rkv+#T}gnlJ2Eth&53_z3wh|j^gbqCL0jFpMLynUIJS`Ev$0j3|* zDnfPMPu``sIAx58=ZRmxayuaDknCj{VCK~SH<|I?nHMJ$+;TIaNAE(Yc44j#Vh^Hj zdTbgn_BW#oI}!M(1<=Y_1ni^kU|il{rZ$H4F7u-jrg`8}V5VBW~>DwSf9lvsuJ zR#QU2+$tOTSCxSp)O?W1B-o*eyg|SWIoBRbvInc${8ub-?-yXFL(?oZ1fmv63|m3n7a0ZCSFha+H_#28J)j|)G0g@? 
zzEP$b{e3ezb~#fPOl-ocIBa(hV|beF9=$hdAp7sQI=*yHipXGv-jzXu4Pd|H%okGJ z#E?fPVMJRn)z2c*M>2=3bsJN=3g>`se#(Q%HJrRw* zvI)i>KqvN%0LumQS7(o(E4%n~aXfG5b^|Bl*V-bRTsf#S*SzzhIPUd(_kQ9 z){w5vWM96B!=Z5v6B%yw5?yD&k9$zvq3A?--i5*ZOMt^Vw@-Ja8O9#Q=$O9q&Z-2ExMyvEND{ zf4k6u`=B}bW0CHy!vtucRC4X^m)RkX*N@1!L|9q%?({fgzt7}&B>HaY(~G3!du4pf zZ)3(abd0Y0UQD_x+DYPTTD;b4-y)`aVQ^*boldC8k)Jt)W{E7_rq%UES z8dQR*F+Nnw=x;J{NQi**H#x##=DrzM+ZJW*`y^P}k%AXIHO2;Ol;3P(?&A&Xv20#n z(JNuK3%gU!C*D3<*Gb;J*GR9vx^E*&i#{;=#``~~kiWw_|4YwRysks@D$`9OIa(&o z?k?GG?KP2eH!^#E^p=_59qDZ&o~^(3Rm&b7y=ZAChie|@s&Ei`CKIfxeV+fTzqr1+ z*#tJioS8t(Iq`I|<<*C>sKakQJ*%+1o0l~cVIzJXqyNilyWzx3F~Zx_4!8I~502;@ z>eqi|$x+vZO2IXYb;p>s#eD>p%(s&AGrstkl7vcvLr-7UdiVUGsLhSM+L* zLrIk%OM`T}@BV-oJIhzWSL~mem9s0xXcxo}@WK^HL6c4)-k|z2ESC~j|2?E3((wE;`hw1&LKl+N8?t9Ob@u5K317c_)w*Z&m2);g_`u^5w%Js z?l-1?Kti`!a=glB=#NDV4~XisedIqR`4E{k-}8capF)VE zh{=$Eihbo2r4QTUzOocJZ8B+6$n{H?aH} zsGf&%qQps$rXjc~KKiyf!{sx2r=7@T{y_v`X|j6~7LJnIpvV&1GCm0U4)bX`zO##7 z{n-2@0(MOs zxqEP#!zyoPycrxX+l*!}o^)ns%H5dtv)|Q_jELEc{lKKD9P z?YvXkB_>!{7IR}uNtJ1P2)~p8AL{j<=YDt961Uz(*6JDcJl#o!8+*WmG2v*3_ML+j zfL6Bvw0Wn5c`8`!*XPV7{Em@oUKhX4_;hjOY$90jqlBiM8gRV2odtSy_jzo;l*u#c zG~=d6_8n^Ib8o!8l_7qQAHj90%2oksxiJ1qI@^SVHsaVOrI;4Y4|G>3yf>A6%@L}H z_EQMs|Zy! zAD^(pwPG`{s}a6aOGXl-@Y`#W;+xRJrWhrOF;m|3#)?;u?-f(z3FEbjQCYAj6f-;b z398H0gZ;OVk_viM>5?B1gK1^fcwsypMh6;IJB0|+N3^fXINQ7V1Rv5jO9TJ?rH`Xw zyw}Dw^f8M0Vd5o#A>_6poIltt&W|bx3B(^0+-~7$6yVWK%dOrr5t}V_$3!Q|QD
KSa5N>(veU6#5%A(6KL47 z2YbaQ`E|*64x1d`nCP~eb1$O|6PgD{y)o+#iUU7Je>#bId52U^*k5fWTJNw&YP0aX zgVQC}rKi+I-0OLL>YO?A9s316C@J~^!91S5bh2fka6fOr^TfkV+stL3SJUW~!go>d z6Y*X?m(N}|*F5&_WVtr)O?`E@gIx2pGeDqGIcbY1lw5)LqMB!hYvvl=AB(oUB9IIC zJiQ6#0p{rG{h*zQ{n+6k^7YkPoZt_V(s$?pGaUC}$rNQctP-_0iLhMcgS@t^>-{@(?4(HZ5nHe<7NXJAWfNXsq zDVkPd3IZ`bb4gFy^o`k_)F2pz1#PBRocTVSlD6~bNT$jKjGAwt_%9>OVUySFwAf5i zt7Js9*zr9xG3O>u(dJy&%oFg~h7HsOd<4HpQs;xT7si&-VhOPF2UZ7}O;)o4F#DwI zw0$s^?~S};0yFy}Hcy{?Uvl-~9|lL+qu(8J?v*e&haDv>7u>5-#joKsKiZoMbj#Yg z?G@4?XF9y<^ANd|%lUARLW67gYV95SDWOx>4fRJBK3s6;d}s=@>{RFa>iQp3_b*eJ zi}2zL`NmS2`4^vbD6~e>&MbU=`?d zti99@m#p6I4~P@>Q141I19~QoF*vpg2!P)WP1yu zp)!-{apYDSLkdWPRC$Pp50_cLF*s*8Te}aZPu0s((awJU*A@LA-@;yE3!ag=aV$0- zUaV!P!5Sz#4nd}} zhBJlznq>cO6oG8%VKRp@It`_OpDCkdzcPiJRhGSx?TUGpvJv2~bL=6rt3O(SVnN8B zf<-u5`Q#Yb^BRactR|lU|9H{b3^Ose9X2UV4m1 z;K(V?K|Oc^DWrb)aEev5KK65E#4OfF+=XGS>1Q8VT04!Y8znZuhq$G6ZONpE#nTxM zzZ;7p2#7xW`1s-Z(Krw%79vIa zGMbv`4>u#}5;z+Z6Y8R&1NY&_dmM8z%5TP=$`y8V7nnJlf53jz(xea6u`C*z^kL2v z=jTVXh(``;rmX6zG7IAsFfGU+#QMz&Ik1FeQ6hbBkWIV-r&?-FHsD&<=#y zdMbt(c}X2e_LOG9VuhB8?kn;;y2Q`cg)(*c=(TiI(rx&b9iw!E@WIRHRhP8(Xz0MM zLj(ftadv3Cwcs17efes>!jd0&&J|twpjE@jU2_C<)={RMrM^eqn)D$(KpDD9en~Cg z^X$9M$OT15O@xeP*%t+d2*ZW9Z$}HMY4Uy7#rwAR=G6`>b^;7k#LPGVX z%rwyZ#KY2Bq+IBN4IjD0Ag3e>9=C7qM^?4n(CMi;0<|^J@t}Rq7@+wLjKJ%u3Y5QwCgWVWC-Zv`2 ze&2_YFT%xn(Urn0DPKf$58+ckQ*0SFd@hn+gU%uHH7oAf17n1Qu8t>hnak~C6~2pl-le0s!_v6+xDkx04X!>6hF zVyyN?v3Q*B>EgA!LbGfI2QMeRTG4eG-%Fz>B}_I(aMl@m7k(nrE`8QHB&gbUjGp;B zr1}nYQWbW@E7^cROJ!u4=6!qBOF!%}vY`H=<=ZNoqm8!>G(%P?>Xum_8iQxco=|TE z7QS+p?1Dddggz(TSJgw!#m<<4bc5>K0=BDqFZ4<)un9G9p;hLhH;q%EKEMV^XR1V2 zk82rrAa{#JpgZpy={lS++z=MyJbga1~}hdcaw< z!_8h7UD-CSDz?czr4i?wOWKd@lC76=ceg%!DJZ+B>q4|g1A>S0%q?-~UerzL7S03T zruP`@2?UMsr*truI@!&W%^;_S$?7Aus2J1-!n+>YX>V1TMoQr#c?b@B&^KhusyE zKAK3pyzgYv$APAijMI;n*c|$$CdOIpKzAA{_DO+fqK?w_IfP13$;PqLqjVbb(m1pK zyb2diPt8dmoi^hiCnwsX$xC+3v=(H#;uWT0o%|vp{+k>b)b$j@Ic}T)q6jW+w|kw^ zxKi|@WINXI0eX6C_qE#b-!K|B$CVs zmgYr8!(|TA&#+L)qxUatnuzm(x&?LCH 
zU;B`J-Tqf{IDYr8!VFl2OZmGu0ReM#--;cUlN5uHr;)VQT zy4}DpIM!BR1#c#2=lB**Ktrnd zNv;5ewX&6twK~2kDLiNVhSkw>06M8(y&J1H?-)!NpL!9b*uq~bx5V%we48`d=ivDH zpCT!_ZP-^>M44e7Sz974)M)u!D38yBS>yBkR+rWdMH%^TK4W)bq2uocm-A0P3794* zPmMg~gvs{mFztf8Su|zqJ{A43xVX>6A+2~ff-p3!TAud8U~j03@k@W^K7YhfL8qO$@fb`e&WZSzWSQ31N5nk(7%$9S@c2Q~56FFkXrU3STPiZd43eGi+1f{k z-(>{#X}>d2`vLKL3w8(4I(ptIMxL_a)YQL{1n@zDr?B1ar#~HcTe;0TDe4)GA8id| zkS5yO*1BaM{UAC1v-9}jd6 z+`~Emf;|Q~(Y%OwtGA5oaz4JDmUrg)8$*H#?tfZeCKt4IzMY{+ad}8(3(&~wXT7*& zLnB9bytAMgn+d^Xcv zvvrU8u{&Kyvs4~N({5_PJoTC1x6DLc!}@TQyJR!X4+Ao*`&soq{j`OahqkMkp$s(( z)$ApwS^NqdFlInvT>>}9p+NP~dzCZx>?W}-1P{R)uUjG<8g|=h(V|T*^HZ<1$~)1R z_BhG;9Jn<)9_j}Nu0$e^w(Th_QuT*#cnnNGaG<+-Dmtb?fvlPXHx%%$f^M;i{#?+u ziF{wD+NNWsz`Mp#_^ZFl2IoRtWPFCJDv=lGf}n}l0-y}33rF2ki+-R_cNlLiGQJg; z>1pk5_X<+~W2V7|WIr>4&|ixa>B@RtHK0~Jy?%!?M}u}HY_bmKW>3V6Hey5d?idh6u+!7SVe>&spGvo z`{Z=HN-^rY<;_C7<7Un&twS$Ub};l!)k zd2Sgb{V(-z;8Oq#g3?AFn_2Q}JA?Z=Cicc8+usyAiog2aJ^rbw%MqjK`w`AdYo)<& z#@}#&c*k)3PUq&~TP;PpTaUh%+8h6 z`x$?G} zdQ>sReEIL(m%3&SSZ*;5?dTpSU8xRHAhH&_t+(^JQ~2F3Yj^9L9$8W+QYryODPTrP0pNTd)tG@M;T;je<6-ZAv3-!f;i4&@^9{ zgI7cje!+atJc0W3iTwu5Dgtw(y2>MgUq@Ox5`*AC<%ZgFnfBq5s#1)YY@}T^;3(=}G6W^(>87B5uuo0th9Ut|WNY z*bRBwwf<#&pkvwOBZLSnncIju562^`e>9aKiya33QoR2=-}nDVygCR3V%K!cmjXJx z0$e8Le(1yVUgYlMEy%_)u+Kh3W&Z15mv1__pDYAT;4*`POsgxTv@B0o{fBa90gw zdAEf|B0TScW|(Jm;dt2R&-brYF<2(4C_{w=zDiI(Ywl13UXM?!xN+XNn8wez+hxP8 zzt=*V$^WvA4Bx0mjZ9 zAdnG38k^i%zsghVdClmpN z(Avek^1gXfO1%G#r3fv9VQ$TfT3`Z|n#po$Jx@wT5tQo*%x%wEr@CK+w%6LabW;AX z&-%bcg4c7T7?^5*L%%CjszM_8f#NgugUfRfgyRvU9&QNf- zskk;GSC1S!i$R8+Gj2pg4Xfq}g~;@RA3Qi{qkBHj^=y_TKB&V>HX+*PvA*oEwN|kX&f@mpgx-@~0{+MM$j+8I^h6dwEc$q8Gdfc|l;KW?Q=(DS z+~Z+K{Scq|)Hl3eP`w5va9f;pM`6s6ogu)&zAZb?X`0K&Yp3+ykBCVYeaR$Q?Z!uK z*QM89))Fk2*Yo&gxDfF`bk{K6am5>iO|-3RKnw$&`i2bBaPzYVIi>bbXZN1BC0n!G zJ>;Lm5K*~7(2Gr*MGK=}_r`t^soUbRxMZ7B7TCp3e9`fExAybfcr8o;`*=gebkZl5 zI%TOC^*o9g;uiKOX^%?{bP4lColu=(@7tl;%|rQFrSB#n&nh&JF991AnhCN`3r)-< z0ZQ_Pwx&Z`0Er#^4lV5UqbivHT41!O=w-da<#%q}Yfpvy9vXROIh 
z)Sn7o((V#cz3s{ckS6_TW`HNkhyAvc!gtF%&m%^%#mb69AYdc`M)cgTvL<`xN#&SN zaQSYdXRd3`-RAGjq?u&mKL}5OrrrgEJiYiAgsMlK+GJGrem;=)@WioXAx3{l_w*Zg zbk57Xua4$HBkYQPmyD9mT$7gbtYVePgANCiQ1+lApZ*v)%luk&WmWM{gD27*Lf^`e zvrN+`y3s5h=X5w)#(g%^zOxaFTw2a*_H`=dCFljb`JpF195Z=iY&&ySBW6QXWYSbq z>d2L`Fo&iN^+RDNVy~YO#?moMA)fzYG`aXwf2yOG!&d1b093mY<H1JnfjmQj5S8RkuUiV}nk-G>dU*C)+E()PX71|{ zrUa#z?@K#}6q-pheDs!$O}TLzb@OK;^J58R4WC`>ge$7KOL*dYEd3XQ8HwB(dDnIz zWQ`O+X6$TiIh1C@N57)9$~Nt}*n=M*KDZla*EkvYuN`#wi0?kW$KR)0WUyiRsd92> zSGw?pnTDuaX6^U^rShD0OWm2q`u2`qvXhgQH z7lM9uEW1W`-?7uBI%=T-@fcQjuGCjozGim_Iob5xV#QvY-O1{a$t1Ct&_oJKe7N3a>*wU z9ihw;4|Q?t2C2Y2O~bpnKY?8<>qX}sv4iHLl4FZ%!z;>krb9^dLFoYYUcpb@0xQ1_ zOC%r><6cg80VHBsp86&hN++TTloESd!~1i^`6dq?KsTS16l8dGV#J^+V0xT&B=`ot zP`OgX*~MDyp||zTv#E3{Hdd0L;Se@pre+Vf!TXiK4F=t>5bko<+tZ&09lo*XeiEj~ zqIdZkj1rg|m5$ebQ+txdKrqQ9&k9G}%A8P4;jMdF@>(=L;2UQU zG#g4S-b+Vdm@g6^5e7i;pOv;tSDqnuxGzoD^tnya9S33LE*-U=EbW9I&@0uH#&z5b zOL@>X5O)RSkNSWBA^OiH&xf$+DEKM$m*X*?m|1Z`zo4sHbff8g$E^(O2EW_(As=Lg zCHsmK482!<-qz|YJr;OYQMos8y8GoIix%eAR)|8`rPRJc)*Xf?h|bEl)8h4#2rl^i zUqn>ye8uGn#l?2MDYLJpZYg%Iscb9BKZWsT%5xBu=OM)SJQO((oI58A#?{7`A*@wrMa}JTHQ%mulBIiW z6Oy%LqLETMey8x1x@nG9X-stf2 zDzHtFt|Rb+;@}~R8}b*z zleI#}$C@U**3Q#k)$?yKv(N6Z{~~%*>?00L&i|d_`>*GaHA5?Bk{+GYjSa#qYAfB= z$~#vftW`_U2L!2+&eAy$wVnL7nC;a%ET0v) z9~XnUix*<2XU1Dsec{15oa++j6|?8xYNvDEU_os|cZ|Yc-`Aqv5pYr_M2f7KPU70; z&G5tO0g9uYr`&7HQ{~BY=8Ullv;Y*i8#P?S z0c0H5ZnsG}#h0*t3|RyL>C3hgzN7x#pDsfDX?!QO380wGTwKf)h26OL1MnT_DYQSa zO23eju)WTxL$x%;2P#Iq{yW6D{@I};UMnAJ{}e8d7M@@O_Xxi%qhMO*2;d;Y)8@-f zGOpTB03&KM~KRGu?fXk+U%Tl?sT-rtU?) 
zo}nHfcr7s2iP>`Tx!x)6$+gtix7pVnAg`_sg2nM2hfL6__9jSi}SGuo?EnxxAx>a z!G}}UQ&Fb_WceFN(#9)zx2Jx*#<+W=-%_Yq#{vFth?LFMWw>{0kVZ9L>J83xB@2?%_xA=G8KrF4e$|Tu1$+oNH z!O!}kvRG(GKO6qikUFFdJul4l^8C@avh14mZbnPhouHtpr(~;o9nKMHUaixj-G}QV zv;BT|VKT*OE*Oq?K2=b_v2@VD?1mLm2I{pR?=g+G3Ykj@(D)=U zpq@c5!V)`7`EiF|s!k3@YNI#bMsSlilVamL{fQHQD)Y`XuAjX-r!|Fv)YID=Iif#| z9;mdxyu^4%A#T#>tcru4&7Y*ih9vDDpu4HGYJdLSmdG)+Su*epP z+K_k>coVnvdUP-^pBMq43eUIud=F}g(N2x>!RjNto+Z|7d(eGnYB?UDH zDzIIayhnU;h5^yo%-k5_MOT~#*qS+3{YWYTPjAv7bSS!n4LY5VS+xj{JGO z*5~!>VL54IERo(8@qL#tgO`7XH(zYo?|lIWjmRqFa#|YiVOOF&R7GURx3UA3buaLO z%>{`9+(H$(ygJ~izn{gNX6BXgib!ww(@6soMS(@Z8&U?h;$A7eQXq>35M3Qu=%Lap zlt{40Rxjbsta5B0P|V|};o%rcG97H^SFiW>PmdWYHOq9qIj~;1%*hdWnd5}{Xc97! zgHbHW5Lyn>tbM0vQN*$xghL15yQT0%VK6~SKdDyLvT~e2k zh=N*PE}vwd^Uw)rT7=08eLKc&A6{OUSEdS0u}P*GcG=bePaQNfEjOIkqvp{5D|qiY zdq?{HAU~Ok(V5}o>)(hbsQn5Hb7QWM#t4r7v$4ANNCmkguSV^th>>MOhKOwOYJ7ne zB{_&SJ zjh^eWrV%-7IjYVJ)b7VLfVJ0I4Jv&=I_kjG#L3iS> zz}`#tZMG6gNH`NYZ+nGY6YupQYl9y1g6V!6g2bLDe)I;Q3E(Z|YElD^0G1)@Zl(cP z^toz(FmjUy08et)YR;5vEZVBB zyN`iwo$t+#jL(83Nezb=s@5I((`~*YxUj3>4D~DvbT5`ajP|^{tBPbi@c@;CxQy=} zp3iF+bHS+0%v_mI)T|N8bHDhG#Z3g79CwLLzMqT{HOvOl8a}YsX&lHRqwtIO*1Q}-G)wzhA0>%!Ykgrdj!EUB?P9ZEMh2a9^o8QY z+TM8n(CP7``+CB-&G=|8h9H5cIFZ89VxXa5b{jN9ZSsgC{MuP&uRha9w9|9oCy|BU0yI}hMO()rp!opRHA`VQxzTuc z$ic%4tn?wjg|3G-sM0`3s^96#a)6j-8aK zgW3S1tPouIm!qhPcb71Nuoe+6UtS%O0&{Xnt?gHW2UdN3Xw8a{&dwWwGt+=S1mo5G z!aGDH{zOHjmoDmCuwTA+F}6GQH4t413T5XFJm5C}I91!_cS%(*EQ5jkY=1ou<@6f8 zUMBtccTyGWgb)X~mp%z*8c6o?3o-!H_CHe$0c#Xs9ZL8x*AFb|ko_A)9(ovH z=cc|03>iv30OsKz4c|WjP~*Qh?D-o>*MA@}%J=RXKN`@80EjpW#BbiBPHVtIEbxX;{tJ#R9=Wb_8{APll_JG{h{Cjg_*(w2s6J{2Bh zH_rR?B0cz(rb3I{B_DK|-VejEpj=qUk8yj60=FxxjIMd3`mT9X%-usCO<@qq!>+t< z=K=$?=f>MCXA;0S|0D{?bmhqu%~pxnY}oog=a+D{0Gvv}g*vOm2X(#e(2Xq~c9VAA z57bO6L7hr^avN3u5OVp?Yjd(cK;p_B(jWrY4C_d}tP}Hy-Sb_e^1`}^&q1~v*Tq`o zHumZkst9k?pPknM7kW*W?nti#$)XA{pPG-(IV+(b)GjR-726Gw`)X$|@4~xJv$)N}uaHzd3D%l?kILKc|7kv7 z5%;_iz~!Er9lcHz;0L$f(agpu!PC!vx2LY2^-P-MR@QQ(4RFPn^%D?9$)z 
zbI?y_T<|%Nvp*2)fb0sb{~|hAasfD86HQ{^d#lo9Az-fARZ|v!BLhR>+OW~o!>($^TV}^z^?X+h>eKuxaw5Y?r;Lyaru4CtE`FViH0BK$P>zlvz{%@ z`DDtV*TjipS@0WkJs<|S?8s7r_^DGO`OKtRRM}fS-}iE_*0077OUebqM^Aa|hbLiQ zb+H?FPvs7>CN3QAz3?DmPt|icyXc3+$>>%NBG_S(PitRARMK|V>~Q6qMZ3lXe;m`Q%|UI$EFEa}QK?N?Zb8_SV-|qI4#+*3 zM-SfkSFliW&g+GW3v0MnR8$Ra&KOwS)Y5s7kzm7gJUk~tZ!tk;xFd;NOeBL3(_A-~sxZ>i*ze$pDPSl6CgUU2QrN8rrh!;hwN(r(}2z}2| z|6b~U)CMZPAfv81hirp{rJZ%f2zmI1vv2Dp$GmePQCQ~FihJaw;Naw(Ucl2Bf$EOj zfPml>xCF(ObeYw_C&lC`fy=j4m{^@3D3IPEz2?JizdzfWwvNwglZJ+A$)dJc^h>UK zOqeJLW|_y!vt9fXD%u4ARQo%b+5Sc%3D;eSJ(hMPIiH5?)1xYYG(gRNZ8!tv z_={!w8t8vIrk?;Zj{k2h)BoTMz+JaHqFuO}f9mSmEjntY0e) z_t0;#B0TE(V3y@mmrX_AVlx=@QD_^2WrzSFx*#Vp#9|h6Zg`Ow$H`M=kKJ^-iEh{U z3ap1zSpn;c23)|_=<@Gb(*AWnrNcZ&;yBN8-*g|F;qHfOra>B9tnW;8tDoN~J?QWUNy{vx`7`ZQlT2IlwB+`Utb1t|kcmy?!eSA-NUdPMgqt@pKDnS<+Wu)!OF{n&3Gf1?$Udz2|N- z$Co&K-0eif(?UI38S^NfV7Jz|k!iro@7CA#0IR*}C--m2%jj1B4WSyei2d$=lL~F% ze;wfeS^+j)zM6dt-)*?l-I436s5LWv1i5DS! z^#nC?B>0P{=Hb#wR+h_IUi%E^vsXW>N1ZPL5TBE`6eL}5BQrJ6(3aVwB-xBHVxo^L zgo`bnNEZ}``YvqPdeq9!Uo*UT(aAl%Q>)sXhv8UrV2)-Bc#vpu;M4_b>YuRC$ML2t z)uR!CSE8}^mBv@U!NlQ@bLW+`@E&Kvc)B!`PD%$u9DTSo#^fqMU~MuPfHq^RD(C&% zrZp~uvzz#21^vL`m}%apQJX2xy-U&0ojH+kPG>R=&0utkVWAVpx2^XwgQ`8VGdxcy0o=cG6kar^Cvs45C=RQv!;;VUhgC#2xIL}N z+R__hvfN#^(B3)RF0r6X=jjDR&(uw+Z8WM(;JG(M>#6I$iKc?SAmZSE&ldR6 zM1+r)m&I(KnXbGS>Uc!J!MLP1fGF`Gy^S=Ay<%eE|yzqD%%xaSE*0?Astf z2(SJsh`MCo?N<7hy<@$W_43c^d$BQf529-h%%CD~I*~@7J~W^SwB2p|zBc(gZi@gW ziGG^Wg=tK_%F4azbxn1@#xFaS*xD3@CQTP&I|7InUI3> zB1HS3&yZ7p`8H@6awP9G=UA_yT|M7yV(*MvSoUh+>t?vpT2z_{%i_JTPMsCS! zIlT=v2z7Xk?1vFO%PPFlRYK|AiNZ0PsLe}6&Ry?a9-sNKo9P{_w5sGw7?SLA44P=T zI_~jyxJ}RGV1EN&1n&zUKKWs*a|)O#1EVm#^Xs!=Cnlc@8YjLtPP$Av_y-anT2JI{ zKyznq4c-&EW(x@S6J})6iNh^aW3D-@xJWUQ6vKa>3gGoNP1#4_Mz7u`YQNdU(gHF^ zv(t88F@TPkoOh~ie{=#35^^u%H{=?<9igEUUn@EA?sZH@7?)SqM{lO2rN|#S4>F1! zMetQ;o{~7>u?_(jU)yslWdd;Hegd`mSP?h+_5N5*!{xzyV>FhpRJgsm*DrHX>4`@c zo{#02e+r+UeH&D%mL5aAmGo%k{FH7P`t_&{N=;B0^Wd*nGBk3$E&0~4=*FI*_S9E? 
z$_h_cV%GS6^CQa8wye;!Ti%tKpyBtsAv@x8(W&w~4J}S`ZhFxtAz{f!T>~Kh?qHzOb(Mcf&Q$L)#oQ1k8x4_#iUfg{uN zpd^RN9p^o1p0&?HtP36=LS0Vv))-!?+PlqrbB9>)t)c~VlS3|Z_AJu0pkjnkxnD*w zBm0Eb!(pL;y@X&E*S$`a=H;fRPL1?%{?f{n!@af>_dD{@TnVx-1PY@PG+$hVuY7_5 zYncS}a64SWXt zx-(G*7{Wrq)fAcemi1FZN{lgrn zjbX%2#fb1?RT8rFR?GVT=EsQR&Ttx zni)YP_!fKxYbGU4l>xkhdHP6v+S=-S3U$3UQ#Q2Y^)`cHVdr8DV~i9=S%(e$w0Uc7 zq9#)0HDE;Oej4v(J70G(Swq1_7waeQ;ExDwxEetuPeerQFMX9Tk8i?FKE<7<^S(wu zj@7IT-0}UsoN7YI?#|iH*Xq+WeP_RqXnazAmI#;e@Drb^Js4NbwdO1BygcQqw2e$A zg7;%t)U%%9QSWp4epK!@w7;eZ*3Yr}zE3*m2ZRQ{F}25@&MUtLvncZq+>GBYxxrPX zN7<Tvf%~}V1^bp#}1(Ym9{f4aPzM} z=5eMNzr>cpQS^%l9woA7;u$^PPmTBF2~`+&bYDc)3umiT`FRFpbynv8>DcKnqo_u^ zvpl#}Em;7%EoI2D9-Y<0;X#j!8v@~JCNk%;9-l~TZ%?$2*-w6d&f@EC%N5^8r=?^N zzDdL+emqsW4k<@=Ru{j&&XdGQ0c6rpf=N=$SBI=)52vV-E}!fGM%Pl213coJ!0htGrk046$d4&ccJnb(2e1Kehy zPk_A`7{|8ymmZ6)sGl7YdJ*MtK~H4J45JtK+qIORyou+@mA zk(~S~-Ig-=HNUf?Q=nva)I}Xd#;flE7~mp_@bA%*-XgjKa@hpW(B+dEw`v~RXCJA| zMEpKJQb`9Wdyh0AVa#WiC@?-4JKP4n?q2Z)ZElf9k?x!Sh5L18H7`d&b9W|nDUm;Q zJe=)R7Z#*6j2VV&e!BhT#nVXFgtt~NWWFbRcpkC1&GPb8z!gr7*1aU0$wKz|HcMEq z@z$jGO~#L!>nOxea>YKiCow!rl6A3_LB6!3Hw(m zca}JgWAplBpZe&H8aDaMQlm`755K-2H#KgWNzuRdZT!H&;+!Y-BU~2C6YBaVO*3>; z*ojk0RhxA0#z~PmC-&ZDLJ7|F9j1s)%b8$8F)+LS{csCSa$TkDD}2#|i32OoIn0O} zM3KW)%kf#*9q8DWP;M#xps|s(N=sSmloUtIESPMlfdz^U4X)R|1+Y6}A6JY#VWKI= z6_t^U2-3t+zoJyng_*b2Qf1vv&atf=d+wGZ`tXKb{!+-85YcfN5t-BiFra2EV)W{Ua!y@rAY)3U(y@2t7q_gQbt&}A+i!?aAjYDdKPft5dYQISx1JWe*sW)4 zL&9@jTu7WHjoK&q=;xe^muWS;_`P9ZUyV|FKCaWUvND2K0BDbH1qI4W9)50kIFD-E zdcL$HPEED8r+HIom&Ki;{Xj^bG|NpMbb+9SAH;wnc3a-BRXSUKIu5eTc+?}byRvj! 
zSILFI2l%B)a&Bn)gUGiVK`>z=5r9alB;8j?!;QFkaQ4APe9 zm!~l3@R_GmcKpSoME3ON=~xG}mVAE##1`^;A~viZ#frC^E^*p=xr-8BVDbugeXdB7 zV)>oolB%)!M{i^sa>7Ut8}f!Bv|jV2hrN*^je>w=3~W&8f`kGU5fQ~)%;Mo0S&57; zz|%2tT|$ZbU1(j$Xc+1RP(=AnGC{)MD|aElXZa|&LXNRb=8a*f`l%sfZf%rS4R%y;@Vryugn4t7WG;4M;asVg_Pn0u_mIL*VZQhh=mz}8 z5M1WfB(2tf@Z2og*4Kl$^!$4ZL9l8}mUrIyS}$8?L(We-?e;pF?c#;hWVeh9>ncnu zbYJc59nBr-l9TwIoi7NvC^82>Qe@Nx;9lZpl8+=1k#cqO&L*OIk5`Ek0iq-n*zZ{a z%hwesVq-#ylS4VM%39Dzd0So|(jEKfCqKBvR`Whc-JdOI_1!`hR``)CqPK!G>&%?< z<*6Qd-4!aN&ZnTFZg2%*3&2dyvjBmk^Ig`-+*?9v1StXm3pK!bb?ty2I0)%FaYerf zy!`!c%Y{A$MU~ z`KnW%P&4PeR*lC-k4pK$xqM9EgQ7opP5h;z`jgV*_woM&^8ME!pI$!{6gxTFj9_Ms zR%<_W<~s-(myLlW`;kLsev=GT@g3MAP1(HkqNy)OA)}t4A82+7_*=WLTMWl^bl#jl z^8rJIhn1Xqe;<4B4wpu(3A$~f&{S+yJ=G;S+hoh^~lw!YO=&T z&ZX?h>SE+yMwPf&+rd)SAAnbN`}!7lMU|*aVIr+Q(13d3Pk2pbW5cz9ur`{&tlauG~Y7M1Om7^Xi%|Qjk}Wz^MA51{o!q?r#%EPwFDLn^c1+;L*s%o8!brN+2flt0QsW|awZh$FROQ`4xHkV z5A;H&&cE~f@_~$opJL~#GarF=$s4)r@u(wlhSgF~2bIZ5dhHHPzwYWef*Wuf>dGyK7P`S;>NZ4@5UNE>i<85Aa&MR{4 z<1b`n6}}#H8)H8{KD$C87!$wlsd-~bQLD1+2a*+hsdvGkKcZfM#=}3rIv`B9=IWi# zj94y`7#IhYlGO1npoYc7wr&)B_hd(!LIA&}Ashc( znRmgNYz-leT|$S7zj&la!hb8O9EgO7_iNdjvTzqa9>v54BWVz$7qO@jL2TJjI(+-W85h$0Y+|Kj! 
zrU)`uV0OF>shu$laX681Gi|~`=Xkz)6;0yPyXe)e^CoD7Bod>``Z<=oJ}fZkx=%0(SJeig$6s1z z6rX&cZIhyycFk1i%W+)e-3Cok1Sdxjw5VJUE*U=jW_$G`dkD*(^nWgG(#z=dhIELL!mAspNRi8R}D#28k}wp%3=iTQK+fG9)7%W z*G{5j`!G?U@N>7?U4DO}#YAZSz8;rYo`#3@Ra^c^jS2m3$!!DmP6&`{N4J;v_Epmj z$YKU8>%7J5cUsLJdCNI-H7+!4lW^Zwv_%f*5j}PNLo!pd0(WbpWHDcU3lCPRLIj@>!)LZ00?I6S^Fb;o0cZFF|d_a{8VW+Y+lTq&#xkH*HS z<|rMVn0zr0_@eHlx<2?-u&ZBN=ghlA#TI$(oXg(Iv$Ao_uCYz6?(&Blo{CIc?-O*N?=j9LX%n}sLG1=ju=XMx8iBcftxy<c0&00YV_uSlC-yI(=E${n4MbOciAnRdjVRXB$(PXjLV|pq%FSWd@9kHymcM4>BKLhE>L(}G!yqog`hV=PbKGCYx809pq2!XiRc3Tq@y>~ z6Q)zl|1>gT<4o|JH-ArXeXZyS!UuQ6qB}B)>tfI*<=ouS76$Va*n~Fj`*W1tFisp3 zDeVF9(Dh=d88!?x3hXgbc_}>_YAO!0Cm8+3!TwBE#I-=&85z-QVtPrRdAn5RzVZH@@JA5_s`$`jS}M8Ao=8*!RM#!H?~p zKYzj*`{wJRYlL+Y!3b|=jT4hMQn)o~=c$oH`Z6x(TOPh?AFHAe*yCQNo|% zjf%v(ji9#AK0P&_ymVJO^A*>V9Fmp^%@M1nXRDv@Ff}~6$HuStm@PFE#3G=zXtzqJ z9)auPyf75xIn%~=hM#ZicwcLOP`bb}#!XxYN(Wc$qndXD7-pD8IB}s-1IkD$M3)WY z{UrSt-)|ZRj&lhFx{T~a`B^I3hxe^$C*~+!rSY9SQ5@}h{B-fPH;`@xED3492RVRC zg#q$gK{gV7`r8EB0uS_aTv=^d2ivnJcQ-nL?3;gKI*jj0U0km`)3S>M-E z^dnyTFE?^n>=uwn!i6+B0Un?=9o?-_45Zwe5P#-&wgElkkMQR_ivnmoDeI6!W4PnTchDIQDSmKU2!*k+ zdN^%VEf*#mG;-~7u@4`~u0IM`_N<^035F(uQ)`JHUF622-q?;F zL|5-~&>YXt$I=0DV6M2}uHbqtyItG9`MgRpzI7M_ecvl8`9sR+;t&v@7~p-?jO~xO zC&FlZ*B-%Mr&ed+i<)`9U6Pk;t`Gddn=;is<*-FRMpxNo};mOvU8xi&EpMW&J63CO9#Z_IhOVd9T^>9^UHm)W|Mbt?t0#OD0T7%upN1jMPARlyl#^R9S!X9nF8@EPx*r$_36ia*5o2KH9wU^ z^lQ#L^L8~#Rds$@@JP-Gf>=^O##A6HrEFwas;2wXrxJk={-i{uF!4bOYFe;a-b5Ch z>)sC$CKSv5P!^Md_|G9~K7+138J|(>MT$Q zx&KZOKmZ(m5gl`%K+i&cGhyu0_Nhafb~WIFzldH$g3fq>fMGz1{P#mjR_M=bSjM11rquY<~!!tR@zTx&ONAdy!3IMXl@qVDG)-n%cIt z;UFj~LZm6uqJp9zf*`#_RHO+=4Mm8G^o@v!bb=tDQ~?D=0R`zL(vcP*RFU449(oZ- zC?SyIJKg8*efHV?JMaB|_x`@``~gW;R@PdXnQP23p7D%l*p~78m~4vjFQ-z!BKT4~ z!YuLq)ZFyK79v*gV3S0TdYzG4mJkH-5yMV# zCCgiEXXSjsqLHs>Ha-(hdb8gPmJ)z3m-Rg4N8-eWavCda+EAQdu(|cJ63vf=b`sw- zjcey@qBE*a5TClvZ91=RzX?t6@ zm9wEuA&SKP&d2Kjo09hJuETH#qvy{go~j&SxPLqjBjU&7gCroZ)fefUT~CZX<@F>= zNY8)dud#HUDZ7i{-E5h}=>LEgE#Z-sVv=D~api6UCBc#Orzt9LtP=?E$ZwE?ZmS{6 
zoJ9kQitl;C!luidm6$Rw@kZXI7n4)5{9<{NV2;a0a-8R}U@<5529@-TdWLbuE9mv! zVLGc!b4}`_n;EB5K1(W^yi$;DLV9ezH+Q+mg!1?!-zT!pz4qu(!YZ2|&`x08z_2nJYK7Fh$*M2~t-&DB)I=^j<(k*LbosUuHlFifR`K321Ed>@7#Q<(uMUcuiW zQ_oOXeToik1=vVGV74O>nJDbhFQ~3=A=)k|O-YR+!5mLzWBPU;F^$x+kJKcKxO~<2 zP7d%p?SX9iI$#{56q&b|N=$s9SougGpM#25LMXa5YfgwH5EvhAIJ0IeZCp{vR2Lrw zT_+}Qww#(ug38MA_Z`^D58ZawAU&B%hAfv7?(0rEvC47>RZJ2&Hxra&AoB0$vILVv z=8-HO8Ccrc-gsmD%m!khy+2hki?)=774R0{^Y9f|690B{h@TQG|2L1nss?bJMDwCr zd+lJv1(4w>@y)pa11e7AYl45s{@KLsi?5{N-ij0B&)fbXORz2wUxhRorCqL_z-x->VKH^9kF-lZRJo6n&&!dmOcI zo!d0g%jx*^4KlKX3ZkiEC)J7or=AqO5cKmhYJB*Kk4CD8+T{uY3+8TVskD8Knv(8~ zZ235=`iHGjdSS78p_Mo67P*6hW38prvPRVG9q**gBmlwO)GQlW2qHwNL6^@*>yRa9|+O&ZS0QflYib_JFG%$-7Q|waL2$G}VQocDZ#Tae>7l#YdJ5~B<0D)<45;rzqtWngwS>Er}CV^RovA~7c+e@ zU+qn^%-~&;d8eEF6|}2MWH)>+1${Eq@pej~%iXEs`J19EpmNT*>uI)9*g;>B!Sl}a zBuQ*$)>}K{Lv&tTqEnYhPv69@XuF+9e5q#*m4qPnS&{ zO5I?#?=}`_aYOqjGj4u;)w%!6#pXm}s1q0Ldac#$2Ly=;mFh!-5C%THjov!?A=-Hv zv2dW)FV}HCR7CjMTfKXIp^lw;=>yH}`Qyd9L7o-8#ZlZt83Z%UAI`R{m|E{SLl%{Q zMI~|d&S{D9EhJ<-I{RXo-k`*VNrudRh0g?4d{<<_EO>HgY>lzB@`+HeO0Wip#QTk- zc*pvNyTm>D*^1~kN~x65yGS8b0xX20;Y`Fp7mX{PDUA&oK7;1>-1UucLcDgw6wTG= zIZQ3%?Er8!h<#qs^UhI@BWdFDWzVfsj&rNwGH_Ou?Xrw^KLtLe(0-?hI6X{_H3?M+ zxqj^KRT4t!+{36mpILz*VNp9n-9g=TX_KIS*lj)Jd6L4UTC=DtmKzm{!tE^>_*$}; z(%{WBQ~LDOt&DTm?+8xw(@FK9>Tv|-%{%%91Mv#HaG_SG*HFGD2%aw9aPJ_L7heH3Ec7KIr$e9S!#-EHst%ioZc1azcTD z{VhS|anNxRcXT8@R{}<5VHtAm){hOPD3oR~q8*y9AcT#)bX`t2i=K8T&z#`F>tbKSJxL=FJQp$SA) zw+E|48Hs75Aw%ewimyBrKd-!hwC$=4e-N0zYAMns(n-0AB5HfrccC0CKKo{G$F`OQ zwnr}Q)s2^&+Om^h*__0vE2egRucr9F{rlGQZ-LhP2jiC99Yu@oN2~AfH{+$ScI+k9 z({cI)i~xT)W_s`~BH6L4BsjY)`oKQ%DJrfHg>3Ry;1-s(HciopT`6bqlRELYmRwHY zfERal)&g}wDA5VNk%0k7=W+vsIXdvZ5NPp+%>(My$Vh%vm?=`Be0SImIspf6T}Mq>|$w@S~n>Q{xk)=4dMAXF8#&0Ow2O3-yR>3?ky zZW#_{#+<}&9Dtu3fj@ZspOA-w6C-3vDtX@3Xxg)8OPu9lvbwY~tHacQmLBkNy1E!J z1)0tDm8-tD0H(0p*A{t-)+`bgB%?+iDyahn3fDq+Dtk{AEJDmBls*G?hj2E+(Tl)`F7wU(08k@^K3K|rM%UE@k?Bh4&{Ta49X0Fi zxOo!ok}9~jn!}CyhW+}GOmp<@GDJ{7-ZKKD(j`1hR_(UrJiL4iAF5lO(*`3C6agGC 
z(_x6#1qyjz(QFR*I^DsL+9BRXoKu&G0i+A4*_%iTBRJudAqL+C+l!jNsY@mR-gi?b z{g+c{Ucg5KQBMdhnByA+a~MW(h$10D%Kw+UeV+f%ZvPpOe`AK&K_9_4#51TQP;owG zNcZ(VA_6wVh=Y8CyaXL7`RBetA`GC*On6T(lE2@fLAC8m7x=qXJ-lh%n7!;PO8^I2 z85;QpF$uEiBx_TIAHmKpE;?_CzDCWP+tJSpi_Elj4mH<`2b?J$)xm{mw{ke3VCR#2 z%9YHCl{Id+yQ&m8)QU!RdZ3N0fbMYV4YX|OQ<;9aTY6Qxj_@>?GmyWhbzH!-{XnqrEYQ3M`iE^<>x z?Q{~!bHrtZQ@7}vNYbTA-6zTHHS&we_$MCn^>#nl$VxwW7t{Ls&WBesbV{xzO84$M zE+GSwi)_O1U$K*7>|C2jR=N*+j|B96gY+J~aON9iqw8T5jy}eBtA30H-yCqizeiCt z<5GrMI9q|(r)d=Xhc*T8f|C_yzppjYDrk~we;s#?%`YI?g#TE@niIo*Kl@1dgg`9y z6uso-CnbCq`O~~tu7)g81SmU*w+J|{QnzSf(wNJ$V-;IQqDgiAuHqh}!X#>Y4{BJI zRNV;dSViFe{Un?hi+94x;2@;0-n&y@1<#lfA!58Xy> zwc=Z=iVEXQZr3{HXRFKJP-!kdUMauc8D;_un(d?$R)DPRGZlq9Pq*!gCKWY|olain zKi;?E#>jY7TR;L$&e=lDL-L?Z6gVM1e0m=)@1%D_$APyG>NBWWNI+egtzIWSC;&1c zD7N<0QI8wCu8uZaDEsPivib?Z2y;zxhnipn7vtdGakCw*izGR~wk6thbYh9I!CanaQxCIA zd(EVcT=6|jKUC@KuID6hxlihLxaNRy&j^E%ofY??{pO5Enj`~5w^4QQ;%S;F&|Jf1 zRVl}XWqvxg9@Kj8B_pHrvk!A4*CeNPpHwI(;y|U|Qib%>8jHld;ddIZzgkwRDB@fn zCTmiqNjGjg2h$Eqtc`y$PL7W$rwUWYA1XpouA<9ZA;oR4p1i1vX|rdlEC7n9`viAl zEkvuUtJA8C$Ye|)z!42~h%24H9p|Xg&EaNa_Zgy`?ynd^Oliy}z1q$i?IzNyeO zS0Bt1FFSIOm&tylUd^e0*lgE^m{m)i%PMEA2v)msj@&_w#1{1uS&0Ejm}~2ks8pAcWzG!-_Byxy*Ui-n8-;__ zE*=4$!2)ED?u2fSEe;G3N0*(CYv7(bd4LA;*sj33bjt2B3mIcV(i5AG1h?m-?@Pf;-m~vT)Qn|3=3sdEk@}W7tQe+R9o~_;)-mul?R@~|f4owIpzy|=JtBit? 
z7}fd}TkNtjQ>hN9^*I8V2{6O;V=3t=QJYzSIF{T@gZLVJ`u>@pKSH;uE5R-7aOxYx zESa)RK}|6J|NIcAbqzF-j#n~@Gq~eMg|=Qx7(0PvmpR+7G1dRzq<@K%x|52P+X)Dq zamE)hDG}hV3^o>K(u_7#eKrXc-&oo~(Ytrg_#kc+!qn4QB2mxpSsm*P$>JxVqV-1k zn?YX_TuA0m!_Frswchd28e~A)n#Cq9E_l{3;?vqx(`!-vi-!sLtuylH_3rC{@|xO# z3TTAaY=$##3BHHqMs)NOudq1VA7U~UOH)3ngpZyRyeC6HRGm>j#?z~K3^xkL%AfbM zNP4v&%eC@g(`M5~&PJ*AWDPl3VtO?^vnqb^8$`Ei8VxUu*a1Qj7vz2uxz(Jemcger z@mthKASrmc#5WJgK^sn9I{`ekdO%!DG4ofdB)S9pB#J}sYy#z?&1Fu&_bRBV^F-LR~95}rob7m z9N$1SHLi0D179!PfFAucQ3=&+-cy~PwLm=NWfW8cXiCRQiX(1@O3i2b3qRc0m5-Qs z87Xa2b#e?9{9Dt>z9jm8V{u%1P$&eNBUg_LOn!k&k&s zI*OkLNyrRs0=#L>$E?rl=36_Ob$rMzmxluErQRd3Rn`Kf%3UAe`u(B`It4vvL^rA> z7)p!5bqAkn;$;1?tjlPKI>;Q75t7Dz{&|e@y?giWT>Ts1RrRjFg@e6`{qI4zdjBni zYgd638D6sR)zBKZJ&0d~3lFFG%VJnYx1UO-!V;7b)Lt6n_Mpaf)lW_-jy%u&mJ~uN zWb{ZV&1IwKee$n^k1&4jqR;#r2%ShH-0t1FGLy$!U(rct@~lVa2HFvQg0u=v#-keT z7jCSAYQTfzJ^soz)EJ~P^MDm?B{jJN#LD}3YHBW0qpdq0PIAFNxHF-~)zv_oNXsrj z9pU&Zf(2c>$DeAre5OeP^aT!N1>XC97j^(l2#s%$r|8GuAWK0Q+M~_MIn;`jGBQE- z954v+n|)+2&}ZnQ?$f^mImx2ADJu9*EZNZ!!`3c%ZnRSEi2Bgg!tmEY3yqg1k{{DJ zNsr2+a4)A{o4jZkyY0lU*gou1?wXj926S-jfjAuv1OdozuV4JpMOlMKu4zE)Q3o~HId1X_2;cQg z68Gd!sZpW6%D*E>O0z4}{ED$DpC}MmK@#Gq@5|YwIUiqQ)S#i)F3*NOE_wf^|QL$OTKq7 zJM1V;$Rg%8!xwz?EAr0sHq=x>)B;j}upL?WgEFG75&+dngYLT98fD)g*61`l!T(#V z`TOTZ*b(@_5XB2gmepydi(28g7WdA=NIMDO?ozbXqbAiHSbS8{8I<=ld3&O#Zs+p6 zNOT3z(PO^OK4UUEj2cwPg3^L5Yn}lfhh98Uj-+>N`)r9FX0#m4%lT^w>Ei_$S6znM(lb_TH1X*e#d2Ea~TRv=WXSq9F3qi-OfS$Tuf zUkm&aEl?gq-TJ;GNGb)NB>`TgN+Oa*2IDX#MdSp>Tcw8N^O7K57=Q2$La~V=-C7MnZXD_V23f5{(Ik;K z^)A5pzd^8vedIyv_q*bv@9;MWA>sQ4@}auEcUk=L`r3Vi{&Ri*-y6SQ%)jryuMZqd z55CO^D;f=iT7=MO6i6ZmqZ)G@)fPknfHmxjwCuhBJJC$$rikcPqjFnm8+eqKiy6pt z6pGdFRbLnA69VEI@v>lUb2={UoT9JGMcFt9#0iSy=MzIb^Y6ew^HvEbsz_HcDmWSH zqx^A4CU|35W%<6H6FYKeKq}}Vv^CLx=hPx9@xE8% z+?O-M_6;#UIiyXvfA-j+f-mgjib@tD^+xpsf=4Pa?$gCI-?Gx=F zzWI?9>fo0}uSZ1-1BE(Ij)hrC9Ps}f&g;v(OIZ&e6``mxVWFvwtlzrTv0`F2`ns9- zWyDL9aP?gIOt~fxv?`bzVWc`>oEpIjY>uQ!8G;ry3OzL^J+hZB_OA$)1~VP5fZj1Y 
zXB{I5SvBUuCiQPJQX~*y>6{k>!Rk1GGzi&{ZCg{)96=Y^d*^(Fmbp!F#U-h)>x zxNcXW3EAmz<+>sJNDpwxCOVQEroHNwZ*J>7f+ zPLz%nQE@&fd&PrhdP3lXO^d$Q*>io#iL}V~Xl*P@L`_^)v z1+<^kCK#K~u?dA7CM3t0)XTmqEYtR#1#)#|83#jDl>(w2Q)m;C`yum_ zM%vLYCh}bGrd+y}kQC(2OwEa5crWYLvbgVi$@K9O$2AlV)evEYYkvHuPxR!2-WfOZ z;p>!`#;(`*$KK4C=X=bn!*lLxbACEEa-nYO`K45rAd}myn;Zw?eu1+VaXG_DV5LH5RW?7$s+2ttY97Zv=g?P7IwiQdinJB#9C)OFww zSjM~~9g_5&@>Ft*z-)bTywJVDseP@lAAj9|(6dboNqzJMU_!`)?*H}~f9Hx=`O{;b zZJvKl)`3#Tl1YeZk66<8j-vw?e&vAur&Fv&``vM7gl)k2k}tpCDHiWMWHw;xchF&w zh?QJ0Fhiaib83Y3kO&1@L)ipo2tAn_>nPp_(8feBvO@^jE35;8zL z3F_;oUX*mQ*_Yl!n&26O`C_aowzf#av>M^2&n3rHaU z=y>hMdym3K5ac9%>WbraV~M7s9C(ZpQ~$6yN?0+4yxrj zZk4=GhW5mPZ1!t>0^;rdbrsO(&Fe9ni=YYf(vS!+(zdAW<71qFdT9)9PBd7`|8bij z=m?CHYQZ(U_Lr^=o{+fS@xb(Rz0Am{rpG=GtJqdZQlMhel?yxsk;y~x_$ll?N15z9 zcV=(y4tnMTU(sBRu9VZXF#^$Sm(WqUgre|mjUf*2JwI@~-T zE%p&IrCe+597K@>sUK3fk2Wdpt=omP`_<=k=I?*z*|7iY3-Y{6yEk_z8BZh|AES)0 z7?(R_%o_5^SKM7n4W$f>a}CGFKLV+rmT{3Lsa1ATNVD0-IE(UOb+$=ZJYt}rf=>y$ z`(Pyh#2SP#a9-u~_`}+PCL=?;JQPD^k^FOkz3b}kp^9o~k4|JP4u#ctfP}zHHuXN4 z{UBNgi3Bb8j2MsZ9Mq&TO+@XsgF`8DPPk;~r5SVc@S-vaEdk%{+s=~@lytieKp=s& zN%Z(FZ1pZND9MYhAZz@5hr70mi0^_mYSM<{KLLSoei>W_nqgY>YF`Y-b5;PYJbHaI z`{TR(dJwakH2k+=7N_X{dmxLxe-E+%OTz)^!Oy;hZSCPU>Jr#d@2DlyBv|$OP3cHK zyAMbZ$SfhOE#0!rTT>Pt73o-jql3X&>&Z6YgAOeKy*Vl;`fcF}m z-j%$){uDKR1C{2RgQWLsl}SQ|OXbPF_T2G^hi!f|LV~=PPnZ7BH}fBmRKCA2hdu(R zAIbTE^{2e_4WcUL<3bh$TM!MX&3X`lUGyTunow&(OBf=w0^DudpfIo%zQ##_QSX8{ zt}ChP`YN?4qitPn51hnCt4MYRE6TRU+Rvos-zPdNX|}2L=DS&F>g~>Z=X9n}M)mR$ zp|!8_{fpbp#5V)By)}G|8MTe5;#I5QtGqVnjh{u47mO?L&y=b?^jF$5dx_5lSmxTt z55n!lj*zl;AE{si)s5IOq-pInn;Qg2i<5NH}K9w({$1aCLA z5#z4F#sr?Wt*CJJ_;n=heNS+Y5UlFQX@!aMv&ubbGMcd}6~~tImO>f)_XZf5rA43F>h5eZzO9uKlE?xlPnph2Uw<}vQxVkFz~R`2wVj5$Vbxe zlFx)h)`lnhQll|_qzvkk*o+bYT1pNF6jOQ3daI1mFTZCt(ct@dJZO&{e$19{M5cY^ z!=knutWahr8NgE2)|8Aqm*=MqKc0|3SNNc}Xcj@y%sZvEXJ+?p&{BI78iC@9#WlV* zvy2H-{@|5#`(~3>u;TM|UU$I&PG1$Fhk=9?QrnbJlye%lUuw=G+onhE)iCZTkBOmB zr55yi+HoI-9tvtwu1Yu4QF7?e6M?mX^TJ`=Q&|F8t#GqSoTwvCrrw!F4cAhoWhHQV 
zWnHsSpiQ8NJ>H~@_hAr!E#f8UmYaEV@O?(7(Q0Zho)T#$WpD7V`n6}qypOrdq*!ZIvoh%h>E&$k~m6p6&}d6KybAT0*e2FmD+0OBu?b z2n5h@92!hv&L{3bpNEK)IWrZ{C5g0DiMUUO@!2ROGroG8m0iFG~6?JS+6Arct8wzZ3s@xq1H#l4d?qva9lWpNy?Jn|ADe1_5CQE5Ig;Lz{@MWr*` z>D80QksU|$UR&%d-x4;6Y%Quq z@7^C1`UNCkMZ#FNFZQd@2B~B$p({Cj|OI5Vk|#^Oxp!+nrcgzD9#B zluoRl%{oFkbfjn}8mWh9hLtSOYs+BG9(cw%z61bVz9V-OmBn8L&Hf*J|E42Y zPi)H81mIQAtmdmbr+WO)ogLG*H8l6oZDHwG_IGpGP`{;qT-}PxknRGJvz_FqvvyRb zFe>Th8d(g)R26^B;{9Jy*>*KXcJ@lB5lgtZQLqaCBW_;~I{mTwrtYvI^F}7|x^pR6 z0^<;)il5xUe>^fc)oJL(Mlx3WDSP~2iu7pKwD5^4yzU^*R8N75&LCx!KMP5b_aWn8 z+&+x%6S};}3%5iv9N4PcpZWU)%P54>FC9@(Ukc}cbnT>HdWxW37B@i1PSJyc3AzOx z^r!JXv~Ku8AK1-9030^%*u9UPb6+Nm>;znk!Bj#sqgb>nmjnu9H=Og1B^wZaRPJX_Q|UF`@Otn@%*lW`MPi9 z=CW!RRsBAgnhq}z@0yt9w}_^iq#`e0;8dH~4GBB{3qq0dT@oNgo{$0L=1r{rkMX)g zS!3t+=_G)t<`<-$1n7_3)9(<_FaQ3N@&Buge`FOm(2RLWgFwG_vN$F&J3k{?Szz7+FfiTH zHsO&s-yG*kKaR$WB&pGqJ;qhwXS_BeB1?9G9JsjCtN?XxdaxWZ^>PMg8<|~MN!>|` z5b`(*)}02a@9ThKAklUX_t*gM=R|?B&_Ki-57GJ~5My{KCs%=7h;?7nJ zdAV_K{Gh|?o!a~sAK&P_&u45mpU7_iqMSDM$R|X5h8rFoPJ|(Po9`tb}8Q>n-^!w;qReLo$ zAN>%ooOE5`v)}QmNHKrp-kZJ)xRxiN=ZF1gKkZ|j!}B$}@wOpc8gJ+0jP=Ddjz45b z-WrqfD@oIlz@8vi=sS0`Q5xQ}Eq2(fPXee4${lEruf*!x6AN9a1^HRQ;!wh+I}&SN zS&Fu+qVW1;<}(`>dK9Bcg@r1$N&6d<>1a!6unF2*-+1g_B*_b56pZnrv*bT9=%?Ty zX{XGYWFc91sE`ZLT$N459+L-%j{qs&Ha11uvE-=re9$b8ZQ*v*vWQCy zAb2y*k53t&ce-p@H`9Xh<(eDu0{p`tsl9Djxy!!XHktVzLP9OAQ zz0kGz3pLn3Lec+UQ`q`-LDJ8!(eW6Ew$nCv<(FQ8B2Jm7P&Wm2B)9$H+&jS^ba_4W zdz;1%^(jhv3?Euk6nxPcfBb1ThO*mV(ESMCQYD?mwTAlK(yQ~>KEuVPA4p^(s1|ps z(#W&xd2fo&ON~2<3Tv?$VXT44ZZP)Ld_}i9F2ftgnJC6t;_+Aei??MOg*R^Y-s6u9 zm)#IJ8kth#A3`i7u-VriDHbsm3z+I)t7tOsYYad^HaAU64G`?6x5NixVk}U3Q1c^! 
ziKb*tv-JD0lJp|SX**?@+MX$&vOCrN;;j3=K|EHPd!oMZP<2C5V}&$>!8U$@J|Bb0 zg*!e@`7h7v+YgN9bKMgEn%l<4;xC*vai7mq0B%5Q8DI1aL$=wOZ6^s z+*v>AWENjoSB`XES8OBtlcMuHLK+8Yhl3vks`pq+PjU$EyRE}FUAO)I%YGkqW zaS)Id@sI*97Y$bFW-BF(UoKK}y5nCS(KK^r`RP6ezPCCGuL+jUgt@we z2;J*l-qY*mA315M8rB2r!<*|PW!y>iFk|`E&}4cFBf$J|W2t!y%Wf{20vDcYxSo;~ z6Am8A%zXE3w2xnIsr@dzh>`LPD9Ko|e}kOQw{6C7_&5>lULi`(EfDu_T^Z=+>*u5! zJ(Jg}V3_;@cR^{7wkwjk%F_iu*RU|D;H_HSpvzR>S#+;PUYt2#FXP)hdz_0dJP* z@bYzYGE$7LU>QR|zpgUM@knh%L`*?hpIFnnqN9awi?E)XI@jd4a*(xlVLaBt)0ejn zL447=FtmhNlg3_RUzW>Jf>xkxpxY;HX_z_igXq^hK?kF3Fi#NDWFaF*!1w6RQ|x#=|xVcfy<@Jd$g|SZCZweMLbG1Lo5M$Bj*&a$}bn#Up;61 zh%AYxgeC}4QDuOIM$~Gcd$0f~hh+0&MFB$1ErPh6PduXUZx|_N^b2BQR6(vtz!zfMbl1x!0yoU;C#33c)^8(o~ z?&A+62kw1AJHIbEgPT90LOCpBVP8b4bhml<{RVaR9pf~PYrx6=O~L2S_Vs`9d%I50 zMGhTGG6+(Mz3vuKRWX}x^TKaIXO3w3Q5iF((~rL0cPFl?clO$6E*hw%ptDGSd6PfU z;k}q>|8PI1Qxj4Hxd!2)f0OnnT*&_oI0vIYMV~~l#kVzxll*-7wN++Iw35%+u=?|j zN>8MsT;;aV(tvSM-HVzx1*p}oc;Fulgft@q`6Wp~Q#`(|*!s`Is#biJo;_ckmIT8+ zhCHzQ2`G~Fcqn|d05CM7XiX3K!4qchba3Ak$B2{@0gXjJYVRYeVxRALs79z?5EDAo z&oAYsI@E~Q{g_(f;T&2cO))4STq&@&N4N7H+A3qc5+M6NGe#m4CUoNz{b#ApFNYWt z-{I#TbDw%LDN^qK6uy)qF-IeDCa3$g#HU-!H$P5QS2HT}FM{2$0Ll>1o~lcT7l6(SA-Nx29gs_H^i9P7B_sWP%BD))}Y4IrPuYw zT?JwP)LyAE#7q=L3>AjlI0r`B4+koQMKN{_+V1O8EEFW@3mqS#R1$tD&5%*FrL_fi%nG*c3#0qf{a@(vSXsDyxs zgikLNL&^8SePCA8{|5RT?&!SDc0Y=|4@zY};funr(=I~~fax#X2H5^5d>)d`Q7y6W zP@mON^Djk~K*!?bwWA)kv^rzt&aqSM3gEoICAqkG#Zy86qrnfpV3GEV7Z>=Sd|za) z4~A`;=dokA~pmri9(qvsHNA7zVYjt2H=pv%5jb)?ks*_m?OJ8Z6QorOelS z16-R-W{;!BXfV=Y{>=sTh2#4KZhXWZ4ScGyAw^KS?*$9p#6V?fo#o2L_imyRB!NX- z-sCz@w^_|O>&4ix%wsUTCsVmjZt9FHD|{6Nr7%T#ZM?f5%@F+_w24+}Il99Qar+$> z%+IlaiYtg|dZ?Y{<4ekOx|76BV%8#4GKmfw*A)ga}gZ{3qHx zKe7^F@;*AquTY@V>SLyC;zsWdnvS4fCs~7B>Ugc=kmE;`9aVl36?|BJCUm75-#?mP z;QV!%d@l|GHY!i+mp? 
zp%A|__65bU9v9c$hK_E(>s=|}{5bUi5+GSSJ=i_q{$MER-C?5snSrfyxdrMO-^=XZ zD?j25rG^MJ3X@JL7j#oF6DI4~O+D6t7+&3b6n&QT#n#0aAKO?;tbGYDIt!!EV$CPQ z9vfJu>gh7bT+?v68=#)19~mxj#Nb@`x=i}?i9|N0X0vKK2J zJjuB3FjQS{W;*oN)A}jSNNu~8KE{K0@acRK?*L87qA}Ma;B+xMq)RqtptXp1C(Gb@ z%n$dT+CI0;8|;d57XBz=wRjJ!o;U7H5>j#?q~~uPD=ZrL2C-vm3g}Mnso@)LF?M|T zVy^!S^ju?CTFsRs6-kNr4lnVEY4sLs%a3dHh^7`76|3-yH|$Cg-hk{c_^Ek-#_p=U z0+F(P!pBe58tK)7Uh!E=#PCD=BiUk7RZac;e!x2zKAUL=zLH4sr=O7R|8?|?fqx&I z0Lul#QE_1`5Pd)^^yQ{^i^a_jQw@0EZfqLRybvk<-Z>`DM;$$zj6x=%nOkoT3@enQ zZ-30~75jYQ0jPU^(S%G`NLUSzeW4~ff1{4b6m`8GauGEa zo+UIWwE+wFk+hu>V{%A3gqh^i`7$9I{?!I1@Nh9An12!@1K3TS2gfMNr01DFa026w zEarLcs0lv)sU$<)2I}P=`Ius6*qigeNPqmx+V+bz@PCBb0ak=PCqLk-=^F-8Ivcos zYu@hd=uf0y7Ru81G+5ayRi}!V#9fx$*Nc6Bh*8$|c|2{~R#>g{oMnOPeS?O9^ba?s z<6~{2b7XU_X!C9kKB^M+PFG18ohKUD6Wc3zYy%`YexN&IC{oPJ-on?eoeEmP)$p>P zeVA^xkTPW-|F!F#IdSgJvcK&S+0Lt=8luR}W>6%yK(VJJ|61QhwTx*IXy3DZtE%ML z&B>SaZz)`?8&b1TcrlzpCovE=@9wqlB*5!^7>9Qdd3p$4R^x;cxsv%IVJRV4ur^nM zT5y%p8`IL{lm*c6rRRX+-G)q9S^L9vm!*V>@d@|(muK6TJRZ;;Q+UsB6b0Y>Bzx7{ zsnM&N1NS6v&xlR0_hOwV~nsiVOIdzbUN)HpO*S+DHM4u>$l;cRsnhbl7Vk17$- zZ$6J+Z@ITtB~l%gBMwP!7_?Tk-5(n`8)hGqCb;r4@T~_Do5U4r;-i#4ywG}XkBT#Q z`r3!4$vrv}E(sZvaC(wjVjWW|Z-mQ-9L?t0+ zYck?K`>>M`>7Rs3@ykWxa{MkafZJ4KZHw%w6m_gm;vpP*R&lZ_cx3bd6SZFlF$?LEPdGWhda5$m9dIj*yR0wFb}P3+UD4u^TuV-D__oe3R;is zFYOhOme|MpKx*DlPftI0QQ0BJX?yW%#%YDt5n!8Ip8knLML#1p)J9X)W~X65L5Hf8 zw@~f)wjd1@ouD6pOkb3x%%4g6Zn6NL{=KLiN@s2x%0CJL%;7v#{U>|)M~{=V;~v@Y z-5z4b1c8HsCH~t-fdY2t+GY4^q2zMHb`DVzA6_;@ESnvgBa_(#$0&07JYNREm7YrymKuUH;%0qz~0L=Z;a}! 
z5~yw%mki$;ehP{J-dx=PrJKKX!v4&Y`QP%MYr<^C^syp;8X@G98a_%lE2knqy_D7I z>XFwzsl6iumVxg|MleG%f%j~Mp#+&MA}kMLPx96vpFug5Bga~luc3l;H>G;J|23ES zYxC<`BLoWQY^lY_rE7MuHD=wdSKEEG-);l^QZO|NA?RJ#`=7P&2?%n5RHs!_DqGRF7*T{uKR2_S}r_bWueUO%0iJ`r`Osz$#GPT7W4vzKaC;vyC-wk z?D0sRM1+ZMQn`sA$M)u?mWtf%VH}B3Z-|I~-^A;VJ_e#Z1r#-1hztWU zt={*T7Dn0cOp{WhNUZ=r{Ng7#iDR7N@L`;E8v}}-x)7S+o&{fH>O2a2g0w{os6bG+ z;Gml7jg-9Dzg&hJZ5K5SU1d8-mBg~mpEJvUdo288*@2n6xnG8?jw~!$f-=Pe@l)eF z`xlT~AcaSmwQh%Dmdi&Dl)T|X%h;QQe0m^5e zWW=^JP3pw%i9olyfDm-ojCN7BG(I=*#PeYBxe|A`b@yd>-je*`hZ{?YY_uue1Z^GJ zAuW3|A(82i58^JeNYFKxK-`m|eozktE^TIXD`ciJPV4i45c%rk3E{+%-IwR!W|JlG zFJZXZoGgpQcp3Sp32XL3cVn+`oDVmd;EFn;=!)Ens>c#QBg6wkWwbq=fL#D$e2ljva z${64dU>cDs;JBNlZxC9r6DTaAw*}GcJpBrcWDO6-03s}Ed>#lk`JQmvWj5UC#IJcV zG%TrH?_mNZ94MKDOo5)$M@pP2%*j9!0zHs;VoY1OLxZd6JZp?bt6*m9$%j9Rl>ao{ zeQ^JXdQm?`z5UK%m^FP0Se~xC!Kob2Z61D~ln4LH$^E1EjpM%(B`3^Z!spU+yvDcL zVLe5T2%;y9;4~Ra^Fmt6(5~F1p5j(&O3!>|?Y}9NM+Mid4cDbr`#=H7p&B;t#bq4h zE}5AJ1BE9$_B*J}c^Vk*^yCbHD%J>pTAsb)#+mx_r(j_GHhMPzb++4vx1}Sc7*zxW z(K8l6LOqoXrzQ9$Q@ze$Umoc*jv}I{=c(|Wjqd{4!|9SkVpU+vu za@$tV4F1of|9r+nH$R0him3f`aQM&XOZg-hou~?m5`R9v+E1r64UzbRZxkl}pYQXh zzmFiOEfH*;`Plk;Vlxv&BQSC`9FJ}xWDvgNCb|BB(g&LEAJK?0`ZmmhGP;XXybxJvpFIag6Yl6R7) z)F?l?BX1JXRH`lk{mkwSGzWwGP8MWXMy)QQB4GGfk}ECX0cL9x-c+LpDsBPW`j_WD zkE;J(ro3+mU;=%h!Q=LoP{_Yt#^4E5{yaKQu+^9Ts2%O*@u_gz*MpsX*BA$r@y|$( zw1Cp&t!;Qy6iL$uSTGfHT{0zk3V96FI9j6W4RGk4{X?knX>jN%iZ^=Ry@sQ@w5U&X z?jOhir`(|!bqybyQ7xgk4ap%YaCQ!jk3Y8MS@?XEviaLrPT zJP(amj@V9EwfjCh{_77{?S{6U5pc%cz}s^~u})8ze`qNrt6-=>lIn zCk}}R1Y3Ggj%ODJaH16ysU7y4EfDX#zrEvcu^}`oSe@fZFaUmDMv|ZaJ8E(`{S9J| z3=m^@oFw${VV7jZQ@2%cu zR_Xn^PBfNma118H=y$ka;D`PEx1Z%;!uujrjK~< z88K3IH@AiG462+MX=Owa_+p0o&4_to>Y}(vwnn7q1bPj8+S<6iS5Sjle@=FV_1v{G zPlS;G+wyX|4~DIa_(Djk$_$mU=&U-V)_Y%1)lc6&2smGy&bdxf3L+8Gv=d1~i!#9` z!_V*EHF`!qsE+0%iRD<3)QtQ5_IU&wdv4sgQ*1iDZw<0BK`aPJAv2$}fHn-UI!H_U&Zixz<2#~`6=C`Gj`j0kfEM0)k}(kU z_K5d*uW=*URdeup{xOv=`Z%~(sQhmFMaQ(?OetWHel^J)lWO>%d#o9U#;glVJhPbW 
zIEg$&F~hI6O^%0@$+!raC$ikHVqP?65rjQ?S43LCub4ly%CYEtYsvrKA#TlFe5Ytv zAZ;gq{8zIim#Re!peAjGS=r_+#l;US4~T@x&0N|<_=j#MP(=K*a@oIEu_+|FG#Q#N zx_S9!q%RWNJku<&-gib%1gJ(syPriPa%S=zwupx1Cs_wr^VM`a9T!t-4U>h9N&O%8-aD?T zHESCVpn{EvfJzMt0*Zj5NGG5o0wSUmrA0-g2?z)T2nj_&kRqa>NE1Xl5$QG3QKUEN zgx*Q0ffV1HbLPx=JY~+zGtc+V`~CbQza)3EcXsyO*1FcUuC<7yu~uncLb^I;_QFZ< zPCpy@yJl~46iefiE#2NmXIwj*YAJQ1zsE?7H0JLk0=)%4B3a8cbE@5Te0WYf{FVnKO?PpjT8ozi*C#fFRNApw?@S~)*JTXk z^6foQU{ht$-B9jRNSA&ywdVx8u3dX?+tU9;nTwOnbZt}v$^@_DXU%RYkp&Dw>w1U083 zE>jG72G&l6W#NYIX?Kv1YijxV9v;?my6Mw*nQ@LCR7fNAu6{1fU(9Lp&x2cC z+D|_J@l2hR_L&@PEEGMm#z+x{1YZ~rG6Pd-m-msC%3fpRNf%8Uhc~q``hbLQFpH0rd(?UD znuOFDVS0maFvZ_sjg%E?CYrm?VxvE{T5~Jsslwa7D+xojjYIOV;&u0nwqlm=pU zeUiB$Clc}!T`;}d%?9mYsgFKHjiiYfO>q{L^xSgeJ4rpK^9 zcjJ(BtTWqrwF5o%GbXm?XPgMxiCQjp4csq^98%OT2Tx2pu@ev!NxwkMBs2*#ilB*w zkgTBxz~%!3`*R(RVvO3qfeWJPt|d(1veQ;6TBX8RZ@?!?mp26kMm*#IB@XOR_c4{% zkhL_=4Hp{lQHLkLLiCBaPi4!n?hZ0qXyBQCIfl^5djd!aD1uCxP|T*w`u!QwNP}Yt zH|Xp%700@uE;5As?m}SSpP_?dw}>reTy;VZ`mZJav;P5`rb(^q5BQK}aduVY)&P^_Z=~WO1Hc~0p{PPyZ`IssR$6zJ4$ZGnCs7xV%@Y6TY*x{ zcQ^6ZXP{w6FwZ?E;Xk_JM>iP%m<`{~2Gu_k*&?`9#Rm2)R5i%9X?EPTvANP}Dv`sr zsCK15?gg>IW8_ykc70~jRxeNziMrM)3qvXD(TBI&f=G$BNny_~5ZLcMvwuSNR`LuneJKFH+U$Ip(Q5ub zqww`d(TV@9b`hy7?YG*7Kd!&AE5fytuUHvG>9LF~s8^05rajVP+wRv#-x=!f4;wQN zr>(swU#6cjxZP7)UQ!}Z9Ti3$nN&z?*41S@1Yti9;dyvkx>exX$mP@o2(#za$!{F` zUmW{iJu^fT;S?YETs%;A^#S%-mGFDO{ZI$DxP>Ru9g$yrYJ;~gS7P&? zmmBhgatQIEijz?oQf_kb$(bhluqRJc*lxJfrxY&<7K%fV{JNx|;TQ69Np<|LJ{qSM z%^uie2OLg@%ka_lh1Sg&3)LX}id-tSSbLBd&BDmJ+gZWmYJxr6Lq|g|HOJk&=K~pC z8TW!|qt1|uhnwg~s(4!NjTYmNY1h&t((VuF##x4E!>6^>9B)_c0Ubt% zFLh(TtZ?1+c-zqHDCi~|wY$#a-f<+KeJq>^uVhjbpl}iD0#rkET9@5j@4iE;I_C%G z*ebv*AOHcI2z`P+N8%XX36!obH;;O)CQ^oL%w|8{I|D(7 z3eF+r;MlsUd7T{RCSz`g4lAX{ji0{S@~B)oGc^E0gpK7-42a1y%{D36dfgyZQ! 
zI$kA?#_*C{6EhOyjUIE|I~=Z85P@zOOLX~QPA;ge4_rc|&2GQKHWXUEjIC7=xm-fP z`KPbwg-97_&N@CrpA^`;tp~$i9(R#E2;w~1P|xLPMgHLzqn%FME}IGJ2e6}uB?A-o z$eMj@GGcz7G7RfZ!Utom4_Sskom>;xn{D`Mp24~AX?BXy3WMbu`y`r~6g||$HAOU; z;(R7@GFigl({xx>ZR&g07$^q^V^#)iFKMZQmnGZ1-(5F9PiM=cHQ(oTv!Q|6u(&w$ zah{qNXin0GF|7LZ9;?u)>__u9gK~oYo;T`{N3S!PDZD>fqk_(y%I@yB?97*S-EDnL zm=M}r^J&Kjp&G40Q6btB2ZJQF*dA#XnbegT-5yQ1kFR-I%h3}T<$SsclxJ9aOqZw@2x=U@mI~}I+-M?_ zl~!nl**J2vntn4ahwmSV+pi|(KbyFajF9djOm%7-5G05R>(8fMyZ}!$W%X2>GsI
=s>Usi!WU*S6kQJyz)R`)bsOy0N9ecaTxR@VI*l#|ifj0Q?bh`04;L;#T>S@= zmR0GkX_F(ZJh^KD})XRJ3$?ag(y zyoP86qkjk}0&Nw}$d_eO`-L z@@FK>A8XdE$YxNUH&V5i`g&vi#NIual0H_FDG_@;H#;}W%O-vJ!l6r#JcVJNX>4>R zosFF}Tl2F0`TS)MLE_N#>EE!205Y+!Y$wuXWS5(f8+C5%MQdP<`1=Dj3vzendRsQ` z9;lJ)$^tQJE=(osCK*4j6Kr0;C|CM#vK*SNul-%RG7NOF4G7R`5hiFp2DSf?))XQE zJX$%!^cQJOO(3nAPdNdvjNRC64f|PHGx;qw0L}A8Le|>to!J#-SJ8@5=82C}GYE5? z{cz%(cpo{4mOX!;CT@qYf{^sfW3vXmbFsU=OuOYjGA}HSzV=bI5-g zy8*%U)MWy>&RBdCbz5!-`DeM#R@k4gB|Q~4iQm7zSP-)Q{-2Wt10bZ{@6OyQ4`W)! zbd!grd~*A@(dK8gy5{e+V9}kXawd2N!-vBDYj2FACgub6`y@wTIorG(GbzV9-nNy~ znp;Xr^;-N6-_E{ygwO36Gh=5V@szIB$u}Es{q~zXwpgUYcUk8R2odIA4vWaU%0AE0 z-@#~i;w-!3P!!DG@HxpSez?zMa^*~baLVR-$biQMyIk`n)yhjN36;h)T$kcj%~k21hif=6$$K;TJ+Yq3FjQ|_ zX^@yv?YLq1O0F)FBOVYvPM+7gGn&0U!Shq)7G7n>EbP-|YX(B>uyJi=)^oyy|2|{Yvs)@yk<77 zi4n>rm_KxavzpvUe6aga$_UDDEC@by=?qPBm={J3y>G{E(Fqj72C`CWvC|({>qj5H z)=@JTJX3&tF{VscIL$1bSzD_&NQgw8CAd^xmg;%gn4f~R%f1exSLbz@`{(8R zJQV>6?7J03lE7G{wab8{S{vE*V7WGYc=u`IT)gG++c%OJo}zL;IH{aH}>e384tlm3q6E<>h=gwyFr+*X3AcL~XL;GA9n>$j6v8Tv$%$OX*EQ?}k+C>rfOC3XH z^rxwPgZ&E7Hi|HV$#=78^Jjl~K=+e+34WJI+4wMv)4emN559R^dQJH89vE)4>J(M!zYRo3_Og z!Je})#{)Nucr?el$B5;@OVvY1vOXf_^K}&y_&i-MYwZTQpS_!vP5d@1n_RoSM|BHJ zb3iywL6H}wM>H?DvC^U^t+qNQ8Gg(ivVWPOHi1%=M)qblJI9tJg~gWMS8%yN6nx2? z_1d`B-Ta=!(;5c_Q2CyxyqwWnSh(}Mj{3IK=s0wrr9l7*Vfj!B#%wBfX$nduBM52j zm$$-ZOHa}~0No2`O=J7<*^m43|LB-tx0N38-(oL(&LkiKf~JrCbJ`}`{m~A?-Wi3{ zt%lSy_w(|+G(H|nXJ=8pqBgXHgcz=^^|)FqrTSrTcS2N;asHE~a~Tu;;nIsE+Dk{J zsLTD#+msgIwvV+ky9^~$xVU8$e7yE73Gr6Of4))d`kv{=LQ4+=)7v5mZG2Sec zlP$r;*z?$7P4(l4Csoo_x132LR}IyvZt-Mf5aI%SujmX49k+=n5oKvVEVKWFN~P-U zbQ#F9;FZsttH~a9oSU`VxB>!D?21#%$ED^k{^txdQj8k zw7U$>*yK=_YKVYkR7{g}yg-;|Dr!HkrOiRJp~0dd+O5l$D@PQ2(OS=HkS zqtLkEmanSk&Pul4=)bKW_RU zfK6=HgFacHYm)H;)O!uhbX{J5_`&vcW7VbRb-Bhl1yd&^r#HcX7~~ZhmBR!gF=my_%Lr-Eh)u7ilATL*HyDgqNZKh_N2YluJGpr+bcZ>UMPx$VuO@3^@V$RDNhd zctoM7nSY|mCzj5siETQ(XCWYixAu(JgNH&_o*LW8FeTqBhZ0XP@IP$Q-Z%zfVDJ

vX^tjNw7P~ zSn7=!hS+92_@Szs5L@X(i~d4$kcqQcc|)FAVQw+rguuncXovsD;y#*uEp&C^S0#fP>7mFvBQ7%Kld|pWh8yZEd2sYpf}b zVA9Fa@W;JfJxR%8n2{bt;Tjj!yl;wPGEmk|S3{9Bj$vx;x)aLFLNnenaS2vpx5C&) z9-O;V9EmUE%xv0XX`Ou`gy}0p%-b;uIv};-Geuok%{FfMN&JnSJ4;nwYu6`U8MZXQ z>tHJ1;>L0Xpi2@c7FT&`lo}y^B_tXzs$88(!`{4qv2(7;BqR3HR`&)ZqWYO49mx-m zd#sI$bm}}MdN=s-u7;w4JIgE|*Is3w#=5PkQ(ir~hpyZ=9v;>;(M9YUcVja{g6Ik2 z)ZNX0%sw{AUc%xs+|SHKrPq2-ico%ru#{r(&Q21Pz!VB|UE@9J?XXRx-6CUBKbkW% zO|$#;smSc(AcbPSZCLz)sJseDr!V~Iz4~QTvb5bugO#DKeAGPp4XZH~Rm+8`ct`5k zT7@YF4nmby-{;c3JO&4!VU$-N%H5MO7l?VjJcjP_u2y9|Bl)^(!~CW2VlfEl7(W2l z_j6w8PUdUR6|8 zbPLeFUNI2U9&nGo<{9v|{WLqFxKkN~USPo#C*oe^qQga+JE}_$WkWvetm!{VGjHeI z&?|X0Wam3rf!k3cmvo7#F!GCu(LQpGVf-DNfkc-%ib$vR#(wDeQ+5g-<0ilYI5OnZ+{KlOyz6ayYA^+W>i+Q7Vp(AZ&#W%9e7o#v$deM=+Rus!xTVa@Be$Zr zJbCA&z?5*gRaBDwb6Vb|#!?JS+vs+u!CSwB;Sv`y1135ET$aPcSTN+rzdM#ob|pl> z3o!^hJjAXh-)qZlZXczce?SRi3`boioyCg>6d}4Lzo1@C>O8id^khBbHUh7CIUNL4 zi1?RAKyjz)h3+&XYx4P%mRF@=7J;Y2^%>rOid}4=26JV0@<&n)QrDcr^H*ED5oaRy zEgexi1d3avOmkdPW^8U|tb;=_7O~rq)wi-bxh>ohZn!$;&-37>qLLV5=gXisV{(_7 z4tko&Fg{U1QHikCGla&H)@c*ELQeJ(v!L|in3g88g2}O1%}=sh*_ZAHk(DT0?2gWh zgeIQ5Ctto=OfPq{234{_A2g>lj}FjA93mO6zHhdD&PPN?NIodt)tS`zi2TeZGvSLJ z)~Q8NlyaH~LCg-aTn|TwHV8eL_##kzfJW~X^H@b?S99Na!TeCOpdyJqAE-QBBKSIW?W)OU1 zu+WFIwvv{9{ZF-)k?6u3x8a&TGfFU>u0WI$X=(^VcPu>Po}{L>#MbDvZAS;5;zVEt zGIpz)sJApx(mJui{2gJg^us&HtdVzD?e4~{KE*$3;?aFmJ}c5k&s7Vc}gp`PLLkO>C3as)5K}5kwb0Elj@dF3O)%=&f1i6>QA_jymhFJ zPGw|Yv#Wr4g0gT|@8;WkrC7fg6U2qy)8S3ou{4Io9)49=16ke8f4((mw<3?wxtxHS z#y*?i1A7h$uQC+QyM=Y;B>&q9N8cF||4KyffA-zWT8>-ayM`60tSv0BWST4?B27oX z95EsmbC@pkBs0X7wr8a4uVqBc`T+Lijxhsc4dYiI{p}AZF!m#y;S)VM$K<6HDm@ae z-11z?moZ&Yz(qUXGpw{Z;Jby%=Uv>cQt@+nZJtkPi?;Lb9*kB!e_iL~DAT;7>ek{yN0^m{#@4n0~~^n?lP4+I#XK0v&;O17y=V@tr&#aFlA z3tU7^UXgyz@sbIW-1}7LDWd>ndnVVaBQbH%7tPmWx%(iksq2721Bblp8`$9wG=g2k zWQ`|W19T|R_yP9Mx(Bg#SaL0xV(MbYT}$TIcs@6G=;|WRgS3mPXBy8d&lbQC?c4fu z=I~iH9@fN}O2J0x{wdK+;nP{ud$`__YQqmBj-N)>oQ!G~H0daz8=l))8D1vRd4Dat 
z4QqDohUEo)XEXCHs?m7$6uu{PBOKVbG**)NRifYQ93a8ybLA&oZN3ogZ#4F$sJMMW6$wvnUws&32z_DQtI(?MXNw-7NG0vC;&#)E4cI04xzhQ&;uG&BTD&7;l=!wtdrz^Zv$b`&V3@T@5yQ z7F}~wHb(%{WVail{6RM%yYeh8s-1Q_a&d6)=_cmspeaibd#$rE$tMJzMx7vhSZRxc zM~-_Xw6y)iRWSPGv9b{`l{SnF8%|uJGN&q>0p(6Yr2Z48&M)VXA!A7-Ooo2iegqL1 zxdvE7=1sVlRts8nS3Y29=xZPR&YWPZHj(}c347ZNpKAlbUn{a+*?0OtIdqFjb0A)P z=u#-KY609Fe;bRX@nY({@Ui5r*D;e1uuuM6FNog!cgZ1{;y`5k$uef4mMW6-%^P8c zmhacMKYM@lw`x0DmyJL-<=`CWr8b%h+eoFQVEqXqSryB(BxW`olIN!sZEuW%BL}CU zNo%;C1{cXn)SviFkY9d&Lf#l3BvpgvXfF{>%bd|v-}bO>`pse{ZjK*PKfe`;E641$ z0!7ke=!hDoRcPq5YyI@Dk$d1R^ZVtWKb3cE4-H#;=&>ImLop-JQt%lD^&^M}H!QI^ z*FJNWzVWa-)#?Cv*nSHs%N#O2Tahno5G$YMyh-6{l(IWH6Q7!_;@_X*anvB54l>71 zW{8Q;y`Bx(@eu+fJN*kT-y^+4p z60W`vMK5aIK{e}B<6l4Xu%ZC@yz8A!6rQ^j8_^c8t* zc_WH}?NQy(a?g3_lnH`-qTv`wlPfvCwD*F>n=M}60-z`Tp#!AU&h?`MWV|(u>m-nIKy%@k1mJM)f<2$mMN;l1Djpn*-POCRerBT zKuHRp)8S`L8jSM2md_IgeR7Pq)(B+1_dNo1$KnPs`x=}iDQiKgsl!(7hB8uX*;6TL z%6tIc2>g5LACYRBB)_%n9iJg-|Nh~OJFShEWgp+ioW8RmtR1-;u=v;lhv%oh&T+rG zq>NYK9j2CUtsY@w*TO|C?bs0GsU+X6h;$c4&*8YZ3sM|f&D)Qr2CzVMwLKXjbYHF} z$Kz}jQNN6kZ=e6`baGn%W8TnDOXm~M^H9Fl7A5X}5Img|b3B|sFQDMp5BIaHD10v& zwENwMb=R*DS11YXTYBh;=ZCaiJ|Gl0Dk`sReR|)JtLRJzMzNzL_^B}Ri$o02gGzA6 z*K?3#usPa&a`7+Bh8(Ll?I|-~@hMLp*lw=4&E@Rp#TiPwffMFasC4fYEt6{Dxn@bMQyVpmYs>bO>H=)Ll`zBNAZ;IZP6WEquSlkC zO=S5B$$2P55!329s7Q;oCrt$b1Y-wPE#4C@iorXho6lxOi;pBpeTA^9QpBMlm6=h1 z049bZVB1r{$$;u@NbTSTqZDoDAO-B&566gWG_QuQ5brf$s#4W|K8-8N1N=Ql%n;bp zD?&&n2pDFPdypjl%~>LToaM)T`7vJpE9cS3@6Bl>3JqUk#!&XdN!mcaB!AUQ;nWm- zw+xOH`Exho@Q&!_g|++ZAl*q4M+$)B@1i%&kkqyX%#f@$oP42c8V~{3@5#H9b^7h? 
zlBOv@?nL1LY{J%Zcx^SO9l5Wl$D9_`3Z=e%zTvL|K_fyS=uWpmW+!Av(Ly_aKJ3v_ z#JcVyoEavF#&=_5ZYBM?jNsy;+9SY7+;<7XNtFh4F2-iSg}L+TAfA`#VKggx7T*pO--jhe=meEY-@!RJwHqe^1K7WN=>=cs494dv=jKtxY&FQfzSs{}=TIkH>3rK-L{=yTRC%b#5C8Pq+9KKHga*Ifrgd7x=vun|?3u6dF!B zT}m*2JnxitO|2#Or2+Cb!&tDs+#CN=rd#S8*h{`RjcapTMxGy zZOX2e(!By#jcxUKhsxWXc>NOT%^NvmLs0q|$UAKnvbkjAsXU4FyeYn|3BrK^3?t6X z=}K2F@T9K#5anuU`w1(x4=D;Mk`IGEXPk<;lKdRS4?e#^H~mtp0&9$B*P&6(KOb|SbSB3@UWy_FfDCh;LumdmRr)w8&NW~ z%{~iCutSv71l|-PtlD`);eBELfIw-%9GlGgmoSQIeDFlCAnw|buZV?dzSkMmy$@7) z=#xJ%Oo~7&6s5in2~NNqz1ZvgqIGu zCvHhJ|KM~T*a7%x0*R5@a1{vR)?WP|C<%-Bmk}Ey^9R2Sz`SiRx(cFq_kZT^??WXs zI)()g7l}&Y%|ql3{V9$qL5y0qnCzUI-EvShTN#EvKE4oj$SBHmIOFhLEb8Q~rev$N zW{oX+&K7qf++S-Kh&c~MMJ)w;cC~(mK=03#@_&edwXQG)?$;~w$hmsHU%OLsR}xS8 za1tTkTV7-+O)HCJt{L$0+Rgs+QwZuhm)M~HhQ6*+t57X zjvK&J5WKd(7pMGp+Z@;ahFa)};YL70k_pbJ0z)4gPYLF*1tT~!9snkP=K*f|Akcsk z`>zlo%{WYN63x3~HRHR2RTd0mT0OnAbJ6^?inE}nJ|Ykld63S`Pzhh%@g4w0+Yx-| z&cQ{2DcI^>*oL6`2FMt4t#1%ZzZoSjar=s!QHQAO7C3#XlPn>}b;9Mn!d`Gu)giyaEJPa1 zwyIFGhB1#!r0rgqSk)(f7+kKu6ey_Q{dl4;U5HQJ%-4GbHL! 
zc=y7&uyZ73{8+)q0^Gyg?JrjD6g$6QUC4cgX5WBqYc5sK9XuB?m_`X58yS0Z%UuO5 z8E<30=Ysq+MD_oI@`u}2*RPO3aoR^L(f~TAQ4GyRNVuLRxMua=+`c%DLeDW850qSH zjQdw6;2(UK?rYnB7A^T^<=UZd;Pf1<9cum{DaZXarBmUzS1NZ^-mS}2IKV)eWO(|x zDm;jmCv|qt8=kb7Mx5ci8!gB{oei1L8*e_9Qh0n;G4%pfR_}HhYmVK@Nm)#(FZd-$ zhsLAB0CYTrYxcRSFCz3{UUeg-1vju26K=&2H**h@a`!8wfJ8&aG1ml)$cOB$?70f+ zyuJ;ai#hms;mwOR5CqRq;z?w$#@JeAQI}x^0e;F6Rey2Df#8$Nb+jD19&IB};;F%E z`q;ca{&j-+N&Tmh=hh*Jde zVwXN?wNWe>65@eSr5XSpC!%7X00}Y#c!T@fYarN_iJ|S)fUQr6kK!o0Fv3%CXB`lA z7@`hcv$hdXpcHV1_eCtiMl=8!g2!)7fa3QR!mh-%tj7pGg7t-gkAMQ(-(LM_|M`ym zxDP+>!;g9K%c}5WefY6X{_cK3(S_Kx{Ibb_o#fjMCq(hbbUAIp7UY z?R;8KbFE*8(vEt{a*D+Kt>NQ3l(_j}cpJ@CZ36?2)X|p#M~Y%8iE?&fFXNc2 zo@~>iPgC9ryH_Wts%`~Lhe7!T>WPe-OOHMUt;Rh2HSOfH|~&cmqN81s+AKb>uSC7q@r`CGr(=<1vgXq`veO(hH}lp+QgIEPyXZj+Zjv zv6Qgb9YA8_5J&z3lwg_>d2Dd0(wdso; z>0$|B6DT);*+G8`Xpp5PY+(tllO$yzvN@$F-p4RA1L9Q3<&)~)n9k6svg4SKssu)I zBo+WMNy&=G0SDzD@8d24pmNJ!#~&^?*>6o3w|6{L$!2pO@ z+D}0Zm%gsu(8miDHHQtm%7&vh==*XeL+I&7ep+9Hr^h|nRmna}#1`?~JPm-B6kDA= z==<`P!A?oV!1DqT)6TKUam)uPwvo?gENI)`#a2ZI5P(wYu>7*bCA13f`k|sZsi8st z63xf4{<63*tG=%gGt1kxyzt^vwd0sGj2x34%iF>drVQAhS{8{$oj#pK{Sv1Xpg@p| znMlbv%4oW?l6HUq!g+Vkzry!2-4(5o$P(Mh(*J7V#M&)Q+ZGju&0){Ry+_rmaFrZl z_bEl&sN-F`_4SSb=lN4TUy6Tf49V|vN9(EwRFb8(17V1WK7IM(2=t)++`9JP4pLpU zWu15r9c}8X2S5T{W1iX7MU1-HmN2)* zJNW!ce8_B{5M={#&#}JH<)mMnPKSo5X(cruXpyX*lkrFB{tW^&YR)HUs9gYZs?%dk{+FL2e*;_aZ< z^B>7ycPq?qyKzH47rxku-%vR)(QfD}Iui0Em_hg;+YG zd996t-EBgC(%_YUP z)_rQ=E zI_F5zuA}1-=-6=pWnXPh^bYHEaTkB2Jt7b@SB`kX!<037j(VhP8?THzCbjmB$7%m4}`Tr?IcKMjyvh!X%o(juwmgskI%MFI}(02bI+ z2-7OyILgC`V3={j@x0(*Kbqp;h@sBqZ9)|OB2^0L@7h;C3I4lW0OL%EwgJvuB+S4{ zW9jFoA!Er?I3S;|dOu`+7_;yeoTWG3^8_5v1J30C(=}ND1OLaNKW@{HA@O78{8%M6 zSLq)s;*X8vKf6h~Z*I*n5*ZU@)1q_$M+mWmEmi(x1SHT{=D^*?j5N?Bb5$S!b*NAP z<^qnU2r74h&=3<1?Em03X$cc_7aR*f8%|XUIb?IciXbxrLn9XO4LqbsouzDaVN}02 zA?q#(z|snmwkWy`tPnFQrY0BIEiG>>eQ3sEi%CJ^nmnT5XUbF%@t#L_?qnzD5ux$o zB%L%nTb#21Oa)NYAf9v|4kZ2y*11}?LU_9>7F*H9C*yT^u%!U=z 
zB}&WMM8t>FtF?wjI#6v-&EtzZPGSj|JqoUYhj*K)rwhU=ORCAdyKr(xP?!q!a;vhW0& zED2;Y7;;h~x!mD%5=mP#j)mEJ1eRTcYlrIOe1-JvA9_9;FBdIny;Z@)UHG~xb;FWt z8T*(bMOdpD5DRR;H*k)x?y(qnrJXLcaaIzNru5R{FHBY|0Xmy+Z&x7kE4U5=5nU|o<8WLF)we%6Mi*o{2h|G_}UxB4q& zTqMP3*MXG)dAqPLUV$s@QwEG58g_8KSh3^kuOE(9lYv=r*_gtY8~aeeZ*b>Z`4DIR zq8sT~g+B457bV>?K$D-dQJ+0)jJvMAS(amHxNG-9vh5qTAdB!`~N zn^}S6a|Plddi?yUU8Mk>IRCEVF}DSMDnMRkA&ih6Pd&2?8b38%$_uX7@|Lqk`nCrg zW(rWCPliAfum3Fp@L%=X{uAH-jx=TceML1T*wH^HNv$)i<^fif5LkELJpl-@gnk$B z)8V5^geIzYI&S&QWkF|fq4eJaO*{Uf>{_h;kJp}?43%H;y#DSp{$BfU`BFM7zY)st zlpg$(8tX<hKPQ*Q4#?g`VwTXA$VOnxXkLMV7^T$5tN32HIY0tY3d&-lC zo2oL-R@zw(;kKTSh!O8BCm*xlKpe7{12X}Jxns>M)R0-Q{m8I%BGI#)bvYJ=$kbCst;vAHcm8*oFt|_Qn z8Je(=oMB&wvY20~YDc<>uUQi@W4#LspT&5T9X0N43cp+$ivP7T;AToZLv5KLrA(M) zJCQ^Y=&UEbFB6&WS%;6%EgmL_y>2!@=4&{^XZJb58_V8dyQOsp#RHbi9|qDyK&O_y z8~#n#ZpL9iFyy~W52E8gh;ST9{}gb&?y_vy2awNyopq+jh?~PF_S`y7yLRUK71H}Y zOp4bab$I{!9Z%U$B0gc&E`C&<6WA`Hjey5w7o!NO!3g!s5@iWgc~fy+=tXG=QC46C z2s{qN00W+b2jcS%$g-?RBduxpJWU3-Fb`j~QmhkceWLaIL5lEEhj&uyt zXq#m-KX+lu2G_t{$13sqtGITpniNu3!IwcvO!)g1+(ImQ9hAw8gQ~=rERaQj3UrJg zjiya~o&gyBXF(~$&CE-z9PLvlVA;F|$34TCS!0%(0n72z-+t_UE^d7ivoHqhHW;KV z!`Bu!=jsw}Hp7nQ1|zSluIlB3%0W;nB@TM-$B#gM+^!#^>&HC#v8?=9Ab;#9KX%67 zGbDsoP4cP0V!?nq-E@;V`q3D(nb<+QpA416?+u#4Rp$~}Ibe=7(Mn-AiA zDa7qC`vk_Ec<3vHdvI+HPL>p>=%i4tfG;XFGrn#yabF$W3%EB5S`-LuN`qRQsY1(g zq%9+9*W#ec@deTpeI&fIlFLH^o{~mHz6UgnOs_U-E7$t4r)-bNq1`STv0lf|PJV@m zPa~<;y4Q`*9^3^zBf|!n{tIlQ{|1H;-FgGj^uvG{F~0`0#}H|KF@}6Me7cn`Pv9%$ z{NsfSUsN_&jj_HU-!fYLMi?^NF!;+o11UC*7DWz!SyAp1%dY|FBs^|GR&faXiUYEVUtlZKn zrC00*iAZ;}9=ai6R(VD37Z&XA)dMvVRO&Q8iIot3xTlQUGh{VJ5B^F3w5Y zkTf5QAiwr->>X0eDPaBXM5#)a1hGA~t~_}3ys8_wNK`mFeub|T)>Dq$+l$%5r^5ahN;LSd_av_ zhIcn1R&Etf|4aZ2DoELJ&%S&OTie8Jd3=ymuhG2n^j5%#wr}TTSWaq5>eJh+e5{Cf z&(~FmcG}HOFql<-&(=42yH`{6hEJOvHCH{_dj32Uv+^zInM+V<$jgnyn>Wo8yOonH zn-g<0-ZKpcoXFMIxdGW~z<2y;C6K4ZHZ3rsJ9rN_FOzZx_gZGn+}g?%L4S4EDD&dp z_l(jD4}}Va1anKyk`1l(okQ&KrOcv==3BPU3!Z%5XbfFRbl 
zOt+fLK8D$j77Ph6U?0e(MpIwd*al^VG%n@eWM8xW@`C)OM$QSV6L<4(yb#7l!~xuB z0t&#nF1i4eU};}rtPx)q#zD&>^J$NQll~d{WHeAZ+|N*Hm7r6k`|>NK<3p@`qn1)u z=Cx5`Ow2M}tDa!~umZF)9CURjmOt<6+F9;mOp(vALC;h-+lNHeT4`$$Z9LDBuaL3v z6;yo}LdPc_^Pz5KTd8#gf|0_7*YHX`C7xzQ*@tWAYJBCAYCd++xreTn;}mmk@YFlh z1>yko;d0X~&k2BG@u!OMM09eYliK&KYvsB{_=0-ZSI-w2BaAKO)zX5|9m%NaB-+c$ zPkcPt1;I6?!2#O<-%t8afs@|@Q~np){sT#RFd9hGJ8S`1kUwSVks0s{lt+r3|Nl$U z`@L)={m(Eqb3hxI8IJX%@f;aUoMZ`d)YV=yx$JCkZZOy=WhiA8n=~^z_+SxJp`1!; z8s3%UZe8^G-3MBpGosK3=BvD+?-e`3%f%p5J-v*8llIHrYo16BQ-}nKV3i@Se;2y} zJDa@z$c)D;{EQXQG~U^M%ywxi4-a$%Ry=GFVkn1PvUvq&@7ch)+`V9K44x#de`JM8C#*7>Vz#Rr5&ZGa%~YHiH#L6`q^oS4r4 z0Ah5sw4r@pC`Tsup%4>5cpdOOrpwi!{;BHDuoBn%;A=GYj|gP&^c<}z>4=Iv{Wc@2 z!ch?fv`J<)!dYeL62I^9Zh-1r1T=UGZb z{-{V=URG|-0q^5UT`835eu(2el5M|hjGp79qBhm~l7i5s$MYYOs0**wJA>HhIM1?S zrd#wUAt`^shWVor@}vFVXP*CZ@A#4NVboh99!iqxnnIT%7L*dGgAwa`powxJ@b4X2L8W!5Led(@Io(v@F_y+7NL>J<+tiRoKSfp5$ ztQh}Rg?sOLZVwOZ?7X7#B`2siWfGcm}a$o1mYNRp1BOO?kq?kDU{(#_7?$A-}AUm-piWHVz1 zN{OiVd<5aeS^H?&e9+G2Hgnrr!o6wE(>yP=7?-$N5Udo?ZBMiCO&cUTD%;A($r#D5 zWge&=p+BlOr~i&Z#2!U;-z_YMFitfmVn)1~x5+kn#V+c}gRB(9ZeY5{c*1pl=>~Qn z;efjXdt{LRNK)ZS&%(t$1I7)qjFFeiZ$Eigw>v~*M=cRQuoArW~~F#-mV0q5Djw*OQuSFg>WR$I8=wVC^SpN+J+tN zBqE;e@;-TuAw*?{3m)scupERobiGqpBE@7A9qs(0#r0US<8y_yUW{VO21kuN?G<&b3m-ydBYf28uT_Y)5sdo&L=k=PSHZR9Ws6tj&?Vl@Royu&MHg%>E zWf+AO#~@kiCEKy$#)D^dvDobkgpj7rro_Uq)2C;ruor)CPW$fy;{P+BY&2eL1dWuZ zzlmxz@z;tDj4(xTwqv(hQhYsRThQEYgtX7RstlNZ2rILpfy81~1T+ny>Xs#4Dci{f z0HA={WV8a<12{qXrMc{&#HO2Qyp|1%zIb55b)&vKU(Sk_NO}g zB;rB=Flm6oj%n`S9Q-*f`41RV=KG1$NXVwtl162 zU{%dVhLgDYn~0_CJKj1bM*&OY+X7wnZX{ckw7eX%^$eH)NqVKxX60&xa69mNiJUU} z@xu#C$FboUjmM6=m5B2eB_Bz)C@d=!&~4mMF8}wf_3uBO0GT-pI<{MWT6eaj)mPZG z$YZeKr;fooDXgh#hxAiSR z%8H>|SMK2U(5#e@y23`px;Fr~3FpZkRiruJ`u_J{`RDQfbIBp%|1vq`Po6?|Ra~E% z2uVfD#r-+9qeYwAQHpy|Eovy=knv4ke_K5v-rIOF|CxZ=miwj`C8`{|(>w!-Eq1ww zc62?!?ZNbPU*{8%d?bDL^xppIC75RJCJtW2s)DmS_!pKmpx(a57G?j_j(R7Zu{wNt zu1tj|gNyvu4MlA-0LXtw7HgTj)%Y0Y+f`rlrSd>z-3-hzDK;yYw*yny`kBl^5yX 
zLx?TJAcETvor4`zq}){_=$rgW$h`2=C3;&}p{*^SB6A7ZU0(h9Oao+b2ff8&yeO|*kIU=w!V+ZF0QnQF`4@M&CzB{CgIQ1cp^O{WcLP&RK1`kiBC!6!ihcntOnx73sZ=>y(qY6snwc=WTxo)h zTGn5)zsip~rV2N3P&@BF8?Mq7{X;el=BOi$<4V#FN)AuVMx4q=WMu_)uJN++bepIT)yPI{|8YIUH*C*s=MT3W2cmv*B-vta#Q zoe~F|lujlX?z`=;spy^2zC>fUC^RYrL%T90@W)YKfGh29M2+**iSNW$@s$-^liVK? zFr)Rd8_U%3I(h@#2%WFMt+z{#Plx8s+a(@t%+1~RbhMcEiS;6>%b8W(&+qC}eGK~0 zqUcvvg!4pj*Se1iEZ5Ww_gGULTTZM5Mq=NrO(0y66|2{qR`E*5!=Pgi<91FRMj9yr zQrVdBlY&h#J6+vnw9VM6B$y?%qyMfUXTmNveF4};bNpKWLs4(0i2`*?4}TUEyQp-f z2Pd(Z517vLUbT$bGfC`#gqd-nl*8gwCn>nEhbUig%)0!rOhr(jD%XssW#-Sf*9{75 zd7`80s2%;?dV z=(}Z=ZDPNAo|Bu_kdH+a6_dLD*{$;%Rt5!cnwv{{tr)mYOLFMZr~NO~EIr3G7>O$_U#_M~jlSb%WeM zEFm?a8GB?oyIpGDl`c3g%VI&BU6=!7F8`wYJTt}lT1(hBOQQ2qn~oM5oW!dA+ohM| zD-9}#5(gq#*Dh|?_1@A{lq<4*`%uX)HMgKcQ?N@n3w8VFJPms1=T;ujHW=}4`%9>% z>A}==Cu;@?9vr&sCM(vte7Ub@8P6a!-}hKcs{0N=SCYtlo@*B#&%3B;-iNz;~dpCtFlDWZ(gNmaCD&r<{CiyI6y^nl)gk(fi$B zE|2+t--&-^m;cw!y~f{94&IoP#&u-h%0`uH7~8*ik#azLs0#6NlON!(%Kr&dsw}{?>sra^73YyN2r#j6 zn*ieZKa26M%{+i8xDCG2eDC#pwqr`o^OTDT!ax3O5R@rh%+4h?# z2O++M69Xz5*A?^2s|J52i>2gegA5qspO@g*kR7=}&kTy80A?>^LL03)_C7D8j1Q0} z0?+#2Jax35G9QIE3TAr^zo1ia8T86`--GL!ySw8FGCmt2AX9@9IWCx zz9b(+t0+Y!NKHcKqI!CN6FF7;EV~uR^JC7tS#M%SSIILdcU_JA&Ni}!wlnaI!c9!? 
zs1OV+QJ!Xu_>#lhobp1Ww9jxl)6L^RPMX7c_cxLKwJX5&TPs}0z}m5Abv$1xQ%a<` ztD>~>qGAk#VxPx8(vaidH+&nu$`j~j$PuGj|LB_GtG27GH#qWPH-Pbqt!6Oe7gm29 z)|YLit|soHlhb^v{yjkg_b|p27A6R!-S4F{dh*36E}*Nh{qZ6L)A_v?st8AeWy%(59Un>DVPY7GAA8B5t`;ipf;^L%a+6;cvYHHR4FL@wyZnoMFQ55w zmOqD^8h&>(ut@5~J-0j&-<&-^uvt-iX@xwD^UPP9$>VoqL$LS2cPd|V?A_!)q3VWI zQgv$5^Usz{j|rp%U5mC9P?-H1yET919u@>tedH`lD=i_c&kduKKBzdT;Aclfn}Dvt zpAfZw_K##sArf+|CC>_(2k=ZUm34F#fHZPF^G}(#+CSU;KmV@HufrKDsg)Za_<*9~ zaVwE0CxQx)SSZ?~gXUnUG%`99`GU{g zv}mDHf$Dt#Lz$_>)3T8E8A#y0n`G#>QkJ_aoFg)tQEC{eP3Z|Gk^^|H>!8 zA#TB?J^A#T$e{&G4w|h(fuf}Fg<|)2&dl+|K;1`lardk~j+GUOGhb|oZ#nwjN~k)g z3V!>@%ToC>YGcM2w!s*V;XiBoUKv1Bu3@Cy=;tr{RUrvB((vk9N%518oLBh@%N+N} z5`diQ9~l2MUAyMTtWZ1#P?7S8|C<9~R{S@w`#+S3fB(B@t^jXf3_o1Ev?sr%xq5~^ zE7<)iV3P`5rk#D&|Ky$ha-2F zUeqf9x&RA{X_BX2)|SvVL-3yT1xW2PMs&?4+)t2w#%9%P+^p6hi=+Au>eU!Z-? z&9B=IW@8kdk1^YWS>&hYp)c;K*5diMs3K4Rr{bByX97#tNlTdc7unO_M0qkVwoINV z%n+;l+nSd;MbS~*%_odQT*go8RxRK8B|ssI#M+7Oa?Z2dH_Fz>9rp5YnlQR3J}_8UkNc<0rw+E9y}V&UchVM=VpF zjXNF{=tso!UiL#Ywyrs_bi6^0=7ks8xqu?}+@_$)^A`!tMM%@ccK=yjY2Oa{(kuym zLE-P#dh?o+3|CnnhhJzn$0C*6O)ZtDDu?e3`Z^%H!c>Lu({7T?kv7Sw;|&Gd!!|YW zQs!Bs>UOch5x7=HhVx+Hpn_1J@j??&9}#qRfAE5uqN3s2Yq{QKP{tz80Lk(#>dr*t zmKjwXQ!S2OXdJ_cRES6yYMJs=bj{)n{vfs^TNUZVs7P<^sD10{DVvU7BRR7AH&M5T zV?opG^eq4P$#Cz;ed!fXVG_w9pEH=zPfLA0QvoqY$GslRyfCH3Lko_`f!*3Xn|E6^ zN^_^mBToQ&KB?z}F8mJ*>z&QcT?N{?PnhIWezgYUOSgvdBq8>%@2sMN_^IAXilZRV z02VVZw*3?Fu|=c#*dlh*NP3EuM8OF+_A~KOzk3K+iL6FFeSB2z@vA?qa9J-+S3d4L z<*=FIuibNHEG*A9dt%v*T% zxCX+O`O2OFfvgN;;aDt|emlvk<=EghXMBriOhf2eDStHnhr$GaNj8D-?nPmMnFa^Y z;zOjiV5zlqZYPNAtQ(ium$rrUzY~aB^=}-jY?`~Qmlz)|I9;q*9lnJLcz=LS**>s} z_snYcHd&J;SxkI|0)drQs{4TC2Tof>%Ut?M%X5iJj$@EHw;$P zaOdxys;W0TKePy6X%s_=In^>Y4lX4v$QUiqmBNed%RXEDI z=6aAk7_>(9aq~@y56WsynBqJB9%3;48j?11Eh1@b@m*DQ75U~shX=WhU16Yz2H7qV zt|${#({4O`V@KIA@WQcV?VaH>X#cx;r&|(_I3!t=joK{BVuMQPh0ON`j=Zm{Y1Fq( zz=Z7%&YPMC6SHlmq#a6rf}a4g8e^qd4WW{Pcdrv+ncd{!$s@2(dFDODEL#MS5|{V& zQ?8@thFJqZD)hn}btdb+htQ1XEhYr`nIwK5?ME0YE*d=TknUg_ICwv5{yiw7&dashK;TOpc4Q`P|Y6 
z4qv32C|&(kHu)fV)noQcW;uL3M%Nu0)=WMs#5i{Jnpz{2;%ku#;7_beLpmn~3u90B zHGFw?INDR!@vx17ei_d{=_VXx_He3leZnf1oF>UrnRz4Vs}D7e#)z*?hTR3$JN-f7 zM&8vK>=T~g@_7!u$9l7&eFylZ%u@|0il;4keGmLDzi?>%no4{YXZlB4qPzRI!1b`< z65pqjvf>3Qxx1s@Iy~&rdK0Jbm<6Zoa_GPgh97pAIvY>J5+0z&PItIxOb5`b><^qP%?UOxWh;n6Id7tm=luJiMQD) z+>@pJsXC%w$XPC|b)_+zfPD%=!|r3|R`*T{sBRj)dRrAO7P~BbUD@$FfN!y)VD{um z4{W?_ZK0~7Vpfw1aj&eZZjIG9-Ewu)Gl@)>T7Mhw*Pd5>M(?!{pNL#!+Xb_HpLC8u zXe~nKT?^`;H^*|1}`i4<~;-?&o0>k_Ce^h;d^N&57X+K4*5_PAPO|A zpoZI1hDmL+LE*r93;G;)9$ zYB}B1AUs*|%J-=}J?%~<1fg*#d0zZerv+Ddzg-RZXlmh!y6C>CrOB8Z`Mbs`A?+zT zXU2-HYhO5T5${u4?*xb=t82XK1LE`Hu1>AL6e(lxS)oaCy@%n@3mSozJ-ouHZq>>4 zq{5~5Crwjn_d{%=Ru>yZiXxwkPp?Iul{kAx=Ift%jH9Gnq8W0dTk)1@1-l7T;~TCA za^O|XAUCFvilzyg!`my_!A8$>+^^>I-8khDpWN|DORvalD>QiQ>l?oq`Al1RsJdHcJmQKMP@xI3fK z8${7Tl3Vxkt!WtJRm&>10WGgkezp!T18IW~TxNFV@##hCOb-2eVAiT)%z3-?)euTe zoiHxa>tX|?RM`R+;RMN2=Kv8e%G82}c;)j*Vd%(RET}J?9=Y+GC_?;uWyF;t&W{ZZ z_zAsTcnc=Z3|8t#*)%RzfF>8za@eSe%6XrBmvuRt2khwJJaT8ylG#J3&^7ljd4Rha zTko%mXg0bpi86!+8NZoRjfho$l1@Hj#pdws{Q17TTiFwt9mHswB6oL3OR)IC+?VA~ zj4I5hf2wqi+{9tV#bxk}SV3j=q#w1P5A}7Rt=OQE=+B_JW{!qS$VYMhefvwLM1k7V zB{Ii7)9*pVAU>e}SbX&9BwHu}x&%#z@TPP+~(eoBt;F_`7fJm&c8YXW! 
zN`J-PV&bYuU3=JQ_SKBDFLM{kduV&G>PQLY8O)ufm?Ub*Cqt3N;F3t)kF>zHrd|<; zwa#P+SzQR1PVa;~Qf5DAMyTXYd%@u63A4J?ax>+hA21l->7YLCJ=mE~m*dS#(en%n z-Wc&pr)n26Lc;8fs}E}8=8ojkG$YNf*!orY?J6`|#5r!~5aO41l1o#HXG;zfU0Eh5 zN7tknPUYIrq{*1Pt_&(a#pUg+x=GrHf*baM2K412-5#mI>@lg%vz5*F_7Kt%kpc&6 zu4n$wx*B_DYMeq@3#YfnIhsrbhAQm}-PgdWm}c6xVKM=IbG-)cb9EVqO4Wf&?O$jZ z^|n0A7qt>IxE=jFy=c=i`~-s~=4KCSD}20-z?!H{B-bB9Q_mWrG|V&eloS7WvH6iA zLe{+G^K~`668bV-WpDq=r&A&-)r0Oee1z^YjN8kbjmqDLx)YQkD2{0HKUMLRFJq&} zUA3BayUQXY8Q!@f7JjyHPF`lX?a8x`HY!kfG7gRz!TCQZ`5jLWNs?Ci8KTIw$|oBw z3+x!(vV6VMZc1-my)nVsyxJU%(huY5GQVjHsux4eg*YluuYWp@f7ch(SHl+USGFoh z4Y>uDkQN+>5U9?%ZZ$Ms_rf`q6`f$OofE5nE@q31F1_8pvNNQ;6)-!fDi;gxN6Rg( zP({j$|M0T*6?{vOF`PxM-&iTv&3VI0d`Rg}ZtLj?0n;30hrLO=8dJO7*TD|WhegS< zA>n$cghcM5cb-@8z(t?yo6htVR(oBw+=Tg|gj$3<9EqoAia6`TJ6LigFB?W5jlDsW zN1n`o1j9(a6fjB@F+CGxSVLVm4K0e=XxB`x+^}I?`ZTwf&WAh?nRrs=&reWEenHnR>_{NddCmC;!=z{xw6*2P%~Ri_ zKvtp3GH`8fRecPda|pNn){GkO)k`a1>tBY^e7PG+8THU-$VI*2joheCaf!{uZNizq zR_-37SIA zh%EN2{Ot4jwJ|HE%a4lbdyXMp;cX@VqTC2glx7r~KV{F?8_kr&+W_ zvCY7$wDIlwABD-m`;7Kc-Pvv)9rjGBTljAb3VkJr2%G~jSE^_fXN8i#wb%ILiS&im zO3I0S_D*uKqMquvLirGH56V@PE^_Zcp#!$;S5detx{FPlDsXcZoG9V2RE8eB-CxrlVqkDowJr1Z&dW>+(Mv*rFKO&dehq zl@Cl_6m)Z~<$cfKhmJhPB07A@RKFE9ZyiBdR1-uPd=|Q@j%WN4m(VN%>)sMF2BX@J zpL5$zf4SIf1$oG`5W<8iooY76+=2OLXg4AE2S3-F%dSPOmaEpz!HKa z<;p4jhN#6h!tHWPO1E~syZ(47@?GubYxY}?qFf$FQ;WTcZ(jJ zqF)p2Jwl#sN~jXt&S(spHuSkOaVNd=Hh6z-DJ+1rEcj=feuxQ#LMjIO>n5e2_v)Y< zCb1JVyV$|FAjy}r9;|os$K?Tu_-IlGB|Ut`h+qnp2?lSv^QphRNEOUZgz#UOZH+0A zxQD?S#a(}DbhT7$c6uK$2RX3?U5BOl*#iYxbPWI)0-D_0-7pA-GEeP#aK4@PL*0fn zEz$XxjWGua97yqHR-g!ofdfrk*baaP{tE9WQ8( zTJ|nlPEFi*5sRG2vm~jii7FXm!;e?t0zFYdBP(BI5#Mmvf&a_wlF*X!n@ACErh-Z& zDEGHuH>e07`A4I3eagkbUhRp|`*T*Jzj(-!0?%B(jjJkWcjlm9%7$1>; z;WE4#G%lmk4c2eA!jJ%kAEhOs7Qhf?iu+5LB~Uk%sOZ`uJ9(C0kpt4QSoAj|T)KX` z75>r+`t6Ey{JIqiUgH00xd9To=s!Rpzn!EhmlfW@$?`CXE z`_SQgVa3+__Jl14xuRVv5-;hKm6GV1Lc5wfCf#ogooSb!ew2CQNj$;I-`iIg*YaRv z7h6L0Yjslf^~TwtM&F30ikT@0#~PZyn>-bl_3SLo=#c-_B?J9K`9A5meg43dHM);k 
z_65G2*^?G=Y?$Nl3(-zi_6eMNL2sd5c@kH^J;+VrcUsd_gu2Zph$4n2-;VJEs6P5< zway=(I{E36Dj)Sw%po#f6T?f~1>kp#93I>xiQW7INiqd=cWC{lY^xf zvz0Aj0c4XKw~g&$ay~B$v}N_m5eX3~d@9{-CPD!|l(K>)npunG4IP0j@_YOg1q^4q zGK^UB<@6^1BCmkWZi4>zo#R5K0hgB!jY7U3pTTQrnyDV8sg_0=%%n0PZfSK`=u41k z^)Wim?2nHh%VpK78?TxNR@x@d?rAH>YutQZB6MSy?CDeaPDH$)HEMD~f~Q?t_b1m4 zM%2i1^-2t@FU6ah(x*GAn4`})h7zxC)PY-$-m8xygZCeZk^bPkq9XT^0mtVo6&bH- z`CP&}$(&*=oo=r#Si?Y0uX4P>;qp{@Qs11`kJJb7>hcS*{Ky?O5+GRf9;fcZEc(Kg zdf4$nKT~8;CPj`^F6Mke>d{7LJ(?KoIy5uwaDo z57Xgm4GN(W5>ta~D*7_<5qI9L|M7DTKP0`GwhVR*)fZhC185L>< z2hRg<(aH}>uBle1MyS1sV$|brYqqv|>;CB^lz|~2X##}b?|sz<~eP|Y7|MQ zI^X-60*^ODJ$Ds&KdecSlv0{aRN0Fv2OSudOz6izeDrTp{TkNU=N|}Q^`bpr!R{v3 z=T3;i#a=DdFC2fVxtrGNA$~#^E~{DK1=EYunXuyF{E|(eAD88WZu+LH`%adfWGjgj zu=$@VD6}~pFgu9kJeqV=dU9UhP?E4{GZ}}oK;9Fqa!fayd1VDB$Yxc#Erk)TPjcSw zDCE@eA)Cx%J0JhpQsM;-mA|pHC;bQG&&>Riu)1fQ&H`q(H#_(Yj{cyQf-9tX=S4IM z`mRvEJB>gXzEX_g)F8>WAtlXY50q1M(PW9tqtVWx<0mwvH!%lsbxtHOQJHCgf$>9) zHiX-MO|J9F*|F)sibWVj|V)0V>mlTSxxT5Fo2K8b<$2c^QLnYDBpJz0V?*PhBA zl1vpbg%yA(a&KIgfHw*fl;c;Bb*TlQqRSoycehyQ7Kh)b-0>uu*DNm+hhp`Kb|;yl7UX0!|Qf%E5X zJ#@~Hi*=Wf-lVDi7s;ITf0N8n;kc%SNj~X^MzWFw>fCPjc5*V3%;CTIDvAO16GTx7 zxoa$1hlfeAGxQN&qQ~7WlTZ}f&L`7=7$m5xJ}uU4;OtEP?V-Vz^@-F@Eniw4c56aE zcA!W%C7O}$)n;A?y?`M7#2~**&8S$jvCKt=>b98#vL=kJcTD`oMhe0EiS~==S`bmH zuXTMPjxbeeYT547U|@IC{_w~B(b0P7ikrpU2%2;RF=b9JYa<-t_9Pl#V@G>cS=4Q@ zBn^s|WWS6&FHL2AvpYCeSi&(dgz=Cx;-~wZAufS2w{g!_&I1sI`)t%MSU`$gFw5*o zM&a(`ITKzAtstxG3@pTZ8WG)QbQymn8bY;0_g}Xp9LGMT`sfk)QKgOf=7sB;1PHr}OC6gB2>R<7S5U!nk1oLt9*x%=ivU##W;tKF^B(miEFrQ>bmloG~2i%yEy z$2n*3B>DWm&>W{XAsxFIxl^&RD%XzN8**NeI}*9Gi$t}0!sRDhy*xV^6GPlzxtoeS zk5vkIM_{;H|LBlvXTntnpKJDQ!ufW&AhOv!BJ<%!RZ8sPIB5S=o$ZHSeBoZQd??kd zxNlF%?b8$bD@%vb#r_|~UtwYg8wD2mK0DOVhHMLYx2n$t)LjHrA*bp}?E)T5|hsSANG5iY~=D$h{8V0DZWahlb`s3l!`wELqrm&*ZBUfod(=zUZ$RK#;LC zs1te(Eb>KA{MD2``_=DaFXKiiSDx{VFrw`Fm*q1(^Wd)eyI>pV2O-D$!lDjJPJ>O} z=vJJ%*ZG%IreTUb3282re!@I735eoEeMJ`$9>UksoAE=h$K30@4QoGNO*fnKdj9F0Dg6^&tP_7UMi^!emS;#_dL94Mm#i_z 
zL&{MgpN~w=$%VA~n%@D6j-VW6Luh8YxVhiCHqZOKO1#Cjt6$QCrVRbdAWXcc#?;;P zXf(r`C#;fm?0Tbc7Na*O+4O<-GxkrZ)GjyRItGl&ov<5w^_50KiEbw*>J2uBX}Sm6 z@C1@ya^SVk^;7Qo0cTApXlIkhf$gu0q|r^6DqW_6FvllL@lfx2 zOwe*aQ$cZQl|gng>HGEPOniM#&(_L{5N3~%g9-7pHSYO}ENQLW4{y0(mJduEKKS0m z6m>(6CH69a8U&bkXB$=G#8-sxVh%d>Iq3ynD{c;Ty^Uo48bp+u8uyXPB)y7Utax*$ zo6esyx7~pEb;yHU8~Pb3vjvd43^qfp3l-YxGPuFC^;CnTQsmliq7fH`J7z(*^72d! zD7Bfsrw?R8Bp=OGx^vYN$zVYpto+D;isikYeYE?Ft%vV7z9dAoE-!z*rR?-n7HX7f z>04?#3kwn8N_Zu-1)}X8-Po2Sq)A>q&1Ie~=3LQdOrBh!V6oFG2`auYJD4eQuPT9| zGtq;Ht|j8xp*NSls=CXS&N93>=bm$h((LCb$52_-VmGzzLqjR64HGKuXcEgMdRV-; z`Dgh@V`6pUhOuPY_k=l}rEF=Gb)tSS-_jt1$&R}-4RRAGRoh?CbTx!3v1TJCSU-C* zzdn&wl*%=teF+bC+1F!GBQdQLs!=x|>V=pNk23a8x;JsW2s&VFTGx*`6M1y?H&H#1 zBoh}d&HWM%fvj-3;}O$@X%Id`cmRG13q~LK>Y%uDg^CLM-X&`UVYGbtqFW*8VeuVH z=iSdBz6P5CD-#9T_8fdp-Ctfd<*9PqTjV|aDq*W{KE)!IK#HbiG;7`oyShjHMbQ1# zn35#SPsu-$on3Ds`TH@6)%-IY$B8RR3qU2JAOANldmmyL+60QLYS5qcym zTw?ORW&p-{_4dxWp}}=+(OoKy?ndnQ{db&s+Bb)>hyJW=?7hye@f3pIQn0;?KZEO$ zx3_)XY`?!Zd-#oejnO`Q(K)Nlh5!-Kl&VwPmQ=A(t&AvRU?wBcGcj`gR;~E7pG5sc zqa_mX9YY=rcJEl~*Mt~~c;ti~2l{-?Ve5#VT$2|^vSu!aR2W}=DV3k*z_+K zBq`nX!YQhLcjr}J0GkWGTG_9m3K@8(QKh-qR+*?&=Q&q#=-|hW5KANJtdU6W>YHsp zb7XATx)vGpRg(0VI65H;woPFFbm#`osSkfLJ&@G;!G`(MG@>*87BXrp5-sJ{z>ppI zD$}-KE%kPBQ8e+^CM?g7>4yrkzk@M#c45F(DH_3*HN0@+`qKAXD)ekdhNeb^vZeBy z_3>S`CY-zVG5Pq44ON9Pyr%MmyPV9cYVe@Ori=ky;PE4^jA!Y2)1dTzR79(@%0|Qt zC()Wq2e*PA%`rR-r&;9Hp%32Mw4R@P^T|bDoF`aoZ`U~%-1G}`e|4{4yguF7SlNC4 zete48u=Q*X2_`u&2~@=WYoz|Aa)=YXs#dy<*_i)bwNv}KX82e=9njcYH7SjPFdq}g zu{~rbY3?R2JBkFUnP+&ONyCHP6DsSdZ2Ya41ET_|^xj!aM?a%E6YhZ0)kNTAYe158 z)r0HvM32aczb^Yc!n>M!3HX-qsAg5htZk)A|8mO?@6b$6!3kFvDP_2zmpZAgPn)W} z^Db1phm?wbdB40k?<%VdrRSo)l`%53Jru_`}*lCeN5ag|1KXIsb@Fwb3@N?X#_KHLK@t@N+ZjBk!^M4hHluW zft&d~McM~XRD+Upv@}V+#oYK%^stRIdU6`kmM#r+8T$Y1Sj@DXiW*v?Tspc``Oc+l zJ$ZA#Po_zi+j98}E)857U@|)Pa)g@ydK1d72445(VUHwEZvVGSFMR*!r8~2~tqYsf z7*W~yxG#g~-jlC^<%R@1l$(+(TGa%y8#5)P0XHPp#h+-Oe=2~*OQy4#0VUqtR65<1 z7ThS>Sl+;W$FF+h<6Gqo6R`@uNgJ9QzV)VrfY6b*wU(s8nfiJ~zGn`6JLbN&4URq7 
zz~i%F%^Kf^i`c~*-9{?(e9!{%El&kH9I5@oFXm#M%y*eJKqq`aA~+Brfhx^j6Vy87 z_E3|Y{xcEG!p1B>T`_K@8H$+4tAl1;!W&)!`g-#*LxY=dPC5BfZRWRT!>siR)!~%n zIro6?56NV#%XQGl@(U*n5j^A*Mj8Q}=A9n)G4~tm-%rC%m_u2^m+xY%>-#VyW+U!U zGCR2DL}G7U1T{ytQ-=b7HidJ{{5i`P?RiBI*UgN<*6Eo0#YKbQ&e$Hjl+Y$tG1l3qk2jz3eU_KkW_@H^?7(d z7|?#2zyTex2Ohw(HxmVxeRT;h;u~_e$Y{EQC9yr^Ez;RBUJQmXh9so{SV-a4mQ(uB zV@CpaLo#j!RvqxrEAXhJ`F8qd+khvXQ1$j&;Zg5fxwD|4=@S$BKf3kqYrE6N9GuAQ zZ-wlvfGxHya^~`9r)G^75|dyhUMKDHY+u_7?X)=ioGkz7S;mU|c-s=qpQ;%daK&>L zQO-^{0q{MFJg2Z&F;gLbh(XnuMU1v_?{m_xNU}p;Ks-z6yX0}`Y?B;Nm~%~$5*(Ra?A|k&ukXh8cO_I3NcyzYRC$& zpIYAohAJM|B#01cxgNe^z=2yxp?aA-;?_0t5Sf*oWw1^#-5|qu!%|sZ;hvFxsN-3a z*`dsGUD`!T* zk>4d{i>UIhvUT3srSyj_JKp1oXq0g*dc(Q)SR*K_uXX|Y_J+kixaJ!4?4aFlOgS{e z>3qVHS&__D2jX*39VNeI4lLamF3o!dUR{vj<(p0`j2aVU zSWbJP1=S0}BAy|mH6>P{Axz^MpCCu^B-qzte*ts0{|!0$*4^$F%0@QS6+{n;CA^of zbo$Wpf(MG9AqAe#yYl8N06hTOPlFjA)gnU*Xfx^8pSj5Bj&eUoUGSVubzwr0Zd;M0WgEAZjP ztN7GL$`vx|OC24U`Oh1%J5PgO9ezDlI4Yk$2*(l;{osovZoQK5A9>Dy50muULd6kO z**b)&?SbLB`F2JD@gES$@tq#OctG)i&^$TK zDi;sNt?))Wf@VhvOQ~S*th?Ish#{VH-GrX zwU!Fab@|kUJ1};v48;gl?s_z>Z8GsfFSV;B?%*fa&jx+u!;r!n$zEv9FFv1}xtm_p zy@4zkzJ$yKYYs*ySq3eD=MPHki%ZrjD}vbr?^kg|Jf5Cf!*Vx;SjxEb%{BMXKVjU8HZcjPyVAYHDJ<2FJU;lW?ruh1r`G3Um z|1&i0|K!gA+H1`gzDVpH;b^Ail>q|4L+jTNVlIshl%$Svu(1K4BmI<(mN3$fuR?)a z3H0XQ`$i~R&?jyjcn{EaRe%>DZq*LHG?|`*U-tXo8A;fs+y9!Rx$$@o4I9Cd{ZrWY z|54Qa|F61kGX#Ke|5%EmQ^!DmytJBZq;qn7B5e3!{#rxCw$A3+P3KhDe$~bo zN}mX~yfz3d=4Iv-dt-9Lp?QuC6s0o>opB=}QcyY9iGIj&iKa}g-6S9ikf9B=4k^5jX9 zR*-%!2rps0mjFYaJjj`Nu-*^3xeUNp0UJE)DwKE#p5tXyr725Gi1N^0Q7|q*(V|UT zSqqps%B?6+*qTuwH#1iJX%&QqHG&UUX%VF{r}9Ty=twiz3+6-DsKAb zNko!0qUupW4E+jZ=Gg>Z-4T-xj08S?$X1Y@uPKM3#1?MB_p7r&Y>g>#`!ZB}PZ^P2 z%y$GHQ}9s`YyT&-q`%*E0lm4qvTO>GV#JPb+Z+YihCWLWW7sq0D$=R)8CIL98*5h0 zqpLKp3T1Ew=YYsOtZt1RUZ~o)UcB0iy8sD29M{b~2)H}hXZgje528!pI&Fz#ra;Eg^$q1_&1O>Z{eb&zNWxEL&2n`>3t}+9A2fePfZ+JQ5XR#q&Lo%88=8lQ z{PEx#;jW1ihW4M;#_4Q#)~EegjKqKK_GwJvwUAu_p|)(tH_HLRy*ofM{Gde^d@Az_ 
zNC@;2@lH@cD~Dz{_DCUd*GJYJ`5K+L?OhY)=e#^jZYozY_}LR1L3F^VlAqyD*A*soS3a;*` z@&1FXl=b%bv-`=qnin?52d|x5c{ta`cln2#Zv%_-uV*{gN?Kad3E8+8?6+5ft=#RN zCsss!-ft{M&p6iYC{&VS)2Q%!&afnD?$BUYMGQWk> zs7v328&zniJnrLo?aRkYM73o$WEtjWYsRndr$D(81Am*!r;^`gI_J6W8H;mHpL5wZ zVkt>}On6`{zs??A>qrf7TOA{c7pE>J1-^#F^4_&`XHvTx7rDkDIV1zfAJHI}Lfl#S zwse{TnfftXy!x-%`kxYO-(L5FD?E{T<0d}lXE}M=#NV#{{`Q5fYryD1FwxZqk}m7F zg7}Eg!z`5EB6((BHb%L|{PtKk`R;TM;?RM$pbiURv9V1@hXb=C#gbEb8zc-DiB-S{ z%mItH3kJXte-j~VSb4_C0HcsO2%f$QOKi#@+@_#2w3e}^bqe_oO7eRHGz~~z1B^c^ zJY&^P)uuT;Ae29$uoMJHF_!pXT9$QBAu*^V1zaRB#ax4KLgR2e?`~zU`g3=$Uk?Y@ z0+?Su6RQA@@XNCnnaa7H%E9Ld?HYKBE#DtWJ9YGrhFAFy2g$qYslK?aPkazsyV;+p z{07Ew$fB|g?LjkxBI0xdbs~H++`BP8WATgy68Dl%AxOxJ9)i3<`>aD^!5gwz>rlDA z`a}3K0QwcdNI4-fXMJLlci|!uBTB=#34P!hJJL^s5efBA;`oNIh1n#$++b8{CDwGx zmW>wiFNC#G@)-})Hs{uy8~NSpYHkQn@TL@rEPp3`Yc3PjhZ@iEI z9Xi7eyP;abaral=mV=*+E-Vh!y-E=CroTn}cFEn>D&vPFj3sd`YVw!W$f=sKm}Tl? z!MNS$H};0*AIuNCui0B1k_jYIGH#XU^<^JlX-Zj!hcrI+&r&NiHSX}v7Z`}-BF5zs z*|(>DN^dAjpINQO4*Fn1RpS7TufoiCiWZ>|emYXE%F2f7W>8h4r)!U&+8Utrh-scI zL6cpX>3&i9ntNi%iJVnar&~kx$WR^_WXE*802)7+e?H_>&F&~mweiMBvU0>x4vw5k zw|@{7zxSkNcVi%;GFzbZJ6LYHU=~vV;6OmxKN?{|pX@;Yy<2C~hwVke+xzAM1={g! 
zHcYDh%_;jWIsP=LHG-m=FmSdeo<`8`WI5^qi)WXQb)t*J3$fN7IW1xjwl5~2ktJ4z zbzNZVgaI?oQEfyx4v<2L4a&h=1BcWgeQ7>}?JjiLhLt#%=KTU~VcnmE_2H@*YB7Vg z*M&cbh+2uY(PF$DU5oj=r1zGr4;4($P1A95jaw#l`MHFR#=P;t-uQXN1h*_MKt; zGbX@{d5^ist~u0^7U!jj0$#dn0TD(oSelBTpePHd`FL3ZBZtgFc^eqA^O))KhS#FNNo`JUMrnjH5;74>6(yB|H_O<)G!%ZhYW3 z(c;Q6Zo1Z(@Dknh4j@Hm&jcvU_t=aUAHTG){ur5AJFdU$s|jPY$y40VQr# zS_||vv{F2iJe74n9rPzbttlsLsg9#-zr~lwWF@jBnAr17i>#qwOc?u;9Y(gW>=cu5 zDS<pX4zc8<_9$C=iJ&9^MI*i+b@ zNrE881)-Hbr-D2v*(kQJ;(k+h1lOkPmt@#E7zBrRh|?VG?VP82YuGRhnZIr|L{AFG z&zd{W;3(^2A*AZ$sS&=tcLIWe6eEQZ=@@Q)tYG(>=m-vEEwRm2$agyvI5}`3i?p2R z!NO@i`)`JY76Q*HfP%<=M0_xaqh4G)#xS=|(Oc2tU{ z+LGBjfBmfnzdv^c`7J`rI`CYaL8PeTO(ld_zldfcP0=gT4-H=-r^Lof*KnFYY|~j9 z#;epY0$X(Uymx!b4wYUK1$L73rSoOL<>VYo+%oPN{7Rqs);@N>?<-(PX?15BH;?Bl zl>gAg+f;IqWRbPw%$ckIM7=>`A-h}l@oE#OUBYE?hxK*Bdt9{$_faA{YNv+=h)RE} z?@ul73auTl8qcQegd`^{+YCGq ztUUuLbvHNLv*pPQB*8I1VuVgL0Tvxy(NkQ0tzC%Mg>HAkrE&f z1p(;-0t(VaKuSP*2kE^Ndg#3*)Bqu#?Q`bL`^?kMbN(~)&U}~;{&4TKz3+YB>t5?x z*SeN6pEGYpn(fJO>7qIiA`L=5o<->r5D4-lA+*1FJUJiW4u%i#sOGuMH z_nPoa{#$!Kcab+?dUEsj(Ud2<02g=K)grn>n19nrc|Ov5>2=(2h^1gfQaWDk+)h?V zYVLq zEo1n}ZsXfjIN!I)ob%>~Mu|u{86CgcI$J9}ju>N=^9h3@HxzFvGjYHs`4Ge>2Bjxw z0a$s<9}vTu3+1;;Ti24qMQjWjV}+hltaJjNf(u#3%Xm>5$S{usW|y{50-3Tdon zLQktk5V6Z=s|d#T*gC1$d9|c(FKheXBPn=$tj0MXDWzY4vLRk1S%!UKwQLsN83w9X3hqA|uqpC_ zQH!XDDA?P*rBzDa^ir0Mn%F^5A;-x|Z1&_@zyakA>mDRNNywAsd0bTastz1KGl)o$iUvqfgb& z>(&7K_u_ARG4#6v$}r%k!F&aWWvKIu2d39QUYBWh3=PN_uE6hE4*ZnYHVFv-{9IV_ z%T%@O%_@u+ThTxld%8usrN?U#%Y6rfSdeg#nEc(9gyfePY#b3h%Woo?xP7L;TI5yc z=pD+*vWSzRW?_?P6F=q;U#@1YqZTE=CVm#Gbh%O<3dnXWbaAb92 zmFZjtDXsx+{Xx8V1CTjBHO_en~=Btc*l4uD`S4+zv!$7&{`O_7r?LCyE5 zSW?(_5{C$opIe6z=kOyS5;+n8$fiNMB4fx)0FNWuDUgZd_{YQdxVT7YNBdHn{}-Gh zZraP@+7p+ZM{-v;IwOzg`<6^@5VNPN#b-7|jhWe$({4RVr<434d8jtb4+nuwjiQ%Y z?5}oS933+-r0Csndw!S?Nk{G{(H9-;_nIg#7`E~8V@?M7DIj&P5lBlI3%)zlDrwYJ zytn!L`Hy@t91vuyllov>1GTIgZ(B1e%T=aOh3Taxvt!o1!Kh7~dc&qt1M_ftwl%|6 zmi1HFQjs@JUu@fV89^XQ)@Ltj{D`N3bh~uViOcV8-D=xD@Ms~icJ5MTmt12Bfloe? 
zCN$qksbqZ`sK?1cfq*WIJ@oFiJk-S!acs@e!Nl&)ijKuNZG#Egk> zdE`!0TvKDfEesb0ENGsG1Ug-lL6k71&UkBU#{rU?kLltt+H^fs&hGHfod8 zDvWJ>@oNVYOS2bWH<}ft`|GMkuZ-c+3bvTTk}R_IA7@z6UdYX3Fnh1e1bK1~@-|OU z$j|$+>!iS%fPs9_RpP==&TDZLy4>a9u?HOZf=~$?nr(iXL%? z#|??1gw0}%o~6bb!~7e91K5z1U!Y1y(p(M%IM^1SLSun=Yk367hyMK+GqV84O(>2O06gKy=RkEj;z(2A zcR(EB3m_gCz#&U1*8Ac9fR5EV^ z*I5qz>CLhlV;~XgJb7Q^<9&OUhj|-)ZjH+qcA1_>sWSGrG}SEUMBa%74~1xtYmz=6 zK>2<|Kf4&=1D1!@0}^2O>!sy7W2AK=XfpU8X@9w?beSWJ#rj(PF5|A)1hTfV!WQRS zTrSEce<|yEu}o;y_k}Y_f@=Ma&5nqxjhX$a?c1G%8mkTzWCfY%TBF_#$1x)+1Wd(zL&5_YorQ34rzHGkP!O{HJ7@|411Z$Gu znw2L20)w?q+5{XSDX8Y$0m)Z1{Ah!l>;o0rM*~Bj2i^yE%`eeegah)uIHeP}sNaxN zALQ^Fz~2$akfNarfCJ6`8gEEe`h;G4hik8SLo9rKtMrYF2fY zEk^NSLrTkyk0Czr6C>S(HyKhpJX#Cr&%0j~0#ojf_R#w62OE33+Z@2|5tKs2&>{7 zLj^muKU#G2T;}xlbx@@8n5T*EC5aIpc=KC#lsQX^w&s>3$KAZ}(oO{*+zV2dYjWvL zp$~dIR8CBO#uwRGD-%_q)Wya2gkqBMV2r{%Y0#USO-e$f@+= z4?#u7eJZeK-F4m8{x8ozNx;@mVj1Lf?bzP&WY_3bx~X7cg_t)aLsOdg*-)LIBWL|t zvb3+d8n;m3S#`&yalxW&6lz{7MlzgkjpgueL&-f35HC$>h&5QWL8e}vS*tb4q`|Ysbz7s z=YO3j+~YA<3^*d})B{VNwqS*TCp-^#^yzrEWiaiX4S4p!wMx|_xy2V`ljV&fa&G_T< z0vCc_du~})8)v+3N$yHE?*8x=Oq%%JL8pJXe}w(mc*Ofw!I5s%8nb`a_fCSM0 zLE=j{Mov=-3ec&rPmGvKQ5+a)|DZ%OL*LL)fqAL(VtE||Sw2sgh$F7eDwbN#f*BE3 z*JwCPSZ;>4DAHy=Z5a9BxS_Xr)#_)Q%Xi~_8A&&`cbNBIZX-<|rsq^_r5Q~C$Dmt( zff$x+_Xoyn&b%t=dv#E;lPqzi`euAjP4up{?%*%bxtTDbQ)ALpOzEG;Cjy{J zZFd1-MGIhe@Gb|;$N~IFEPJ&F*ig2wTLNA<5bviMRPh&{Y@*?v$D6!pe7Mpu{76-@ zCpk-g`J3O<3*uw)G0ku1( zKKzh97bx)$!5vXEMg#E}?cV(k79$BwlE{gAejbPZvq)|xUHPx<(;>P`a^9hB&J)oTaWYUG46Xy;H>|l)o`{|PNM{(CGv@NqEPS&zCil=sX$}Fk{4h3Q zrSky+I9tl1meAcXzc(*Q=N{`wFiBfoUD@m(?BQt3d}02+=>?0WayHlS*e*6N05gSh zA!ya*rst;T%k5X@lg1(G?uvz3Ru$F9s|LV~>zo#LV{sCG`vwGh=J+<~I87CZxrz+6 zq|HN%Xj6e1_w@til%PBOsr5WKFm55~z3g*$&y2E|(bMu_(&9~^h>c3gJyxtBEL2bP zY3|dl(mk?Lc0<|vEt^Lx)A*`D0duSuLk~7(?H34yo@dDhCfTdn-7EUF0JuI0J{eD1 zVzWd6^YMdS9(<(;adaUEH%4({7eL$%6BACT;Ny&nVxZ-y2uUqCOWR^6Z8EdKT?5gKW$Gb8XKv2RA7Q$|bJ 
z`FSLgm3PgAKsd#ZdOq!?>@4#M6W5zlSGRV3lAS4*PhI9hUB)}q2-8m?A3M#o%%<^Klrn%?2Ox;?to3K2Y!KOT#>lrecS^!?jZx9apH9%${M6>)vP`Gf&rPW@#vE zW_QecfoWgl6W!(YLWGLcG4=wV37Ehtjqk`Ghhl7o9Xn|u52&n*=+8tXX|`buiDzHm z1d7Ez!8v1AZm=&CAxE^pw7o`B#fJrWmoN)PSbX20?23@*gSu)z3t^cEgBxskvmgUy zH7(t;no{f2TUg#l%wGRYbEn~*3B$r(w^w#HwLJ7{QgKym6sZRy+M*q!$#bF^8Lu+6 zJzy+1`3sbt1SQUql>~s`3|Nf4U0xUPA_^rX`1PaFyLR?mW>ZQxCGjB2$sg(%_UIj4 z7<}Fd2=>iGSH2pHz)^=ts)FxR% zB1-gm>%0;>2y|Scyf@ch2?^4@V?ey58*}si9Zr;o@=Xr!cf%I-wTG8C;A=U!Bc?n| zbKHZt13wozKH3WX6fWV_<@?Db{kwVXKuX|+hW{o!PWF+l%SqX)jt5`xraCDC?|en9 ziZH>W^Xii%)tw8PUuY*hu)x&*bNr|3$C#;w8-4xV<_q-ux2HFx>M2*cyh>B7U}p(u z-??Okbymn0wJjp zLW#7Sl@WkG`Y~j&)HQOM6S*vL=9%G$^Ohl;x=NQJ*rKw`SH5)MMUgfFcIN^=a1XbG zN6dCs){LEBDXppO*+ciypXW@4_t-VXSh;U;6&*wt<7^LPV`t$ZB<*j=0nLJeJ=I`O zWZdnDwC9SZ@(!>Ju}1XIdP3>RxxCt~ies+hdNRXc-q)Hz(Ms#!KnD}0yZZScCiuM| zV8qUYeQK940cPAAz`r{PbYH%He}!{(|7611VfiFO4QX8zY1cRO)m^DRoNjGwF(d1K zHcr(wLO#StvHOCJneG>q84w_B?~u!Z_#m~(%RHUPBRQ3Q!o)AJSa4w#H%(*n?wnaBfcBm!i)teUuvz_`r*T*%6^K2 zv2C%MayB+}R|Y|4ydtg}VQhjE`bftOE>>n4Em>l95E)F$b)Lztf0j4ZmiW^0Ek<^~ z&v#hEtt4gu5wU&CYiJI4Ng|jeB_4h0{U&|PEB%_y-R@KS*CP@~yHHSs4M~aMpEg}B zh}Fn137dW_@sa#;T~I6kxNObW#7=KIOJ_fcCYGfBZT1s326`qR$F^Xv-H!6ger13Q z*hGaI^cyN)#p^v|b<<&mWO7v`Kxe~4ccb-Y2MuO#&6)oU^gtIipviJ3#WU~_?ndcO z5BU3BTiaYatxRvnTH<{>&S$$zHPC7TLqpQ(Rq-gJ$uWhDji=4Cr*DkFs~q3K@z*W; z>&v+{FX--G6Os;)UkuXL~Y!hez^o-A`@XC#{aDty+ zIMOL?k?A3(^K+Cs^chV;?KGp!X$x4xlILj=P?gL`X~pp&y*DGxcb6Tv5K3!fVvAXL zU6n8X?nS$*U);9+7f3$0;rGH>EISOLQK9V)NLE({dS#~!sMvsZ8rD|*be~vo zHHDV1Zozqr63&cIm9e)}me@yI4=_r=T#~PaQtCSE+g0fLnzAg|%StF$F@f`p-PASyQ(F-&Wny8cqg5iT5=;<6gX^=b zIuNi9UE_1P^x^QOk{;|LN_!S^89|KK^fJ_>JxLkT4zVs`OQQ%kax@{}PcJG47!-2P zEBf4(8hyG_W|u?RFnFwr1rG%b(dx9Y)ZvT^qt>##5@5w2+$xZ7)pWhGwCwkEUR>X+3O;QiDH_m9lMWPg_h+Q+qG=y zeA9wd1w?Gh1c6rRqX(r~v#ES!eAV)F8yLvW)TTEMXBWHx-|lVR_kS;&F}~L7*c4u$ zplD>VexUQi+X?VfN53c4b-aCzPilyy_+t12jVm{Xr@Ph`$-Nz{p+crkP<<>>27JZF zHr^Aus`&j^H4Nwr>(?o{x@Q-!Y<#AwJ(iO7+(aqo99E^!)HQmH9Uj&=`mX 
z0bx`fcuTcGgFGSN&7QH^eflEEnBU0hmPP+tptENpl|ect@y`h&EqXA>K#PA700+jN zD8H5w%F)4P3T;R9_HrO}v7({pMy9M89LE$!%IodT|D;0mY^9W4jxbE=Bbt=Ivh@oj zZ2W|Aj^ntu;Q%)>$6kgp#Fy6FpC@`RxY#~Xo~1T=7T@*YFkSsbB!u+-)Ji?aaJ9#q zf>{$bBFjt%^y3f-;V2a3qA;)QLHQwo{CPYinZ2uWoPOg~ATz&Xrq&bCliMIR(A9tG zpZB+TuK$hBogzF=Izn%_%Qi%pP9_o0B>`}P2m*ohH7vqU1)wpAFNQ04*{xN1UP<2a+3P`myHTAO_DZ_&7nOws{}3s|?0SOtsQjvco+QBQpAtYDAoDd$Ja0``clW4^FfL<~=Se zxHEmzRfQ;x4)*0H*%R6Ns+s#UplLUQ@P{ryiiQMYMO6I+nlGE%S5hEyWqBY!S10%LYHzAld+vW1EB6 z**rGWwgF0O&AWxaNo@ViKL>mtM#M>!2_7(Hv91uS8nbQN=^fLGpH~(-i6>(;mDNbh zT@0My2bf~9txNY#vfQ5UXD_OAO@YN_a&Ewkdcr4@*DA3=pxwXw8-Ii6{`cMs|JDD; z$cqu5_+m`ooKsHEfzTSC#H+m;oLQ2g9H?DsnY+uHV_2N~Jl*lQYLlanA(@2Cn+4Vj z*x*nn*P&tY6M+mY%C*Ici~rPq@OvMD#uE^Jy8uq|YTHuc@X15uJOU_|PXn8LrJcB9 ze2ZQr=%bNY(aR#M?vHa%MSh(mp9W@?SylYZmE6^S4Woc9SHD*e$56SdW9<&5{ZQ!h%cK?D} z2R_!#yM-nVmJJH1;=bSQ7h?k?eZ1C+nxr9QqlV+30FT@?&qtBS2^CyU-P+2 z(VwTGKjWZ<-zby7yhe@7HNND#SoL(F2D`R8x&vthzmv|ka$|1sn=^bbsMoqb^Tyd# z%C<89VpJq>}j{1_23d*)7&PMrBV{{&rfGF+z>lIzu`;5687sb*(c>{YO^jak5{Q0>zYSy}dHX`cv3=k2oR1`19 zJUwrAQ*kFnMtkVPmkSppsT0pxuYTuCi`EE9lPq;sP!nv?N|zSnxR&lx*e7N!fR;%V zBKZFF06OHn=#P~zKGrC)4Dt2gl6cpGGw-?IUykuJp&%gqNoq%7#isk#uWdP>lUkD& zUp_rYJF+jAy*vhWe`eWUTWX8L%O2^Ixf0JVtP0N;>K4}Iooo==Ox@tJZD?;{K@GbSDM$U zEna zfzpK631_{xX9N8=nxVU6J3=9l?dw*Mt$G0E)4?#1E+&R=-);l2gy#XjK&)s{RABN7 zVRdXVDZRakZhMi3DK?W%@~NU{2-Z3e#Z8EbaA-UGdGLBqn?P+>#OCJ-@JzV8^=G^V zDf9?(f*waN4`FJo7xNkUkU>t1lOlWUH+zJcTyMsGjS4o%SVcYlisB>WWlJ{?Q=2n! 
zIG4h&Ymm35;BPS5Rctd*Fs0?CZ~94h6lE4ag)l)FyJb%G(lp?>Dk`8;Kc{)0p!$ZA z5*MrP2X)@e8Z0s$b=i}pn6>{|xnchVeVOP0?cssEDt$So{}IG=VUCmL9>+J2I0CZj z(%hOd!9LwYf%|b^7NF|$#k=@Gjy}{TFIu_$+kk~DWw0>P>Y{H@qycnle;ws(jkj-h z8`hQf!|U16ODIrORQ)Jry;5oABNNl@sqMuv6WhopqVr==EmD2djw7BO#k&3s_xoWq zr)C`A^beKD*c%x*acmLeX&1KKE$nmEK`*-7H(_Y3VO;pV+%TZD>+@WtzP?-P^D}l| z7uv(Vx?zNf?)bH4X-J6kiLUklDOgW-a%-`1Wb#Vpj8RmilGRaA&`Q?ZjHf?x#w$vv z-H}E>6RUvw4vO2kLIe^+7t+6oj8rsbj+rSMeaH)l47n@0^udv| zEe8u)%T^J4HY1iMi{cW@yH;e~=NbuPN}9oi<#1CRb1iw4QeU3BBjYb`o!&>jLc}#P zJUnW(<~b*!>U5lB?~oNadNl2c9TC-yVoeP4DCyf5Lk=+UjNJ~3^@RW2BJ~7)B`Oh4 z8ApqDtzJ0wu9qs@#H>ebx8E|Z>KBN5TpTdk+gAunk510*RwE0L(aQ7JPELc3T!ga;h1+E6*c$_v$Ji z9+>i5ty9&VA~svJpPf9B=^dU1C$^I{EnV;{H4tKDBRhY*X5Tuj&xW9wLK=yUHlhu1 z0iqZHB|!=05fN&G*Ri%WojaX?DT3WKy0R@|O28mezcDg9>iHeB@uzl;^`Qo{Elk>^PSD~S3z70MjH`&+l@4U#vkegsPAHz8XzgBBc?2C zjILoFIyeq{6Eee?!YCq4v;l zVlq-i$IFew2j`zYuzLsDsoy~G15FgaRcB1dPO&woyi(@#^Xc^M^{=zfl!8U?h0=fH zzMxida3Hl^wL~A=RsmM3@D!O5BYMtRM71-E4ST6PL*tafhpVB$4(7tUnOaSFX6UIU z6Z)fM1;R!_KcjU}&t1x9y~+pmkI_)PvlwM_;AmtXmi_qW1;rEXt?kNQGshMx>#HiX zm(OYgd6YVkr>-yRF;PDY>8oXb8HxHD)%l(K?FjFSH+(;?v|I-PtL}A|=K}qTZo|(a zDei(DDQ{E6S;O&OkHdBBXTm9>;au;yS_GF@;vKjg5tAdD1Xg-I9S@5e_&eQ2t<`u!B?tg3<{!&xf1Ll$g#iEZKw5&E-$)t(xzHFUGj2bu*< zASuI1M!h8MSydJ`Xv6-?*;5>kW!xh&I@8X>T#fcj4|QXJ_KEPI6oO!qBSz2t+HI9v z5g`RIS5$kYcALy65OM{fuVG?qRFj}mGyE8bCxvJHbY$nIeutFS2r^i+@bcKQCjBl1 zdU)u&hzkVPDC&L=@pCbn2G~TQnIF0WIUfHIYDc=7e7{P-iCNF6zpJvHdF-t-B)UWG zH=%ILUYu*JV~69pQPiY|8zz+pBWAukWN#N=y>;@+>;CK`t$f6jFE8e-O}fd(wi%DX z5hx9OLyL5HpqSyYoJgn7;-;mky)GOKorF z?V_?CFtuM}ySHVMp&%`J(fRwaUa6B~l|o?Ca~-;`FH&=VxD6~F0rhbvfjc)49LS8B z@Gut$-D;d);^#%fiODWb+a-AksMPi8G7gSG00+=zhhCtP*NFFUmCMPN$c} z5334hv9X*lnVqeEZ3FSBPeYbks!xl>RY`&Bm{y5J7WJY$y<&J$c59$~260}NZC3r# zl-1L<67);e_j7tZpIPR!I8T~wT((Ts6zKcp-@U5W3~Svfr1E-XcT+DYi)*69sAte{hKDY@rNFWUhh^7&PIG z!=p)O@pH`{Lcv8F>Y4*p@vqoI!WV(eFe43pn`}g{?f`S%XUkm6Qfck&O`jDiJ4#B* zGFbAq+?sq)1NfaEqqO4?Y{}dFi*Ac<*v(WG4IQ0|QCzk~v%$enA%+#8e%rN0qoi$= 
z|IC%J@eea^_v$!$<5JA0A@AMV`CkldroG$Kk8cn0oN0x8z?AajQAYJGRzy8K;vsw! zzNh#k*g7|9;q}~xF5q@_Or>pB~!^Hrl1DZOZo~K!`;WY>RJka zLeE?0E#?t^kT3G zhhrc?G04;owP5*@W7Y8vj)xMd&u)Vt39)iAx-9tGI(7m4K~U7wVSZSTO@3=SX(T>| zg5vx-TIEPBs2|Dfl#^YuMSSJd*FzA^7dpk*67dc2TfJRL;LF7N889vKn=$V$RK56; z`6uP$)&aZOc<*Du*S`HfV01*5CKwZezKdLHzW<7|5E)(7iW4*oee+X@Xy#&77_;?&Iduevdr|R*nmr48D{leo%8it=H4kNy?xJx$t zNDL@`j1QL?_7XX4-iBI8-%XF3k+);e_ag?%t5%(xyjdWnF*ml>_PXxPxN?)ifmYrVp+L@ z%^FY*a{Nfp7UmW5#u|((m=7NuCS4vyf@j(KPlBkWHzni0x1usncB(RSCsn6QL-?nQ zt$F5ZW3)~0G%x$yJYP4-A-EoXuNlsKd@iWT{Hm>`c=sqFQ6fR*a*%P(-X3*l2z=IVg9jP^&m80=O#LV~0FwnDVF(sxyO+zc#gFW$i z6Jw4Yvz}E!XhN|vPRVuM)TraW4fxue!{89_Ezy$dt%I~VFYxqMh?ss| zji2*4DzyL6JsUS53B^71Sxf+{Kd@Y)`xaAWH5#OU*c@+Idd^a>xVnj?1SDQBeHRaWnI+1#;BQ zlf3R{iK<`tgLGvjUQ%LxSD~tZE|vBTbs9*aD*_2M#D(9RcT01K)ttqJ^>MgWc4+py z*34O+T6Z4rfcI_qoo0gSd`YO29;>;o6M!<_haO=wh>+>-TJ1yMN!8xfV~1a2-wj`glye*MGHiAJg7N*Tn4cKNYaih|3Fd@aH?r%860 zX5y^VrnySDkTZbR&Z-IlVcq4uvxZmmE5abk;~gPjBPmF7m(6PJS(2$4o-661)u~x( zsN!-^q}c-Hr*^$lzF%mH_vz`Ie*}pa+UzehBR5@m|O@ci9NxhD6XP;X`;R)?UI0W{BXOR#kmM&i2toX)d-u7|wGWH0A^ zUf5kw5jAv0U`;#I8j*2&?Vm<|$`0sw$T;t8Z@OcDZcu9Dm;7SYhMyhFwIp$@Phhr7 zZacjTn<`!|(l8#(`tI<=kfXi{pk?Gr?-;b<9*0NBs{i&W0{-MvNc?Y~0uA^SY#qGy z4oZMek?hT~JQnkI=8~L{PC#o#U^;(%bU}sU^Y%!oGj4gSL31f@GZfRZu5I>M-0mSr zc8cCEHl=4AH`_`rb#(_fZ2_Ipcm|4yqEP#ywlkgGfQ_cFgUj&1{Ty&qzr&1%DgFS> zp>@DUM?CzKjW(j8Z99Rkc<>~i`?AKjQ3=eXCxD4|_;V9|uI$!Bwt7rZ?O(cwyV@`E z9U2W2s%4lKq^39omiB4Yke}vPDB~VU?N{Fnx$LTr|utivK<98?jC2(z#Bx|HNsI+6x;O0-$@LF(1=oF80R2RlBW~eN6*=I zs02(BQ%XrT>vcY81Ypfxj;?it5UN(6ynDJ~fyt|#N5%r+!X;w-&#tWcypfYQnTRXR zQ)c!%T+@oQpLRL6^O9zj-o`gq81Y*3HF>`5?2SDRfv`lsKNB+j3~0n`1+9@(u?o-k zXgnM%uepib+Bo~F=W)r{q^mj8FG5IP=OU)6t%8;s76vm1_akg}ci6d_O!K$EI<(5;*1Ly=OG2zD zhQ!%TW6hrm!o2+ZVe-$z5dz_i@JY!Q(-HSGNnly~tBIPgu(t|BzS}_#`sV@cz??q8 z=3B@@XI|JB2*X^arq<%ejUgZ!@t6SWY#MOqP}<+sa=)jp#3YAq+@QX}wx1f+bKIyq z6ek}3JlQheQrgVhxQgPUbfh|G6d(|ls{+lY_DPo~#UIFqU}c+LQuB53fQ>p!ISpA>5j;Yt4KhT7_49sJM2yuZ|?z 
zCk6)-RmL9;&1?%kRo0JkDxr|#h~{)Y$U6#xrY4A;=MjildQm{vsFUEn#Wj^^;e-4I znm!yBTo@+ZX`}CKtQy{QO)FS!#r;%wKyrTxA^@78CK4qdnz%xp8d=S+N|JgbfS^rF z_cH>T_Eb$(Y2HhRBG|Fa%Iafeq@l^daZjkTy6=?m(96@?X9u2CYW zyqWP+@iWF;a@>!ea;Sfvtl157jJ1CqUgv1ItEkJ--q5-5i`&*zJ`O% z{Auf`H_;`)>*$E>-IsF3BE&A;uD_4xRGd$ddoPQzQ}dYjNT-js(#WQ~u-@Ite&_!H zDfDGTmpM;;)Mrs^e|x>bhqVX&bZ~<;AZ2CU7)$7I%Td^u?3Z{o!(U^ADOxuvyKBhA ze{X*iR+(D$$!u4%(l+V2VW{vx0KX|?&FU}E;$J}@)r51t&u1TVc|o2&eg4G-;;!eE zlQ9S(*U}d&xIV{=W@af$SZ=*;>MSJbNINfS@RS+n8Rl*Ph)7G^Ry~Hn(y3q|?Xema zDVO%*wChBO5?tOo0d}7F_D-;P|Cauh)O=gtm8g=Mh19Zzs6IP_E&Gt5XtN@();lw- zAsk3qa4f(tlCBPK*}W(y2#!M)t{2_&eKa?)(4HyR?IZ_X)ha0s8@1^dyUcgK5qao( zDzt33g)wV6k0ef)aAxq@Quj;uX71Z@K(ne*fruSQi1!6t%j;VQ?@GETB5~%f7njw| zUjB^UZ9C(Mjg@G?vG6Bq^yQU$yledhstEwV>v07~?eHA`q_Y2s%fm{meRXx#8xvPSAdma_c^1R>hb2BUN?jM0P6n_?I^h9=NKCI%+S!O)5sv~hN4fdnC6|RW@liQd}Hm)G>>j5?~u^JP8 zvJf0s^V~?I2%x&cOj;bcOyWGrZ~;1ROj_35jA-OeU9Y;S=SW^pYbPyJY#8vQ`T*!a z^65YDk7_L!qEb^`JZNuwBXP9_J-QG>&!{E{ISNJhhFqo3r@}&Va}hVl5}dAiPr9xEE^TWpY)n#*1gRocem>i*%v{F z2h;FCep%=@Bz897eRw;SLt`>&KpiTZOXnF?xO_+bboHfu2fR);;Yu?W$5Le$KNse9 zd@=vfWNvfw33}nnrR|Jky(J#&Iv5Q}oWR{QDL>wzQ593eGQcGvF_8H)TltM+BD00| z`RZszM=U(&NId)-5v@|xCyKl_+}?gGh%qYldk8pdQ+Ym7U6jtU@9#-#F=U}zJ>Um( zD&5&7pIS6X3z)*s+Yv3>_rrgNvQ*cfkkB z94Y&I$uvSU-`DYqF_oh4@!{Lc+|mvwbLdQ160mn#i%nnyiQtwR`hI|>vl91Z%r(ii zq9~Ew+KX#!hJWBGO-rkv$?dvlK1Ofi0}r!v)q`MXqB=&imb!-31F#1ouQM@v@F5_T z2B^TEdSusxCy=v0>DkZxpD*~IG59Cu!RA5` z?<|sAAyVpUHWZ}&cG?Gg1_;NF4Tz^Q#t^D`D1o62yj>bEj`Q{@fp;w@mAW!Y$J2gP$*}jNtZgz?gt81TOOaF;89P; z`~pFalQwi*cz=F1;>t@>6diL@3Ve(qnd03xN#@f#d1Dy>@wcI8=^90OQ52m8YgFBC zYR6n@gX-s+Hgpy&!nM{Lr{QDhrhX7C2)N;1x+NJ+kkiTw>w`OsYupWM$Dhjwb#owq ze?j;KDkl%2f{#s*X$@pvyv}DqKPSPk_`C_r1Ii9(UdJ;^UExdG-Tg|?Xt_X4Oe4PJ zbavI1;B9nT`6!k7!>z1cYD_qa+9PE{R^i(t@PXhAyzRStmBsY2ix8|bqd6_hI`hqm zL6zq8W-OjXi>N8G~sx<8o+?H8zmAB!mV ziAr;QYJO}+^pY4=nO=nsQ;C|P>w4PAL+2l)X0F4=^wtDWSal^ z_#b?b|Nfx^@~q7a0HY3DB|y2KJ{d5E`79fclpWd3e|PvA26y$%+cM$($1|R!j_Vn#ib$|#;)r_olYDB{XlIh(KIa}1PgP?1I()8Y!9VI=zQ=97;n^e5@Q^x2J 
zGN$|K7bsD@&Vo#qenL6utlNA5FG+f%OO_Lr(@{TKzOKFi9ez+-()|X!i4tAJfDI6R-(St>Ddn561k50OJI`IOL#iS%Ul6<@cj5GioUuo$}qAo z+;YQ?a`k;XTzYf2Cj@bE&*^M<(C*K!KC`lipH8mH15X{SocetXp^s?f&MymFvvZ=P9w9$4eS!WDfU^G zffTU{ui7(I# z&?1YqBkgpG2j`2tW$U!U_hXX?c{OF(1N#E*FIlMpKjQ7$$t#M!SAP-DEuqwb68N6u zSGB{*z3_SFq%Db^EYe?}OQw$d%~K?f@7LhPnt-0JySVppDnF)P-8rAf1Und=f2Og6 zJ#d<1k8j&Rm<_RMJ}MY?9?#{_e6Mcld8J;|wlD1i{WI_6UP?cdFd>H>hBrx?WgPKJ z8ECJ#M`ECT!^yaQ;V||j5?YJ#mlbkHA4{S1tMTgZznacWY7aI!`~takym%nN?#ueM zY{kcYOyuCsk01U|&vRT4+r29A2khB@rBLf1`fZf(Te=bzmSm+_{YGLNY z>Pk<$5Nypo3^hFnoi)W&hjU)$e`dAn#Wn@H?}}lLg-Co+ zbyHXDuUI=?9VXq)=75IBvDg(fzixfmeeetP#<#tf%X8@+>P9k>$Yur{PFv)Ln`d0namy8{$C&$d9uYX zP_D6E3E3ptX(~Is^X-XsBEJ8{l%<>zxD~y(OaVMQp1&+}`EPLCHB_9yaqowhs$sRy z&whBdSYt3Vn0Vzf4~0t5p=vXt7}tunmiW2;Y7x%m$I3cN8eOG2BfQ| z$+u3j$d`#|XIJcwwTKW4|4n`opwkbhaI;cH0jAThkdPF*#k@CspjYxa)jlS2&jcHZ zr-(J}&?M)Ff~RXG$a2PuNz=6#Es@99qHS`jnLfy%aq8u@276iaE(?UB+AHl@3BBQU z@g#6W-d;Exv8iVq<05DIX`3_BI=nX~+ck;0%HGqLJ(`bV>S+Nvtd1PLANT#c-xHwH zCAFH-OcdK9ej*AaBYP4(HAt6Lfw+@n{@C7UN+|xEMRDoAJ7W^b(eQA19&)<)TFaP= zp6M@;lQ?=&rzYxaUfKMJc7^?-V~1puTpWU^-XBK!aJK5KV|F#Khp?Q6DuLnWHi9c--ZB;$w&5Y#~ARd4EI+r0~ODy z)fJIR`*sXuuT*qV0iwHyGHUu|B6`TGQ0TZX8+{WhZU0u2#&*xo-r@~rl@;4}HCl-8 zC%k}IyCd+jtQh!(vp{3c1IU>wRrbf#lV6iv`B%}qr{M-mc{TLNsqtGjIxcLp(ctBW z@N_Rpv$bC!K+69LLv%w1BDgg) zDFg%wC1|D^OI8?pyl^1W?`fOkUd6#Ok~UZv1z%4+9134lv<~`qx28$GnyqMOX@BNG zB&-*1gLyB`Vu~#qr#28C`Y2bGK|YfXJpCKkN~HaM3(-C6V{oH5Lj`~6(0=eIUGkYv zfLn&Ez{@Gkz1e{jWYY1xZoDpc zV}m*Ret&xWOh#pC))Amo69_v^c&{;&>(PCrv_hwH&Ayqd(U)MbkxgLGBBEzc7(a6l z%sZQ)TH#2^t!=aL#*3=}0|z@>>eJrvZ4GJZ{PV`;$(@?tk z$6hI_U|NH3BZBV_%{BUY3)&Eh^B9{DpHq{42zn&P9Cr|mVYL>Ife9P*FdNL=taLEZ zi%N~*fH@;3T{z#KZ2GQyNoWr21@jaO@GZ(z>SLXq8teN43Rs_Lc>KG=t^cHu@;~Cb z${_EPXhnqrm@lm{R|b6EE-Cr$$hl3^(5FNYzfGBSo&El7KnsKlCF8HksP7_Rxv z{C$t92-P}d{bX*LlB2DL1;s=_H+#9mI7r6sFZ#a(lor`E4>U_@Jy^TS4rm1&g;g$J z-ttMJF2;qxgwP=n+TNOY*2-#k{uWFA)Dt_`$ft>m7^To7rPy<9Yyn%AD?= z{b_#37G}Q(3XIy?tRPCz#R8zk!#^0yvT9ZG{k~tITF7o*|DPQsgCG27u4qgU+P3GA 
zu1f9~=s6eQ#R(v+b(`QNOQb*(;P&&Q(91E&V7rq9pn4)0sY|W~DpeJ$Jr$6(f8h@P z$FA>x;*KeP4gDe4j|xI3KFN3QWKrX2=|~$47Mz5`r<~9q$kC=v0-;irImzDs@hMc6 z5s;@XfP9?jDa?OCX!?I5@W1{`|C6{htL_V99r3Dciu>rAUN3wX=DyH!z=8O*(~Q)Z z2m)GuY#YzAwPJj|ImuWUKY=#(F`l3M zZoUm>j0KFGTWfDhQs|)RK}fB@7zu7+LS|L|QW$oN&eB*am6%Nfr8d*GkR+=zc}-Nl zu9=#n&tm)!SSDd>BPtJZ7=r*iFAUs}I=zkOFN|0%plS{{nY)F;ptF!mv#_lFQ2E&d zS_CD7cek9~gay@z5^Q?8R`@WYr z`rCK32X?hdk-5dTBb?vquGNniiMPRczwVR3ZOaZj8{1s7qM|nplg;UurD?LTOb4uf z3E352w=j&~TkYHzbC|sF5Gi{Y`Qhu#GWe*&v#`V~$oj-JJsRk_;VpL27EvnuKvQ|9 z+7HO(oZj6B{+{vQ5Hv!4H-y&mY{z_Iv@g#Z^fJ)z7j6;^O1H{c^YT6cmDYY>Bxi6dwB1KqOR2rMs!axlC90xS8v7=x#F!6h;f*Wgg@$_9?$Q97KTxrWzX;Nn#{eJL&Ev3hKCIdBJ<0_J#8 zjwkgG48wB@%)7b>ai3dO%5SdC3he`t?~$eKGTNe@sGU^ez3{ew0CoUXlMgcJSy6Ry2mz)HysHyOM{0Sapf6JciUd zMy7S}sxhDh&3V|HyOwQa{P5?W)*BOYm3 zImF1Obgy6wI`hyqcP!8X98F^KgW^ta6ZqJrSPbI>U9-BXx!)(rG*W%yeQo?J6-dBf z*;4JBO*6bTgVPU|VmHKPz?lh>jDp;CPx|ChI{h$6)%V^Bn-)KNC1JC%l%1;$0N$dU zE(89a8omx#O+H9!F-xl+{R9~wup^yZ`wfyiw>nmC+}_a!t4KmI;%&cl5M@Jwh1*#d zkP*b6J#1;g^Uadxs{{a}MBj6(;{dXa#8x03ayI8~fQik!nCy2bY$e74xTd8@dITm% zKyVgNbL7N?OCgA0CSat90~G$o5EQ=*!B#;XoKFP?kaM{Y0n)*{ORM9+$gqFD8QDBV z%)zXow0?tv(p3Qg&_fob?$3=9!0)L!(?<3y0ht-49Kg}#%px(bZi@hT(y1Qd%Emt( zxOc+r-x!#(x6B?LjhTOm7??fK%l-{&dhqEts6+7YN65V8pQh$#3ve^tj*DwkCuYMe$~+@+ubbuxvnYZ{+=Okd_c;K=dj+U(Hu)gfGN3i-xYBN*1#*YY8~X7bu;2-zV>Dthj%gX! 
zvB7Pq(^v7xgWI`LyOUJ<3zHf$Yt>XmLGU!Jq{ptaM;f@}a9fl5*NZCrmo)X1aG3KSvN}f32h?nE z3**nQqs@`HbWO(;?SYH5I&QmHuZ!;{_f~k{!!VGf2`{PvV}Qy;{-iIGFA`QHRP<+m z)jlow=vFBP`8Ex^?+raN#T(AsX{U8*c}!-nA~Ks6sV0~%vT^sYH-=SKRF+o7j`Z*b zQHKu2O4AfjJ>sLW{})%!y#5(nBJ_Vp^=#xn1fg6qZVMw`V%X$Isv52>vcDi71Me&ZCvI7gH zF$UUa2Nzg*%aG;cHk+}6_viezamGgto|5*Mi`EmeM#Hxl6L%?e2Z`%c1o8$yA9${J z$HJ{L=|$=Gd>_f@uk)wAL3TT*#pJ*bf*Q;K-vZvmE)34?g(Y=)^d53ED;A};I^~Z3 zwDnc^VDeen;GDKM59_0tyLkjke`YX$x}I-kR22+Q)oZ;UZg_0JadrSi-#%0!jkNZu zMqNezp*gOyd${l`fTun{7@jjnmRbJb)Nf&LqoKIR`uHIT8jmz`_ykla2eJCh)j%={ z=~~KkpOX^;E+-^Mf}Hr|65jvyjY!k`=lS?Vu8{r22RAiBa5o{_LRPCb=e$WK8U$*O zQyGm4zq18oh}=$87fls($Mcb$o=x(iUnHEnq zqFnEGcNns*UypmdbV-v5R~3oiUQ-gFlg}L&)zc0S_yOHUlVMN|ev1oWYJ6OiSA?6O zt>%=}?N^sUPhGRcn$)?WT~`?D2jx20&;9zP47#Mniv^)m3I1n~{O(|1 zYuRRX&hTJ#Vv=7_&}cjd5@5jR2doUyF$OG{xbc!rESGSA! zRdGcvFR1wGv*G5CUrwGDcYa)??7saB5$Iib^aIQP-V$=rp~`nED?2XgiC(oZyWaP9 zp;Zh6fS6o4v|i5=NseGR`@-rs4mx&-%2%4Fe02(Is!0QXoqfZ#SY?hn+b)SaTN=#- zFqlgUJh@MaQQNnmk)EO`JoqAzRgRWu^SAbz2>E8{n$LpiuJ5jSd!lhd3&xR4;*J^D zhR}Y!C)WIL)e-U6E}n?kOnTpL^XhOnwFO$NKC}36+_lhHQ&nD3rgv+Abxv=m{gcKL zD1UP(_pQcu?+y&Rnx?P^vxfNW9YAOZ7BUK*R>9EOcMu|#2V4#7W#N1{nw(Es_XMoU z)cjerEaHEN6~4xN_#wP_`Ka`S`yA#x8WUw0?$2XGRJ(zf_L(${2HKDD{06ldHx3wK z#_)Q}1QXOzrO*k6!tYq1srw1C08{0B91LEfd)|vpX=8BXnA9b0MMDqxm=21VF8_Fi zem456B5lEF+rQ&S6%p|gzpM-r&MU!Wb z2cC{n<@;S5_|moRl`@PN%7&n#P!Wy@bkt$UX*yd z?t4IQ0X;XfPh>Yo&2v9z>OG4d-uxaUA$uOfSOE(q2@dS7gX+VPOf^%;-NaBrU=fsx zV1ZAtQKAJ=C-$p(O zT&l_elOqw z)i%sVQP`O-A;f+iwFj;OvL7SoD8zCV5TE2YtYdKV&c8wCqbCn3C;lC-j{g(n2N;(B z+rIY|MvsrWhy4=gg7yyGkr(24Jk49mtr$R_Z0$`$#*3u{!L@}S-rMiloWji#gxByW3a>yigjRmL^DJ!VhuD_h@x#+Y#geAt zXeI6(pFKkjML#t;vDLgDfffWkP9V;M?H13ijUCgKqq#D=?yk!nd{c+*T8AB`rV-JpuK+fVA@C0R8^LX)wo6_}S8haJh}D4l8izi;mGbH1$g^l>GrdbS z8Kt`Ol~S_NiFWwO#S6&c z4hQgANeSSy{&B<{!{X;9Fx{y68#Em%O8lVfrm#y}iJATlqF6WK=i3>b@sHAiu~fA| zxSvuJR~{&(9LWNbqcs zHYV7F15kOPKEVXH;J>W`Bd%GK!f$*%29Jnh0W)QVSd5lv2O+K`UudZlA&LJfia9dD 
z%!bDiDU&OmNOExPxQg}40pF7s>x8b6lAc#+&(Y3y6B)i2i6#K;cJPeSmzwm)7SLg($aiF=M*lsL#c0p9C7^bQBTz4j5Omx}z6*DLn4nB9 zw7VEY)hbm1@`Q_jKZl@|@WD7}({B)MIc8h*iY4`ovR{-vcE3l%evl{5h*xk&4di>_ zDs2V`{k~BEF@&JT%4_G~k=)mY6K@((otpqM#xIGqK^n&oA$K2*wj%+;tbH%LpO%mN zq=pmA-4}V~fjuY2q_j>k7<8HyW~~l?gY9S$ft!zeAm~Cqth&X`06~B)G{2(~?kjcE zC`Clt)5f$xPJbnDD+-~A$Kb#VA)`sfFnKe1#I9ZirASjRNd3Ys4eRB!nGczMH%2`a z%QoIP^l`shbk9cCOp+*6xRZ8H!Eee%={jO$rY^XIejqt;kSHhD#1sp&(fQ3wE|!O0 zUgIzP+6CkikL`tg$|Cv|$mY|*FWyc>MU0Y6d-p5-LuKN=Kd-xQdjIsM`zKx3K_iVo zgRdJ(4s|6rZVV;mzn}P1I~UB!{sK%G{eR6F|73-riTfUWAeoXw3v!K-#l1s$&DBM+ z@RqnNS}_9Mi-AWE{#r3ljH!)Dv?DK7gftAk zyj2cgTEmrC%z559c?Nsv`lYd?z-xrrG^x&X_IqOM^y7AoAzz4#<#_?V97TuMXsqHd zC1)e{$HyrP?tm2>^o5!ma}rKz&`ym#>9fmWD#Ph}VBSDi9ay+&;JDM7(U~(XNZUI< zb;-jFHzed3rnohH35hs?<{)wl@0U8qX9~Fo8qgmYxg`SE;w&dYXe4!2xi-1^>32Q2 zAU~V(Hm^o-hx%OH{Pjjd#y5p36Q55Glu+Qv91BaJ%qMwN8F^AR581vk4-Id85chSf zUwXAUoY#4(XZE0770|@FG`?){GCh4zb1C!xG?r3ennLh4M~=#n9((3*KymXxVqiXb z^6)(D4Q6;M&Vy7jX3RAYsq;A0+q8)J-%#ZJhhpzvp4|}QI8FpTwzy~^;^}RnoQWT< z)J9|ZX6X>FLJi+~+cA05aAbzVu)l<;&&TAo_UZea33q^!2o z!qfS&;yd@+#dl%KqpQwafLMbE5`o$50TWq=Px+K7nT+;MJ065>vxl@A_1keQd#Lpe zhcn%6X+>}kKihL}Fc{c-fOsMhtJ-176!AZV9XArXIK|7$?!?gBeuxE;*9hd;n0cdG zj8mZ2<-=cds|C0pxwZGAbUy)K_zA=)N+yFChb|7lDVS>h6P8edIm)iJn;`XNzl>Fn zL#8apUNB$u;yyf-_S?5Rlg3}~TA(5bd!r8}^MIK{E8j<#S4ks{H}q~bedu`d+{+&M z(Y_&&{v))9h`df*&rtEFav=G<(o+(vI2KuCg&ac8xCs*|3Db!|~jD@|~tU<@p`Icm7K*{$97zaKZL7p1ZfqZAf6s+c* zS=6LvScjb|G`HUi7dc&hd(GU~+Hf9q| zJ$6cco&)!u7N__DWWXI-nRLGY7({-p0Gwek>;{g^-M-^()k(Cu_FaA-roVII+CYx_ zYF6g>tQI3Lh)wCkzf&jSOZMcRk+ChHn}G!Yrl` zmjx8yXe{Wy^w9;43f^2$G7aFo-G;(fvP{}^$6ElpR^g0R1HQtdT;PX$8N!RyO^4%YYNHPhy)`-qud0^IPiBKN^96 zMeq$cImjClb5!g8`gXo>ip#`__SNv9jqN%YqxG^c@}tpyP~z8eBnKX&i_K{d=1(E! zTno9yzWhx8nt}fP6Zr8MCFkMwBGTCG-hC0|mI#pEeEx#&)>g@1zuVPnl=f+FM{j(U z>r2(%(w$?AH6%NTIR3c?MvnhH+MdBM;P==eAT_B}=aE}UXKMri>AiYH6e`H1ZE!X& z#gbzDi#J3|NA-fVBh31*8+6C{T@qSA;hnnWNg-?)tT+G? 
zU#%vBkH$A(gO&Vb^e2L!Sop$EcCm1lqAGN`8ZJLu!V&0Rz`Ap>&Mx_t?0V{{TMKkg zWF30AN=1q?B_!hkjU?Zy89*eR2Pi6^1E5xaG;~5MdQr&oBvdns2_3T1_iX6>q=u`l zSs*=e-m1zWrq*dQ=})cngAll>6Le6o3q%-R4*cpBy!FXkoKqQpS^Ub z;i%^m;~oWa=XUt85*L9EuUANbV%rPz@GmD`*G=>P^jWgux750-?s@0Mvl9N=DD-&O zT*VZ^JcBim>Z@xn8(~1%o;(%{-L}9kwpH~zn5On-T0FR$Ww9-@)seaXj({PXck-zb8hlxEuWY66^R_Qs zHY^OxkkG9@DiyWsrL|#q*`7RO=)5}6_xDuL|1}Zkzxcg>+9KpX)iC7WVIcHRnfL$I z|C9ah`;UPZ@@=y~xA5PffeQCsJQlz#is6Jpf&a1bOau;K)&CQ>__MbEeg5!YwXGPk z061s@Hagg)86Mn~NgKUnR%7lLd+~c6tUG#ceV6pEklA!7$zfMpdcnO5(92WQP75dc z=V=o)HM%(A@ilQvRJ;Kf&_BVEvn+mwIHb9W=xvqU*0UGxYZ10O*b5!ANwv%!-#{)V z7<$07u4<7g(&#tHU~2h&o2|{I`*)+g?{SUcYT|_bL7>-|mbg%(My3d9T@xdJlaAr1 zRCwdz1~V+@Gn6W2PzJwe?6gT!bbAZk#V$nnDv8UFH%ylSj<^x6?bV{@=%Wev20cGo zQ|#6A&B;bLw9M!Ch4D%Gaq}=5)SqmK-?-t(wi-0TpRuAA$;)Qv%A|bAixtXV;mt<# zWpZ*=nmBDX%63Y=lC32L%!j(6@$3()b50t`1!? zSy$Ke{mOgKdV^1Hpwr5Mq6Hq^X2=nxt(~^w_NzBN`>b;bJklVqiDxV5Vb*m#6?(j3 z^M(z!rZtgCyltFL{1lBQ?LO$j9DZ8&SEW)VF67KQZyJ0l#C5b*Ju6OerVQeK6D$vI_Bq^*Qh=egmd!VA8&m!UEhmW6Am=Uwo*ECz!J47x9jYCiW=>TI#+Lvbq|w-s z^mH`S#G%$R4U&{=+UAIwj$oo9$x%Ya$9~Rs6gqD%g%GpeCw+#Vs81`?`tzN)?Z1}ngH+=G(dw3jbpHQro5OO^fvSPk+y6RFuZ?2D5^KUW5w z#s9)dXY3%eqzXcaZ^VlznGfZTi_0rYixw4fI zE&1kZ>njC62$ja7udy2DQT)xMnYl|n>=MYUEr;kY4noO3513)rGcoB8yIu6pj1M*O z6KC`27-0#RuFpHKkYpVajW!}F=6wD_9No`rp$ofu0FyEKS-lNTH2}!~do{$uJaVl6 z{^e(_EXBt|VuYAi+B}h7S^Bv|U#|Y1rQw1>U8H4hm#>@Ko-w61(^9M-&x7ld>dvSDOw3g&zvI;MxnZ zD&Ot;rRitlr;4FCpuD``sF-Z4I-epzs@rhbcAWrn3?JCxr0Qfj>hhZ0LU8u1>MPBj zwikC!Ked2cjfVXU3jh!1R7SR!U-<+-o_r(W)a92Yt8i@={Rlqqx_c85-Y-AFPVC&y z0C2^hu^vHZOuO8)x8MNWOU2Oq;J)Ir} z&ghma>6b|Mld<3YQdr*s)KX~kXXwJr#XQT273t&~D#q$;v|wvGpZ8Og3OXJG7Hn&C zCBtHSgHMib_>mKHxx2PVwD=TP%h~X4X{`r3{z2yA8x3#5;tA${EeCX5ylM$Dv|q_6 zI05E79YO{Vh&t;h$oygIMkx;jZl#-Yo}T7n^k+vJ5|8|dSpry_rnz2@UxW6-cg@Tz zO}2ilfjz?}3p^Ji@xtodITxxQnW8-I)(fdTNb4htef}KeeJe9|WkM~L?VX(F8;R0y zowhtd)JqHoqq2Za!j-XSH-EqF z6TA5+dOmy0U@G>mZ>OyD{P*PmMYlz}ZSibx+TM8#)%yxvf7b+A`J!#G{}Loah4}py zct*hqnm*XQ@Ea6ic`f<$)S<%S{?dv`j>65OA7^>NrO`kGlwq%?8S%vZYC 
zEv6dbMFha)`q%l(j~DKp-ae4!9_}+Ra(iRP!Y1^2URGsaZ5$=+o%MYJ9r|(V*2@=V zdp*HBr_PXO|I^n;=qv;+X=tJFyZgd{n?#x8s8P+Tl@yNOV z8?^50p8dx9jxcJ!NBPbal_D!WC8E$9+(BZ)$7Ob)@zzVa7p-g~zVU_XJj;$stO#4u zxEkN>_v1jr;s0dS^wA+0j|EnmVkbhu@}nW>V*4VgrfN@$EPkFl1%?F^Q|oRn`tb~G zlE{98%8YIQ#;AaE7s#%b*nsjI%jpn16(QJ=xI*%P{tFx3$;$qvt;%NNJ;oTk!>Cae^ZAm3Sa%l ze?W8Z@2rvUcOAJBKUI{EiYSmR)>SoBG+I%{EUmhi=y+>p+jM22lC+-{(o0t5PG<HemYszh&$51S+w~{ zE^F@2ZEp=)?{i1zLmi#3bic+Lwv=YQa6&8$_&oNNLfScmpHc}T0TW6uQ=DEKbmDe9 zk7nt{2`bPRn|Btv7gH zv=tMVIE_zUGyqdboJh=WNYqCUfLJ@&1AcjiQbBG8cqnNY=g2XxX>G(!( z&Q+#UfiK>AhFF#@9CF^m^htKZV7cx+O{}DZ?TEwqfhMi*PvtJb+mY7JCuH8aq{br5 z0TsGXi5EavWjD&TmjvID88d3uAtY*PR#LW@Qi< zMvpaFT_g_7*=GB>8i=V}_29iE8IN1tvH(yP783wvQ3Yo3d(wtSog)JE_#~TbW+nCX zFYS-r+E3EQCvgvhv6)~f788UJLiDq5_vA=VPJb0RK0fa0a%Q-rWN!5+z@<}j6y)*! zdL8Nt=?8?NUh<1p(b{NbZOepcYTqPF?Bc~A;-Re8;@@dmk0@|3F9IueIyVHtf_Kxd z(9PNUT=V`|(p)h4E^)u zOd%Awm!L)0ag~6{WSVYk+&@iWhmE|1&+TFcMpUDd)}{>8ihiqUy`njpL0_Iet` zv`4j_`Admo9dudptHy6o_WtPTArA1}$^r@iv%rRLt;kb7MUY?O7e51BdT+}%<2*QE zWLY3tYhrC8^fqI8X_>J8yOSV_<<8AMLgwm>63FiyK5=d~oZT+EUeAs$DZSq7+&zc> zm0#a#MSq5b839*Lk@Hwx(+JP~k$NAwu)5cATVod>8;kd}p4~)F-pT}MmOJqE-ssw2 zr&P@t#*O5nQu{5wSQ$k@7iDj@W)=~pFVh;|Ia&cVEwPrgI4*UL{Kb@j!MG75I$lPJ6M?`Z=wMYh1bHAo@Tsw9#cNs4SAD8oV{ zQDx52#yUgHnx7~X0E6=FTc1RNa2byngDZy48@603gyoZ%ar_^EQ0>w>iNAl=KO>8# zn@v&Ox6tN}t*jAOxb#UrZgCMEqI7G`)IS6D+1$iY&cFA43Zs-AeW_T2c$L74Qy7Wd zvINW1mST$B8eKF|SSVeaeaYO)d1b$7I1zF*{2ur@D}WlTQNbJ4F~gVEgrKa>5Mh>W z{2BFJFAaHXyRH-^uC8$X6057L=DD7zGY*3CkbCv6hvNMfoth-oi*}+sUo@uU<2{2n z%fJ1(OUD4a(8|<~*7ZYpC!u!>&L$A1NHHMMnoVRtapM&W56=fcAqobVD@yd^0GM;J zUV|q_(kKGz{^ZAtd0rD{`13nWKdrt|nTvI@o@N~h^5UIa$Dw;C0Ej6)MjD;rE*~PX z+^SwDC1zRP!(m<}#xtV(BMkh9jJeJjfwo#z3(ltmVA54f1z|B0#W118M+4&53`Y7s z`cf8Ndt{HYlB;ujc>Cn7B0;6vuZfg`p9#8J!d%idS1&W<) z#l`3^8Cp9puiFpF3Q#NAe7p&g-9?##(8Z(<)MfV~@d`SLqn5`Ryi+56^$|P_6Oz{{ zZ4QODyPZhAFfM}Hd@rL4cC#aeQP0Zl_V~5epLIsOI4TPyq20^LCp1-4cX&VymMK7S zwrC!5%Hjb&J7K|(wgZS1{1g7T<2FDjSbkG$YRT&){LOpe!8PK3g~jwM{iNe{hJfcIoc!} zwvW=OCY*|wtp|Qkzlu!|U;e6aT`pf^|6 
zZyzzRQL>`A4BpusZDjY>o|}T;j0oc8CHzl*@vjVzcN$%b6noAGCcheBPriztC|uMC z_mj<2#U3}Oi00P6ttVa9m>4a)&E;-{zx6bd1>|F^`7J#4oO4j}gexNsJ*P4&G;Y~w_ zIFgQfSef*7_uQ*5_RW=tT)~AxHYL2CXc5>1)f}Wvn*w_r!;>(;#;ZZU;ljZ3zJ{!R z%w*UlabH{?Xmu+ZR4+8wOS8@oA)hdtSU*M|MNi5cye$pjlv(M#O8ayn04fEwui-@W zr53Zsl&<4fb5EtHB8i5W#`1wJKr?@GkGV2lVNi4W@|~eF@goo;ED2T)BYfls=%y9` z^?=A9)Hp@Vt9TtFFgH?M`Rnm+cF@qb4TdGk4#I^;`A1S@I3OO2CMn|k7hre|1<|9t z^1=um8DFPj)gIZF?y3}PCF|@hl>=elF43{U1;zG{w_a9=NQ|t%6RWaIs$6m{UT1r# zaV8(u-~laxg>1d7fYN){Zd^TPzCo8$$nEJfIblFm%mh+;6MJ~ds0?{K!hYO_8Xs)K zjF)|Ik`ePNONttwZbQ{$npIXWRolvPaz+w0vEUeYbhstsH{A31q|iep`?2NE@ejs% zoIRcb?Nb?=nC37Qt07wz&Y@6je7W+By|K8&eYx;rE^7~^wn#`GQ!AnsaF<`p5bU!k zV@hwU`1Ld+be;+b3|C&*uxEyk$f zWpQbXix5tHBvE9$xM%J85ZkCL!o>gy66kv-_=jjXpiovoVjq}^GE_`xJFQZN zJ^bdS9{KWqXs40-)yveK+aGZ-KT;3s@@^v^p7P~k?!JeUgpGp-|GATDvIL!5AmMEC zxCM&E^Klcdtjhp&DtUdWKF?~7^P02R z{?ad(>v7NCp7UQqSHky@O{0n^Lt zoG2OG@}@OKrRVk%Il&O~gSz9BC)*KFFLd|w9C1bn5rEY9jh(u)guKH-zmxdSU2uf? z;&Dh?5Hk8mY@`9N(_1M2PyBiAlTr3L<$H&`F$!VI z{oAT{CS(`_0|VPB&8zXUXL>M!cprFvPZ-{IXu+0{x_E_b16p}I?m*ocq{YKLzZ zon+(9|{@vVa49KQY3L;9fkx_T7n)0=r9vOcoD zhva$x%*XXV(`!R0Y6!EW2ss1tkNeyI^abeP{{M105Up5;ufR4pZ@@WbnbyV3&Fa*I zoxLuX%5h436M3t`dObjqCUQ~QIXr{1??4-Q*|ByzsWHv7u4q*v-y{I=F@D1P%~tC4pTHf;U?-TuM;MR7k8bac8W8q2LTqQvf}SdV)iLf6nXUEimk z?_S&=FBBq6y*A#oxPmq;Z73ZovThtK=Bcer_tFr)#l@xPVav;~L-z{F@`upikpHW7CR?Ap_I9(t^)KCB6xM0(Ct* z;(~8-AzNp-utlkk+mIl zl-%(0ZFSu6ho_a*v0Bvn33m$_1LQ^p(0&NK;(T4l;3150B4SbA!f8Z1{p+2p?=LG; zE2_wHt&xub{X3gsR~otTE2_@(7QQ2-R~PokucVPD+%uqbrE+Cqn*GD#S+?ZC&tg%{ zUV660-l+rf;XlAv_=>rxH`tyU-}H|Ks8U5tOEcW~O>A5n3!u%yXY}VLzKQie`qj~Y zZ1ZJshY%x%8TOz6wdQaT??1W&+qYGdaVUhlIG*>0cTpzp8= z)%Mvd^}qy4jR_Z)ntW~OAa%CQ^MQAo&78J=fY#NmWAO>53PK2~=K(^CUFZR8VLQ~O z_e7YS)KmrS79`)8ow&W25~V~Bv-h0gDF0St@!-acp?5;fd46S?6W5Nodbf`w8e6zC z74MdX(a1wOs#Z@^|8nMJI`{Sj1O?P8mw}rwOEs^YpY`g#1kHg8C{)A;!(;Rs?~^9;6RUC zY{=cJvGI3!D(NOA*^X!}66zV64FFI4>2c~!YgbF7%BDfWrC;~r74*L2p_d0!lNCmi5qY4}+jKXaBbBZig ztQ=ToQy0`CrBBP~saBQQuNa3;AqG$sPA`?ZMI){3dL%hnE(<`mv(;R7KN@Fwn{^;A 
z;mYY;=5?({4Ii<2geqH5ow`d9bkQ8wB+-hkr@`*IGE6oY;OGjnh`8BJ(%;mBr~Pje5^q7jRBeNDcu%!*=Y z6!w;%f5t3{ul4!N9;Nw>+S)-&JVfN9d;9$8$TtN_-2VJ*Ym7VIt9gIwhwn8XoG=~x z)?KotPk6I$>5OR{nuckW;nFv)Sb%r`Ri&-OCny$w>=QFgevXW`TT_i)*y&th`5D zx*%WT6=Tr_2!}Vc*;SP#;;>NU#*T`8@{W6WwfQ+?7q)hF7iBIrrXOqE3RvUF;_Omv zPn}JH`MnHy2VCy{&ky7-G+N}mv}dCx03CG|j0>CBdC>o3Vf&iJ1zNf#eIp?2 zkE>_MaY#1p$wfag9dqQ7sk%I+9g}3xOBGnxG|IzZxv@DW)ou0{hxkXis1fk(&#LG7+>3 z99YCHO{S)IBJH0jysW!o3-FN+&}A4~=WqlC0HrQ|@s4U`M||)iesU2=baUaY9j{_g*e z(RyzGiwqtky{N`*G$2PhG94KN43C;;Nj2!bD`c+!co+X0{*C|X&)mELFj`ouYvQdU z#Z0C~Ofy*kzBc4!((zIztXzL7;SiN@*$UY75N9g$+wVY}`A|-tl z96~iQ6KQjeJ}SxbZ6Hz|Vdc9`PIpS_yr+R#kFKG9_eAD<80oZFViU09ef-je?n{x| zE++#SaLNA4JwQioE0KHGK4srn=^5kBtg8bz8A6JjTh0`xQ23LPe&|${2w}-|JJuvJ zc?e)A}PT2OMGo3cf%*iYi2(<&z_2L;U72Wy^UG znaZUzWhzpS1NuyH5_Rai7*|vcJ$q<`8E3^T5%d8OF5}5>5EV&d_y}1^)vW!QVl#ij zuD)`^=yTXR#GAVuqxJ~BLn5yiR$Hd2T5ekk z`MdTU^^AKnCw547uI{u4vM#?TnJ50?UB!iC=n00n;5Og~%%{Efk&zL<{DVe({)(4P z)ARNxryx)h21^wq)e_afy-Hs1u!3IYo_%b}O+( zRTCV!69SZm8~H7eR&?KJ=!V>EB*DSA#G>0JPp9Js?b9U4PJ1xtdo|ro%NPtft#87GUIgz@vCNvz=kH8;Fh${*u$GoHLO^f2b3w&MNvsmb_ zE$@16799njoLLh11TdL|P%5b8Ii;BFSTC3tSInVlH_y@@mPG%Y-5U`v)j3Tzx5IUNxF<2@UMMyx z%+h@H4kE?%VZX8tYqTOV=tl@ z=9_K@w;C<%iFc$kU)J9--l>|eY#6+YQx93t^qaoEZTnQBAC;_j`e_xnzH8~UAR1h& zh<7-+LHDKuB~prG1WXl0=QNMdbi)Mc1sL_TaMi0HFZD5AE)4RLV})qdtylhqiJPob z+({`?uYm{z5X=;Aj0szo!-VUQfYl|>c7f0ejMmS3XiLTvA1Ub%TI?~u@b2}2F%evY z&UA;-+mo(Uo*BivrqGC2Ut-hR`so9-Xm-9Bn-y-kI9g@c);b4&h&Ua@s9FO|kKH@} z)Q9KRqi(F`%n~w++K3+THdaq-S-Cv2FHg@ZU+Br{Lf{~l6&h@hP z40!uKZ)L31D)oe-67*&u%|2XBM9Ih}xe)?^BbrMOJ2zB}3?zUsk9rM`n_SRn8T?$g znJVn_&FzCVWUnpGh15g@bBS9npGo;40NlkFjtt9?ssUIw=G~>uJF0DEB?|%qBopzo zTs)X+`TwErJ)@cow{_7_6h$xs(o~2Fh;#+%B_K*w0TqxMk!}Q(j)Vk7ic|psr3D3~ z6Qv3Q0qMQ>9(qqGA&}zzTzl=Z?mgp-eb3#0?hi^JVUVx9@0{~#P^p!lkt(Z?J=Y}+ z>KaFbokTyphGcpg-Syz3GwQx&LQrjPEHt{4Jv|j{Z2S*3tGjOAX&`24d2~yNtgefT zh=>7^q!%uTvz8Fof#C|oHRt1P=u&uk4yl){&~`#<0!$1?x0G1=$}~K>r9{u3#4VQK 
zKcMtxN^?~S`>2(3p^lE^aqU;B>4@>y!G;d){%^D|s>kUw<UQ)(+k;k5MW!L+j2|0@(dS2kfZU9=-7ZY}aDjx9s#2pDMMWt39LFYr z_TWO6BG-ft|Bf6=J?LnZrSzV-4|>^0Q@mN<38`IU1XI)}B*CETFN(OSb6b>DV>uEQ z7e63&_gL!>Xt?|7UaFMOA>SF{CL+vlzgY;;qbB8>azG?lhlAoc_Dba-kF-pN# z#bS1t8{R>>^QI9Q61%aV&^S)f#s@5OZ_ViQ4`d2&L}81(Dw2akGEz2OpZ$bANwY-0 z8}(A!&quHuEWdgXrGt0{U$g+0rJ}Ao|1fv~99=*FW9~X#8DMs+`S++Y{3S=J`}Iar znBchc@D0%S=gdm`X0OI;sTcQ%yBJ2_m1e;C!HAld`O9MJ4&qmena5IB;9^$9lnLX?vJwFdyfP4m_%5|8eY3WaLc^u!>?)HkWxkGH zpXTvsXoH*|e9(2gKWM7`1z^a9{J$+t|0kZ$`S1^jrUH;?fhned$$Iu55XE;2H7ZQg z3Y1n67})tInaR@a-D`jWP!Af3G9K7)0dwKE}3Gt?0e<^I17U0aI5#HxPRTeA`F>kmbC%i+p?{XZIRV&WRrGCLW`Y}T=49_NYR zq%efuXyQQqg#i#tSjXwTi{I2S-;jzFNI9u;{$u&uI~60ha06pKN0W&R2!m*EwUauF z=fEm&=|(8x1pAmMBxE(28d>L<0>9*Oc1=Z(-PmUR5+t`_72YSdE1(U~!GW2iN&3)a zZ;rOuGkyQut_qJEs~s9+tufe(j+ZzlDk-|;dk7feAjQ|c+2#lN$yj?<WU0PIsmoIhu1OKdy8+1!|2)XLMLN^J#%!1ITe6KCL;Y z0S!K-{YgbxwTwOC(y6X%d^3%{8l+JXvJ$PwygRxSzOWO<^{vZ{O(OVR=HWM^j&4yv z-J{v@<*3qKoq_d`>Ai|-HVLCqhhHw|PP5Ow-Pcij7JNIWRHWh27y#HUqQ)AVO_HoR zM^yL(-uT)25St$QIXN0XeC#u^>cc-7S`?52i=Po8=@V?{e8Np(_zBA?iwQ-4tA`yj zS2x$*e>ge*;bq6%m(?@OQz^uF(jLy=0Z&U{E}ClWkg{=DUR->-weI&%F%;EYiRW^y8RbH19dYb27r3u0b~OKY>(1%=ce$OfFKD6%jSxdy(QmP2agTU{@N~G2=KxeEm+G z*wYwAD2W~Y;xx;|pjcnrXiP1liIYOc)|6DUquw1dJdNw&wMJAze$5I52ByXSfk?eMPT%jTR}Y<%2_$8DXe3<6Xv7+(`$8OIN>I4}>bSAJqA>{&|z>g)Qo$2wi8 z8)mWU4>CoWMgrCDQ6sR6UpJt{Qz+{7kX+i>#2q4v@l z@}0f3`GGD@k&&V2hPqrGi*NEE5Zpmg&Y!3w>R&I);TwT_vFU)nnYpWmscsC3?TV=r zq*@^3qh>!V{S{w4bZ8nGjV()zkL^$R7;GCY-*VK-NGwij!j@Gw?}T|v`?~2@hO|qz zCR}xXsW;eGeB+UaN+g3L38Y%84!P?*TQ0j@MNpHXb9&7v?;XD$(jxZ5-Mz0c#r)-$ zQ!VwDEc=)r1vvD0&fLj@c|Z$HGC;g?Bf8u*%nr^xXU%ps!OyNm-90P6*qu(1w?%Cv zgT4+*EkiA)tOD~1o|r!%-4)Dtu|?D*f6w;e{75WcD*J;4)2Q&4>@bKUI+cZ>eqpxc zLO$i{!;@#8%L|fIFVhk>(A;IZHFxo?O%r!ZlG0t&9Nw$9J4PXzjke9yIhU{{F4@EA z-%oJAg6BL??%r+L57ew*`kM}(r#sRhOFO006SSix5%LQuJea!4=Pm8_;+Z{>A@&!| z{BCZm`~i8_k$~V6KhjMAHY=~B9?AnGA51h&mey-6hIb2FCge0m_-)^+5+dtmC+?Iz 
zX-{hs&FL}ZH%dylBzNO<`ydpKjkq8ix0Xbl@S+d_xA5fc(^ebT2=NukEX!g;O&DK&%k)u%Z)W8lzm`k(`-y?v3DDxo&x5>uRjK(upvXEVQW@oX z|F?lHFJ=GS5fP(=Z$CrC^FTG98Rs>aihXf)8dky!&@B<1h5rW0&i_eRq~$H-{e-L+ z?vpZ6tyQ4XN?rv9NDluge00hjlzDB2W8jQzMH*NAR#Z=yzH)^^=S^{B8QoDiS;#kk zuyOW$2*AfxIOx;$&!Npww>Vbw;^+LKv;ZXEFsx^WwH-wg8! zp*tt_zV0fXSVchLq$VUrZk~$5aCasz>)PuK+lB>?vx{-{(9eM~cD09~KW?+lqu6!F zW^Y5U1M37YJq2($qw#dXr#iU`=Z&T zO`P1*mp|v;gj3Z@Pq$l%B;nU|vN0Ewn?}UXN z=5_w_BN}AYeSb~NOw`b}F84GK!%iRmKGFt8vR!~|uHgJ+?<+cnPwUbX8KuXkn!xl% z9e_aI3*X_#7X?DT0mx%26ds@(HJ_xBQvjzq(;(8Z!8mMwG;9%HQ8}+GToRms+(XC*DHEn2Y+|1c~lwEQ-nMcS^Lf!gyT5cg z3uTv9#TJ%dH_LEXwU#?!pxWgYG^>ySE~LQ85YidbiZ~dPfm)C+-Sx<=SFWw!ZLUb4 z^$%5`tzm$Y)Dg=DuVW@P=Qt(qv7PAA6bemhzt(JWliCvNtA zQFK(Mx!Ky@RE)T;EeqyV;=-Z-G(O;zWpu0TEUg;_s?l}CDWk%Waux9p^y#}Dos|-j z&CK+U;zX7>ee4+axK==C)zyLts4WP7r-5Ir3XiOoN+kAYnps_WbZ*bcj_K9QSLF@D zoTI);bF;s9c(_t5@mr}S?d|EDH}uB0?cQa|`5N-BMi#YTa1z#e>B8&N4kguBTDpR-iTGZQT!AyBBVFH?qn`Z%acyvP z7m=iw9a0P`f~6$-tIkygl$_WH|72IPV^ewDI7u58WgrFk8YuYNH$~tD?Rp%JG_NpD@$e zzdQ%fdq+2|aGe(=VAgoAsb9iFabg8`Re919%j3JaOO#!p~1;k&K4(0VC8pk6=Y z8_{rc+S>W1{)+kZ__jP(&7$}lwU?$jpVS^I&7~Sg7)+3H!wG@kf5zOe`poS(ajo)| zoHp+jA)URAT|=e;Z24h?xF2U7 z_ca<u7HYs>7Szm4;y@XyvU;4vwl2J{FkY6jwgY>|$o{`p0}>V%1p6(QpLVJ~^k z_P0*$MaIK&>rUa3@pDpV#(W>$eJD|AJ6Tn?AN^n@7CYfBnBJH%#|)x$5U`Xq7BunZ z1Zs7x;NUG*i~`SkUPQjH>t6Bb6KeiSSpZIMva&mDMF9w9JGdRFB24@Q6j4`jZig-? 
zfK-mhj>XV4W`I#fG2pMkGQ0yTi>hrNZGf?Bz&X0bgeut1mtL~E{Q$HQC}Vwv%rypk zvNj_|b}6eMFPBa(3tW3yvck)t(3SzNT*ZW9U#yH?jrsj>jnzFY$WTVM>Lm!`AmC@v z%6nKGN|dXut;5SzN^I_0O0WBC>NU6V-*W`TIWcVh6)XQg$VX<7A8wTREjaBrfChZM zQuGHjLHt`{*q52S)KxAnVBn0>Kc|ux@=~VtY6;2~uKY_1v~hDrL-6cG!MPHVhuX2+ z3cwN~O)aHx5(5mh2?vF~`ekz5ch0J`?c(FN+iL>OnWiEvNdtggYiUC|PjtXxY>WM{ z7Sc^_^2Xg?GaA(oRfk4bfxhiT3hzHwzf8FD3u)lKI33i+lK~#Uvi#-#W&w=dRLHReeAfbpq9moc(HcX6kgRa(bNoOF z$wSYCkS`eL=_;mEisaNg zagpW^rJVsLJPV$vMlEFD?A|yq^0ETL9WQ2phbAbM)bG&6A`CT10uU&Oi1i`aD6XVY zTuj)+wIG}>U)f|;nj_?y4$bId`40#ge58zkw%}`tw00dQ$9tr)U&FHmVT)Ux6WIa`H6K2mH z))o7##-}oNzeQ#)u+S;kKRsoya=@bk9^83XCx``dDp6qH2xDc8l*IX-v6iEAA06xG zT*ZDxE8?*7g$Q8+iWo`oR2sps{GPGAGx6^Gr!9%OhY!X=Krig(AICjeiV-R}U{J>& zpo3LJ?B~;@UR4gdaLq^~_g>s{RiIr{L3^n4NpO-n=lHan@Ca>udGQx7f8KbLA6*_Q zLGCHDyFRiHF zgr#Cj8%bCiHzX8VW?@La=^sUVmE@q-Eh+E1P^{F)wBCH)bcJc_1x*+TUP*Dda_-}A-)eWy-FPwa_G353p4)xFD72aE04KUD!|u@DS9Lh0>ivj znNnz2(R})Ehu!Asl+VvhRXGnurQ;_>={7xJU(qEaq0X5I?taHgb)lgN|MoEyM_FMw z>Qq0|ymb+QK?8tb-pWp&JYs0}vzPW5f6E@JIvi-TeVt(!xD&ld?RgYMg3GrklzZ=d zw~2koGPk0RwPv~FJ?cPK6p7rK`u6t17ItEgrc(^yjxag&u_Aoe9BZhix-Rn^7{Z)L zEPCmq@9&b;vof5uJBIG+V`{*jz%CY%G-%tlUUpx~ zJp1|5d?*v2HXmyMYbLE7&0QXFR@KSE+^p=!d-j24TfPQHJNhrod1}!e+6YrAvVxO$ zGC-`4VKtk*>}!*(Uy}1{^3I<2L1TWLsu)oz5PEWZOLpfJyu{o}@YuTO+x-yUo1N#w zd@o09Y@bp&VB$P(1R~C#{3K=1B?j08!CxyE)h>XTl6f3NzAId;zf7DeRA->Cv!qsd zUR0LicH+PChW>uugg#3Ximk$aqILGgr``wW6mLhSh0J_e0~fKu64t3C2t-4ef$?bLVDdC<-JY0s}76 zeVXLZ&ed^pcV>HulIwG5tRNJ2=|jds{sh_7?nPmrt7aQ}BEZT7q zPr9=QUxcrZQ`*exg&lNEOzPWMG6lZ9e03nTnMSC5Ts;&&j|wkNURShr7+Cq3=;<`- zVUeh6`cz2f@KxE%$6zoihSJUjHGG8`)sM*dSlr0%B zhVMc{CsL24qA9BRlk9@!Lq&Ex3T^Dg+I^8hn@$Yd;7Oo-v1S9xmqYY1i}3>F#|kBp z4q(wvqL&jk0qxrVhLF~eb(K0g^=Y~=wL_oI7&I!q99Y~*33y{VBs*^7gmhf9V# zl$BS$;W;o;{w1eYXV?w$HHd;b#)Mll_Mw}Sms2i2X^c_bS~9zLIhB6B=1qE`G?Ry= zaO3QSbrcu@`$j6b7du?I^yt>}F^=0hdsp2IH(FKCf(^xpp{-no>uP64Hzm*CGQHq6 z^x{-oq!#6&l%wO_!+V1L@R;l*@?a!Ft z*kq8qzKb1i3Vu;2AlORJ5o~O9m6oQ1xVsX_#}#R#ub@9Fb6-)8UMsKd$Q&+G*XR+^drH1;m)Y%XYY!^^8S#sXQg?c 
z*bbW+yo9eoNiJ^TuzlvfKcJm%K)@Bgxo|FP^cB7y4eV_0Q8yr=>uY35=6BL)RxaD;v)Y6ex@WMeg z1y2nvR~&szFp1Q?(Rrf0jdxC3=F4UOP~Kx^Dj1lIfvE*Z#pl&R^CZXlGvCTSHDU6m zn#MkyK525orejIRQ6TOVXyGN)*A&OyoTe3EibI9(6!Vm-6?kW=#tPYKkghY}GZ^M~ z8l`wBhQoFP6cOi@TcZ4aOD-RpvgWuw(hclEKN{-i7BHkJvMNFwh+SiKv#5N#^MHj* zKSkuiE^d}uC@2&{t*SAC#&o}XTa(OHDfSE z0Zc{CT;{ZxfG_L1%jVC|>AFcDXXP7XHPs>Uv!FT!K%~Y}qv?|U+*C?FM9TiO2SaM% zi5?Zw-l5*XO9h+M$hnxlmEhkZrbxRnPWxznF#E4u3Wm5~S(?jQ!^%Wt!F&2kkf4?6EBT&=q@Hg%3cPyn$QM z=8PNlLOHoo9g>8qh@m97Kxy^gQLE(eX=;3#1)AL#3LtPV(gc1E-x`y+>U{bXzb{7M zVH5q!GaiAyg^)>YnqGM*_03aY4>{yqMSpLdAz%g{0CxEKprcS+Oaqh~ArSf?rD<>c zUUiDzcWRurJBx+A*Fok&~R9$-oJ!XOL@Z1~<#yUOrQ^}~zBRYC-T&cCU5ID`R{e%~X6~_JMaFUf>^Q@pI4a2~WVOc){0!{)PYo)%FL(E2~_q zN#pc+FKNCk_daW2>BG`s=f!lEbiK_J@#J4RYk@K-V2x$`?8mSU5FyD+0aV!h5 zQX1`w-*R1LZVP11W;x=78tcQv`&5HJaV_k$_A$k*Lo55!RkmvsVWR#kihVtZARpOn1 z{y>8>9Fd5|OCttbCxB%>r5SZ_jM_hesX^Idjs%5izkQ&`ES_|Lu4_L61NK$E-6s_; z+mn)GKG9(-_{I0x<7ZicI&BJVm%NJ;{}Vs`uUP7T_j`sv1OJnhMCNva&vf}Qmey%L zfui_Y0KKLx?RKE#f0(xXyLrq1@Kg`6o(j=o{$!BHdZ~g}6u-`J8{u~Ahfd8(O zK=#qVq-P~-FWCw<>YtkZZhUcPU#7jy;7|*osd>Os`4vfN3p)`?`6yS*cK1Ty3K`+* zxN|eUPf$~^l6=3ac2#d~I;KobfHuYt>F=s`Yn(LLipcJumdr_;=N%a1bfJWXDRwa& zij#7e^q!zb(Jo%+R*LoRGI{y->Z3`!n}JpQKPA@TwfLC&qTBmIE8)v>M*Mfn?(pgW zjGQXtV;1xmOs1z4i<8=gO0wL0d%lv`5PM$0649^c3GG_#l%#=qPVJ*Wbd-S}uaVAz2ls_ULj$Kt`Mn9n#-YJrplbnfGevMuPncNS|S{pL;3w}Ovh z?dk;|%M#WF<4ROoCVid(&dB#P|4YJ-F=l78tW#I2{B+#R%$20r9uQ!#B~{_zYF$1a^QH`Z>i5aCv*frNhtf7#Zq}%G{w!e2(ZI~` z)?b~|IaFXhh}}`NdCA;=2t1hmj8kJLiTL}J95z3^_$21)gfvSOZO@&1RT>#m7dGH| zUs$7@%lvMk_m8t*m;{f1thY|Z!Oi1pDRRuwe8?^MY~CF1Re1q6U~Mu3aFZ4*!vU<5 zzj;G{lZXC`75(3&=D%buz-apKr++;BM=JBbQ&#@E`1}`BpObQ#i99*)zoc>LOxYv^ zUJe=yYRGfG&by!{Ow#!F{Ni?v3a-VY_v84DAH;2Rh<3TJRX6%Qf-_&_gMQAm@Ko#r z1~Bo-%MSl&?V^Bkv*X01oONQ6ecohtlhm-KBid*Ra$6At=KT05_h^+|LV;p5CmS8g zMT+(6_E8+IH9~~&Gj9{sM2uKM-*^TsLU(e>SfkeOoZC0LS(|iK&X%++xL1F895e+e zD)$^C>e!~ocIxo`@LZ;A(^UzmX=saTH5DF)Z;dM7VVdH}uoe35He=qIR<#qoIsBfUB-P*7Fj0C~D}V-Sap{)csX*>EEMme(tXgQoYOhf0`eG?ur|AUu={PjcPCf 
z7;pTi1o}oh&}=HqG@f7NfVYW*k{6e@pV{{O4uDpp7}{3Wk8^ULm_JW^@eYNu=N8p?hr8a-)(N5~PpTD;D9O@5nd^Glb7 ztn;83G!DUp>#at60p+l;bP@b^_FRA$qjYut&*Yw^( z?G}P$|2gR+?J_JWpwbixU6|IPU4tNqk1F#NjsSt$8(344=EEVwD7on>rWnOmfNiS{ zuxXhqoRsa99e@uKD&O|Ff8*s zWOV3IcH3SNRgqMlTRTj`?y6W0LN;wT^RRw2#?|*1#9!t9jp}*|(BDLyhg*CjT9JJC znN^IW%0>b}H$6w`MI$>&u`tJnSt6~;u5F}a&y|Ok-%TBD%#^-WWv>1>sxF!`sgDfY z+Jk(h0(|eIL2FVej`d!dxx=khFhsapgnwHbf5BOVZ3v-GmtN+SV6c=Z-wtQ>7|P&}wZl0%xwPR_uB=@QhD<5F~jgP0Qqo`po$^E@AxmqoPqDjp=^AnGH_hUm zoD$1^`zRI_KFCa&A9NlJN$a3eOkM7J7V`7C+z)75@vYu@9kb(j z9?9=}>tN6n@5e;zHqIhB&W(~Q#L?TG!UwiiTS8!_Q2#-g`-zC+id-7e5Ag?d8X+Zh zTv_!FI4tX^eIeOkFT5*5yklO;@o53p+1OFe zTQN?I*F_7j)j`%^d6DI4hZst|~|tYVTI z3AHDG+;b93u;~DX1+$Bh0CggYV? zJ=PXO81q($Jb`P*N6w}(7`0}8GHI9FVtg26>!IWUL0A8}4Q_cqj<$`G?HU-@M- zpw!wJKZuhGt79WodHJ84YB-9m<$axLyQG%!0U3i4B}L3NBr-UHk;cymZLIE|d=1Eszm#xE=N+t2CvS_Jb? zUpiZ`ZVW#N7kT#cf59bQ02d4X7n0zAx3K;H{Jea?)Pt}9h%7A8M;9uMr2u&2uS*;B zpDt~v|8!~lcVgrJyi40Z-P;(X5Oc*UP0}jG|sj2hD?pn6R77w1?%#!wVUCq;$4%h z2@@?>-?62wiSV9DXa2E}EBFfb1O^O?{lAUL<@w?6%SssgxN#e)&C}XUgx+tSXZJS^ zB^?qU;K31?Z^LC4?`xobFP9ddaN1Rh`w8CmP7oxTp4qZCgKXZO@!6?)#sW|{Swp+D z7hb(N?qhmOu_sVxTft$(wfSFuS>;xV7wlMW>~gtU=Jv2eqE0ZViXCYGzQBmfT z=Z+~1Y|fX9E3iCM&&JN(;UJBELfjzH%^3vm0R{8g+O^CA7ct^wjm%{gHuhZR&zCql zZ3Vo~icLBFJ@ItB#&1kOK@BIryd4=CVsl}>0~Q93*V3c@e>q32p1S9=!g&o0wKQ_) zDl01tBqs#ld!$|s2Ie%$;eZ?ae}0J8Bzni|l-_4_yXs!paT;=4>p5`j9|1BGwhs^{ z#>hxwTaJ-BBu$N%nukDTkPbZE!qpx+R}8khAU{l=NaVK6wF!#j#Ov^Otneyeeu}FJ zQdEaFLo#h~_oemS<+;d@^~+FYyl)?P}9*|DY8H7JVIY0l=X`7$_g1h3K*msDh zP9gJ8KE;ba>Mxu2c+i@Jy8+;i5ONhd`@Iv~F~@Lh$RtlTLcHu(v2|g3qr&Gw>=g5B zI;?z!p4MBv5!-@_dK}j^eVT!{1-?2v4sWPLvXjghk0Z6jid=n$m~S+SyXMC-9Nakz zohgLUBiq&J@kRSe->@F?VZ~3c#3{x;tC&(`Y&yerER%3>vMwJ40`brX{==C705+Hj z=H?m#`YbnFb`C#3rblL-1rCuR-F~Tz zzXMR-$A3#JCJlpsdE0;A`v39A|3G>irco*ky=$m)9~i~KXQ})Lecca?*sP;q!^8vI-*Ccvp;IxGA!~?M@Hw0TT}}uk7)68z;A#w+7yVg;}!&t`maHZlK%eoRo3uS?5K|Q} zOa|LKdgabct`UtsyGmy1JiEKzvnZN-D2T%v0n3bph^N%}kW;u1tBUhT{c+f54rul( 
zRB7*KXRhxfp1tdNKaoU9w3q4WH5daO2XJcp$qXXpv!{8q5X^!mH->KYMP> zo?YkAPW7uWQtdiRH@6dZW99n`h~aexV7mSs{a(o+hT`MqIKMWsalbDE?vS^sRy*`)S4t`{=QV#!7ZL?JB`mdvUdl`g zLY{-DT%@mRbQICPtYKLUvy+VV4PZh4t}kY!viU%P-%K=@4GhFUdUsk+J zqmIM-w~-t@=h43dRc48RA~!DY8)}RlOub%Jd`RR4y21tMq30jaiibYgm?A-Xvj7Y8 zzEBzLsUM}+qk4_!!V~W~ozkA>2Re#$A8CM{UfV1f;XG%Ccu12Ra2RM_A3vqCg@qSu{st-v&_Q&$L#99B*j5{jym65(dC=e@++)EU;{gbf-m zc|o4aoP>=-5gyYW!dhZjEJ-Ou#}YC-ukb+OEvov}12GmoAt;__d=ii8-l2bftQNGu z2Qej_`1m^Hvx{exkDEALv_^2gLHop$V8KBDH?Z;<&cUT}d`L|8x?yk}pW4i6+{;re zKSZlz+f)|$hw+dW@VEf%$565CYzUhKElrvCY^tRoo=Z`T_k~!CGD|2!q`WzZEUbm+ zZjfSg{sF(h9YRq?*(+`9ekm#U6jEmp_uWDRPP+n$x4OF$ zM@Awp8KetLFaC63NNhbbSgcMeHV6j1)?Kc_weZDdanKIq7sYBXUfbMgT#L2-GAA|e zgA0w=d8SkzMDl-dR%LA`q+RY0h&u^9>(F|?!4%r84)H+t)(PX2qHlE@G^KOl@8-P2 zMH`pxxhw?VZ(#73d=@S9uH&E$;Lq*fb*Hi7L*CCpS;u`%8~b?*3k~zP6@46p(xwg_ zp6PClfo_NaRRWfWrbc?U@f*#pqU-oF*tX_Z9POm)1+j0^Z|lD3a~|hm#yjxnXb8hT zI`YDudnH97RZj~gB-=U?R6b=h#Ph1WReaReqjQ;Gl?;51%T(u1;4PNxX#lJ z)B2H01GLKsBkXXQ;+|o zj{qVYQ(EQ9Ze2f?NBG+TWtAc2U(viqYcN@a6r_dUY5wFmvUBxE$Z`j6S}3i_d0B46 z(Cqt3?+?;`3ViniCdrzJtJS3Pc^(PR_i++1y@Z@{_Dh0alHg6PB%i0}rlmnD?Apr* ze}!?8uV3uALEr5x+9`CM5`MJr6hW^R5(s@n6M$dP!r&`ga`fsdYtOAF;WQcZt_`?k zU6vbo6}tfgTwqS)-5SzG@ox3A;pa#>`acI2s|Sl`gnr(S?5J>mtFD5F?`;P{oz++= z2K~ruB&AlPvMQ~ixe$eeGcRyDImUE7gFP)O1)i4yfDnvEy@0&07U$=Jg&ezdSU&d| zY?fGb^6@*@fFSNLK0TZkZ#ajPP``izQIucdU=cGsrEl~l z00W+Z^p3>P9zinu7i1PQaAp?E?0?|{@JrUpV=+G-odNgytXUtDp_EgVB;9YldCv!0 z{cwDD=?w%Ctt{#*?U5f&w>Nm#w=+btq^B(8{B9sPHn2M-mS~^MalTyjBk|rA`}ND2 zY;C3wHB{#2<^YqkBQkI*2X*jn&F8!(-<6dQ7iRt7&}*>OHCu>1b90W~cH;DcxC|@{ zbq+udE$aEXYX~PZb1gm#1SgNsT}}F#d;+l!fPtWz?9j826l<=3bQoRd?jkp;q9Q4 z(59szup<3t0Mk zS-~M=^X)#RtRL+cL|yMMGMS!Dmj1d8-6LmM|P)=Jf@T^N)-m3*e)zGx3`rdv&%Hoch0 zA>%nX=Ks9S`P&`-8T|h@(&?~%T=zM{IZ3If)IHJLB+M?^`7Gz-*MC5lnFR5Q8E29J z8w~GVtLcsp8q7&?x;&y+bK4zk1A$}=_}&JPd1364E87}OdO z9XlayDax$>gm9E%@Paqn&_VmSCdiyDj@nWC3~;8h}C z5uRULDL{-3O6~6pz;Z7XO z@$;r(=c9i3B^wbHjGYdF2W<=C3bs#dG%h*O=jVd{&ILfsUP>A1xHC_Z{Xetwj3xp$ 
zQ31NRx**$?Lm+5MZbVJOS4>y@z4zz<20u|u077h1g%?|Du*M`pZs*5-&#?@?bZ==$ z@$dxGGQ79wvrXRWiqBHs>ch?-va*JQy_##uIX);`8%RpPE6x(sYtvIuxvO zsEMFKzWxRDe9*bb*N8w4QoPwnIyb@fC(Wx4?c)xknO9eUDXe&?`JPZ(p0d%m*@GpI z!Du}k#{4U6G%coZr|uf*qCs1dakL5F7uJ7sIB1(exZ`USE%YsZy;a`2^N5t#yJl@{ zi-)(EK9Lu5b5K4iafKeSK=+C(2In+WWogAs`ORD?!ZHBWWDHRqrStHQDi4-h4_;KD zTOR^cggA&7Vf#_6W2Pv%{U$U+gR*fF6>Tkoyj)fq=3f|z+P?wS+u5o+?Q0@_F(RU> z_t)2$wreQ!>7aS-H&0tU_eA(+%nVj}zymP_%O0eBA*$bH;*p&k@Me~KS(6qZQH$jG zMsD(V46QpE5V3Qq05HiO$As#aP{8F6lYJG>6m*50e76cf^?gvQW(9fkU}o(@PdKDy zMusvHv$+YM!QvXpnv4}+NCHbi1nt&P+Os0S<_8D#^dj2LQv8_v##o*&&~CByUXqrI zoRYJgEp1Hu52$I-;`a=%TWyvZq^Jgn2#0QV!6<(~D1Y#liZF!)a|gImORF>q5Whmg~m4^+2ePdMr^5LMyL;*Y4VLRjJ=2ZpF+vO z0M%Z@MxPA5RCnPIC<2U`gEed8wLe%;JV#AwYGoT{&?!lDSGn5+#i#Vendz%K)O+rs zRA$Ux@z;fU?zi9mfK1(PLALc;|9~{7EvcgotZaKk)!WMge?a%?b)f1W{ly0^-iQ7= z(AaWs-#J+akg-yTC|(s_k2lIw=BUvprGU7jbJWUq!Qk}z6Rza)(wnP=^MDGj9>%nf zIa5P|6lkNrKQ{!1P*z5nG>eT7btJ0=i%aP5)Gthg+W%VCSaLpSU+#x7hZ3M6_Vx0~7Av%5P4rUohG%aCQf3feYN`D? zh~%hlK;8dkv3IR3#P0#I;3WEB)lnrb8>0u5>M)s&2|av~*g|xgB!}7HV&J|M!)fk~ z@`?{o4(pUmz%TB2dw8&%Cs}LlB3jabyo5KHPd;#3a55%cixI70u9b>rSz80bs3%_i z2VLM1oyb3d9z%w|#EXAWFrJw*0GCAQF*H0j=d$_i8z7_ylktk8Ih%X1WpyBAZxSim z{QGOQY_^H#QZJJO!nD9C{R-YB7wrUgk>=~`WmC4`>knNAt9`9)ZGdnVl^aOu69gn` z$A1IBleZ$6Y!w@`7*K7R_dMAIU1U}pl0)Cim}dM7V)**4GFvyIaltK=o5m}JvZ>y< z-#gRV%U$|rlKQ2!SZrC;reIEReSUg=7zO*h|B-P1X8+@Y0)tE%R$xO|5n z@r(2$WSG+OAFz$tUxg(DH>DRUul^8olMvIK>!QCl!##N`^Gb18hZH%86jQaZVJ)BI ztagQvn6*qNG_~y^KEP4#G5u+u2>DQKKK@ZeDD=1X=dHin%vxsBg90zY!;;xv8r`rj z8kbmA#9cY4J6HtfiKHEbk_{$Te;y|{;|~KVnz-A-;Zj$}t7N)A2n!&N=%pk1SWXLm2uKDP zlATX(45O0D1B@&U!YHS|`o6o>g|`0%-JQ5hCrmA0 zo$Z~eLsmReawmRLG$YcmkEXMewZ0O4e@^?*u(x+YDE}9G?-|fkzvhdAC|H1qiYP5A zC@LT}L=Y0umm(mDR6&S{NC#1n77`GpL!>JTp-Lyxi$IVT5Rnejd#|B}ki`2BZ}Z)A zX3o9)&fYV7&iSCO#AIb6b;54toahlR#+-Pfg5j7NcK3cvVDMn=N zF3P__z8~c56dBf*Mj8T$Hv$RNr|2<@ zeoZ`x@Z-Ud$AImvnOWh_)S2|aC5$qj1cVL-;eQEqd{!<5VrNXLd5GCNX3%9ewT*Y1 z?R5Ll-#pjM-1sxu^M9AG@3M1=q<8}lAPPQhko41o>-@g|-VCz8eg1D)quGrsrQa5Z 
znkv(`ig7rkaPnN?%asuU_2SbFdA|THj(#h;1DhdZ;jqO1p+zgahY9xNFgk(JH4BSP}y5@F0k%Cq> zdBeEdM>_2x?7F&$>qiu%&!?5x-p$M}2}nJ*bwIfM|5wjSKY!R+MhYn*i=fo`4!Hw1 z)?9drhIbRouv)t&94K+;m5r#)h|^==TCLI7oF+zn`xV6}s)oEP_sjBgv|atdkPMtA zdl^>K#Yiij^mtIt$FCV;&jx@icf{Z4ZTk;W!r?#K|Akv@Mm6 z%^yeH%vV43C>!RkGcH8H1i5(E+}53r(w8ap%6LzWIasAyg^^O9L3;pMX{Z5a{t~gi z)I9apknO%c=p4_U3wCGu&rRN(k*YbmRra>r=DNoHuFs2JH_$>gv}67kme%iF-5Ln_ zD1$G$wzk@&&BZ1yC#MQsBAgRGxc5;1EfDh%S5_PfX<@VjymdJ{-PoMU4@F{B)ui7LfHhP zPQ9ExmsZ72;Vfk7%0|ofrmmi++$F`vK!emEQwNya`ySp?IMD}HBb&LR9aASchYa^D z$)}G--!8~iFz2x;F0rwTQ9Qc!4NaaLoXjR|O1kN@WnhP?D3<02PdL=c--Hx#1TsZw z;KaXix7%1{_X%*30{Z4qMS)|QdTv9D`9pHH3++H@^oO9 z`B*GIC7O{HUacm6Y-_R3v~G`he9W|?A9L3=l#<>ktwzLFMZvbLdHJ-*?82@YX#i(d z%?}2~ovwbRFn%v){?@#*$u5dd^+VxNlesBEzXAN~`dOeH*^g9N^7eLAkQpi|LTnma z`|`kEdgh~EdEg=_a`YVdGse66+vIGIR}>U}Io|Y;w&#f>nyDwC+Y;8qZvDD?L#(T%eaSW_3 zdG(=Xd#2UAh>^bKY2!j)7eNb7g=U6bN)2{eYc-l}g==wY7b=_E*wiMe7RdAj3K#J< zty^5kUo~Kj9a#@w3>#fBGQ=h!IhXPypz0xRvV=R0<8hRO;20sKe@_)|NeDMerM|<< zfYefJtFD6@^ZIS%V($+m%j+!_w6ASLg+>%5PxQ$5KD1O1IHXdK)Shk8mabR2>lj)@ z7oDk}__qRPddcT-avOqqpde`X#Y<9Cs!+@5HJa(ZS2EzHE4fonjjH%bzzMsTgWOOm zOe9Z(aL)JBH!RxxP#<2WU%gg&n;(*fI^!Q{ig$%@IdK)0!TIz|woeUCWtbCYk|5 zpC!I|@)eXnryUWWje^#0Iq$6+;W@}L^|NKyfqYG|gNn+Bg(=gUo?8icHxI(9ByjTXKc zz5z}~yr%0p%!ueggnwhJm_YH7dabK=v}NA&6gjJ|YAKwTwA%2!>WzrjUMsk0wZfU0 z@4+t6ROU}`2=W1uJ+gpJlTEeHF7lcQQ+LHAZRZ|Xex}H#DE)p5&i-U@>40S}&Lvt`6LXobWvD$_>gjk? zE8D(v>djPVT$W`m>=;EHqj|tmZ>N!% zyq>O(hgd2CG(^qiWhl#|k*e>@b6>V(VaI6-Ek+lJsnGP1d^H@96~3MAd7NyVaZfH~ zI>$HmjfpNQHYhe|Xf?fqi1VbFq>?V(v0@JC=;b=xSZnuL0PaBe!JymK1vn9-tZAZ! 
zi&ebtNnZY;4KVm^LAbfXc-S{ECPY7>iSXd0cX(&E>m_*r46Z@W(~r;#06M(C;fqxM zKtiZmQsp)!IjZp}`BimFldQUAEyvT)9J$H^@53HubvtTztWpouud%5b?c~^yxixpl zL2w<|b&1KZj<=|qJB*GmvX&jOdO2CZrxLSt)Oq7;6185R}8 zAH=*j)6KL6w9=KNYr}36xbYdOg<1VeUb8KD|AB!RWl^M`GULGIGo*VICb0;l}Cj^ik3aSAPMAfJY#>Q*4LFZYxQ* z{RkS2r?=yMlkd_Ww$K69Us2WL^cUYf%Gz9GcTiSJB)F(bm;q$WvSd0X`@^_KftA9` z`;%TMfN4NU)~q(Xd7?}i5t&B6|p8ZoQ5W{c;& z-K#i6?9d-v0RRA^Dn_W4a@&0U9ZR)8=HlV4DK~=Mlmhd|QvD&fYt`9r+YWEc!0x#Z zXLl3q7TZzbEA7Fv%y);;ZFZ9h(((r9?rpV$rt4GlM$+CRq?Mcf zsZ_Vg@AlzUP7;K}hNr#jBKKQx1b_>KJ58`jeMSn?`6-R;CmyH0*S^1HMax_@tnXAt zEx?rZ-hx9#j){KeBr;ikqX{^fS-MD;6{rm8zL*Wlz*ghiT>~6(a)}wjErEJA?+YHc zTYL%YP+Y)>g9&OvSVpHA8K`>XW6TJ%vUwpiLNeFKWPNPjxXtnrwFQwLP6{3!ga9kD zC2s6ouYPF*yA&(-EqX|3^#mT&d!|^Q!KG{zUyqur*xR&e-JG)e&9|@FLa>Mn=Wo&0 zhkNkinM#@;rW@>0-b#rUvZ~iE^#kEzUHi}K#eekj*`yzTWPN6KCe2tM4=a~<7aG|6wWKJYKgwGs3tGrCufme1?~2o3PVIpd?% z{#Q>jShf5od&mCK-n4)8`gOQc(RD8HjZs>7&!5X-j8kFia)0S=|1&aP|Et%5{?YNT z+G6O`!Sms@SC%3I!EhWdZ`0oNne2&~U*O_p>2?7@7_noH`$9jTwVZ15;TLa%L7f!T ziH}oP6ioPyF;_2s=~Lj|%H4nI`=eD(((cD}qnV**)hF2nLi#1w?6J-=l(+k50z^47y^pfN7vSVq+JCt6uFiX> z$dN+N(#wW>Y|4A>OP#M2z4n}HeC@xmrzzj*3Z6JxP4+O$Q+(JuBEz%e9(wTukHQc> zP(FCXkTrn!8rf^RIE z@HbsfEe%60wvMd7HIa!Q-=GyptJ(&kq{7CzJE0rLn`kc^H8HgqAHgw%JnS_nf~vb( zmnavOc$xMj+&~YW1NWPzjblHLCy4(mqG_#aVj)Nlp@9}_maXhpG~JAJ%u5EH*@PaX+c826m}v z|Dei3(D$HDhF_fLcCbdkUXFE{4`V&K7g7#P8ldMIo_(v7P*K<_<04A(hp&oxm2wU* z?)`;Jseu3a`K|-V)DRZi%I!ly&{2 zy}-t!EkYY@v)2$SXRP;1Dm^06##X6JNiFJq2GVXs%$p{-RBt}vJ%6~quEiZI*2C;uiPIU46jJ9WB4t4;ZJMOJ7EzUCxOD`96J8XUB92t7yh z#^3BIMSLDO-;O+*dN4nxqu08oc=YP%G5#Pt)fn-xD*&lPLW(Pnb*<{++wMGe3PWeA zQ?8?t=+UY{+_%R^_Q)gjs?1N~*Qoc&tjtmuF&)|5HS<)owv3}n`&_>N5eHEzb&-Qv z<3?mazcmNd;=1bi8HqK7mpID17p>-ntkP5Da2bh|H9mqJNvG>d*Lg#If`jF;h@)Xr zQt!Pq>Q0n_^>4QUo)Ij6lD+$N1%G#vrn zjcYACdP`37)>P2$n);L)i@QX)Z{FhG>C6d`al|p%_kd>SAYk;F6ffeCZj} zNwoq4c?arT^bO2{(8mQ=Y-PFXv(z^oV_sohYt-kkG>+uR*2$RsqfieDi|MeX-Lt2M z-IQck>f4XOD`N(dQ^|IQo_7oe^mHq2#2za^^rGg0 z_lKH+9}F99k5Ed&J`pR`D1mp6Li#Pvq20Mcp>ju=mT~7kINrK;*FSU%k1JZ89?RD& 
zyLXx<)Uh#eVW}WG*fF)@h=Zo}9DL|Z5iPvlfO3B+sr;86BeTr}{uaPXgu|1Z`Y~0u zTmwVzF4!cKJ~$vKjwkU-v!WV#no=KVIM+A_we=w*gS6~+}Ni6poaN<8Va$eukTKnov4fLf{ zX_q4QQUs1{jzl3glHgnC&TNUax;Bb^T9uu@SU1m11G64to(>+NsG#QaqX4fi2;aI3 z@cdVwM=+bikJ0n*fF173nghvtDE|k;8x$(10bp)zm?|tYbkhs7wFwMzTng>EVw;rm z4Vvs)DW2vAFGjx|Pu(66VUm>Kc32h!eML2UkIIbz1gbWY=eL;<27Opo3Xl~%uMeem zKcZz=O4+^~I2=jUzh6b0a|dAzN@nB&Q@%7Is&g#4R$p{FpXz)G);oGGgtTRdl}4;J zhJ)Yu%d`mX6bZJ-nAOvdF*i0MyDQAzf7SH))0L+0Yi9&jR|Fswzbr<5?DfX7cx5PA1C2XX`QF$Ma!F!OF4MyDhm@6E7tX+_anLNrzT51?!0usf(US zjSqYq-1Ze>*An<2@NcS&M5+-Z{qnrJZBuT)8f^&M^pp9xNf`FC`}Q6x>T(-x5bYbG z`T=(%9^DnZw#GT+qEmYHta78zTlsS^!i6XTKBNp1;F?>}1l`|L1a>=_XYF}${; z;?&bljc)njN?EVhx;EmTo01^m}ynaiLFA zjs&sq)^c^xg2aA(>g(GKNa&V6QrVV+lmp)}oomyU$Mh~R*;nicvU_bgY>q$Rz9ycj z^e7b*U+8PA!Q~Q#5tg|-C<)!@-Yoa9oG@VC=jK{%3HQmOd@>Ervdt0{vb(?VEGDTh zI^pQaH#$j9y`xtW(gnZj^Fxz^jP;dS&FyPQkaZ`N?yQ>c^dKn55Lq)%uuPZad#oH@D7>;2U>J5LQP6zBD$#NL8J5AY= zrN>r5|80e7)0RLyiV2q3l~P_D)HUS2wn0;PLreB(8^Nw^piIBU93{z)4DNp!GjjZc zULnk+?zj)A@ZqIszF2Zsxf6N_@45OQXHdQ;sbVaTqJT9c99g&Sn9b8k_x|#6H9vpw z?fA$g6*&VjlQEsb{DRz9*7GM6PpFjyr)WX1Gw)JXO~l<3wsO8bXb0p$u{VE>Gibf> z;_DL@!Kk=&3-WVsqjO1V-wM`=tht3#3^TXpSWcD80oiJewQ`O8Z3&frlO%iL_qWck zy$yc!H^PV2}4M%ClDL8|0;gV3LwLln2IJ|N@D)LEX=ninW>Gv_v3sZiO{iF}qlxmYMws93toS8bmU;g)9gwVUxY{H2~4 zA&7^6vbRp84i%*k?IbZQ?>!vHH%{XAiY;BV+scE_9bbT#g1K5QNunp+M7a-5D}+Sl zz-;ne!cCgg`{}~w%mDe{l+AP?{x|K^X&?(Ur~d^)AV7oomxuJW@DK9$4gGfcYXlLf z667w>n;VbUO*68E*Utph7sk4RHiquC^THeXV+t@?qJvd+KgK6;Y9%bQ|S0j!x_EbDKk>;2}@3)*r zFjH=|BJUjdHhEajcl3+#avgg59Y1~{232P1T8%o?LQ%I|cT?pp!SsJERrLN&cIEZk zMAyXZ>_ncFct6x+`f02wyXV&o%M-^L+%%)_G@`e$?|e< zEERom@>!9klwje0m6}xz5tb9Tz@B;jp5E;NHU#v|7wCs1sId}bAz0WMC)OK}h_FEZ z{gf`d(^KflSbppPg=#4@?2UnE+i=Nt0MX?2XuSQL)2pUsc^Nkd$nX4gwj}JyQQZfg zb;s6lVL%}*NV53BFb_t@No zX{iXn<7K6gOPq)EUnsxx=BPOk>1gbp#(e7(NI)43u#q-MqfI{j-WyUtB$JDw1Gdd} zvu|(|=S&OOk;0{~^IJvwkaH+=f%{hS>l%aQEXmTDfvY_ZUk3Dd9aQ*gRdJa!d{eNuao3H^jKpJ-wY^8E{6*5Cu2(olUoboq_uR<>MAZ*;VIDNi#S&C 
zGbKf-&(sPjkQHf_LSJcp*ZQLNEcoFb$W+2I=adw@lFe|^DypXPUfhiD7J2PA>`vfZBN)5{EIqQm~S{og0TG0&BbA0@!u^b}}zir7P)W zqgf|1XTt=1C9L0bJ^@%S28t1H z@!rMVvk0@E3gIIQ$E9uVIPZM^z{Pdzak<{Pdk!d_Oi47td&Es7?AcykkZnc7ADm57 z!kM^Qr}W6VJswdsy)#u+1n+9siXykS=_oW@ZUYHr+gIcWzneUUH;J~2E33WMkHt6V z5e=_Fx+HZESppj9LN&2;v6vyt4S-&JnRxJY(__LHG?VB3HzoW%0jV35ZyBPtCVJxR z>Fc6I-vgF0JBm?9&LvqvQhEZDMAnT)# z-S}7h1OL(Wuh_ZhOnT419mV9&O6j}BrRmZx#pc70VeWg$p%sW-qEpg) zw;H?F3}SrOxA2QPu_Vawx8R|7>L(bVr79LhjC;doJV`ohGFyyO;p<*J<1ku!g6~i(O+(Me6 zBHjBd@hEM+Fc+853d$=}(j>tJ@xuNGgVc~O?lDVUAxi{&!1fourXfjOP?k-pGLfbF zIM!{RT3>l$DW%-qUK2qpmYgWHLOo2O&BWxQz1P04KRNm=bL&f#EDGNBw8>9ee+0rm zlVWr8s%Sw^t4R3$r5zp(7l2B~$ir|PFcHISw0B+O zb96`Xnh&rSa%s$$bO<5_ZU-Hd(UxB2PX_Kmoh0oeBKa$>NYxy)NBO0T_sgr;67SIS zK&K$sKeIr$-8|bk-u6`O-`tCTl=r{ogSPw{9QxaffhdRR*G4I9d#BAPZELp}PJ(?{ zOXq}GmAuT1ihPHPS3B|2XxsowB%X*~aQfrst0gI@MndJ!^Z>0nW z5%L?0gYC!xG0|{IH)ECr19$@k6JI%TAr)%m-{JU!0lp$5Cf~U_xL@~FLeke$uSha) z^-Dg^DI~9EDv^?J5SfdI0u2WCw@6)S@OQq*Sut0Y&4*E6#buOU7>O3Ead9eZ>t!!G zP&tUYB7@1EbAf(k?_)ORy)hOMY5=brCYUT&l{?L{bWy&AfLm$VvoG6wb-TR*#e#Z+ z$P@WCm65C9Xa@Gzu@E)c>+HBwq`*-GR@FIa|cC7EGG8zBc913yVq_vAS3YD19KS6hf2R8!wrjk&{+@0>h=?#X-q&>5d#}$$wI2-n?OmUo z5W~C|tV)TOHgWCx@+fuM7CRVYUyQp0jyJE%7}j1ltjbGaj+HNcbs(NzVwD%7GIxo$F7M@-viK70;)*pY@ny;S>dvq+bb}VRmeE zfL(D*WhdPrb>U|#j`?TVt!V#@hZ=jLoQd{-Pzsfu^O0$E@3zKRJa1E zwzQ$sEYVlNHvW3@tLzHUo>{xP$t$#$$0p(~aYd?E3+J0V(h6mG&%rX;4wlRz+lCQ7 zE;IG3b%9;thUP@?}D*h&HJw-+k|-r2Vo4e?=4C+BHZK*l40* z?7Q8uLqMJe0qhU9y{oxnStp?M8kpGj!Ar#79PMoLgJEv+gPSIClYea$;(=6>z7?Ms zdc|fwn|HK*)EhV!Azk8QCc3`zYI}5?=BX${h4d(+QNe7@0@vthu=T5^?pHwets>Z{ z#6*@a{MXv>xzbv02UrDhlR~YsMWa*Ie$Az-3~ zoCPI^^x3#zQLQ};ouTjwv*itJqf_$boz$SUl~m-#;+98NT;YF&tLtsOff&Zd^$=}(IEU&0=fn?h{3U}m_PMq zIy}Stk-vpD{w3LO5(&=CV@+FT_|Dj7_!u{KQDP;P@^7DuWsCY3t%LqywcDSgjsDIt zM}M!)>%ZWAkRQ$eYQ>~Z2-1j@n?Mm-YOrwBw-Fc4Z27Kd`t}i1gw<04l^2s!EP^_g zqtL@s1I$a~i+ z!L79UbY_TqMCi=*Ddyt zauIZ3Im|ft*^aHC0q~6hSNs03korh>|GVldU{n&nB-EG@C$k7j^(MyoLY2nd2 zn6y);i1X|2+q6|x_$n7?vmUCsr`s{)Zwb|Jf;xXCBv2tOR-|rfvdgU%j=(CjV 
zi`CDTRyzyty4#%FU3HDUszt$wYMGmvL&1Nit!L$;JtBbsItgUcEpHi}6UqGim(??Foqf#vmSOC22+m zqP{vrX)|%fk)Q_WKst`-w{8L3l38je-%+)Pu-H}c^-Mc{@g*&czg2OyL<`ecBZlHo zrb&xdh+E2K{nP`uzIh8rpzWC-x&(P#7HItd+}cUZkuQ-3$xfftwhZEyCowym_B05f zwPY}Bby->Ef_b01kPuwt9c!GZzGZ@2m*EV$G((bQO2HVR=cG}ihxWf*gU`Jts3OuF zx*|`D8CsX274KM{tLhaq)wqSLZaQD#$~AvxmW9M8ekd<|F6L@jSpHoyx$Smd1evVm zwOi-eCwnH!D&G`1AL*wh!m!7@AY>4s^zrlF1xFiqbc|;iGIJE?;}~-~Jj1F`f7-`M zHSnC&r7agR*#!%1kn}Uxk=dqGqAlhv5(`EPgby18Z5eMH&th2M($;WJ@9woG4!O5M z-1|&;v$@ITOU%1*eq`B0f}cgAnG+n zhj<`QJDH6SFaO%5&;enS{M~`ff8=<9@3BI;psN*Hatx=LVj1iKsj)4R>I5|B_bRYeftAyW%e0{Zg}t8RRt}Z3UYIUpuA(sdl3q+YBZ{E#sI&dhi{eY z%gs~t(3=^_UjS*52Q6I$y2gCUPac-4r|#ggyD^`i7T zjk_x$>Y?_4##rB(xkT!mOX}ow@IZM6E{b-_3B4txPiywDvrRD}O$kDgD7Y&T7QW(~ zfAofWO0zdOK#YYsBsH?gvLzWvApgClRb`D;Z`xzCCZO#rnLHQ9?6P zyx@CREjFgK11;=Q`Q8q9B~ELGOWJSykpf=QxB5~e>TEYPp>kq~-P%;4Dc1!4Ztp7M zd-A>Wq<7UQsAx_9d!6g9z4aeQfmTTeKFVX8Ptw=$KU(t%KMMT@4}S{HDM+P86R}F} z(dj9&4pxI3(@9K3vl^tT^FE!s3z+QsWEXasw`79cA)Tg}seiOC|M&H4{p^R(|4Xbm zL*1W7bh~nnY0x_}09Q|X&R<-^?2gp{17`ifP~cCsU}K`mrqWi>QrQ31lMLPQe>)E- zKFY~D-qqjt@acqIZ~I|IlK-fw*JL$^q3V0*=V!Yl1qw)?1EA@9 z8P>lDkIHDQw=Jpc%)-Q3NW7VPeBnA|to1#LvmQSmorSny!1Y=76Kh{7VOdez9Y%^T z-vQ6(s&&l*0L_j#mSnPcAC|mF&z5poXP!@pjA2y3Y%yD~QIZZZZcsUa*XswvJ}@}) ztxe1>pbC5PX>nFmh$4pTdGdqd>uW4YqzuHZ=P{C*Isb!DQM#TR3T+0p;F;zz!~fH<%BS4`}>g*j0$7Jjaj>!J)oL-E#)m@p_wR*RDfj zFY8QhP{hzZjgVBjv_-+M(wyq&fWt< zzTESqA-D%iWTWYRx_x*JcA*t@z-d;Sk@k80YA-44jlw2Q3|ySL;-?35=Ra3j^ppst z>DTHnKNbd2XI*V!;l0YIsuV_U10b>@o1h_C-7sv9HZ=9WN7=h%QUo;*S(w@wJf^;R zB9&?x?>P*w+Co~lYBiXWQFuwb`{J0{IFX*vl61TuOpylEut`ukpUU!nRC4Hdh-_lU)HumSV?Av2v4K?;hc^GW zr_FRzL>ZI&PkjRb!G5=`{sE`|q?Y-MJjQ(+eBYLOY(u4g1Fth|TQ2B4#e-i%1%c7x z2)((e06Ku}r8_LUMsYiPNI3+g(Zlu!8e%d2YD`Ba zJxJ`uhYT**Gm z^nK!uFURTY!mDVEgHzlK*b$kFq8h4hE%jArw(6?V0Im*Zl-TKW67esP2*;s$eOE6_ zNgw4mu0!bkED2(>ZPV+!lKS(h89D_|_dIxeGXZyl&B25;?(2m=Fi80(UA1#*rES<1 z9W2;dV|%dan$KvU@}&zE3{P*tUX-hlUX7-fYPeXm> z6X+@zWqWakF@qrRk;cQe_fW@G{=-5CrB~^I%YueOwU*X!s4_I?ShShWj%x1oT{_)_C~K|(=Ufb-M$xi 
z%>omNv>d9ZTza4NHUVC1^Rkh_F?a zz1W6BXq4sTw_BU!lm#fWHsptGKu4!sbsAzsm&yiTi62Y5mzj7ib7IPAPI0OMgcRb! ztM9}x<2p|xAr4d&ALc_`?z#(+M~+)uN|I1@jRRVh%&+o4MM)K_ahn+9ZV<^vWX$IrSoo@%rvr9=kiC0Qvi~&V9KvAwv(W4Yo_muLlbHJL z!(mt4bj*bHZPoMDRi;#d4?v7o{${fUY+~ZYq&cX0PRtTdMn%5MC+1^Gs~`cjy;D#SHoDZo}Dqrh~k9EBb!FA&YQ%jJQB35RZAUWtOr9b z3|!qzKJ+M~Q=0fP^Y$9vGI`+RCj|mk7#A!@u|ClJ{4fb`aFO1-I?*N8Uz%SM%v-77 zdOGrek{jfUjKtn$VSlV2I-mWcN%S}0VIFgo=F#(6GAzs^Rt`dY@pF?bGw2=2^Gn`U z4YKSWk72D1zBc1kO+r}+UdX!UuJuNy?3Jsedql#>TxtqZrAx3an3@q1yT06+Ni6%RvU#v&0|o6ar>z4_@!gJj6Jr zH&}sv6?G9;8;3Te%lG&vjh;+KYr#1w`LBmI6q+P7iR7dJaQ< zTpn~F&!i|gbB{J+78*Fj^--GXWX2(y%j(rSO?wmhSmKSCe|MupYjiXZv z4<-51$Dl`$97IHwn+EM5-czD*w{_0q;{d%!hKZ-1p2Boq+4+N^>^mo>#X@+Z=>;_J zTqyD4Pv132w z5d54;@ZWKnsQ%mzvh#CW9dsG!s%`XqKYbuieI8n6c zo^CJXZEyO)7Ig)Y#_sF&U_<~I!kEY9Q+~SdR+9PnY7@HvG9vBC%FxorP6wGvNVr>F z|8?kfeCd-(b=8?uX|pBbGLY`(rAcZ5j~f#!H>psyrPP=xpcgf*(i7g- zSTeFOT+a}*C}3JBTThTR^d5qEN|ZEXw29YUuO0MKgw5q(&(#mgsAG-tz2fw zojEnh4hPw{VQ|-(Zt~uBIW4)}?q^dfnA{oW)e>>I1n*>d>AX?nF-x6Pz55*}?LcWN z{ETs1$^B>b^4HRYj*5c{X1lBbQV)8S^P3RlH|@9CcA(~;i05BaBlN7FZQc0q*eL^@ zC%*ohF#PAz7Hoz2nE*O&&a_=1(cQ$1LkLbJ`+3@#R^!jJj@%fE9}Df(COK*)lZDXp z%_DBW?0IHb5w!TJvwPv7@EV^C&`qv0#VfwzyD7}NUtX3aO`K5`Q!ui%b^ z3i++w@7p0F>SNx!HQ)nDcU%FQ6X9c13hmrvOcnp} zeB!xq{sh})e1C-Wa*z}z`H7qGd*b9Pu4+BgXsd+z_1HXxPeA~AE~9-YRrqXpDzcJX z7jmB(@=%~i&1&iUng;oiBQ7HOj0p*ADmL6erbuCLtBatZF>qcqju5vNcvA4GV0K3K zd3&H#f4W=x1uSod+&2EpZxFIstT7!ncV9;POR4qbIXUPOnoLf0!sl*zkgp7W?Sy`w z*EeZg@E5Q<;X4YQKgWqbuP|;{lRiD2HtncqAARn`qxw0;AsS26rg3G7=M+jPrzG<9 zi8l(TWx~r9nwk$HE(C#mo7%Rl0#};a5-gNui^8V_aVVVJ#3EYnw(7KFFP_|XydwmJ zone<)uP(3g(lm6sw3bgvHyyP$bey>g4ai^a23?X`2Ua!SYC zTu5=7pVCLom;@?i&VevK=$80%tyyEIF^Mtc38Odw zS%<;!`1JKqVnDeRv8oL3ZOuzy-K}^xc1#vhNLdR_m66;kD7DSBK;&U-QYc~xDnydb zFOR7+Zf)G}Z`n=hX)Mdd_HN@r z?o!yb(z^9gtBL^P27X|62AbPNI7ABnAmLs5O9Pj0UORQDw#DH9aUx}IhJI=3|ejS z<%fJE8oqnEbz?q;cdSavPSAA9nWf~lYnQ|I7ykvaga2(+`~P*D{u2iN#s4kE@Za|_ z)4#^h4;|xh$u* zhusvsgP6P&ArDkDIOLZtiV8wm!vD*F8eCz-&E1GxO`T2l%x&hna8n{~b$ 
z%{x7jd~?SDADkx+R=S33Rhevd|`%N86_A@zy-?AQYy+hL+rB zy-(Ljer`@BDl7x7VRm=aaM-Ezo5PWSCEJMy-^`FWLjP`w+hWq22g>gdC$Q9aBk80t zqmyV!R0taZDDMPyI@X5pM?{_@%*j5`S!1&|KNyM}A=KS8tzlEm;*Wj(SPD~5{mj!5 zNg%us$|2d_p>;G{iSKyYb|a~2>wyuTIAm#4N*ESj>_MLI%y7<&ZicMuv%~LNwxxGe zYORMIhfS?q(4HVtj zQT*|!%}+z~!R;tvC#62nT~v2=YAl?1t%#Pxg*FW?b>XJmcuwybU<%hamnG6jwehcLiVqqH42&=t#4I<0G)9izT#wPHOFMHy%t zic7uD7sa&BSU5HX)A|TsgxU@<&S4o*&}5HNh^=V5tHbU4q8%*)6L+dM?4TQmmf@v( zWd(kxPycl%ew0=NCb!RIz9ygiZj~$ApQH&|=j1nb7irZ?KIS{kgz_V1LPiIxY&mAu zqxi#JFBeAU?2gy4J@JCj9$vveQTG2bt~xHwJVL7TokQY``kr~)6J{pzQ+1;|(H`?U zQ?Zah8sjY2;BMOa8T_97)LS1r9`)MHS6pH&*~4NqQTzNg+}~^-5f{F>wxv!&O{%_= zC50!1a!y1`>DQlFyPIKBh$ii0KDxoV4oDh`vh65z&ou0GKBI<4e$dl|=zxZmRN}d99MxmGC(T0`#b7pP-0gMg^Pp|Zx|kN z)06(*4Lc^!t9)?<>!8+AG$-fgjL->taHU$)5Iadz_%cDFF0BsYHqn5d$qQdI{o?x* z1VN|e{sbfbR<8xj3EPa#PdDGci?jPZ^PJw!@A&tUs_9J7Z!fH-cl&fy1kgr*+{Axd zqTka&{4L`)2J;6vbL>y{@FCdbGXcNTgd@bSMndFUH*7f^^ZN;m06 z1A`Nx1_RmrC^y36uo+*iFw-%-4q7Z*P!Rg-0R9) zr^R{B%$<{;%Hjz2&Sgaz=OZK7HeN~J46dog!ke$OQQn}u$`QOVTwJW0&s}NBtwT}O z!zNx4v&M54N|`2(%yFi>*0TFKD@Aesxs?QrN1$LSals}d*z{13;V zy+(_f-dVnUi0*!%NlVm8(GeJJu+4W;zKhw)_JuF@ve3e70=;4UiO9Fzo>);Q$WZ&Lmi-h>En3fQU396afV#ND-70ktW4N=|!dY9*~YCln~OqFS^#+uC@0$`=0y% zcb{>_-DAj*khjk7E%STkGv|z2Q5gTMHes}r);xw!y-NsW$(M3$nzI2C-M|?P02|zP zX2LT0CaFv->VO;0b+1gfYY&0)oZX;R^^89IpPUUwSTbIh-9)I+OQgYK-BypySDTM1 zm`FY1>sLs*S?pAiCrLb&;g?3`Ky<41CJxPX)jI5n_Q`%7!`YkxK_NhJs-%w+nr$oA zMk0G$_O5OJr0>4xwXU>vrm#z?u+HpCno(!r;2pC-^S1rGGi_hl>lALv9hoHQrYvgi zWT3i{M{>LpnMJ0z$L2j!WuQ(%DD`b|=NwV$=-{>{jioCE7+w{bZ) zdOgu3H@R7i7m z6CwR4IbKA(jQ9@G#(mX$x|#tDJpbeIivQQJivRx5#wIZ*i4hrb_xEi0^ywDJ9BMeV z37MaR>ayW1A?p(!YHQiL=R9>CEAlP!2pgWf6_h4(=j3K|Sqepc!e94wYQ-cGQ=Kb1 zdNb$-d2e2>RzZkMZi|1r$%i;AY}tV=b{{{8Z}|Y)u8sguRNObjCr2cm@w5#fn^Lzn9)I%k_4hk9enuR71)b>3dzqpn`RE3gN9?H}5E zx!H-H7-wBkFQwCD^KarKrtz(On8Tyg2%TGIijiSk<`Ge4rVmTW^0n8kP1=WM?!2$* zth!UkS7xB|Zcbg~gTMm1D4`ldHL7sLL)$Y3GrjH>-geQ}=||`4vMyHJ>TtSU?x#wR zkY9ND(tYNYeP8>JNj?sI4W$YV>Gmw+43sLF*WQNrIyS>$VYK%`(u8B(s_#{rPqgPQ 
zzt>e3c>kKS)gWbMMdp=G^C6$-hKN+Dn743kmy&zHe?eOLckJM&vhj<3;SbxyuhCzB z0uq0*{QRyv{lSa>lQHAZ4gb|H^c#5bi)IF1uuXpZ_?to$upU3?$&Z18a5+Gy0Uy^P zl(3&nsdbfR*B>9F{`~FFy6aljZ`jBWQ^~s0^#^%>J#NuY=a4-Q*HMyLq>snwodx{F3d;FzgeAwg}(3w6s;CA1b>t z=#y5?j_9>`RObA}c65IM<+Gsm-V2}Nl|Z=2yxZ3?4Fxod?CwSaw};ytj=z7iv%Yr{ zU|Ad}f09viypz~^>t4*wM&K7Rv8a9ZCETRG0c0|fR9eA-6p;i@ zLIN>D`n_u^EQyWjG-tHN2x5V>tIIkA^C*f^)5^LOX~!lpTd56xOAck%k^?5Lu2kX{ z9cinSgDt44k?rSfqVM6eQD@9@E=&oSF)82VM{pr!7z!p_>F~3RV#RJ2MAwbsRfH-+ zijsOs36567qaDIo-6|XTi7Soj~;oNnP zT*EHG7seOiE9p#J#v+Sn4g?r2i+js5mP8x%P~k$cBtn&}R?qR_)s{h~{Z5@df4%xI zC3azKiQD%s9PempaPZ?hrgH!Mz6mhBx3vsfbHO-OIir*WMRgBWsz-Lr%)CUJB9X>%!YpXI2j& zHK^9!&ds97GGFoT-j(f_+>LkP6VS zf0lgd{?|Z{U1H1pY%5=ZM)FUW6aF|e3EHEpOItldCnImLvzAvH{P$M237|5N|}cR0!ajZ=eLt!DUMH^}2lS9Ed!p@>J_ z0PD4TZEFMBbFhd;U^QLtLo>WA0fF)m>!|4Z;Hw8fz83o){hnqy9u-0#aqVMAcXXZl7HV8L<0ri@L46 z056J-+wonH{d7kvbetW2ZFcZLI0aj2k3)FkZrJ6e}<%S~%F~cGS29hD$otM&^^IIriBMRKla_PV`*J13yzg ztr2MN>IWaUs&F{eE8$pSMThHr;ILK0&ZGd30{dP8ecFBMz4K zBt!Vw*81t1hlcN??Jcn!3e~j>Sq`x`)0hdqfh&Vhj)BM(nkOP*xZwT}wVe`>P}9u4 z;SeiRSL3|e?iMbi_79R`+fM#*^%!Ge=FbXJ@n7s*ZSbCYuchiK7aAwIvT}ph>jWXS@rY z=RbbmfZqHJM81+Ap8d9ngcrQ{vKmj=A6z*>#W*0MUBq?$P5_<5rk4l|IUtEV&`y4# zk%JrmH|^HvhG~Uu*0EYvRw3EG(vUm}-LjZ3LVc z@PO9kUsnlV7F`#A$X^x(5T+-K5BA3xKt&>urZOqv#~AmlufkuXIrT~&_`HGU5Wh7C zGC92l0zyiIDQ80&oPW0>qX*WT%Yl>jz*%x&%Fr$77RU7&ulM$k#;^UOIT+)| z_|tx?0L+K~pu)qL(gmuCw#<6}zg1YDa8)IGd1wCmgx5#-W83_;4PK<@`LcMwL$(01 zJO0ros6zO!RRK~`B4NR_&_Aa2Cn-6|C;wf8>%}vfo1Onb@qO}GP`tbnyDaj@kl~~I z|0~7KsKyn5n|LJYccJ~dE4TiG_CWD|71A-wFh23{&!n{ zCC|a)ZQh1%xdyyFeyn&POnDMveQXO1G{an?54)sPp%Y>D7480B{|dU&ViZl|x`ZA% z4!(qu;;0B#pdezE1vXp454l5a&#fT_+|FKrGNe&Lz#*(AfeE5k{B}0rz-!;K5EwXt z4AiFczK-ZmcoO9hJdRnrB7FrU_0iZ+~ zNYPh`4t%_T!esEvI#7Ehzm3I<<3hLCGy#Wg@StkWqCtJk-#YvGh?qZST5fm|L5lOT zW!2vIR*#QbCf{u2iKOUl8JOWQTXU_9I^hfm388}NhtOB@8r^TFZ~}bWHUy3 z$WWpaM@xmVw-dBs{4;)!mOOGAld=b&-{#hSzHR=HFwLqu#$+TEmOKZylNJf^f{qNLbryZ{^Qbf{Kk=~<(gzB;jS~lQ{P7}m8;363Nw^5S} 
zB}$@TjF2LVDYEMrLxYCZPe$`f+&ABTmZqb0sO_0_w=cWi!{hORW*Vd`pB5nZ)Kl3rjY z<_gWfAV~(fRAq?phlpC;&0RZH9qe00hu{BT~zC%B6dJ0 z1ouTLC?Vz|(unkl%!i%Vvj3u$6T{iR#O2iQf#^F~MWWPw#0$Ow`KXttob4)}`}e+; zj`(DhzIf|izNEN&a1mMiu~X&zgud< zEzTK3fmQLp%K|9?u7WD)Age2J1~X>a+{u$Yx0OmC?QWu5JC1c&qo?9jw_sBW&**RX zlR58yV*IoJw_=q2cL%&>Gd|OvYBfpbt^b1kP`7lMtn$CZ?7;Ina!q~3f%39xx~X2kFZ~wsE@v=YT5W$-aMTJpAjtTucDf1829)Ic3pLE=RisS4l!zqq@3xDJ$YA;94`<-4&qn4(gCg}VpQz5(L^)URe3AVH<6DF`NgJ@iO54}i5?;A)}Dvu zXu6&5F{;Mpz96-;=?-~0wMF`;p1sx$p`Sa}meR89t0*zdw0^d4hi>_aEwtE%TtL%7 zVfkYWVNVbDyZo>eJ^0<)^P6SoHx})0nJa$b{H{UA78sF-(G)09Tu(G1Y%J=rF!Z>4 z0SJkXMAHsmCLhGETFhXa_5$;Y&7)-~)lL9}XNg*OvG@*gOJj)yp{K6HsT|)S{j)PO z@@8{6GK(lB#wT=9KDwEzw5WRq1WUV$pou_PY#{Os4L|~ z-BO4F__SEq;0O)787XLFpObfc(hgO>cY}FVC!hqY?V~uaH4OGI?rle~NWY#tY#-GIv z{4qg3wf9ig!?Nc3U-{`SNI(`IWYNoeAPh{y}E}T#cU` z;*&pIX1SK&Va0F_dn9(94=D7=b>& zk}99LVinW!H?Vox0r+Y8$>xXM=o{pqD4A`CsTjr$|{BW$F#K z;)D518oCN=4rs^nGti`Uwg|*1BdcM2diC7hUcQp?^v=G?ic9Lnq%W_O%$6Pa{Vo|{ zI=Vb=G;J|{tC}IoXR_U6e8Se@)#lGk`PrE@i_vGB-9#=OR@uW_3NS-w zjv0MF3!BaQqNih*m^|~7x0%Zp2{YBWSo|(0bbs>RcV0<%1&61|wl6qOnGt*8CKfF= z8r{Lc3?28|5yix-;~!_=2P3GQ4Gj5a!c8*>OoCzZCUQoHb}u@dUV(WW60+Dr(HigL z{8s*6K1rvVp(>Qk-}RkEEdM4}oyLljr{aPkN$&PYXdt&~Ks}_P4|5)HqnNR^4b)JIk;+2tCJR`yG^m(JJ_fG*tExTrPlg5=5gKxxf4FMU5$dm0{k z?P$=E`FI&|J#C9*!n^_b$dk9s0JgbZ>ae6(TFWld!rVu)r1`rexzhM>H|+DTaeI+I zuaAD(LQx=GrlUtEtFTbU;a0-i!sAL(={lc3Jz3g&OF1?&3?5{_J`iJ8MId#+`OyR* z694ZIW}GL13vLgCQE_#N2Ej#l?79-#-k#*;&Gmm&#o@^TLgEbE{f(-;K~F?Seo?cp z{XF$4nPaZCUbh!FoUfv$&#Qc`8tow@!~W(2u6r|Ezy%SGKtv2`XXiKFos4589eIkM zV(dL}VlA-`cN-h*nauyFzM5h}vmCn+;Y2DK`IbOhH7$=BM+_InXs5_)8azb=O>KOr z?Lq5~K<=QPb0fnoD=_C1E&UFj3bnEG50KP;5Yi&ZwfSDY3`aoZngCvvaSlU#NO1DF zx-q`Y6}u&pbE59arFs>uO;X_fl}%fA1(%Lu8?+~I<97^DyQ|xhEL~2Jv7b1<8136p zTYj4N_9(elV>t!-7}(x<@Sx*AMUYF`uH}CzLxgWj!+-Ytn zfSrJ;`T4}FWpLcIc>qq&rvEoMJr_*lOEVmW&h=g%khikbGLSL|-oy>%$XXS_&<&6c$*Fq`MIp-hZ6NBU*z z*|ECr9XVysnbq;6`VV~kJXcSinz1f%)%n2Qt=m99>Ls^ShuG#}deG!ilb`)r%8+vB z5PSS-t@uZ8&J2d&V$gOaeM6Bs}H;L{qMXR 
z8HZeBwV)So3l6Wen}ATi^2?R1$|m!-SksOa<0Q++_Fu#FM8t)!fW$p}2udgoCrw=( zU9Zov%yi}be5~QiX!niTQpfjyV$2wkhZE@rM9=vxBW7$<-PkkZu>P?*N!NaC~ zb{>YE>tKJg5;=5O0lrXt455blQg&r9XO_DnK%fkkXI(c{G{owo;hqyQ+ozra9n5&2 zt2@Y?!|M!g4b|oVLHG^Q&H-DPb6KJleZSe&4PdGnIJASOxN zYUankIa{yiJNex~_2eo7&>5nQb^KC5c(fI-gu+ ziDhE|T2T~plIa6uZkL~GTx%a$U_27~U5lMnMgd{ORzm(I5v=^v~WWo;PQN#BguA#W9h^UlaU^R~Dh#}JD~ z98C7bH>OSj6`H1+v+b#&a`NU9sUug-)Ka4ueD!NrO-)ku6Z$A z@|C*fbJk3+=31@K@jFq;P?g5TXcloEMicj5d$e|-2o{HDax*q+kPq0P<1;l#KHZt&?jyW3;=iV{daA{7py4x?!wHJx&VOHLz>#ef@2Y`A#i`wP&#Ouy+clK+P1?8&9 zGh$M#pu4d=5pmpB`X54As}N!GBWso``d>%;fvldR#&AS(WMh$zaI zeR=_rhA4nl#f~6?qaD@A-afdHtwUzf4^QUPLz-I+uR(V|i|izo%&m|b@X}tijredB zCVjpfFLlF;o&L^@dH3plc$sJ$!TJN_%B@XEkQg=hthsc#Ze@gQ+sjvV*TLa_#ra=( z!vD+t&(=gKK=3h$46xl?q0GIdI6bP%R{~Ibj^G`QrHkS81FUaPR-__ zcBeVo$dI}s)QdV-oU}xGkA-}biQFq^$acS+{r-{3zIRv=z$JwuZAV?qs?=D=-m^`CFnA9c*i9BLt3_AuX~1?qT6+tbJ*N((cL=VJcs$ua7{F#I{&; z9VWcp{w_pxn6ES{^7uV}S)C7i&zg0!PuoCAP1X6xz0@S}uHv)4_Q)pVr;65gX8ZM& z_o(oSu@}A3kd(v;(1bD>XQ=M4Dr5V+5=?j3j=Z%p;2Rs1lB*)~bFOu>M7DuT;zc}~ zu^GYAur|Aowo8I;_R0iVtu8N@m}S;q=TY4n@HWH!okktPXca~^Lv9CIkp;o)Kmb6- z$sUePH7vf4yqKm_T{-*nq`uU@C=DHaqjKwY^)t3a4%^h9#!FcU4ue00YQ9_&>e0Zk z9*Gr85PIU2ts#8kvYs+t%`IV~?o*C|hQ=iO+|D(n(eAoHSi~VWc52^<_suhB+@<%L(6jP0 zm`BOm-@!Pb50H{q6}x*rr=R|~H6be9HRTywjBh?I)`n>n{HVfn6vO5ae2%P0?0?l9 zw_`0ME|^sN+;ByQAeo$`uP#XxrVm0Er6WY}no~ z{Yb2PuyYMdJ~lQP%0S*|TPF6M3h6}@)DBlXMNbz*rhq8zSN%L~57FRhF?Yjxz4s0& zF(IcVPyChAkANWok21ERQ#S!lJ5@y4j`Smem%md>=V?~oA=8>Idr|jW_4zsfo@`Lz zM*I{Rk@4VKg=xze{W06zV~e``Gvjxk)-gESYgVSF-FJZph9##!-^k*k9%f`kCY*y( zMbQ!Jj%6v+-s&Loe*{aFZDmkkQN7C}pp77s06h`U%4ypCmElS0k zO6=~gGd(qh?!O7XzeSb_F<{hyFjNamAPow{@2WmB9mHW!27WABI67^3%uGU_4ElcZ zypG8L9heI&z9V~v1~aNNK_I+WV<7VnY>78%2{x(+JKKubkJ;&!j{bB4(azX6R?Cuu zE%Bfm(3SqW*i`&A2`%y!&{80J7*Ox3+v_kDm*Eq;&5W41b?mNUe7W6@chXpfA9s!fiArPMw zLemXf`-C(kpZpGq&xg_N_EiyYhSrgfqUQm@&c~G}#sF3zYS}I7DBd zl5}ZE>}n4Xkry>TbM;LEi*^~Zk|^J3P~&&-JrEzD9!F>+E!wnt2hd~KF+GV)eeYY; 
z+|<>=QbsJ8SSFY<)Yl0|{|cv;L6&FaKXcTWK|M>#VWf@gVi_eHR|T^!SwpEC(B!*p z)2bHFFkup+@Q=6QBijJ6>s(#+0vC4e8+;znBX&ZTHRYS@s++lB#3yI~PrO$L91QSw z7ylMRx3g!*r@lKvhE-;=EbHO(N$4PLq(14zcSu4ei~^9lN`I{|`B2ynh7|fM2}8dj z1NatZupP)fXz(~-ltu)QmzJhHMnabq4VIff5a?|*S#@-bum!soqQnGS60A3m4hvar z_ffEpl=&HeAPfY))9WHI)SXab_%dYKvr?qdNn^ zit=AL-F^31`(>4ew%t7hepvmI_`jB|i+Z`AhMj7*7y(^x9F z1BtFnF(FUOs_|e9QBt;P35cfIhjf3Cr$YrfJZ$6AhjCtvsEtmw**&z_X>Aem`wzAgx_RCA_NA4=2fwdkD&HX$ znbDU!e#Oat`Q6L2wmG6t=_7N`9`ZJ(BUPA z^P=SN>I5_GRxKLaP;1__y__KOx8j}hIb&n zv!+-FxiUsh?lB`X_&zlj=8S_YZjzcN&z)|G@H7c40Z=8%0c?W6QkGr&p^+Kg_kI35=}oW z*U#u;nJ-w1vx%@O1h-fF=*t|J)UN&Q&a?b(c%h(yeV5J@M;S;$?sMu)v5znG8Q0jz z;w}6ZynAF1=kp4f`-?~=YbDa!DH@%JbSy_-zH)y(_}N=-_S`P- zaR*`9k`nbzq3R7X5-V#jhzQ;Qcr{G7&Ha?AuJ6(J|qIpnX_=|c9v|m&HQ^h9Ym`tUMP;Igj z-dr)G#@p9p*Ljg1k%1rxG!=72epD$GU9|A1sA)O}1><=!ZC`Wtp1$uZ1Jh54+qKZs z{I0N}6lnWvd9o_wCbiHGT(qMHI#+%C@~jD~U*-uJzQI-KzJ)qn*!zd=b;q4(=XT)s zRvTv}bc!6!9(%>Dg07HdU)dVg*s}6osX9^T<@;0+PAOv9NkJv^#0BONaZc^PC~tu^ z0y%ALvWBpgahl8#yu95nSCuXwbn==YrMbw&v1b6pfHRyig`g5!5%EIkB0{wr_2^uo zmOPgFJXYRjCZ^SHZ-s8;gmIfyVxT~BQsny@K*GWX`# zaP4;26zcKErP893(r?;^DL4&oK<-iw>r4wSx^SlPs`MX6*Fp|rX^4=+em@37zSbzSin zSW>IQ^vRr}o-5I7y@EMQ7OR(99Ki1F75!G&fAZSr&eJ>G8+O@xPEtzgdW>t`c{_=d zHA3+p%cPV1;`n{oIBk;CV;A&b1?MfsE(FkzA+M2E8k^mSPT4gZUBjs1=~uL1YInG3 zuQ-MXM_HRtEjul0n!HtWkMg%8kJ%?*FP^z0R?p296=Qrw%=KJMsaSM9geDiZ)%velhE8Eu#ct$Xdha21m%bxnx9tvx*~y0rB*dgliV*SER2X@Z*c^_MvzHJjbK|>x z{Kntrv?Tf79S{@|MmF%i8E5~<&qhP&3#>gQMJ65%_h5qzrsrb_3_3;d^A>TU0POPbaaM+E!QxuPI4Jr*u$ zj4RY=5Bfo;WZk5TH@kCI&XbwetL$n^44M*2HR~<#w!n+<5=Hft#*G z_h(#VZFn-Ss(B;Bgj`n@U*Wdd@usP!|0AnwvgyNLU3*zYFyWy=rP+B+0^iF{mPvIKST&zx*!1DaPw$0OSH~*#axXncei-aA9{{7Nn9Cw`$>>Bk~--Wr%cm)_t&!|8TI;I7f7K%Ve z7fFHkTxMu>YlA$V^Y4$bWcn}+Bv7%HfLRI&v`2&ksvRM$E9J#eb9p~@RPkyd-RZrY zR_OU-8v`;_>N5(YoDCqixOK9Zh~62ciN>KxcVqJJ>8O0VZ6>kIMmHpB&P=S1$^_-% zq`fENKZ=Y;haVJ6HWrIP70nb;Cr6qwaKXD$ZQ2JjoaQ$_btWGX)739`dGtoqQ&k^h zlRiPPT_?x>fBIqnSLZqMZmz+I;i_;D$02!hi+9D3q~EHYNWQQ|!r<1Hfhu}R%O-dT 
zLPVR5)NmZNw*%g8?V0hsY^{V#OFWJ3$edM{P6U$dL3g5F-79}%G`sg&^LDA$TA-ZY z<$yh$S@RuDs2mnX`j8L5$Qneb-pXDpdzf*H>p?|ukiPK1`1PRrtsG}Qz#T54>VVmL z-i48!*@2Um50pySnlAD6xP6KrxBUjUEvPO67gGO@cx2B0LXmWJ=)Ko1*Ow)(-za8Y zGB?Qe)8T3vIGP~%?QMph>$M%MPIQ$)AyAWCid`uZCoiA1Ty%jw&-87Wn@=R2}r@YbnFtxQL z7vcx9it+=MMSv_+3+?l*0ceWu9!ETP+uirN#D23KE-@oU*KHMe8t)YAiwS)gD}wly zBV-vDnQ;t_v7|M|u~FaX7c(UrE9%er6@EEoEFi3$|IK38N`H*0=a4$}MUEF@B$rTK zu4U=%u_b42KbH!};xX$dfsZzC52zTMpmo;|lll-l&{NyeK>(f^FSLu;<6RWO|@%vQOI398hNRQIeAaJKvJgS4?Q-aeFeFApiY~NxB*1AF1Ek@aC{A(_waGW zD>bpf$&;D=N8#daAQ$+`?C>^4d=rU&JOuilEtbX7g0nu`u+vW)-JlK=1#~0@9pNNi zK1KsB1G!BY^YQXl42`27R$UFmp-D3dLQ&kBR&?*^pv&$rX@&u2re8EU^^YiMAOhJR ziE@4hDO*pO^Lx0mpYcX;QFvVd<&ND1o&*5XU3M;7nFl~R{U?xa0DyEm9G?bAXNk(9 zymecRn`$n%PYt7(as534@_*MQ%8qriLo+U9meF_FYe}@bqQrB4*AX3>+7n-vm_R5)` z*+~ucs&O$Dw~`UzhYqbg!G2DlA05~?EyT~6MmZkoZ%<^ zm6kT+vXAk3{};1`BO;6rb6R5y_gzKRWXc;5Ky5=&_SC-ej|V?y|7YiQvD1kxYqD@Xgq5cZJ!#$tiK4HFx2tKMY`D~RVPcvpDgt|I>`xjnGb^b zf51I&mb?7eL9w5KLGR7Ri=rER#VSFdWY&6?i=SqPuPWj?VLqkc^cgi^H zwJ*LFt%P1p3&t9F4@Pw}WVeBc{j+d(_;`-j9Tr{`+#x!sc&B`avJ;_Mj;+&_#iF>v z5;hTmW5O+OHja#l*{b87Xm6qGA0wiLrfp!lf<^gsB#}q^_QUH6+?*0;CzCb9Aavdz za};Q#me=BUlJ-9r%qAug^7}r^2EdwPOzO>_aGl8U*{4w}%l;bIgo%K21HEh`wU|O- zY7fHAOXG=s(`-FMwnDPQgC5PhR%TS>=do0KLLfNw#NpI!;Ebrv=Bq~R)MfX=8N6#L znm=uQX&_HzXWQ&9$_?EM=8qRS$4^U6!nUE>SPG=!!2YVX{+BQOAUwT8r>dWf)hh2< ziNcYdR}HIAe)L$_F^iJz_}X=d-k^lrOK&@QC!qKIU32lr5RR)Ckms(ZIFxlzSi%4* zEGiuMcK?o8v~D|N|694|O#=ae9kwYe!vPZes8>h*&Q2`U#Xl)GF}&$BwNxjeiF+K@ zgIvS~PA{%*w4yrEHNDbmZ7#ebiqrN+^<1$=w@D*tH$~_vfI!bmIe=?TvIqLAy#3-? 
z{_|EVm5;c(!+}?jJ_DeRDbv8u)e^@#WYf;oI&e(9TITNUEDs$O;p-kLslW{LryK_B zo|FfQr_c%(ZpE`z4mk>lOeVRk~SB87c^Hp=!~2vl1@~U*akTpr#Y(d{ha5 zst>`=L2iYt*@V#^Q{hz{tgCNW=i5gJ+hA+K`m}ZM5Lv44r-$TOM>`LIdm3nOJFk+I zf}o75b`}lIlJnQSmWof(-S7i2^#X0j05rUc1h7^amKA(afxxhaAe;65di!39Ov> z5=qu(0t3jNuJ5}>wHR&x4pID8$Jh?QZU?G)%DvKZa1X@-lR*k}L+bk}#=cf9YbzFj zU#r`pmCs>CU+Y*v3#o45cyPvM2F35-hr7v0VC)MX)qUQ_a-)B>XRoryxxu&OlsodG zhuIkp!J~(8E`|V`2UU#mi1ZO^nc-08Z1&{8Y ze+`V2P*$%ge2dilYbE9y9LUUTUSM`|RI$#*5WWE5V&W^%?xf2JIGuv7_1BIJTZttl zgH6mptX7>ftMoG~T86JSo#uFa#wH8J?>LBa0Xpjf=_M>kv)ifM4QLbq&|jR9Z*Q*5 z^(^YdF_(eV^$#%Q$WW(nGx|3(mYSZ#O?@9j>LucCAR=JG*beH1U8N$3M;Q0&Seu6d z^6mXuU@CYN6Kddl2S^#dF6Dz{b(XLhv+z4)_B03fj5V3+L&>Hk6VxUs_}6wqP@!3e4j! z^JoEcR07tRe<`uF5|S8;>(xjXw_?*$v8Bh{yyL z%ujn21JZBUJkD-HFf9wD|&`PeVs&JNdLs zqi@6Kv%x++cR0Z|N9E@!b@je`*I`&d%61;yY}(6=yYP81+X!thOts|8Oiy4~080GM z2IY4%=_vk2$biIN@{VatTWaE%YzlV=QlAW-%#+~X%yyU%mNIOS1Zp7sumiP&k#un)ZeQGeFe@_Z-HJ2x~3dK*cN&$3_084%^WD&XYH6Wfbf zyTY4KB_s7XU<*nRmN3Jq&hg@v#0m5JLxxN7iubGWZQOU<+-|=fB)`_I^aJ}M zKV;^wHx|Q+I!!Fc3Ny^f+?fXx!nS1=l!(WR+9|)}aOqL->*<~sM+s188vWF2hjz7p z3*haaQ9FbGR(k0os*r89ZZ4LDITBnMN`_e#L@u4_NRM`|_{#m}2%qyn*pceUnO@g& zJ$4a|671HEI9hn7T(L5h0*lixlvPBdlPoW`-Gv6&h;$Y; z)jKPs}Y&rC}PmO{I$s>WD^ zt*x7Ia?2*+wWC<&6Po+00%}bq4pMFDhmmKx81q8a6iwIitfm0wbI)zzq3Jp!QujZh zOABMW_f~jTe~0AXjJ(_7U7?hHX;!u(WPH2RL0?GCSaa7AJ#i#_0}z1)c{?R{1SeX@ z(;@8})Sf#Qv2)k(!Ho}1V>WT9uR++u=p3VURkLQn$y^1b%Y%nsxm{ZJSuo6= zyfGQ|B7LAJ{0nNiJ}WYkl^fh=c3Z_;m5szPuPo+{bR(;}%j!d&{L>)l)MGssJ+OSg zjGXTfu0s~d{Lv3f4m8Qi!0d`w1Qvzn!JR7I1ZKI=2iOf#hbS_0XAKx~^A|l?14%%u z%gtg}qqLHb)8O|D5uCkhaS}zh7fpoxTQ6_ag64bk;Ik2;3=WX0o2*9E!2}m6)}0EK z_(qa+vE|Z3rmTn#ypGX;#Ax6$ze8M&Mf%_zGe+r^5x(0bJ|;EXfQ4LOWH%6OKijmM zP%B(qpUBT7y42)oEbOwMsOxq%)q}mo_n^i4rcfgCaAsx~$ z9be#N!)8DJ%r0)Zdl2R`wP$l>4zD+Ned4uI5Md5 zlv??SJC{q0OZQ8ko)^0vK{k*usNb-|QY`+5X(f9O*Ow^$ITME4oz_&ScG8syBy)sGtRb**D+`eT&L+qN@6o&-CQQwZ9)-Ix8=VRGrDiwA36n1yczcov_4_IC& zaUmdiiPX zOYd;r%xv^{h!bL^ulYN~$TkJq=*B&jU?WZ5oHW|cyQwzQV|&i^ur^4I!O}(*+39y3C#*7a#W+aX4U2-iwZmD 
zuRrZit(CI=4EUrX@F3J^3&#YZkWX|LA4fU!x|MxwtGsvEs{DGzA* zjosfNdN@C3FnDRPA{#JKmH_7|#%*NccoN2>n+|;N6+Np{+XjDf=o|dae1~Z|^=&oJ zM4h|SK9NbMoG@Ua|}w~;0U9cN~2&F^TD;xPi$8jRSM6`6Wdc9=K)c; zrPeGG%F$8auQN;5^@sbq8f)tI4ou`_skM0Ml?i!4+$M-ux<%~cq-jESO@x*P|ayN9oRkV2h( zBxsU$Kj7RIh|9>BKrLf;1=Wn~dHj8sv-b9kX=3B#+-I@k_&l@vJW+Y!k#BXQHqXiD zKE!{A@M>>=<#pCUaEmLyPDOKlq*uLt&)Bo;F3#vJhe*(m3*s|`f|7_LB`)NVH_82G z9UDR^c|?rJRWULpMrGTE^NR;g7>)R&g3y&9d)ar$(U;(G^gq1*stzONtRtE&ufUZ% zvYqf9!gC^=tTehBl1nUYtsQOd&V3#MqdEqLCupOyDe!!4{!}AnorH+&%~`*m zr(YltgD*+8TN0eX!Sk;h>~b86Hfz5zlxC!#?hVlqPo`0%drWK& zO^+|F!XhQ@yFvqtJfb_FU$%P3vtQdw+Aw1Qm>WIHy+DLzVs@G(GJ!sYy;cF zW|4tEJfwj4c2?dt8|9uk(leS=nEkhxipHmvl@(05O=MapDBh=Gj9{E6RR7dqN#QlG z_t-v=lU#BN@yv8+@EtBK{1SOIeE*+ir2cq*u4M?%h<+!qF(Y0{$6NIE+1M-UOI+=m zv+xb^04;r#UYe%oVbltaESlIwyxh`%`WDM>3pCfvI{&Axf`6yF3JUMRS7x0~na`g3 zP&)cvM!6D{$%nB=V64z@x5)6)`oFqGEc8dKS63JJiYGJooCpU%n%Mg){VLBDz;cuS zFJu=G`2RPf&6!O1U}GmX;TI@B63ne*_CmkMR48>2tyTCV^&b4RcMi|@^KkwA0}MQZ zWl8YIY6J9S*~s+ksDh$h2LRsazG$cGbEhd?@+M)`4jzA@WO}5{ke7{OVRZ(0*W_0$ z`W=+dZRP2k612GM*sbHXc}Ne8)6SfZ7!<>b_}^GCT09PgX}NI;v~*=>8zC3}-_%O{m$VjNfsG4#1wgxuv(A`m|5Ru!N|b&1j{ue#i~t&V13SS)iVJ>*|4DEI{|5)(5>@`bJoPNr zKyit9K?33pbW?25n-pPo#v@M$tY4Ml2uXQfzd*m`+}v;t;K)3%^l&u=aEiZso^v-J zjLrf&`@gWv=(spGel%$ij%0Q^w5+Au``+hhwF>StGk4|hbg3EM`t7suZg8aGNvd0p zUBh&iwGXqBDD-uWNe_a=(IA)t8${_JgE?<)Fze8!mIxE`$`87fe0$OHx#X)M`GfLq z`Q$_PSn%t(OBl;`rtWb2Y60aJN9QkiVz_P8=^w^3UnP$F3>Y?K+D%2!Jb~4`Vo-8Z zzsNm<$5aGkKm=xGQI}xg_QW+B{hLRoq(w&&f&t#{w}iOM4%P+^^VaTw)0x;94t91{ z(HJz3KrB8-l_YSGdY5;xoWKg<$V40|kVa|pjBpP8Vhq3*qz!koM|u;|opbHdwtH>g zhDzz&4C3rTP~_#r7?HeD_PV|w==~O(+UZ2yeIez5M}o@dZb$TLRzLeQGXQo$+`>>3 zf*K0&r@-qn!3;5es3ZWhrF=JLBcvv<&^mjn&(CJ@$vx=n*WygoCn8<0fe+Zyfzx*% zmJYYwr8b;yqAoDpaK9lPJ-TJfrQ$%_hm`%Oyj*yhn498!dQ23UhQK(!EI?GjZ?}lK z2nJ?Dm4CQz2BYi7ba%JVp;!~mJinYd&|4^~5_xfR6)HYa>t>X@q^z2)2NnRJU1Qus zzy@Qf2T(M;zaek%zdM3gMLESBnX!1f`D*7zn18cQaCM<$@KT}BVhmuhmIC}Z%i!6{ zc5kgBfzYV@;bH}HdkH?yPUeJ(?)c8sk$920SQ}zMQ;X3D1CkPhNjT~Xl)wo$6n*b` 
zR4lprw4_{_jEjUOha>U;76xYVWk)luoC^;RjvveAg_I77aIhpX^WFqZJ)zx_t+$&~ z{%TbEtzG;Fx5)I282S_e3<8Zv|+M$9LoK-?UNrS=2V9W$AX%-AHH% zWb^?;0m773?rpIMfaGuiIF4;tJAny&cm+Tu;JgI7Mu1eZF8ZQUWRhz~M{j!}uoWcK zN~c`5r6L_OdJOC~kGLLJRy0jm8$lRMy^OwOa}z!_Z(JzVc7VA02+z$b9;)N75?Mfr zOzX38JoTye1djAx)0W4IHvqqm1cyH(Hu6M(PjReD)i*}?JBR5iMJ2SUI|<-%&<>GT5s_BpS13OzpcNn zy=#to_AKm0)^b>=L)voR?!$@Y2$zwb0KrdzII3(X4&Rc?vJ&iHCTPgo0O4*FJ9&$+ zXFpP4^$_EtnqS@6on`I9xa1-&Pxrn!uxm+$g(X_AyOTYRAd7+7(kGWP`v?g0T=PpD zWV||W4N)B$8c_CRBwu9w30Id@IhVt+p*6=B?fjX8WFjsL+$!#MNc2uD|HjKR5oDJV zcP@sbA9V~_gNH8HC2Oh7=A|J?@s$bRsh9HuGMVAfZy9{F7C zmE+sC;Ru}tCRJcgy>%r#;O^I05aTE@+hcz*zcg1+7lmglI4^uwsS9YiW#L)327KGil$6u*2X@K&AzBZ%s6y}=1w zNtv#Tp9V|TCuDl1a*;y%hMsW+*Ip;WG!`0|YCi!)p!k2LlJ;g{X`=398a}yz>Z)3O~kBIjBF)P<a@!R|`kxuOU4bt`z2 z`ONwjN$?!qX;`E$C6b!iFiM+llYTbC?tcEOi{tp-ORV4}ZvBM*b3R|HMDsf%tfX&^ z`b!HA-)HpzNc%UmSw!`4xG8@V`u5i^ej0io)l{FXNM2^K$>AY-^fgwk+tsX?z0w2G ztODTPs-k#X3h9(QnZ@}I?~Yf`6{FAJDe=n)aW+CP``*GhE!p~yA{{OOwEvh3MJ2U4 zf8IRx%gGIZ@DMsTz<4|xlfS=Z*gQc|<2~?36T(XhIeVjWJ(60pug66?;Nc$MFo6|k zi#~1NHY#SnVyePgx)Sj!)Fwsu5H#0h9sdR-DJjXgX;zVT8rmecoKBmMNz$yD*i{=& zb!FeC7pjcodVuLqBK_zncJt9&NU?2|CU=ut zK3{BmQdny)(qkHl{76Mg^o^A63{q(O;|_{6g0l2TpGm$luaCh^jmu3+UyoJA?WHPL zyee$|6QBC{k(~IP%GgUI2%qE>gN5;~qE3iTM&?CxPgrXv6}q!w7E`HOGchLn&NgZN z@$q*KL>cruX8>60*~tJVKkY%ec$($HyYD9miu6PLLC{0CeGJv&3FK$bXS7NjPU9P0 z-IQCio<%pKmsLVNHJ`VR)K^_rc&!xB2TvjuF1|y`5z$)0=mlq|_681(r2rJ-&HVuPfJniTz$yy$1j%r5j#3mCawF4Jv3C&DQG?*Z!K zQ?m>HG(YACz1)xx-rJvM)fkGcKE|pc8RBJ00E z8*;Kp>RpbyHm!t?dU_Zip0+AXj$4Xy(|I6d?4Mx zr66h&CN!nfi33KkY+h_(ICQxt8JiAV!%Y3SBBsDN_W-cCJg=`Sy1O%rbI#P$LG=9f zBI~E@SK)BY{rZoe0+z15k!od>I-&j!e9od}@U<5>h3VZiN?LViv4RhE=eq5F?E%NC zb3X+}%CCqVTJ8*zp15K}kaNjRm8f}Ew@eFUqyt zI43s?S8mG4TW)7uOHCA74+?3ltYpCFb7Rgxex@yVGr%Odoe2kJozx7FHD6 zF0CV^>x28dPUz4>CFuwCl|jmJ{g9cLb1p9Tjc!HT{n!d7&6k8_iusA?jlnc)swv`4 zdRaU!zCK^Get1{JI_sl>gzVvz=uGqd0eGPT#!w@Xy9_Cq@;&T zj_irw_GdIe0))2Qoy?z>RY0B_ZJ!5xQCp*|ueqZ%LL7BKpc zFjaHn33)<~$e!+i1ctmgsQap_<@hwFc5=tv(j}crZ!>s00-+5~U&}cNzzEJm9WIAO 
z``_5QAszS5G4WDUw2HJEQ!h(o0bl$=8xIMMmt2fr!U{Fh_96}RmCrscCQ&g3yL=-0 zI)(&NBQv2(q>@c{1b{=8S#260DZY8?%?L@gs|iz|uW=I}l@XtWqgnz1;oe78 z_PA;&Kr&w1NbvMFC^>#rWptiS zjBQ@t4>Y^dj3Ku0aJyA%l3tX*D7%u+skvPkpWCXy#VsNg@X5rqrA6#gq(fY>s z7oS|3S$;SsIsK(@SZkd-QH|&5#)%q6r4XZ{-iTfd-<_R%GG$@la5LU~rMkyufSRMP zO_enME49B|LHjQdsO2i=(V3VgGol<{CQgw!FPnOcfhF{zh;$KaWPsoHOd;`M!Q^cuq z`(;3_t1OIUc)r=f{}TN1r>zh1ukmv%aCN@$PyYN@yUEP5z${Hf=wTDU?XcxfQ#-LX zx<=nV)8@_acRWU6c~5*4ls-?}p!Z#`mtxJT5s$JgcvwSg?0GrH>0H%LuYGT}-{fjCh3s5v zNI7&SQOOAbkTuP|w*W@<7Aj=GV+yM?QT9bj&5iW%B%y~>mh77h>qpiYpJso4Oag)5 z^U&bzWhcT>60X$U0|I(3TFb#>tEmN!d`0vSTO!;i|Nfo&TJn{I(!D zWCOmHTVIvX?D6g`duTUI2A?TLBoTTna6hBEbS8-{>MjWUTx@T(a;;;o0s7O^YeK4g zdT08K{UKl)(Vx$v_!Fl8uO+R-{}z1t@0!wJ@%~`wpS&ZJ$9UgBG!Xuv!Uv=p>UfRLBq*q0EiJhzxO>+-zTN+ z@B&$mt^9sU?z$ro^v(sEhWP?KO9E{N_=FNTtD~A`84b6STwxG;LxFsf=}PW9+7FBa zlXj%j&UX444Py+HlNJm7v_JRXe+wDS_*&X@j=nZ+xl(;ZzcK?#qL`v^82xIcu3K!w(5%fVtik<5gD=+N zN6WmLY-e<=PezdV`Nd9Ojg>`eC8YL^t+b1c^r2H`3-sG-4Pg(oUFZUMnq8LdR?6qA z?KDNP{$Zv}Da5pc>KRNB*p{Es3M{TcJ4`TdOq5*bt8uG(Kd)bT4%JsOl6qEj;2tEX z0-x_@^#N*YQ?7PZ_Dop#KHFUI1?b%cj2J2%;T;zG87&bZpojHY?qXQw>5mn&$lhh> z7a}=Kh<-^0$-o?Luw^6tNFU3he`Oib!RhO-GlZLH=aZqTdCMw(lGjx567TLCRCR z^&+99EnY{QbwwpzRdQ9YQ?2lV1G|L>TOudI!R`ycK-vcqO_XRsguh%!=tnLEuWNfL zQ}&mE$`5D6J)hW&R}|Bq^?!<$4plFiU61&3n^Hxs&dPk|+qIH{NrPAHPau16X3WCd z#ZXtz+Q$xwriL93T3&kfAYN>?#bSv2OILPVEyu5wvFmIy_fFh63LA^;T}|aM>)9CP z15K{S0b334c6IH>-luVB4$n$OpFh{8pCp zDI_%7UzWVDEPd8zjdFau)=!6bm)Ra`MP4g-Dmw<9c5Nfv!D)@ibNI^FYS2v8j@Z#u z)qIOL5KO*5Um|HW%(xUQd`a2t99Sgxc)S3VfdJ!wB)#G<K%O}P0f7_)^AVU z8>+P0WmMPJmoy|y<(%~0e*o|ruueg@r@(ls`c3uX0ghr{4ei#=VkQm2^v|efDD8IR z+?=NPn9=!J;*TE}1-WD2e9bB1o*p1@;d6IN_e>WNOy-R-TJocBy2PB^qA6!ix!LVH z@j1R`Z%(ILD5~O1u5!*Vfc(!ZKO^!Z=gpP>#WMspu1ZzdWS@OTP#lyYv;*+^7ppR$%oGTa%WMQV zAr64hPHS}f-#?xY!^Lpn-&z8&EoTwbMCbVUx{jHaZ~dAXtv?SBA}~K?JoYpG^ZMJ= zzVB=Hz$*Um6Je&)`HBCi;bqiOTJ?`iwCTrL2K&S(J4juFp5Y4jdq%96xt%ke2MObg z9tfz>F#fZt1}YV~i~&AVip;cU)g0Z6dyLW*mLbt>o#k47MEj9vzFH#JD_lM+ozyJE 
z6(L@h!$_HqQ+@UqGJ|<`g_eoMH>!M;qQGvM%J$Hv`(X6!vET)q`-rTr7k$##*0<@K z;RdJjm{*p7QY=qxhrzN&t?UalIE(6b1%&8J=e6?I1V-GX{O5l9p3|OVIy6rQ^`&_X zZAYa|V$bU+yM8AjhDF-9lE)-;^2^C~A9ikieV+Ip3cERy7ufmZlVn{#_7JSjHz4bW zHbZ0*y{c-Y_JVTm8QG}z#daz4tdZ7|WIcY?a@F$-l-`qR2#X#_b?+>-w(PZzW3b%o zd7kA0Su-(0>UU~FjT$DMs=^Kh3v5|u4OC5bch*B#!^}IqC7Y~tl|>MpwSs&zhvgrn zwW^R2iR$w1_1}$w0kc>q9 zA(3IbG6Thj%&NV&JXTTKj*>9x#%?5Xw?GFhxXBiCF`z?xraj#hIxhe7TXWS!=}ma~ zN9)>83f;h7o_FIR>=L@5`vdOgGGvj8m1ZGbIC$^|vxJipv~jIQsFBsq?0esyPuzw1 zHa>bI$l25T(2y5gE~L225ja3N76U$bRt-Nz0`vOSX)O^}9dmQj^h&2AYjJX7&E>5p zg=L9&oujbG7TJy2OIemDdOugSBsfHiDPO!K*~^w{p|4}C?P=6! zR_n!z^p&jGOg7Y~omu59;;mYg?>CZ{@i`uP&<`oBt-izS)1*eQf)42i3+6=MD1dvRyvq-#dT+SyHxBfjc_k z|FMR@4bhe~?tD|-I{%GI(S??9lB)?lVrR8z9>q~#nH4z|RTQqp;XQw&^6okLo!6QR zCy6SB8$ct@2FBb&ZXbcavW$%CJyCq5&Hb*Af^eZhc*n{xndebWnH-cjUAS$mK#Oi; zBX1ln8_PZ`zHt10EO#W@Zu-Kd)Ln%DGh2Hvg#u%1bUvo?B)WVYmRo^pj@%-@de_m) zrJ{m;{)sSRJ|PpqGTmzk!-WWfx0|xCS7EiXlql_XZ@n-_!p#azm8W$vrtTjS3k|F- z1VI-!Gjk6!1H3TydJ@;xw5}Pt5fV0*%j;N#W7Img0Ua<(i&z1lmk-H}>+n_=Xry5> z(wB+%-tpbTI?`)c%u%KAUsXV81wd;A$j3hV9ZkL~Pr|&enpv~Jl zSXP%sT9}~(mQ9=cDv`@!pd>pZ;|*Ewiey>Nk~PAFy|ew)6iuPLJT~@_1&=WgBO8{Z6y#n=`G8Q``tp6ZWh#fp{ilO6yM+7hd> zY4!lf155pkdsrPm!#8JY1hTG)59*^v^HDd|uSk+kP(&UIMzfR4@RHAyYvV{hpCigdRA-as*C=lIYK@{A^k5;uI5rNRln*FA%UbRComSS~Z zqIceJJ>q^2uabz`6Lq*She)}`ur}F$Hqfkib%k%YDPFFXtJ#aa4H{BoDh7#hHJhJx z$PKy`VdV7b4$%i8mXIgP&OvJ1ZDl2|g{zAB?=kzAthFs|bOm=>xNpRqo6TApy$Ea` zEzOOR4PSk0@27>#YHtZeNpwJ|Fel&B)!ifau6``FYE2;3M6d|2*WuoHmaw`mr3Q#O zNbL%U9*-Byoi~P)JL>JIa)+{I6G+$~%Z(bP5aeRCY|n72U#{^tNzygH0*Zo3W9K8W zL0M%%Qc1k|q0*=O2&7Pa>f|(k0^lO;aEk6-5^#HadyEq8!AcW4iax z7WQ!`aAh^KiGuaV6Wc_S`mWt*X2!NI=V?0Z%|cI+R)TEgbjNIFMs1ucgjtD8Hqtkr zdBog+dKotCNA-ib*-0CKf6$t)*8osOlEit7FG)Pc$(9trn$LV|WP~GhPF!`OG>y`S zq2L4gaTICSOq1&}HwrU4+tW+ibL$W{TqhFKVt9N!=sg|Yq>|&Q|2Rp1`X2p{AM(Op zuPsM)RrtF{{JiyUhoM&5OqnKbJRcS9sBTcQx<;B1PH|tEF?@Z~j1}jIcI%FiwRSDY z4cn1csIF4Wlz?4>Pn36$TshvhfSA9*O{Tq{8^?~IH;{$PXqGD4=xfv1i6aNIlMi)= 
z-$hv&Nx1f!C*D>1ynlQV>j*Zi{Omv0cS?RE{3wux|AFvq{VhLS?UsB@R_WVcASwbW z#;#wn7LL{}giko;ykO+ETx(}|#i7f^O?*h3>Fsj}ztKXup(lNva2u7&ZkgEQR9CBa zTOtlDa>v5`+i|NT2b;)rOi7n0zAb~qnbiW}w zEPBURPCnhi!NJq&EcsKeNrG%};se<-Y5ibd5sZfqnpV%}^~chxb2`1cbVd##Nk!6_ z!QS=2-u2JeG6a$|M3#xYgXw~CO@X&SV)hRHhP736uyu#Q z3eh)`{&spz3%hH6kpyYX8gh~Bheqy9`)n!BSe5H#h4SKp_3GZgOh%nveEz^>bokJO zZA1k>c0i9iAl)>Rv&}M3)vK27udDPL-@TUse{bOR&bwxQ)9diO(6ZI3G>I3iNK~-&;e}cEsvYf^EZR48Wu_0!XVnfB@ok!C>gW zMn{ZBm?=*uB|#ftdOr2=^ZuhpK(Mmu=*l0BXqW#*F$X9m|LE*+@O5?uA`Zb<`(yp5 zJs0CX@&I8IEsYapN-3Y*JUtI?dV%+od9eC@@>i-QGWo}rx1&M;JKh{P19)SxL&oW5 zY0*abfqWd#skvT(6~TH&6cEcDbq`(E6@+n&m#nwcJ5(6usCIu>wp;U@?i{A^%j{#P zo~zxzSHVBC%arvV+WNs7by{X@dhbDu}fa14> z7H>yMOn6vCID|@P{YM8u-vPXtc-86urnRF9NC6%Gt-l9$RR4>_^kMxay)!P-nZ|oI#C`I&eVu`` zC-YI%kF8Hm!HE34=Szn(A|c^t9m#se4zc8xK4*(f3%;<0XW_Bw_mYy4%xjVoAo*`1 zX=oF6)oGmb@|~jvIksP*XA^m^Z^deeTpQZ!&St3;f|b6_`hJ!2TTSCK=RLBs#3_m| zS=R0s;rq$iDm=)=S$G${2U!dE_^ueUg})`4&28eDwfL2Bo8dxOl`QQ_9JMFlrP6do zjGraY))>q?B=U5rOP_GfaU7FGVuAt1`ydRXp`K+jA{bg}YuETQs$Xa*T>kLl#o!Q@ z3>xQ|X`yo&Ov5??e09g(x1GPb@O6lPU!LaiDyK*$x*tv3 zbj{5_xxJ|9*eSjaXoL9T)#QK}U_a^-XuATc@x+noQpMHrV@#R zYV{f)j#j<)m~K;LzM3JcCyD-A1ZY|JfrJ$pbtyc|E$dp(hs2XL2TPvw_sT5UJ3I*EL6JfLp(bcq zn!I(>YWx&=afKxAqJQgE`L{-%|8h74AcgxQB;x;+|Nksn)3^Qey(gq!{&x8yT4Azo zhs*D2EO@V6IS?T;mtD%h4MF4CW4v3^b2l3lFU)DyYIvT9uw=P-C+%t4{IzW1KPB`2 zb3pn(d)q6u6JR>$agR}Sr@^K7#=4BUL*9+?x|!Y8jbFV$E#qvO4k+r_mtZreFHp77 zqS~~^o{6)bN9h}aOro`KhHl2ok7z7eGYs3ExBvH2-~N&I_}BZyzb5`CDLVb(#j3@~ zeZXv_{8JGnT3J2oAR$E9__rjzSqyoL(2W;!SDvj(@t54=)G^s zOTypcACi71dGj!v3fh@TJA!k_Uhs74&|Ci5x9eWM(YM&X0S%crpgvlL%fMfs1CZVY zoKFjiF{9~*pilmgNQm{fLsstY!h;Xg54xvXmM~dppLR3yBcG^Jy)E8qAvTAqBj@$L z^}RJ^^?dJo24czHR`3=70Ven26~UNF&!J5Ze2C-zUT$~)x=C2HV1*^ueYQwHfVw06 zD|JVY&-S(TSTYM~s433xv4Gcqw|3yzijzXzmy66_E||E2i35})l-6jey(aDDns^(% zr)Rv^9V+&$K_a~7ha|dO&J5fc7w$v0z_SPK%ZEr><43u9u{jR1bvy55U&znpnsrsc zE#b7A9zY0PneV+s&Mj$^`NZBE$~LdtUdVOr_BoFe%PBvQJ3%1L5Sqe^J2n3FV~N6z zS`M7<=7e$K6sL|f^kQUI<&8kxi(F$9jB>jGwNa=z1X@W%*Hj{Pnfs*!=x9FsYDPw2 
zBsQpU%R4zVroeb>Hh0^>+L~GOUpk6A@Bjtx?BCNOFi}Mv?vZ(+iN9vH>S@+3c+l1a z{c!ExtTLG9yYkb^_uK>?21VP)+WfpCG~<|{kA_V&{JU=7 zv+^tCbTpfgfmJ}*Zo*{X$qGSEJ8UQDh{x3H--V{~ru+`CET4dAHI z(BE1NuYy3Y+AyUAdJqbpR;NwkQ=Z?^MyBD6P{(W2LB7rqv4anJW3a~(dE+}ln}xK{3z}2#xQtcWg9C(5rtni z&H5_egV6&fy=!c5)$dx>L&^ z{q^#!mMX^^wP8}O?@ik1;WlaUbnaQD*{#|Vup7n0p!Lc5fa_Fv{6(J`V?WYI!tWEm zO`x0&06@v$y(RtLk##?miGpX@v3AvO0d0-nzb*Is4JUE0felTl4~noi>}Z6aSF6S9 z4>$P5R@5VkyqL${jZWIQ=(oNMM00f)`$}cC&AvIyu8x&4Ub5P4 z|Ij9PX}OMPemX3;;4bi%MRX1BXInlW`Vu}Ld%O6EL6trqI_-R4gSOj38}ZgBuDe*T z3Tv$X?D1JTQt+A`+eNfoOC^QMV5PfSwr|;*l)my2Sf>rN5_#@|STRGS56U^^PAU~U z^ds&{CTK|+UzCYEucuphZ&9QiRUq}EU;;UGKiTTV^LIv?Z>0oh6#?l!nNmFTI7}zb z8jno{hl2CG_IH3a`^^PwLbD~nnrsIFc4=;4C94-ILt<0aNZAkaXHvFzObD}Iy2u(f z5xEb24LzgU1|M&;6T*xmn;@@s-iyznq!%vh^jk6R!$k# zbWj9=3Ns4LF*Nn`))*~NPtU$;mTOzf^Tn&bHUk}KQoGBHlnECzGGj1OHk|z_R$Is6 z*ZWN7@`=?Fv~lkaMg|?FhT2P%6Zrz3FYn+dw(Jy}Nj4_h`+7|9QjvF`?*|Kg^2Q2x4p0sp`p}dZb=V2M)?~Fp&MM|s z+*Nz1IIU!~X7FUs9v#5E7yF$k{o~bK7D5#PqASk6r1r?QkmC7)4!k$5Co^T zF4V;RU+(Ry|Qr{TE(VUOYZ9u8*{sYfqH+@AUwji4W;YkD!%lPXDqP1Im_HThU`FX{8n3isTO zvlWB-S9n#1p+@}?YzHYkhmp~*t9u!*e7o?maubc9R$FAV($N*G-vg!hg;K{gDX=a?JJH2t z_&+8h99l(KbJgr!5*eX7PLU2}x(;K?7(1@dztnQu&Ec+Zg{hu=$R9djjS27$H;`I! 
z`|jEHiZ<^3t`i;uJyLYrfmgyJMfn6c*Ab~65c*hh9-$qkg}_-s(E&Io*|Y z#yh)sjslRE@U4oc+V}FTr;jY!J4?Dsr?n^9U*UMFHNaug6go!`b6#v;=kV6#1Q6-O z|%gp67wg;ZjoI*C$_0#m}fS8s`M@-5MW z(2cBo`)2R54zxtsv8RA&bhd>8of`2P4UK@|ltle?8sF&2u3OxFr!>si9+)XTJqx|) ztMl2;lxkd*S) z43Iu*jShahh-LN9nO++l#)ScYHmv6N75SK{!9{Kh2Y;3>H=8_S}hM*oA^GBzp7vGL{XW{(I@3m0)8n)E=G%g73@x-sKChn2C%jZSsW_7jXP56uB zq$40)6J@MpX17j0{2!NH=R_@0^F+nO4aTr&=x z?W>zh=4N@jv<#P)6$3hV*^+;O%;KRNov`snPc~-N-Q47Sn-#KG(54OwddWTSG+blF zr>ak;KYl*HRZ@2KyU$MvlDM(jA4={&>dU-`rvK3&65p9I{!(qBlU!Y~2w zu5lBtI9bf8D@DrmZLERS)7ZGeuD(7535|6du&IMR$GD=$5s4cqvu06clri`({HCtO zvaS~yD3eJcFMa3AMS5hH1UsOy-gGrF;^|BMm2QReltIg4R;ol>Z67FVQ9WPZn*n2k+|pI{atJNU@s%G%OBFIb>F4G!RQEpzP}LO-7P7#Y1!d?AZrCZm?+ zf0rq_q?j=em=Q>tiKcPWM&6$Eq1SD6YDJQdjax zOU(&m@B^_;=(2v-WVN@x@@W5)Uk8i;76@dNfrx zi+;!!n4*OX<1pBfra5-z8{q);3Ech=MI&EVEY03Y&1_@JQQbIMEpuL1dixhhwK~lj zaYyp)-Hrh7V-_YXc6dbs;nV-n4PPGboWxSNwY^(w+Y1z*i5;Tyf;#HeFE4k?6>m`o z7@GV@M`3yIO%%*I-l~t5Anm`Yqa{Z*rcx+XdeAUQ&-q;H>`TLk*Y9ncWFZGGy+?9i zBk#b9NQ@7hPA4&0-r^BeD+W>`eOR#5xx5CJ<>S5q)5Wq4Pf_gsy| zuysGny`6eA#+{u3hHI8bJbpGXRyBAO?ZE!gX)JYVFtlGl<(WC5MlnSDJlGj`O5N9D z3g35?R5^nJTe%*rI=G>~K=3E9-;nzXBy(e3fA)Dyr(oSOrl%dgG0-$-ndANVRu=^g z01O#ZYHumLXm`p>arJ~b{5j*!SLw{wYmiZSeoT%JEYyB+?XjAR3QQM>-C}QV3CG6~ zZd_f*=}g_+Zkf}d056F=1{+KOF4J8z+Kgp|zdIT-A4igzF$k;r+j7hAtyW;CUntQXcDMWm!2cht`ThS*31s)~Ws+%j zGPk2J?yqz8#0_~Jj~C+^aeUvmMP7{AAMbLWDo)(+tHx22WWRCm$oHyW&1Z6pWo(0_EDLHB*h3|l~7$%7--!mHSjWlaw?)Kr*P z>2_kSo?b-?kf-n1_CQX94Ya4lcTkMX5T3`WpgP`MU_RFyty?F0t;d4`r46ImV{k^x z>w4(hrLO)oYfn`G(V_0c5Yl;y=`naE4}$zW#>D^THjSvbd*t>@78zwrQc2&`YU#M$MB8 z{o%`zLMgB10tnXStJ8qKl?Es0fm%6n3wu~u=jm7)1JB~-fBx}xs&9@b1ifQbJLzRF0TA}9+%$!%D^I?D zy?-Gv0CaSr#j_TJMv1gHI9H)|+RPu9*H(N>6aaE^6UwxUz%hR0PSVD6ov!rm>XF-- zb>;~VXn_~`ORNOnKLlB~w87$9fPC!UA9h(9S$G~plHrlxt)&021M~kZkpIs<-`P7A z7nTD=w1YQI^8R$^0b9iXDg4Vn@i|F#r9Um!HS7Fh7{LgNur6cNDs$Eml_LLGI$?Qgo zzZTe8BW?B%!--;Sk;5TB*i1+f8!s}>QYC~f2QwdAx?XK1{!+L^?V^TB1&lCGkvn^q zQC;4;H90m~Od1bfeo}D_eH~MQpTF$~*>@uIL?8$v2?qYOCS~u(;1|8f 
zYoCYHIlF}F0^97IwZu?bXNl7#0_rg@l1sORKf+oWY8X+!;wG-6Hgf8^jfbo%fJQO2K&cFT-F@l zc&(F28Rk_PdOH2H=5bsY6EC66m3D6p)$Us%xm-LGNM%UDsIL$bPT6ISI!U(9=X#@5 za1|7I%P3U&rBGB$n#0S^dA0ka1b~B3~9?0kq)( z;rg5{jB~N;!ODPBig}iqkOuo}I&7O6zyDMwu+DpzjvT|pj>(<2FvkzKuFk22eDG(` zmk$JD#tYhE02<$}vJ@l#Q+dJJ4sMlRzd#S-SxAMjmwMmw^+J(%tN`_i1lD?Ag|GK4 zh98Q)jF+pur&15npGn5vHVHqO@$B8Jf^r=WbW`s*UyJ;{)$+T_9j1DynESiJT}>-D z`w|d223U)LTfZLyTHgQK?S4iKY{77`de9Rp07+LKG<&Jv{R>n@`lmytw*jLFUT62! z;y&MKmOBuawUhaeDifk47+~kxNX7CeGUyS0Xt*rBDojvcrNRt)7Q7=|c;Ttvp=Zhw zXOM_2$*7BNzb^i~Or&WAmgh&T$ zo{1)=8v0EKhu1n-1XG#1+xVq-ht*HN-YrIMPQ?i*y}ip2`QBQQWGX@=dv+s*B!m!!N@ba(vadt3W(lE) zQOdrrS;xNbOOxFo`!Zu4X6binzq_CN>3x5{&-=XZ^ZxEX;+iXSUdMGF$9bH`c`V8qf$W#VHp_C_2BH$^;- z7eDifXh^)jE7OISV@rIvOUapGww_MwQLgM<#Bps69S5@z$JMb0pEB6PZJ^D_oj1US zWd2N~(d|~?(|wy1^rPaFoEx|MT!)jcc{P(Z!v>Mdu^)ieI@|mKnOA*MPC2RQltkY4 zPvSV=p;@-I0qviz(QCZsv1Ih$~Fx*d6@TU%r5Elis1$HnyqBCJWmN^GQ0_W z*PJb%Kp3yELIHrQ<)^36NtmhR4XvD==}N6=Mn~kVQ{YiOy}GFCu1us0+Y2<-E*Uy} zp`l3+k|$6Vdi63&%ak(W(Jobv)>aS{LvL~pr&1-)fl>0hVlH%uP|a#^Z7EDqa(B#N zHi)d9lDnjtkN3K{mTT`s%3D`E=a$Gf`I2D2IC8IX+7RnEnvU-k_j}$yd`XIn^OVqe z(gutTjjS%U_H~dz9k2vDTWcNg3FIDr{1Ve3)#hBsm{6l_r)vgrDGsaCy&TIZ=#UMp zjcywV?~jU$Ys*#3z9a~C*;^cDbn~>qs}h~vwJHadFW5YKNEJ7yEbkk=2sdA3zS4)P znO4ntSH7bD9;g7}3BR|@!&c}OI5r{g10eC3x$)?B+o~JO>9xB^gngt+$L8Pm(1;ge zVO&{xo%BLgLjnkrO=_kFnNjDSI+zc5*SH=yK=V$$=04V8zRGcG zr3)fcZx%X21)z7XZbT7fjQo#v5OM)w2XKExJhxFc$)pYr#;BT_i`~be9Ptqlzzh&{gS_qK1c5LW%THrr=^4x@A=OZ(7Tmh0=3+aK&S z`&VXNmNt)1N&4yxtioV-3x#DR5_o)V*=VSwF3y`$RdaD#SS7I3Ye1#@iVKU%iXyw` z#3Narz44pi+wm(oKjO3GFIPI=TqwyN6D2ydIGi>LcJaa$JTxW(| zNOI!{!nv&ndPi?nw;=Yulfp`5C!y9wrjU}j0-I;$Z*wwcY>5e9l@pI1f9B)_gza3X z#}WObioF6E%r(rA{=BO^ktA~w|6(2+_D7KLMha7Ma>hJN9-0QIEYX|RFY66kToPz| z7cANAu|MtJ&{affpcMf&G?QWKQB(gq?NEQhi93hfp0qm`JFfYy{XCrm5U8g6G8ZITjmbAQ(ke;wC|LUZ!zN*&4 zP5rZ!b*u*+V`?@BsKXC2ZE&(YrKx-9Ji!T#b>ewcvBv->QCk?t`9wc#Hbb`cT&Fk9 zlycWPwevkd$w%|1_`KDAkRmx(NadKCkLJ4{5k3TaeIbY;^)OK3djkL@+Pg&Kx;&oM 
zKUlL}&@zIvU(Ansx+L^$^+pqEra3p{?o$~M|4ix$RE>6ehMok~q3C?0*$WMgk)wIs z@!dC#SAK+qdO#;N+YzQ_1NDObt!DMe|tLZxoUb*Txi@p9UL zY9Dg2^0m(3#PwBbCG(YbL$@kp$_a>vG^M|tYC(|nw)!cTB$11+D01?0ny$ZnmckRh zYT8M)+06gC@bl6L>gqU4ryW?Fa-kK20J$D~QaN%|-4#aT+*ObDBX}OF3A}|6E^*`U zBdDTdnd_oWXQdZ&hZGt(^Ehb(1EsX(Dtv?a-2Fg z96(90i=nyMWkW-VCo&wcCm%mf6X?QU%^2$YihZyRVj8x}xB=lirOA`RH5uj-W@8M) zGNTV?)e2}2>L9dTxC_0S26B8U?g_k%IY#$uC92z5C#1=}Q$4 zS!QOBHTqXIwy1zRPG$89zSAI6P~7wN%#i^8^NKZ>B<9`QO(PhHEguJe$qbK9Gm z{OZ7N-9rEx7DK+iI<*`hf8?{f4^g#!>07DIXdsv3O#9Ccrhk^tjQlyDd43w=W%5%# zvxZP1hApxvWg9P1^R1YK1XNO!QP7$v{yj_qUwxbKkeY2SCn&vnc;%ylpcMngTeyg)p zK8YL4C8Qe8{mQ09QD%=-JDe=sRC6LWZ_dzxDQszAXKN0DQd3q#K8MAD>wE ze9}7U#LwG@*Er-m2m6buwg2;`PGYmp0W~QBFCfttU zM!l}Etdt`E$yN3<7WX{zTpOrBhmG1rg9*<1uU?o&a>OQJ<&dpoE!p?V)mqR15=Lk# zfc#~M1e$~n&S=Rj#gvdNBL_DlmF_^DR2N}LISc5__jmS?6iR-9@%@)`EVfOqQJuOQzCSxM712yM_~+t z7q`FsGm0HqI}SlQgV^YTXPk+LH;O|?tfzJUd`p2%3~+k72I0*Bw~8Q8J+UA`NeaJ) zigyK~J7G3x6FkKgp#OxlK7!N_qz$R9A@MX-0EiPxd)w>6&89M1UW z_WB9FgBn8TXi_tBGF#frZ{aAiP}V!*;pOcFY7vCp5tO%ydo$4cVrk04>SJ&EvzLD8 zSgMd4Q}v5m(c2mUAPkb0)Q;FsfZmU?GCh@Y(u~jg%_;-Lv6ssV$fDCRo?y2)cv6e^ zoC2e+GKkz`2am4n2Q;d?x~ncWXgY}SAZ4znwR=!SwCYaM>#Oy(N5~6RTHRE|x9bNB zV-YUwX77l=TJ+NL}O&zl6So_yZAuUO>NT+xa(gT*Amaf#{$yZi zRWe^)Wx&IR<0{rKBqri{g8A1pYn-z-Hhpc&ZP?)X0!NA~7pLm8Lf(5Q_x;MO3V9qL zay(9{Zl~aDv9JZ!UowZ|FA{eHx$_6Urqcc_YX4T(`phZtMrY{e1Sw+R$MrL^nP>k( z=l{=p!hhv8{L}k?rKe5b&%KL7qzotk&CtKpb4%EGgdyjV(pZAV{RQw%PTI@CzI)0m zDolu(`SVYf9}C)0MP8**92mc7f41Tz!m1nSvKO^+7Y9xxs4&Z)#NIdSy8ChEXpsu< zJ?=~CgN<8{Ez{*^cl*x?ogva>W5AyIrSM#f5sn@!G=F}oVJ#VC(Y1^uzG}AM0Mz{g z!JsJP0|)sm4UKxk50!{ko$lu1c;}k3wq?_+|AB#Q)wfQwzqE4XboVJ9%tq`U|4bD& zHVZ4Q8$K%R-qB)g=S{1wOVAb{xlSm@b2%pHV5VzJk-J8i&WoGBRq+5I-TRUSOFt|a zFid1|JVL6hyP0To+LmU(^kc2k(_dR8Mr`x(Hp!DJA8L|E6Y7bGk5P4qd}qh7_s2qI zw<8mdc?S85Tu4YdQ=$tA0=d84SyuY(hw`dtJ+UB_8m)mQf%TS}BPGiB8FlVg z>R)8Xqq^*r@oGKd=dP^FoWIvcds&^ek!+pJvkv60>A1WR3M6~hLaB!xb&QI7!kQNo z%wUeUR7L62pFHZ$o=cFmO<(o_I(E2PV~P}GYDU<7>aQ9s8O{dvYAhY@I^dyq?oc)D 
zuuYr07vUc6dc_IcgB0^P>Mj5;vmWKbDkgHZs$y?x zH^J@o0NlJOxkbe35u4X!z7`*0zKpE+D)86Rpe<7p8{2phVS5&W0}BGrPfjFelK>;D z{^{(cyj|pP%VMJB>SwRqa(TqR7DA$fP{BKyYJkQbuK`3^bB=?U_(}6ZD2@iune+;f zxY3+x-vXnv;{edm6E6R@h}f4oWec?LpIA1WlfKNd0M0qcfe|5?2_~n3) z!auCRFIU=K3N%@OU(Ou7#hwmq?1v$wpy-_t$}hLdE>08swi5=LkR$pD^7F3)`1@sr z09F4j=l_4?@9z-$m)F172x{KQ=(B6Ph?sBL=@;-96H^-TWA zyw^4C$wm5mk5HR*&IgjFpij&Y7xhD8#^oXpzbyk?E~4+7lU**%mM3Iw9k#9-J~U+M zU^w(-#gHe01v(m@0#P~yI5PA*C%q}cJ*2o$C&t{sc=8&(!h5A=Pn~+4m)*}pmjVUR z#s*l1Z$N6iXuoVmO?&WEBC)j4LQk}QeDYP1d($B} zU70np{Gn~EFtB(`?v09qx9(0L@d-FL=WWwjlMi@wL}Y^aXl=Q%pxB32E8CCOCC90# zbH~>C4)Jk#L@tp*sPz{GtHFB#=Q0>GdImF}*y3juspdJ_5wn@=HScpX;R6+JNXhAa z@|>o)3sZ$JnJIcR(l|6EA6IKk*LgIpMhx4kSXO?8#Ewy^m~%t2mS8s>U`MH;QVk z2SrGz<{o+O55!z~X1rpXVzb<&?_=wS-uI%4Wj+y_^Ln_XX3fs`KDO19R5eg=9iajswsUdA(+W)??+z1$5%y zmCHu!e(=Ny7rb7amdSfQ5aS-BZk+w;i_;zTwpE985vWRn8A$M2NnL_KBYk@3)54~$K!P<9C*>SvK*Rf0%+ z97!wCUATZnYipY6{$qYs4}+slk}mDz4csmBj>o=d^q7ZiLp2l=>Z(qRl~sYI4y&ik z9T-yGu#YHeRLZRpiRTGZrJ!DIb1fKy`;GA99313OKKC9Un^kOM2;h(yjD3@`R4mC! 
z8U6t^yXjfB?-fC=`*!j~seX zKQRG`+U?_;z6E||^8$*@k93y{>P5gerM*6Qz-|Jl=ufbt)ua9pa#>|NH{qqoW@hdl z{k);H>^O?I@V*>&5eb@h398uzC=B_e(4KT|G0C`M$SH3S2(Ow9%fb*iTDi0}vs(Cp zvfXG4^(g;>!TtLJCaYu{x%gbRoQA#Drpnx>8z8xD3-#mJG8x(QhsJ8W{r;QW2hGXH zkaVspxMFPP#$l>)SghuQJ#zB($8rvWN6aT6r=ew&i|dS=MGK3V3!2{1a`kCh3-AM$ zscAxrELLdBV~jU?!yirFKM%TfN@L>%HfD^&AKagBWv!O|7&w3AD<3C>3GwG)P<1B0 zal*aG9D-=(pvlOVm0&o>Se-r#=Y5uXbMj7LX9n>DrBzXNke~_<0P1vR~)r!z14G}2F=7h4~N*)v!>z>t^aH`1UUI(c6ArRXUqU{s(5_Gd2Ov=MtOz3r?}3C zeQ0F0>OR2y@RY%s#*ZExKsHkffDMth5e8(K`Xm78)O2tx&{V|Ez1w8%KrbA#OOPaN zk`g^q^af{hq4@oJOY%ax)q%3S^Y>FJ=Tv1uQam1_Cc@Cg5KM-(dT#$*$6l)n$43Ib zqkFF7fYGK0JS4&W%i}8bPzKj?As4Xa!;=Fy@?)DjET1N>lUuju9PKECEmKXG365uw z*Evwr@M>fLEN~Q*we<27gt%Vm(OW{U9Ik|<0*}AU~SNGF<8y2 zO%6!uAmgM{bp2THjaZdtO}C)UYtHQJyFBDC;dCY%31;yp9qvI3PAUX{ba^+E8*-;X zJ}B-(mR44WmGovUVt?7~V?&DX2Yh1q&)8V=kVQd7*zNS#Q&7E=%s+PRQKeKTwyCW>(k4i*D1E+0HQeQF*C z9c_#Awb;5$yvpu;ClMDkekKc+K~uVvpVO)}ksyIB8ds*wX7mO7nf0uxbmdpyz#9xZ z2%%rvUa4B}uv_Qan8zM!y~Q_P{5IVbSP?bIO1v!hL0e2&PPaAzijEq0hSv)ERcmp3 zqi5YU^jy5<+_b<+X%34G;#1LPzT7eAOR)^B=-$G29MT$p*u%VibxV4)c5_f}7w zulh=>#^`Oh5me|cpKTI5H^e<_dwz?NMCK?;q7Ml$JRKwEemnLh;$8{GEhoLn)8M>{ zUg)y^g1S3{Iq3+hOh#r12iGp?;f}DE8MuhaJ7tsq>_}+nOV$}A+Kg}kXit=904O1R z2-UsY_*BgQ5 zC;7AiE^zWCD6Rzh>xdqIkN5Zd{VgwluaB=9=HEacSiS`^^BjlZC?1kBk`ex^MvES$ zcjpp`ux6a+qI*O`tOt{z3Xt5N5SFF|2lYv+cYtJ|8;#Jl=qaS!OdJS=ao$>nUqQw`#TQetHvCbd{jpc= z*T*t>egYiVFkM@*cHJ z+2*IhC&rhQ6y+2|Z!9VK?6{5&+@&=NlZyH-O7KA>fjJ0riSwG)4v;w~k!%^1k_rxC@PklH8Z#)8(_g1k3JrY$Xpq>yqS*9$T2RFs}xh99MOzCRPHF+MJDCRF3 zK`P2KMnY*^g|3Cc^91tZkaKkoy_bYiS>IxM#9GQ#!&{l2HVI#PP}-vXp+mBrk9Y@W zuRUidPKqVlrIu!KLvaYy?ydYFPQ;vi5`lxwgSVNE13IDL6KFzr#VRS_;Alf9u6K~> zN`Y=~AX)KIv(-ZD2J}mp@p3qc#d;i`iTDJ9cL2Q@jUixPaTbCmCl-gzyjQb0_q%fu5MK1QrSBiD-L^>g5WdZ#zsKsya)?L zX#!PH)1N?o34eTo0tef;mHk95>1eO(S=%QJB|)`O)K8i-H+al>vS&>)jSu;cVcv&R zBi&_q87p3k@2L>Ms4QeL#eS5^1q|U|7H#GvHJF_CU$wg-H8Mr+%*f7NQop$aJ7*9DhG-DrBHVsico|VCb6R zEBHcbqaYV~W>v|t2csN!G~-zo6(C^32^`A=I55O1UCBPup6-G+Y(Aq~EFAJ8g5lu; 
z*>3+MHYo9B2&PuwWKEKX1VWIY#t1wFHsA&vluqPJ-a6y-cBExmS)SCCJu~Sn(Hajl z{DRF>fpFxcLjm(7UkYHwlLNJr^S$l|+}r30C$RyJnVJVO-y9sdA{%N6B$5~eJ2++a z92Pdw{{bMzp@yw#V1|M#SS?46^bFqK(k;xtOI~OZ+)0>p^*SY5#mTru#K??o22}9F zlIm?n_qzzllcdZ_-wmIPqE9=SnJTjX^xl)L$`Z6?sQ1W13egr!s4=9NHtF69yhKk4 zf?E|($8%aTF`Mk2$Sb(Lx{!76AX?>8``HxE&IAxRXDuGgMa(ZVl|-Dy9L|rq$vY6# z9Wu`K98LM*=Gl<@E>!Ip!G$1;ackem5>ig;(1-cxmk^60e_dI>i29ocQ?wVZ+<5MD z&Fm1@tu2Y#!F60YUZLD4iU(og0Jl!OTEZT}Cz4a@MI2go-DXG4;Z_8WG_)`XCAl-| zs!W0y$hJ!H+&Hhx7-TI`1x<=KFzw}xmhhE5-j}F)7haN?#2zQdzxEm_C{Jb7Fm5v7 zT|ksYrwt1N zr6XfSI;`%$nh4dTfXvAR$73u%x*o@%#&2(WX300%l}wzE%{vLLV}UksxNHXptyg6g z<+tf*zQ22;cdhvS)PUVk*wIGln2I9!2nM!{k({}CBQfl1!q|fkEa)V&bmP`?z!zg2 zI_3XUX~s9_%uS$6%-38%{)y26%0KvK(n7G59%Ve^F}1zo^WcJODu?_{+9HHJBFXp8 zN#`ua!P;Aex@)yI?mKVA`Iz_zQH^xNG&zA}GoNsb3IS&c-&}%_>w zi<|)jBox>-C!V={rShrGM`Vj&4WSwS%NWVO%mV=bjn*Vc1HMZYk_xb)2Ls+uDmWVT zeIEWqrOxlDL=pJFz9v72grYCG{``Q2RmfDx1nJDTc?1l`e?E_Z=fnF9X8GrN^g@sR zHH9X>qp*R%2l0jfpimQc8A=NPvC#k3G&VxfZwSC7%d;YgcwYc$0hlEkp0SWE3}7iQ z4?zjeNdS#!cW8uu=e6$+jqoqh^ru+_|0;jKKH>5xCfS_>-pK=GNdiVz!wD!_%N3B! 
zc+IbT`gIzQ{leKlNq9Jkk-4U08_@V)L>huguK4KzycZa@>``By$pUl6}j1r3C|2cEf)sIWfS9QRvO@ zBZlBEBf-eOe(47IKVL1PBt}7iqTOE=7Y~8Y#CLR!>f4;k#hDVi2=GR!mC=t;Xwogj zZxjUZ!qtg8*^~-dgb?T_5c?ZI($#@CBYB#97Il@pR(LctWQ$V%Dj}BuZH@Zf>J!9W z9tG6p3{Y?SMQH(C*(rqX{9@pf`Sf2Cy{$ ztQeaBz4x0Xod&#l5{B~opa82?JfNS+{)a=hKz}opx=8e8NAjI{WBs}vBve8^bEaoU zng7C>+gUgZCBa#p*_l8<^*q^G&IV)9ZxomOkiz91{ctDg zcO22vC-8LuLi2qJc|VV+`EA6{a_IPzN)*-q4NuZ|NQ~B=&~Gm}J_|+b;pj=8h#j7s z`pjb4pUkGLU-#mWU#6fB*vY7hDiLecfD-J1V{H+bv@&3mQRpB9ec{R-Xvb%+ochd_ zyT2^7U+p?mfGL?6K_WTyv&;cJaqxuCuzY99$%d^7z*2l_b9;2h25Uq7LeH-YIo%qk zI|3|;c(WfYF&Z=324Zr+A}NuJG$mknGev)C-U=p0A4uC6Hp0kU*vCm=DQs?)q>oy<6@bAr7+lf;B1x1SuWU(l-tidpQD$c z^99!duH<4=h~)4cq2A$&AO*Y=SROlSv91fSHh<=dY{FOvAy5LbJ_BF7soyQ$ZazYX z+4NoFm_MJ7m(AsgY^zXio@BrAJUCuo#mJFbELrRF)cT6l1Dqc!eeSHa@QD)^{GF2D zbA5FVMNC8K2wFtX1U5DOx6|Scz0d9)jifXMGmx{ z6>1lxsf7*kqp4I}HIfY8>no!#sSD}do(gNKS}1Dox`xsr`n3avZ*4N>ilk%fFBWMtS5AZ0v=;V&w)IO10ANJ_;!(_Sp6jM$q^o_9 zW-}7Q%|eiyM|}dBhWJ{^tpn-r8OnmggL`tbhol=Tow~uBmmgEU8mcLs)w${WBy>DDJY#X`)O7IsZk}k1D|{-fk#|fx zK~h}XRND>cqJ^LdIK!4Q9@f%vkb+oIM_x{Rxt&{*s~2n)% zLaqGZxYWqSho(iBuWeSIs;_XwS&u8@RImc43L0yl4lWmztgzhTxaQ$H(|rEAyxt+% z5N9Gq4@uBfxEHOuNKj9z&lI6pJxMK}cB&hk!?2G7u#u^spqIA~0p(N6z3>Sm2`>PO zgt@AMr-z_vl|D4oa^t97(1b!p1plKm9nR~^I0HB>e8!?1ia8A?oywdgNn>s3kR(mxW`F*YK`>}CdW(NU z=4>jisD6Zt)0d7KvF#XX0NuDi(NU57I)LJ@9LWDJ_ZjQvOjbBTh=95L);{8=44v|I#4TGs0c0>&PCQRhpv@8^4=M%gQSlEdHW4*UCwaOfrg~bjK{c zREKSQMCw36WRex*$I`KUo!5=Nl$?B(UXMh0$+2h8VcUz*5W1#uO^ViShTw*~mx>*$ z=@9~tW6Ekf6!4{HTzAU1Q5%i8H0uMwSLHZ9`eK_XszQiZX>fPN5>_CpYI#pT&DlC8 zR*o0s>?h(zcArwYWfb!q&&yqQ!(EKjCOT)oZR8VS<#fC?g-X_0o9~`ImFUzl_FLPR zC_UgcX0Ct-D&E*1aDtqIZ*iB0jF~L5O~CytWJ9XA*Mnc|PP?iu>~ba1Ktd&ux~yHe z11jL+Q{;e=sBtE6Fk^QY`X*WG3klR}4Gq+==HG4g*b00k8>tLp>Nw2bncqR}c=h57 z>oqIx`flxa1{q@kr};(#18BkTJ*2-C07+@GJ6@=1aHsDj@T44mYAN)5)AL>Zm`|{R zWRnV+$8rE6r;o%yu*TV>j#{wl`eb$v-C=syO|iVY_nPOss@?;djfB9L0Ys;)Skeji zZsWZ;D+lf;2jz>>>uLtC3SIU`mYz&Hs)^@IuX)EnHV$ZE=1MDv9#_%x^zVnRSiv%exXNfEBo5W z#|917;GokOAuj)SVcgK_`N0osazVCz 
ztAY#DZ^mg%jUH@9%UtQeC%A<4@z0hJnRgr7B`d#qpH5h2Q)YlQzvS?<>kH-=barsOs?>?k#9%=p6*yogY9hGtEw&Bv9b@ z{kzZRE`TloC6&MrpsUI+`ObSjr|tek)*m{rd;v)NPLj?J1ji4QwC6zLFHlhU7lfSu zKJ*FDvmF|C==AhgRWlaFiZ<-3=fNz6^K4)AVm^2RXkU?pqr_j7-Uiz=oaUjUVi;D9 zm2{Pz@4YblFYK|>4dfWIbQJ1$`+ zRb@iILI{`E8Y(LQc<)Z$QngjFpYzALO)Q68=poa(abG)2V6bP!BqdHdrgnl;QRaBo z`X+}pHS1_CoTbj}HY6q)Cc)33+V8;5%ai+hUCx6GwNUhkz@LQUtS&M0w#^}hBq`+F ztKW~*e~K>+YZyY+e*)z!*U&RKKV?|1tY^pJzmvtl)KaQmcwqLLfhg#Uz;#EE+y zNr9KO1MSZ!Fr*g9>azz8hypT5Jv8r(z2S$G|Hg)MNY6LBdSS_fX}C%_7117kjtP zEG09#rS&9}x4J_J92m+P$z3?pi%O+~1@6Nk&LSNVRF~b6o|6miMb$!bl`~4uq6jia2amt)bvUX?x!IKSZbO)w2wU5O zl*-SC?t@zltDQq^8e03kd9ZJ$RB&%3JG_*41t|@}>j8(Pcp!i2dnt6546X>kNoxSY zX&bbF-~Ws_+FgYB+Af5QaIuddkvxmFWK~M5rf=bv>Xa2OJP06{)ha~GN zh*eD2nwz~Lv+hH1qA)AmAofF5#K^X+mE7A6TM|dl@#H$*SqEjJDI3%W{_KGM*_{QkmwqU zJH~kTAMRtsZU*ikm7eM|MpX{D7`Jp98I!XsY^PsewysU;M0JnselI9{cC3y5TCaUf z8lKneS^PlhFxg$&8ZqiQpmO7%)@e+*AA{|WcVss1Yq5g4umnK1%^UTEamBmJIyOYQ zSUW0NGmDNbTB%*)O?ofMc?nEhTY^%31n5C9(m<AsYM@h+UzLj#r3M2A}VLX2TG&2OgAd8dSN1{p%3h2 zYy|N!TtK9UV^9Hd2zRv_LiY(|0bwz=)Yp$`y=}O(;ktEtrgkA%401MCymBl|g*(Nu z2tDU2gey+(lS!tgIQjv<^m;ud?!qn+=gN4O0~fGxL?d zL3Uwm>>$ENSfFI|48HI@M>_4^SA+g`1cF+TM)Ph+DZ=6`JH` z6ucrW^c;TWTXJfeWvKwjOXD27jjG>|Z*wpyi5@(cA9c5fn;Ior$daIUcg({<=Bcyi zBUR3M_yFm>OUUCVCyzWU>|J~l1sUc)MvcE`N3cXJxMWyFA*hqOGWwY|H7cGBDTCYf zu4k}6c0NGmMXHJit7Q}8NHnt>0GQ{K>W%J4rD__tB04IjZ=Tt{S>`aFv=D<CONxtf3nN!nbba>Hy$Mq}s!OMA z@H`F#x)lu~%o0NgHp#Qbr^;}4?u#4h%2GT&Jnc72JZ@J~CeGB{+dQ*ul2EW^hT(Pu zJjnyb0%jM~Bf~~49Re8GPm0vZcXgauO`d*_)cLUw{JbZ8yUuFW9Av&m>cYjq5)$sn z@n`~#D!E=nD?1= z3YJ=Z#7+r>^ckFJYq6ySg`HRfyjG%$o@Q`flenb zJ!v=xu!i-fzCc*~3=#7BbDzMk9k0LI!~wrr$3H6le!%|z>FQg)+1LO0RwYRD)zbZD zX#d}Tya<@ofV~fN=_cQ?*)RT-*tTO~|AXf~-68z^=eP&}iIOFX9SvC|NYGns+ng+k z369k}0QWB^GGk@!s>>auSt|$5t1CwLTGxUP*P&bNdu*q`DHoAiRETltA_NWb zpUCpTX;u&T1{Rnex^p1Sy7=9_`}>+E*Y7NDttGoc(L~ew=o%F^(Y!+Z6W^x2`#wJ3 zgZqs>M{;64=l;?dL1-6NWCaCd_gf#8A zT~WD7=_AYA1Ui^TPx@L6uPuIW?Ind$U6ob&H*u;6R(kEKcHrna)tQB6BCN%X~_sL3-cOrQ^^c)Ta!b=z(Aa^$b+vdSo 
zkjXeQM8NjYa8v2i4wnakTbI0E+>*;{XU@4v>D-J(_yN9P&fbZlZ0!=auqQ5(zMIN> z7Mu$y(VixW$h@R#Elf{Ui$m%;X5;sYZ4RNUiqBi`yI4^QlDgy+7*`#K6cGZvuEKi8 zZvAVL(zNV-W>_`-kNq6;Z)WvanW8KbEG}h~$39b~5YAFMcSviCwtQYOfWKYty8-t4 zb=>&=E72}5n0PyH$2RJc4@Ivmx{L>TC(ekSC!u&J+FaAG<7rutXO@lLU1M~;SZofM z{81whZ$7ZSkDW^6cQ;GZG8V?*jr$5Z=!Vr&hQ!_#-c0EN?BW1Ph;hNf`jrLt>;kvI z;o(q8*~<^ai0O<^0fgR4d+%QyRR84;x64Q%dyXw+dlCr6_m%t%a}rM-vUjZ%sCb4i z^!4k1H}p&S{$Q_7yJ`X%Y8`lU=myh76w&zEc&eLJ`yaoB?OrJ{@4hq!$S_X5LqrGtA} zL6{C7WAP&FrWcS_bfCUwkv2}Z8$9+dknBjqbU^T<85A<87Os1Qkwr zwOne+^GwLn*4^`uCOS=@MI9E>lu>q{(xWCn(J1?fmlx>r@ZA?8Z-y(v%NjZ^YZYUa zyUbL3{;_W3+kW-! zcVZsj35TP)%0PkrZt05a#02-_h5m0sKsdM7==%Iu+;k7I3e)#GDP^Yc@pgmZ!{+4P zm_Qor#D*_v;7AJ^$f{+GP=%Zb7)CyngSrU|di6b&(Vtm#45`%gZowccJNu1~u-3m( zYLBmZDFr#Huzjw~`QAwt!;QIp-~5LE*1yNs1-HN|Bm%s<^umv854nS1&8%Jji++PY z_54dMdC0^v}3b4WC}Ws)_x-Yli6#e)6TH22NY!zzxI zsoXf})}teV{!L zEcnGy&nm`tS4A9xGML1D1`D!SycJrIAYSyG3p#_h@f|2`mmJfd6M@!%gIMVqndG^8 z5*|K|80vz$-rC1`Qzr1BTw3cA^46%A1JLOsxAb$T58IFK2>Gdo2?dBSL4%kfzI@1h zljATZGlu97b7E7HUFM0o-#Emm_)1Smg@VvP`H25L{MNG9c9JjC#5lx1W8nSNHaPFY z)kdn4>RQp}W+!dDf(HJfvrrZsQ&idEZA|+vcvA^OfLK0>h#M=fNdu4Th)U`##p}ZN zy4qsE!LmAnkHz+7a`hfNn6rA6_p##rT487u1KEPu*J>miU>7Wmov_|NXUy2=i9h{z zpZ{|^<+)(FE{_{a{3|Y(dPqE^_7U+rk5t9*p~c6Fm4>aHXwOXC=bO8~PiOVSnq<}o z%Sl%le9YxAlO|5{W45}829+g;K~Uqu!HY}Q3S{!n$S`%ji&^9585@?ytaCEYBq`m! 
z&dRZFAxv+$sW5=ZN!a!5n%f@!4tEd9-;H#@ zgpUk+zVC}M9|@c|7`_qr^(z0F$2Zrzu@)9)RMu^K)kTdZnnm!nz7@ zLf+bp^D75Q=|(n=f|cH4`p0y8pdygHWLZ6|2rUY1WnBwWwfc zy{qnDz|_CQd65|!LXORP&KWU2bZIO%g3GRIek&MU;9zpl4xNdMv!TCNl=lgw^5igW z&nl!YuOpEkKGsp4p)!tqV%nWBOdAC&G*X-duRkzbN|AL6Go_Nxc*KC3E{@|Fyvg;x z<(%8nRo{&(wsFgPE+GqVF*c-wG+ORljseuOC3m0qOgyMX@j8XarO?gJeF80on!I6* zeKRX<@11BkGjMJ{?DlQe8tU^xsR2TixbUVqm~vTO&*z1~cVg*CfqznQ=*^%#21DML z56(u%tZ}RldROV!wnPS6Z@Eo#^DwJ`IV^LFrkn#e z0eH@yp~f6#uairP$sCiYDA*{&T>*(RuT?gRRXAWDO({RzZ`%etV~0%u6TQiG_yYQE zRddoglh?tNs0>ro(Diqg&je1YO}RymP65Cj6WEt$v>k#E8#v4ZA&L)u>x);fMYZ*2 zvR-3PwN={Y9dmPjFUO#dQha%X@en`f$7v~zosK8T1RfymbR9)>E+Y9vEU&LJKay3t zjn23LYnco?7e;@G4y2)Wf^@9xfpIK=S!7Z(y1M89oWE6oNFe{1Xe*NbD%11@nl#SE z6<2$XNJhms-413+UB*Dh<{$rfC&f1ly}Y#n*y}sUNfQxhxakCZxDia>fdBr=-!t~N!2EuZ{k>jxRLtM%;{R5`_Nf77LD+YnK$)6^Ag57+08m`lQ7}rx zLV(hgg_t zo^w4wIimdj$ko#mmPvQlTS5#+fo-82-LbV^_H|qMqr|3m8S4T&iAxP)bsi|H{k-k$ z>?Hrmukh6+)tP`tjB3mn3qCkaUg^zk@VMe3>O0$})-0T2)NN`IM_0<{w>H50x-6ml zKL;zjbF2lk;07_lHRBE5K~MYl_V4*Ml7`~UQToPXLR0Flq+JysW`glSSY zF@kdU%EWt?xMQQ1g`ky5@^2hK^CSV+E8Xra=5-%3+H-7^79wBFIRmy?C@Z2aguIx; z0Wa-(tgoQ_8o?YF$m%j!Bsz=LkiXg1{qflv=mFIl`4kA`ljeGqh*%$Gf^m+U)rW|T zTYOO8gSap61jgjgchSLp*ulymnxv)dT^^}Ilpv~oJK@m(75;^H4?z)QCfj7~=2SOAXNu*~D z3XqtJJy?}s5}Ug9LcjWQ_m&zR&+D@PL!HP}G=$pSgQMS7|fnSC+4pjZBTCG560g-@j1DGuQp4 zfyS49Fu9%iTy<5lVrBe~=%qXUy%s)~ULL58?*vFZ6%qⅇ8ik*Z_JdY`fdUDxhqx zJitWbZk{UJ_{(1NG;h1gjGSXU7p+EfrJ2}mk#M9IM3~+}q{5%89}6VI4}gmD6ZGK5 znG=uSoA9JJcSzgxU=(L-RwTAkroNl;Ze1)aH|9oL4HRUw)u+gTy=}8OB8Li^Xaazj;%Zai$1`qIDy_eIbqzYkQ4jJ`MQkXr7+n~^8&b{Wb~Ia6Q`h2u z2ql5B)F=*I;wc|){EL)*TSNAwT=8mB1jZ_ufoooKaE!`m(KNH5goO3g1-k>~m{(s4 z$O0QLyn=8M68r_%Fl@A73K>nOVGlhGuR~g@v_2W`2X$)gNn9}+scF@u!PJNK{w8?( z@_^>UY%!@@XWPa#2R}s$OkQL4xQq-rJC8*TDF0LU;9ng&XCOSC_C#7-jqXfwlpiAR z^%qo!K)_BIgmsmE1>@tdp@bM}(18fYL9XL!0jX{^;FtNlfgeKp%AdvN_$Mgc75M40 zi|eQWhp(Kq2H|)*pb@a<4gGowWX8ues6a*kKT!34gG4}VzdD(}I)%Vrd^dih4gSW$ z`v<%3cRt}Ch{Yd}2yn#T5BrV!_Xn`@Cr+RtfV2EY&-)u}0T2@ZjDY-o^A}z5e?6~t 
z_TTdvKK)|$DFJ=WzmX4n@3s9SRrMc0$#2|4#dD|>j1cVjgisWaT2zElFH=;4elR@I z!wB#A?b$(i2Kdd7@!vX~=WBAlR-L@!S>CHjEx#dMHucSBPv6y#av#l6o>>Ci!QeQP%opp*b6?518U)BIVR=^t88HipAU0 zZ!t4sCx1-XS1g$=G_KdTYZ6z!`%{nVcfhG-lnCfK5;KR>BDaZ_s+B(YO@DL8*==L5 zB%Tosdxw1mQ6iql^iW%#=`?>dM4R_A+ z?g~?+OS^UolI?#z9&vJU@?UB17d;+5`OQ3A&$NWU=xPKe;+zL9%T{4gxj%ExNbEt8 zGWRUM0hcIT%Zk~$JPncHtB{Ke-`&aBAyF><_%*C;5O$=WGe134J$6c>)#tWpN>OI3 z2S6YblFT#F_m3{rg_`0FM0|=xF4eWJFDT!0#9i5kMnlji{3gg*OKRmzFQ@o?8PT&Br-ycm^p^oJ%G-eO zHD8Wd6i!)SvGi17v@SQYw!*QCj?CQUWcUNT=_7cb2I%;K38i&)YIZ@*-J?Tk$P5DI zs>XL`(b-L|2kV3IcXI>!}OU5ZJpmu}+ zY05;a2u;&|V9FE(?=b=zgYd%+z-|O_as|FXK^#^1$04_ftw7Q2ZfN~~Ig2Kt{uSW3 zf(&Z-3N3MN*xTNLEqS7;H=)>E1;Y7;T|mqIB7;bM5ul`367T&4Z6?4SpF)?HX9adC zHjEwdr+EG2bUryBbv!q~wz=|*}CXneD!<$mT`+y+u8=N?*R|uhu;92*X-ZTO7q!U@k7tE#U7_B-b zx_G#&x6U*v>8y?dT6(wL7q%%5NYizEw2uK=F^Uh1X4Y z<^0voS7O$GA{x)i_+ZDIz*KIi4HUJ(r6#LyX%K^1^C?Ne*e!CMqYD`JK@2(G1_$;L zE5Q!!s(Q%t)z3tERFe?B$M&k`qjoH7@#qsSAlecjbd9}l9*(SFmiU}3U#nfR7-smy zgPa^-ARDG0Hk((YGRmPhaE`zV070?#axB1{<7=WZ;wXVJ@9>u1Pf#QvRD$lDh~wE9 zL}-O$U%Lc$R52s#Rg3TKvm$Jd<6(q|eU!u`kVIT5MxNuM;6ay6+M`4ti5j*MAz$v- z(Qn_xN*|_Rrx|F&hfdf|-w0UBz^k2JllCy`uzx@N)>jobGWltrOsZ2cjaRLIJeu1Ha#UIbTZvP4YNQ1QJ&KIEa_sDFDM3 zJm2=1HZg1;iQ^i8;ucA&5ACUUlK*8d7+cw3qkh8C2cOB&_~oK&6(*ya3Lqp}dlCeHDt;&i&=_1D&<^ z@4wnViw`8u8ipRky*54-3&(X6X+k%pIVQ0zC_=LU>0Vr_jO(e+q%&<0Gw)AOe^3O_ zldT8-V?NLPYu8h=HDkk7`Ef{z0noR}cso#l9X_laCYmT1GFUrgPPWoOv6N4UI_JBK zi&1>MPwfg;=PQFD`*EM;z*>rD>!7XE<*;Tr8EHt@#=3`HpERSC)s%Tq`)L*e?Nk9P zISNNt53RSS3++v~G*KrlB8pkl;=v}NtELAt)1w8m3W4wV<=?tTls8_teWV(=#?sUx zx$@G&yN4E{X9AyZZ(kOJ+$gc9Xx@!ONhO6-)nANTi0U){_#`=f9j5BbPI!+8V@E#? 
zZtoP?<%Q4+b5b@7mjWy1Z!9964Z{ixo=-f3qDdIaKr{R*@K%F(#>|0l4dSO5^X z$Lzzm+X+17f7;+A>HXK$>Hi7~epZ_x&K(A_!{P^G04Bkvp; z2vbwJ;VO^q3{YG+Z}7z8uFSRjGXY%t8l=lWb%2h(RCx%EgwWo5{PdlJNG@^LL%@C_ znh4Xx)6XirC4Mut!t;ylxi)Nw8&$e@e@|Vm_s`L_-q@H+=)OUj87k?nLAQ*$Wt9Rd z`#X#i=j&F6t@wzoH7ef;-ad3d-jP2)oV0ta?G>?5@pUj2A#DAWbdn>-5WkJ%*=g6n zN#{+j_T?O6UVGfOdG;pZ&i&x)O%}S6+#v2o4>GHB`l}u--Di#PN?7~tz^&q`HtDvo z$gdwej+Xqyf;X?Wx3(FXC~K) zAq7bMdG%j4S&%>MC+MMWH}P!Fui8z)LP)`D1B@T>qbb3iwz^w`Ig#T=MiD-FkF9^a zV|}UkP6Yxo7W~rXB>{d4s691&0dvNWfaqt23Apa`CoLpA?<*k}(CNlA~P_N*thzq8Y@{nnr%%U>T)<%gsaX(gUaP~ZcW zPxRN-Tdu$=JtpQtp4ND@z>CcBFVQHHmrf5hLg?>N75iRlsq zZ$4GeR4Ah2PFY`HuI$bANE1h<|N2drO@Hoc8{fzg_E=UqVNgkl2g%W=`yKn~WQP50 z7>sT;G9|}!*F#^1y>R0f6!kVJv$+2YWvf8AJ#P3-4b=e?RMLz5xI%$~+AuVzW3bMm6{Xg8)|LhCF`DA#O$-mbeu&Rny++w(P+k)f_WvK@XBYv>^P0NEI*(}xQ zNC>n0@;7zYc21NgE@~!?r|Htuu@`0E4>LI7l8Gf{4uWX?({@?A@A^{=7>X<4gbRXcpl-Iq!PRA2I6-ZUnGGDNn?(k-UCpZ)z8SPF9StGM*grRAHt3DVM{sw1@7E$9IrrI zc5CYIBoJWSTTiRz-R>*Tww90d=Bu7bI^r%aufLwE%bB5V*2lfuIXl+3D|Dil7V+ag zLS7Ea`{<-&1ZGnEX4P0dx=uwPLgrqdk%OJJdc5r`##m%MLSN=&o&@dx2@k?Oh0x)s zt{9Cptuv(aqq^N)Ac|$HR6DCxB}{gE6Wz22;Zxg9<*+5cL!+R>8-Cki0FCJD5$N}( zlp!aI_2AD@kJ;sr(PHJS(J}M zZEyylFFM5M*f+;+U0eV=*?o224vtr-nZNhG(-iY4^8!GXZ+M8CUx&E|%yCoOGZ5GD z5F!#a?t3xVU5wvq3g<+n%Fg_|r(XHVz%+T@8EBybZhw`!fGy*E+dQ!v zaN0W)`*-bcE%&_dKLSlH|H&kTaT^$E7U+M1>}gL17)bGf+Jq&dJM=Ghx}Ev^sQX_c zS@B=hl>Zsq|NRu4|JFeHzsQG`12${3wzG*Ce)$DK`ADrwW11wBT!~TLuR_-^2Je)4o6+3z|L2$&jWY^PV}g z9zzi`vqRA`4rjCxg9r?u-xmK^ED#azH|E)h9f-^fy>-oVo(@$Aw}CU`b#OlO_2LHD zsKLqNaJP{p?M1I7Rp;d&)y7h9n-fM?M%u1=acq+Q1aaI3NYExC-2|W$<}NF&KIC>9 z#iLuPQXpL}s7e;-QX_uunl(u)=De?M8Ls+MXXV|Z^vdiY+L*j!M##M+-qcN2#=wqr zA(u->-gtm{a??}%Fz$U*MXB_?N`~~e?NC3ymdM(Y;kP<~x5~if|FF}C@~~%G%p7td ze4Do(U-=SY?kZ*CmKB73iyU*C;(VUnD>`}J;KZ||GN&7XbDi^evoU)U(q3R( zZu)&G3Goxebf=hVF)sNT2eWn65%hi@_f4ZWTbmzi-Yl}q#rj7IaeEV#AUTUgM$Uk8 z@fUgc4-XK#PuQqBeN)!{xwmK29X6PQa(SVe1_MY|Ht}BLzEsAg4>h$+rbk|Mr$%IbU#!_MpTnT9ayAs&c>Z2&`-_ZZad%5|C 
zw({$(O$(#1EcEst3SLiaF5mJ2Hq_#b;*aXhqt*OAmkm<7NP=V#Plat+dSS!WHb%sa`gXh@LmYkCTxzzPng|3%#9(23Ly$B=w|A5u zb`Uugrj4h2AkMDd`6}IY&91Rg;o1(>A)l)qyza=kw7F2muhh53XiFxgt!Z1UMums7 z6G-8_0T*5&bu~qrWAhj1ujWqksuI4&!-+D=KSA8;p(*Kr{AgmrmR7LYUF5yfXm`>F z%4q30IQQ5N4Dq3&JQ_+qp}@NvU;SmWM4^Q4TQ_~TTH^LxiA44Ap@iqCN6E~r(}-fv z;YgUtVYQxmFikMrJy$V@klP6UNZp8wXf&)0jo%cqud3j<@MLfI#DO#$PPuJj6)0mi z$M?`Eyy;LK-7K~_eJ9}Uxvh@ZZJtZ@j=CYzQ9@gnTcUmR#>ovEK`GUvOuF| zK}ecX5kTf*Sr#C}d8<>|QkXF|osFgUpsf9<8EN4?tW<=Ttn1@tIN%TNCw=rJPKZ9@ zvj!>0P$td@IO@hv&;c;LI$_YqR6Vx*(zF*p&a^JmELimL`ghu`)sNY*9_^*EAWbV6Y;j0AemAHAH?6*Km-5`uVPIGYi3=c#X}A{`ymG3x06 zk;^72nc{qK+0;3SJ@Iqe#O-sjxXa7Ff+%ZO98zAcra)ciEHQvP^5@3ftgg|2lS zdf|Ft)Q4;L3UBjvl$*%$>(m%d=PWr&EDl)YRqcrn9TDJn_~5fcMHxo*i{%kMXI;k6 zkY@~_WXN&sb%&U)^+r?-+EZ&A*$4$l;PF`Nec5R75lcwI*TA$0nmz1fbJ5(BPoql1F_u$puc$ zM1!vw1E~Br`jff3{N1i~_3F5;UEv$OKz^|Xy!#>!L!^Yhg(eH?vG@41vxxD7{ zO6~qR7f50^UZ=k>yZ^)B-00z(OiICXma(sAPG+JZ*_BP^JJ(PLzG|4~bcN*1Zjh3Y z{;w;W-50p~Vp@|4K0iSlOZ6194ft>|-sv+=U_MTa%_4FlN~;eA`|6g zCC*tQR%GoyVa(nXne;TRIy4MFOb;QUkPkqGulJvo-@Py{gPhdKxDHVAKx@

P{etyIeZ@-j_szY zc;XXp%A0{Ox%Gx!k^%Vom?Hr+EN>oNq(XU7J1I)w$6L!0Zc%GBcb0s|U5FIw0k*yQ z7_XWQ7*i;C#!v`mjA@h{J9oLr+I1JKQUt?xd5|Z#_+vhgCC~5#YuEml9uh(wH`etR za*GQ+w?k8C;}zZ}u>PCU*9_m)L9BwXg+;Wv32AqjP()UDT3rPC%569S8BnhJhQmxuv|6 z96U~<78cm+^>k`Yb&2w8xZo@Q%mw0uGDirVt)bjv(P7$MOo(KLl}W9I2A#$0I689N z@)%%I#)vyqC-1Cz{*FQEWe3yfv5t1nQ_3V+KhVJsa~yO|cPcASA zJ-nF%bx#4_AD(91+JAUd=VMbcDrMlEn`_v&UA$bT-#V6q5848lZRN-lN>sVs96_7- zzyp3TrcSun8{?}Bf#q6FUxgx9tOo0)I@hFvi_?ofPjtp!V7cY^w7R{*Hp?Cd*o&+3 z5F2g*B&G~=`oe96`R z?1Sy>(BWrJPZ*_ix%s5+a+4b_v?my;4V2wlDPunaaALXUcSYxGDq?f3lTl`QkIddl zwrkv9(TTnYLGL@hmT~dbU@gT&J;MZh;8S_B?|nTNcahZs9y1+w!fKctYD&=Y%TOoG zRr&?JnqQ5EUtVpV-?>q8IfMsz#8nntpWUVNuJV9g%|fIU@oqV8nU|GCWv6JPDdm-* z%SGN>@3AjWp0uPC?HZL3FD7?F!`S5g-{m)ui3*UfEFGTeRYzY*9%uB~i%B!0W9ky# zFYa%hrsCx-Zv=FGlLPv+E|`g#ScaLn5teiNB%*_nEpwBOW}#7a8?H~WBf1LX&)DER z?-1DMEyXMYi+TrwdEg{DCuVk>dy%ILi8s9?AMLgh-R&6f()7RyDp@;tBT;t8Wcv-@ z3x_4~&l8H2!{0S@m80M{UeZtJ!uyGHNVEIQ^O$^f2#VqyBA{0u{7m6Kfs zKVD`ETBVLiW7xqG5Ml|32ZW3~1fAzpnII z{=R*NMAz0qm#>ToMyx-%Hw%1xb$x9~*PVMtDE-4OPl0gz&__eTz`kL;!S|%6oWmJh zs&+p97mxez;goXMXuf$AO!x&`&_4;-iYGlCy(Q2ezaBpCYdQP1POuQLKDjTfmdzF; z42&a+?O^32ifeeSJ`tmDynIi6RzI>C7o5A-oWFG|PQjN<|BU%u6m1K$WaaQdq7fb6 zhhshYDYA&Px?u3s*Lk$wP0A=2_x$#WveN#=B{lYtOLCf70^Y0QsP3;tAAQ+}&=Okf zn(y3jnKuJUlaooa^478-1?6l}fFx_eHlaBG*k`7VOYAzf5s&ZH?N{7rz+P zeyzK6GF0u|`lcxhxMiB*W@~~;*cpf0yYV(aXE=EBWO=G2N2Z<9OKvx-x4=JS@OHA< zCO!Z2Q)yy$S2Z?5zkeofUAfBW;C?)FUmPXz>$j`N0X~;^<@NNqo6j*`^A3c$8g}fB zKa_c6?nP-3bD$-!2vt7jnWL_IIVChO|K_~*9Smu~rxJov(QcuiVSgfb4Z5hyxBrZC zB#T~B*s6K8dcaY1IN;wB8}$WzpwNK7Tw1OkbW)tf+wCJ!j*eD{Vk|bj7b(B^IFhUV z9P&%}i5B=k@F%DSn&=4xEn+x~tqtGXFo+1M@Y3yNCbM2(k=8jYMP{))PQ7flgIn8m zTdi$K3e9-)@*NF7Ns5rU$QY{k2Ks$EN<9H4d*+vMi_^7FY8g*dea8+2ZBABHH z&d1F3r2t%;);W4^XS~Qqbu&}^&~?=KYOnfD`R37(t2#R)6MQ-awW#Ek$9JE|sIsr{ zK#^;{TqfPYM6g{t=Y98UMn*F8UC9zJ+($B!PO!3jIxIrj{6um$D{_1bvE^0RIJYJ=+IivmV>uhq5*s81HM%fBke(E#uD=M>0>?UGH z!PP=){SLZgAnt-lDtN7_;Jlp|o^)Q~pq^4sv_?KjlqW&&sh(VWj^yDF@9AbF<(er+ 
zFGs`RWqR{!>yXwlu^@Ryi@hXKgDhpVwR|CXYqH)!%|I=YyDv-hv=y#GwzC}Yd?(I; zSKrR=@ZoLgn2OC)zl&{R!#|k0u>0wBL#%zQZ;eH4i|XZjJjnLHp*~{RJ!|!``8GeY z=o+hqLicDrCPpJ&WL3R5LT}gLqUN7$iN+MZ2{357`XSTvjC1nL;wx5 z=}p~)hTO1w_)56pL6NI|8FmT*$#JR*8ND!ID&)ef773J6d!A@+7rMC!eXE~O9rGxq9X9-J4)h&cSdbhg;P9ZlteO-k*f1{#CQ$|^e z^VvhrLo#yGY@PaU)tb_RjK#M@PR zNlV5xjWlN(Ie1&JXC#j}Dh+|Y%iC|2Y7CVX5f6t7lFV);Dy<})lr_tEV7G=FPfY6Z zv;O7(Kr!W5j_ckwy>>)n{Q!p0x5(w84eE zX)imzEq!0dsJ{)|5=Jy(U|2}x)SVsOw89T{X87Z8%bZBT?$h=wxZ%-*y>~Ju%87zn z>pAO0g`VeqIpQ071 zi=CqmH4g9k@8N0j*JX0fZ*pvA*AL>Pm#0G0n0i2+xY?{96_mfL={Cdiw%}UVRN;n-;DKl_;2HV+f9Lqg zJ~#m;aiZ(?Wib3t(%s|(G_Lljc7h9h!FKY`4naK^6oR|~mev>Oq3uRN)E@%0e@f1D z#c%+tx_^$j9{7vs>z)fg{@r#*|JBdu?467c%>cZ$p{piYf7-am9QL2lf%w0CPST&l z|4uf_UjhEJKSAXRfdVp)Sd>ox{@2dR3ORufQC|E#j5V((-hSUC$@_vzY_Ob;Iw1|* zSjK`Q9cbG6d;(?D#+Sb;WX-n=G^J|yT%2@G#csc5(@i)b ziND7zzB8=d?+zWK>euLZn5{+Ii1e|C$i)d==$uW6Z6Csn;?j1OzW||nZgyd21NLaX zr)|@gr2+T3Y^1niKHd=_je8PsPuj(9HUc#&Fwa!i#tE#tbQry2CvU+}-6bWs=e6Be z{M(qhYpSAQbapgJH1(#^vi+S3M=l+2GEWvuR;)>Q4mGjFg1Lt9VT>$>Y-cr3W=~Cl z!rhZzjG$Z|>D*5;puZaIf+54gLVZa-G|qOD8AjMCy0mk+eqG*{i5h%+1bIsW0!iin zhn)bPUHD-^FTB)W$X5K!%j`Ty`E9GHlaavh#w{8g!DVkd%xs8FH->%z~0~8yLExONYQ>NR#Ax> z#65`s7kSQN#lt^UQ)YeqDx+9j>tFq~eGe5COh$8OH|Q8_<+#2BX>;GH@{xC5HW~a0 zLVY-Yk`#5?IxZKtQ=-LAcrG(P&{{+ddul^9a zQeZDNHgyP-@|@R=J=uCHDJg?Qd1KYz=sTNuCh5KjxpoIg6v2LjCOMl(%6b=<=SiB1 zNtT7*NTeIoEHCy64TUej`l0JXU;;=#*cbS>vBlejk%oQ4;ToI=QH~%?tTzLq&dz)4 zw}6is0ElD~pi9aX$!imjwR?%y{bD~su7Ea+-3aRufOtcR+D`jarZ-kASUuw2=K4z3 z*)(&C0sB0HG|k=Kk%3~qO1cz2W3jivC3A+=an?vQayLZIY{jF5_=sWI|F);A;Sj%l z@&1B1r5yc3x?9vQmYvQ;Enw)bzZ}PRWIecXH?}|xDUza&0b0^- zNe~Wf%A{Pj4S%}DW;w55qVZ|!857YXyypabn1d)^m)yvFX<*|(WDsqQ4-RCP_;&u? 
z8y1dpGh|2Grj_1T47xJ%qC!{fy4tie)Jmi1T|Io(yy>J8^EF+Hm`%TbJ)Zw3^nU!$ z=zWtv5PJG`*oL^#aKcwGS*P?9bmuVVC&;u6zH*^nW}I+{{DdD}T7+)79>C*d>}DPD zk=pR#0)UfiV~U=?8H&cXedM($@G=sDj^BnCd5JeoAJaVt?gxF~%J?Z{0T{D$KS8y}2+(Ho57f55LTBO-PNk{LGi7t2I(&$X-h#=+oBK4?ADraHeFO`p zl6QMQ@|yWs7ky|M;NyA$6Do+3lJHWyiG(I=1iyG>CL0|9K975-^y-IfxurQ+q>?Z8 zKsvhj`0{IC?HNC=x?0*`WBf<^PwPl`jfNuexeKBBb}1HutAU-?7E?PG{Iii~gpOW6E%AD6UYzyE1~u+RO4zdUtL;XhlxJ8k>Bg?ESh#|e4^Z6pq!r-@~LR5 zy+?Ex0?52?|4<-e0Fbihho3Wy757?seJn2h(;9HwAyUR~!!jVXMzfktgsIu*+ z0E>gZLWOFSLuPph)6j?Jv4^RLYL?sgc78YwoK&xpVE~wSRCv*W=L1=Fy_aRg$&53^ z`Xbv|$y@!T4^c54g4aGu_7`sY6ZIjr;>}%kx3%KxYrRHx744U+o+a&+7hV8jdFF@1 zeTT#=R+rQrS?&6HgD5(~PXxXYDd1NhiX26knoNMmyKBNFzsP=*RdsEkM!$*cntl}4 zn500}kx`l}eg7mL*;H@+JiVVPw=ZfK-nna)cq$`Yq^IW>o%{<**ZV6dy$8bx|C;qX z0FV3)fUo}@fUm3Lz(+2xvkd%>vdb1Ij1%H5%eDyVGJay=$CYyc$pxo!aFyV}4oNqh z-$z;Dyo%-WSFVjEfvrn4ucQ1jw#0j)As>buqwB7SzhPI^R92QnAmRceqp&VTc{c^kuzN<=t&@`MGv%`WKSp;lg!8bVX>kdY;KO8a8SDT5)Xd5qp6+icC&X`_s0=l*OV5 z$Ai>0J-vn%&*x#4M4Ht1QhufD$4aKl#b@1A?~!MZ!2lq} zYC#3x&Oa$u?6V%@y0GA+8=5P_U{GfwH88%CJky?_q!#t4rV^JimvW*q`z#UrnIl8} zHuwWEv1!7WZ76u0t*G;=eeI$L?@h;=U>QV_k6v)*{f{SBZQ+_Tm)t*z?4mdLBjxA6|t6t)9Nx zhS0H2?}m`g`N`8$wrh&@ZM{Lz@Oh;1@P#|?z5M8l0O~!>%h@$HL=kCiS+hPypX+nM zx{++Ero(tO=v=Zk5X`iTg0uc$p^GuN56Bfo(@F2#@r=W0?d{BFjui+p^k?U1&UOn zZL9q6ow_l5k;r>I(Amc&(BKF1@+-?R5=Rm&`*-s2YS^Jll!BF59L9Tmd-~#2awEE@ zmq7dNsCC0sKAS&X}@7_fSvc|`$ zYuSCoHiT^9HS@F6Uc9_@b1`O@@(d`|LuQ5oX1i=R=1=yWYI@D7gxRk3Cg(Q#NeDp^ zmDM(s$bCKUYVY0?Zp{Kyk>xV!Q;Lu&ioV%tXUD^&`$`V_5%2uLB41JVWfsP-V26pMo)Vu??A3!SSr9JndssYGSvV9}ab)k4W#CeYh*Z zzlmu-1C=T6co~fY0qJ%49T;Coq~Nv0$GRS?@O3OC<2qMQs?Dr5$_Ne%J2;=!>5=q# z>LHTf&45;h@$T2|Wf+$)Fw(~kYn=e~^-~V zN?!tItTAx~ER{o+4&8f?mOW1D94GxBpJ4uuCHuP13#o@6yZ|J$1IR8+YbfC{d<(G% zWAf}K%GVnaF~2>0rgC_j6aP#rWMe35#rvqy@tAZC=uklk=LV6H3qL_-Pf+IWNW61? 
z;N`T1(k`S8<-xWP3zG_&|-zBJ%V=I|C`k?EF(?3D-e13T_YMc>K z-kycn0Pu?TpAC;C>}}6I>@lFlF0c7cVtqOAZ2B#P?NSUR-S#;qU|`GbRGQwwUGEY4 zv7-=K?l+Ky>ooPQN1*!v_9*;gL+adx=i%lnh&K2P2Pqa+fR?_Z>Z*cNPu&XG5~@u& zl%)eZTr4c}_N{u`1-nv-zi#{~M~p!O(Siaz*%qO*TsWZYK*eB3z_Mu0wi6hsSy)ztP2bN>2YZtvbKV?O#Su5V zmV8U|e}YP@SYEGriHhdVm@zxrE-n=6GK`FVLs?`jZ8;%n+&3wVLmx}&PI#zaQE7PY zLUOLnzpD8AOgldO2zG!6xD@={yA*Gnt!YwDZKlALig(I2;!1_@+DN5m5~hHPU5ux7 zhcMLM^EdNeK$tUn66I@4+~&z@xWy~pZx={7q1j~G)btvX#^{iY(7}~yBLpGGpy6Bq z+-A)JsNQUX`;*=KCb%y$&+xFKJ{PP1r+B(uZI&rZS}0r!s;YI+eu z4C|#xIgvMNtyG{6FOrNi@zL*co4Z6^jzR;E3ippE>K?JFFcMtQ#s#G;y0{8M9W zd-w>wZ0TAC^6J>!b563y&@4}m=(bSWH#y~4{FU=Xxs+6qPoyKa9F$hnret~gzuTBi zvF{td*B>04tXCHn%zN|1PES*5T+ZicAJS9?%o`P<>nXs*5_Zd1z9w$F&HKG#%bNvC zexGw1ccE#1a4TR583P{{B|gB2?F{3i87c|B^_H<&n@jix?H8NGxhviKlHAIMJR1na z@1_v|XCKIE`kUK4h|TwC=e;SFy4~!^%{L8j3GfulMT#B^;+^_s4NPS}uhjtm_zSU> zKFLGPbXfw=yqHvIq_1kbxKvs@Q&R}^fAYudzBrb7B&V zNwXDewr6N-kOKx&SykV~9ffw}*j{C5`8#3DEB+^PGRo^C-ucMRy`IY}iHc9}mZ-yj zr0(R%yyQYxE<`J|VYq8|lFG?0FO7-q7MX$KT%1$M3zxwkHep%tGF0Or&T-a&Odsz`xKFXkD^k)U2i_-!*CF>WBQz-*HhfbxdLA~`pL}3Tc@4S#l_bf(8c7u z<61802T^RIZAa$B(t*{4Rt|%cMnEs%gl@p_f87rK{}dnmGdTKhuuuP!QTbn)$o{w5 z_lFDie+NhGp8ss16vz|-ezH&}ONE^JucL|HCra*HCVz?F`3NAiWe6RF6f5qmWGF-@_h? 
zpDJ)nhl2?3&t1DHc_FC*Ty6umMo`>m%8mes+w;xRDQfX_b18g?%+oy9e+bkek4+`Q z_I3CSFAxy#@Cy2b6BPutLS4f<4M1*Zp%7}NZrSB;E5|n%6zdwxb2?W*j%uzjkDmWf zi~xiB_haZGtT0H>C<6H(q+?CrI_q@i4$=CWA7Z>NQBQg=*m3=fCmgkZu}Y_|RLm z+VC}=l(~y^!ERrEYVKyOXK~SpjP{+HyDPdO_wjOm=?X?Ek$D)z9z<9s(Fu|r}I|5D_efZuZe{|er-=eM$6$tWCTH3OrLjlwN4~G zmt@^l64BB(Ars~19>_3{!>|`wd6!nNNx@qxK7Xz#A$Cz5ddBfhIS7~B>k?zMqHe+k z>Pu}Eh-E+9_3g=czBw}X(R;?*TThlvj+co++nuZJ$4Rn2yR9uZ|sLE{jjWy<* zi9bG5HS%9+Ic{XBl0`Hun?@t4-FDx_wP{SfPC#@c7EL*n42HjB53;%?)@&Z$ZP7%q zvVB}*2yMOkz{~vfeCc>gbm5n@&KbtUuDfR#nJ*m`*{` z2fmzh9cAhZ`Zhni@0>+zGr5B)c@mQ-?erueG^dnPV*I|8z!h8&&Q8;3hjdK5<=J?x zqZYYq;OeRgq;`iIUC%wQataFVzAS!RTM}}$|LOh#N;LCRHhJV4cPXN--ut|arM0$O zhJgEtJ5_~<>`Oq7dGzV9F+_1}WugG_-~#k=`==VKi(ULe`j3}_7B6whjb@BL#g#nQ z@XK$K^K~t0CL`X`Q!Cif+VV_cVjnP~q1&^N9`U0EksNgQ)uqSPjVwNS^8J1Mj#Uot zFg54y9`WMgMIxVaN&dHN(x)K4Wpj?|wl1FU2NC;&qS9${cXrP=zq{-(8Q{J5dSI`w zERQEUgglB~QY-5Hfn>ovk6w6`fr-F@yy&kK#I)gO+N&ihA#${%Z6&<%7oi`SM?rnu!OPO zkDPFVtk&R{ca3Um-><(D;9n5>!8Oq}sg%pb^B6)EMKLSi<{4>X`xDff*sX+JZjr|l z<7tw3&k;4v8-9$ZbsA+iudf>eWEeGHZLh1N6{q6y;N(hw{H~ho>vdx^8|TxNrh+26 z(`iUXx=?vuzEE=Q?h345RArNS)3JQ@u@K6+CweA2$cNz|f+8YFo>u$w#Ec8J0(hKX zeV!+th|b?gcy`y#Ze2^lQgB4YmbL(=Nn{meN#a?J}lXR zN4#%!F|0<@G(0E23-bBGM6GzgpNSe0Whm(>+YDA*|ilsB)B6B-9^|s?}u`#*IQ4*)VG6!Fe z@z=o+{OPm0xVS3wL^E@KON!3TB%AOAFF*^;G6sk)oEr#d6@bML;Pd1@FRtyEy=0ea zzh{DEa;{pYzRKpV(b)T8JnJTGUoQN6qp8hH`>qWNU+AQoVVAuKuk!s{C;tqtQeLb{fE;s*sW}3V|6!@6-)Dvu z;gs%=&gpt4h+Z$yNeWe(Mc!C5rKz=xU_vo|xOB&N^QjcG6y?ZpVe1Y;jqBb1YAk_z zFm%ZoU+^W5+ir$aFIiM^O_$8iTW~Rlj{-JGFDFr>va*@@~8$K(t)f#t^=Kd zSy$kClKy@@=WZk>ADU8qbMc2S*%0!=YXK}Qws+P4mI+TwbIaXvOFaYDC*t#G?=?GY zyqp}SUJ?HZqD?<(Tl#*=$uV}v^T_MC?SMsMx6K)8xRb9uReU+vfJ;r>?#J7{w&5&V zI^}Q2w|O2a4+^*$;m~}t`09_6q$Ny%RU;$AM4(dum)DE)`s1Y1Mb zOQ7Qv+Ve_>dX3J~Ntn0VlU$p(8w|+N(uc83XwN@>6CSEaYE1%x=Mc(J%tP+i3)U*V z-yu3qb&A+KmmTI)tw^AWR(;wZ12g2Rf0aFIUrXC>@`c68AE!Nq+wEsB@;{|{uwh(U z;rrw5*hS*HRgucP0Jt)u@M)>oo#AiEos&zB-5v6RXg|BIRnC(%u-)dd-_}X!FprOb 
z`H!u`p4x2FG;yu+fd^h8L)J^B?f}1$v!=ImrhY7S7oUbt7~mJCm;3QbQ<@{I~dc_uKK$J?y(*%vkLf)+l%V zV}ukzMHU%yi?X@Z|4OE-B^LygY8rySC|7NNGm*T3PAjl|IVSerIp37&^Z4zT2as1j z@n4Qave<5nkJyOyDMSIhM2)9MDlG7f)tS#NxiCyvQ^hsVGKZWrY{^IBhqgje;;2-ie@10HOL%m#y2L0&SWgfz9!i9im385d#e@)e==grib{CK z`RsmEEbO792=C@u635u~@!JJNei=sf`f0YW^l zbAI0}-&}i-JVxl zB8f}f2}5hUY*P8u`r49x|BJ8h`xf^2hJ$gtdBpB(i0?zkN6FIWl9Q;J3T?Q>pjs~X zPQ(WWfTofKY}J1mD*gqC`HlSh3mpvn7{HKZ{>CEt)5Z>XDgjC(Kvw&kS?YgC{$2kY zcc#Dn|Ne?1{+kc_MJE1RFwJjF;onfi|14q{0K@#zA@$d>^f3&P29- zGsGKNS~-l4?`>>*=8uUvqH_qGY2sbitT}p|)?vtqTA8$P<$2Mu_J`fR+2LwX{v< za$xpJw%4m@V(|sv!q=fmuH4z$?_d->Jqh(Ci^_-BaXT?pCyUJB1u!yD{P(;gAIDff zvw6MQ$g!$$M$8~3{@zjz$#EUZ`tp1IEB#xwqNr|-(&92*Ep zDytEBVUH5xUUB%<@}}Yio|R!%0xV-k+XkG{!V6$Q(>PI2X-~IE7_*8(N?Jn#$YXP9`-TBL`@qem86wWVqlxyTkN zFY>L!h>etTRmzLcLk^R)10+R(li>j*k6;srx7|72zq}4&HE;(8Cr_RRpJpP0VHVJU zY_)z1uUl)_Wl#1HogQ+<+OU&y$hNF4;0tz;l~i^fk(!QD^rE=o;?%5H(LiH+-iW1{ z6*WdY+Yr3U4Yb)OHd^bYX2yJq<`ye^TZ>@z>4(AKJQ9jSSzaZ#;xA6Pr#rIdB7m!f^2hW&H zmWxb_cJ;-Ag2Xmy$$Cmz+_}I$RQwOeTjyMB_ylCt5@%A12;pW?HHUa}beUe$h4JHy zG?3i0RpJUV!}~!Vh4PaMPU269iuB?A@z%qGS@ea1u8o+w0|O`7bPehhOLInLM7;O_lkY<)V4=Pq zGR_Dyn$ZI#jF{g@qb_y3e%W%_#+wc40xHEPM)sX8Dlwd;L-N~As+uyHF3K(RgBKl6 zp1hBcW4I8o`Z|ChLt!pb*F9-^fZ&O|jtD)cDk{KukFY@CaykcfbgPMi#Fmad(c`Xi%o`m0JVot;q4 zfxvr&LM9*#%+vZ8H(YGpFFeBNJ%r%t$u=nkc5ARB!%SGgSYtZ|xRTiFMBUZy(R`)SazCpTQ$5WbMhT~~&;L{EKb$r^b>`tBl=I#G+= zOE4l2xOhawBHKNUyYAWqu1nrBp1<1g>Egan(R*}Gmu_)9GIxlF_kDZ(OQ(I|cy55g z?@I=#Tz+4U%HvV$HxO^RW4a%oQP@@luwXLEgE%H_*mu+N9a|37YbGSa$sat;OO*YH zeD7v07t~INxK{Sry~>haNO4&W#X}#iq1J~reHo6Jo#k|@+x>BMq#yGp#kktf;NNjf z#Ib=Vdf$wGOEj)Z+YZgXD6!kA^=p8s;N#y!Xn&5?e!p#gqp$t%pZ>*Z0q|Gg=PH|- zsS7Kph_kzp`ca{V`NNqgvtEw@tY#Z95$eo~s=NPUQm5irG8ZjJgNMzi9l=wIc#Q80 zxhYwDC1+bvT;`FII#;3Qjg1JtFQ!@03A_7>^oVx>(&RHXjf2EJ<({rTPyA7hY;jG+ zQUu*pgfz=fTs<_ZX~(oC2Tf!UXgKb;M@%#yIG16?Yr$ab^tt_}sRABd16@nVh7rqN zH^dPZk$Ysru&;eD-TRUMjn@ld&w#A=r4Z;$1IU5;s8>EJmqWNt>zGNz)>r*@skB#E z+L}J|;MTt0^-)o7hUXV=@L%EjNaz%ca^;BbH64&Le*G`*zW+>^|4;w!zaI0r{}$eS 
zMBwr!f9I2#w9M^ZhU^r+>W3^cA^}J9l&g9Ff9w4(9_fFQ8K7}2M-@>mXM9il^Gh-` zENCCFM**m)g?Gj?w?iE0>1;pVK1=_k{ZQ`$yZa~e9g3^w2VNZg1-L1ylbXtA>L2n? zq$RftF62SW=9)>0-d`g;AACFGo_;TQ|K8G;9O|^^1#Bml9aFZ)(V)z5m0JULasB$+ zF%`?v#pIhFhSsO0BrGq<+z#zRp(Xj7OQlGGrXLTIN8?Jvv4n_XV2|eBrttqo41Hnv z+nDJ;nEC$Mc=!*#z<|W+w+#C?M;u_E{?jA!z6NcndIb7jTFpyLLzuv=)hQbgc!CXF z$Gw`Q`7%Ow$V!loGKsgpSbg1gim;FPO5i|HkZ7uBd2h*>d7vq4uQ=B#@Ot&u?kcb*k>hIwSJT^<$Cv%F8{RencWW=_SE&ZER`KJuzZlmCVUCiVZBJyTExmIq z)A<9erE##1hP_{uVvS7JCNsD;LVLOX{&bhsqm38DO}5U6SL_u16r;6P>1Se8^Ci@C zYY12QCo;7)W<=ciT8>Y&1_}f>$cpQqt~PTu@31);i3gs#W1*bu^cKg`i2Dil#FAOP zjN?!YfC;-@pM>%3sqms5%Kj3yd@Un6+gRPpyVeA4!ATQh=*ZZ8MQQG(JfOOyPF7KM z*ytmljXzzjolG=S@xR6EaPoCu@6Dr0V5m99Bvw>N>&sI3<*KEKe5%Hhf_i`A|~mG!3o!AVOZx5;ObUd zo_s27%g+`6VsIf$L3I8n$bI>Y@{0bb=sBe%S~W-4w}|?RlL(q~p}DFB>s-2nPE!h= z%UPLNtI5#=1$PD#+bb5DJ*JPX8rz;uy6krnxCb4WjOU<(s1aWEG=)zZ)%l!_EJ-#8 za~BDslviy9dsg~O@`xeu(K-@^(xC#L(QbKl1sA!OId*5(?d+61?R#CuD|{9e$dgWz z+b9i%r&73}Xkn&$Wr!K~gYX8>X>L6sRskaF)Jq2}6kK7GA9 z>^7)x7*{_n9qZn&M0TuGG|FXYYbjaFRW=<|#hlF!67M`V{=Uybzsk(xG&DjlTwK$c zNRePIta_2mN?`nOOP?Ei*VG8>hn82mO?jJS!lTRza$R}hH8X9pjy2_4RD-Oo{k4Tk z<4ZE=i_`cLeHlk_#U4Y$JjILqc~@HFm{$afZl zGlB<39xIqw(&(0o>y@Of7E7K%j&y_|!bUV#-kJ(R#VYDlsdg|bU)%4=ck6xeZiT^8 zaOHjQY~Z=bqD#N8r>s;l zQgC;u=+IRl4DoGCs6(i)Ubu*jNNwVaTuGce#Eg+MyO{LyXiS+n-KLG{X5@|1`_aYu zsPEdE`E9@Ik{Tsnn}!3{-3yRF!QH-44>1qHA_W8AQgYI7(ZlH`pcC&41OceZYW$t> zk4C5P?44`=_esjpmmM4T5qI$2XAR}6SYU@ap6y*SOMNSPRT^F|G{5^C>H)lPMy*0} zk0+(^hcU?cWIDjO3~J%=^3NIWFT1s0hVH;WNpio9W2*isbyds#`>N(YIL-fVhL)|F zj6Z2RAYT8ro5FoJ(Ulv!)~&`GQqcYsEUwxO4Ta{G9z+4dQkuM4?(gm@LqR#bS8Uf8lUe4PqRfCQe+ z%L_dRvH>xIxXkVq0o=vEF){gfeEcu>uOMjaqjMD}8yaE|pnZiR?rF?jr@+TRzVq$n zftev#K7dnIv7d6a>;KDeoI4K75$XX!$;RLkQW-kCo~Jv6ia!gt-L?_m=!>9&2Xo*9^h zP9D3jB1PTr+W03lfa~2dY(#TBbG~DSp0>VNc@*tTq#>8=ez#9VQ%%HWm-zP9YWWR7 zEzo*d^zOvdt1%mg3KXMNsK88NA!$vMOejyQN=E)j*05f|sgsDvy|ikINB7dI5Y#T& ziWvX6)BM~3m!6R88Bb}9EfPU3u30>;Kehh&)v;f`mH)C}3`JgL$LUo=1xopRv&15&x4YA?s5#$qR%VxV zJE-Wx0BvjFmlH~aG2_~m!ND;XX7wVT 
z{^JJ%#ox;-P4K-X7%&|n1<*6N<2LZH6`4XJ!Wg=TN}iE&HIBhXG>Rm4lSX&H?XkJq zWvP2o603q^zl6K682*0r^%oJLD+SrCKKNYNm4HID_?^e-#+M-p;T-ip-8I* ztczJFORdk%G@9f}*a}Qbzf-%E3L*)2ez@1FfkCt~U2dI397w}09dFd@$Wfr+9Z&pK)h8M&se0$iW}De1i?jb~rrOW2Ss@<1C%2CG+E z0wHS!&=X#e+ns+_QUBlI_gVgHjdlOkY!G<=8U6Lq$>ULT?v7D&ci;D!VQm@%ZFI8R z7lZ`yTi9q+>rNAlS=aOd&CQ2TCQB!F?cVUlFd&h149XrK+$fS6FDP*)%ReC4lht$!+&Z+g(6x>>HzDVw+oG#N(pK>6szKDhY|-LCGt zMi5fuQ1q0PibcDk3|f^0zE?3k6=w2^S&moAcTQWUG<&QVbX%N4nk*0gmV0I&8Klor z7kYWroX)nxM$3M)i(>tp3+-WOwuX9ofjcwt%o{FMhxN7lD^aWUd;ssL$3|OM!AarV zHHELv?pFxSnA`OU&suL(PPSYLuLQ>cBbH5CDB8|M=#;-zhUWsVq1VpsC#dCAn_%vioHmegpr(GkEij09FyEY(_2IB#H#4JgZ@KE5Ho$v zXn{ffP=B|0@>U0UkIhhsse>(hv=*X_Ar|M(6vh-5_)81j`J9KSxh{0JZcYv#huXk@ zh6)!aR9Es;9T<^C6mi=r)4RN4pPl^7?sz0;cmz_k>u-IF89w#Nm?@t-Hqr37Per9R z#gz;7L}pl+iny6-8};6tK#{AR5R zRz$dPu^59fcy~@4M%swY^+mqG)P^blI*0D;UV$4aeZQa z6q=hm$dalj>9$iCtako-I#v*S0EgLT&PnEIKb}Z2SXWjOWijj@8;DnKiMYCcja#qx zfD`kCJ!Jvh^YFs){E12;Dz7WUtm(NSL(y?+wHQ-Z(sxjAT-CkZvkZ^E zf@5Wx<8B+*nS7tR%XXz;DVk(i^bBZ4%(x2cXpT0$r;$IH ze@HHn5n!fBTo(Xh)l9>asfDOw>XO^Kr&vFZRq|sXf{h3+d?fnn<(9K;dUscTM3LOC z-gHEjf3oWm4Q|9_&V@XZ^G{75pVrmsZO#dr6b6CwM8a)=ZbkJhxb`Z)~1(U zmnpCax6g)bAq#|?`X33;Uv61~N*eOhJSZ?shI;Rbq6HqF9tRdoQMRu`407 z2U(P<`P#xPOm<`mHYV9qN)h0;o&NSd&Ql)UBm?2m{cee~_|^!HX~s{<=#i;JyrB1u z+stQBpVuBB4wo!Qr2t9uORECYkBI9ahkBT8*7~J5iS^iK+*%`n161R z{>B3Oi^2AvPmaGg)Bal#&u@uqC5#?Fr;PE7g;GJ~DWXB*ut{qv7KFrf==zACP*v{) zo3?tUs{A!g3n)GQM|TrF>B$S))1-X}=jA}eBEoIF z96(^jV1_LC?Q^F6%r^!rO%t71X2R^85NKESv4#cgZcAV3t8N^4@65+=dZxugV>r+T z_m8J(n!6&2I%V?cOt`K$^PqWJG5q!Mz14gI+wdY>IxTp&@5R4k&iTjh^snZi=UD>% z9ll1SLQgNeG6^8JV3?F70L$3y6m28^nfQX6rrG3_TxvfWl9ECm9Q>b6=;NkpiC;qc z?~C?YEjefASzRK3!hcJ{ev0R5?&r^H(G&D)*<8`;^F>o7IOCbS(zeOwC~^C?S#GS< z0F850Ow6~~{;1;6`KV^Jt_Xy{1b}gGa zaF|j!-t0r_GN#g=no!Zo?N=O?!75LRg7Mi+EL0wnsD#Q`G+JRxj8VL{u;%i5=meL{ zo1tONEW)ekmdT_a(J&N0Ofa(*@wK`~UVTUOb46U6jr%C3Csjl#!&C+Vqfdi9ZmpP5fQ4h`B*=b?eeh^y(q?w*yon)14rPq1(I$qcwZF%Xcm7bzS z>yHISFS$=!uehU}iyq3nVCHUHz8<8jYjxdU^l-&#b1!i(5!dSsW~{)DeKk0C(9Rwg 
z8|HeWRhv|E#Ae+cUWJHHzbBs`^SVm+RDnspjE9{Q#g*q#K9aGoU}V32rlFPBl>(40 zm$=B^+b?7%$<@s|T|9g1#r$K6SL}==?R4OPWygfpSpnpOf;jEW?kh3VJ=9)sN6lU8 z{qe-E8`+x+%%&_cONsnCG%_24X7?m(2*;*-U>qssCwm1b$$By)R@u*^CQN?N9kgD%WLN2vRFy<2Z^;|H^yA!++MYO zGMZC2CYSo_i4S%t{R;B#6vbOkz52Qh24l+FLm#nhhL{e{5EkR?y`^cyY;lN(n2N?a zwWru~;orfjq`9VdcY_Nq&WBzZ)4@jVZnrlQJwNa2bRK;1$A3vT@Lyo{(Z7f})E~R= zVVcospEILC)#oqsbwajr-rL4pOm~$>T^;3WT1fbjnCvg2r2*8_(<$86$K1^kUk}2l z(JYLDmro(mL+`#-#ZJVdd}xzCMSjD`N9x}~!2(6b0(S26tyCrBdTZLR*vszFMh2t4 z++01cyw+x~C%8wen!lM*ZxZtX6+hm?GMcNXH`_fhWxqA}luk@_Y^5Giv_Zest!H*< zr`ypE5oJoB&`gUk;9lp3Gqlbl{g0`;Qj!nWf3R1CMbJ@t>(y-OhAu?OGCER83i!#~ z9f=wqdw|l$8_7R_KU&^;SBt3o33~1>$jZ7%0$^8av1B2&dLEyY9qm2EvM*k7ZG%mO zJs@MU5m8;&zk>Dhv2bLPR7#$uZZo32Wj|!A>a@2J+kisA2}#a-B=vK!;Ma- ztBXN$m(ERlE=hSn=K&7-Pf!5gTZNOae5D7s-US=q*RVfK=U}H?>nEmf;taazlqBfR z*D>CGNE=weL=QF;>dZG9VWSUU+Z_8;s-nfr>Kq|4oDmi*qTQqMv61vvBiU%pyUheK zd7G7}2z&it!YT6|>@`-vrdOX4HslJy=dFrKppiH6%iE?fuY(S|@=;W%EyBW&5q^*W zPeIpGCpKok7lo%Pc$-U`;o|r-E$cN$fmy2UYZ7#1kIIfvZ*teQr5Y-Sij2y>Qp|=0 zCyO#V9-gmb(ZiRa!`v0i2Dhdsg%m5UKb3WbXVbRzOFzsXUa7c&N*uDaQu%%tNs2a9 z<%*HIS2y*lqHB9){D`jIO`EWtEH2$;bI2`N+uL!Uj`n2FM}G)|KtD#@{BDVwF-8>? 
zZ1=@b$vTVKu~d^V%SZ3dCkwTSRtuM!2wrR(6KJ<_)V^^?KTJcEdvbKBTho3$m@5;9 zD@rhVoa)XReA(flY0H*G zKgew80Jkn+tSKfSBAB$$eKxXs_O1RyaS+6Ly;5+wT}V?GV+zxk8P=H3-9^3$RE=6Q z_j+P7^GNdvN}1|4zCt=gv-rr_BT8E7g&jq0_E?d`_65NP4>f!kpP!*3i!}FXrcJ^` z2kEnb4>m45(+lEc9A$1e{u?Oe$%<6$7mx2$P->h|`&?bpv5tFki!2E{It2hIMQ9%Qz_PQRV*GvU+be(l1yAH zZ?CnrPIFSiwg1x&|ZHDi_aelAW#PnXTZxqv%M8JSIDD;F-ns z`YSJMDGr9YZ#*^zA;s-%@m&E&a~oUUS42MEvXtE0dqgamUng@t7 zv3RwnLtwHCR`TfRZ7`HKV}|pVO=aH<=}P?mS1<`aSN+S5#p`41Pp~bKRE!K!272Nc zcU0&J(>I2GOxKPO8?6At1NR8Vm+3a&3wFQ5>1VGjBT+NZ<|ivkqXrLp!@4`Ty4DeQ=QNGbnnieve| zFIg`mjO#291qyzx#x0=DSf@<%H>a78BZM~->x_uzI#6cddJqS&gJZeB(1ULY*MSMi zaoAV^vez7FNp`d^hr6-tDk3qEUbumZK0h{Hd!$s6Ln%SFCU#)4(#SX{IO_2V8TBGF z88$9>r&cdhYu0Q`K!Ud^KfOc#n|L6?s{G9*$GQ-uV=s;-1H4^12Pw8VUJj~goq8a6 zLq-4V?#F)>$gBGf`6Nh?n$;`Z;P48N=?qmOvlP--7@ld^O&ERS;k|CNXyG4`tZa{ZD)u-kL4K}*qV|oGAkaJ8=xC^z79QRr;>PnT-WM>-=CZyc)6e)( zsTvCn;=9&vo2v_WW-;bKURENm8o6XryvsX53QW6ZQFl*P>mPO>6DX{K11{8JzfQ#S z?hlnar%hiv)Zm;t{=DG|GVyPiEg+m9ap#;jH>}QNP zx}kt(bzo5Q{Tjg;OWxiTDHxMAm896-&4Ou94!a9^P^bKDx%P5XJry>h6+s>LL{6p- zStYuXtiW#h!ab~W>y~56kmQ{_@Wza^fwoYEot;~4KnY3o z@TZ3|>wV3A0nD^`qm(Q^RE=i}D!bByF8dP&lFj*m#BY;(9$UHo<*RwxYD7(=fhj88 zD1W^N<_n-*#Dp>MXY@{=)6m3u5?8l%(rBSgk*zS8-(2!B)IcG5gy34Wiah5Itxk z=Ra9g!0nAs$e7*Gzlw7DA-fo3j><$vC+QZ@Zbb^v9}l^Miw zMy^5Z+uagJ_=m_%(??h3DnL%H4k;^BZiXy}y&k>vwDFp;^zsTFr^<9+mR#EDT{Ah= zl1i5qttX0-^P1jILK-8e^(ufYV*^mzkd35@d&J|5O@^y;Epz^t{&V;6FI?#M1^=)*mgAR>zd2pZxCrSKcpmQEL( znvQwMlqI=q0($UV*Uo+Hi-#_j=>UPQNfKPV-KS{Hb+27@KBbqNYH zI?f6vu!doL5Ed$rl@7V@t|@JrJ(=nBz&b{~Lf4QzTvYS3i!xEVkb#IS1O^Pu z#wuM-P-DJgcjh;0HiQ<nRvqxi5cyOP>V=Iih#4J|ZZ92(_h6@=#|Bs! zkc)r>up$Q;t@-08=qeBly8c>yB(cjJ!9qNth+B4L6bF(iuDJu?XYSVReMe_a;kezo z<1>h45B@Ycw4zu0^RV*MJjH_Myp(f^5jH82P)QE}wDIXK!0CnZ{sg7a?;?GIDIzR_ z7z^wv0UwC*S1#k63!j;Pv>h?Is&w4ZW@uGJXxOiNayKT^3>2!4b}Lq#(KK}q)0aLp zVdD^tM)!eSoLeuN9cM&MHfN{vyge^FpuQk{P8MX~rs)pl_prw@VTTI|0+&CR%Er2? 
z+t!UIoppVP^8FBj9S_Ux%z3>& zI5R^#$pb^Yyhs|u-$o;5C|a$qe;?nMvQ96!9i1`JCp!BK6ZeJdc7ST;hjU*dlh2cl z);6QYKIpX5ID;KK%S*pscxOcC#Fvy;HSakTWgnqrJ|PJ6C0@WB2H<$*rIKBF3ymC1 zGsT1?OHpU9Y`2|reUurTbaPEgnjYx!C>OU5c74N-x6`OPnsdV%wvOOjnycYJ0>ioi zJx0TiFCc!bpHdFN7MXygNLTGANaDu3n!csu045Jwn3k8{WE+-+5Hgv4>Zbh5TD9Dq zp#=tbtHN<>5Mqegu{``TgufX;DKqEqLBRPtFtYdMZWR8TJQn?|yDE!sMi!0EeSxdO zZnewfZzCo5kw@o&opcOfaO_h;1*2N+6^~hRZF}Xo)D4oukWF*T9zoa$i4T zoDU!itkub0bCz_h5sp$hT?|tIZlaHdEMe|D?e4;NF(XOnD}`nK6q`)1P4+-uf~dJG zWjdZLZIhNGQ>1V%8?DG*d-=_zIB50-OZ5=tF5KfDBoLt~sB&C`j6otK7hR+sVWnxQ-<625KS82Y z9;CyWiiWC#7s2YIXdBVH*m7CY==UHD6LQ&jWJ1|e0tuo^ZRD8V5ZV^-xUpL9?|L<;Ql z@R;)KT^nodWEli@>3!u!J_>Cu;7@V=e3G9Jt>wSa{J&p3v}jOBpk}#tgH=IKU_&F;g8f=RiMOcW|~hH z&@+d*=Iee_FAstzKYV(m?jHRCaD#YIoQ->36g!54Jm@tE(yEh2{K(9n+a}(G0BZ-K zF^gJ*lUdYyYiw^8^6rW=3(g$nuwfC=-0N^gEjee5y;2lEyktu2g;xSK#Zq;>GiWkf zd#=mtYs)1HN^i9(I3ePzL+&Gd8Ml_Bx+~A1hG~?T)jP5~(z1;E8&D-lY+vI6mi?OP zVKMMW%_(l7e!2>}Zg2+rXw}AaOuIFvsE|$4~l-(T9W}W%b!bY|Gqi=e@mm#bnU-y8DSy}Jp7Pmcfdg|KpZV?J$ zBMsfIi|0PPdehP7x$8cbPdGcgxF@K`6mHn0%eoJ-?OWI-YTU2Ete~L&nZZ}BHEYYF zjCg{zUVkDN@b#VdbQ^hFnk&*#lBt<4Ojy)&Bl~JVKBH>l zBvKu|>gt+oaPNT4mrHwT{Z{#jpJ8&cbB?f-AxSybU<&~p&VABc$^7SeBvD^kA-ttsAqFaD5 z2m;KyzAJa5soq+uDh7;oMO<0IipuoIAB8m2dlu!QFJER@Q@w0@7b?7H5X=VsTL1Pb z9q8kZUO;Od?8Lr}hVuDT%22?B8q>q<`>>r7nf^-)k9Mx&eO`ZBr2GbrW#+hTvzaA?s z9aze$%yQg8d`Ggpz;3u|j_z1=$}$o1JWJGF7EF{f=b$vFF-cH0E=dH+k-Q{}7Utqz7(DmqS(P1Tbw zAQBQfaK@naz-2g|Y zO3tmVOp7nq;JjPr(dFi2dl$0LwF#zJJD`V$5PNfVq?{=xGAyHScT}CTv1te$17Iwe z02fK(2R9gzL<&&wj3xOtj1(#-QLdO<6XMZX#2CW2n@!ql&}tK3yAqsd_`X)0)~neUfhTJcro?kboBH`x3`u`Kgs58F%QZVU1RhTT&=FJOJC)ed0gzwlqNr=k|9L; zG4WD&3AWGIks+tRqD=N~vhieK#!^HIy52vNM>Mxr)ZU9@dEDcb$rXWYiSCJwlA%5w zCOtq2o9+95u7s8PJFBwa-$&iHHdl+iCGH!=(jn-i(eC1YSl!*dS~T~y9hX8O@$XI_ z_74UAb|Te^N;LLFp^KQMajV+ z0u&!G5A?5@v5`afXChIy;(nV3dA+m{o{-h%bUwp&HquiSOfyZ)>A$OLs--xo5%k)`uVGY61*Dc~cT z>-q5#3vAhQ{w2c&&>sJH%fg1jXP*f*RNtWw9WpuXBI9wi4pZSWKHJi+#??b-4`Z7w; z_?JS3+wh;G%>X*b_bG<&|5Oh8T>$ylViAA@_{9bIC4BjZ3F+a_px=h0->01alQ!mW 
ze&s))&-};Kn}7UF5_5>!$fHx{-W{Cq;OvFt@9=)|jN}G8^FVAsOC41cYU|eR!jT{G3=#%Tg> zE)a1ZiI%t;#S&*Lw208xWaWgv1lE2mDdb@{|yAsf5EQ*tGM%LXGod-Yj>XBWs*Z# zWop5$$>259#*!irlqtW7k#G_+fTnIgnd!gU<@n?Kb>U&P50iN@TR$QyWw_tU>h5b% z#q<>~vKZKc82oT}|$CSCZ%l*7RbHoI2OiiypFoBNwzu5q4Wm=9M!U9<`n zZ@gU-iyv6<|9--k1hV5_xe)LbM_h)@3m%;7Ei7D=zTBbe(x)CN$+u|7O+f_Wxr;~1 z)(5=dKp^)Wh~`?xd`r4Yrw#-yz8;fS=3fj9)qip+^Uf;ZTG6eB=4F!{6Vu26N)^}j z`UIBc$mPA;?g(H-Sbj5`~|3MrCgb9OlZ?Q8dK52LWzGy z$Lo;GX6~xv8f-M3S{&9)dQFx@3|+tMAV--VDan21nzRI2F?Ao(SM2VtD0IAtq&v>xe4h58UR@o|FH0&oZFNo3shj?)LtJO7cr{PU20nBuy0EDM)6w< z*()#}$9}BIpE8{pTa~RZ@03CFprgH?+go`JnxdjT*-SR^#*Zz4c6a2dF7r5}I%J~S znQ+_;w0!ySuk5;vI6151y!`hon_@()d>@SZcXC*Q7rMQk*v}_M936)o33}{c=7=&$ zEW{ch?#`-Th^5U|zW+{(X5a(cuHdED6VK7Ig=lG}ODUvYLPfRGXGRe8)1zfblJ@RO ze<58`j8f7OqVOc&(y*BbhILTA076tC=j^}gYTbxDV&MS_N}uPBdB(=~B+fPYB@2Ec z`9ynG06%Q2HPJwP_dtfhp(}@M?`&`-nb%i>Rwq=n~F-i%# zTh*k`M_h41>R|kaxzTkbWmn7Mf`C(YF-0Y&L`{SF(I33k1fRNmtK8pzEV zAzWDi5f8=WzYn7KmHM^a(tR6M z8!JWvp(MYSzGT`LWv2J_s1=%e3?=ydT5Vt0@r7)d@!H(p2E{1`iPC%uGO#)4TFH31 zx#|h`^l{<|SnIQCrgB{DsdZg#oZ%uQoMl&p^Biwx z##X(HVdRH4EeRolln8^nyUlY-b_3jaipg%S|Cx3wT)5RS4Kz^WMWNAbpUxfk1Jf<* z`zsf*YQ*PPp{E_c3YwbjUW`DD9iRYQ8kG>hS7()~Eh;r!gFHLu%_rEUqH0g?2M*;y zT1pZZQoJFT_+7~K3(V}b8>6vTY-qJmNnk7EnH|ePz1rZ(*+>(_nx=?2zK2FurFfV* zHK00?a+@2ieA9(n7vr}ene$A4y8L$4Q;!O!s?FIvSAsC2W-UsFF~=}QGW`nmn@=*8 zf<*!(SDq=>2o5iH+OJPo#4E&8I3A~?HXuP>Lf?w@M5Z)sFWqmQj;uMyXdudI4Y-69 zTosTT6I08jwYU{~ux>oP)wvYGv+Y76E+=?Mf){O4BATz3->Flqg%K1jCnvSFp0k$t zi@F2=Ti87U=xpuIW?q!3My^9dmIsiC(@%|f$SVw5}y67h%wp`kF`NL}Nv0UG5 z5pdfi3xj_z_K_`IUun<%Xf7L9e6*EmPG1qQ<985aa}dWFGhLrF^09+$Jb3MX*-sDy zL87{1kRw^=kmu5~z?_u*>Bh6+0^Ls}pvJR!_ou^|d$_BQN0*^H-IqkY-ch}>aHtbd zkYxU9dNqwX|B5*+#QUTEbG!JEX-Dy)&2_9l{i}j+`IChS<#18xE<|imlA+XcL=Nyg zs`U^zvCeVfcu`ya`=mD(sENL^f<}fkzo!wJtHN$F7C^ z!*>S_W;{r9LZ1Zl+3RY*;gAAP#Y6sBv=pO2@&kb%H$Y@7ALG{xEebZ-b}bCTXT?@5 zXi}+eg<%u!x{J7v^)+0`3S7A+4h}JvguwL>WsHV!eKS6rLL>xQoGX zSiU{#bqYg6?yu|>K@-)nPvr=sdC%2Z#Kw3c(#%OKo?cVMs&s 
z`P`}4$+?p9QT8JDR<}tP`{s({ZbQe~=WP_O3`-LDV(R`O_F&;p?GMQ$M_#{UX&B3W zG_4&nd?f$S*GyVXQ!c+~qxh(gt+r@Twl%5Km?M`XSMU(}p_C6VPUN$}xfP=+Wy)0wl zXxg~tkG-WCFh}A$j`3?UgiG~4Ig7!Zz?b6AU8g+@VbiOOzf{Jdh!bP{&cbt(i}p+Y zRJP-HFeL_D>+u8~a#bb4Lhf!SawCC&I3j`PAms7;5@Y*=kd=PGP1F(!lt})%_o?=p zn%hV!Uy_vpmG}@gdi-_nJX6;_bel=XHhl;Yv+9zcU4a*Q|w zh-0A85to83R!fWoqh+X(I!Q!>M(|D}s?HB$;tm&2GB$tEa{5Lx?s=j6CXjMFCDZNp zwIh!xfH{qIpk4JENs3%_V!>_j?P(vQ&2@CV>0MNx37z}Qvhw_u)2GLH*6_z|5A)8n z&vUoO>dtaZ?K##Jcoqmn@pnVMpT-JryygOA*E^c8{FWWR9#cKIk# zo%u$HWyTQ96N?8bd>j2_8h?yo{|7w(zi;>bmomaXtJTqrBfP?^p>~3a%pyt!^-_y; z=Atd2PEtTH_e{EB_G=@v?^*F@K;=Od%i&*Zg7Il%F&HKpUw^E)EFtF6&c*lvTTJ`z zO(UsWKFE%H1L8k-!(onb>au83d3B4zSJu96RwjKO9CQANXS*P-y#?;?MTbvKsSiTa z+N{NvS3ecoaJ_KS;YoqzL9R7)FMnxH>J0`Ql<7URv2VxM20C-ioK5bx`v#p^N<1B3&^;9e z$${{~{-*R#vK6$vpcrLmzOE(h5iV1f9)*ZNB3}a3n)1#`v(4PS=RZN}*BE`r?Jh-l zB<_vHAMTKOm=EA=V)fZCXU!yw%^ex6th*}XX;`*2hndMw^lRBI3fqJvz`Z26{n67g#2{?(yUS0UZ-`s}VtD!k zwTT0SgQ$pthzio9(v*OJ zfOH}tT|jD-DheXK_YxGOL!?U=0g)Q%ogh_uC-fG2O{gJ+c-C+B>^MzqC7rTWsyyXsew-Hgo4t{4n@#NR>?Py>*<$IReTo0=!x!ZLWYqNEfFcM4hIWm* zJiTMkIOAS=Jm>$e(?Y8U{F>U6JB%bUquzvW8T5jT>Xdde*#H-L6O4+gi;>LSV4B2$URI2YJ>_2l}8iY=jE zui?I!*^pVbo`FUe(j6=e;m>2kM;~J4R4`M@=GGdw0<7Vj4hf19RLjSgn@;e?XWV$h zmhY^Fo!3)H-)qb+(@ZQmoLr~M@vgZ>>hwP4HF40d#L&1y!;Zc@|DM3_?SDX|LHN`A zlHpM8ly94&*>l%(R*tSEh?&iIX;9_O+-NGIp(DJ)PA`H3W&ZMhdWYR9*{Z!wx%aNs zNNwAY+?sGdpsDS^skH$)wSBy&9g@?Ck4$ zA@5kwN(vzV=f0NbnNkYaIxi7pN&o!CPKVeH*jgZWP>t5epxCM5d93>={JOuW$-2LO z)KZ5}pOo3#nzIo(N&DSR=w8;e)YbEMDGc^zK0cx2FLSk{qsbu;8SV!Dy6ASfi5}l| z#?9HPxup5&N9k|p{S98noFlGT#5I69Gl4nFajYB)Y17S;@wV@jVW-h(5a(fU7&&65 zHt6S*f1dux?mMFjyJS3~wPsf$yw)_sp9SX;(ZswtB1@5Mi))%LWKMXUoAivaK@-I_|e>nW-y~n@MWB=u^za|n2e9rznxbGhp z&P*$8?tqkA1)DR4%iwiQ8EfQo-&`W^uZWr(-Fw)jcy)VYIp)4%qW+c8YeIs}AUBST zk4=vF54imM2f~QUH6APGjU7*qXvxv%2c6$NMOK=g;8T&F{qok7A}Y@mHccF00D!NE4)C4IBx5OIeHAVTk%zwov!r6;dSltLWCxs|uA+2hScpQ{ z>Q-uMR|o3p#Y}1^7cQ!i8WOjIERjbb`{(jn$xwwIck7fblT4TuT|z?Hb~uAB&krDK 
zET22pOKLpbx9czqs4f)ozVdj!1)4CYiWE}DeH7#qs;bT7+D789>lCjS19rmk{7c;1 zxflT0_Nty^B5zA^Uj75hE}6~&@5sg)k|qz0mskVy$GD+Gj09Mtof&;H8x8wT*H%srKH_aYy9Nq`M!sOuaoZi=qKC_n#?+ z3xCP0>7d_)&KmK5=QI)|FROx0H3{O-+cwCTr%4&0(Gi^>^9Z$Gc`??HzoG7H=Zg$T zC7Lg=tQ-rs$qaXP{w&j1N+#=v*I4CRb*c>;C6*+A7-qNqLLboCH_?n9YO)cZj1n6) z#?R#H`^g2~w|+Pu2lGZ8a=^T@eBE%Y(G!*Ro|O+eVc@!?gBQT$Xr6t`R7qYcmyf5= z|H0|zDd)>Am3ge?cznXj6n6*slJ%LkPvXdTXsdHQ47yhW18YNui%WJK%ZhXk$!fc@8%VM`A5Q^gpR;jNKm+0Tkrbf`4mQ^4A^vk1m*h>yKu%i(EoofB%;f zaPvlUEB=~&90_k!y`l54gJ<7eLGfl>h2A7e#e}Zd*Jb$YEd3`7&XL2L&wVQh0gv1K z+|h`w%$rhXOBddx;EEUhrBWSck514^Xz-M1Wh572EdGy^2^I~{%c2d->v)%1dFQj-GvnT|4>e?XXd z1BZ`rOJume_(SQE_$j;iwDZCjy_A8ox`FJ069u$;rsjB>V|grpNUulJqYTX(bB|~e zU8sLRcOTB-|A4HpKE=Mr>|#hAWuL+xcmsh-MoTWE>?AW|-U$m1kzxgI!*HfTt`G~i zBBR)gaiuZ_mS_95R{{rbN`LYz#I7An4%h2lnYAKuj!-D)jvv~h+Jy#EqxM4alq*~~{i2dJe>)w;Iw@5z%eEPs= z012W4|1XI{fA9Tk9$J>Ih~ZiIC>Ou4#yd3rK8hh&b)PLFDdDoD%E6c1rufPcHj&mQi zda@l<*H~VP=x@xBbls@azXE-%U@`5P-6~pMT>QP*QmWs*$Z3FC2>g`XoJ5D+3zJgG z$7ky#X^-Zj&RP4@jPcsux7y_jy{NF+ z=_m#3_J+0YG0Epo_B@Jl{1MmDs~QPnxao~fifGr%6=9Iu$b1#I!tmUL&CLeA9P14! 
zc0G_8YHTVSu}e94ksa*1wp_K5gEift?IhdGH%x;PGIOD``Fptz}AU zm3c$T>QT1M?UeRb_TKR3>WlQC=!D}PXEVW=Hm;@Ow-f!34Qg#Z7IK^-D+$2l+_*E~ zzq5?~)!#th%%sA=bC)x00VENpQMB`xBr7_9w8u~SXeDYs>4*@H_eq9gQ}9qypgR~( z3&x{WSYrB^`fgDuktM`0-|)H!mN!jHAkZxS0iA+2P)A!dqyR`)^V{4|XKnq`Bckl* zksdP*-PM)eIg(-{ASA5CeOJszu6sX=xL)|faPl|CxoEiHriA%r_WS91Hg#Mj{V96q zjTl8TC#u4GFT8J6q``uHEwHFix7l|s7^R;_d6FM+sNM_o0#AtHoKK z-otB3bv`QQ^}6pO7ZN7c7zrOtUSS71{2z`r*yULf67!_n8=Bnlvt@a&(w^rr;uDgb0;Ou7G5uc81rj-OWFXap{ zM;CMleUu6QuDucww{D}FnlCnDSd}#ADhzAMUgD10hc3AQBsV|!QQ>aEQp;Pj(A5?F z(p^hNR-@{;sV*6t9e-eAD*%AQ4W|qL3(PCfzxOc(22q3M<*~NJ3BaGF!CP+%<2IdG zU$4d>V;91}I&(~XaAEJ0ov2iGzl3kZkyPe}ak1{t3PXtUP7rz=3VYqGp6k@w|P8;}j567_Dm1u)^+s7`=KTklbisvVFdSKVJ)r=zgeolC_I!-Gx5Q zhb%)W{Rbr~qkA;z0ZPhyIkB^6cR8AUkXHc8$HoyC0Bp`jH3%;K4qW%Z^>IBO%uLiH zWSf=lK9=6v#7+?CNR&2{yT0GK$xK2zStX03zXQ;;@fv_#{twIezc$N2r}tML+rJXU z_BUjpg0u=5`>He7B_8z9p2#ALNDQP-)Mlka z_#K%@;U3sIS2A1=K|0$pb$UO=6|*=i!OAXI9^BGR7P|Q1!rhbRO-*MY-ounLtbMFx zPm5RwhK00fe=SyrCojM$q3`Ce5XJW0qI4fsBe@uiR8e&wP#Zy(V(e!-iMvk@kvV`v z>-C@iP{b}IW&yRjGJC?h>gSxHssOe>;=aI%{zY&btlwYYJl>xkAK&Ga(<&OWo5}5# z`i}Rkm1~83@+xdZ_yxr(e+2aHUg*@5(*{u&8)UNtPxWLT`z(hf^dTiO^9V`TanN3gHS> z@jVznQEN>nR^r>rmJ3d#eLIKVltVjF%z3JrTNIb_xydX*{lS#n0iO&Z8;D9+nniH9 zC}0*7yz@2r^uYF4r7eN9{Db>^!KM}9*-wfN&Vk)S(t+majxQxVz@e`&b1~zIDbe!+s@UD#Gi9_h*?S9NY@KW!N5L@%yj~~SGS*Tx^Sn1?~cb5kr-CF2d}Gs2<;QdOI6}8|B#NoQ zfqTa1J!&=-e10V*DcvjyUXvTUWa`jxb|JQjCvypfD0zJS#|v)`E&k10(Bd~efE4t|c|P*XKEt9d;wH_K^KJikwKjWf zo}wB9Q@D2-k{s;7Gz zaav392gF)Ju7lqx)=U_7#q-IM9=^(T_|fr;z;t|0bId(%&O_qF@io?fy?7VB%D36% z>1FCQhZ~Wt^JPjcb}U6HX&xz~fpN$A-CTZ&WMH0qm%tdBFrHN*N1LH>!~vWZC_jB`rQ5R;K^||?d|5JgWQ!LP7n4_Lva-NQZCkz zvjUr=kk&a$EIeqCJGjD7S~QZr`V;NIS^wo#qDslPhOW`H+0|X|7kO78vM=$XEznYP zNExP60_Q^1Cs;4Q=Qk9Uft0|vTc*06e`O4`BFMJmT2ojXU=N2kwbqO*N7cAijR+6P zry6;}dk6oc^Yi}`9{(RzW&hRgUlhHe0nyie5jJUs%DXLYVOPS*als{ zXNcOFIoK-b-TM$(qT0q0>ko2G8>aM*R2#m9^5mes ztBj%?N>+1<&H@519gqk{jhqdGv?8BcE^Zq6+qq$R{TUHmv#&C#U_&Nk_}nF9hqe^^ z58jC_|!vv)JL1u 
z{W>?!CyNkHo72oZCld&@YLJ;pl29zbJB3*wS58=}5&m3OBbw~!KgN|p7&+8|282eSzglLyY zbHZ^EJW!(tQ~x55Eb9x{>1XCDBqf-ul2`THS})RmEkC+Pz_X#`5ws0upL2k zBRjVl2lksOtZ#N%x!W9biw;=x9Ybh^*gnLc5BoVQ`aw{m{w$tCS|d}fXOHIQA(a-N zE-2@f+q1+%r+!OstxS^W@S6{FqNVwkb>C#Qb_Rp6iiVArVi?C^b(JpP$uY(6N-5fD ztRkvvmO1{`KI6Rv-4wVn>Axe3@q|d1VqG~WlY9+`pE3gQ-yeSb0X0&PqE%0oexde7*Wt%E zs{YcKo5Flw16T?J3~*O^vidb*258nF<6R8KI7f<2 zum-A2?qO7^3D>U^?SD7_>hpaj5q?lE;l>V|PuQ!Dm5KXQD&}3I|3hb`GVZ{AhQR*| zN=MXMqPacs{$$JcZtXjovM1CnLR2?9fjdqex7KDX`s+oGqzwAUb5RbmszrFy8U^{_ zTkZq)9~D?saM7ciyPYcK)fqC{$_r79&VsF=D@zSJ9Z*Us%_WDMCR)VdpRQ(A;@dIPUUVys(JM+=HLBO({HJ+GNJp&bhYAGwL=S&xPFXAo#@+u$ zpfo_B!2GI`I5$u#8rCpe*$egf;>#hN4O6kozMWM7#Y`95k9sIs`XN?HTJki(YRPK& zxV+eMYHifaN&Fo<-H`-?$OE`2&mRzEx_|A5uQWc@26KPPZCMVj9i_Hy^krFVG)wBE zP;PYwkjwxh`PJ*|??42^AQw7yiL_kl*|P zO};GK?Qs#=JcEb<2t0FzJGtI*4X~q|7ir+pZ(C%kEgIWfEppUveeK*kCw!jtI3)^I zH&YC6#!pdZOnL6Ici_KBtbNyAh7Ch$NDuHyZKMZPLr0v|H&n~$_Xi7oLtnm5lx~#T$KqEb~n|r+S@E1QDzife(Y z#*2hl>#f=?Cws^>)s#GgB;lM~AfqASONKCRJZ_;-B=$gP_Bl~N<-brX(yH{Ioj(T|wnrT4!>+2m~BMq-%DgfbwnmI~ik*XU&O!hWx0C%i6e zV6l1g<4hHii$vPkSShF*ua+|am2 z^F&U%>r`owf5~Q|iOW;y6)rbDDly54igx<Pw8m_YE2^r7)!BA+6SNf_6_qfi!jrzXI9GHPw!g~ecI^Gy{RuM_ zP}&|Q{J2kRO7qj>`n2-*&2hW0>2T6+t1nYN$Fp$SWdC0DEu=O{a5K58u8Ow#O`vF> zXLh^O5IfLf{du~>@sat9UHopr`~%bHA2q904{3rj9*+B}WY5oToyd7qr}5P4suxz= zO3zYSGvfY0qMF=H(Piok<4*7&{iggi=u-=uth21RS^s6htqE)2ry~{fA^z7gm9YmZ zjN>I2WKL$}cvi4}SI9I=g*Vjb?i{5I%vPb#Due((i-O#Q*Kf}{hF2?(y|WElGfBg2734W|l|J68mk}Uo#S%&2ItLVYG!yT{u zNx%7+bxek4+`ieGe1^K4O1+22-Xxp!B0L%<_>_K~vH4B%Y$4`|gK%`|FZS z1PbjGNYwRBPZkxH#No$fX~Eh-AFpg)LB=)6W0TtHQ^sbPID?Y(ubzHX@9MsBg@X4w zOiaop{CMxe2H`jkOWJ#myGZT8S5xycA-r@y+@2*L+V&Fm?rPQj{ajVw4!OA9dQP|T6QVy*d}B82#=7$X|}1uJLm<8dvQ3PV5dYubhr%{l4h_cCPUZqyHhx z%7nE%Q*ETUXvL>p-}8;~1Tt~$;tvW&^6-85r(=C@M6@fAK*?VFQB%^@`G>2m+_Ht? 
z-Gu=E$`y33zSx%*ora20LeXV@N*zePNdmKuDh%Pn=YKFSLxR+$iD`&dk#>)E8jXe&jG7HeF@Cc)=ua}U zdq3FR|4fkAXHs^exPpq<36ApR7e+HhZOs^~HQ6LzlZtjqbm`*W^g#GdSDUetH+){hSQWyu!8SV%D?S-> zj4Lmvkd#L!LoKwn2-lAmpch0Y-;5%CCE0IL;m*GacvHyHm6G;jGEZ#2;5$ot3h+&a zpvmfy+j0@pUSw$)G`9osA}mj;V?ys0zt_fri~)k$^hx4X%^9_nb1!e-y}Y@ey3US0 zdXwJyNjX*t7UI{t2{_V-*j>_9$fFSq7{T{NxH@Jgz@_S?4p{Ru@NLS*%Pz1&O>b=x zW=#AlXjbH}`Jo-j-Z{LeWniV;8gA6-n5S~3Hhfki@g$x4d-l~k0n-H?tV!vSmv8TG z#0?%~RIZKWMipvK7a)e$hTaG!v)#G*^KomQ9dnucX`|~VwAu7FuhiRnBPHMH4Z=$n zQp;jLjAn_6v#Mq|4W_Yph5TDvV5cdM)`8GEyYr?jn_T5`L*=r2^I$!3aJ#9B=Uyw} z_8jwgku;T?XO8C3Ly51SkRMkkkg8Ny^MC-@rgMF_k+- zY?`*pv6rlKV9x}DlP5jnGbPpLks=N7BeFo}wVDp7WM2T9lsAz*wEF3u9XHFWH(B_T z%$yjQDj_=BR*k0LWPallmh9A8{8^24)M%5VS`^O4c)yTQfAs z*?k*nQQfa_`V53Y+IHXJ_2LDQH#r%v0d`6^M@Xpqz=CPgq!i3(&lNp)iJzm0_O=^u zCx|A$=zvPk;APO)!x8!HkjH6k5We_AS>syBktw$G=hi&4l$M_}fC8rqZ@pHQEj9m) z(0_DgV9;sLWMk2Ro(xMGX=mfEW1QIG&6V4|uc$|+OBNRZB2U2x^MCN>coNfuauR8u zWOV!V@mbC>i@n&fHJW>PPGkVYYD`y?qC4LY10e9sRtp)(*X` z`q}qL^k9eU|C_Wy7R9UIp`3PZ*S6R--Cal>e~53#5o>8vOxDtbT0Q52-w^ z+HXW?)OY`MOQhNK?NRGVJVvFCou*`f@Oc(Xlb4fzC{}qVQtDBhIAcnB_pfYKtVPR8 zVa5v&3i@}~H0y(b`Mt|@O$>`0Ro-3nI((Zz~kBcsEN4!5OEvCWp6W?+g zJQ5LA>UPpl(hFm$d19sObrrnjUllT<4 zgCPch0_YVq!8;*UcNzW8H{A3rrpzD0(An`k%(vgo9OL%pLfGZ~vrn zHjTV~vLBPi``n0nE+)VOXWuNhsm`zHoQZmt>!I$!6SGx;e`joY*j*>{9Ks`-3#1Rc(% z5w1ooStMpIK(8w-I?z;vu^}Zj#oTAy+iu$pun7JlLV6k~bKVhVC2eP?ezJDg@isK#5&h$nIuxEe=|eY`PS0pOS8~L|DXNa|iLqg0Vn-d( z7#~pPAo0cQe1uBu{Fca;2?f57t=NMtZs|RyDn*-KGIXDv*PG&EdJZ8l@|dpCC6CBh zS7vkTQ4G`1X3v!{bGF4_voSzIZEE4G*1Sy*ef<+kx7#<%IEFs=+z;IU$<7|rXQ}<2 z{px6A-s7X%v5;1a+6rzJv-F32nS4#&4(kJhn~(t+Fh2E6r@4yI^liB@%|3}{G?N=D zn$P)^d+rCkWf==5k^qUUzd4|YB#4CP_Asidm>O&Ln&?HiS!?I1cm0~2eBzy;;JCwb zQ@K$~1ss3`CK8{z7NDoZBoeWUL^u6%i=K(}>Ad*6cUhuY|{gf~38@$oAzyLc3T%i@pWM7ER&yPb6>=xw59 zA^ypTW~MSNeQnGbag0~sD{Un0mQh}7W>^zD4i=`PGCDr6O3AB z%?50vtl)=qNa?>MknC-6fH1OuyYLN9k@NT5-0@GZows@SDc^9ai%%IG4b_9?Jr6po zWGKs{GN2W6GizNzERGW8Y>wFn+>zZ5ORdf@+HUJ zZ0-2jK=l!=6<{ArwM^G9nuTG{P#q#Iwm2uy3Rnh?Q6ozNDQ 
z{-n|(q{X{RV_|(O5=GhOR8?ly#({N8+Bas;%dZg2AXXLbfJ?r8OEl32J2QRYkgVvA z8F0d~7DwL9+A{b#xL6&BHci32ti&Fwhw`!CQqFEDvt;}Oa&tnQM7fw!RcKzBpEa^( zd(>DR&g@)B*RWsY|CS}T?Ixuj=ELaGMtG#jqhi%RAXqlUD|Pu)<`zu=X{bYI%|W-= zPr_;pCCw}AfD7y#ROX01=Vt6uxfek(y`l$=L3&^*+9jd%-NKEEBITn$RX;)NlUUs? zKP#PRcrM@S7m`xQ_T?NG&bQ;;Jxqms4(Uo^d=pvtPepq%qjq4;6f$LO^wPN9>;0%b{!>pv0(CAGwh-k(cBpg|Nk5gI}3Jgl)th@T)Tk=0p8oOBUE5^!K>YB%GJiT2B87r-{ zf=1QI0s!TB5uEy^@W_)~=sgw6jBW4Cmh!X#q_rB07j1Or_K!`?T7bC)`zCwQeK6-n zLj{(s$sNtev|@Ay#vin8ZFCa=#HKlf z?sqXG6!kCC9??06gYaidG~0oTtgb!q1)uMO_S1aF9=%mXejsjNFPTpP+m$%YDF5re zs(3rWy77wI*r{6ys0yx^V(W8noHM3dGOf>(a_$Q7v;s~JUuim_)V}t(?;meo>w@W5 znLL?U{!LMly*f~DkMzQxbiyxNFTNS=O?kfdk&78KP!THO^V5I;@0c&oY(X)5#r5Sz z@f_{@kL10%+_n3gEG$e~<@p)9D?{KsifEO)SA%uWb|`KUztKaAf)3hqJs~8S=6Y8M zJy+lpFKPG#D$LOHMbf}b459p+owbwJ^h{|Nwlr$PH|f{0HEz)GSe{vbugy6|{3q<{ zN$(i18}-%Kj%ABW`hP&~dZ#b_7{WIXO{4uKhO2UChOD)&3pxh6OQ%0cJ4-{?D@lJp zETl)>YAuc#H5g9#10vtOs(L!2a{jI2)z6f+G;bJD`g1KR;WCe`zJG~zez#MeW!QnO8^B&##r~Ji8$6jF0*=zKBnwQ_CDZ! zGJh@coQG4a-9)3khJ}&1YYDVXt{$ibRpBIR8PD2DaagPsitrJ z0ntg_T0HL(At8`r#yDf*bDDbvql2Uo6JRHfpCSna-8z2y_d` zN#;d#$a4=9j#vj;<42S?NvrehHDT8=hnBDXARDtnsw2XNVuyg~9W(RG;G4{z<6`Lb zzc}WebU3|2=$z_Wqx?0+@Jg`>ywc*ywvEu2BbqRY&?rxbpD)v@hgE(zAKtrtCV;GL zfGiAL^JLd#GX3QGTX@S~^h(mIvt)f*eWp7HT2sL}UH0F}MF%})r}%7>h60BFm2w(+ z-(DKWp$DqMp92?E)h7E}fK!dapOs37w8N4wtmOBa(3Jzc6!m@hy6xCDKn0?(#8xksWCmI;8sFyv zxA86lbQ8bZ16#(QGSc%lHdZDK6>KwS!{{iMzv^{A_;*t&|ADRafAr%D0KZQh#t9(P zWu7us{|#4Pm@H>Hje3`Wsd~%cI6z>9qN&CtkYT ze24`H)2w1+h>jZm=fthMryUd-s%I2#v zezc>yG@@1;JMcU#{QfidiVqs1^;Ea&&uW_)_WE96(Jero@GKj16MN=O&GVqtI`QgZP)%bj0c`y#u8b-X8XJBG% z%Gq13%|NV>%X-S#zvK+Rr$b~0TF?CfQmr~*FWqN>KkM``5k-aT+?g!qlf9vk-e%vi z`nE%*`55GLhUP9AW{@L%=K~lPs2->*e ziRe@p0B`F2?!mAO-1k(~oL8L}-fhz8T&MP$oo{!zFlnrA>GXq^_xe!Hjc!0q&T=D% zO-ai{zQEUWbhn6YT75*je-Z7FX7Si=K)bNg>#$4gPy|!Q7Lo6n3JXyi8SQd%oGD$; zE|dhzj%IEde~CoD?4&OA*CdfGSv1@~<%S8_Q`)_2vfC~4bHn&)_ZZ-<@~u;BgAHv$ z$=T^3pE=00K}zhyP0~ZCW&XDSazqPXf78^0t4hBfgY7s`3p_W@&2f6Tk^V{1Kd;!= 
zCma6Tq?6S27xL5Yw3>80dRkpZ9B>FamB|e~hSLaDo>V6M$L9Klzy!@F6`fcq>!RS_qBlbxDaf;t2qN^8Q4<$q1{|!hD7p;~18C^cwQFq;HY8&* zmlWUVg_K{Qwn229sq*NWB$bPM`mn#WwOxz~chS;-_&pl?1EOhLZxF)Hx0?tCo~cf7 z==Zl#?ivLwJGuujM$<5M2z7V^CovKQYp^C^*YmynlWz7o{=AYc2fD->Vb9IIi(%L^ z*8D&cn816;20k0JwTRoUPZYlkIe7SHUf_*@qIY1Q6Z}Tg-5JlhRR&DRcGZ(w!48dA z$#vzl9Zt>Uc~WWUi`hZZY#Vb92QtA4&sV>{?GpPjq<%c+dVpS5!sd~b&biMw;D>7H zQPZ2KWrcC*RtTvVvXc2}_)3FMj*#nP31DT^`U&oD`iVQwIhmsx+bNE3h_dg!?W{5X zB|zE{GSuVNqN_HrFXIrF67BLSZW_3-!^ybIF+e#pIb>lE{NrXw6~MYJ#de9^XI_kX;nYZbM|mHj;9Sf${#uj~zW8iM<*eUm`)$avxP>YS=0#uPj;`2(SM_ z0i3F4Deof}Yca+p!OCoEaD8|B8D!q$K&A-LV{B=546x$MG1CKAr)6{3*DQ{fk{*Ai zNn#NDO~LNM5}^5<4UbH9ot127lN^)jA@6}Z%X)a_aNJ)5bWzO!_DyDj#BHh7F=eC| zU*wwndM0i*Wc(_PeB<+s_X0YTD2Z#vD&|VJjd!fhZCp^k9k5lgOgBK$qy#rOr7}Oo z`6lGP&n!2Or(?;2HUCmA)Lh-G?x{Xa&oRAF8M?DvJ9;GPsV{7KpVNw_TL_VA;i>0G zwggz1Yi&+FBj^WO@zopOi&ps#Nl;{XU68Y{#K?_!eqahkbBUOZ#ykE22~Bo!CTrZv zc$@8@@IC!_Z*Aht5a1cbs&k)zP$GrW;a)J_^+r#LI#c@73P%i8C_qm^u=b4;^d_WS zWG*uc;~4$94S2oGY+A*iZJ+b5hTrxBwfmmqNaECqQ4h`{c)R_JZXlLz)(w^#;EN7; zwFXg{Wco|J=IZvE>ilByDh zynyLDP>!rdCRIa&|A0Cgr0Q8UqC_=HQZoJ8Ca*)7LxZ`m*2bwwYoBh7=uB23Lz`}3 zNm*P81#@3Y-dr`ls4XK3YE96VxMAIg3KNFk1{ecPL)=2GEY5AcT5(kD{Q?VYL9n(Y zQKrn$BafOSx3ME)<8AZu(q@oQZVt!+->Zw4Iw(44-Ud{3G3!=90^G z-bYVL?z@c4!v25)JiVHff_i=o`RC=RPY;F7;FT}&dy_dyZxeeei;3*eWn+v&@Y z!G9Y9ZDt9u1z%SzB>Dhz6;kSd&-E+CUL(m(Ri8To>x`T*|0$y{5RB17QbC*nl5=~Y z&LN=t5P-Oi29VD6T1baK@GM<&!>An;DKxebg-OpjVWa)%sarN}!rp~g)Q+x|nY`;S zvQG*uHu3a;RgD)tN#1!`7Q(3WS@$Cu#l@Qvqi>h9q9Rwy%0(78HJX&Pqon60PAdWa z_?hUEk+%ARu^D^+vwJm%`ufnXB3Po}>2QwW$Gz9}sb9M~G%NjF7HF7_i3$k)k;(|= zHy`flLg@Fts5ic3iJO4+vGHP?vKIN?7(YHzd8!g@r!{N+nY<68BG zEAT}kYL`Z-uF`VeWSIHEz}y6*^2493*32{cJ{$M7;I$E;PyBpy=oce*T*s=tD2vk| z*~WNzYV!jZg(qjM!f)FiBeyqI1w#&AL2FHb?ge3nrD-XKw`6bzzCzVj#xe{mKyHb@ zZB%A?{9tR(gcWx|{64OvqRdTHTT~{}87c>v^S_EWIMXc8Z1etBr{et$n4G(;IJV;e zlu&5GGONS~V;cC#e_YBqufsfSOndu5m%i2~VAxfr3(~ZuF_>#&!;3snoSK`?a3_Df z(!ZugtEW=4|2-jLq_NuHW^`AE(ny~FW1C=1)3s#1PTw?v^D9VLUG2y2DVv_eNvbQu 
zl5E}C93&77V2R`U^A=Jkxhk}r?8GkEc!*I)2eG;TJoQU3%hrj%HHFjpL{0`>_j{&uh zA+SXNZnpdLCrb{b`*r-x0uGmLo=Y$@{sy)^ynVh&SWFAdga=0j;wI3g=Aa)b^;EC~ z5t;-KyLL=XJ1` zU^gpnTr8qL$q#MDUvZ-&NSdZSyW!}0zbPIIZEy53LA%b=%o`f+q{Vy13%FEL_KUv0 zsU+Q4*wtNnBln>q+2d{fU+d)bQ5#U=ipa(R`HYNvHK(YvS z3@k8JrophhYskdtBcwwj!P;hL>8c#A_S4x~%>1LT!5y`G7k!_ONzrmFjdRpi)&7d* z=J4$qN(hmUrlEi)s7n;mM8y}_dEisuIL7rlOXy+=-9u-?<)+Dpd+k<k%AUoJT$$1=&QBcpiRG; zJ_?e2ipRMhP-C*f2UtU+ym9eQ3|Pglzk-nmqytspR(-V{9|5dG1QwhaKIqmy|M{h@ zHHQ!_-Z`tp-#_#0&isnkomZ+k!Wy<`3rrrNLOr`(&M0% zJN@Vv&JwQdr8_A{)S8&_Jg0Yq8{X#b9#n#>4NdklCwddvGRYi1OjvM{VXAv0k{kXB>s0rl;@Ol5fu9zy}HC1W{eZ|ePXX~WKOE12QjLrGzZB9;rwa+mQ zRU#R-N+%2f*T|-`FlUW6?lpI4&nV8+-O#Yz%7LzkoVF!`iE8a?HhE^Q_oR_kHBbBk zT7B2ogI>2yR;P6d^009`+s1YGgsTL)L_rKQK%aYe5et9*-O&V+_|RJjBLLphMYW-X zefh3ZzKa%Wb|*g&AqW8Wp|@W@Xl83cNfbjdo8S}lDSExqQ=Dr|#x$Qm!z%5#MKlFM zckXnq=P+f&2=;dbE3hT&1K^FIiPj?c$~?a3MUwi+_h801p}oFK&2&_^G1k@MW~O?? z7uZO^`E0|GbJowi<2}U*zLlGJ#r5PulfsZ&lupZ3n|DH(z^p;3cJy_ccSiEeUR7!Ja2ZGy52Jo^|wlD9;OJ_}Fu^jSa%;rJ536%4%%( z)MAA_5pPd!30dE8kCmh-zj6!CRXGpHe8Yr*A#YIvR8ZBYU+oV9@nINU(yix z)&T$;#+%r1^az;`B_OymyVH(QJlh-717847kl3w)zr7G z8w5c?Kxs-1QUs}rG-&~pE?s&PklqBONgx#I9Rw7SPz9t!dhcBnq&F#{cM@uVB)63_hCsqh!tPcfdw|rq-Ds(S^7EY zO0(Xwi*@hqz0z&fD~d$Q6(Q`gOkYhevNXB4HpO=X|A>XrV4VLTHjHIPuTL@GyYcpT zVQ9ajepR;SJZORYEYZURaM6oD2kb7Ub6N1|K?M|a_q{;B1lyenHLQZ$^?%m!JXHeU}1R4iE07G8WRGT%?i%-SZ-0FO-*LS=8Wt zrp`Uq)8^bVd2p6_ZYxmE&J{8KvL#=~_{K0L zcwXSlSEh4LQm9c)U6Igb4G{xW}!rS;$K9PIble`T^#-yo)S4>9o@ zRN%O^h(-d|%XI2o>*_b|+)|LfF&!%@CM6)m?4g?@ubf)aEHSyQ! 
zNpRJ%v^Xc8#a{g5QKk--iU2Y$;>VmGB-pD1golW^?VNyut&9x6H>sJo9mbqTN<$A{ zIIieuW6UB`#Rr!SAk6%M!cc`;5VN{1VRd3ROe-5-(?GdLuo4+W^~75268%S!_)FRg z_W)EV(l%G636=HEsIk(xhY{REtGQ!ZFtt06(%4NA3}@4UZDgK3#7A+{PB3|Mx@hAz z_`ZrexTy#9wMSh(_05EpjyZqCEk1zlt``DC>F6o{Ez!2KF#uKws1n>QMj6X{J3Q*a z@BR7>x_p#T1bf*Q8i6+xB*C>vJK+kWf%q%jVy50=RfZon%hk=b@8BJ}{StEd;ENFc zXV$|&wyLkOu!j4Pi$~ysfI;vs9OWQ$3isl=oYDcpR$iUO(-#m zT3qn)2bRu2uVdsa6WBY`$P>=@=z&mKk*<771LgKs7z%s2`6eK30d7|tEd^WT;ib3c z9ek5;V2ke5iXWf)mC)(zv>=%KIZ^ACo%BJkG7-|FHJ1~?jTFJSYP}WBm`vAcK)16Gj#)$ zOksZ#GA;V6Ny5*Ke9_wNHkKij^7qGqbmf;#Ps6r3VO+1soFd=qP5p;%EkFAKUL4WLbx5Z+o$zR&EN>B@e~%}x8;oY}7xtoo8Bxo;Jf zc1D4bpHrS$_n zes_0E-%%QI?k#=~;FFH!!>r1J)I4_Bg{rR8zq@%Ix-YFs-zP$ximK~m5Xd7gF6`hj zH4#0w=1t1Vj6IYt8R7nNQNvi+Uw!xpKi+%zO=ec#fBU5ERXJHVgj|Y!StVop@Mnhy zwt8}7&{n4Ou};9Z6tg|r0@YU;YZ;A?zhW|`{f>Tqj5+j{GKK`3-&7$K`EgW)r{;J8 zE@vk>?0KURjC3LCx_L*~8Cc?Dp!eUuy&k(b>u}zBI8*=jraMB7gYuikxmcVMW;i6-KbDl}~}8=c}KV zi%Q~Nl9DMgvrE3=4rzp_YNN9a7yEcvNw%S={NcC$Fng4b4$ponx zQWKNjl-A$D_qOUiQ#Qw;#)82i4FEeq9|CMsFYyx>5j(M4#H24%UNHv$@y8HPa;q-N zSX=`YJPX$Ar_@Kh3I?5w4_tfvn*j1^5KqxD=oQbK`(rs8@D4^|UT{=*HJX;GY(9)e+k}ae zUrGJzMCt0+@ocq^eu@lY^d!uXThk}Bt7jI3P-dxcWt-+&q75tM0@-865rCgEV_(XH z+Cambs(g`NGm( zG@Y|n)I*HLAH8L&j1gHcbEVYVNSV|!GEu<`&Dfv!J)UT>cayd9o(Y|VMC6LHh$vbcYN>ca%4B)f~$J_=- zOZy*38rQ2Py4EqXH&0(X2{)OpU>`3*ZulBC@A3R3i+wSCSMVNX4#Wq4<*Juw7jw6e z$v2_oVnvpmaB?*TH&XYIwdhK5pkQ_qUR@%(HedA(5cGJ4p~eS%27>ZoyvWw%B-+@d z#|bpY#9b6xPOC_=rK_#25hnp7(h=#lt`L8l`X*j~f8S%_9?CUr8R^82*hCU0GCg#1UiWOSQ*A2=!wT+*Aj zx)&g`5|8EzmXcf^d|!U@-du%MlV4SN;BZ5lq(!pJoEUZX_H$MM9ar2w4STwGAVL3* zSpbbcDHK1JYEd5mquvcf?18zQw$5&85{YRIxOg%Q(tOby3Dc)Dw)g3pyy=a|cRCFpG^~l=rtYVBth-(MSa<)LNQuI82lIr{$!$Box5xpW!@8o^OS(pS<9R{fx`-7IwFx=T4F3y_$WTI=!Uma*jGwvDJ1U$neUCB=x$aBW`kmIG+BzEd z#uny{?v!(KenNVDjM`QP3k0B3wCrJ|SYG6w2Dy2MofzHfJHdOh9JEp(NrvNSUikL7 zU&A@=u!3#8l0v~~WmLnbN zbGza${%X=SEZJR1F|gBqG8N&8KZ>^MT=#7j_OsV>H$&ufZow&}xbnB^BwPw|=KScR z9gLz<6Hgcc$iiRXs?8kqE=r-`yMfl6Do5C)!uKJ?=c_&^2#wtIiK5L0x~wNgEB~AaUFv(n 
z5DDsG<6{Y)HTlefVeZf0i~sfJ@(Xa(rjb%BW*EJt`8T!=$rEKaW>*=TfNOMh9$29z$vXG6;TrD+HFn6Jy&UGDW2O$OKNuvht}vWOBBovVlT1!X&g{aI@I*4@AkF>j&bA z3?Us^X(5ROEAm%KP*PNg+Pd_Q-AD>=`kRzFY!6^;ytDS@Mo9Z1)6tNIZR8Qu<(j{Q zUPgiw-YlNw06F#6^*HkNBbzm2Ij)QnMp`QjZ^K8)_e*E@Qn~=nce5b!ldNOLGb+w@ zMOkA-%kZ6!^7ZJplF`O=p2kBVzj*%6=krD$EfWiM3u{hIl4`&f*cv9lnrI0$?z_*j zgf=%T`eRo+F5|oJZqtN$k?2A)Ezv6TiFdN7P$x_R&ZNslme7qvoVY3P|;^e|<+uUKYvy@tEEFrq-yP=O6? zlqN`3W?quKy8wS>kh#-F+RTV}HYGG$lfUmV?6_dAS*$~9H@-K3P|U3^R;8u=Ty7cO zGu1Vi_4b!Vij);)a3E#WnVR%&*Z-P%Z@goWd$+r4d&d;2<0l`*%_)7K{>vPzDhw}B zU$~{b*<u`A0|PFp!I> z=@TdVH1BJ9Q*ER5J1z1@2JelH?_Sd+m4JK8zzZbiG{gwzbYb+{|tAXO@}M zj2Wym*Ick3$Eky~M}-Zm*Qf>}GUH9Yo`P3Q-f92m#drUz?(Z)JdH;>hfq#$xZO{^= zr_Ez-8z{J`K3>G2>VWK%6ShRGr)|3|;ZhwNX1@hLqy)~s(z?3+AB9u@JpXsa+W@A> z|K9Hv0s=dIVSPAHw>1Nvv;rV_zx_w>o(i(TDDBS&=w3`&o5J3Z#mT^4Gv_=V3o`~y>FX1` z3!!vZGj*~zo=D<%o{K1TN%*<^-iz5|ENbK{WFr@5&P`a8TYroe?!vt!UEEKG^p*)m zy?_MLf#oSNS|sWUF9aiZDfN;9vte=1MD~PFyuo_H1XT(z<2;{14(GrqI)LZ~I~{rt zTdT(cc`)Knv>$N@z*!yk0uBLi%*kQ#Xdbwkk=&}bn(Bk|{pz;aSs(0~G3aM*6;OEn z`j1mE0Bk*)w!0MwQV$x;7ST%!(Q3sq`Z(z1C8IHtR1D(@=rh)+LEA;;{LKmhcjs={Uq2>-Z^*K+wJz~xgWdQ7p;{y+(i1IT zzaR1hXhKq7%=^aEe4D+S_#5O`4^FpO)0RUz6;tdqq&}JgKg5TgPboO$I845`1ixRw zz~N(sTe3_cJ*7K#Qs;IJ=UT9Y9W%~|@I8Ka&f$YqmbTY{KpGtUtx;mFWyfXsxq4H7Q; ze*xYjXn+KqJv=mnuEZ~QyOs4&(D`2Sf-io+dyXS2x^dr7H52H~&3Y=b6?hx1AG)GW zMR^NjyJ@~P#o6ug*7s05T5OQ5Aq7zFdeh(3>^dK3~DU1u9HLp?Zn043J-vpIopeN?22) z;~*_Ha4w4Vec?eYy;G5WHrrf$;fp1Ybi3dd>m2r{*`s>Mv=Fsjo zmR4%a6O%^NMr6%!rK70L(~*F*ZS{h2@$cWZ_$*Rv@tavLDTn2pZ1o5F>Y*t<{> zGVXOJ??fdI!z4Y!Vk14qy!%`6PNo0q=Kn8IHUBa=f2Z#+a*SXI=(`L@OuCl4e;$I( z;B$lSW0V7zmwtn)+rBk!uSwm5jNFr{v_7Vt_}^~vcGZucAD|JLkU}oMQ67Cs$a8Qz zD@zOz#~YSOa1)X@OXk-ptK#U}ii=FT7%aEeDf3{F<PkEuX{Js?004lUTmZ%_osWZbrr>W$TATxtz=`GM(V$fT8>FrUe4~SNQpjIk?QmyP zXab*u{poXge|c42`R6Nu$m7pf{Q1>CulVO!{mXp$=PaTA=lVeY%NYDuSO&NMX|Mjp z`uy2mSzw;-!8V%Ve==liFeG)=fiS!fqZ<*%Cpjp%-i9;-bO?SVkU8X)s0NloItwhw z9jw1J1sMJYsV?RQJ)DvqHOQyGr~>ck>#B|AUDl4^c_@!cj#8>Xb$tPGw2hjYR 
zaQiX$%{01!}X4#jgqTOCLxV#Avpwbu&cpO*Bmer9M;a2icgzGKLzAc7@x-}kI z({SSdLg|A`QA53fh5G)p=zxicRwWvOKCCgoVkumqyd<}PZ9SRV65IvqwK+6_kf*#3 zcA?7|Xzn{`me7sopKMmSp-430RxU6p`k4D(Qs#a3V)|aE6UtLyh9-060)!QlHr)N> zAeo5VRwc~NrCQe#*$k9@X*#!Ma^()tZast1N>M(H$SsA0!_E<%nHh{)FGe>lFI2pm z5;Nx;&$E5pQ@!ULx|z7EeXd2aJ5PIlcj;i(WVtR(&^mcXFTnNt10fR&xY(u&E@b= zRAHA(`3`-SgJF+@<6Dv}Q#%m6F|S0Fg2YOhynB?v@M>(q>~eX)c=vNAt%D^FUS)KKe5KIJ|Bw>_T%_ITm{X2>o@Acx9RhIm| z1@YctV&>6qO7)lOh!~OkjE{T1?|Hz_x-X}Wvngj9&;3v9$7`4$A)TqW>U52jFY>Pq zlT|xE@4Mf@BiAb{ep;TlMACDHsY3d#kFUg&+o>g4H7VCC8PLb;=_co(+GDexpRyXP zBVuI)uuACOOkoI}lOW8Fsne2Lfz;~hjqJ5sngo%(_Jnm9f@R3f1*j$JN6kX9kCc$9 zxczFDqCSgSmYlfA#f(K5193|U#@e8Es+u>{0cP8SOqH+Dk<)qA9@@d4WC05tw4lNI zulxqxa`HA8oPcYGPyXPDG8Rv9qJN-at-zIF47#+9#)d+w6MEYIZwdD@hvqR03hluDs&F8$C5b{pcP{&KsH0o+9#O~!pau7;kSN`*Hx#M0m! z&l@@&P3>q9?s{$;6C1!a9`5Ej^Oo3edz}RSRK{6ujK@&i3fdcTvTj+=`gS%th z)QfEJEJl<7ZN+r@`P8Blp)B|-?K${T3pVfyeS38HH%J+Y7=ec_(h48MnA76tYl3Fx z=TuhBO{kk*<~*^{)QP2do+ueVasK1qT$4>%0<$_e=gJtyg)RODJ$+(fIn@WR@WH11 zu)}vA09EpL5nnffNcB)N24Kp+hP=(VvMsiC+9JPOX=GI5;$Wo2*Y~NSTyec)8$>Tm zu=Cnr6&ww*UyRA(YV?b55=bZD$Y+3ENMs)`v^Be7s53f8@#0w1Q4S*)s6kQx^Nrh& z7!w0?U6v9YLAVcyScr?oIn^=c<#jQ9l;bq!)}?A;H{%Hk9-AA|XXN!8q>#Z^YVedo z*NM9FnFsN!Un?Y_75r@5A?GIel*$c9HBNA3uv_*U#95*QSVu9~g~rLnfJF+dvJRV8 z2FBL|gySvAscp&_s6nnTSJzqAJ@gH{LTa+B=fwLg+MDia)xnTo7_Z#x@& zXaRJTa@a%+QHCwn^hF8JVP3aIae7IMDw4Wc$LTCHq3BaVMl?Q*PtFEeC^uUQCTdPf z#cFj*PtUGZDzM%Bw%Nt=O^N8H2RX@Jm4zG>_-@414(MZ$3w?}gS&Xh(mrdQ*L8+%G4Q;M10TVOzV4<|Dkfz`5M#(9JTCRZ z1>Zj#8JxwpPr5~_h>@Q1NPaZ*Fw36s;9wfyYkC8@5KBNJ>o{w+wGNRCu@*e%oK44t zZvbg9f5#(89Hd4m(z*GGKhh$CpEjS4O1gP!H-;}+HNxp!u0uhAHAvZNUhYZ|M0OL= zcdeBT>p7KlP+GHi)3oo_`NE`00>5TkY8d#537!^L&??|-yer*XI@?pv{=IITB!fZ% ziAmBNsn`a_tX|s`+y#p?7VKUmox|ik{S~>|BPDo-{2(jHc&npVj-;LyJ=)5QDY#UI zK1bDcOT)SvrT3b>^>4e~;F&~MkUos902bJVv;uVOQW*LSZPz_axdX^ja>e_V?vHZE zx|hDVFWYzb&5?!L-@yU;J9x2163E^UVyz=U^(pCODdcADyi~%0M8Xb^`-&TBx$uTe zC480lb15ac!2Es6vkwV730o$||4|3If1U^0r~f4x|Bv^Y;P=$u6ldHhbCLSNvleh0 zl0_poL{}UDaY2TiHr7X5*>jmAuE^6 
z6g-)ibSLs&9}ckOsNT46WvMx11_jdkCOmCGO+|`8C>CyxZKdlP_bbW&Y>-!~=v4?z zfl{DJux#)uFqR{m*hOejr(QvtekDyO5A95f#bDA)jGY#5V%16wA0*En={Ba_W@dQA z>*Ch^V(J%+(wS?#&3aFMbeb-4#?dv~OU87xm1N2V6zF4S_G{aq%EIEAO?_ihL7Ks8 zRl~3E)B7qMQ(Ig?K#xYi>85oE1@bPl%K^-m%*%%^O;>H-o_Tfoh`KQEhU96#}pBjow$o(T zG1h+6X2UHa*Vt+^=I_MPyuZ#C=fm=3C8eWJ=Z)a>S&spOLAOc5jYsgxowS5Qy99|} ze$mq>GwYOnvrXihr=p|PChk6^ShxG<(p~0ti=PSJ()~9VS*({4J_JKQ-D=hYyH}f^^lO<3Zp6_)eK! z{QCScW-Tf-uWay2X(4N9RguVrd+|11qv9v8FN=_L7aOu+-ZvbMMhQz-p?SYSYp%UXBh3aejpQSBh*+HQAF3&atodsOQ1)?B2+N*F?&r8@vilTV3#5G zWjdgDC6GqWHgqrbuKni^c5qfUiOzWM*tEw>ON5Eah-`@S=DqsyW_E<1juQFWyM8)3 zfQqQZ(@sTbC70#zZxDoKz*xAUow}!37*Y%wge#0i#$8_=VSmI&YYBc}pAL0($r~DDZlxSn9-2wi*NzU>-T`W+ zXHS1eS z@6^2JYtk<9^vJ=x&NwU2iFRWC`v_n1?GI0OuXfB9G1?P?Ryba!Sg;08E?oK9%)X2h z)JNg+y&pK~9}0i%%wa#uc|kv+MAl&D0U~6VRzZwX-T+C7s3hHru|=n+1+ugEYNS`!JyFMax<93aYz;*yQF z={~$_Jm8kUPyxKV+}YMhTt(aJI}_NnF%ZWnsLO(>1BX=+=#QP-OAo{-6aB#3FA>wi z;di1Q=H|!*kzxaIS=YeySbq5VISfb58Q7&ovXm0Aa)PLb*(J>Mb+z)NRs+S+yB=mH z3so_atOr!@6lZO`<;#5@+LcgIVdZQ=sZhWsDsHLV<#@+N(s4BbvAdFr5`VOFK;D&+aVujERsLv5T(yQ9W zU3`dFTJ>qqk{?88-*91N%6gLly7+j?ZY$vyqYL5A_#V8k@BACYbG~v4>|)8tWx)bC zKB>z>9^VZ+q(uARyea`Eo-HD)&5`n4fQ9YC2ngPogb7aEcFh3%>j*gf+5|{qL^|zC zV2(}&fbZ}Fig2cZH;URi zHmb$Uek-_DEB{t=#cV!qzL43|LIrAtQOkp>qT=@6>U(dMDbf3r<>oMRn#-pXN`e^; zJsuAUU43OyDO%xB1m%d)2#bJ!Hvgr-bH-xr_a*SH48bXTO#tOGzjs#`14YgFEIzvq2npc^_*tQsd8TKBO*jf47`HOYAnEXtnbl7WQEx zgX(`B3r|!~?xmt8{dBHX$GK!Fl4d_~RpD3y$&45-*5(X{G|RXrVBXh(PkV|8*V|@H z@M`)A0R6xf7`;zB)4xG37Bs&;E>dCyxOlI0U&MZskbHC}mkal^61J$f0{^!Gwcn+K z;V`(H{I-sbjC$|8uO;66G>27`?aFPRK(7GZMX@ev!DYdC+m!B`F>$;KZ%qfUL-;DI zu0}EWJPEgw-0N7%z{yNVu_FVVlwg$GZ}dtpJaZEIA95?0TCah~^SyaPBw`IZU={iZ z2bX2HTbs#WO=5PGZ6m?Ubo(9cXShxAW0KJ}g(5<<#qw~70dk|8*}5~aO1h>rr)(zP zu!ARxePD%y7osQiH+VsS+$HU_`t49JoXU7)xffB?_04D6SAgMa?Eyir zE+3pToFFa$5+a~-Qf5)QST&4IN+5$x0Yzn-Q;31#J9$dWdmdtUY{muTSl+5mT-02O z%`5nzOu7Vbkqk!Lzb70?_M-_sw+)gyI~@RKw4MzR!A3%kpPis1cGvy@HaY?Etpy9* zEzW>|OMAx$Pf%w{ql3ZHlV2*oWEI>E*XpLUX4}}YV8stv92z$Z0z!GfWg}EtB=z{& 
zEROp0ZU&OK+WjH36Nq^wMZZtEIsxN~!2SD9W#GR9g4~G6K+*aJG!ff|+Q?m_)v(sx zDoHC2_jz$!_ST;iav~FfGn3R$&|uZ^YclGSDGc`kL8SWyzaXH&^T~}}-LjckOiXtc z_(RAqN4lSyHzD2pAG(49+_}Y%iB7XAn@-@w7?W-WzAWa6>L!Qdt($$T=Z>_B4@h5- zzXhJMq``_$H{u$c0CQlBN;~3s6VPG!p}{t+J(+D9&(0_iuyZ=w#saesFds9){8;v% zyGKc7s^pe57Et}L9Z(m!U;tf0PKAJhGb@HoyTWi>wTK^k{u<-2;~am3)FxK!5iWMg zk0*Msd08=@AV%7z*M0c|skhI>g!r_&N=+r35^42Z$_6;HK!nMT2i-B0Gar|sK;@t* z`rap};ytp{9ku?I_hFk%%>7HR&)+4A6KRp8U>R}6&5w{^(yavww0@FEqrOEATTo)uw2(EQdVe2y~z4UAM-KqD);xL(aeO|~) zdS)6xmIbG)jG`D;I%U}ET6DxX43et3m?J*WCq9&EdTuZf_z0yG)l5((8oXR0TGd3# zp`AF$@iLK_Ugbu9h>&Cm#Wl7dh+*qZ?A;2?6U(BlS?@xb9Eu#CXHUPCRHUkAy5Ld0 zfvHGy8C)^~>afCZ(WC8~A*nFF&)`mGoPNKF$(1S=_lgGsOp9gFCdABk*7eB^r~2cf zysROW>vqaB(akmqoN}|eV;ryB!B#)Y?Q0LKA8pf(C`a5t@DP*y4DsM5?8lQpS>Z_H zq+$op?U*l&mtmO_-BxooqAOS7BKx%ZlNaC4Z={Lc`o)$>7X?g_>pFfV&;KD}R0FYP zgP!V#7p<_nK*ps!xgJmoQu&Y_bNR*ONF4u;U}Do@ZA*V=J0JfNzGGZtJ_@-vc?VV> z-IiGL4;`fcLpTr5X}uwtn6nC2%-W^X8OVqCXM4_uo=Eq+U?VZBm?&Q7;OYnXlShPb3+zz&eIputjL^j3KL#=RCPWzRgQ zhK0#Fo0?ck*Xh(Z<|`cq$)VRU>PX(>31JIL**f>y)ij^vr928bxkvRnlC+?oBr88o z!uCG^;a04mn>&kGAM5O3JuxV>Uz|PkM_?&_FsVo4VN?DZm> z5_Ld{YPiIHo_onQo~f#-xiR2KosDX!ui8;kvvt3`?<#)>e{~a!m7={t<*LT0H(5&)_5Q&oROlmMu zfP!yU!KE;kc6`I>gOqrpBon-9Sc(_JrhGGyl$-F#WLc@_fD!tCZH&;C8&(>*V=s`huYT`*^~rYGBY*>$&v3jA!pxl(VFZhPTdzs^ zAx%6ziwzZ47fcj|j%4#I8wt*`q!N|%b^GQ)Sgs|(Nb~y`I|R8;*P}?gS?dgXIr{?5 zedQ1Ipu-1pBssK~PUhCjZK04GP#2G>R(2Gw`b>wZ8GR69AXAE}d;~s=%BCCwM_N=D zc56mv0FFn?<-L!E!@*MC%G+Zgx!D8uhPk8>?~R=P&rlHNNikFvZPRk{U7aqtKsCi| zXz;LKEBxkNDLU9PetR|R73HERxJt9bOa|#{javS`6MKDMfKx+SJD&&OWzLogUy}2T z@rDHC5y@<89ud+YC zSel??&m!gY2*{+iMW^{T;}}gxZNz`o`_2*Fd~$`}kjaCzGHYpSpVJ-Bi-9e*F+HvD ztx~ETuf0-qc#l)|O<2;aZJAi$S+=^(-Ci!uaHvvq5#SAJ3LO8c{1Uu()!~_cYe|A5 zs)eu%FNCRC0aq9Ty{8bx$~IYkDtF}|!j)~3+OT$lOj4X^%pbxbH27C2wA)jsrrbQ# zksnqCVTRgxixokV-TI8NepWZ;a#?!qwmb8DW5t=3I>s!5>oYb zrqN3Q8mf>L^N)pFr^f1Gt?8dTYfsaXN*{EGlK#P=X9>EYMM8j%>GWw816CTwTAiSL zf3Hh6Ke$oZ%vNDw$(5T}lWA(@rH&&5OHm59zsa7=t9A+GO4_|)o;S3X{cLSMB|o5M 
zD1Rg(zC3V#Ie>U!doz~)XR}b<7%8TLCh+A|g zhu-^rbssQO{XqBFX1V16D;NzXDL?H%-}>Q)DaU@A-E(n5$GvILk~MS5iIWQMV=LJf zb=h|rrg%S^)qu|qg%&*=-cK-PJ`!hL5Rh;7>?eP@s^&|YK=pdx_FH|KyIS=Gp0hzR z^0>(Iyt(B|Hd*NOx4Z^$+YJ~6L;__o8;Vzph-^)RT_}+Ue!|!xhl8>7Ui@ca)g~@N z%f8rR7TS)N{Vs(my|N-|<``|%O7}(n>|`7AQf+4^C|70eS(CF?NlMnMY=T{G!i|IJ zCcMfz5bas|072KyscEkvU%O5jdX&q%44<&$&DV1d`H!StmH!-)HS1d>B|(}2Fa>IO z&j@9rJ6TTBjUb&986WAc*!D)>=+}Sv_LJ;dZ8>fGDz|PdG5fwFHz8UVIov}@2j!nw z)JJu15^YoY`*rx<)9zo$F7DrL4-tCxN?95X6n?NTlXTAuUXnc*on&x&;K09|^iK78 zNUODE)6HxBpx{iZ>xAHZb1n=RRnmJ~GoT#fJ-?sNnPpEl*HnS$vNsX~d;hT9f>*h9 zxCkI!b5OrLmFwHNMEFGT^3- zO71hE2u2eU>5iT^S3!Y{hKs)Nc9wf;O^R^k$=Ah=5AJigHq+Vw@`9vEArusHh#V2f z?B#M+PG0MW8xmfzhj$g5LhciFu`{MaW1|-?H6#fva`1c@_a!%GZWg`~mzvvpvz3i| zsRko9@|6U;49$~}8ULkNPro))Le-5`cC&d}@cC^n^t~X6pp+oef*LE2zVSvh)J-6# z$o#R7l?qwPks5F2o|cAD!nOwQDHpOXxHOOn^GE}g8go6c|LqUahwI-rUMD1YOHq6S z?U|S}qYA^D*268B*_Ky*>3mc%1}lhNr-@pj?7vdAhsuy=5t!mEJ_aT(3WhxiH4 zDu|}o@er^xIrX+azyzc9O%muZaSQu%YR_Zek8`$sP5t_%*X4`6w zuLZsdNZI6aAjW58n0*;EDJZYiM%WA6xO2_WJ*Y@$3TL(E&)P^?sH(Pj3nzz47I&1{ z2QpVxwbpX<2s?z!e|w%ZoWm9*wQ~9nqvwO>?Wp**>F(D!8M!B2ZCyH2f5kB3@+nW{ zMQYz0Y~83xfM=)isziCJRHJuc;ov*>)Yo21C4`u8od=3?cM$WcN1C^xEEuPil2HFY zS+}j$lg`^Aw(afn9DK3U%wp8CV2~5IW~xWp;Ax6-6d&Kc)EjPoE;KPR*E;SuD(YEp z-;U3tJyOSI<=6q}go6Ty&M(EP1mT_X1AW8{_O3})vb0L(?I57X48(dz zSrC8ANmPvewHQOocMWO1J(ux>{;5tdRUq?D8@ZIGClcpKcdMkz_}7cw`UnVb&ZciK z63Z28QL(^)f=5U(zqqgqJI1`8Q{j9htQC7dDWuqXUp^S4_rqF#1p4IVg=>p7agHO- zsy^iDm& z!mnfU(30kuvgL=%*!OSgQ1TT0c|^5B9!61CYAmhoAc!hJC9u^D@$vAB=xWuq z^b){99eij`Ry^PS2EYTQxei_RELbc7@M%Dt_ zoz&Xs_$I>H!LrD_ns8Y?{4k5jch*Cqw{Z=-4Vwy3OY|6C6+nFG#>?JN}iFmWNNQm@!!JQ{Xe*-jR~L}Q=ao>0Sg5&jQd=3Cbk<0TONhr-tI$= z?`#roJN{LT5X2+Q42k3>1aiqXByPp{*91+-7)lgBK{^+goT^HH_$XdF27I0l#F4OHu8i$$8D7YgeU0JmihUL9?+@ORWj6cw zCgE>5rbP(y3ty@r4%XHDon8y6@x{{K0=if|Tmng?SkPj2PQQ%`?=%2#3BeX*Hblr^ zFWuQHDsn%Gd*NGvQR1bZ<{WR|RGR**Mb{4*T?PxTIYe#d?Ks0yZrF@KY4@8iQ-f_p zfi#+Er=C9LWZRlC=e$dW-F~4tv8%T}%Mtv_y1oR`pm-oF9Y{H+w9NLSaYJ#GBL?C{ 
z_1YQMY-h3)rG_`b1(Y7sTB3Y&5FUW^I;ns`b{#1l5+e2 zm=tkr#8+)7AUueAtKiL*tlLaM*1P+>{7o2Xz%|78-=LfJ8SuBwPXe+XA&o7g`z$2f z8e+ZEzY0S`V~dYFvMJ|zFjO0-Q>`J(h7jbfsOCDG;W?8`CfG2;g~1Xyen0Ro^wF4C zoC0=Ue!3=gE+u)iE4Ud@9N5$wZV)*q0pz9nmTiIHJ7+{N05L>sjrS8gYT?KDD9DWbxrWwCxeRIFTFdg8-izv9}s(Z_7PkvVsDPN z(wc5%6o!A!Q0ILvq-dQPZf^q+_1bTggedp-cMPloir^O4Wdwa;%j+?sIr+V(>1j4P z`$O{dxdPX9?z|8ZrI>7&VhT2g`}^A!8mq&Ko~2DrZoW)2GUQ?79sqjUfxPau0X1YV zU2Io0F*S4_nZhh5`1aY7Bp=2>*Thd4DJUCG5XQL5K-LA5+K;Fejdy)CXtEt#;X(QG z*IxH&T07PgNli!4IC^R?ynO!j-RY{krwFksTy--v zC45|1oyXqu+f%Nnimir%M7u>dq$$bnIZ6i{W8AYgR3()*EAWnb`0y)7hRUN_5$nV} zQZ-QuV$e>~NkS}er?7~+1(y1RjuFkz+h|4O1;tOR@kTUEe7W-N#8Niy@y*AqEODsx zmA1s+aN*2{q!VL^_T4P*P zccm|ojRbqYCv4h9(nn+t?R<&}y0&*OU5;p-2w4EjE=B|n4*w1q*_OC2u?BTr(=?DRhD zBM#OaY?2!I_Wdb;60|ds;_hI4E;)VKhP%8EF&6-MZ?#7Fy;wL&iNF%hPh_J;yV(;p zunEehQ0s)$rZ%6&xU9sH?f#S@nRRAr?b;rJaJ)%eO?YDAve`J4zRb@vWisNbv)_CA zkWZ-BqqOjZ=r!vh<^O2!JENNHwsnJmK|~@bAT>x4l_tFtKxrbPAku`WG?6YMEhH4_ z9Yhq7BBBUFq$@4-F1`2AI|(I3NaB6J)5h3m+;Q(cdw=JSz0c+sKf=hHcda?soNGSw zc_uiO^nu1w2u__Ga&%s=`XW8@eLG^wk7`tZ3wyl z24%Lw&meoDJ7&tp!0f1uBYnp$IM9+q(G>=|9B%_eNA=hMl@g%NA4iXn>cf;Hj64^G z6Tj78I3vBG_|+jH%}XGJyX`09E*d#L&7Zg!7OJdb?+OyQQsZ;!1$P9~n;^0zvTPQ{ zh4A;mpPm!@@S2x386L3#CKzN-Mu|(vJe09@9>x8u#N6sN z>x;WpH`tjX*#&GHpVe*BYf}rP1%UYbU_jeovl|ANK=f@{qi*z%SqyCbzRU3tndYzQ zpN*TJOL>XBysH1j+iv#l#k%ihZ)nq*eO>S9)Ngd$%y*EE#Mt>8CVp({I7c7Yv zFjSgFbgtC#3{o8y0Q5ZRjME5Cp~$&!mYRukml3byn_%8L-LnM)JjuJ;0e~2sV-GvA zRR@<(e5|B7BzB+rT9ufQmw&C{@@j%*#immSFhSJr`Nd>(fewEbO)8Od)}EBSeG1qE z4e@m~Xz@P&H>lZQJaHY1iQN}J*^F)59SpvN+n1E7Es1@Ts{QPW&CBcBG?^kh>)M>@@JO%!pcZoInXPXZt>y}69$ZF-VW zfB)`-7wJ0-(BiS{ak7Sr>E8Fi`Ovy^&EBXD06rDi1t{YG)(nf?j!KuU1nTTy6Ffsd zTi~ZB&gau!mWL;YLvFZV#&ylT4i``f#>e#DNOn$}eJUznh~j_4~|$$U+Nk9Rv`Fm^<6>_u2& zzgIiY_lXb=nrmBaIZM@AzTRu+7lSVG0+zv*=n;S^g=~yvdh0bB5+~~PL~CjS;pJvJ zo0oUSf+G0s1Y7#GW}Bj)(bdyX&ndYQiBaTzisw-uJcuxZ$Fm)+lhzLaDa1JnpLY@I z^lX7ebJ4`)lD{tBR#qxKgcrQ~Bs*?BaQ1p)+od zNvrLSuWdVjE;y2?e4^=UbW+_3%*|RZN;{IS6t`GSfdyIWnf!>>y(-dwor^XUQO1-k 
zVv@}&aITk1jmHs-nJ}@`D}}RpBZnRD^D3=w;B~UAXp%pkp=>Q4j!f||;a>#Kzo(yR zLUL4ER0#*TX){Et?kJ`kr+kN5z2qRZkh5y(G;!HRrGuqcY%(;4*k4~1msGzBy5%%V z6-@m~#f?ry(${qi+%jS=7q@_WUVP3P-LPma6!IhRR^1A|K?S_B;8}fVP2~*Nty%{EA zy2InXueL{unpE_=KTD-fkT&lz{HSXe3cB`;s(%$z{yApu`~6{QZpkGEQoD;?VA~$U z_T@jOc@!gANH(~X=BQJGecT1;%~vu%qlwHd5Y9j@Mf;e>3Ep#&I8Cq1f2LBo4>ZS5;8AlTz`?-&$HBl6rLBDDm#EMy$eVvZ7yw!o$}w(VS-`xAa#I8+rX!^-8EGAe)nVWV3uKn1K&rY4)Rn{aWnPwh1iPG!>lsYb> z&yuqcexA4^Y%(S1Q`@6K*C{7w8!#=;Lp?&XN;6Rdl@!fL{+4uzet>On#)?}6xMQ!$_&#v!h%3AU6s%xmeoLa7$FF)fY}Mc%wls9*h1Kc$|t1txebh6Bm^fOH8T`~>M!`K>Z;Q`YEPqqJt^Cc?dj zuF*Q^nwwPMe#VCD{Q6J5TSLlJxgEBYt`l3G?(L?Qn0q#O*BEztDtTliIP%^$HpDL=`U4J z8)_|SEZ*aTNYTXm7ki@2{! z`V8e70*S*kxK%z+kj*dXc-P+{M8{;Hm3m9I&JUCm_elZG+l#V&FN7@{s)*aJ))O5d zx?@Un)SZf2)}54FtZW`jvK1Qi*V5p{!)3%-)($vFH%lmY(-OINzGjMWba@3=H~8k77)WzsL4>4>2U^3)9{(UelEU z$RdiUb_X1W>;>?VZs@{2DFaAGV#s{Y>D6>@oJ>MZL}9Am(7Ednrb0gO&*zh;CFI!` z)m3t-JC)%_PsS`JYtVsvQeGSq(5JN?9N)NX9~-;_=ZehE=LHY+oFnuRqXh-`{JC1k zZg^2)w$aLQ{aHkZBVq6H^=5+goL2m>FN6BWw46bPeio<*>PJG_Nhg~C`EELSnSHh< zgCIDK6M`l8r~fCjCwjD1Z(NPLZYPEasy?1k5F!GDVkJ^R3rJ4n2oD$je$L5=xg(~` z2L>85(Lv|vMGRk#jPkqoeNpDRd#28fWe(Hbarqpu^)S_GQXDL zAD|4tv@1F*@0+bAKpx_;W*&V?A@qv!t3aDO9cd@b>R=K=?DJeU0dD4Un_uRLg;e;H zfuE<;6B3^EPkiUb+4uy)0??Jp)blmqIap``=`MD}+EwB*zD+{Fuux8GmRXJ=_GJ(q z@9fATycHhujdU50CJ4UVdEhV-FFKUn8G~hidsE>8P5zZ-YTj7kmAKJ&rr+6+Lb#*0 zTPKzLN~8BoXtQ7NY|vBhou^uxVaGUdW`yvF8Jup+yz~+6z}=HJa*6+_#dD%mi!8g} zJg?)>dPP^>c4squ65ivn;2w!)c-+&Uq46 zKaUhX*PSllnCqOQH;X%~ycs&gILaW;Npq{|2BSwBHexkFjT;;WFn*jLi6tP6u@gSd zaSQ}WWtUp4tr+L(4`dnPAc*9M-G;8PQUq{AZQsi>f*{N{=C`%^7B)S%U}trBGh&b; zl+N;T@>FaY&W`h5*>@8*H}dC$i-Cr5OiC1j=`}DlHFc6@&&C)oWlpNoLVww)J?_5lni~KM49c20-6<~Wzs)F7Y*w|1{%(>h56vk+))ueYZo-w-Yt%kZF zFXd>GWg&8WoRlJ_EQ6iU5pAV&74Ug*l)xSh$rM_93P`7%#s$wqo8aLGiiHs$Ij4BW z>6|yxrYy8%U!DIBi!=D1!Q zGfjLE&>DR%Ry~vPL?e(gPd@uHxF;$$cMWM9+!GQ`uuHrHiSkwtU;d{|@!UBcW)N|RWL?q7(g=2}>F|JZ)hfQw5=UAirg z6YFxPtBCwx$0ioVkXwq8 zKkx>GoJykme2qbU0`ZyB{s1ZM_K21w#Yv*`;D&y*YpcG_oIzpnHZU#5UkU-B 
zlPo*0#*06`yM67r_a1%Gs9r@A=|>s(4cd8t&;>X{K*Jr-dKF4yCZffgXU zl;zBu`#4^CbIr8xz~+PX_WXrS%g?bui*Q_+FaP@Vl`7WZ+07X6PZ9JO|Lw1>JJgA4 zOSP`#D9l3II+Sn$p!)>X>XEaR{e9-+B2r(eIw-z~5FRzxV!Y?0Uv9!(!XM;8?#=#| z33-ZV+bk$h=|sOIsW_jhOk~-fS_H1dI?;~|%9jp@UnCqoB{p0cT%U#xY-Ld{KJA~V zn6>-_%`qmw$Jjgbw~k-;9%=ccJ;0+K+24Oj;%&4&g#Y{UL#JGNbpfvUlMqjQ3E=^* zHU+EKbDpFeYjI^SH+9ZWE@4Uck$TGI2POHrw3YE{EmTVZs_%%lc)xmpqwK`9+&CcZ z^I$x;Ib*I)K=NGQtAx>6yh-gL3Nf-4~LrK zCr`6mKk7@~%Cd!pOH(b>vXP6efeuXToHYtGSg-i|E0F_dZ&Qu;W^FrKZs1$P;?XE) z&eGb%nYvT~aYy!xE}qN_?OS{?Nl*4V6SeOsnlzM`4@^%s+CL4|{p=Yl&29Dut9Ag; z@8Mx6>z=UTae-^zua|ZX?s>PqmzE4q;M!;UG*GwtdMds8If%y++w;eD>CHg%G_)*z z>K)^upYrYeHRk*A5%y^$@x&CT15WFwt)jrGq@X_`J0 znV$@m$2nu)t?7gO1OKkUWtI*{#oCQAv$W-~N2IX*O!~u`qnYk<*|#=Rv9JhCoeBKo z>X%RB;fHOLRlXSpnP16Zts`dtyagn=mHH@{WZ=8{H{g(em0I!f^FI82tsC`UIXdKD z(49Y&kblIuYOwhg7M#4a);Web1X%l(zL%*32V_zLqVz}s)hAUV`lzAnw;06z>M7w4WpyapNI58UgwSV^wu2*r>JcXj0XKRE&e9p z71Eh8L2C)oInC;yH*c2RpAmax8G7)Gcky89KZ z5L99PkEC?7v(C~xb6BQUxb<0zF zEf4Ys0%Rnzx$_#)arrMW9D>~Jb6vIu@lzkIL92aobB*p~eQ*2C zPuY~SqGyWQhSb38G#ISaHcpSCEA$ex!49SbTb(;{K(z&aK#kr*twcPbUPO<1Q1o5| zQWlpfiJj)CveYhBNvpTW)zUJF-=L~Rpgyhf(+=60v13sRIS_~Ky*57t$mT>3zuXB2 zAi>-F9-VCvwc&-;y4n1_)X<;hZE{>C;befS0?wcT%|p@(TAQ!0d^l)bY^&RoL8Ey( z$Rxr>!y~xurxz601>?f305#9{PH_s%Y=)**Vg|uGvL&wC6tk&$Jxn*} z7ZGcdoS>27L0;k)n(RFp%~_G_b=G0)Ss+f!m({eWsMVEPK!fLtvd6}GoyR#5-K0i z&Pst3=zl={a)kt}gU9*qTsqpwYzb-shP`%{Uod>d9C^8UoB4XKf7+NsrLXk; z7aR3&PAd3t#md#5RGS5XZ@}~P^%8W`R?m-lQc5k_T)XwavLR%D z+~5`m=$9M;s5K{ zd4hzLbE{>Qf$yGcAhh%=j8Pg;5Jp%ys4OL&mKxW4%90c2$NK#0Mpk^02%|bpdS)~T z8A#Id^QJ((-*4e~IrNu+pYGL$7@6btPr^46GRp8XVL3Qd(8iXii0d_RF|nYpg^}0k z`tGJ(O;mk+TW20*2XG|OjZrfZNTs5F!%NPJ?Z^P^07J8074;5qs)n}vwmMuY=}$UZj9M=vk&cp)UtTs%hT|t0{Z~~bbY0mRGT3CgPjK^XYN2ZTt{ITL zBZ*40uuUOoqdz}|Nu1SO-!HqVi%>j4E2Z|}wavfor~(bM?*f(eJZ4ke8P9fI^(-#r zBO%qYQF!X-a~9ha>y!74wI1ePI4O_@$V+vrsp^yiQg-2(8Tc~1hJ`5Z4D^ySp=vSR z7~jhvG#5D&vk>F`nNkC$MBgMR;(NwVQ*=fmQR}^^m8MKcJN%SNwUuz6O;uf>_+}~q 
zth(K-!rUbPMAvg19bkJlgR@7ypUvnI10VT_l2b)A)_xkQE9Qcv^4mh-P(OB>-4=H> zp0FcpneRlSrzO^1ilsazN7MJlfO4G&{44^*oDq#=*+CvmC;%2|?LwBafO8||ED=^i zqK`|<9l6=`C!MpNF%)hG^y>wg$X_sKrtmLxsFt7UBae~^StU28G<4dQNo>@79B+=L2Oe2_ZwQaUi# ztWnCMI^#)26!X=5mI;yEDfZj$v5qU-6A5RZ20sq>(~nsToR`QLfRk>lzvnZ!c(wtuEy&9S%J2E)rkJ1xHAHv~l!-zK)_u?4KbB%4zcRP1 zX<#)G7PasW}%R1KxvRbDFVW&e_SO829BoIP3oCElv+& zXzTm~$c9r1G&1EAtWw(N0LU4Z)#FB3=76hkt>7Al1z{#0;dyy@Tt=O?pfW8y)9~gO zSW{Dx$SAu|z)8@ND?tND;;`Egxc9Qj%SP4j%t|*Ng|dFx^-0gX*~+h?5!@RI`V~w7 z#5)ql07=*pVFU7|*xDF!q%_f}5h*aF5vS0wdeEaLpbJ zuXl637Y$=SXWhTROV&hQLzocV^GFi-kflI>PI7#euC)SGWO#w}6><=OZMEZhBS1}% zYdj$M7KVI&>g#v1tg0;X$<4jXE9~w>j$he?#@gZvqM56$5~S2}?anXZjCTv#c7R09 zY=-+foS!61&OvzM=CAW`g$g_0ZxA8Bbvf7yhBbWsD*1NVVRbaH*$s8Jmfw3~t~>Ma zAUy6ZV|svq>`JGzQ}NmA6Be`+>~@cJ>$*W8>P+%`*g`4sKS)=-@HiC52cq@|i`LH# zDZWdezneFU-O`DO*=ai;q}E2C8J$i%iBZMa8~hSgWdqXC9e{BfAw2#^4N-Al*%-TN zmR#E|6IEA0Qnh*Z`qBRQx*Cs=T(dzelv&1eK*Y1)_S)KNY^T0_DL;2hn=C0$@%4&= zP>p9VrE7*uSxujSIsf9j;gY2W-sT~wy)C+9CXnUbkvswrhi&)`Vx68Qre9I*%pE6Z zY%*OUybL?j5qncsVInDBUS@B(S$w>rZoZ*n+&*CWo#Eg%)|=Vre0(b?;pW>@3C2;E z74Dpr8l2AsFD)W}Sfbj$0ZM-WUaUWd;dk7xG-HL2J+ItTWuV-@0C4m9;njp90omTh z3k_P4c`5;Q==wS``O?=S*@WX2AmwCI+_I<4OntB z$QRXu87t8vzC%u5>Mb159Wgu6VX3F?Q3?vx$4FkI=B#rHpfVb+6~;q11^^xDkja%W zqY&6)Vfwl%5}2DxQMg5{$_F&7?3NE*yw7@N#X)MXd2vg zfl*CiIcXMCifP`aMcl6JFQ^uD>VL1)CHa)Obx}GHAKnX1%WOeDR8VI8h#_3ZEmA(? 
zuOIndp4dp0`TA4jjgnXQCjlTbdWYIk_vwRtfzY-+Zf%C^uJM?S3)|$98bfXoe)Z~f zURh|RFE?3O>FV&ArHamYTWXOJhhbfQ``mHU-t3-|t?)Da#6^_|d!2g;6Ty*7NJ)p0 zO8aIws|7q?Brb32mYmxy51sy&87(aulR-=gru8={Ed3Nt1j&Sdkm!)YTJn?k{UaYk zcVQQrU+QnzE{#OJQ@}IBcE}gXe1)X|OtLW}z4K$U|i?&ss_+E1Bi&&RG|M*Y!eN z3^v4!dNWq>C@W^W6;C}+l37FCU@v&>pc(y3F#Q_P8X|f?Nvc6!G1<&$=JY9~aCjFF zPw91v#UXOD6HV)g)RQA~wjkubCglrk3fh4YKmj+jUt+2Pzd;o+GJjBQDe=+KJ#*rW zRWFItJ+s{vg*tjGbT0n}JHLpjSnsHy?~5KIxZr;-@A_LGv7y=(hG84PZ#XodMZO@u zwS_Z3&4(S<`^Y=}2KjY4QOXVl|MI^O^+);$`5GL&N$ID{Ws4)BCdryf=D?k=-Cp`k zdl%au!YZ{C0iCC1g15H-Q3C7XqJGzDcK)cr1Og;jb~V?VAg-)|Uv#|!PndVv^ymjN zZueuPC?}A8*RlT5wHNTBA1lWB+Xgg-k8btqj82}Yx>_tN6&=KApwRpR#g2KJ4`K7R zTW&zomi#D>x_g~vYH0MH>y__M=5Kf%&4=2lNP?Dmsprhe^2Fj2@r@Lx3h264@&^Dw z@$)YSl5BA-AiAeF2ao=Wka--lpcEl~>28y3hxVI=y>5BdE|n7kZ6+|46MavLNkkeH4_AYU>y&RgsrauDu`;nIP~}gMej@x*&8^SS1#3z z{HM=BN67!*1FrsGpBEpWS;9rtekpHi#=`8<>I}Fz_VbExVmc6L%-?Mcw`riBqhzJ2 z9F2y}pE-+2UUxI~bhRyTKBvK2<+6R!ZY_Tdb@&_9QRtv3z~RPRE06|g06yBl0ONs= zRfzuQ$v^*B_mt|d98vfmmC!%TIN#FTV*19TA8PA|ac+Lbsh6pPYH<6DN#tw;y9ASj zST3X`3eu4{&?Kpa$X^hRm7@wKMsDY;9_STgpqOYE${}thgV;;!=-263HboOiu8IR% z)f|&6am|z9uA-!_R$gdw*!;jRW0@gOXO-&wGSS$6MilkTekS9188=1_>|1 zxF~HDD!j^MWn7qx9d5CysYIJTqM(9Dmr249qv@9Yjb-2SJQYp^R&+ky>rguy{0f3I zB{d6 zY~Hu$=FET=+lv^u#8}Z{lY3Iyb9Bi{CZkt_Dpv(AEL6IO!TjcK<0p%pO5ID;*VLoq z<=YFz=Vlt-%gs$k*nfEHHDIc3|4hsnYy4P8>lXK<^_kRtKsI9u76vVWgdw@SYdc$5 z6@N}G5H#)--Au@X%F0GBHe zl2HUu8kE(&1OdvTt37TbG40J=+UD;{uympQ*(7ej?KQgvVRL8pkr9?N%ajH99=8w@&% z$6Ow&jM)K~(;2?ny$=DZFr4d#=WGGvmeTh5HKX-SCI;S7(M$o9|8=ErglVcRy1ClQ zii)8AX<-Y~;R}z{x!g19bMvm{Wx9d5Kv#hkTZD5VJCL0C*2sNvmMKTC*8@)tf1%Gi zup29B%rZ;G3Xj&ZBILdi&9K4P7}+h0(U_(b9-DgO65cynY|&@RR?pu7%gptFiLR|2 z%Bhe@5o=6I{IcVMRA@?Lx?p+-vo4dRML|9L1~-ow zxH1Xu4wPgWRxm}f$!8kc=b-2HJ?W16%qK(`P@CD+m_u5BkJ%Y&K%$sKVj;+hLf;?|H`YLU>sC5~jlNRM0S$YZW*v zA^m#r3_T>PaJ>ap92QhAAh9V?cOh+L2w09} zk|5Fe6GEWZWlvO+;J8p#DR*l(#PHM&{Y+q%D2ihWzukzTN9gv&M$F|KsLBXtB{Pm? 
zJ(H-E+wfQK0K}hMH9-hyF0$#x&*wck-eq$YLW{Z^jg_%up2^gx8NMn&c8k)^Lyw!P z8BDF&8}H6iss5%Z)l#HMmCUX@7$%xxIcC^Z$D;;LJ#2{t5zAqt=mz(;Vh9sF@gsEK z{RK&Q^D1z#ReKPEfJ?K?T*l!Z^iV>0biE5H^&}M?0cd)KUGEbe-#Xly6u9;UROMex#1!y20U_(ssVu`WT!d7*3%0UcK_2@ovE=p_dSv+Hw*k`QeO#_H@m*plY%iZPF z`eGq*j7p{AtP)5b{feY;1VcP7$1|pQxMSbWM+$%Ep86O@pU;z9CK zO}c`!Z#ZHXAqhV*w21aOkdk|=T;=QH1OidkLLTf!k7W0$@%Z7Du!~^;R6yU3wbWX( zkhe)ys2?sc)^CDLuCnwW2ZO7L85N=ZlKBGtekokHOp=nrs#aF*Ov0b+Pkpk z$6i!KgWyAxndf)F9n&=A#9Aov{+6g7JRQz7rE&f0?GM5a8{=b-nJDiwGhEQz+w z2i*&ujG4Hqet@nP#>U0MiiapQUnhxG*ze&#eS2i4GlatrZnlcxq!8IY0>woj9`*V< z&jd_m)OD6k+&osc2W-b0e!d*%78l&jGbwGXAs82 zR9gP+#~_;jM0ar7nZ@`V7Q1TZg<{3MC^g-6UF_P#fWHv+vKC7 z2?THOui;NIIL>)kwCh28=Cs<*w_Pz$<%z%v5B6`HaRlSIGLR#s)A!zZN=ppulit!7 zOp}ux+PK`1d0oPG>n%MjavBXU4-yjgnC{d1}gu5T+!}1#xDRX6?5FUEg=kfGO-sR;x-5V+u0e(U9 zX!J@ZBzhb67$7!GP8)bIR{6Pd6-#~9;l5)8qaE~3W4H$+nw8-FPNl!Q$4@l$!l{@+)eKlixB?d#l+?nuR zv#K$8ue$^L+y+KJzB#M1hTNysVqmYZ+4=-@Oo2Zj_=jA}oTcvlz$6}0QHyj$oE6fXAIRm{6&P&hWb zc9_!CCLr zIRaCsrn5`=!jY?eN*ocg2)7_A)myRSSff)RAx?=MeT+*jW(yue3m&iSR6hbC2t>b@ zn+ahdqH)6YGpsaTsh{5htvu?tszy3p(R!ELO%LfA2xkI&LBi|wm#G(K!Z5|@$+O~YtJ7izXM6m_(l+@t z9-0nC@e2w0LFWo!;h18e`hnyl0x(dE-1yJMkB4E&wU-lRHFrMfgq~*DIeTs!ZUC5r z87>8D^$wS$UfKJ+a|}ISS?T!6C;48e&5iLB_-G*fZ36Ph@4?N7H^w0mBH5Ut?@(n! 
z#`)|k7p^vzT%ma(Qw2mn-8txJe&dLjxLhbg)BW>}ri3$Hr=Okl1Dk{gs5C--d-`FI zE$Eg_Mi9m#KnLpi7v=fqpdaG|0#Gw72bSllnuu)JnU}R(4%)i7_mSf*3)02RR%UNd z=VN_T8#6a(g_>Nc$4FW@|AXM|Et}FHVw1UPfLQIl{L6JN3c`__fJr4FG+R^864LX`1RRjgap|DDO{7@75*v@Qm1$h)Y_^U+q5 z^0}SyAuj`71DE9$#Ss~=Id%b6<4Bes&Nw05`6D=kW%so0Ub>YJFQN;@lhIw7FM743 z&W;W+sO<{TUj))+>ZD>Ed2^kdea9}wA}Rd=U~u!lOdK2?Ty4)rj-3!k8EqN(mi(P9 z*s&fG=>NA4(Z4?r^;b@v{fq1UzgMc>;6FwhYBGE-Vn2U)nVQv~S}!9*xKpT-3;7;) zvm>S9E_9!bp|&gYH%P<;t_gAz$6D+uM4C?Ed4bs$_5@fs-*3=;Qvlt%1bfS}B7Fy( z?w`?AM0iEHi23qy7jZMAUXFa!vxmO%AwYS1v-TpqIuR*>B1mV9`>w3T{01R`(&^Aj z=^gNUl%wPIJm?YO31xH`_5#IXlx>_Hqq<#@%8ZS2@zj!&5PsN>V}IKCXm%0D5y zDETA1fRWt#$VFdK9=}0c|CWw8I%NO1~V5E}fiyaV{J96S5Jy=eUVh2!||-$VpCURwadIkurceGMRg?S!V| zz4+7j;@G~Aef+=E1|WZ}p!u<{AN%^T4g5pd0OW5F7XR-&gO0!ZIK~~@z@KXakiSLW z`uJS`hdS4fedo{hoqx3Tg#4{)C&&JN?C-}m@DE`FkUxXZbG+C85PSWP7Ke_1@1L88 zK>p0;zGI(1_W5HQ_`9_M$T7bDce^*oaqZ8IYya%)d5o_g+tWY05B#&d|1rLP?Cbw5 Zw*Jrlp2zt5v9JHL`@lcT`$K+@{Vxa5>oNcU diff --git a/resources/images/concept.png b/resources/images/concept.png new file mode 100644 index 0000000000000000000000000000000000000000..ed26f5ebe3174f6b088375b6db7cba4f21f6619e GIT binary patch literal 79259 zcmV+QKnlN!P)?Z1&#p$K0qZ5Fd7wnyT-r>qA%-$%sM4CwMn&Jz}I4?@YRd2M4K?xqX4dy z)Z%0y{RfCslM9MU7#J8+fNbURk^&IB1IQLB0y+xFo&#d1L)aTY>?8>L2v|)>kTZ~d z1ISiL&PW8B!T1Bn)=5GVPe)=y?F9jDm(1dVoWx3n0Am8KB?FYCR+NBf=lp`oqRjM+ z5(P(KD5WZR<|XUtC>R+Snlmtf!W^Rdb09b@8O6>Z#G3CjFxc+@|NsAPgqYWU28NA5 zom@K+Vj30<41(MY3@3ILBo-xtg_wbuDJ_kG;nQ*k2Hr>p2H^`pJ<-K!#ztUyMjj^y z2G)E3|9@6vU|^rj!0>m%|Np-i{r~@WF;Mdz28IK902Lu+?ls5J`v3qy07*naRCodG zod?11<+#4!9i1BL_nBczl5m zM_y%QWQbkTAnQ&!=3FoE1>FY9H3x!%g1q<7y6-1AIM{6S9^|gL9B?_1YYqflfE+k* zKoS!ZW&i&D!g^|Is`;d+rwbofIOO`-xCa&y2M{VDAtB}y78WLaA|oRuDk@4URHz`~ z;o(Kp6ZbMM2TB(Qf^*de=i?J`26bg{tgt;>O&Ie;*Ui;I)0RjW!w zL`2R9yL(*@lp_x0TYw;UcJJOTTeohNy?giOnq|Hw1pA!^Sa(v^Z_Yi%u%5ghTboE! 
z^x+p*lbr0{B~l?qB)otF%>G1CDJ)XC%Hh{3t}TiBd{a_HDpnSqrh9s^Z;z;I=~?Hw zu}{rGhDQlIJ$!&4#X$aMM0TFU(%&by-KCRnA=m_<6I7uSq|h&fFLl| zuU{`oNlAw>2AgtpbhI%6Fk8bM5E>flF%e*IW{@dml<@WoviGYGB)Vr`sXF5E!+M{x zbfIjy;UbB<{UM3zch+J1^Ri*b6C-5*A5)~>w=?{nb}&6nc07ECM0V*RF{hsG_n5qH z+&lIoNm;f~@9l}hPGzv&H28E$-?Txh-1VqbIOPm;%+@=umIHJD^d29if;H~O5fat4 zmuxtzyF_*GE!7@;*1NwXuN>3~Ksp~av8JV^sjN&fnlzd6#OSgBmOvhZU0Y7&k^OZ;l;`u`+->t+e5 zSzE&DHjuO}8)e_Pk0rcWE2((Vm8Sox>(+RVsa9RW8a9#8I`z%q8qvCgr0?1x2Xz1n zQnN?e#&r_W@@PrhzD3gXoZ#pf32WLyg6u3llN$(yl;ulwe}ydkVc<$#xl}Ut?AB}4 zlh9gqwaxXi|My=dNNw{`$M=*F)l||pt=G>639eLG!WuU-{6Cm-K!w60VQ>sTs-*;H zYEBu6`y_SMG6|_sTSE07)3xp7zh}yU`EyKvxfds~9pM2b{_w3=fY@WIk9tjLGvSTV zvg?IMCGyyAc9*&KLDk%XLc=7e1p5(~)P(ZUi4Kz<{2rFz-`;)ujCppSE-kUXc=2MX zUcI{1t5;8zuhKU$viIUHT@K`z1GyF;yfCX*uQszYdof;`DpjhO51K}4r`;dEA$!KW zDj`*CNcxVgQgO%)lJx6D6$m;QFaB2&Cykeky?b=|wFFnGCK)@oOYC_=R5R%#JD>luD{UmsPZ+#z2}ss z6@&}sDn4i?*4JmXl5N|z853^f#*K~By)@mM+gFzZ1?51lrcG!Z%a$$6B0R7KH*MNf zYSyf2%v1&KNJ%(9`Oj%)K#m)Jzf`*XMhVt|x7Iu3OxI%0I8Q1u2!;epWgVbvz5clb zSFP?57CxOnxIal^`kyOR?s&)uoY+B^O6@TdB%=MXa^Ro8E(=z05ppKOFg<)ODp zopDnoL~4B^CM(kPH<-yIvY4A=PT=J$-FVOnW|gODd?z4bxmuOzdI8BSY_F z|J0vVNE|OQXJ2Ugm8gQtNeDRv55gn7d20z#EyxlYQFO0<5_VJzyQx$!iN>m3ySDk= zqj6ip)U{;E64|py{X#Ce94JQ|IHCrElYHH}bzYVb?pUWzoigb}ta#B?lJM3Gvi-iB z%&a>`wTCRECM9O$yWe}+Xetb_AvlkHh#7ySD{nTYBs7<#@t@1S|9yGL0ZluZgy{*2 zJfWAiRY}5IxA&N+R1-nl+40{6du@zn>1-sCWQx#N}fGKVoo0s;Yu7c~Fe7CgNS2hP z3q0Rd+rZa>IjS~VklN>+l##GY($$29`3k>7-LI#q2GZ0VlCpfU9MraIzVwk)x%)9m z-@etDpF-4!f|f$xtBxEk71RmOcj%x-ex(yV<)Fk#-x%{(WT!3?TDQJx7;TK7jgK;BtIb!Qt^R{eW>1FtAVR{cn)N8vJTUzvyz-oQa|qUVJuFi`~rp9aq8km@xh>6hC-KZ*KO)ISx~rKh>hj>qnn4QF;WzLA)-FVaCj)(8hlO_7Zk_L2G%|1$kb-LO_t z7R)s!s4BNUXto_#_>XMWHWO6~$qg9+_tBfi2V#9CxjC;i9mAxmaid1^-+%u}GU3-s zt5&U&=FOYC5n83`LArg-7YF=?@sd5hXwf1Q(nzZ$QlXlcW#M7DhyQ>`C!FLFE!vpP zDe4RkR#R1IjanwkDrLz6Ge`%kX$dn^sQN0dz&pMGzL_u*Jv+^+2 zNH8B+B5Lcv;`t_8DWqmC^?xKtc$4NPx{Ce~>2*-eQ<$B?8Z=TtvPV+atWf;Ac|R83Vxff0#yPz6GY4)`pIAhW)e^c~wIMI*by>cyMLF8YAOo@g)n5z)Gx 
z=@!C@=q}7(A#omIXQINC7dZNfBqREY2rnnuV-QRTT+ClwoAt6xg#WHq!I6^b-$Jmo zY}vA0yfbC_xw`k3GY9+%5Nx?t4JWkGsix=LoV(ZMK>6iBifSOM)PzO2J4EI>6EBxs z4wO|69OeT7j>HNNZn|z=wVRiL5Z$u+0Nwj4IUERAt%E2nON9L91*O5B=Pq3ilok$T z5gg#%dx$oB2qy}2IvJHY%Y z%Y=UdA4XmXKSA2IEpjj|)%Y8TT*5!WHrgu+B6}QPhf5`s1LQ{8%ygho5)u+hW=Gv# zxg01U2fPA=SrS>bmVjs|%TDt>pT8#?2lkX5kKb)?IakX>jegps(MqYSmdmyg*UR?% zZ7h6(HDZ$*6@89K^VnRYF?;Krp0sJ*jksCJicqTb2CxEJ;zE zD5uE*EUe^tvrWzg$3?d5Y;dna^;}D#8qoyMV0g|Jqz2V1WDqkCt^Fxc67~IiN`g9>RQO2~b3|F`25UbkKQq3Fr z-g|#wU{xA&y*Y+|d%xC9_77ec5)H{rum1F#vVG)jhcL4SUn`XcU*|pEu521|rh!SR zl8Mm;VsV{sf0wN{4VH9-rSeRaUo~F%fLLLpvB7tUBu36Ft3>o7z7T7^;{76z$|aWr zWr+hu1HpN2f9Tv?S=K(dKd_}p2#iwr4YH`om8f*pEs7`gm4s@Z1sUngt`Qy;P8%q7 zznQLC8qav3K$8Cg&uXk`Blg_EQu*53 z42o?;oAw5y3SpA)`ZFdc!(M2gf;W<9QuTpnr1nP>wbn+s*_E!QstAQXss7~aih1>f z8LSV?|I3TtRr%W6CH9<4Bq%IG;)dNXwG>d1d=>-tXd8DdBU-kV>I(F@>!l|oRFf6G zm|2<(n7VbN!R-ROiw_!+KLW`(?Z1@@sM*;Ca=2^DC*KxMGPDzc%JR ze{GbNZA`>05#8w2Z0&Y;>2jbnbHHdIn6K=QJ5oatNrEa*6ciS&n!*^_^Xa>4OG-1? zQD(qr1%HR`t+<~dOR*Z$-O&w8dc4YL)*`7who5iqm1YrjAej_ll3 z6AD*Kh9)s?RZYsYsmX|#yhw5^s{|}Ong~gTd?@n_qJv8=2g(Wui~w=gIjB(1!gjIY zW4^+qg6W7s90(4JQFTCl0uddKm2Fx;iFzJrExDoe9ouF9%qfzrj_fL0s0k>Kjpv@2 z<f6<}yM)gBxzNs4L<40?zIA_ZR!1YTBnt9E8E1}Z*r@(5dvh{*LePY;|EEe{kt)<)fs!p zDbMwQRY_gF%<$`g!k$2w^Dt9Yd;AqOf$fuRcV8z-+E08&X)26rD-tmERDgmB*IFrM zoe&~6uZs7~6(D8(iI=bYGXjK}ktJYD$`cvL4TbBV?nm<6KTRDFX8EBCAA+d`yZ+vB zADLNrwTGUUw5^+RWKRg-i0Y@<`Lsg9Uw6Af?*(5AtpZg5_z$2!fP!b14ptSlG7?^W zO4j#3PR&pmQu+EjvlN}G(C19q_w5%N@pX&wD;T^iZ7nRmsp$(oor5~Vg=in>5w2nU zH~36c7=YNDIB}fOZ1B6}M3sl$s?lj94q;}gMiHXWB)$~c>392|?zqOlt8m{@T~E?#zp=7SQI0nZ>S^vVs>dk?8pgNHvi3bnd2I!V^F7P&ePSs% zpc(JfCL!Y;^D`f|TPs3-+jh@8!~q)s!4iKkAGAp#?6A<-{fEvdpKAunWWj<3=0^z& zK-(=3LL_DNat}V0I*xHtAJFI^2HxPRS}1DQP6<;iDvAl2+8U}&M6~T-gb2|_=_*9< zm0*5?z>5NWk&1!L%!!m@9|L^G-rWWd%HUaH=FuM{K#5!vvkra}Gdox3H4#!^VEMz1 zBBCorBb$ON$C=87gaU($Ww5(8Z5gt5CTTHJk2OJ%Vbw+^9ojcde zVmP=(ixy=%|Kmsuare6%2qXv47QmK+s3X$0ZQEQokKmx*2zexE8J1{=nK0q@3|mlRB=CsHI`VrqY}jBl#PV3y*a#5& 
zD6sLlQpj!1nl?6KWuxPyL36=av=8{aK2a0_sw4mINZ@Z5IDf0G}2;KuJG{3 zsgkjcs8uu%l14Ezu3WiNoKdYMqnCx(i(sd1)%im&yi2+*yBsKA9H3^rCFlEQx7j8B z5~>+OV9{Y)M|*LpWOIOVvtGS=CIOr0%QMW2xw2&6UCzA=b^f4BssKTv|MABkvT)%- z*|ces$(ko5l2Aq-6!*Z4t-KS14bKo$B%y}PTv4;@)Ttw_Tep@IPB_7x%@)ItglIlX z&iBo3vrD42*k4hrOqO*dYHhrPOBMlA(q=pV{`;@I z_~MK5)mLA6fgb(cgu5+w93ZLf{PWM3VZ(;WQAZt>yOZ3b3e16EhZd3DL}$;QJ;pJM zeI0@CQYqwsKIgjn>{h553qKaH{$)LJG}QTnE-3@<)qTecWhRI8@-+_-V_+;h*# zj2Saz?AWncYFA`)s=K)$95{jog4qKN1oLBI0huQLG(mR1|Eh#FYAV$qed(~fp@z(+ zi~32WD{qmC=MOn-e^G6qpF5v^Kx)4Hv0t1t5?(i6)K~j?v-b1yEZt%PwP|pFg9KfD z)awQwhyy^ZCXv>V@)&^A$~WF^l3zDn+)oTLw89t#ibycDiJ$fB*Duo(g5Z**fe=@H z>Zzw1;en>{*=L{0J@?$>3XcH&vWe(<_@Renj0zDVd}hs>WrQ_0TbBxt15ORZ-&GJE z5ZTRv&^|H_Bzx++D69=c#mqe)zM%*P(E^k|L~RyR117Ux3$XuH?s!$bLS1Mh0y+MlD`Ir`xqn&KpI&^a0 z*RYIWAwkk(OpynA`)Q^3!-=9+6{>C&af`Sj%%Ur4K#EdzcH+u&`t-DYM9_PbqP zcm4Io2{u}1==0{yvzxOlzx?t``QQKkmt|XSzl>0Uh)L;4L>|DES6(S^y!oczL&vHX zF@C(`CGmTd|BanGbrSfCkpBDczvY%&6l2S!LgRolj>o3&8V1EjDXPYiP-7L4>b*=5XSpwa&zHcYle8pL^aZoSWa8@@{>2t%t9tt3OlBui1 zfr&qStNXgkri)KC%S6SWgZ7U9OmWw`nPc~i@t|I_-Fsct5;mOOUG~rTjm_Co;v{_^ zK&x(bwCq%ipTOH+p z?e^R(5s-vy9*O?<-FMQeRV%sjiYxpc+D4SeFjL~ z?`A0`SrbW8vyo2iJ(p&w#lOHa0~pjC|IZu=Z{9}h;!Kn1Q_heBbN(>6XWqVMo*Po7 znyIy;YLux)S>yRJ29y+S035WP&p%@7#8793_9)DjrkHUFuRp7GZQ7aoGZr8eLA&FL zdkmgjO|8=dM%>QP4;lP5?{yt32#fvGe%8_fCrHeI3*?|`VM$Yd^d50gHLe{`-KT(_ zHyc#zu%lW^u$C#X1o&uZy@pzT;7|=0yDvXW`z-CV%}*Pp;hYkadj8mBj~Ra5I(&G( z!ry-X{df6NqqR2c;J#B$VW;%&Ew|o!t9<|c_r|__@x>P#mGY8HFO^Spz=WQ8_F3ax zCg%QL)eH!AwKVy+-=xnerx z|1OQd=%e3<>CB$Qu!kRhSmyonj{&N*$gK7{EMI@)4bv7j@Cz@vK$UF*EuVe9k>t#8`2X+{Vgc7VH~&^chU)3X!wdFcR7W~yt} zP$zU)-G&CzAM=zYYSiy~{!vr&rOwBbB;x2!cGqD`3(>#p#m9u&H8o%VTw>3??6Bj! z8-g6eiCxdszR*sf)YZ#9%T>$F5)BASS+>Y5F|$DkIt{Tj#mMS>`KwfY=sAhK=qgi| z*&N{W5Azj;&p0oAPlALf;&X}$Qv5s+8b(dk%4%rE%__ItYbtI!n*d3gVsR-e4~-Ab z{drxtcxCq4=Jov2a(s!VEA>$8D|tojql#QyP4(F*ua4UW%lgZ(o)fdvm*vi zP)!-EpM2^mgK6V?Y=0FV4?Osw{62lUoSHpM3JkX13p{V@KmCBf<$`g)o4? 
z$C;^4YNE*=Q4NIaFvAD-z)3jUxW=G!&XF!%y2w@P_W({mLT8I;9;m`-5tCE^z53c~ zvQ`t6eAVLPk3ZfBIof?vH6Z*WXe0<2-(#KEW*ycCLIEN$aj6Lm$20ZT#Qkw!B)Fn^BLy&tZc#T*HWu3&fjT~e;#fR+;2 zr6#20xwFhMn1u2|p>^sTEo1Lz?;2B6+J?1Kq2JjO(Y%#J_v$B!->WuHJs$O^K=>?T zFSuOxst;$&HRq{=eY=)tzEpxj!w$PF1Q%s~UwRBGazC>MD6!1sp*JBmw$THPzf8w2fPI z|7bPM-FU+da;b(t=Y~3W?ra1|c0mKVX~R7x2M!#;2VynfBSmZFa0TzKN>|*ZCaPmi ztr`3kQC)hNeO2yOQzoV?osbYBo2-5e>fclzdYg$#3fA9l)sdr(&jJ5Igcb|4{i*uE zvu4m|P_K0La7o+npSOS*jR#k*;yJg`QRZ54w~aK_JNXhb@DhnW@l>;&!M%d^DOH6> zSp7!cFxx649+RkEeM~)@@D^>1)(~2!o>bO*iJ}~`YAw{~32)I>qqI`J{WOj0zo3Xn zsrulv5>73eOo}?LyV-{3Lq*YpN&DX&+;gZ_;6!7w_wPNR04tVv_SqJosS8$*-q54-MnAJ)LG2OlC4W0 z-R)$%Da$MzKL`;_mhfvhi6Ujra~bq4MW{e<5}FYoCs6^^dA_sQ89o(0mhj~`k7avk zjar9Qx5S(k-MzOx&F?Z;1&^~W=epk$^04tpT4x@bD$$3Q2mylW2f`;&5WuO>Gb_GOE%vNpMv@sv<3oVOlw`mG1bDA*-MU52 zeV~vrBh6Faf(?JmYbU-}Gt+eSQ{XRfDQ_Im-+=V&)yn|*+22CmT5ylfi34bM1YJN) zn>IDj-_GMP9pY?ttARSXQo7e|B}yiv&gjn4eOHI~zSuPoB(!z(`uY_P?{4;6m~mHB zJFZK4=YSR3xyet-d+Y8Qx#d84F?-~eAMSA_p97A#yW;7r4RgaBQxF?)y!s!ysoBS+ zymP?jAC&c&^}KVYQA!k^=EQCCQv)K-f^{TzWR{Q#kQN*KHKwrm$|uL-*epW{y9)$ z1V|t-fc)R0yY)yMu)Htp5hF+9LrL9_mY($64|3q|SsFEUs}NQTk;M&+h_9WUTo5w9 zEmawIpTwN519FDqNf7{&aZoB=I#enSzRm=M?9&k1r0KsN!kbbgXOiwLjuq`EYck-z zuRqO#VU@mTw}C*h*YRC~Z$-ovm{^v)m{}Tb3}zQZo(f=&UQ%D6!Q#rq-?Bj~IrW)y z)3$AqgARBSwizrf&Bh1&%YJv>FZ*p$B=>1Cy3BUnM<2-%G3UY;<+)3TI8Y)?Sahc# z+Q*^W$=w=1lq=Nn{hmfD`6EJW|N85%TxDkCKOl#M?C|^&fNO;BzLJ?klHYdnd!FUJ z??9B__T2LV%Ypqm`v;#2N_csciAvh>*je7f^O_kf_m2Bm(sh94v-P$s4g3jhAIu!|fj^08(_RuM zePhlA^Q!K*zbn?&J(}@7#cYe}*+=s)9?z1au}AYgcEA6Mfke@lTPh8?!Gs2Hy-{P? 
zRMS4d9|P{F0))_F_y_M1P$3%i1w4#1TFi919R4eI4Wzhu4irdsgiv^1v$=Ca_-_^~ zu2w;a=&MDGJejx!X(zwux&kD>oGY61qK@w+vFBZ8E>`ok&x{Exq)K(E`N}5}r`iPw zw15Hbe)mO5TmPR_f97pP-MvOIJF(r6PMQuQ-B#`Xr_F#E(e4Og%NE!O5O3l z3xWY5QK9!45`B{9a%i}1ozH)ked9kj8cFq0uPMUqJ+fahr;;*(5$!b&Ec(a5sMP)6 zObMw~M-qSjpWP)HJ9lWm@0Zjy%Z>1e)6DRy4?H7jYgU?sLJrJO?I!MqyG)W{s%D3$ ztXSeX-%p=+y`;&5`VQ56;|tTzgtwlPR27IjpSoW{s@9O2Z+>auTXw(qid4P(@htc2 z?G!7SkyB`C2Ne%2ed{KZoCrM3t`{DY=)PwP{0BD-Vd9d8P?r}bkkI##hSsiGvqrjV zM!B7B1K|PkE=h*XHD3a(s~a^Z11OW5wCECHwd9NZp<%o@`L9##s58$z(=Xc)N>8aK zLUAB6)PbU>Jn1vq#t%RKXtb;gHI(>@%P$we9D!W{nfExwtg^)R#~**(gdU%C(n)f= zq6Pz1f}?w)Mw(r(IU}EGKFBi7?;!k^Fk`}nT`IdAFc?+}h6s`TN9Oqc3Na>=9CMW+ zjpCx_MR?1$X0QbBidp}WC|L2b06z@U89MnM6_c3g9;cW? zl2vmE(d549UVTmbNxx0fM8G6_y26!Y?Q_b~1#&QPpKQ7A0_~S4N9(}6*+zSbJ?lcF zDO7*rRqy%E`#rQ#*D&!p$bd;+f2W|eXg0DWs-GzFyRXbOGW2>bX%cC9VZsQ_k6=Ll zS+S-_oU^lT;L?XH4i?+N)A~m%_dTU~7aTL{i6>0W3}8vVQ(UVD719K3EPq4<69VT7 zO-!7qIUOOYNwMY$pCJJ|Ky!-DUkmd@H^bnwkWIx8m=93cXMjaB=@?<7_s(w#|r z_#zTseM-VK;jqr9KN^#jvz_2rPu`LNam25nS`&g0$N9cYQ<)W5iWurVHOso~GyTDg zhjvusx%W(eBEk65z#NOMwOdV66qcg@kUf{|`tIwx8DL6nowf0hcRI zi%(Hk#HDhgfna7@s3sx?VIqz$Q;3f@bTEgQ!2_KRLNCdNsd_A#;5e4aJO^hAlh&h; zK5B$Zc8+F}7dif96;cq_KoC&C>FTSmHpsu2spty{dt{4SO@@k#BodO5kM?5w2Jqsa zfBtDQ)XzQdJOOf~j^^o5mKN+R&~`!Zb*X%D0J97xn*@!DN?qweCBR9{K)(HfTV$iw z+rR`A-mIm7GJ7OK@L;;yHsX5Ypif=3Ty{P?(xC8iF1S}_u-Dl=^*n04{E;O7^1W=; zNG`M^QG6;V$YQW+NPrg}m@`Y^({D2Vj$nlp@g3CKD7aQ74>e{35=` z`hIuzx#C4vYb}#|jQ~P3K>I@DiPegQ;DQ}kI9JkDr~$?ZmX`BeYKMT{OC1yItoOYZ zJdEtpL-v00z6LW)Qq$IEsgjAg<$G*-`<}gcFGHNb^uz}^5fFcEb~aG#7hiITF&SZ2 zdT`{(!|oc5gF*amMcGAofHmchqEzVMd`F8w_+S!45P`@$=7SI9(MKLJ^+irnNEEgs zNYEr$xBWo?@c~*Mdo1QH>b--|OTVEsow2>GaP9l`+^-b%cc_}w!0K|TY;ypAL}tWjMm`AtkUBu zT{GNk?!os#@urZ9z_2$n#h8M^A{6&Z9n%^`6%|t-(uiXz?%wh*4i8zTC=v)diQH>=XWD;A{Bot zI4WAe^Rk*kM9ZT+7*(pxWY=e)Tf!PQQ$WSJMsQf@lge8576_MA4K_efglA%PIqw&o ztgw3Vg5cpiKox;@sSJNqpjoQ+@C(N8!+nIeYHwib?Y{E7EEfU|n6ptR^iWMw~<*3ZMJ#zh6KF z2G-;P)eflPL5>UA$ZxAJgX?iVnhO1)gzpC`U?6}SNr*&f9iuQTZ>qU#q?*kjYjOkK 
z7o4kWhYpokg)u?kxK#Ez;EP{X0jG8*IkBmHkK>T2<9pgey_iwP#FM#|8F__Fz(Y30 zs87O@Gtwxk>xp)=08&_??;)RucSq(*m- zXrm?HKEneiAMA0oar0#n*;?oxTFD6k{#44AZgoW4a# zL?D6n#H^j-MukNr88UlcuBf`FDSSErgA`z*vftkdp#omjWkZITVo*JL^pGlXagwAO z1wvq(VrgwqZ2$*&W!;YOuzm{$b_Q|a3i|cyXO3O1<}B)(3>YxLIOfq7aN?h;`wcq##fglN0-o(epn`_c7Y!M5m;C*6+H(S+}bM|G0(tGCW^inW%r7?#loU?jV(k}cDYeL?yS6EVY$9bWt_ zD}6m6EB+-dF6ENTfk1E|Y4Rl5`S{%ec2(-KMFu*>-k5e=38IMzHc?dI};6<`t8*oec42CZGY%iIiPjYp$h%Z5=!OTXhh$(-S5T2 zfl?zt$l>spr(_9{F)uq42v z*nE!Ax($3TA7s8C)F{kEMFS4k9Fw@)N6KCWVWb8+0E;OL|1rV_T&>-2J#Uf>qfY4M z(~kSKI5|*i1c+tQL(>LHjrgJsHMZ9-ZOf%IHp%nnPbzKQcKcgOI8bTGjfzrxqU`zj zErUB{GRif-3IZEz_uDThs_stnz4}vcnx*$&Chinr%P|AaGxbQesD^@|iPoGHv>f0@ zKnS+@Rw*kMs}SksZNgnCeh!p60a76@Qevw`6+h=nK@XxUdFr;8g08wX#@mp~aobNJ=zW=+z&4#XzR) zQ22GA;K@=4jiJ_iU5E;yC~e3Yb=5@l8!%qmJvY+Q(Qq@XuccdW6~{ zzQTh8<8B-w5v@CD1XhMbDC|6?dBbYel^R+MYrg^-V(yAP=TZS%3)5DucNK4I&L0A{ zR%FNH3{F;wXHGfmpesPi*#mcP*X2M#IpD;nvTcRcj~6~U5y-tx{478jEjh8h?6y#? z{1rFU%`& zyYJpKMOM$jRnp(^~oNxp|rx8zdEL z918p0x_Y;ad+aY^*|=j3Is4wuvf$VCvV7(yx#X#CGH=pKRP#_b(t$?u!wjjMiNfJht;|pnx1GlN!kKS(~*? zbFf_eR96WJ4VKL-66DWuD~(Xtwq}n!tl8`&BeL(=f#1uGADto<;v$T8v3Tl6Y1Ol? 
zESbJh7W}eKx?SAdusAF-L>Y-U#1ju65N`kemOP=ng)dzzFGt&UFHR+xQ%e!WWKYE@8eNc*qa z%a&DpB zV>}tzY=nPsNwC?qsQI-C|Szr49XLUrcf{gUSL*Z9?D;6LV!M&?+yTOhX(6;4hv z*Q3qMi(-;u+h5~X$x9diAQh@aO7Cktb)ad3qI0%?Xch<`L%OkYt!RmFUtL;stt}Ug zI>Bf)&TH9wac*X;hHVF)iJG(88%hTQOjh~OaGYf^;%bv znoTOn+t>Xjvs5F&57DuIywOBD_HSe~gO)w&NXs5|WwAzXHPnH;YW>(F_7IH)BAV(C z^>_Sl^jztDR%1ciK|oYg6WP3p|CwVjeYHO|UcP&NuFO=;dK02%+}9Qp0tMaCI^=AE4k``h-eZ#0jt7dA5kHqP#5 z$Ja9Z7{I$;(9HXojz9M(sjkj#C{Fzzn8WV>`D_&m^JLmbOQi4hZPky_UJfLssrhWT z(QKNXP}}%0HfR{`1&Gg43TUtLVxRGD7Mxo4!D2aCHJNn_4*6faopM)P4!9g}IZ(tLC^a?F`<;5KP+R($1s6*#)goLv zG6xn-SucrUz4x*`!L5m&Eo%a@!>x%P5H^;oM#@V~^fIY|l&W8v+gXQO(KxJ0%J;0exHzd^y}HE4#u_sfrYhb&KAe-BoNR;+LSy^(?dFr7p6+cXH8s^M zOlsAtCGFd{m-_YVd-s?16<2`xJmGc!?vVLEu02eEY}KSbGSd+tR7DI4&wB*)nt9t~ z>GVz63J{=89zN-7c`sq8fkE-P^s zVZ(+}vt~`qcGHKEzW8Uh}p@VmOSzal20b*y` zg*Aw+_;ZU){cwpy#)QdA&9P{DY)z9YXYE?$P4*Z@vzwMOhBBa z;l-0Rl$cQ8qfcp|cw23~IUcVL`AM#Ox3^4F{4HM`Eoa+-TDQEn&mvImy6w0eC`u0C z6rVeHuKfG&zv9#q=mWkA%uHwp!DRC0lQ;>$P6c`D%n2$GQoVbJ~hVxCX$N+opAn9vTorHzc^avfB^rq zytmIHz|FhOxg02T4xkl${q@%}Z{EBt!lQBH#&Y)AXUl1)ohE1kd_A1s)Hi*0(KZ2a!4jqhonZM ziZbZFW6ddOL)-t`Yv4-m`{^v^}!{ z|L(uQZ~z~`&Ye3=XezdPG)=2*2IdOI-wOA`eQw#ZCCj}LYD=UPJ`J4beDX?YUAuNQS`I?vzyCbQ6ZlZR`Q{tx*s-IWaKZ^$1dP9SO53(#A2E19 z2oVM`e&j?o1ezF(!xaqCkiuc{qq75jYI2%HM2DF{++Np-H>JO!EI&+pj9IGoV)3>1 z?@5s*3O`b-DFeLcS^z6r4Ts~S)QknKn6*WbcE6Pi9z9-8x}vq9(Qq9nfur@-&|l?( z$4@XgT6`}o6BB%g@@fxu_Sao685~%=c(FYH{PXg^|NYMhkCM@=!s!KL#o)n%rCqyr z-V2bxhbA&(#tiSaK(637IqR&mWccvmMx*g|2c_UKdU8Ar&*lL{${1A1p+LIrMBwZ`0)1W^0`90 zgTXaLp-8Af!nHsGj;kFli>GZcaPDAawd>QsoN9a{in{9$N2_tC8n*d>ElHk&+ytqB zv9!n=VxYSF?z`pHS6}r`c!(^jQl*MS>ufwkXVJmTQhaABMc&iET#zw?f!Y~5i^=G> z-+nVmfkz*G=snE$6?OKPp&xj=KPgGJshJE-=Z!bskhkA{Tkg5%9vLxWgx`r>e0;nl z={Y#s$v1&OB>4RE&*jWB&y-4)vRV>AgZTaT-{r&;Pt13Bi{r!+AwXaz0%Z5@-6ota z(2R&uJShq@{rb)#>2gj}Ip=|6B|Iuru6_GtnflREBTV|=)R?9@{H}nF4cb;U=(*0m1G#d?`2OG8iH)TjlrbjBtb{7g43dbQ^;94%-2fm+|48A;;% zqYhO2Zaam?0W^1#;x51Za+yATy3yu)_wFryPC3Pxa0>6r#n&94zFPm1B}?SPk3KSHsYf1p#6)Ag`R1E`FG)KHGD3i7 
z&z^0LVT>C)cC6825bh8rxQP=dn(?o5=gvZ+Vrijb$HIb9J$m#o3?B3T`vL>~QGT(o zC?Z8AKSFQI8|4-Y>z7}Dm78w5Nf>x0OqfvK*i`8ETNqeaKmGKR(KaSeo-CMc$_tTe zapR3QO2vv5%`GE9nbpsnIWyoaK+EZ=W7Ci!L!@onwprRoTexM|Fr#IR9z9yFyY9Ly z`+c#nR;*YdJTq(30wM)d7#hvgsZ+g<|1-`wL(pDI3zY~Bq@~UrkkEv~WhX%L%TVfg z@Np@74lGo@Lqw6dRI)k1Y!$mdrW#8)cd@-Q<7EczJ8QPbmG&h`e?(JRQ!8d424ze| zfiPL&8%pELURmysh!dasSc{E5dOH$ntW~6(A+yf%x=+1hrA4Mw!{F(^N&^!xVjXe9Af@EN{K_RuI zLgebJt}@&je#QU*KmbWZK~!1}(V=xU4EUB?ZV|?(Yt=->`1G)98;?KsnD?>+RUjZh ze*E#rBMJ~sLbLHFIBbJxAf<^)gaF|Oa_zO($`emKA&;qA_{ql~8+F_tBVWJbXX60( zx6j>{l>>hna#l?Y-^*y-Zh8v8JgIO~(ZOj`)R~d7a zhQF6pbCu;e_t&^d6R%^L!FRnT=AEMcy8IGaw~4M&EdXIebk#rq{3FYkFPA&-x=X$p zKVF`F<{59K6v>FMX>{AU=bdMq=Yuc3)SF?B`5i*g=S28EFgp@vof9P_BotaYLr&Mt z@4gZtKZ?t20|pF`o78cP38cKyVvTy*uwjGuw#oS*!Yt6l08j4Bv+58hf8$EkL_YfP zL$A52i3+g`F1SG6fB$_mPTBDZ%L&B5pB+(egeWT;&YRtFc00!rEKnM##0U^V*LXOm zYUROkY9nOkasU1Ido__KRgJg#3Z;RuC8>`UePYhxb>8+Qg5fq?#6hS$I@!xC~M_Ux@^I0pfuv9OR^@y z59uK_!4)_010}qXB(phl=E!?m6aAcX&y^vXaE9=}u1i!8KyVOo?LblI^dc1ZJ(Y*+ z6FQ1e$BBER`ajAe;pg~QL(7@aKumOs64e%G`e|wj;a&Jfts$0}6xw843*&Alia9yP z?fINbuGOXLLm_0IXY7wkQzTZ2F=06wi6%lc*0aw(E3d!)xT#? 
zk16W!gy#{T{_VHl7Wr%*p}`NC7ti8b`QAS{12|@fX%du;r$=}otf3z?q_=P1zTTOY z@A>Zc!sP&UEO>TAA_4|j9*J;S!j>W4?Y3>&6#4IwcYx5{*&1QxBpbr(jEHmY&&@i5 zgphf1-k99+?l}bzK9G`Z#cJ;C{qjJhjEfM@mlT3xi7ElWYdFvMJ@?~WZ}YvMv$QSFveL2 zeO}uw%HvC{0I^I!eZ2@XmVTUsgl47>vyt9>FK+tu(@)LJ-u8p6Q|z8aMQCBn*s#mu z6wj?zMj1}X;`J1^5aHzmw~1&MuEn6ivt)a5KSy{5|3D^Y;GzIguU@^RQ>RWv@))gY zBsLSOqew1VLN3CyU!-BK_BX&xy}srr6$e?N1xBBftl7z_F*x_U0{2;CX z$qzSkc@8f+FL_CUAV-h^qwORxa=!omd*`MSyIxd6gorQleVnt*63Y}v#}FV7J@ina zCtD&&p(91bVdii;TVr$(BJr&>6O#!mKzY9bN6hUcDA4z9$CaYAa1e>wvu95uNPGpC z^Zs~$FyG&P`|aM%gc#%S$C_}l`iwyg#^PWJOAcYm)>O@QtPunayz|aG<}iFOnAI%L zDZm9t=Vg~&W}7)|S>iR2!+KmoHvnZoPKA>Ix?`4&Pp33cVG0pUj@Mjsjd2jeiv|rE zm|0f?o#8Y1F<^!S7r!sd_>feiP0{{>__|sX#C}%uDJBM9I3Uv>eDJ|Sdz?%}Ab61z zp}{@>!VBiNg})Hw*iiKczV_N{C1z5UQ~!PUao;N)$#|IXoTgRZ;|u${)ieTOs>*9$ zx$m6rop4~^2?nNpf@T=}8Ha$Iw*mxnI^UgUb{{f|HX zkbwgSn$!yR`y*F?_~S|*x4~>?1^$SHqtc><(Eq{`BIGp?UBrt)eoz0?PtP_A#UCGw za~nc~@bVX5dda|wFjKtkw%ZD~OF9tbL=3hsYk$a2XW+O?`$z=ibPb3&IWw{@r}InN zJ{;#(`-+T+@LQ~@DEn*$h&87b<+TfXEpk~1j6o1!1_OJmTeoh4AK4ND1?_@}%rRre z$Q^gwVRmuf7F&%#0`gJNTCx)$IZt!!)!b!{9Pp=o*b}T4V!xM$B|^lBifnx%2ugp1 zc41$BfBPxvNoF-sUmxl~JV3F9>#5_a|T)A)S;VX>cg{27IDc}gLU&_0-)AbheTSAb;4nOttqN&B!(Sl9#my)-Oy zD?|_;3}mRy+0o;QZw4{TDav^M?D~`6?ab_X;TYtfeDcZsHWQHZ&N=5CGbYL^0&Vzw(;AsPU(tj&crTX5JDh1%{2vSt0bPw_b8WJuKVG;3 z#2;7kuq`L;!!}bIwGjG#Bq3sj2hQB=oZ|E6&o?Nh}8dD_3CBnpxFCE7?3CDL^1E!v7-^9cPTg|lOCtWkNFBHBh0Wk*RN9CFSbEwFKpv; z;e{7^K`iaHe3z~d#Fry^_?{c>!!}pid?B{q+4)5H5d&duBY~Ldpc?x+_kg;FPEfn7 z?>>8t(zIlD+(N-88K{8w#@<{U^npgR5#~z9G-U18`RNO0iUsOaU8R~#W6giy9`ieh z=k@ydKCIy4er%c7ytIShSgl$NrdHclUd}IV$Ko6Zy&RZ%0(8df%`Zw-vjs@Xfuf=oBuqG* zV2OE6o-A*I!#-#HLuih_{q~!wMFMu0%{L=qn4k{`0YaT0Oq)!0iO}PnwZSa5J04THIeZHM3P2cmPec1bYOhF94`|dmYwNx#Ag@{vl*c-u&5}R%d4&te5gV#&_c6J#lQp!t^R$KugdYQtS_m_tFJZ2N{O$8S*7?221xWYq-JRDjwd<(( zQ>IMG(tqsIcAzCHh9Wg(i71{!{5S)QKk{eBvVtaD6Q7)vu2HJHqr#n=O5}Q#q?E(@ zWh0&Jy&ZNyA#T7#LmmK5Q9JA8ayIcTke4n@LBf5kchvc4sChw0B3E!6i9oCtwcyJ|uW-ObC<#gh4Y-&ibET~h7 
z)e=oTRvccV+CFDHdoIcwfd6C}lNoPo*>nA7YNohO%dQp%6^0-o>zUsa#B4gJcf}PP zh4EAa(`Jzi5CFt{2a~|uO37=5nIdK#V%*904TRN^Nut84mA_U7Kv)8^t@Dj z@EZXu0Kl7dd==&0TH#?G<~d#4*NkPInnmT5&8csJ+GkVz?7ao5-Q2W;_S0UGdehbC zM2!hc)w41G0Lq6pY-zbBjveL4gkksRv<##mv7nJ$50i_(F~JoaIX#V%v9HJl2uUhs zk-+c)irW(3eeIKWUqO~+hT>;9`skxgp)JfxzUaCIY2Wu;%rxYA8_>PW0Liom|;T1@(P8Yf!*f8koUE$rT(olJreoDXX@0crfJlFV!pd}?P@Yc3FQkUG2;c` z4b5Ve!e``E`o8p`?dE3cR` zxi;+A-?4e!=1-9DJ0bS~?K(*X1H^B_`JMCZ`%z8Dfd?LV!0bnG5ZzW@!g2G;?~-_Y zz6A(_7|5#cYqsMn`r{-oAUA~ngVgz)(&KZ}j=y6NW(;x+>@1lLGjk@KGP8A5 zdTSQq1#F`cEKW7wU)y#Y*fCV@BL{*Xx|4V*tb>*9!C{dSUA2xxRjn%#u~j8Jwu*#T zs3IYe6(l4)QiAmp6cplVLi?VPo+{}n`z7swev@3MEsKt57ZEz;K1cFWY5Ewie z&_RzR5-LB$z)Nn*shXJOJBue?{imOPGBuJ>zx`8?#MEvg73(&W*g8j)GxGgP=!bGuFaCTd8O>zv_kf6Tqem0Sqt7! zZkad+xX9Mwqx}tlaZ6zefUP0@l$x@4jo0jYnk0{@eC&^ds;$ zE4nJRMUKxzgsxnO12lpJa~jJgS~9s^yLP5K2oC&u5dJ?K+Rb)MSrk<=?}$0V{Z~*9 zfE>-UFAB=F0HGEFh@>Q@fIW|0^W~RcHs6b)qrTU`$MCt%z{ZXpn``^LZyV0MU_hI!NVsExQ?+nd{&X36?Sz*k2;t%R zYGnpeBMTk+s%K`lWduNQh|y-Es??Qgtuzr+_Z`@`Lw2v3@A<4|4QGF4XTL`(WJ7XAmG69b2147_TD$M9pOUXnE3G<<#>|fn52<|P#EFJMRDIH zC9guS6FncRI1lp zVr#`qY|Taz9up^_5z+dIQtMB;nuii4b^kuuzjL$f*}O{jZeA^WH?7jaIKz86;5Yc# zAG&u3E5V^sz4>ucvrTuY+~{ac;tI<$dontM3Jd4J@0nd8MpQI7_MwS*NVf-jc5_H@ zP_SfV_6NaX+RT(H=5$WtyMBp?sVddm^wQ6XlCJl*Yvml-vFs1ow|)sn8DcQM@4oxw zU7f8C9XizH5ClpivDW|&fmsh9go&MH+Wqb_FdV=Ka*1LaO;I1%fBIbUaZnFrnGVAK z2$Sjys*NIG&;r`1xu~E{bbr^e+qfR4QzjR_rY#~c(KvXAo%GpfpBW*@Z!}P5S7-a> zX`K*ngfbI;0w@ZbaxFlxYxAIQy6Gk}I1mZ4Wy_W#)j$?3SYQ}KUz`*kQ(5Y?KT&G7 z>>_auTk9^*!0c3u?YdoZzFkjE+AG_Z{V7`)&XDcP=dg#}S$^o4qw03*C$-w0q<)Nw zX1_-{$Q;>q3+;I{4Bh4s6h;V`O;!uxbnRo1=UnsOv#O`*cE_=vBRxXI(~dom3(|~U zQJ1WSsGzLYs=HKe+0B@?w$7g{JC{tCjMQYBG1>tRPT((C63JB%wpCtzh>t|KXbqU>8j={O2tyVD?UCx@SSqBSX(pW5d==Mg2cJaxz0ThDTZ)# zDgPWm`>-g{-+ue8{H^f*g{_6;T7WPhkf_858ygzH=~@(sK?teWs8OR_-8qh4APc~& zw|3FooCxj|c75mUr9!p3(x}@Ysn_u&RSY7`0B(f>Ln>pjlg!Yo|JZI#OU&-WqGMD5 zog}qYI3(@ZB>&AAC!7A6ECV|MAe5dV#~;Jm1_ z51fT6ZYU5CpQjV<|DxsquKeEkNw;F@R&H0Duc= 
zhYte#Wp3#=1pphMvm(yRsn)J-4?8L0ibQr;!W`A?l*^>y@rMc%nc2S`Tp5KAW{$Xe zl4p!XH&Du)^#=tr*s_z2kVoJ|SFJ5A&$vMv^*CR4s~MAG1&GrJVyoO8i9@arh60L)9SDSyppnkU2cfeVe7evqr6DFAIZn+U243@-*JB3GO^>6eX{-!kUc9sE= zW6(CkFoU(B%z@NouK+=+4aM^-{XSvCn(99>6hH0D1c1IM6o0>AsCS`tNMp_KBsvKlXDz4*xn`&XHCZ zJR-3T+IkOV#&Yw`H=9swgtbehg99ryk&qcCa3(V~M3(o;ACcw#^UpuaZY>-|(V&Wj z7=^|4ojwqM{qeUA(>Swd`sh;rIN+;&*jD(B6ei_L17RR%z`sUHWZ?FS84FS93i!Vb5o$w?-M!ZSPfjMuE2 zF_>lMV4S(dF}gF*vfmzHM{N$#bTX3ja(7thh2e(8GVb-4n>`9Y!K((t6qC1KUyvT4>hb-a6k z5edyEM43V}m=-ZX0H@+o$>hLzwQ)b70WGg8ME5YQEO&#}oG1Fmm(VtdmPGTYuYiSA zE-Xy)=L0#?^;{o_n9{@z^!WgLl-DKxBU|;;m)B?^O#X2j}MYPIPmN2~C_9At!&*QBH3 z#4*hdjf~cT!;R$dGATl(a$rI$mp~LmQlI7};J=xhM?cVa* zpFsl$%14@?L~Rl@$GfxysXrp&kQ^tKS2r_fq*B2;dqF%(^;mUOz{4mDc^dxZzO2QyJ$4X4W` z_elIPIk(NJE@jVwZL9v3AD+5Ku(1<1jo+&z(e~B<$+cgOkyd?9luxd|U2gd1V`ToAPASwzxcvLMzz<>aFS#Tlk)2WVFq1Z^FWA>LqqUqh9W=-zcbbWCgFAK)-@Ua zw(bx@$C=hJX3Q8FGGvHxHxMaXP$Kvur)audJJCr(!yiGmC@|8PIOr%iSXSHc?JSx} zlO|2%7PZk522AKE=K?;M9TAZM`m3dm=igx*zzB_ColSaeacCjVk3GR%2806$X9T`A zfdr({05npwc~faW;8Y3ML9>{`E3W_<9OH0;z) zYPW1AzrX*9?A^9Q+V<-mfF6{LCV(+vM){5oJ_A)70bJ1^C8{80A{V9ns-a~8vNLu0 zJ2yYutPg~DJ-37pgn`P=m~(qxSvd}r2(tEl31QHvQ6m#2$%GF7$T*E&J^%dkP2SAq zmtSs@1&?Gc@SSM;UWffmK&TMO#KcK}#O7qm&Xop2gTO2k#eu^diHB4|3T9^;;~WGF zodR?b$%znRR-02*LLxFO)oRmKsx@inaXqWN(EBum5W${}P+=QOmjcfLv_Q1Zr=Nb> zz@RWF_~L{G{;D~*7!e(94%o7Mm5jURVYzGCL}_$vCwc5xj}NX=?V1wbxubml_-N^V z)unPEG0CjAII+7#Rq`Z&=6r?Hv3I9V&Q4J*x@hG`CxxsSlg(`NcnwwoOp#ga`#K;2 z0E+Xyu*mg+TQKWP^T z_2I*Z8#p81-XWyO%|k$e6u|SN=%3Ywj--L)T7cj{_myOL_#v`j8!LF4FQLxj13~DJ zYrsu#GtNRd&U46E<;E>FKgm<4*CQa%J`fn5NF(z_4^&ZN4|kUV<$w{-rbX0fuyxQt zv8gj-D(so;k!~fXYMeyI#>f}9-KS3SP}#h6g`}vd>eM@LQ8QLQqnRAl{RHD1`T6I$n5=cBottI>E^6L2|L zy5fo}Wcu{!#&p;@)3nL+J6R_tWU-S&ga*PmUmza{7euOAM>s!VVA(Bs%}RDd<`Rd_ z7ixdC(hT7MW6)rbY`%m;h!+H^s90828n#sNpaQ}nBkZctLaa*S+mQRcz#NFIR$t

9#>x`jk|P~CdYR+_-Dg^{8}1z^dQ3?;Swc&HG1rz*5ie7N(;K2q&7POI^d?2~jK%DqCZ)u$YRIHqc zf##QAelgMuwaU`u$&*dT?04r9SawUb)qG`y1^*6BhK&!zN^2uT>!|V^PXMpVOiF_8%ya50W1VI0HDaMkdS4_A4nn-x1ZF^fpZ;G7hs~tDiCU6lJ$Y) z&_LB$tJ)y%0;VcV6~0tUBd_irF~T@Ud+Fdo z5-*du!Xke@kWkf<$byg6sIj%{)*V(Bxx8axIW-VUFMyhy9pOe`V8=hQRYRXEsAkzj z$Jvf5bkG*Eqwm$9@uQ|)5J&;ACP*SV5fv@*p9IVYLUl1~I%3x8j0^ypq;u!ahxLbn z1!oSnwjpZn$Z3{J+n7cyMQHg~BQ*G@`+Ujv#do{=J+K_$nK^kK78NUv`rRb!REQ+3 zoNX?K(~rR`yWohx_Q-7w4&XE;eA6a;)zd-8p8%Pl!4HIp5^hgeHjpQUDL|Y)5Gyzs z5ZMk98N~RpO8uY-2_8)rbGMrZ;>iOckcjfW{s`eg-~#8nqe8@sZfmz0)0PStrpGQ78V4-@tt`i$ zTK~k$6)GfI_O8`rJte?3nM4$KbXHDGSoHI6vTp8t>3he`IUQEueGHH|Htp}dzzzCo z0Q@7OXmT=PaavQ2&YGw}4-{WxAj!!g>gO_f-h>ZiqzWuF4yWL7HiX@sj};`Y59Cl3 z7Qsq;{D4X_HlC<(QA`@}Df`QrS*A7bDKX9m?Sbel+M{GJXatPY_&z8|b|iXB1rm|B z0np*}&{8$)V7<~Mdt#oe70B$_9-i473s(|?T(oLJj#N|gjF*}R!-}e6& zYn8*j3aycM9{KC$fuudM?w@Iz&v1%@8ir>%WZs0g3=fY!{mLxci{^`R+zW>vte}in z%F?eMkyI_Ki}px4V4Uj(C7iuvoL8(_!-VSE_JA-++_}p*&Jk+7RB1Z#2+y?ZlW|TI z7CA!pla#PqD%GlK9LQ^C&y~4fPmq&uxz=D}k*&US-9{mIB&uTIfFmiiMzSm~tj!p< z_YkNhV3!OAIMkUwwQSkauNl#nPS~O$p6k_olpW+rAo_*IQB%W9L-ZYw)C_5>b!6AJ z6(mn-Fo07d$?o{Fy4~pmVbVc3t}ij5$H&JDk*e7de&^mAf{R7qT!r|@N9iQY&A>#R zcG_vq&4;ZcfanLV2Q-+RCLO_togLr}Bp>o4CvzP*l-FN>-7l^d1A=qLmZF2Gg;6)c zL-%`3T3I{DdBv@NteO3jEdKdW0%X=6=KWjJ&aEzu2#!(&e-e^9~ zFq((qu08?y>|iweg2N)EZr2OEEmK0%nkBp&^SH8N#w@v0eJxM*JY8iG+()qZn4LiT4T*wmm^+e)PG?0eR$@Uh+WeE;2&v)xUqm zjj~|UF9zad{rrXUT;D;m@P}XIj(V--`BMk!vX%Vx8#IX!Sd%~#f}>HRM#idu@IOn#if_sEiG8Z( zsUGTkc}-D+A%EXF9{*=cO@=GU8f>8XX4}IDVl&YRw+7b4NmzcUpM@QnD~a$LYN(eu-BULtSZ5D>ZB@k<}VV^7gf6ki=YBkb*^@4;!J+;B2|zxyNP78}G^AU;j^P zDSXM~SKgL;X8t5~+q9CG&%8*!zV8v~e&wa6KmPtDtG4pczkkTGsWarYa|X+~&ps;s z@4rLljQ!StD}H>#?NXy@Be`+>r?Tjm-z0hOK7Y*=cpL8k0|JN)*#9x{_`|Hq>xFo+ zpMK^U;~ej!NrDIn{EWWDfKX2}l)GgHvSgQ-=6halzWeiW*z<7eLfG^XVnAi&^qok) z?={Jd@KtNtD(GayT(A`Ub7~-Ht*h0M?j-mDADM(Z3GIM^-saDN5`o#umk9aBpKD(PR*)ab2U1`S1OQL)#gaL}VZo5FQZHwSWKqULj#N zI{rDcX?Lw#VuS~~&Cq40zs!O?0>un$j&qN+I{objB&J#|IsLvdS}ZEg49Z)U&()dd 
z3(`pk^OJ6Q-Ux_=KYXZz@Lt)o`9E)yyH)t4YTb;)ge|gZ@oedO`2#v&_mp{Gzb3U? zAE$yx>yT^GZ*t+gJW8t7lG{6b{|v`3C>3VDV^(K~l9vH}9FY;Fxzq9##-a8x|~9%&L>k zSv`kdE;mp3OxFLqNCjxT)Nj|;wu4d=y~ns2Il?l zMn}uUCq_$HRAi3r6?7-}hG~mzU~-}Sy)SvW1~3-*8t{cMgQcjHKkBAV)mkdF;^X7Z zhYaR|l2hYoq6idDQx#^EDH(%hmE7b6ZZ|ImW`|VA?{EAcX8dW=<}F|LjDH z_aeXK)Ibm*OthJJSVDLZ!R93P;|L!&Zk!3zM!0a?|Jgea_$aEafgd3BUZjZ_q=|}M zQEaFnpjg4K53J9J`q{8y!}mlK3l>!D*s!9AVnIZ@ii%iJDIzLOM0&5^`QOdi+$_7v z?q)Yx$ejEpGdpwV&Ye3mrx(1lcpj|D8i8FB;_-xf!(Nk*rzA}N6RI8J`?WczzqCsr zpMUF#bgYZq!2U|N7-?Y0(b%UmlD@Ss@grKSRPt z?*3?f`TN$ZeFCZB5as5p)NUw8U3QP-pnv>eu++)kQ+C-mMBsL7(@Bx|6sgko1po9r z+o+IgG;Au3_Bld+R_P&25j>trS>-E8r5ZcsY0HnDRjpF3Zeyv{cz2gsxK^X)8e!kk z5r7JntIK|;Uak_)7mko@T)Rr^mT!>C^>%grFxJ&e7rN~O&$!C?`Q-k~lz6q4?1MY1 z?1!F#yO{ptIPp3BRaI4xf!Vz5cOq*(!fN&Fy0j#Fc05)SBEBlqRQ1&6oYQ5g+A3E5 zy;!R5R7ZZF@|{YLJL`N`7t+tOpR^{c|5+NoT&xcoVgmfBnR?q_(^Dm}b~60Vd*!X` zZ~O8+nWmpE=@NfV+zo6BT)0 z3I`j@^^>tYq=ng@Q70IJ4qK}_`DS(NO$PFlIIKUA7evi~07e#rf__^&)@CEgWQl}W zH^4HKyB`Vu^Cb{Guwb-Nf0|bv5-YjScy~sBsxNDzd$cBOhZRIKdUqf!DnZdQXnNXnp;RYQ!E~SetZ=2H&bQ)L5k=RBDY{AMOnB7~_}B`PmW6 z{kvTum1@>iHP!_A;>EsFqd^m?(2mA!^7^&6$h488<=07*<*Y~Uk-a*%cbRqjwLMPNPP@tMFTR$` z-+5X0?du52L!G)Q0WaqalzK~I|=9I|6HqteGhp>)4}iA@ff#{WQ(z2ts3WN zctNh@eY4zee)fb|=SSP`6B529|HF;eK5`!})8*7q{goP3nQPoSqXAV4qg5)5);89@ zcnC+Vb5pTDlL-l)ra&~WsCe()j|1VHwr$%A5$Q%iz_Un>Ab8w#0SHYs^{_%^6?ttu zuY6qUPKfdEAx+r$hI+-4Op$r^f#6ddW6&^>ae`7Fs9=3CjOOo)x{Vtk9yVmwWq=?F zA{ifj^wF3EfsU9A&VlP<5^MsZU4-LEA^0_`-a+Q5T4wh=y7~}OgD@dFI)Y-@ELw*- z`?3_d;n6gC$o-0Rj+L3?hsnOG%Gr6Zw({Nk&&s$bdrRfo4P^G`AIeexxyMNh5SuSw z>8}JP426?F|k%`?y8Ryowek=M{pod ze&Km8kJbEXKTG$gA8{U1bV@#}8Dl33URCTTtqJg7wO3&fZ`68EWdK%6?Z%Cy#Sv{) z`U>gXAsW|yGEAy!(W!$~!l>1F7v;oDt1anZx6J?P7dhsNOQrddZC!sw{SQI{Q)Y-L zk?T_&iX~l8+=L_YzA}N3M5?Sz6$wsE7JLbWKB)9c4nd#uKj-YTrB9zeuKFZxsIf$n zT4F(9CJs=QU*}Gp@<@NQ%MWhTbW|AU!Q~LCSb&s;lmam!Q?E}*;wB&jA)ymPw8E~z zHZ{vnIpq|g%3!SE7rYD5uc3>ZHEWiWT*&MiNi}WoxE(qIqB~}ec|)2V3DV0^w%{kD<)XQA*hPKR<@O*&kfqhI`l2Hon>41O{k4xty=I}L 
zd+o0oD09`7w%K7_q+auVq8Xme?%YGU@?BI#_Mo~^0kSa*$nW=V!n}+U(J^@|l5b&*AL!vxK3nR{R ze#1Rlx`k~6q_7i1-DSSkT$&_KvjWzJZfH3g%os7Q5?@S!V!7T#Ysr)IhiWy zcg=JQBqBkgdh)Z5!$iQ@X(H?GSSe@uO2imrBGUVtlkJgK(ncT|pcp zwJ;L`fnai?wP&!Ml!`v7Nxm?^!;#PU>^0e=V-JV2LjS;78Ux@cNk|g0N%n6xa=VOj z-7SvP5~mS>onh}T+s0IY+*y3^kuC^)&*RQ>hbvU6E_-$A?l*PYyS3>U-q)~IoA7q6 zMlFN|*i9u2mPo)JV_?_eH=bJt)s#@|d*UVbGrXoMV`OE<`nw+}EOCPg8r}(z4F?bn z>eA;B^sUOFY*KZZff?cK_x8Rfe^g6P?fROg5J7bd0Xg*0L$|p-n5_H=h!GAmX!{?k zz01bwZqs?rj<2<{=R{4#y;yA^4Hfx7$UrD|eysEI+n?!#BXaENBqXus-@Kd*5029G zAb=tD$T3Fy4TEe>1<{VP5Bv(ljST{TDZ5|m@^n+;fJP@4+@Vs+W+g~x6_7p(=ASh0 zBuz{dd_ocz0U^x*DM@V7{0lZ|(j?}`!ekSK0`UNNxwEsg!!CV(KemlK;ze4xh;+ul z&m&)!#{0LIDmy7BujhxjUx&ML7-9I}Fluw>1piVy13Wk8nAry$?aukdBU!!p&k#Y; zV|db}(pA)5a0mGZVFl6n-iDF47A63pNsc`1Tw+l@kY^5qcsfU`@?wdhCIJij$N)CErJSZ@Q9fYVzI3^x`Bp{{W)rH+;tClJK z;)Vao(N_<2Y9eck3M)i7a@1LNAXukt2J8j3kBN>Qsdkh^|2ZV5In3rv4PVpGkrv)r z!Z%>PP*I>KXFESIB(f>TyC0sL+U;O!Q&0pJrXHuC-PEZyTbc4Kc*n^`nca(U?s;$I2&0Lo!AZ{!T(Q92mTmH zLjukvG~#Tvi$nrWOGp*qYV66;VC>kj&c1^EIzJ%KS|kB6!om9n#6>`8NbLdCOBhiQ zml3e+h=QRABmeyXn4@F#t z$Q))%<_f_%1jfZ#;W~ex$6qWy#Ju6U|Cuo(DUs_hHb$P^Wr~}CV3VLK zA?{*CIkw9Qq{5(s&vC5Sj(7$cs|`epJVKOE1*U!Yykf7?((1VL9T7>y3oF(&g8i{) zAS`sy1Zg8IdW49`2929pFgYYy(uDfVJAx{NdnpWHmq-?}v$Nf)czHzw2nBAGclGU? 
zklsC7U%|fEGcYq}{_!|PO+`%=bBw^_ijv+)4$poPO5Cc8V=5)SSP}@=$DRQ}Aio^(9uyEGZ4KG4ed0l` zF|v5T(3@u%=SKo|jr?Nq{Pw(iNYVLaGx%=-%(h5#US5Zo8bFkP4_iY0+qpTu?G@r73u`0%5TKhC2z zo1@7WF{P`-aDux;1&Y{Uig;;+ro9d zdby5Zda-ZdMM}x$R2b)@jW9n6#M)B|!xB{5KGxt>@GA^IV-R8U7-Qg;@}rW`ZjO)Ro_p?bo?nF21M4-9)x;-n zl8QA##!52W{_*YTXr~AJ;+%mEt5@Wc$#v7E94A7 zJe&wpN*4bzQT`nJvTWY4Hry$kurU(hoxw&*W&pQI?2F7X@lZrA^9^Bdge%5)OXWXj zlhW@ZfCOeh5igl6AQWpgt^+(WUkQjo&z?P<%EqKV+n=s=JQ7H>YZEI%%@l}8ynvS< zf#LB&$|MldmN4POWk9X{027Nej~o=nKq6xann6kd@$>R0>Vq8FFRo>{&tV%uy77T5 zwRKw_EOj+)2RWehJ&6LkGiN`mT<@a1`)L&Ge7B$PRyWvYGn8J%E*I*H{hWf_7L7;!!D}Zw%?ua_UmY@&+yUku28>r631QJyk1sWnpcaPTB}j!e~cR< zE9d?kz74owT&z)}Bn#y>ZIJ_hUy|(UCE`2`>I8%lZ$(DbnQA;oWf4H?X8__%u{%^j zq{=T!NLB*DPSaL#p<^QA9B@q~APc?H1O=UzG6@8IEXHr5CP{|KZzPH)O`5oRAJN>x zKxl;DL(u_(2pLc+bhS`YT#NCyCsqWh2H3QIoy`A!oGkq5b7{EGQIg%}cuiEO3G*DX zcTFG*x)DOMhO`ZzhLNzg$Z4hcH7_CAN**XZKXk60nTw#qedZwXRe%2}|IGSUYB$|i zDr^2KI}YN`X%ZTf>x$9kTsh~UyFNqDxs;vn+F1u22eb`@%-yb?Zzpn?eL4NG4qYo} zzw02h4dIym(I5>H{32&>Xhq=J-FM&ZMEpDl68wSxT;+|q2^bSRy~H#Jv7!5Bp`JCt zYp9b+6NDS5dp9EAff6%2$=xUj7Zo6Qh^TVnm$I|7!*-_RdQJYUWF-(+IWQ+pRJ>Qm zo=i+jypR$J1l1K1#DD<hE==lye?r zhV1S9@KtjL6YALZf-vklj7q(1L*OA18*O-;Ib$?p;ZDdo&v(-$DdN&!zmY{hd?qXY zoR-sZXvIed0|7GtQVXEyX8@N<3r;Nhs|Qz1*9;^pfxyfW)SU<{SNGriQnyF;l@bXA zqzuxqsB;sUh!!WSF4=YMm<@nvM|8MwOMcNQf?-na3I{iV_(`f2%7v0_485_zy5)=I z*YR(=rHrPAudP-2YAGS9*|?R~4R0#tv@&2AE9*(%)-Z7aF?&RUcJBC=9CWO|DG$49 z;T%~$Z>B7r^P?7VnkE}oEw#(%Swl=_vV8o!RMU*Sb($R@wVLfORU7PBui#}B}->bkkuQb}$3M3>1C_l9fQfy%_p$*1%G8t|(|Uc)>=?p%v@Alt>`hGW=XV zsIiQQ;1(9m*o0vR1K?wiJ(eeBFo+#5u0(V8#JWL$Ka<$LeS5bc{k*HeKqE(vbcQBt zw^@<>qF=spi|s=asI2udD%Y$p<*V0Hf>J}v)M{nItO`zAfm9Itut^Qbo7SyW;;>9T zl9o8*{p!W@WZkm-jZzpq$q7bOy`_VODy`5QUo5l743Vr#)wD!y3#qzubE)2_rIS>$ zDpYpV0O`n4EeMOF1Q3$YHlzf|B@m(Cp&dqWa!%LtH)s&%ss+Ew%7wG^+P}!k`7>2q z+g6oC|9H;f-C+^1IXwrfBAx-h7jhf<9#>}=PZQW1D&qEZ6DH@=;f zec+5lb9dnhimDIigG_~FB@l86;=X=Q{|Ep8KmbWZK~#-B*e^a6>ppQ&#%q>%mUtm0 z5(qC6(U7R3FrLEk0|eY)#Euo$!2kt8`o0TJPMLIiw2sI5GAU2LlJs%MT);t_tclI02gzu1P(O#WTNl 
zi;V%IQn{9XR9ds7g1Rb~tyEo!TQ#@NDyM2BjSVPOs*J03joo95+J81{WH>}}lS&fn zS1b|IjjY#Y-SWTHAU;YC?52~;v7^UMP%{u$tX!Tp^qs;F!M?0ygd5C_gF zxcTB#Vt|U7-*`as&XLi9cmV^x<7lOB$u>*2*mmvO$(vg06ab0v9^19q2SO(EYdiq) zV~luY8c_m?Q7Z^71&9rY0mf+c+Hv`38zgB!qE5?(Vf*r1699)35B>Dez{_|6H7$X$ zMfLtCD}iu7F!Rh2-~ajfxYINDXQ{_5DgR?J4$-Pz8F%R z#Ik3=b#NZG2_6Up2&B%-Qd9sDBeNw7fI1HK8a5j8!cep&(G=K`V>h3CK2RhLRCFYs zG(v0bAtKm}SRkyVXTceP1%p^5Kvb5qsHBiiV5}rTK5=)EP(yFeV@h45kJ=TU5IR!P{@Y?Id{A(xgo>Mi1#=v_`;30#XE$H94bv0_C?O z2oB={+!{gDgR`V z<9{^h(bw+qE)cm6GXcRSB}yWY5($J0GC)$2Aaz&)_E_~iY=9tUQZ4ZE;xPph=Nw+n zXxxvTm(DdQ30Sb10N4VUcnAyPE_t_DkQNZLSs(-~e2|FtyhPj=93U?+{Uz#IhSwT3 zQdk%RFD~jmTX-qBcB%mt0k$g$9Ul_OG4soHlj`dS{UtX8d_%a=_wV0-8!nu8iEj%@ z1;mHM0at2vF0RPCWGTKu(6OSbLFPEV&?kA7;NSrek)6+Pz6I38UAuO5wnwBRJjEb# z(EtYwhz0}%q>BqG$@;(%NIFzD1t*{>;9dcL%=`v1(iTEVoD8|2Ue)T5(Q-RM6c`f( z07CSh7JhV+{Ks=z7 zsFQdI_JYLIN4neaF(IlJmAD%1ei%cxz62~p9|L^+=zt+CEJ%ECRlM}9(JK>^!vbOf zAPy`zJn$hHk%Q2|anV20qH%ccq~PCnGXo$qq<50}MFNN!gbV=z@$=i%L9~UdwldNn zVi0&F5N85&$oC22MU`Vg!yrpLpYz~kVJdmIP@mB*zZjL%w5A2)f#V06V_!p^g@l2` z&U=YO!aW#%V2u7X@!o_85G9VJ0wF;9iUuG{a!5f4Axsbm@;ULXUS89 zy^-8Zk?#aCRv~=LP$=&l!Mdz#ZcGhPaHb9<`pYMk(md;EtYs z?m71XfYg%#u6v{+84v>z#r@QnY`N2-^#{UzP~X$-1uoFk;z$sY!~jo7Y?Lr)aNPOj zZ-TRCe*I^|KKevO%`v`F)K^FZu>h4BoGH#J=+{B)^vU@q75nF+#-cw|;UIGmI7A8V zpK~Dm5D2*bSimue5APajjVQ|sap3$|0Fir6-z%grjQ2<-j1xl1SO9eW5Mcw369kQ< zVg!x3v9cAZj)ekAr)uFIQq@;}#DnXkO#%U-;66tUbRHPo1fn1h5nW>d+y4xhq=7ge z=_bnE@c||R=$cVyz$KzDCj%-f4-9~Ou#@#;KTt*T-l_^2s$A?6_3nXS;5kH2E_0N? 
z0}q`w-b5|b-gB-Z91N8px+m44gFb0{sdCQ!)td=P39ly98TYD`48wy8@G9l#eb1vr z?j@aVNevV&mI}2M9QmL@gYvyR8UgMfgEIFCP_&JC_njz+7r=uOqJ^=XZ39$JWD4e6 zhGBphU=M)_0Le1fkr|pHY^ZA>bl6bfmN~~a_T&1`^G2FN?c!e(y9dvM^YL&*azX0B z%Mx`>tO#q=Y!=6cgk{7MhaMbSa1P)dHg8gl{;2492N^Hdf>1(CV324VKxhWiMnEv= z@S#HSc@fOWjJE|JY@)~|cwm_^0|gNc8zt&XKEy--qVO+8g4m-j)8v7qiHEyn|Gg?n zfFLREbHee*%WwJ-Pf{fHsM-avs|`?z_thwzE2>ifd(w@nH-}5!L-(Dni0TwYe%w>;P#r-CK-2~p zeF?htxoFhNAZQ;znsDZ%4T9+SfF=R;Do!6raTw+8c{7G+o|#BaAYmg~JYS!9&_9S9 z^$Yfq$eKa4#o+^DfqDtG7iuKl$>a5%hy<7lIB4z@S7FA<-0^NgJh9<0e#VPqiFvod z^zy!N58yEz17MIKjNmcOgQx_MMhS$;MU3SE`i3qZ9vou;+t&=hVe%11lQg2;2_Lt8 zkkO7u?t`fyyC`a>pA=cad_WlBh(UsIuK>UC;sMx)?i%EW;W!$gCx_$2x!`4O^Qm&1 z=;2Y9@P2|MKuoORM$z}9W|^Ss3ad{Zt>2>DXU-A+HJK9w-vPvk2geMFgRl*0v!A&o z4#Zban4X{GJ|L9GF`Kbswr?>z{0~-k6MNvT7BoGGAOAfb#0S~SM?gMcX zy=7qm#DEJRYO)}yT<&Zd!G>t_l1IxDklQ-`Ek2-FDuQUez@;SuNEvXzaJ4{3)knXj zDg+3I|FQZ=uY9?3uC2e%?ePBL%2`*(&iitX2HlbNp@V*83)B@e67anFMc+|WTL-I= z6utS52&k$2gEC%7+-sumYpihv`-2%s_Y9ba8P`GT%8$89#aPl#Ku}|t%U+@b#@!FY z7LrROFfsHEr_I9ebWc{~7&7fQ2$BVE8>Ec=0QC)lQ7m}8;4Op70}mVAFB2go@e+|l z)mNy+(9s+5U|vDw7*av}#1I(Zmg|C~^d51}08pcxk`o(ovc^IDqYMUB{7}EVsD|36 zO`E!Q7|D@zFs%1eTL=WBlM*RxD(L#r$>aWtE#wk4y1%Eyl6l)v2nch@`{1j;+{EPk zgE?bveU-C+JlM|s8G!NOy9e-6^QHH6j6UrIB$+|un-6m1Lk66FS`D746)@D*QjQwE zZ~z6*0521W16Ro@8ca1@y+~$huownLyk~gf%=lSwZ=q;l)RF{d#eziDR}i%SQ{){9 zR^!?C#KBGh_|d;0_I0Ay&pmXQWF3J-T4C7cgJHv4K0o4-sQN0NYnJpjcnkrk+OYc((1u45zag&Xtbn5fYCBTY zSgH~n1?+htof;sCpteJ*#8b_e-f4$t7v4>{P46fYnB9A@PTdS-6ahi)La-4j0^ouM z4jd@W3z?`abyKzD^mMGcLGGjWkgGJ#0_1`>6Mp-aDK*xeLOh7OFu<@5Li24RiK?%x zuV5W}1~BHc;6;QxEkIt>bnd5D+(Y!M}(ZNLwnC zi0c&i6VEDY7yu+a3y4R7Q(7@t8RaH!-Qg42u!5Dz3210x;;RheNOHAlMDSJ!Lg zuyNit7S>m5Ohcl;J^7MTqURTNdjjA9)Vii_M5;jDgnbK#0f+*C+R0aExt;nC)fYe{ zHSY8z8zl7!&(qXo9EVUrpos6FTtl?9B(w`4n1PHTAS5Y6=Spg3av5a`6vfAbrw?fz zaCMFbs3g*rKMXh4b5sfeh=YiP{D_COH6nr+mFeT{nCaGi{kt7Ybd5VV9%hlSpCYk&six)sv zciwrYbk&?klmIZK1QB{jb3Jua&YAsI@{}A|00@ry45w>43{*NGP6!V9ug*}F%mo)- 
zC|{`^0975X;dg8HRZ@cB+RU{`Z-PTXL108IQ`}vNn5UW_>MXp}aFAd@#fWr)Ere)& z;$HwH58fx*qZ&g+<-7JGSrW5FwL>Hk^4KA%lEyp|@aD4|Qp}xSn}eKWmS!dLyx{ka zF#{Q41HsJ{{Vm*Us&mZ1m5~4qB5=IADUt_?2BEE@gI@Z{3iYY_gf602Pvo6RFe`iR=7#j7E|@3ZllTYK=ypJAZMoTC{ zN2Nua2OKxhOJiF=n#8!=sqsUkH-WH`EN_5T!~9;I07%d{OrP_|Y8SwujOvYSydP>Z zLEeK~wQ?qz6Y>L7quIUX*ESW${-b%pOp*iPlOz)0ov?eq;)*K-Mu1{RJWKQcCzc9z z7KEJi9}qvrO_>GuqsFw9C!c=$sUsvLWF#g8B7?Mo+6-bDEsYI?+L#39-nWyHW6Dnf zl8U(q`pBID@PWmA*j{@#`OUNNnH=6Bm^^=4 zV_qpz1j54$2qwUE^pxxbDKR$?UQ?0KKiuwjx{qwGmP?BwIv`evWM@TipD5wreDYs4 zQ0^QK9o5^vwO@$w-7#V$+)d>CvwJD3SR{xXgb?6eHWytGH{&;8e`M+xj`3Zg=AsWX zhVx$Y9vktX9ovRnXpap9;sLOxFkB?yxn}{L%&9*f>@r0~WFyB_5`M!B;6>>#CLMGC zI79fyh*G!bo2vQA2nob=tEuKz#;AT52>>JnfGA?clT=r34wVR88JSr@%#4Q*mPlyS zB>cWki3PA;B?P!TR9vVB5cfgsNEnvbEwau;W&H?|8iK&vZ(t1<25=4pQ2<<~Z;JsTz*#1X2uw8+ zjeCtXUY?N=28b$)JvZBLoR@LG9A}I~v^!$1^8@ikiZrf_kBeu9=a~V7_#Rbdu^y+- z4gyZFWqwW^b}y-ZUu{g8xK1S4G!w_aBaR6I&$l0sbPvOjj?B^-A%UP00;=S2tEqa} zAX($q=o4gSqP<ydf!g3Z>l* zqKHUFJyKZWL4T%Jv%)>>D+o}>QCX04(66JiVLSlpEp+*PH5CcZ%m^EVo@9&=42TO- z<_eWSiFqN$0kz?Ss>b>}N34h7An738VEaJzL;Yv-Y>G8T-YL|re2{+Xm00bA{lN^R zYX*?`A~PY!x)vG+AAiMam+1bCARxw7me&bMB_Mqug@9}^#KK*|X;xKH9yJx)W;A_H z?Hs7F&?P%Rdlmlx5Cz;lfp@q*l4=}L5MTO4Q(b_tK#b>Wfg|czAno|c&k#B5t2EL} z)ngDFkhyu#*u0cc#0Q|RX{GfVu2G#nNDnUCCq~vj-n9`{BqaJHabjU454$clEeITe zX?T;x0~Gpcr;;7^59Z;KoU*0ZrH<>+!P&4-+u`o*12sb^V~fhKixNR=Dd*_??8+NDy;lcdEo2;)6q_CU?yGpV+gi`87RsbfPfei73;d` zl0LXzMyRk%3NSTAtcgf;Kl)&j;$5l*Vc_O5pkiMjUlN6dUQ#X;pGxTKE8h87S@E`DdOcC?ZhHc3TQ{i6 z5aT?~7}%fU1*9(vS|kai8J+EX^w>z6NR*|Ocm}Zj&j4B#ywE`K=~i{3H5jaMbjJMo zvxoG7eKsBHE3T6f5{LzvmDZaLpm=707Y!(k8Z6exk!~PRNDd%NVlI%baHU4NLJBLZ z(pwpoR%{%x`bzhH?w~(KnQpv%9)r0mhA=L;P9@CECsv0j6WFXjy(f7nh^BqOCjmB zeb{Vf1Oc(Z1nV7p2H=$X_wVoQAvhrr=!EwZ>K#0%P~(_1T3j=M%?29`#&uLpNJvC% zcT=eeNhyF}1~PF5qDdj?mOhw=5>G%VZHnpygL^apsex4DQLW%A+DH*jwi;!V1&m?X zqAX!vvi)T8c^JK^4#*NfFayC1q-F+uDWoW*4@R94ZoH|PzT|o#LEo2}5+D}bsNOad zV2TGusIHvI^uhosHXv#N1TzrKKrjP|%z#NDoKqCi2iGZ)5(w^?A8Ib33aX|crHQ3$ 
zAUo+Kb<5>JPz90r5mI|lPoYV6wsSwl&32I{?IU|>SFct#lj#E~1W1uW%n6a~#0(IO z_@)LD{;9!hxQ-Hujyo>|W0bn15@6`Zg;2nWYZKeRwUC5}RUrk%42^n^R-V-#L$F;k zW&pi6*&KLx*tXcA;#LQ4PqPol&cZi_J3cspxFK_I-(KzoDFnP>F6!wXcZnk)xGa;E z^-2vGMDOmB^=Y8l*BT^91Uf-{1Pf9d8-pn?6LBG|iTuVGOhUT{G^&|kwrCk>iFT&# z)vBWTU0pZ{fFnrl9u0gWLET9jTk@2;E>}~XI&PiR}NOTAUNg5$4^l^pegd*sX z+*|!LcBTJ)_c;RMr{hTXOA_>-c{4ynJnrw*aRFfj#3J?22x5YF6K=258T79dRqK+Yzv zPtL79G>6wa!-lzpf=DrsX{q0Wg7I7@!M`1H21bk+;euf;{~3sm_kmn%aLnXVGmd(P zoPadk6yGxoF5F0QNOR1uB_q5~88@UdX*QR@{C60g)bW>ebMFy5*iB#Xb zBquBN&erUuQ&a^N2@hxyBE;H*wZ1~5d;%_sRJVTVgS=KC{%457p0TdfdEm1=uM9d(#B2#H$okbNg7Sfi}iv=es0$MQ_%qZ z3(kP?Pe`vz14WC~qgaeKlmmd7P?9+@)XF{XG z`Y7`O!vmL06o1pEO`X#PMaW378K|rhex^yRGp048e)v?k?$VcD9qf$n$`!K zB&nAER_>nt2aviMC`>M_L4yW`qu;T?U3kF-VXwi|jWFoRp8>Q6lnsE8zyQ9k-v$I5O+QS0b2na|zXE*3O zn1Mpf0IDnMykPsnFd9J7%>bTllyAVB>1T}_vsyNmS=Y*Nr(#67>7#;#NM&ohSu$%R zBtM3Xap6&hN<5kV{Bh)Wj~Q(PAs&Oe9RZ|v2KuTO4}pvUWbO>$sPWcYZ@B?dzb1g9 zp8;Yjx@!@-dzFyCtF}sPPk2-U#B-$zu$_`g7b1e!SE6fdH+~XDe_RyJBXByIei_H3 zT4I~v=hII=-S0NMozVnjm3k#XodZbi4DeC~kl8apN*vTDWF0`Ik}jaWdu1(F#uC7` zGk_;01cYEsfb10j1U^-OK_1U9jP;EgHFDkhYa|N-pjqr)@Q-8ri1ke)efS<({&`%e zpx?NOTfL}g_nXlK#0I?UK6T_@4;*1XgLRTK@YJQB%3rfrBzbmlT2aox!i5Xn<;ZrB zPQWd=k9rAG9g_%VxXu7_X8`E{uy`zUqer!vDxmaow0S3p@wVG;lCfjQYfy0`%@SQo zQ!D+g#A~y3=x~g>{-5D0X(AzzAp?NvAQ+mcdCML>M$39rJ{w|?C?MI;XDrw*2uP)B z<)r0dp|Y{DZY0=W@C=l%T(;mwL5qx-0gwQIYj-*U#DxL&=H9&pPrO@ixg}#`9MU-t zvHB*35z+l1Qar1$jR4peQUxY~-~nkcV#NCzB5;yC_`q>8^v!EEl~Qh{%7qJ8$;Tsq z)igH!G$X+**>AtSw50W^T3Wl2^BhC>v2gGCIc{th1jHsaSO+suQf7cG1VB+H^TRduOm+P{O*vxn zz!C5oK36vyWTWt=ptT;}4nq z)wlB7?9Zikt&PeA93l1URngeB^7{T%lIb&Mx#;?L-WjS=ODMe({Sni0dC!aF|1RE5 zF1)X)RIQvPAB~>x+727`wgyw4>h3vT2_%~6}{hpLAk#X|-opm&XqWn(5Q0yFlj&BRb0dd*?C=Juhr2#~s~JjywGV zy{Da=>vDb)NZ`in&vLLG%s@J3Acq(v(zOwgbTfF?KdKticiepQ%`R03Dztm^S`xAglY5n@;n((!GGF``U8LWw`{p4Yc5<^ORi*dUE#%Qh9w<-(2?&VITCfggAWbunt|W()1wKiU{!1^v z?A%OKF?@`%mtA(5%l1nRn^_vnc#)?4xk@ud%+vD2B@qfDAj}EgY#)C3VN!$y31qms 
z2?OKDkC)>*ByQ|>Cl~N<>()|EC5q9#dX;r@)xTTCJfp)=_0=x&MS(Vu9hQKsS-xI| z_y0y%rvknnK3l&3>B3>S9^?-F=7LcxAI_UFX?{#8r?N z)(%TRRxMpGZ`|>vtXUQ^M&_BBBz{bKZH7#Fe@+r-iv0T3B3ZUDwkuq;PA9!KQ*PWp z{n<35T|3y9z8NrXwy;;APE0KgY9kdn3-x94Gx8EA0D(z_$YA^Ept+w&9!QJ=UPzBW z{_6*?mODqoof)M-sz?VRlFJG#5hYB&9-_d8R?Am0Y^t)FprG8RYo_zXm zIqZm6rBUO7(!IwB%>q76=FeZ5=loHlW=rdR2C32fO_$Aiw!DpBGIwpD`*Dn+1%c9>_$cn$$%A(oJWuK0_$^j=fk>92) zk~!ZkcIU79XPq?OyS7xXS4rNxe~Pqh+dx|A^3R_uWz688rOS2u$@GZ}exp}iNbv0s1OOh7gnW8 zm9?zHipVDJ_ieXNl)iV5b%A}6{SfR&ak~5Oe_VQ8FihWtN*W|sM^$SpH72BrQ;{J} zp4g>>Tz=uMQTz9IJ~w2ZoYwW)k}HAulkwJ_ljYUkpG$-0H56ITmphMsTQ;uWEYJVn zSo!GD@1)TlwH$JO>eA6N_va;YZ!89zq|1E3?)dM0|9^_J#x;+ zN_c*C1nJqHV`StLKM1iGpXhq_H;d%=?-$Fkd%tn#PkMc(YrAmfa_N84`%=D2S*cR1 zy!1Q%Jz1-2H?Ch=1Bc31C?j)!S|a~D_8qBEwX7`DvGh4=sH|O?8p$erGr>mMX27?B z7{X4)#i*)zvDzp8rRg?K)p|0~0F_DyRdYSBB?+F@!bmJeJfgMFZy!MsvghKLRdp(+_=f=aW&hi zBC)Uh@clP(Ubp;hEHA!3TV~DKEEj0z=Yrxuw2)bDWbzxjcCf^JXaX;bC zgN=-x0sp)5zqi5+7^L-YHFhIaP$ro=Gtt~kNmVgZOPms0fQv4R6-!Iz`dsg@!wz$` zCXP7b2zO14_?(NeYjiJ5r9hcn^oLBPTfWckNxO z>PH+R!c}UP7b7+`8)$x^nt2~~>-M`yWhFSHp8Qc}f4xYqe5s?%`f{NRJWsE8$(~ZX zQS_)|jOl1W{`oT$oqW;_2S|e!H6n)+{LauBNCyF#t=uq$%aB33UO&DJ)NHBt;}Gx{PRjXK=7-*JZ`p4VM8`@Z|`n@D#V-xkDA zI``#j82?sXZBa=bqDA2V<~Bby7HvS{yYIfct3Vp5YV_kkh{qW3iDg+pHlTr}dcNVnkZBXyJ|*B!QF& z0=`__b2)7_s1YwRKDwGF|N!)#Qt}X1VidTPlaB_~&umwr4b#xBGtO>>x-VNJq7H zuHx((W~5)ac&$@ktzEIfd2h8+se`^*BfUI%$)|GQN%@PR{&j=x;+=tX5D+Ab(ONNY z_g1Z(ahv!73L$|k{FD_n)zH4PwtY>@n_?74{eo9j7(5L`cwYh=RNACkA4yM#4IAbR z$5huOu7Uho*ba6CAemw_O_|O-Kd0)z&Ye3u=K}8SZ4JC6m@pdTM>wp#Zv$!BvZc(N zx!4Z5_1w8Dw4ha9y9Mu5x2`A2w^P29!`jqURZ}VX{m->BdCEMgrmF7A>Vk{I!*9RM zQoG0i>2u>j(yM3FFteNI0{Y|Q&*sZTSADIL>WAtD79t+BDNq72!y1jZrZB$?{S_x* zLuhtzJt?EAq#h3+B`;k+K_2M#fgl;6a;mmdMd|r`dwKEN3G(n+AIY9aH&v*8evxjfa%;SI9T{@dB)R#+ zlVpL~N}j!Zj63JdyAG9I_Nwi+gMXPn1L+_j-1XsyAMPsSv3>N>M?1G(xLlurELgBW zC@l2ggAWRxTh}U5r$z+jky8oS1aSSuJ8Sgl(ava#jR3v;zto6dlqp$Kd0)x)1>wMP zfjS)r9duB>-9$NgQ?RRC4E4tXNhJ>mlufbu?ti5G{%Qfvo!dtUN?v?b=N; 
zYTQDZ^8FI|e)2$RSih`9Mw?q7wti#S?{ZzAA7#nX)y^rRPoF-HKyti53B)HLco^&` z0HNqJwrluyE+5<>yloqJJN0^J+d@6JjXKqu6u5>k-1y#!dD{qo?*hP*?)k{${|p3;5q#VeSNX+#i)IK3R?D$EmAr zEZsPX5?iYZ3uFc~$1_woTP<@yrEy)|{ft0Fi7I!}&epEkaoNb-B>TE0+Sa58EtG6GbqmVX2}ZC}%r`Q#Y$!2&=Z z(xB<2DO#l zva5#Ai?uO&{6gvb;19wx!d*93fh^t|7Gg}0NU;6*pzmE)_;)}+!c!4!1T&D58Q_~T zQ%#7NAKY^HR9UIQ0@Oa>> zufCGOg9i)VbqyNiMzLq{QuC1QMTe{LiAp@3m!vj7jhiPcG%nT zBH9B8X27ppmr6r2=}X`SPss@&d_<$m@vu5u)2|!cqEX^-s8?Qjr5u{$a{8$nxKlX} zyH-JKxN{IK1f)2wU(oo9>NwBm$Rm%;b2;o(FcyAF_-n4YTt4`4rl!+x9d3Kxd3(#S z;lC^hOho%n(A%K}^QBMt~iv}vT*I$2=iz~7Awr<~EbC=!v ziTp8lbxbMb{g3C#8JbPNkJX8McOWdp`9Sb^UN{9r14lsc6fgpUA9=E=4x(Kkp?w+Z z1xCseVm&Hrpy0~IxiKL#zgQ>}HOb;etx>#h=UruwqjuJ)@#!w;Z|Gf<)obb?msjiS zVWB)%hn(I__H5TEp(zO(Z{IV(_hb-{M~=14Z%qK<{u*WcixvfHtK4BMz_qrD29QnG zlp`jo@NN5qEMx(R)kA9cn~FSOQ1i~K?}cSYNEdU{O}#W3<9t>BHMHGYC9F z+5P3Ir=AP%w+#?~Ih8yX{IxbF0r_;|a=Gq?E_ODX^#lz<9IuwzK{d?O$wSptbRXOUi5w1LRqdFNfy^NLaG zERnbV`thBc%A`pXqIKo=ttdVHhCxqbcZ!fn)Om@@Dn!}S_x43ju`R{04O zj)3?sXLtY)FR`&!V9A!SRA;0EWmXApUn& z@Y$(pfz1I|T0C}e$%LaUj&Gd1F%?!If#iL*el9HkNRJ*DsLOS8IpM@N@(_^^KbocQ z)%vh=hJVhuapPnsWmb3Dxsty-ygl{Dr5aJc>o#u2xbgag!%iqL5}k&XD_0I1((Q!& zsE&1>s^IVa(B$L5hP3>7^@>sW*u@R$Tgc*)9RVu7|HU1--Z~haSx+3zsC4bXAO}~z2 zfw5pWDK06~-m$Xk`I!kJ%Ud&I6r!%V?>HL>u`@nEXy1KzgnH5)A>}IEC4-+oSraiX zk-Phicd6w4=Loa%tL8(aug`{ci1~~sf4f)%72~&o6(oT;8wdj-XAa%~054hOi|F52 zs<%xiyJ2H=_q0@BFOmL8XY$7FU&!Zg%#@m%srU0YXG)X(c5?MO%2h1mvgM+xB8x8e zky^V{aW)WAypytobB@va9aBI2-CcI&KWh`Ye37*^V{fWcTol<2WZ;FC=yzm*$|+Sq zjwwUNexLRo`TI!yjdAv0TF(O@tq2UGfgz!yP#=dS5LJs*tkb}TP;Bc>o77WsUa?(1 zeBo#|5IYD0pD^JId08_azxLWo`t0hu^flSp+15m;`Eu5or^y#uyZ+7Rj+8bBgt88V z8+aQ(OX z13B-$!{nX2zjj%C_dlVDrdcnm$qaXs$wPnDS|0DoZYqJG+9K`xRj+rJy*un8FI+QD zu6nhj8qqD!RYOgnSYMMT{-VbA%rm5$swI_r`b>M4ML8Y_vac$N^t>s|Sdc)-!Ws#Y z@g3BEATJi2?Ad3X<(h*K7+W-A6WL#?fBVwdRtck?R9`0U{Khh7dG|n2r@${L>Hl426M_3}^#`jN*+~t1XFg$mEtUOmyPb|irR=?SvMKKO# zhq$ycW84hsyHhFI5#Irj7KAzWBLyodEP$S;i&s+*DT|bJVrej zWoJPWNS;td2!`1{;94k9Qxt%hjfV>5H|OObvZAKAb~`m*;rm!(Oc*4{qVyN<>bx{^m*n%gm 
z@ZouP#_?e|pU|3C+3u3JC zUVQPzuHgJDS||m7b;e<^K;32sUp3Bt2J0keV9C5~V|?J0pVB-xK%^UCl9ws5r(r)t ziZ&qWT)LbRu&F$%`LDjzD06Hb$7s^TXh4eb|J`+$J496c?B9OdhR{SiFBSXHwc{D| zsis~rMwB@Tr4Iiq6_?l{`bgFWQnhLgS+O!P&((G7wx~TWZ_1neCb1w1WE%oPXGkG< zkf6(jgCWrtYNm90ZjfN?u)_`$^7q`Sx$f^%PZNV>T4a9IQ$O0zV4cDlfOz211Gm~e z$JH~1-ItiI_*N(>YpN>@+J4s3R8ItP9o>8Mkb5;XIO@k+Z@W$I>DO=5UJkNt+rxpcz}%=GbZebhMnf33omp9r-%cYuXVnQ8#1K;YqB!kC^d>0!V(BQqf+95PuE;Kh@a@le^;Gr<6U=6sW(aFT|E5o!;XmH zS;FP|sZ~p;u%&Fbwr-@y({(1EDCCnAI(wiUruH1e1UE5>> z&TlT0l0d9OUPy=tA@Y16a9EV5q$K7})L2%dd#Zu-!CZ%Om`p4>UN{(B41Un^r^0}E+^OYI14wKJAiUJP26-A7@kp25!>+>`0zkFDVfu~0*;C>) z8qvaqn0#0m^C2D(npAm(k)RPNcFL3~VFG%rM!jEp>7}j?yFcDwJN67DD}m7Cx#ylM zOPBmD%?`T8r7(#Wg+6=EO6kyTjP$wVW_@RFi9L`gN0OF6tS{UU5s`8KgS+6v!BDtT z#npPM3P}f2C*l&1pvx}1%)R{BMEa?&m&~HOsv6C?8>?=Y4~D`zrFDUn`69B7za*`B zHZFKfiTx%e!Wc18uMm@fG)JaQi;gDVW59p`f#i|I3xd?KP>F}1s-1Tc$s^s)CjM~u z(FBPs`2piPO;y3zB=Fiog%yp^2 zwba?EzWnQot7WI!B1a!qOLlHhNtP|&D4&0WifgeFjyvSK>w4KKajy#{f!I|b5+HP5 z9790VVn!OFkWDS+hN6kjc78HzqHB>ph7KL7dGDWcdev`~(_sl9`58bbP31Z~-O{Cs zOn!8kedbCf97r5^TlqkwIgAY-Tc!a-4g~eolTSL^<~HNY(IUWn6a={8LO85G1gd39 z>RzZdwQ<4-Ah{Vp0-@|MAq?2kP){bhCI;xi0RtUV_`u_nq)B!oIq2X+r15Um<-$wb zyBMKl`bRa{wr$&(Hx|_kD_U!~79toEPDi#Y9 zG%uNa^jz`Rs^ukm`F{ zuZ+@imA1(b7nuTrIRn*TnXc>9o$%v z)CXw|^>{T0eCI?wLpUr`1S&9cYr%v>8ttQvbw}D~gf^Ah4xCz;OJmdvvqqgbNn?YK zJnC5K+qbX7lk^9^OtxPLS}cg0fY1Sx#?A855HaotB!SE!E-bvni1HvXwh?jp*g$Ny z7_xKgQ0H!pu^tR|mk#<71Q5(XqBCG0y>c1h{7}YlfFNx_Mau%x<$cCA*NnoE*W`V~ zO*ax@N487HZyX?~>(NF719#U*bjFA)_K;UzaV1EPQtvDS2;2wi)rhMv-(YJsrpGg8 zm@SkDV*zz4{J$R9v313StLzz7j!3-L&ml%34nkfLPYefDt-9ns|gZoL<) zY7z#{2c)J8)Ttr_yHI#I*|2qzpY*$FJ47l;VY$kSBS0#jJhz!(siTa!YI z^Z94e{3k^~>;`z5P>Jw@5oyW-(J=LvwF}n43~aA60H=vs7LE>E2NHBqf*~*enqxH9 zVv0&274-WXsj8)0Z@EPVYdRCM^hTym`B@3efd?Ka*Isjt>!P=|CA);h0-3~*v_)>E zv17-Et0>|W08u*r_~TQ~ar^hPJ#DX6bv=MOC;)Y!NW~=ZQQ=m|!VAyKi1!a`BoR0t z76=QH3Tsm#*qU)f`&d#jzMzj_1`JWMw#C$`tRWOEsCQUU!D2sxNwRZ`au$tHMPtUM zs!gJ=rYAv-bjz)`x_FRSW7}D^1k;s>%$q-7HmGzlSEJ>*KfX=hV+^BowtTGs06+jq 
zL_t(73qam1JgVBYYga7y9_wC;Yrkn%cqW#$I&c~YJ&lwQ5dXpQvGL&*g)h8+-C#SI zfgNcEEa4!GS0s=eu~)BNa;;V-yh)?XFVmQ*Sn6U^x%@XL)`#!1w@NB))ZmWylnpIoy;JkW>yX=qs3kk~#yXBC$%5 z{yfyvth+rmrxnBikEkcrs|xiKQU#eKaDg{~Q+`ricw<2s<*G@EK_bmP)Kx!#yfc6V zh+PPnpn3WT#$-;9J@(kV4Kw5y<%?O03IYOBR5Kl1Czyc*X243|M%MVC3)I8U4<77n zkJukUaHyZ~*}vk71TyPsUvScy%1tLQU1@7-^N$?>LqWX0?2bF`aM!v1`s<6;i6U(` zn?%PMPe9N=BA)ZnAdVwW`gRoVIk251@#KS3;+_GsnXoo{3F-{&BLrh&i%7Mdguq>{ zMJ_9n6oSAEaA$|hoIQKC{HhD@|H&tv^#OcU**VB@76}4)CV!z_$P~!GQ$#-c-4{5|j&(AP(cz*Itu_F58F=xfmt5x%j}uNfK?bYjmuhAqeWOo*&qeE#7{~n}j*}_@&I`BTfOy0)X+e`> zodKj`;x7p1BU(F}=TtHSL{$Qi%7^bEfTEp&=d_Lr`Mx05l$v<+%{NPI0A;M1&nO85 zX=9G&*Mi#nfV&|^MS^$W_7Lq^Ots**4Yq?BNcjv{1iKnkx9B$SCqx5g2T2)MbpU1u zLPDAm0{473*<{8n&GZk~xKF2eooy z&uuB4z=G3o#y&qKM=%oN9%q20K}2m3OJXw;tha|5K;=r5x2doQ*dtd}L1`~Y5(?Mb zO50MJAQI^B{`>Ej2OfC98Pp?>2ftH41LUS+zNw^`445h&DU?EaserfNe%mETeDA&Y zoGWo6eNy1-Osz;srOf~R?|&}yZ)DbjMEbL~#V&y${*x`#fC(oxHma%sf*IJJW`J}j zyuXx>g&^>5BUK}b6h=ZoT}$CFg8W`ngF1PtsK0I*f>Ebs03WzLJ*x0nnOn;|K$2Jp z2NDPATR4_Uke!|FYP(pBP$FX^`6hv)M<0E(>myakFpC{alhv0G0N_DxkHllEa)_Je z{UKTP8cHdZo+4q>-+#&0wQHno<0g@ZO6>2=E0)XVe-=rZT02RZ>NVUvY+klRHZS!m z{CUSV&Hq!jDpD@HOLj)j8B#O(;UFR)bzIQNT8)=hVFAx9qKsJr*l}k7wHXF`y!t2; z+)h)$9X}MYa$U- zjS`!jf%QR59cBa6VZ^l{C36kzc@(%pZAu^5->4893yM_&u_#Xlhe(wwu`1Th7Iy!Z z)vILvkKc-lOj2%-y;a&MFRR{tUAFx5w^YCA|Df_Iz!6TtuGsYpRE#011VFxu4}(#)e0w}l+Dgd={GHyD_hs=3sAa@ zlXAFjnd&v&t!!SrNH$ELrpHQ4xjpxlGS#ZPeoLt&w{^pM*`iWU`Br<&`d@yO&5IUD znVPkA-1~^?S7hUFGo_4>@DSXYc4C^d_mTX`A8}sf0`X~YlwEKGFoFs?EA+lU>r%s*Zj5E$~YAK`y+EDllH7Zf_;8zHMl^%|E zU2G%&(OhZ-N zjP7n65RR>SUjT$_emE=8}UR90E3bN6F{*juvb?ou(JpQd2JJJzFuCr$3a3vBSOO+G4 zhL4xswTYB(y}wkL@sq5ZG{((i_@Hhqyr`>WHP~6|4|?6-ol)C-_;7%W7cX|>;$062 zNJc%Pj2;Y91`G&z5iLzLgSXXwv3Cpvk4lPKO%FZvkVF1RBbWtZ0cueIq5~KcPf@a~ zuDZ(E@W^-&D?ql`R9J{rOcH<_VwDWfuaAubNuylLJ*DR5H%ishyGywy&7I`IJ=A^V zB}KAXQn~Y~veUhTq}mxh95?SI1R8iys(b~hJK%XK*ZKgdpj>?IJ06f)*M;)ztp7nJ zoJdf@QU9s8r2f-G9k;&noguPi&1$J~>e;TJ%I!~(EoxKP_{VH_=9X0}rB?5LQsdHV 
z)y7aqYW`nusdMK;QbuhwPJ+tmztZ8yO2fCuIRXJuZ1lloDcfv!SwHnFS^x9*j(Ai( z>q2)8eMqum@MBU>?-Qte;^|WT;;U2zb+43H!U&-(x7U8!=JAlkq-{!9sVa3Jeo>{b z!`(UdZ}msRq_j#)%wwsFm1MON5g(S^-$#k{sj_U){ZdN%D8EnM?66f-BB`XrG1I{( z7_qYD#(NLww9$tr3M5kW88!cW)EG!c0DWVe-yQ%x{#)v5yCVZCq;~DvIq3q4;QaHq zRo{-&lE$P~!RQa-NA-m50!gBeN)MN7!cE#B4aW-*6e>=d+6?IgNd_Dp4T?FhXc48|LN`p!uqxyIpb+d~BGc$qr&Wb?v#uFo}}ejuwqRAg-GpB#^D)KQg>?(FpL zo0lw>l|x^0s+~3_bke=3j^1hU=P)P|x zC@ZPm(B`F!og{|pYq3g0K8CrQ!aUIzbzV4^K9Fo~)KUOOTrjk= zAExIeP2a+TG5y?g&vn<1HP^*PK>U}wAV2w7gD=YcND>>>NM7f^4-4Kxi+f)#>%JK8 z5Uz~_&KgOj;ai`{`fn%8KYeesLwVL9YGCabV-=ZxFLnA3boP%0=bw>`p?E@VI1t%cO?Gr-w3Hq89lspx29 z_sO<_NgI`p?xe(~nv~tOsZ+7BzqAq+0LiMtzM)dBcAmdp_S66!OHHYEW)FXNMs9-$ zuz>))^T`qr|K>jb{BwEz_1B%(Vh{Dmu{Z;ao*YB{fcx;xH{UGO$Yu<<%0KqlW6t%L z^zGD(ctvZdwET%a{WFyi9R_c*7ZN(d~cp%{MMM60ab%5~yASXg|32pLc_1h`1E8yN76 z{pZYcj|YlPg~iKhkcyAW`byWjY-6<{sImC3tGdgQem5(U)}mQQb;xtMa%!*!7G89k zEPbS}+uo9+B5*$<{*`Oi%Bhe3x%(z(Gr&-eDry^0@$v6om&jlLIm;o=s$I`>#_%mF zDa=3XIF&F$b{K3fAqWuxq@Xf&8_3FGLuCFL$2!Tvq=Rj)5wWBE?t7`4>I_-=#tSm< zR3%vIbyZ>iL!6s(*3OM(?YK{5#q$HDrrLoZZg@;BedKN_*Q{m821I`mQYG%rYev53 zh;Gem?{F$MRCf@PHKX2-+f!-Ph@rCPvkxOioS8qfJkj5O=aU`nlqpjjXNe@zvSmw$ zFhPAD~yNO7IIDDa*8Sq zqR5)x(e|Oj8a{lu8yCb9E*ipt`Ua`$haY}$o<-OKn4|o>i0r;HX^q-wrkCA^4m`wwO)AA8ag|fs2!?b353fysEpYeGST`?U=2TvIp>bc#(`Js>2=(ty z;ymRhk%}O`Ha?Cq2lQFGS`FEveU+)R(>BtJYolIl#M*Fe>`l39Lc2odtwGeL8UkUP z(b6AjL6PXRMW@ZVaQ)_G!+z46P(W(%;K2?FLs&4nf}}zG5CRZ%zkdB3aX{5U8;HXP zAAAs2VS&IQ4!9tr9)bt}5CmcXFwO$#Ap{4uT`cieGeNgwe5g1e2@K-&+;h*l{Wvpl zZ-oiwbR8w|FCCHS30%I&niiW1%lC^f$_HDRnFYC*Q@eo=AYc{)vsLih-!D1ppR9(B ztf^Z&DIshx$#cm4D5c^$NEz2%bKkXAAaw&HDz^6b-B0Uq6L0 za1Z{opiKaUpMhdiVfim*@L*#<$lB@x+hFkU*bRas89f93yY0X8$qxD$tr7CPrWuF( zCsLUFNTjD=|9}AloUH=_5veZ2PJ+&yW9`(t2T20NinQVf^HOgei3OVpaRua0;yfSB zBWI62_HalU$qLokh!G>4w3Xjj>|ELqTR{KVRZz`AG{}65nu`VdOtgJA77V{g8WSc= zkooiH%M(vLk#DJ4##fMP$ok%1*Sup$qo^d40b)Lo+#vArKp=s{GfP36U!|06j3286zuEFSh=bd*tI|TLsvN`~$urNAfe<0ErWR1!TH4z9L2>|2z zUVH83BoAtRTk3asniw-BrBaQ`=Cr@@# 
z8U!4B77`tvW=I|oP;AN&0^Gl&<;O2W%Ec8MAeDVQ zj!ng;!g8a`fPb6j&y`ifUspqQRgDJk<3>cDs*S(@>h_gyyH~ov>wo%A)+^HGc)3=so!1kAg9Pf~2}SCYO@GdjbsG7N1hjG9 zA5vCbWI4WJx|Y&ZkE3!LW8ni{Uz}6Ab{+Munywy8b)AP4ZOFAnq&di>#;EttZT&6*W`^pB* z#f4`U!H?WaBrq=oHD>L+i$=CrbLmzv;%7D7#d(iynE8`?K19Qp)(C%qdEBJW5RbKJ zM*GNXr1Q70!b<0LQ0Kr+W&&i=Sp4}YZY_sDmUy?30tj=s>#n<8oDR=;^ytwp*2nL| z>}#f8)l?-F4r(h)?!8rN-+q61UP0dMt9Jfn8eu-f?T0{R z)o9uERKB|?P$naYY27E#)wd*AR{O@{o{na=bja;tM?R=?u zS+8*a8#FQE-#1+9Tw%$KOY*~N=UwWq_gBwz9FoSXihxFv5`wRZ&#?SkTyy2C&$`$P z5~hBuEB*#MPEzram8yPx5#BrKP+VwffI4>k|OGy;F?>v z|C6%r+b?DLGY^NaM|u=dgb)0q@iXN6+M)bg&Djs~eYis`? za(~}=nFcC8D5PE?D=(g6q-puLTl-6`o9}kbH~csyd>XF2 z^)&flDK(k`wR-(m6F1)Nk||cwbHIx2*kqOrCT7A4qo)*oucABQ^eWy(1EvG~@8{XCIY{N3@rE zkG?F#r7U~$!EM}7{eHHWZM>8)7F$e-jhl#cBC!EHm?AT;f;2eK7C2Z`76EJ@Gk`M# zwk-lCEy|ui!&a>{2w8#PO&lc*@T{Xfb}*bH06f(qb7tX$KtLvv1~Ebg=KhQdES_{R z%BBV2&j2bSGV789%F=#pR-^{90znhwP^0G!?$~-&(If(!Rb943v+oiNhrvA(2*4wK zI}z!NZwaL+z!*=?smR!e3j1ivb$waN?{~1X!E9B;yiwDLkli<{VdGHO9)TkRFL5L5 zCQoq3%C_7?rGa^&?Iz8`u^!~6;`*dnU#+&G(v~tkf?8Vcs+DiP7|Q)st)>#1wc%T; zcIJherewG9Z`;Vy)FqWWo+L&bR)6xIteyCoN=oXhzj1>~V&7>N;M+puQWas1|Kvzt zIVo5Oa{T}H&H_NIDtq9EZs`V@p-WOxL9Ar-8D^o?@!BEkiyrCWwlT++IkB zJR>P$01ABV-LBb+!{AFhnGe4(jy~|f1D0|G4=e!vHy%@{DlneIKp-G^R5^fw;PC{J zzXPz$`-+kE_*WPVg?7Yjz zWe1TjCjk|&mtbeh9(Sr#(poB;NQu0A~y`mq-^9w>j%mA*R7hZUwbnV(Tc~^fN2N-~fP;D?d;A+b!IpDU;2Y{*YNuDzc zNrQAU4v=sh@Ege}cm|RTQy7C`6id!MCP_+A$)M6T11xp<(zB12B5E^04WsleO`w<* zHU=abYy=qCNp}vD+N-NU>?HukUI^sc$232bN;lX*mi9c>u3<|_DLQ-G7?lvd`NY~# z%&KHFHFT@3f!Z9X&qV8kpn8ie|%Pu|L4D*~O&lW|iU~VuJYJ))h zH@^9TeTtN?U}~dnqlZU_+Hc>jdWmEwRs1HktKhLliu>vhy%?;DRmyWfdU*`~y0Z3ESN?CfbGGZl!Ff{BZd(}2V8u)Gco?s?b?+9{2x$*MzmUszLeh@@RsuD7$ zlI9y%&a?mqfvy}~I_eBoounrrR)R!@7-@+D$15iq^O4q={#yRPz_@Yaa&@A476-zQ z6~hpa*Kr`^S;8|2FCaXbcFt7K9u1f(u9Ao4W(xCM;cc{g<#P30YHGQoP(#rdH4kbM z)GMUip!#1?O>0g)3{+y4cS_q7DO$vg=6F{HRRe>ZQy9Z{EnaA977XCzv$BeXYGjV4 z2mx2yCyz5Dde#4X*gTQQkV_m2!2HZ(N&6|auyZgl@?;gOS<8|fl1vdh1alC5R-1pN 
zDz-{TeO9C_X;UiM{qcaZDvO%ytEjG6@PgV+u_|(ZsT4r=T=IA^9)uqCo{Og`rcMLS z`^qN5{M0)Oh89grLcMr043})A{Z-wR5}1dYWnD+(}jUZlJ`}LDY5Ba%Dwq0sG{o3Oi7qHEJ(_@kHm~#$^kUMFL{QPI0R!B%hzV z7A2xE0X_yQu|($@-QQH9FyI+TN?3{G zbEUNEU%k)O?%|5>L*f|VE9b#QAOlCg@7C+&xz&E&Op{C>g4fVY7Z?Dy-RE{`#gQ#OzT^%x{i#A}D5QXaC70dE7zI_`W= zj{%Q?e2M`IRAt1jl20GcPdkS(;8j>Tj8H$E$3T9@fM*{0`RyI}xqBn}fya5K??_4k z{QpT)K?Y80U~>)wD$&INx^QyjP%REuP4wZ19d?*)^Fz(~Oe4j2@EGtI$a4&M29oEm zAms^Qr2OckkIbNnFMoPKYyiPxvuDrVrzwmDw@+8>CnME-DHJPJR;n~=FBR*zlnQm5NZBfNrQ|`?q6PuWvGc;58t)yuqeq7~a>H}X3k10Dk&1DU{p zXCRqCR&F)LSD);e4?p~HEX8uk3RR?T$CIUYhZCi8!#2t&N(5PDm04Kse~rdwONJ8N zNU@S-q?9t1^0gWU*TeH$*DR4$(?-g&Nke4Ag5T{v)I28on525d6yy{zA0!}{A29_a z^25E8xB`mPkZ}Oj8u0{Pt(H?*_v3gB#|PDGE=^Cm zLTa@;UWyhEQhOM~(5s}GP+k!*jc{w7^sHVo#sM=44{TGen&yb=dcGXo^*q_K;!l}B zVz4Zm@P&Nx$tRYXmBeY(qt0p&7|f3nnp8?79)K*K=>hqch&iA-+&Sl*)sxmeZ^zOOZ&)RBhx|vDjQ`B~B_t#W{{NZD#){1aNrp0;81)^LJgB;quiZq- zRIM+?%T-dzNzeYuWg%UqY~RqOXn)# zU7h+{To#s-!jHZ$Z`#VNrY{Ujd<)w)TL)fdO zYBw#JDbxOUuij&(`TnEQy7}gtU6WL|)Ej`{qoxANRI4vF+n*v;TXdB&)ejEUY7vH( zY)(5jt&z1eN6X4zhREh6fr9|dgL)Sv`YTA_8Aw5Tn;Cx=&prgAMg{F-N^O2c-tM>F!SoH15b{60cf%^0Uf@R^yuL%EuXQnl$}Qlr(; zQl-gZQnZ9tAB;&L3>Vr@A_SfL&i(c!nLlih-H){1ufF=KluRKrE>g!Wx7=cO6dISS z)kqSK>nl~7cTuTHb!Z9!<4R&y?l@^4;T(e2vPq5o3x~fZ8|P24Gvr{xiJ(@kT6Ub@ zzjBg+pyovN_}g#4S(#Fb2w`JFjR>BT77ZE48t;wR4{TDUOP4lxa9qJDT7;bpFFp^M z$AD)bna5eCdK5+mLL3JL?$@kYBUKxBkRz^pL3VClFY|sFEQ@~rTy}0*AH!i$l}0d# zS5S3OX=OB}Dpr@`>WxL_UQ|N6wrL%8?YnLDV%f5CzHD4FOQn+_CWJwdP-=HLN$MSW zj(Tr359#;5L?%XXl=!TiHcF-qzDvEiHk*BdQmIk92y=#@kiZYyOyY@GOWn@rD&r^? 
zsLJ%7ZakzL0G6V649-WGmR;mp$lRFRs%byS{2^M>R-X%FJH?V(wQA*>=G$#fFc6X> zQcwuV??3`^MHC_$FkpaX|2>fR&hPi~3?!G|U_oRc6#5~V9bQQM(Ub8MTL-IovZxZ< zfswgZo9@!$wEi;l>z8EdFGFLPF0U*4+8vIUYR!(2N(pTOm6p325?ko7BhW^k?R*j0 zvumfUU-XBp`9oDh>UFhgaZ>C^m8PAf(FuK}s!AmgZR^5UK;dOisMKrw>`#GN|diGCCgXc zR|-;QV*iKO;g9fM0(^}czsbd5+Mu5(pJ6q8z(!zVl z7zR88$rzrp+^V235Yz|nyz`D3pN43h}t;@8V_3OTCUKNYzH|q+-2hQc73hMYIGg2Jy`+7s%qt zL(RsqbIS%*1C@}5$6PE;PVB2efmNX1EY5YCM9}RpB(Aov%p)N693{0wBC+1#MaK{r zZ0FWZvS{@CYU6m*7)cS0Cuw~0)slE@ZxtZ4bZmrpyTMV1O?vT4RVQt>)F`CyE>pFh zG&}1ySvuh}Sv7UUKI$^YELE|F8ucsL^X}0~hkvOQvvbpGQ?~`P)mW8M<)lWt6Q#}( zJxwjgt(j{m2*u{IM^$vQKfceH2$CCww5G1O^CM)HfsiI0#<^m}3c0LLAN!r<#q*=O zyg&jAnD^MTP^Z41Ho`fLHcK(eb#VLp_b;JI{QYc-|1v2RzlrY zlM(XWJGaZkx9*ldN4#h4YqahvZ7;Y}aNXUY#_|oSdfK#Pwro**1}?VPLMU^Kt)p1U zvQk1BQ|ZbzRdT7PF0zfJ;=!$@T#W{%vWO93rNR(njH9MXz2OoQd*=p?mrbff~Zi86g>F*4X z&5NhXDW{xbXT8C5;#4lV z>ox^uT+cvq`Bml?0~tATq#SwVk!JKmwZspf0y#MW7{rO3Ci1#;rRviC)Pl&M-*DyzYq99QM4H?VXhC6vJw zEvhSeUFl<>#xrT_+U1((YLObpXUcj_Ndg0KP`q?GOL@|u+lA`6)hHy!p)?XUVm|!i zBA#Erz5P#FH%C3Hlqs~k>`|%K;z+&3^`OIUwM9&O?;cq(+o^g-Um^z|b9s3GU_9%q&%f%o z?`5vqL|FCWn2_I+L1u-45MM)Wa=f5MtNJc4rH}CSSyIniv~KzdC!Am&a_+p}ri_8Y z!a$Tt6&!fujW^`6#~!l?>3{z7pJsr(Re$uH&j}#qhcS2rckX}Cv&l_(dsB|S2spD9Vv!* z+DcweI3irRVUk1vea5D;diqFNK4qxpxcW^NjrrI0y+}x+ z5>RF7c-8YV{e$~Wr8VW%8>QXlk4g1bU5&}?-mzVNdHqINJ9~nu(3+j|PYn`0$_W3u zc~i{3v3ut>=dU+Ktp+gnvNaNNK!IW)BSwsn=bwLGMvoqC5zTn5cZ{8*yu6$0Vq(gN7R%0F*#cQ-ZUY3mgK>m1dpw3~p>z(4Zn8u!qi=A(zs*1~+xj8sj92$GTlEkg_!!$!vAqUG?jCHhz<{?$EKd4>$>>oci4_lNp0= zGn*52z6*LH#&zq~8Gb9N1QIXc(#))1pA@SUFLU$r$U6)aCJLZZ0FAzSami$Ec<%t7-~q1TQS?B5UUUD$7*bz=(`-xzTaGH9=xO zTlqss9Wgb=!IE47GRt`*w?;hmd`NsX-%QYeHrgUkMbRsOa=M~#an5bh@PvNy=l8Fv z*VTvSzPobjNNLsU|D?tt-Hnjj_%?PpobES0{RTmrSTgow`DM_}8p(dRtkH;g>>huq zx7d;~9|Sj1f?u|Jf=LgkvB>_5OR)tss;y${in-=>wPyMV;Zv;i(^9L$sm3I**96ZR zJW}*ID>vyREiZXYroVHm+D3+{QNN}%IJR$y3mAz=r$U!Pu zlu#*(IR_1b`LplNp@v#JbF8fT{d@JEnr({SuX%{eH4I4Jh@-|V{H9G)t#GTSW_ZxIRL(X_| zlpKD=LsGr@VF5ExV!UV94jJ=Yf0_2_GfL}=N-OoYI(opna*(DYacUg+f_;L)BIkJx 
zdeP`Al${|s4%AijkM#|j1#LJ<+i+|9Iu0(rhhF=VG(GbslYkaz3inC>xkkO2Ho}*WJcMU>??iPWmw^DB1my9%>zSrW|tl zvvNqk7o}2@BTTwk@cm0NbX-625wS=V}zVHZ=S7I zaXZW^z&tL!^ioSdLd1CItDlsv*S{d;HT$gr1YiH{Qls<8$FGn@{%6sl!-1wswM(N;KWdJs6Bj!FfA2IvY8reb_J{zj|#S2#?v&O&L&wqc4y{ z`@f=|UDajnoJlhN*}m!;yT%B1tgDVY`ztl5|97BGl#IHaZX7I$$6slIfi_PXGdG=q zP0L*k8a(%nfD4_JK0G<_s~NR$p;^NVDTGm&%If;X$A>PKp;VA_?mSGu9UD>R%Rq@V+t_i?&raEC zt_Fl$uUNBSoUjGViu>3oMWkYb)^gbOudDr{foxtnTPA3lk{w!`gO2?B^qb_6-Vd7{!n!l0LNK1~*>S+gg8oU*5X?W2aBO&MTTvy8wpYKV8GE~0B>ePu z2gvHF!x_~+JZ7*L+^6BGH|kDNbszp(wrPqKW4hXidse8|!p0;Utp|?*kAb|$K!LJ> z)UI7ye);8>SSs>y=+6G zNllV8AeJeu-~!1cAfWUkxg8Mnhh2mKA?2D4#5u0)J3iNfHo;}ceCbNHu5}&p;un>TOO?udHum$*95V+Iy?r zA4XNGQf-Ee)giG1|K%~@F_32%C{PRpBRVO^2{xi~lLlS1c7iew0AdFb4Yr(6JA;0Od;xkH5=Cpx zmx2;u2(=Pa`h`0oK}X^U&(SnCJ_inxh_KyPiMnld1ju?R^_*(2Zn_hn?<*S@O_vGJ zT&{(rW|`!1$R!V{(OQ{DIKiNw-DaP2!DRG)OvRX_?r>7)qj$GmzUaRvc7MH06I5y^ zTy3e~my8}PGvB}4Y$as%b+}?&hN;u#d}AQ1rwy0HllogM3UkB80xVgAEyaV!fX6^y zV<4^>2$^ol21;!T4CAB}C;wEHDpgX%8@}qqYY^p5`s)@gT4XwA0F@UhGw4sk!zMj? zOOxY!t1&#mXgW7`gq9OX3jx8#lPVzMwjS4g19x4w!FdL-rjXiTEF;Kzo<7>nAnb4H z(St5@3+@%{STp8g1Uq^k#t3gOIB2o2r%D}ERoY$gl#GA6k1-DhXnN)?meVRo!yaKm zTvTx52hYsCY$&@Ydv+@~ZMWt2vF8u)Vh#0teT_ILihlLfp)%{kdsV&liZK<&aZsj; zR#>dx+S0nO`Qs<4uCXXUQOz?3kc-TN$AHH`-eVxHZ6M@7nm&EH5Fm)&8^%Flo#X&h zA@cn3#~-)kb7!7;rd+5QMBBG-Fa7%UOWr)y;}>3dLC!w=Y-1jE+8wLjRzJz1z3-Na zwHrba1467D!04lX1%%QlW^lo-u_9N1D8{;RF8IeK2B3{=$2j&c98)(j;%#li^TCY+ zrs4Vrw{b^0N;p<*S<8j9=cz()v*j9>*tK) zYS5iF3QjV#gF^g8oRbHSfxN^(TuUI6v>pMeNE$X&0 z@*G=;0~Y}0j#JxG_rlc!Z-3z(((*L*qB4RNuF93NLC73YWlNyo+#mj@L=UnD9S$iV z`{6lAUT|88bhxh@_UwGHzyGm~m2@!}gkB8B*1p59=sCBq4FYm&cZzjz^Qxw`K`=@$ z8iR^29FtAh-<{(Qu~XJk8wJhn$X&0}tV=AjFXw_b0pYg;R9Yeau(>ok^ER3K`gLj( z`B-XcW?t&5JJ)6UR!VGd!)#pet8ICjUE8LhDX|ehJu)VsS2^CTHjqhg-DPd_;}?O0IR$03Qeo*znxQuw(0DJr#tia? zK)d3YTKU>drAGTxWaZ>9H6NC~*K_ZTF&%b@JTG3hq7}Bw(ft@Hsn;ttUs_s!2sj>W9 zS*`WZ8}#UBURGQ~62S;xT5Tblw{DOf>zAu^QzHyS6nm!y8K)V2wp(t#Sw2ool*+2w ztx&$a9Cg%D8SU!Z6#@)k3r4k1A+W;_KRj2SIIbB8sk=Y@^i$ceVS`B)>7`{SPY(

MLG>NT&Q^Bz1zk_0`k~!`lPrJiHwOzkt zUcf*?yao6(P8;JT4plTUk1H^w2c?xMRBhQ^b6|ZhD}VV`>UZyB_%|k^*GsAU@#e+T zjDZws&{k8~Pcz(7Ue$xh{rBH5HFR+`XU-hM=MI&=kJDQA9ts8qNE?GkDOmtowQ6M# zwS2iQrnIbZ<;s<1^XAP)8B_N+D2Ta<10A zZ``p|vE+WK3$&tX#QL@J4Odu3egrNJZlj)kKfOEclQhk%#4>IHn#)YH-p? zCs`eDytgO`+NDbu%gUQtd&+da`s%B?;w>siUiGL-ERLUst7z?Lq)>@j5)srr2%%eP zr#5&YJnxt<0$LIVp=}O(hE2ntLk}?@0ChiT7R76EjTHe%_DHSvU0Qq#`&NSn4eWej z|Kg9lSI?e?25P3yDkKs5h2g#9s!lb^wr$%=>(;H!whK*T@q}gu!0CsQ&uCnreMlLE zF~Cqz=Q~D;-qC7IezG^Byq_=aNVU4 z&r?r5Wl`mLV8J!`8+BsaO6I^6GFg5LcD@V1=vYNHI^4IbC7J;sPpaXmIt^6 z$My+-p30Ge=yfqyZtLv7%rBYFHQAJL8z0II84`-sfXOd z0CnuMn{<*zG%1;oE_r{b+6it>31BSWefOQIFu(ZX3xQ!!{0=4p3?4k#oR6p^dGW;; z3)!A8y67USEa}qoAEeG0FrtnfJDS()JWU04%{A8u_F>Ma264$i5ZSPK3>`XD`uFc| zT$2ZcA&~DXH6Uv)H9IK$g!dL)m+Y;n0pcCd{KP&X!3qc)-w-!jFGCFhJ%Fe={XLmw z1ELsQF6`e9bKp!dWuGYi3~$y5MCyOFbAVcKNd7Az$>WM6l|?iouz~&_R{r*#?P3%2 z5MY2jZ3Fao{&%gq`J%2yn>Eu#PzpebAJiD@ln^|juI~I@dB{Bsph^PgEP~O7WCidj zMeT&$7srUHQ>Vrj2#yB9BmmS;4?OUIwdvHUlf3rYYx~_7hJb?+k_xo-lv7TzV;1HB zArGjMvY}~lf9u|=;B`+-39>fp;=zx9Br393_ zrMepRJZd)Y6j>8%Yb9y{2@JnEhS~*}Y#VTtaMy3Zj9~%H?Sm^l&9Qw#tio7ObJ2%Q zh5CVhck?ohjE~`3Yoc!o>{vTgGTE_inf<79sz4Zb&+qV})=R%h4e2kx@`_xiYK_$5 zLW=9(0Bta`j~P2wswkoGkh>UQl@EOxtN13pQf4I*7zas1vE{<>De25ugeAbBpou?e zxnlrTCXxzSHzrS>Y<7`1)MM2F5(JsbaVm0skSsV(NJy||Nq@6Rf4{DOT3e^|PpeIy zU89Io-@biK&j;5v|1jbB<#VAqEr~NEJan6Fp~0)02k30D}%!K|PZY*NV&1m4mpQ z9h$6CksG`N?ds~jubKX%bw#!5(u%vW3iEcX{?0#BDck+=r-lo>wSLy$d(U&vm3QBL z*Xo+5*4CfTSq!l1hgcKShI$BlhO4cT)}08LhH)SXz>H2i?KGo*C!Tns**?g+;oRA= z6Js#vT0%mCwa5Mes(xgK001+^Nkl(XFyp7&L~U)%A=iC;@}$ z8IgmT=%G+coUMZxJ&c}o7Yp%L@e(lC0B2n+)-t5QFkG8m z7|vJEH-@r3ls-r?<3XeI)msJ+nR4oB^U+5iNfVXS!5gZFlspwt^>BGg+vEX)9C$W=YS4-(PWgYb^t{KR( zWy|aVuz%o=%Y*X=lbTO6F1dIqT&Fr=0L1lFFBMXts7>tpG&!+c|B#dGQQ# zmgw1F&}W``CRKH$buL6-OZD&KYt7k)zIu-8u=xPTs+}XHKXx2Tu2oy&Ms8AuSwsOHAjTu~f<&yl^d)do7)VOs3`51V zY^yO2C0YT23#ASvT(Pw5YA`qEVYDm=a)XKJ|G0#|wr!DZS~%*Unn^!~=tOsJ+blaJ zon=@XUAKmDcMZjYyK8W_^5U++i)$(FUff+nu|jbzRvZenSb<{2-HXeaekb!USCZLt 
z&1CPj*Lt3N34Y1KsN>eVwj{ajx9GDDXCEB)MO1UT!*;7dL$qUDI>7k_8G?Z0jrM#_ z;)}vOmtz*G20d}%QH&0$#fkva`dq%eu;_mbm{{eSA;MkDEQ^Gsm1;fN1jYBA^(rxh z`tSJR_pdR@t)o%~`CK-0L;FDaTzpD$oqfN-kov=izrV~mYGtEwC@n?I+FJ-GQ*Wh( z(2kcoR`(6!D-R$s1wSQXjD4I$ACR?qq6nLzG40{LlDH`r>ZV-3f?D~9Iwr~f=EVE$ zsC68M2coORCGdI9oD*~ZhVzuRW3ul9#ixL@(ol9O;kHK`1rLq79S*rgqqEggRdJO; zeNB4=Hr3qmEDpuUO5DC>o%RZ$$V99-8&F;Q^FNCRQ)SN2L2J;E4-q94QDL7LdZkVH z@bn50QLwlRC^jh`ItbO!5T;9GhNa|@7^L~zd*yZYkOnszGbv&S;alqr-|$92rJa}% zRY>y{b<0OI6~6Ucri{)k`Pn==&Q=c@J(e~*89Y?1cIjQo+Dv{U%awD>n{!&05PXT+ zxr#(PcZlUEH_6z5fWJ??fVlj<&$KKXMHey)5++E|0v!ZTzzh>8z5_S*$#ket1~?G= zRYNVW(P3zatSrNS%~e+P(3>wme#dz+TpLy?T$vgRWj9>px%S|fI*8L-z@6pvJ76i& zG8tq>>H*UYnM#^bEIlLRvfa1A*xhfUvfX|$4EsOJ6q^kTPp>|_>x~pEke^!12TL{6 z*u(b%kyoB^z1dJ4@vOs}U%!C+3J%=$m>!}0kUtbgKf+8wF+dx^=-Yifrxi>$ zgXp-Yj+rehT&YaeNnMYX9Y-MbY$xfl#h~yU|KoygR2lRzxNfr)k3LT1FrIrKtW=O0 zJ+Uh=Vg2~&{mfJQ6>J21dUV=Vl6h+_(4d;JIb9%FEp5xySJQ|pxFmq*TqZ~IkEj*% z@7hz}jS}&L0Xv)T#r`mlZpKFUAsNVR0}TYSGior*a1DwNtvsKhYNc=!7o!$@?=vJ% zTI~I3W)t)=$Y&rCs86eXjA;oExQ=9FNOf~TOhq$`!zLGKTo-QuvnZ3k{*1A5gD9Db zzLrSa3g_U_li>bP+MB8X!wbY}S%Eu-gd6j2g z-+Nf$*LC4&*!|(W{f8fGpxUcGvQ!CCB&~l+a&L90_*PuzEA7t^UUJ>`;(W~1!55pVj8`529e-&7yUbdta)cyFXa>6MNU5+jBnaL65b*6DICM#3Vpx zy)A{2P

hd#@GG3@8lgSx4nw8+|PD@SF0n(p+7(mVYqk9(SKBrARoYNcBK`bxmnU z{l2;~CfW;Kl9;8tJ2xpmSNmP+Tc7KeCri2Q7@n$gK zO9otqd>K#rIsbOAZiBDynmPK_{h7TAWmRbFkp1Y)$r7mhIRhGR*$)E~4~50bA}eTjYm%px zW~yO&?6AMQPk%32sSxNGyn?MhVdMHl8-3F$oiowEf?aUpnb<$<2HKoKX$@ROv}p)T z)|Z=DcySSsRH=Tp>i?l{J#8+ks;jI!76vEza#x18p`yWFU7L@DfOIh^BJ1Ab_LCO! z__;}of=WSFmH6kr^>Lsia@!4&=tG*}FkG)^GtIlVyaU23*yxwWu(Nbz1VJ-Pu^4z| zbT_oP-EI#fQhNOOYi{C|4NcE-0IBXCJ(LNaPlgKe;r+rwqJCF8Uhu%$jp_+6pu~>U zhXs@zwBTS1Ng`ly@Xx_B>z!j|jke+{mlg2@`+dW%QXO6=ixU}=wB>V1P>dq_s z_wB8rcu+ZyL4Y1DzMJ@wN~pz)(QUa#2Q?l9IC_QN;we-u&xL@=9*Q~1o;PbfcFWe4 ze{M1m^{ebDP4rJmgqX^yGw<+)p(B5n7IyD@>m3{=cKnp5``kRB;OezqrFEM;)f&9- zE2Q#qiz^v>V)w0$4TTL(wdJ1w%H3zLl=mr87_lNsTzsv=K7k}11BCYXC%jJP0Ww#)pq)&z*<1MiPxRE z90Y_39_@H)xLTvsvZ|;Q0s?*K4{>n^?LxGt7KHt1MI=x<8n&}1$>1z^*A<3>FFw%T zUA!S!u}pco3Q}5A=Cg>+AGxJ*0n6@WDvESwfa5OQ_x@MMS*NEH+b`^6rsFL3wCx9@ z56WkZxj`=(UxcT) zLyXV|1ZS|;yp{MWr7hmvRyHe}^;L!|@SkO*v1>Tu8;AW|eI7PYg3|Syk(?wsM_lDk zsM>}AQw|s>7D2A`Nd7WJ8fMEL(G&*O`w)|eR4k9J%JQsmt|VpS0c3Y4nz_tVE_IK_ zX@ZZa`QJw*>3T}7r|NL~;e_qc^o3Y_J8$NK_rzj=Qb-$b%CvsYW||0k8B`-;GaQan zxS->7vN0U9i0}(Dktd7Pt=`y?DHcgoj~!So}8!qWW5Y&MyDZmyJW%FDL^QI3xVR0`tzg9WNKMa9;uH zwiBIigEs(F0sk*ok}dZ36sKg3vzbm_K)K}I>Cmuk5a);zMkKA~7lT1CDrE}kCmLnU zA?SlDy$^$@?DO$`JZT#dBhoq?fMA(|jxMUGZ+E2 zOIZd+(J~LnE);1ojk?i%KD8%~NV{g(-uZujBC^h(@H!E5nf8N5Cmo2UaL$bn03NY- z)M!c%Y~I@;Vh+XHCgy=i-^Qc0!C;(OivOXqjfSPykfW2B=@1=6V1@5AN-``B_ve>+ zpv}P6uz%$8i`bwfiUE@1m01Vb)!IGW&!OfC{~;NKPkl_jqljMV%iK67r~EI17mMTU z|MIq5%t)d93~=)5P`oPmV5OgP49961LUd9kv@aWI^$&ly?7qKmom~1wUZym~XjyX@ zk7c5?EayI$#8bV?H~a@C(9OR$!4$bnJsgoibGFz%Blmvr_!!O*Lu*_oxqzo>CJO`u zz$fYM8`toCQoM+qN*A0hB|ox+Swb3HK?jt5a%gx~?p}ZA^?%#P#Lbp+s0;$dd=?8S*Ck!E`)ysk48=sOInG(F38OxuypTYUIF*6tkW!0 zpVWXZptAZ-t`A?SJS3&*qWdk@=v{x31K#FihtSVt_Yh))A69oCSKR+3x!Tun>=}j2 z7Z6tpSH`mH5;aiRAw&AGby*G#5Lzg-=>z3ek~KO$U5_IrbfTxWO&&Ax2^ID}#)3kD~W(>jt) z&A|=Xz7c9PHqENo&mQ7W?XFGuuR4vuAz;1!SiQg4yws}$>V$1g&y~e9r9-t?GR*S} z=EqtBZ(8K~a{Fa8>`yqByY3rmMn~I3p54%qe9Q!Oox5vj(SUD||&{)PFXN?tG 
z!^I7(usCo1nSZeGmM;_}I+EScC|5qB2egl_3iO3jc{6?Cm z3clnKl*TM2&(An~5g&K_d7oP&^+&yB!SJ{s0uKJ23G$lkfk(pVphB;A8^MXIP1{08 z2xvfz(s?wcTJCJ z^`=@i;FYCd#Qcryj3TFxOGJR%#8{*6Jb`Y@vEJ~k(ve0V31X`+U5O>bNP=n+sC<$EQkm)xGa$XDCX~_qFGX$e1~e5r z4&AS&jln3x7k|7`EUx5o?K{F(px{dNM@MWNo%*V(rp5>Q{q~=MTRBZeq$=%Z2$gQD zEzQ}1dS^$EfYWXy?kgIO0yP3!0s7o3I^PLOD;Gim_(g{?&$P)Byuv z@W|7((^aCg&;_R_mRn!rrW3zpoK0p-tjWP+?4LlK;f6K9PWZM)&?J!EO?E98X&XVV70|+ zMJ3;Iqmnm4#mCs__*;QIY}?rrqo)f>`r43olyFk~ zr@GPat1MvoQ05uajnG*+@%HhWIH!7)9}<-ZEcpqXd{z&wREUQC|7vz(c)mtJ11(X= z+gW~%Ss@NE4qVFGlJdD!NDj=#Cn+%+2?5Lj0_p0+RYq__DLQLNp>$4sv}K31-}m zWzByV$}b@`JkeA5`|@82TZO~`4mM{zf^%!FF-!O(wR6RUPBo`!8}jtpc}yC| zjz3_n{IP!%#QU_Ar=w-aP7ieoRH1G$gph2omJHTRqk8&{V8i{@nER zq_n{zABUPx>Oo#gpy$fI3{Tu`J+ z?8#inOXPxx`8#)!bf&5Qgf$2h)g%!Mwcuj;v=J$A&V{>P&Vor15Yam)Id=Og^7U~G zO{v6CC{VNv)jB79yBL`9L{UO|#g-UmRg@IDv47^Ke;ta>6*JmSqv4@OC*yhd3Hh}1 zEh54dcQ9=>O3h)iJ(Q4Vb9BcG8g2U+_`Ip7g5{lOJ!|Alw8;M{;4+N;KCszI*B@0d z>0tfL(Hz`#y8jC`S58_S?^OJLtIs8Qfuo^<;xpEtgf(YaM0I~$HAd|-^D)Kw}@*;S1Wj?Ss?VE*Hta5Rj0WV?5z z=L?6DL5;f^Z@iP&*D+{^G)-;h1T!wXwIuV%Vy$8^?9`DV%-Yo4;feR|=jNluPY_iSHR6*`z2!0l`XiX#0zR9HLn^H~ub*E~yu8 zxC)Yho=QlAptu_(yS3VnX@jh?E*;}d8QQ!a7J>FJm{4hU=+_BiMC31QUkeQWeCXPx znY4Rf!b2Rt#b*0o!^2J>>a@mLMXunx3QX7eTov8mcL1RGb~?H(7K-@y?m^zCxn}0sihWXpGifI& zspFzEmS9fOpMvj^VoCd0kVhRnfS3_SGAVxu2q2QbOE>^I5nrbiL`+#m0<;@kEqQAs zv{Bl%2sta`U$tm49W%0X5&@Q~@*ncmgz2_QTQ>ClG*(9p!=@`Edkpy77d9duN zh6CVGDOa=reWwv`+sb99h>}23^c-GJkBaq0k1chcY|y<&XsF|SD?V390rIG_(6*1S zpk&f#t70i0JX#lJ-{Z?aTOVHg5OGI6bA_18q%P%Wq(A?dX^bsUkM@73Z)}7b7DFBr zDM-P9z3;C5z*@oIx!3iU8*4kofuqn9&y zW(S(`6Z3Z5e-=t_-&-a^e4{c=FHx{HYt$B0pcnYtVN0*ZXr7m0PMrA!0x?Ee)3eqZ z<^rOJdQgyb39K0LG{lm63ZCXYWZJAO%6Quez&bw23lyLvcZ2@7Q}5)_1jpK3>7 zNl@Ja%>%@FO$!7o@URm?6->pTb34`&_yblf|jyC-U$8fno}m{ z>Zb-WX@_T&zibXOU31RVE+pIh4mDeDH}aa^!dN9m}$wgd{*97FMZI9)fW4tdmM=#bTO5&EWYCvTMU ze!_|&3EBuBIwV6(Q}WIJH$?)HJ(@}My7ritr*n7WA?8A|zUkF+q#~Yc)45v|ctTBr zs_*o%CDyjvVe1PWLR>}sa(e3I_PFt^C{a|n!@r&emE5w*e~|T>^Rk*OS0ridUQ5Bd 
zbP<(wN+F87v(lg(bsp7;YP8(~Syn(aCYbz@t7q{Nmw}NHV<;Ai(oVTht;?qUCA^q@ zj>A(ikUiSmufo?#zPlb2pW4RLPp@4@x-!hYz`;g3PE9T8gd4jpZTjG2+#;e7)(|+g zd>2Ul6x9p(01cGGJ*UKf=`=<=lC9)EpXG=OW0a1;Qoehc-wfZ3uRy5(7uSVzV^8xJi zoxO}~LzUC@I2A`S(WGw1T>HiuK9YU0&r(a^$0uGT$Bv^+f?5EO*HkKa5r+&?o~^#% z>q=+mpX-B9g6;P|=U}`Kz(G34VW~a*`UZDU{8{{x6eW(CNA=)C-jTZWu!+8ZmdMwi z<3=4pS6D01seH&xovCO7Y>lnEkeV&yNT|gOzBB&gXku;NcET08HMBWw9TvD=Vcnl} zHo6QUtQDSvDW}aOJaOF_;ImP`jWP*o7U47ycsPcwNF(&^rs!4U=_pqsD)Yv9)!R&v zG$s5HstF!eA`Z~V!FPIlsd(sSg?S81P0SK0b!FXpTr@AC*+_Q+AroVyBc-Nsp$Fei zv|KX*P^f?h5y;ganuig}V$8&kJ-wAwzZ^fruV!uqO5xs$DuzGjmHY4-Ho*k{M8b|9 z3pO}Wl%21+mqaXz9Glg=SJlcr3TJUZ(fk}&J;A1e?01um1Dk$_GvC_1gYraBEagNZ zKj582io@)t3vXfzZyztB;YWU0oe92t%BJH3On%t>sTfq0*RaZbfbsr>@Qe&WrTkXVCA(30;{_CYM&rOOWopsakTIncx?f@8J*Q=0W#1p9l;( z{YiFf>3C0{iET}Yn(aQEwHAwK`4=MWMD9?rR5I^7MXfH}oK#O`@zK&+1yR~6|v>VM<5jv`ZUxsCv?VeNvY%^-1J$ z*gY2hmJ=A!gz|154Dcbs5md!2wz-|3vc1$1t{7l6h~Vj%Zby^;rWZJ zC!EuVO_9-OoD;!TQP2#~IlsZf<34!c(QW(Lw<&lh1(X)4`aI)I3YD8_tPDJ$62`2q zpE}7NJpa}8G`=+sWi=D&4@XWio!+2s#u^-O9rMX|-9CfbeSQf4o9qJj z2G(BPuEK)CMfWB;V%LBX=jrP3?23m6=Nd|?w3Aje<(I5ihov7}r^2#{r>`*nqmub+ zF9959d9;uXlva33e)@I?3!EGKJ|-l`n#bmfs^_&8sd$?R@#Z3_u7SGWzrLiVsNbXs zcz#+`!W<6boK)}7%B2XdN}sEah_~}Z+YkKVTI9Z31^uz{B0s0r#B@W^UrUSl?sbxF zcjEXqQ>7Y>U1_wcxg-mkAQ z-(Oxhj@$uJ-~`~5<`8h0OV%t?7%NMXa5&)H#R@dO)2Ym0elJ47C16a&GM`JuI;4`R zsmYj)8$Sl4-W}{o0J&lBqIeqdd4B!;VD*{((I)$;<$ZWdUr|Af^(75H(+15R_Xh3X z6t@fOtK?UnzAhvTKX^`RrR<^g>bI2Ycp!~0&kZv@dkI=y@3Y+G=#xV%+`cIzI?=3} zSW`FlkP7IdEku(%1b6A3<^5gFyByKyLd`%DSj68_!@{brT7| zXEA?(s3Mj$VZ{a08$ZkrcAXR{bI| zh$EWsR?fE5)LYZ(1>s*a7_h7(LbGSOLVMi}uTL?GP+7#-Y!Byi|H=v@*=4at@Vfi6RJxt z6y9+u9~q`d4Iu^|n*PIuy>%w806shEm_rOiKS0TftUeTm1N`I+`C7AmT0E7wkrHdn z8+7!LdC7yuGO3_4BnA@tnWG;i;Z=#yH)a?ymMpKQc2S{HA@w%WAsP&R_S~;uKOmrz zvKxTeX?gOmZcRWhQR0Y@zFy5^d~03#!0kf_#RkjWZ%QdF>!?s}4y<}UyQ${muTK(( z^EGzEi-^KD!8NLua8bSQe$v)d$gH4#FDtI8qlgQj?;IF}UrrV5QpsSo^=ICfRzO87 zM?Ms+{K8B>iTquRiCE!>!Vu+2sPENH)e`b~?ovoZKePh0( z&qAv55AUZiwzWW-mcrOFzMal@^S^{p6<@jJ7I8p)S} 
z>kw$gwB)N;x2?SylTzxwKkh$XN+ch#q_Y%hZncoT(_lvSk1r{1tHcs#0DIle=cSWNZwau69jy>=iot zw%Se?asa3LY6CZ%jG;}_jec8#`*7a-eB$ac1=(w_I&NafKz}c)^bq6EeESiBGX@5= zs8iDV*CaZgT5|1ac{Tg>Hu#n4_2strzgIO0&Ibz5?>r3_K%*99wM z%xQ5mFK+NFG!5#5Qdr>2S7g@hms5c;Jt*+M8v<-< zfg3W|WmEoN=JHy%zcCluNh-rz)!aTcz)can=<79GnKqn2&Quh5lY<-aa5#q6?K#VjlckQo>Mj9mRn9dZ&@w%Z0C?1xtf;E+ zj{vTz!MpGz5v*9%5|{}b&_Yr%&8e>Y#z@rG1u$`k@*s%f-_y$E-mvL?=AI^W2n)fq zBpDTs6T#fY*y_LYnf*^l-tW=$pP4Bv2A(U_db4)(BJ6N9)bo$*e-YHe%6RuuVl>rq zu+Dd$PAGW3%c+fd-Z1}9Qp8B4!5*mG#oI5tCXOrZ4FaL?`5)GNGyf94|EG;K`Ssjd z*>0jqo~1A(V9D7|FS6k)Op3hQzvAmHreTs^do%(85fPwOb7fQ7Q;PkA3=swVgEho{ zSOE&$X=nbfZ&8wuf^-_CzDxqD#t1x+Q}E%&@@$n10a_V~3@)I8ChqjcS;+ zJcff411urBzvL!N(}Fs*s&7MMTKsM!oMdsB1Zt8h+3GN_0yyG1KV~vIyh*ADTu=S0 zfKI#%gLjl$YQqQWB<0({#aN^nw$0*40u&|h{;%mRTeFO z_1D4k`5)D`Ik6{NsIKZhNM{W3eFLXxEq+s~nok_u>bL5%D&DZ{v)kfmKU2ZB0?+-m23EO`|pbX9lY@X2Z}eIb^V7G_zI*n zmdANjc1J~pco7S{-*{n$b~=zyh9*#$B9G%GMB|4uAdThIq5_&r9~o zoot5{u^9b?GyCbkG(aycXq+THcysinx3HGyvXPI6R!r75r1HuCv}2?buXwbATT1$F zk~HdeyNG8C6oV%m0_oGw&i6;p9xCpI?Rb~EDa+wLxTTE{MrtCLUf_|f*f@PVPGAxkdj>w| zgy1PkbFyqofPQrx%7pkZp344M2hwlOb$xhNCUL;T9OyareoK1A9iA~#c_NeJ#E>j} zuTR(Ka(gUOD^l<~`V;r*l3kH=D)9{QX&20r27~I>hW0Slg-PQQXqi?2q|Z4i+|kj; z!fX+ z6a7%Fz#QBb^!tA2{#guD5M@&aVJtzCH*6!O8|+Fvn9=?bosiL>ng70I6kgsBH)|Mi zQ(;grKB^sk##;{F^1s7Wwz_s1H2Mg9VXz_z7NUY2uZ$%PI10~yCI^li&qD=%<)hbr zyu9VmHe7f(OMRqI@70v(u$u{z$n?LKly4~!tiqS13@k7-_t$%S#a#jYcLILHP7lE8 zsuz2{o>llx_A#Ax&^uYn258Mna;~(gxG4o?6~hgwj)TP_aVNeJ7zZJ$CV%gde0^%- z0i7ztG59dFr^qECF@*)6o{hIGd@*0QH0i^Qk7xF_)b-IX5P9GDVdRRpC=?NOS(sa= zkWW(jy6uD^`RRK1$%HQ~R4h%(U`ZtAC8fi2X_Wv?(c2J>RO;}3*ohM48{|^3p;1vL zRHH$K=^YN^07jNBZ>R^{V9~trLr~Z~cuonu8Ck=sKN)-j7wY#p-}De)lq{&<$#yE= zJ_0|?putj}F8`IM;_UVu7*oBSqzFKg77^{heL^c1L2cw zCm{kx2W~1_HS1LW6Pk!zu0 zF(KSxw=;PA;v@ywfH0IulQx^;!!;rl%&kMat65IdkxUgCKgdm+&2y`Yo&7P7&bd@r z^`x*WfmnTyQY}aOT8LWR^V;(QG|Gx{86PSoM=3Su@y4K`u{siz8jc}=Tb-T`XDaSM zk&R|AV;~^PMYG&=sYrYSkm+)p!-A;E;}B!$mfQ_(n!=d)YA0ACtD}j9S?SPtB1njF 
zQ{iq%=9F3B2S5BMmR^YJ`Fxl|?%ODr5m1$<>Hab}5q)ogU^TgG~UVY9t+quWu?| z2ur4owLo>3l7vaG5j$|u6RueVp^{MHl=k|5m;%;=OTPbITskGad2QF#(o!i^3Epg9 zDB=$F>s|DGr30CO&ISHc`zkV?-fr{ ztJd6eQb125&QjH`;C*IQxIyGVe!Wn!i;Q@L1DgL^V2{9%i#oANC_tbZ_oUNIvrZ&P z62m7*(Fz){P!N+PDssWP^e)k}DEcr(8ag}@j!^a&CpMk=>4W))mROI8c7i! zhMGEPjW$R>VS-~R+z0Vni_CkHDJWsri7NuVe?g;3pwT8110SZ22eQxKXKJK4EQ=e;uJ6EXI|5AAJ$dwfOlnAfm<>3VR%U8N;AMxl9i&Zj`x%Gicz@ zn=YvQZDkQa^OAEViQ*;}yh@Q0p%V{EfBi8in z9p&wB8;gjOfeP-}TcN$ZEM2VaDtvK~1DlCV!yoygBh=J9=pZW7jl8bif8UCLw-i@$ zmaV6+Hh}vi9sd#YI=f~JK$yn zLfvVcoajSfQmB(97>e8v=0=gI{FHwLVb!O%rB7l$CucCoSO+}sp<(f%?UrD0uB3w3 zXk@v9ob-!3@vz+hO9XQu^?NhWde9&L-F7SRR|Gx3c4^uaJm`ME6?lyYZVOCSktMSE z{NMfk4n4|dYN#AFo^k{TAB5g%JBJ#e!64ic<~ZE|oFV^PpTyx$4bwOk;yelhl%??Z1&#p$K0qZ5Fd7wnyT-r>qA%-$%sM4CwMn&Jz}I4?@YRd2M4K?xqX4dy z)Z%0y{RfCslM9MU7#J8+fNbURk^&IB1IQLB0y+xFo&#d1L)aTY>?8>L2v|)>kTZ~d z1ISiL&PW8B!T1Bn)=5GVPe)=y?F9jDm(1dVoWx3n0Am8KB?FYCR+NBf=lp`oqRjM+ z5(P(KD5WZR<|XUtC>R+Snlmtf!W^Rdb09b@8O6>Z#G3CjFxc+@|NsAPgqYWU28NA5 zom@K+Vj30<41(MY3@3ILBo-xtg_wbuDJ_kG;nQ*k2Hr>p2H^`pJ<-K!#ztUyMjj^y z2G)E3|9@6vU|^rj!0>m%|Np-i{r~@WF;Mdz28IK902Lu+?ls5J`v3qy07*naRCodG zeFvajMe+Bf_uhLCDL{Zg=p6wmBB%%`2#Sh|m1aX!^lvZNK~(ruLXV0FpXSc~F4N?zv3)C%8w?N$jbql;g z3p9O&_;vKU1?m>4TcB=%x&>%~`T$aQgt`Ul7N}dGZh=>2f%*XQs?1g=s#~CLfw~3i z7N`#(bswl(pl*S>1?m=fRTgOes;IVMqE)L_$>PO}WyOjWvU250Y0;vEbm-6_{hBtd zkGb1$^KbK2Teof<{d(=%wbHU>OKIJ@wX|y0YMZZjy@Icg1^#CTkoD`=%fyKj<-rFZ zl!qRANG4C7EYqe&3p+jZ6?Y84@{P^*5;DHB9@7}!|q^TFwE$~KK zVD;+N^y`NoepsG)<{A1l{QAWgUzFv`mkS1v%Fw1w8|mJ?yY%bVPX-SjEF(sYkR5m2 zQO1oM$8;rrJzuv#SfI)O(lp*>%a+MC*IXmlUw^&aeDlq+WXY1S!+#Zz88b%S@|L&A zNhh5o6DCafuXxpo>lSz=7QldT*Ijqfr|-Dq4tf0X#|0NhuT|*LqlfIh_ulmHx4!kQ zGGxe**NSzW&o*Fz|A_(QzWeTzOD?%Y{`}`Z^U>7n*>1QyIpvg7*%P+rN{{HvB%hIJwUuzK!^21%pyWjn8UMvk7G^jzmdcpsO1-9t{1a}!< z|N7T?g|?;5*`#SRY2C4#G;h^bnzd>p8`rOsb*q=l`c*4r-Ku3<%0}9@Z7U!8(1+yg zv(M(Efh{JiBi1d@fCXN7;RX4{FMc5yIA+eA*#Oq&!A+Vqm*y?oNYmym=-V6Cu9l4( zsvm1@o{I?H1>&yz%rnoFqmDZ2<hHZMyc>&e2Tr5A-*Qeyp$m>({K3r7t}vOJ_ePi>FVL*-zao3#UFN 
zn>KD}P!3*He*EJfmoI<$%hJAm`v&pq1$7HFWPvGDrpULx^)31BZ+~0!9Y!S`{J6*P zou$j*vC_WB0RGx(+_G&)X{wV#a^KdBtX;iA)+|{dE9UEalDRWv@e5OA(Tt~L!PH0f z!fAOW`>H(dO1|)gFUULI@eU3wRS0#!Hfw=xY5>8l{5j{GBbQ!!schW1v1X&S?bcU@ z?f*_0u$CeD5B%H=ZqiMwRlJ=e%f6K`wqY7Q?b zzyJO3%kjq_UsJC-ux^1$3*g1~=RWs2x#Ef|8uVw-wC>zP`tN+O^dJ8g={05#Y1O_9 zFL;cfIW!h5N`CI?@L}_9%a#0G`w#{a`1ebX+%EGbKd618cE5tGQKLrjLJ5z!>fwLG z0^7_0a{KMK%W0>bCex=+ui0|_b~;c-ANfJ)yW{@SL~p-0ZPZ?*eFkw8#27F@iG$}g z!UY;T9?}~%gP4O33N#cQZj-EDwpga$^B0+X)1|U%;VhO{9*#TiIQiADekGkdcdjg| z=j#@r1u&E9JKy<^eDj;%(@W=_eAOX-6TK3IP8lb>)vDHYT++nfcq z`T+!vjmJY5UU*^AA+l!bzsn)A^SjTHZbNrue@dN3aUzY_1_r>4I4@&i1sqr<93vwP z&y=6nPB!VfpUBDuFL_B3VXEYnS6(T*@4kBhtf%T0Pz&67=bduusi(^G&p%(&9v%A( zmF#n=5sx>ryd5fFKP3splA*fCmmcTk2Ct*l-hgl`*-fp!Mt4$Wzz;Sf189 zl1&vaD)E6LW`^NpQ4e(stXZ>$?-73RgCEpO=E}K17CjIT_GX36b6u zv17*?dHuLeSzxOjK<3SxCx;z&7(bdSwTSkd?T`M5?0n*RI@6@36=f%XB-v>a40$}) z^+5rHiI+v^ir6}((-X;P@V>=TiIj4K4lmvx{N>rQ==sM?pX`WPVxRx~=QBt>`M+j? zXPXEN8dnTCt)I`437688m#lgZ8~6)X0h>dyx@tF z=v)yifW>^)j7~`Q3e)sP{kZF~tG+GI{^K{sG&^F(*hLpzltJpr{}l`1I}Ut+x@gg& za+UQ=J^HO5ke!b|M_RV;lzfdy@GT7%X_{ws_eUKCc%skC;m_f?iq3N4XbE_U%Ve0L^XI><| z^g@UM?&!{Ge&t|y!zjaPJp7SGc{$SP54dR3*l>f7BWA$N#S>3p2Y;d(d4e&kgzI%$ z@Wd-F)R*m7IYtqo@8Xejmo7R}tRA*{3v5*b2;O;M?WzC#=RYOG_uKg(IqCNrgPi(Vz5kYNHMm(SJ}9>EI9@Ct~e2HTi>Q!U3@hF_;b_`ZxcXhvmK> zogwR1mcJmz%o}`oSP$F21wQ-P&+^-j%9;+{_c+<>L*KC%Kpq0;EDz_dG)+?SEAyz_{1mMI%2*Tg1$FLJH?=eJ*y4Qp10 z0LFMPf;+DP0|u1f^~~01fvsi$!Fz@yk35o&mz#O;o=3}mpZF=itOXnnoSYfG!9CtO z2D@X{7o)=%oP3p})G5$OjX1@`gCzT}fLGA_Nq{^QpTad-`r;J1^E)TY+U4a(7g%oP zo$q|7iKrj9ISas#Kl;&+%EcF7T&f$UmhXD1zj2=C@1g~ed zE(>fm0|=%A;)Qb_`sv402cLJDG;P*`U^q1$7&-L1=q&7kel^D;dOB&~@)3X*9RqPZ zjxwCu14-b=1!Bl2J!OH~Wgt>S(kTDM)1Q>PzIzI9`9lXR9go$4v8Gl%Y^9pu2Xf2H3j?FZ-3B0F=_Z#<8*`IvcAnu<%;Qkc6~_$v69=oP^R8pADoJMcoI zOg@<6=lv-6@e+L@-0(wx!Z+y>K+EOcAAdyVKm9-mVT?iKo_p?*`kGoLvustFtzrPV z@x~ju-cFpXVc{Mlc9Fxrd=(#C!O6pi*=c~_0QM`UF#y>641)p5LS;t-G?&G}Ks!;b zuiFz|E-$B7xQj3Gv@vCq%zOGlx%Y=3;JQSnjRvCMqCcI^UGkGBB})Ou{yO4nUG 
z?2A`Q*P%NUHH6PZ4$c!7LV$T0PUi7A*1X2_VHmEV}#Q|ppR51IF0@trwD-U0Kw#zxxu+a>&ShK_4)7>rj1GiOr&mFUSgyGohN==CvfiytaDQ|cai3Li%oaqsGYEfzs|GCS13t zqC)}ELkudQGwD_q^dX0j9Gs42E+6p0`yMX?_I!J=Wc>4=|D3n?C15?XRa)SN8*ZqY z9D(mU4*$wuq*K4)^qbsI+>z+c^d|Kce$=P6FWk0xq-qR^o+FfO1)rlYCk7dPFep)! zGs&MbaDtI{Vr&@J87v+8>wBJY@BOm$-~H%}i^wC7JR&Eacw*{brNVlqQCpx<4PUC{m+Q)ib_FFZ@9*{3}XrT z8h_Gjo3LDXsT8W7A+!y_%`3EzDPX}A!5JL1eN21kM6AWwS5i8ScJq}m^SFd;%l@BB$0 znu4A@GT|x+Fz`lj2`1n5A-Se8%#1(n8`5is{fQfgn{K*^%MnGs9&a5MXtVv9gCOtn zoQ7#HL+1tq(?Qb=c4bI&RR-=|p2CY2S+SROJ#2jzz}5nl%acra-&ds1 z4*Sx7;=+SY2A{zneNgMFe#1O{#C=WE$)D*?;n8NBz)!A9UgmeWXaeteP>#zonRYUu46s!k^vBW1ep){wD>j~* zKYzYpfxUXzx-5Xluh=~>4@34jR<{(;^`Pi7?h`<8zoI*n00{6858+}2>p?@|QZRgOQmZDj<9lW#OCQ|HjAVCnd;*a`nL2{MhJ$7V@j;i4Q!NLI&+ z@cT+woQ4)GSitXOOj`Ze*ey`BTqDx-4}bVWX*uhrEn3NVT~N+F&^i{q(H&B~*E*EW zqC!WZqiXEV&;ALpCh)_FI-K7KERQRoz)9u+KLlP;IksP2AxAzgA78Um4~L^8Q3djd z$7E!tbSBgE@iM*IYtgoo3_bXL6d4E1;KAdNde}NFfE5$o|Ni%9J|_CUW2o%&k?(m0 z;$-j61%L9S`DOSDokVF6YZU&h8S^YB(p>Tn7ANK(L3C-|VY9vZ&|8sz};-S2+4 zWVsOso*-?y^j28WQGk#-8*a1Vr&{g_WPso}%&?eFP6y8D>Ev3h9jO#RZ;TV4)X9uJ zI}+b%sC?^G1qZ{8JQ2vx^adOqG4#-c%O861Dbk`{Cm=B38T#*k|NF?+<3?_QFMa7t zGGoS!LQ8Geea`%mzUM0T@1S4d#2r4WjsQ1=579s9igscLltIfuSNKdu@6r-I^I(dE z(`yR6;btZS^qwYuCO;!13*;S_^2ylrOZsYgSw2Y;=R_a!6&v*#cIXH6@$GoWvj_`} z=I@S5h4oCMvA`StAtA2vu~`mY5Ko&nO>nigY}qpIrHnnp@$&rq^UqHP!b`_ve{`?3 z?`iAiumgB0(sXY3aL-xiP#ikCJAE*uvuE@HM;v7lCwh85G1%dZF*-*GMr@rq=Yupt z@FRtjRKhFipT7Pl^6X8;au+-8v{Uuc_2gbZG$spRh3Fl2*rBv{`S!<_=anrPO6%B82l!k?1^S4+?w zn(;z&#E20IR!=l83%st6u<$JNh$D{RS~s{Qz5e>^U(w(JQ(+?=I57Gg95bBLZj}vA z2d^k6uF?lh9)qtvu7V@uXMw!IN3%>L#vePM$C^eTfP5{mAIOp9qcl(h9I>Uz1AItB z6vr{Vpb5IxoqI{o?e{lCKVoOMdT6{B_~$?WSsFZ`>8>ZAUkn~-YW-OhzJQz!94vDj z!00o9ZD>JkW$-7K!=F(Eo}es(`BPw(;pFcGc3!U}HXV$!(uQstn=Ht)i>yQ+I1uG{ z#0DGzu=F!}_9kU8;;_^8Tc{z1r$;=ExZr{dQof#Od=_|J1`u3)Fn`SbiM=O~y zu&qoO&{hU^ZYiyrH%;Y`2wm(BybwluQ@gf&Kr8hxR4Va@40Cs@KXwoudGyBtPT|Tc zudJHRp9<@VH{AmIQN?Q)UV7;z`Pj!kCf8hZ&1RXl)&=A`57spL*`O=j6#J 
zpRDgM4cj%caNm9R2_DbpVK=?|Fh6xi^$0lh2XFV!AJ|Ge(aQC5{`E5`CF>5}AELjQ z93b`C`7P41|0vz=V6xLFU!9bIN8t6)_$*Lv?f*hQ3>v4NdTN7Mt6gx&ZUN}3&<7AIN^|lS`v;(UqaSsD=+i|oGL9y0gII~^i?L^5{l z*p#m)8ixfcca!X+OS5+$zB8K|&7JNJ+-IP*`9l*K)w5MqC0aG-LsHq~4ndbmezQq% z2hq2aZDSC`JN0eD0VMfp6lmPLV{`e$zCC4Vw`y5|h6}8o9a_jKyLFc%$8?t8K0IIU zo~i>9eN?fmB2b(pCI<@Y!q0oCte$9CzGtH3yF&d%aDL`R;A{yOo>e zVDJD9f9jEl;evEShtp9N)?B~;2wbY>{uZ!^Lv7_oeB@EaSl>%9UHEF^Y4k)(cPFt3 zW1*({Wd#z1b)Qb+BKI? z?$?WtxCn#%2K0xF14niY(yI92?Xai2fvx_YaZJLXyX)Ue-~DwOSX?CG5qRYWTvbBq zfH%bg|J4A3hQ$X{*yssPkcXbz?Iv&k-amDtoJ*zaknPcP_)B938bn&(tX@XReSSSZD_S{GlKZR)Z@2a}-b4)E{;L-%9yP z1D>4+>zYYfz&;`M@WxtT-MV!$XU-hK!id;T`GE%>;G@Lp)2GX-RjUNc?&R%{JCELD zd_QwlHTkNxXG$xa{y@*HQJ>7s3L0Eg)WA(3y>zK4AKR<7d}Qw)(z?Yf@9MaK`Q-k+ zWq91RRP-J8C)K}&-YxAntlRzOZP? zB#k2Tsvlut@OaOA-XnO)m4~LyTgYzMn)ApHsz2D(I?7WAgTuhfHD(|28193sZ^S@} z1k;)YkY%I-UjP-1yyF)qVJi#=rBgZqV8|PB=%6_T6*}r8CFqX37cz$+<>@%M!!WRx znkZ)%a{ak53LU}tpmSvPKrtxE+vG88P=ed{87}w|eozE{G#C(H<{72r!v@p2&xBM*5mbM&X=S7>_;^aldz&*2{#c}j*q zTCDVBkcz7k@GbU%@Ndn(Z$h!wR8FvL%?5dN=1O^b?rK@KW~1Jnw3I{jPN8z3!He|| z?Ab#;_qP{I@}U#NK&J@<+e^Dvcvo3Fv~Af;_88JZ{`Jg~gpkt{sEi8xi1Z!*R(a~` z@5=gBOIbW`-aNVb>Z`fzdty>gyy+Hr)dR>`XPs3wcwpAl!RP)uEw_%^sgIdsAZ8Ch zN3fG9%EJqzZ=@JkoG5DZkDsB)0tRZmIVBs*bqaHa4=ur|udHNjz2Md#s z^bXz>X$+kZ5CBj|8$_ahKq%1>F9s79;s7)T!_||MZr9tf-*&Qi+GA0~6C2hGyY<#v z<*~;eV}9=3xqO5OIJU*@+O;dU#l`!Q{rBHrcG+c@suNve_&=UlxpJjkb=6heI^fPb z@8n&@mYM|Zln*Y=Tep+JdmW?X-j?npY)`sa;b(xEf7Oy!_(I8_YN6&ZT==+uA2Oo7 z465<6eD>0H^0j}wBy*OnQ%X9p+%r|=Z%-`N7vcS6fX>Lu!-yWOrBmxBvUs)XzRE73E`pXet{j1L79+VuIzUI#C5oyf%;EU#s;qc*TY$gq1T;E!Rp|e4-hH==@N~ z4S;0Z9s^4Q2=4rFMfP&x-h1y&DBZeslM_xjK|b(-56I3t@0{TO%L(j;hjqrTxZ(<~ zi11q2Vu%hN&0G2GD7t_)Ae?}%VgBIB&5Hw#22wChD)TKQYX#uZnazFPy>r(ptc5x& z>I*l{l%=b1N2QF6Ey+;!^_yqPkKQpzTI!ug1YCseI;ex(so}D}q%7{Lvrqb9IP-m8tUq1mD?Uu|@cDJv0sr6kf80`rx+yQ=cj?e%7vA zUpeAy*KzQuWMe+9NA(O#+k%BTUOW;q-#4d9796MZ;Zlq1gXGIDsNd9ua{Spy!E zF&~8*vj{Tum!v~k4d^g*Y#=GD7CrH(qZ6^FBqvYO8&b-9DI-&@wtNU}-5KkE6=6#T 
zj{@6L=y*T<=}&nla_FIl@;QDb&i_gtOX-|;+G#R+^k^=Xf#q~wE?@`24!sBK`@9`E zy9(3L+jZ}!3(1xr-3;6RowfZb1hARIPv8zkI#4VIyn(U?z&U-dcFlBZa9KB$|6}q} z8$9UF=G!rG1HM0h#d^7a`f^#beq-r}Sz**i^$nGw`vJo{N>lG!;E(X17naMj^YU3& zI)zwq4%EAlyiTC!PalUPKmr0|Eo}~!AmS${jOfU zI@JKT?1y~*cY-?*couw{uBK@OcRQ95;BG=p$MHyMm4=xnX*NktolzM%Iw%1V4OKN7+?B@T(m* z$$mqNCI53Acn$oif0yd%y6aVkTc%*JR6nDenvRu^fq!_%Y^`gB{vbebf3{>3IY{4| zR0h23=-j5c4DZpVRCLhD&T`d%=Ce1UL&Tj&@N@~(KgxRUaDY7j&&v}Ze58!QzaAQg z1zynra?wQ>@tJ$B`o3pgB;AJSlskH)_7076M*uGK;eaK_w3D-Au!9CI>4HXqu>vr=nLHqYgWqY#dBrF+~;M<^N+~F zXCBm-k;^h_OJerW-~ax1KL6iy&poTkw3k_(t6q42hgF}4A9%cU?l*$% zW$hE#Xpd;+;^f*?H#qB{k8Sbmx68qF2l}dV$j-HF?4TzX=LH$H!uGve*8*=btY63S zHzFXMqwhYJtXgk8R0-3wh|M9RSP^Id%Lf#OG5tkG=?t&FoyzY^aOZO0)TPp1?|x3* zqbCQ6;DAAGyPmCOvVId{Jb0vCA@3lP;3t~486uid&p>(MFxDaI&?v_H2$nlvb+>et zL{^3$a^h+wI%q98ai||Sf!=3IkvGMe&LGxc`Xj-d%QqUU77D-zd9VM;WbQ+uKvy$tPu-C?2~*)Z1fgdv&kN=)2q zax@-g@}a9GnJ)a%%24+rBugay@qI^ScyQV>S+il2tky5uC(T|Z zWBQcuq7E9>MJDM0LOoz44XHoEe#}EYN^BZ?i*}u5-HOEkaH7bgk3Pyru>{l)Z>j~h zcmToHs`<0xmhC&s?jQKNDsP>JwTKQBRZ=f7ht4~ebxNRw1INicoEyGl{u3#q$6WGG zy5vLKmQ7``z64zCS5uoXFo3usfWOIb9bG?Tt}siuj!JA^?Fo%kJn0pVxK!7y-hpG} zo1}TOrqW4o;qeT5tzMBW)|ZpZR_m2tlF!T3z_IeeC}HEqwN2dG>EVkQH;LCOkZv#LIPTo%`{Rm)9an zNLyh7v#W5|fep$kL))&sIo*8dzQ<{^Xv^WJ#$p>*B1c}!t>vs~?8FC%{-Pa?4Jj&l zkvcH|iTh>ne~n`J1^P&3z}-QJr^=ZR!*{5kDs2NJYKMB-U}?3~qcdoweQ}?m9V$g| zrj=bNHj%rYTT(TE>^r=Z{PONuvQ`Hf1Rhlet#iom_A-7z8|kgfEaYM3+Kn6?^nd|sw}I{C zz!4P=PX%0$KxXU()`f@w$hU5JQScnOV69L`ZKoafh_PLDzI#V~_S>2#hn(edop=Ax zw54+Eq=hn3Uox5)BW8awfP<|bhNO2L`VM5TbN-|QNA+TNAs@K8{aO34v$qS3-+9=0 z8Gq()WZJEl%8Y;gHbjsXD<)$KFqWj*+5`5O!Aj6~-+gygRq&n0?(h4u4$S)T8;x&< z@}@Lc=BtH2126p4*qT=>9K;0!&A6{?DwbC--Dw&tmu0;2BYb&7*$whyoh8*onm0AQ zO9h$%)TZc`OS-hR?jC-EQqvzPuY;ukDEvu%9)ag{hwm}CgS64_JMyqpFOvQZ-*&a` zes+nRzE^K)mg(7EzZcnEY46lBqJ?iN^GK4XiVQtF4cfjmfYd*x&8`0CrnhJS`QT=nQ;`Nz}xlF-la;efy%*RQP(BDG&U<~%o-#x)tM0Z+%z z^&iuxjhuZ57LqGJ56*d^;~cnM7yfR1e1Tki=Zkt378vNp7Qi-w*-~X@Gkr|H**WCr z^@LumK2$`qqyXX}socnL?AW@weD1IT 
zP&tKs@Ce|+`WWjj9Y7eu{`6G?fgD;{(WcU}Cct>)kw*-%etgp{utfvN?|%2YlA>dd zJYCnRYDq^%LGl-MLi(XPVVu-Z1_$-m8kSZVoFlI6j6*oGNShW-^Zo4aW=@S z6)@}L^gX&uTcy3`37xMV9YT?lezgc+0Sr77U#$Q%K4G^Wa{MkmN>^Dg$MT3By2;qS z?d1P%nkLg1=!`b&F$Hhv?E1rY_+wyX$adl?U!n$$**a)64qk~K+;p8SeXO;^$={ME zetVwo1ZO84%uYJ#q?5RWHa$dDsbdF>BHs z$xgJFwlDBtx;T*qLpNv9E-1jDfP%2G(<7hLQzCJI?ulSr5$ENRPQFlqYgy>4JoWd& zqUr&pgFaH-bx>P*aM}u@mw0)fh41XE1Jhm=kEfcarARh$!Nu(>-fe_B6ZCtLmkT=& zXfL=MTDW3evVU$P0BsRT)3DP(T?#1+*!2-J>RPp`o@o|nc+tLM#R_TDrj4{{QGRj} zIQ96yw?OrS(h8oFCr=iow9o`IdRvXuW0ZHxr4g3mhZo7gmiAJ zj{(AFQJC4wP6$zeI8JOpLfXqb!H(!4M5IsoKxTW}HNCkUu2zAA6% z&dK!H{1JC02kG570$tzb)1MQN#Vl{$rbB5eI|hfTQ>PMP_Uzg6+u#0HurdZN90m>? zDA*didGqGdsZ%Fu*|H@Ek$(O9$w3Dl#O)@qt`^oYtB0}$@)Z@zB@O3Z_70@-!<{}m z>?7FEBc0gXXW>|AZNQn)%?&==8u^0bj~u?Vjn2)=9gT zO}MW?SAVV>_;Bnq_Ua)I>3fkic!&)7DuTKMlE-+?953Q4)TqD=Ev=)Wu#Y|tc-xM( zuh8+nWAf}3^6cDIe3?GFPdgdZwN+K8!}>v|?%7v@w zGu6uM3vyOF@hR#Lx||Qlx{os!Pa(5zEvmH{Z?nJx6npHU-miabo;)*0uke7bFi1rJIRVTe zA0-Z8q_qP$;Tne=GsreaKS&3VMN{sVu(>LFS`wBSG7Rmp`$uUSH1JSMVBU-tQR>ebTP$#opm3O-FxU{mFo9| zb9K$EX$#jtVc9_+z4q!*{jTr-JupX>ueO(ZX1}DBU{8=^#+AQi8rZe9jL>gFreL2C zH9X(Vss5;9)Q{Cwdx2kD{px?=GX>ArVteN)Q>O6K1gs{0$RUSt|M z+=Z-1IS(pVZ@o@US{ditc>1FxsPzE#ojF37)KmED7?1aq6w>Vyqa9t*^mfe1# z8;Y9fqK(6j`m|$NG;!iY`Nu#0QGzwb43>7O{DuY7baCf_b*a#_v;&Mtn|T|!QNy1~ z_Nc%my`Ri1t*qP_1zpj89H^o_y&Wt45lyt&M?smcl&Lx~_O`8R3%W$qP4peZdVTS| zunG+TJf`~Gq5b6Y_YRjoy?=yU^v)qQ2M^o{{P6adlmdL4NUd)_W-MGSfBN@4x$((` z{N42ABH`}|&Fk;-`)AAQb>;VU(BUnk?GCHzt6!+bCUp@wV{PD*dLfz`&*!T>C2R-{O4O#*UjF6yzs&crA?yn?FW{2NIeH4*gF9-$2B(#*?yaktQOGuS>!~T#tDto z+K8iTJi5Z;puaqB9}!VPMd#V5Z`&}1w{qF);krW_buzUmwbmgXKVXJDeKtcX^o5G7 z0Beb>uv+1=duQo1UpzL$U<#Tt)?+U&*ROP!$UehkhKrSN-?zQWHVfitHgS4qv`On( z(kExFx&f_Pe?v+<6=@URk}4u!w4*qkC`CH-AEPhU50v>&-fo0?{J{@?Ku1e}Mwq~( zHr$p+z^q+90}HF9?hE~bhyZ`GlJ1r(K_(}1zk zRhN+I(07Ei>#f^lckCriTj^AKa8cIS*>ug4Ir{C>G+Fl2GqUK}hh)k0$7It6|0>_4 z@Q4eh@#Dt}9*2J63t!mmi~5Xgt4U%(O1vYw`R1EBWj{9Rdf)rr$2${LWY=AHty*!U 
zp$cwx0Kw>#hfe*5vt#fC$Ij@dJ+!~J7Oo^1(|K^h12qqxMuX}JlHR5r#EO!L{x!ay zz3rK0B?kkEIffG}lYczDL~hX^`9TlJpy;d z1h?On0fWOd%&5`;JFp`tXV6=xFhvvom#U;a{34PTb`7_p1n z$e;7WW~gk~BP^(SdncqUXe#p=v!}5IB=S}YxFRdid@by0aIgV>>AfES8d7^?@~xzN zGJ+ruK0O_F9e(~K@OzAb!#3|JUaD0uPhjzV&aFvCg z;|`Wq?Ym~UMj!SAP1EKr^%2+rF89-Y^gc4=;L~KCF84FKeymPK{KW~w%fJS5r(8Cf(+;7`fEo+cP1FG-1=SZHk3lv3BA`P( zu6XP9imR9wX{8vHW-PD)1duopB>SOiurY06we@ot;ryP#1@VOFgL%5cbDmJA;BER9 z`Axd_$S7&oXN0VnWe=P&GXr~NeC%T%bMB2Nf^MJwbosGckCEdA+sQJxPqNN_XGjB@ z5om|F10_D-X5cuz6O4n|2-rl@a{W{OC3;e6xxk`IRMX}u>K#N~PSO2TmMH^?a z(o()LCfmw`4Tjb7uYGKxU?$Z^_wA+MO;uk2<$SPz`7iFCEmP;4KSCV*d4hfbSQ#eI zUMaJetYuxPDeC9-CobzPyaQROFZQvyR30X90I??w28T4SbB5TWtu33rwnI*1M7CZ@#!{nIC*_g1Zq+D8W0D zZ6S2**fHI$V=6T6M6j;d-h1zzs)w1kF@r4u8%S(+09m{^Z&>{zx^wX&j2+6`nP~{n z#xXbw&{F6mC@42F^Kdd2+La+@HAUwlfgk4UnmRyBG88^gPvD>2zo&48Y0ZZ;khnM` z^~>>>^n)tA{42u=c!h`Hi;rsiRcyEeR`ZwHy!+78`;nFyWx$b}zkIFq?o^(`K0s$_ znaK*9fi~zF0BsRq{?M!6e^tOk|1;&AqyIwsP&lWk=8#3$4&H_ARdd+ zW=anSsH6{FTn2c7#?%!6rw{O8!bAdT?$r^cK;aZuoXbsbN#ECO6<$Ha8_OF-Z zcMrq>V2UhVjkVaU{v`3VMkV@n8=J_Pf0$Cd(05xbTTG?zrO;d}~eM@f0SUoO|xMf>#vt=g*h@_S;W7ckaxM)bPj}FVYV__~0$7 z;${aBtc{e17Wz^OSM~^WnkY06${H{V5W}Ip{g5L)cz~z~ooeh|fYKG-DTt}Gpd8tw zW!0`}EU!4=cHPyjD0y{R$kxR~cn8rU`jNA0#?FR}IX%xGnlt&e2_34tm0Bgzi3f1EO<;tbj2G~6(gvQk zJ2`pU8Xja&0GxoVL9vK$VBD$C(2xFdnZycu%YZ3T8jKz<$QR&yK*bCFlYEsGX^7=u zKw6`pwREk_erm0$mGm*c1qOWuTtf#Jy-26gFI>3}DzG|MZ2E+Dpm(E@9&n%|%vQNU zgM6?LjHqJBXC0hD>rOoct5O&3TIdTH+X+k=qAVl_T;mXm_KLK}-N)#Obm%Vf^k2Ry z^B(_aBEloBcfb4H{N4ncvTe-)0|$08jlKiFJOBLiStZQI!qW!akzgj)mW0jD!pisk z0JCN-i(6moNN8d>kTnz{gL_P{xmL85DOEHc@wITC-f7edolD<&gqucOiz0`q0z@_a z@{(7aRvp0#FZ^n8(@7s1U<;dVVm2|y_7c}c^ef!zFJrB9<+el`lo}Abd}N_V*xY0m zC-;!c_fbk0Cq`-=X49B>^b#*D}|iun5LuNT~EM`)tU zvy4CKvu-MDE0l$u5VMwXmP*Do8?h8Hw*mQTIxd(>o+Y#c%EJ86+|pK#JY|4B(sF~28bebV=ErhKd$@MoH zqtkO-tM)SH#0zz)pO1usM8{@UIKk%Mkr<<`>xh-Nv4sPs=wihQeCHRB=kQ(xkFu`0 z<{HjM+mf1X_70?N+w#nf_4?w8!CH$A;7K}866kHh#&clJG}_V`%+|T)Sg5dafw&&} 
zUSyiiM65}l1lCTFM)?;mQpUqe@2}oeabgT)&_(5xY_VQ>F{=*vf=(OWu6Nlsy|cOT ztd`Q`^pVDj!d-Q1&&n`;f!me^$$&*J(np>)2KY zk!3FwFBh@>>!3k{a*=Op8n@bc7^bW0w%2b3qCQ2<3#=#uq(~PRGd!cg&S49;NAOBC zgqvUSYB{xy@*;7_GzL+e=rj3g@BoS0J;w=sC|_CSWqDc)T%gNOSrR3UsGFCS^bGvu z0A_;Z5G+@UgvSbGd_%m+Lk_5=x)G(|k?Rk>uvFZ61Y_p2+oxQAN(g--Rt~|}K*lln zp#p&#cu)&z=&JvTe&GJ^)jOUZGWpM6Q=nZG;!XsQvF^L?J{dJ?6l=7#9WdJp?+&p$ zDc*DB0s8~J(!lHHhKMlTD-Y{d={+-B01X9W7Y0WG8wEfO)+k0DX~abn0ttAAgTZ6A zuB=-b_SK!&$QS6K*WbhXf#3x?vjnqCF1YaprYq?z54Y6dGAvUZ@n!ow9k}qG1@Sz6 zj}gDQNb6&QdAf%)(stc0QKWC%#^k@`%8uIZ29JmM(qkw4xQwMYi3#Lo(M^_D8XG1hz?SWo@t-JJfT8kRf z(hr!&e#aeml=R#F=rd>=)Qi>DIO@wPIo!&W6gJcdZRG^4H|+u>(BsYwNXY6R>`9vv zrWq6~9?LxjoduKc8R8Y~0Y22EVS301FK35z_!D5H4^dPlA*T9N>2DQvmEZ%=-?jBb zT~tGtVH#B*sZzozIt4KmPI^P4`yWrTCu1 zbVRp4V&t%v;=}Q2D}^4rOV0{Q5zJ1RtJ8!brW9((NBzlDk*S*!AP)XC6ilu^1s6B~ zK#$<*X^2epln*?S=50DMjzhi|Ou*i(7+)br-JwH|5#y!n;IRhF>_Yu2Y?|n_npR!2 zCvdH-u&W`UzV5CkD8kA~GdZz;;TPx?^aCqU1sr+E1$*k6=>(wRh3aM-($H3(0*&JaK7^2G zJW@qDB^NY-Tfmhl=~Cc<5qN=v#Yid@<(BIjJc$mO!12&?$Ae_Zq53#03;59RbD#TM z2HAR&_;?WE)!c2=s^w+}km19JmuQyGp2|i@K-f8cNV?%IE*b&({AcuDo;B7z&sT4| ziiOuceLyd1*9whp;zB;1NP%bQ+CH^HI1cqG)G1KG4{+DP^tR{TIXbT7On;MquWppH zuEu5!A3JwuuXg|dKmbWZK~$i>u6d;&FlJY-5Qg_6pg%*)hxRBx`a=2blNTT&4kW~T z6aoR)DX1pj^w5ms28PF}9=PSU8l)2&%UD@2Qy%^oT%!Jv-?T+}mKatxZ7cyZV)8I_ zpSN2!>K}z(UA9NSG^QTh*D9ZTuD#x$9t?oj1SNSAjG>~ zSpk@9=)cd2(sSHl2@i=M{_ux#|NZxuU|U1xf(tI-yBzHAgw5loPMyknzgpclJAjNB zF`~qG>C9)<8T4d^KYuQ02x@3GHXStCzZm6nChi+_s(_nA>DQ_=R`RYQ;1Q414Vu1w zM1R@7ZyRZaxw{m%N&4z^#LpkvSNHm;{be}%UGm0Ymc{K`HI>o2xZEgxpE0^u8yStZ1bU%>q=?c61>q?_oIvX7y(|(8cXd_=Z za;O})OZCTi%XJf;U+LZ;=BqYHhR)IPb3t+IT`EI8y{?SB2bORWlfr(e3w~x$qzn0* zHkKzfyxa#xWb+0bu-8XvwOw_(!F#>UIsqLJ)$tPst6E$E#|v?yR3;XBv(Nt7GaifengBbMs z=Pe0|r8EJ0sM6o@lpG4+Sm01cmP8TDAq(<>O$wkGgNS6<(Vvl4o%9X$q7(Mh(YNjK-Pw}j>}de7BRK#*AL8-?;bOtYKe4w zd-3JN`*AHP>_Uhg0Aq)>#GqyZGU8W87}=|>eEZm;Wncq2I?NxRktuWSW%rd2&5=V! 
zchR}@({lPKVk^E%++a&7)UC2Zp%encF#;%f~k?-@n}p>)1(Q5RY%;{ADm!V~3;CYuH!i?PMLpWlkCr)B&{>1v5 zwi&jSQI11Hf72c+xE`jA>t?E@qp4vWTz}Zb__~g&4{RIy6Io$F8tp^*Xn&yUANUZ5 z_0ioQTeXpq$9+~F|Me%y0*414c!1j_V>Z*)5++TWRJA}VCI?_y{50mOkZ6PK3<18PrXWut$IFX_rTVBo(LlooucHVT~tXq%NE@Rmcln#C*~<Y(Fq{z8(X|A1bTjb*E$C4me4Py$6m&u3@>yEYLu%kA z4cL}OSClzD!390a1IJ>sU>T-DK?hvZF3}LiGULw|>e&|4cpQZ9Dz=t@-5IfyBYxAS zO_R$myG*|M&2Mtca(q{X?_E%~rH`y%b^v+%+uzPnJPfqp*@?1j?hIxNXLhq&6sV~Y z;5hax5P}xSY|NsF^bgHgA)nS|7asAQ&=S)c3GCPW#lOwa{SC@XudMeM-H`oSgm5qL zg8ukTowrIp_WMbKMH0g)QQlCPvviG|^~cHbo4aS~%RRF~PM7QgXLiNIK9Ns2emV(w zlIis=;07r2b`puJ;}Tq%tY5XXG=SjC)W$M^;CqxjbR8VqovKyrOs;mrBmxc5?ryMX zKa1VI$(R^cECc!gJH*(P94T)f6yrh(CocH2%O_M1c{>6XL1AHcQ`6~LR6&o5s3N8} zsextmp(r)B8bE$j-LNf}psL?3u=wY>gOlu?ov6{@5!?C%OD z#f*!Y(QIr6alEiW(%<1Epy5J|6hl_X4|4KU3~w%^^jDOExOLaQ(nD9Q&cly?{Np^^ z=+o#!xG})DELh1Jn{#1FlP`VgOMGSVv!DHJi|e3EC>7>vhzi2;M+c*h_@M0a9{c<~ z$kBG-6nVszu;5P|7wb*UB7)z+j%v&wb z=njQgZW?@gb!aX_yV>j^tPuUcv}M!>Jg|UVFMa8bfPCmlKsunnGQctYt=qzluq)e= zRrsJV!*c>SBp$0_Dlc{*#MkD_^aVNMJ2zGvZvjb*=0GCmS;?;o;kXhZq4gl&%P* zN8ZzhQQ(Bmrl*#&Icm_m{-n?K2Lq>3H9bVab!LI3O=qpWqJ&PYk^+Lt3c-quS%=_^ zn7R@`N~Wxy;2GuWt)4dgGA@Yd&s4-|3I$jS9HT`+u0IPX4CUlpr9YsCS1BJAwf0w@ zQESlGkWWk&g{&Bfe+BH(q!* zf~mc+#L3lHU%f?Ze{J?9der;WQ%@}oAkW_Z2if7+Pf3fmx;m%Z)*31(EVf2TEt5GP z%8N#cMlDmL`4BeAi;LIlyAfSS$6s<5q{#)y8y_NKWl)qShj8(@OLKE^*fEb3DV*3+ni_qtS^>j1jQ!*XGR zkRS3Au_jWjR3!ER3Hlr-_(U1BTQI5oo}uzFW{Lg{%v7!p-J$0|4j#!E(1rHQ>dp!( zoHlS}QAy(2l|F6g?P>AMz6- znCn0Bk?`6hPn@#+s1HP&u8znId;%rTgNaId6vkj=X(KB8Q&cnjG5C_D9w>&o8ZYik z19*-IWUVi);(C$LR7$#{h>M3RZJ;?ySy%ESD5DJu)4!l|WZ2vJMj2vK`JsKkkzA=^ z<=p2vwC zCg0NG&|U~FNN-Gop3o|K;1v3UIRz*fJPLUmR-*WUDc3QXKN$!M496zdlL_6yf7bO4U&;W8`at7;lTe=St{sB6+cg8@6rk!q{67>DWf^6`V(%f7=q z>C!xHq=UjjU)G59riY2o>JKL7P)JdH(3);WXJzhRZr>ahxPT@74BhI_swIHI43qM` zglv+0@~R=<>qAMJ=0%<3Is=}2z%#FZO#epP7@lrDv3+^9TCB~sH3V#3iKzyo2yvtR-k9RsLv{*X(*;DP>u zVw~7{V!%#-0I+_Z(x_4oIv4Rw?RXCC=N9rP%O^|Y6H-+l)6nYb>{7%!DZ^B^&;#^P z4L`4L!buelo?dT@ceQ8unCmTKwWzrQq=eWEK@hi=|Rw=6}q*^X8rua;E? 
zWk@M7P69{=j@h%IXG?2gxJ71`bWxKSgSR7wd~5666AJ9WEg(*>@=n^Nt4z>Gv=OLt zWKz6IABAG&UR`!wf75m8^(S6h!8?(VkGQfwL|s^>y!O>ip$;3}0`Jq2-*QZlUx!|ZFw4)bSMNtPl#(iGz{GZpoMIO~zhuHoWb}jWE zM@IB=`a?bf`bpF&WU;{eb<~?M;8y~&ev}tEIh!7EQMZYF;F5FX zz|UPlBLfzVpEp*X22O+=9$k)Yqm9PAHx$zW3t}xJy?aj1OAFZehN@}DK>{pO03OP~ z%PGux0Jnm~SF?cwda4F`{VR0@Ei;;&K7=4H0!8rPG%7r|ANfMBgrl@jkRb*U(<>Nb{6v20m+rGgf%^OG)YWvbIEnhqOS zJEiIpjW!E&Dp}BgL)f$M2Qz(c!@x0#1x<;byo|Ru4KfUr=kka3XjOBM8N)0e@o{T} z=XEjApWZoJAI;i2YJfv8{J`4uCy;j>XYAWY4#b*mn+YBDJEkMH?LS-YI$tio zPnRzWKLMSld+YS{TDx*K#CEK1*^Gd9e^)*5B$)d_0bG*po0Wu5JLx`|e8a^G zLgRWP#=e6IvqsqBr}EF1Dr2Gla>lm1jc<7+aoG)W$Kf`(o}ABt&gIy*LSRDRsu z1P^o$LkzqqsgO@|;<UqrjqfQR-mh;C z{;$&bhU-1M_m%IwYozzthU{lzLjQ6LxXly=W`q6W?lSCNTS+rAX~JJ1K!5N7li`i! zyirC74@M86kl94*(qTZ%05pk@J@!~_(Txa-S+%QHt>Sx*mtK0Qq#14wubtHcuT zC4?H8EgnEX`=0l_r)r(}$NuzfnK|*FWSw`Wl!o!yYTf|Mu-!DJf7%#y0L6lH7*ab{LO$Vb`W}};HWxBWmCIVtyChL-=f#9`=Li)2DV=XiM&Cn}NP*9lHqmWPGn3mCv0KtLbVl4(XDeQC4CYBt$4>rY`e#9#yZ zs5>|?@0JeU(VrC+jR~~qPMRPWr6Wjl1;5}G?GMzpJqD3H4wX~P1Kyb8v(7q8uqOys zv&Qxe_znc&=%bI8p+kqtZMWSfxck7K@LLjI(EtMD{pL5nDVG3WL0@> zvJ92U<9F%Kt!hmdFhu`oA}8(ElT+U-@$oq6p=nFy|J^@ZzVY|z^6PtM$sApJr&c(9 z@4mWpkgn|po%L6$Rz`4%uIcuZf6n0V;(xs;e|~t5%xK^v+f(=Mms-p0W%?GT(3A@b zSjeYH*9B?TS@WsAk%XQo^xz=Jg+zsrMiCereF8-rf*wT(B(V%+w(i=`;C|d129TyY zj!0C{lV8}bsCnTt z2FvBK44R;Q^`}SU4^+t4P{J`#)$zgU%jMF0=M?nHt5ch1ve%Ffa_SyE`G_ee!Xu|M z_UR)R+%P@WzklZza^&`1b2O$`uiGf!ym}%KKBTQ=#@Vw+sku&xmAnzDoUP!c~e$Umnb1QjKCj`JS zv@q40%E2E7O|G0;n+|>)nDK$6rA)}z&w*+D8@-4g0ldD z;qXXdCHZzDF`S?$8)p!DlfUZ+SmCsxlS?iHo5m&ifot+iXLH`9p0iO*>Ta@_WJ%bB zUactR?9NYfF3z}UCiKtaNN-rPN~Zkji?VD+`PnVrdmMGtQAv}=lEB4F9+vBh2=I&C zH)szGt~7~xPND&Lv+YrCS6H_)BziYF@&SWPljwywxdnp^A0X>S&9u@Csi4_qV0*rd4}IPOy*Hp3w~K+J>@2KKZN7BA-ybW0Jio zfrdb+9~A(Y1y2!`NQ%3uwCywSDrX5gaifiJ z4#Q)tz4qEm#*Q8PiU$u=>=oaEgf(!I!HA3XE`E&tA&GsRB=N94&D8 zAtA}i^q?qk%6u(X9O=Xbd7?`%1s#?ALWLUmVS$i~4<1ePTz*j>K-VmuEl=y$=BwsT zbtx9%`6xa%YK&p<;K72me6V^b!m7n{W!u3eXQtNfNqZj&3@ga#!HQ5r1QQDy1JCLlclOo-q}TFhEz34)j04DNr=2D@-+Z%IYRjV? 
zcGzKy=w9^z0#&efN}D!q|STwXXytOVFxU)E18DPn7b_!!gub=~j5SA(j?*+y?Xu?4t)Ex>PEO?n}T@r{w^eyfgOc4NsHx#@jaxyuMiz9`gdt1o1R}PZS;iSqrENN6J?06*TKQgiUzeh(gMoW z!p%=E5d0vgRA)0L;f`7zp{l^s5cccYL08XiSFOuz-B%|9?F1VA1wHg+I&AEGjfR*` zVEO|NXwjc&VTYoP)uupTFnZ?#A|qBnAs=ZV8y4w}SV0xGQ8?Z&G`4_u9J6N4dc|sO z>FV46b&bzC=Nztyk13ydSgluw4_$JW+;YKDGX37a%6k3A01Z%X9P}!*5L?O0ti=jE zlsa9+CdV2u@pC-mXEf}zq%|?bb^bglAnkeZB>!9&z?d)eE98ZI;8-BN^JAokfaNi; z;gTTC#5*nu01vJ}^FQZ@a_b1d$?#Z?(h7Qozy6p!f8!6T29I5L-BoVC z{dVcnrTqNcxHX<5p3~-G-jx3^n@)sf^2rMDRr(&|xqN9JC&a!v2>rXXq)rY_(WKeB zZX5(d2aPRl{%HD8UE&9x`6_6jFON)D9gJ;a0D5f1cH!GcRG|XERjiF6{kekNbPJ`b zI8^Izb}kU9Klz)jQR3o|B7Z#^K@1$Z5}FPh1Rb%7qjzhKbk7{CkRAW{$3Jq~E~f6{ zht-48$MHT9pJiYjEZouKJ;;xK^dnA?!uKJSyME?8^Ynjp2NH}oYeI$R~-%0ffx;F>n405D5CZ(iw98@mF8$DxRwNMk1oRltB{PF|#w{Os<=`H-VC zO}c2FAnK%umE{oaA+jW5FjBR{x86C@>negrURYeTXUE!KJWrmrToyrp zw^`!o_GjL04%-*WLOyh38FX?5bNeJ-W?$i>#~W_ALGoP(G3ydr-0r^n?y~2edvcAen{K-4Wz~L71`tTWGc$bQ z2n8{+#dGS=0PAPX&^rAGW$6~vMp710ia`>Izc*jXdcaK_64%p-^nb3I2}w;@4!p+z@R!P4?RK%VhfPJ zX33n$GOnEDQ7UXC8Ehy8J@dgMR}G6P;)VK_uD<=64Ioeg)5E^{)vwC?-uFJnnC`n_ zL-B1vuIRcIi)EpHJ5mpCyamv4u+rxDzV|)O!g%8~c=ZzPgj3M3S~OecKl_MovpmjD zj9L`$8lt=FdBB37T;fDj$;(a{andN_M6XT9zfWxp`_9p=4#0O-_I@E~usdd(7ENWPKK??e)IabD4_yNY#i>XFys8B5 zf_`<^3o?0@uH!`x9EOukX`X1HOhRdp^YXAS(87vXrP%D65qpyAdo`>&7=*Hbj4GAr zVRwLN3zOz3BwD?2W=1THX3w0GQWY6I#=?}{%>~@CEMB~Li`K%bzD(I1=PfM5M(vkh zez{Z+}7@PJ9a^{sDJS!UY3*Lugd&X1EV zIyz4VermFlrIn`>gma2KP83Lk7AGu(ROXLDkq=m$eVBJzq)CsX!*V`28zp*5AeyJj zk)jgU1`o|J6!h`FBd3aizhJLWm(h?1a4iOkQVw^z?-zhsYO>7|h=nHGR&Z z1N9}bmAgrEv0IdWcFqd<=>K~{u6&^UQa8z0E}#xOYBLNpA`TLLHI{p5?$<8NzK~Heac;eU7p?7jW3*d}^5MaxtlY?g&r7K;oR~Cc? 
z22VXWfOW-*2w0#n99-pB(*q}h>Dd4lnbatAI99~%r|&x|!z}EFqhYRoRlZ8MdB$Bw z9u6JTRi3#+ubeCNw|X1qP6Il~z9Y(uF0NEw-~Gq5oK;o&$_HohLqe;+6)#%3PNvOY zslQd+V68-y*^lzGLfE!sJ+C)WkcR$jP$%N(05Be1D?;EC7n~FtFpv<1hhV@s4KfbR zfX32T>0Pc)O@8sI981QJA79Eh#>`va`c{5vju?A>yyG43;0MDiSFYrqCg+`Z9-kU* z>33kS>i_}+;heVXwK{9(*G?zbc!p{ zmy`8M%fb8g2>4E-Lx&C=D7M>fJE^te=IbH%wcrVx;MptI>A`FfEDeDdewbg6yQkHw zi&bZHd8-!AmKPqpQ3mdzQ^t9cz@d0AQGVZGW#zMplI3R?r1eY^Fw#ff%c|&2aJ77# z=yHCPLmu!CdtFH?<0GaXGb0~x(ip>#V(l$ zkdX#W0z?k3_!1r9$MxG0-h@GX1L5*}@WEh#ko2c*P-p&=I%;jTF+EUVG&&3b9Rg{p zOvhjky@{^}Pp~NL>%440uo3(?Ezjw z*VJFdrQD!@dYG)^-#CK zlTSV=zx?GdxkL{3W_~qc(hZl$z}ET|elTDNE>Uw`{hIse+{xV~1U{s-tX?z;`Cen$St3%Zm(>rx@KW>yhP zxP|^ehiy}g#mvBcyJkG-4fs1!Mg2PO#(@#HriymL0as4cssVOJUNx2S*guFbbjB+?mHVCA5JVJuT8QY0c5nU1T;KAt0q~b6@zvDP~J3WyT zPgj0z)l>%RM}p&Z;OJiC13{2qcHb=4&rZIZwb#8p@@o=Dkw^o-XgsOknjBd=bU-wdGEdCrXfl0-1|N= zbLO;}IWzN=De&dP_pdL#*+QQGrW8nS9Ln~~n>+*cqOg$v3ue7_Gnl{z%J9;d$fvKT$j{Y?HpCIG*MiDEKj3 zmZyByhVb@caoutqK=>fje(T^^+!f(5Z*dE841e)TfO9>V?)~m}zkA4Z@6mGFY@a8W zJ8rzDT=Bc_SCTVNdPh0<9p6@OLAT}IQ}hGDoQlCEh zJnzWBG(oW`BORXWU|`{;Od_rjs(70DjoWe7q zc_cZX_~SLVmy>>dZF%M}UgLZNRSFf0$r4S9xVL_ijuK^*5Ke*<+tVQGi zVDUZ0NdmE#1R)Cu|R zsq=^gbv_nWcjo;>FFpGi@5F@V;-ROX3hZDVT!Wv80~9)oBII~VFH-3j72v)6mwtP_ z4GVk61CBA)5Ca_fDq3T%k!M;|xMbXr$Qx#v7zKOpmyw$8C+9FyM{$i(JeDTG)+vJY( zB`1b-7MsTy!HQo3oELq-0SD+T<~BEU^n1MEU1hU9{S`o(WA#Bc3som=rkci@ zPZW17xVn7(HT&8VO1xq*^bL2|w>;_}Kii7&9)lju9J~p(yB1fzq|rg~*ES8p>Jgou z4oQ4MN}@V%@8D8fbxcC&BFo#k<>ZFsoA4x94VR&auQe(V+J zRLbD!HOr`yJlZAx?p?-Y90wWB@jsASV|F?_6aYl_@3yU zM~j&JfD!hNDKIRO>7Lf_V3H=V)nWDMfuL>$27MBk58n-@tXU@53UQEYR)ypAPJVx} zr5J^E6&whac&VSGXW68H#L9dqLxpYBcbhX+`(?3T6|y;vkj)J*TX0vo%+_r9bll^H)qnLtypd{xU6;hGni*WQ=$|^-O&#EQrM-aA707EVHg;D<8=* z{HlNG)^@gNMS7=e=#Q46Q|bk-GD458pfswm)Vtz?hy2h#Y2ce-@?l8!>jvw@?{o?| zU>dK*)n7Uz#+-PY_>MQyhvt9bE!UTGKk=g8F4lbe_<#TJ|2d--pyX_fsVm(xG;%5vky=UC_9E)OP39Kq=n<1gXx z6k7o5pgcw2Ws;eJyx$9YrURwkNw2tdM&=XD?>ZfG_*#ncujxbpJUQj!i;uwv7M}`C 
zqUakuDepPRvJ+pQvLqRKQO9W#g%@&Apo=>n7vnl+e#E1dZva?80lmWJryJBCcpa45 zn+AhZC$J`Oc!RtQ6fVoZlQXEl^fd-;StPZD7-{?TTj@cvG$w0Ybn<(8g9kfmyzhPQ zt87-30Bby0N^#wF*Om9b|NVunO&CDP%aWWMGWGLFWrV_vU+|$ab?>)*o+HM0+){E@-`p=Yg=haEnF$hU)L>N=Es_HIm8? zn4D>1;ff2aA*uL|+|frIVpv`Bt@oGf&;D*g<#Toh>nK)K$6&$wEB3@Y?X=UheF?9$ z4m|KcJ(VzfYcw;<#s-iT)dE%ky$?I=u-@KPd@z6V2Y*`DvaS5pt63gs#QeO!@Rsi| zOZ1knoF*O-!s*n+)hTefzg+a=FP7hY>W!H!f82yg_B!hmW#+bfL?>;y)~mLBpvC!> zfOj5UN7!|W!b{3pledRbSLpLP7lod0=(+IEP zzWUX#>QFfrfU~dHZ-4vSMP_L171kn)XbfRVO5mJx&gl&v@Yut?c}$pWCmv>}lCLc+ zjea42;HIO&^XiArBpu{9bp{4bAK1M4&nVj+`U-;8-D?N`oci7)%b$PvIqzJWj>L$C zS7nEo@WdPbZHEJsd|Gg=U58-h{EUHkN`E@+&{L^hM>Zn8;iXLF&*Jh8n4LPTY8oZf zD3C#5sViZ4YEX=_cbMxlgg$f@PK1$(OI-O;o(XHy4YA^i5CzO7ZlxchzZW3si3?qj zR~Tt^khidL_J4(uBVmv#vPgRHg?fLv{Kx-2Wbj}i^ob{)xY7(Bm9_L-mi*RtT^v&+uU{+BXt zqU|LWJgOhq+2PjS3cbWRWJn7=Ba@zDE#Q`0W^Au zja>ktf3g+Nh){0oY(p{R_#J*D2>OO^q!>^F?IMl>yZ+8RLPTucdr|qJ58N;rH}j#OK(I>3Fp}78_zqX$C;h1zy0lRuTUG5 z_A)RK`OR;BQ@;G=FPE=;w;jp>4$KIBKis&a+L|u;HDF^(f9J zeah9(eXryO`Qx|#tDN)Yq34H8TxY|w^O1JM@{R|H4js@%E;=j1J>3kacD@phBwXs? 
z99%C?>C!;6^te+8-u13QAB4RWgJs-x@!Tn_-W54R(%|c%4^O&weZ{6 zaOUm*?6UrLdl{WOXp_HQ30a3jU*iKy*YN=HyaXW_22w|uos;==aPZ@oVW$ZCN>1Ee zkV|G)UMVj-OOK<5Q~DJS(5qYJl~&3(tX|%#J|Pro(9u5!D+Y?BPoVN48IeX97D=SA zUBt&3QFd9TfenF#_sq!JyAQJUxaY$2s`uE*)ce&x5@9SH~ zJ-6K`-tC3&0(@ZRaJ-elF|#TM4R(eYJcDU*^SI1kVcmM`tvYizD~PFK+>P>XZOpA2 zC9v(b+v+&u%VdK%8G&Bu(ZMBS{K#bG)5!1SP!=v!g|EorEVIl5=kIX1ADktB%><$mzUlHml(SEITeJi%uz|?d1{}FzGIC zBB|t}cQMIDe&(lkx#Aa0lQ)=c88jT6Wn2TN@+8i{Ao7M;FX*(+(!xq}1o&vMQuZT_ zutS|h1TKS=MokOD$A`PB6S_d-A{0|N2CyDp=+A4(dl%lR1%L~0y`kJ`-#6~CbIfnK z?5uL{ox=|rD90-83!yo=|dGCAQs|AIbKfZmiG34uC|N2p_0fBCr zzrq@OeU;U*YWf#ne6h|P$lUY&V@=zR9j8|z=6eU@e!t_e#M(2jK`NP-$v;6gv{_9pyaramUbM5bcs-Hu~ zt}A;9@OtOvFMoM?;~U?&SRayCq;lcLj`Pkr))qH@{CGWmu&|JWrT+C_|F!T9lcP9B zbHLOxUk8#O(6dl?*kOkZj`^!V2@GzBFlGJOwWH^LGhaESPsdJopHAVC72ZeC5`$m56uU1|?7<5sl8i-6u&X zf-`9N0FXSp!NC%2@ZgTB4ibix@A(r?zIeqzlk&r#vE{CX-}X;`Q@Q4>AC>F>@C*B$ zUKZZAAVrS(D`5$+rsJT44$_*3Lk~Svcj7ByHiu;C8{hbbZn{`?zUQ8M)^t9mKj8^a zD0Ak_DUW!>BQ%)sG_okS%)f!8z*}y)MQ2{~4TM8X#@wn=0_?5BLZoza_Bfa`#D zr-JOfEs+nIIDxE#S0699L3~8;bP!0-G(lrf5ivY0pfG$0bWblzRWg@IgCd?toE>x# zLYDn*z3M`nwRK^+?V5}ATX@@F?X}izHXZo(^0#{zYN2ANGD^?+{_Wm-%HQmR{@?Do ztNhItH?B2(@23?7-f>?QO$ z(kV`wG)esr1IAWcZKd;wSer&CxwJEo`qEa680Ekd*Ps6Mrx6uqtqY$bxEV})83}or zKY^qm28|Vc(M{&%q2^PumtA(54rk)glgChgbO_9LqtRd_>;%AE@6o)=TvmCUJh^|4 z_dV9(dZ%s@Y+%TaTm7P4tXt(GSMdO;hm3)Mu;op!uIz~ebajC8ok#l=;*iSJa1Bup zAn;@0QpEg}qhQGN)F~Tnt`F$aLk#*jsVhm+g}fzRz&ie}0O>z{%VX>V?k&#DuG>H7 zU1jPf+bDmc4<5-Y{UpdZhYa^M0Xd3C84f;fcMbO~?{6$UQ}>U{6WNw8l#kXY1RCv%<6cPZDM z^P@~CH(m7GGIiFrE<5E9Y}+p6km(uf1`pyXzm`Sdj-Rq#Zva^wyQvC}1y-sO;OUF) zSa?$~+yZaDkI?7PM~vnPb!-56h?xf~_o-k$X#MC%KkBXMUJ_OLM!-C8e&79WwYc5rXONUPfo&8`$h3g%1o5s@yaerD+x^q7xYkMvq)yvxU;`g&<=hRDAv*6D-ra{Iv3? zZ~lnh0P@$1&M33@dyX@Mz8O4Py5ueHA)=Mp;)W(eCI;}7I_6gG65s$S`WO7)|Ni&o zna_NtzI)K`u)RB5P+?2YeeQEdRZz?A8_1}nk0Gf9cqPT_h5!EV|1LlO`OhEpQkgMh zhUUBElk{dt2dvmM&$8B3^|F)c`du2#AZcVc8*taYF&pZol@D z@~i)NMnG&H{luwd-+%pHGnj?KhMV#Of|8=cbA2PF#wn6c{vW^a&*iUwKE291`Q($! 
z6QB6R8XpU+nkDd(m%OC>*MI%jh?!?UtS^4?izBAD3CrfTv*kDz9(4(D;1Y{@zVL-F z=!4a030EJtW|=f&lQMDL4a&H+Y;oIK6U!RoCmOk}k+2W63+-d?-M1_#ciwbeVc(}k zIbLmi{No=0PeHK0*N@j+#~ypER^cw{1KLP>bvPaX06+jqL_t)1dC#8n%)z<#{9l&I zGv}yt6HWAc!w(+bIeG_4j@;F0zZT83d|gC5@6f^K$wM%J;hYAz0iq+WuYMxl@)14= z4ZZVla#9{>@DQ&`?)o5R;`(#S8n!OxzPtTtfbSu9+<0Y~u+GLcUy>u8Y(PZ5U5rE3 zn5i;S4g@nVs z+8U{}#s-j)N@h92&p!L?^6q!PTTdNz#r6pDCa$wVS%1r2%6ePuRA$(3+D7xNv)Rz! zPpaeaUP_%#^eXCX;AcI|y*AtDwrejdf4%GvWx=KAmg_J0tu3BA*Jc3SGpt0uv#?hV zQ~2KShBxR@mk(--;~w>>NA(7f%YXBoviba{cby)pNq2RDJ=TDgMnFA0$uU4E7&PE{ z7h36u##4@?NF>4NWbNHorOiOa0f3Aiz_Fwjq?RTQ4VxAp(!qT4bTwt;t;r(0=w-p|XG zXZ)aC{_AgR)wQP$y)ui7I~Dc?8m;3aT)X~fwKc|-Cm;XoGGXd^p^JBb{;DUY^EUZu z^MuTd%U9NQp(y<0c;g!~|KU8~E7zS*O(S4*N=q3zumn;o*JpsKgu&K7LUNBO;i3n$ z65Htk%#vz8{`}1kl`DSMy&{-%;L&By0Y~emfNvlohj~vA>e0)}#ltgUS;r~={&MGy zmzT3Y_I$X=O_?&K+N&D|iB5x_$((mmU58R(mR{-AA$cd5@98-_=ZfEbzg+UOua;}=O!3-DiEDS^tYr2F z+GZP@aykmfS?KK^Xb*eRKa?Gh_($mpxjNd$Z()QDK^%QXWC?umgC8uf zdChCeV;=JuO{y8qOsfYfsAcvFi**U=di3+k zT2rmFlyI~JNcQ0?zI3UM>HX`&C3nv_+CdT zU_$xx)Ve6Q z%$`8_YR{CgbWeKHleCY1##c3;N_x+G-c#QC*0&COzA&$*c7Db`^$uFf+h27Y^akeo z9g83pu7HMv?DV2Li9SZybW8@$&Penqw6|tuv;B@Jk38nR6|+9=NPW97_=PU&Cqqw^uUS&S zbq6p2HloJe+7aSF~^c=W}!^6~}s}B0WQ_RydK_BrgB7A5`fI;J& zX9pwh_{rDPoNPGx12PRg<=uSgS>+EWzP?JML(VO`EAa0SpiTVY4}Z|QCT^@)({$;j zmr6HgBC6aQy?6~y$Hr@MIys(xS>MuOK@_ukt-G;5HY^&+Er zgS1?pKv0J7AgiilRufa~u zC+posbT&dtOyfb&I+V6SW;h27kX(*;RC)IS>Duesk+07_u55AObITc@dyDNzdzvd2 zR~9y6U`#tc{P4qjX>>)8mdiPIov7lj_-c$x+VsS(KlqwIFnTg&)KQ?0#c<2NkzH{LOL|3H{-EISJKqhB(VxI4~HsKFCW zq|@0X3LQ(OSGei$T6#;9^gt$jh0UTm^Q5RTX{Vhu zRWRY4vvJ^YUoBJZoQ-tZamo+*q$PaVa1@uPbsyq~(l}!GeI(1>^9g+x610`x$b=Dc zDjal`YbJ;R7qM|X5qG?l7Wi$rvS{%=?EG39^4Qu$gGY_=Z~9;J>;Eg4{h<5y0iR#| z;upP*^MEZ+$5t3$``XuZgTYNyTXxGs?%cU^weXF_aVyKofy<+umIu#eHh^#^@SX2` zr|QTtkaVQ0l4BqJr#QnqbaW{ z_Sj>OvT)(TN;c6Z4DA1glgeZk7b44ko_CWvP&T>SF2vnr0PjqbHW`Z#MC za-5H+4M{ioz!IK*(#yB<&KrQi^vnm+Gn};G`V*w-A973($#Rt!`bP~27AQ_1d2029 zuL-;5^7HJ~l}%J=rf4&#pGB$AN_;F7#x_cnH@~SjDezdTt-C)h3ontI0e1@$~o}5 
zc(2%Fm3059^zv|;U zI#!(|h*=iOkay$`MsL?W^XlJE)J50)C!h2)fq3S8NLt9PatyIz2a)JgaMdF@M)^yI zh^wqtegczst4C!GlK!3dh&Yr+bJGCc9oJk~{_y!@dnfI&IE-~YqkSDg87zt8wG=Z9 z89dq@2!`V|!6s3xx5ds{(=>h7)@8Dt7c*)7S@u*uz1lW(+&D0Z+iI>SRZ0$biI8BzfM||fTjay~|2=Dw+ zjB`LZXLrGZ1uONzEN$Vkiknw>gNp*UecaJy_vgP|v#V(2wZk`Jk4Goz{h;+7-hqg> zV00vg!(a3=f!e1JWfCW%juQhxrdibBA%9?yNSu~_FpT^Km;S!|;y+((ue7eqWch57 z%`1!P)2H{~L^$c~qaXcf56;oBI~;Lr+2WuhHDF~TdQ@FhB$R`@>^LbCFCtHK8fTOF z69v2o0F)QLg?-Vj$^T6*J0S@zbEAi$jDJ7L6KW=#|hFzVk1Yx$0oIt?ci zdWWMFF^~WCPp6j)zy6-`w}rmC-BXI5JQinFang;}N0-lj_Om*{kM9&Cxfz@9R5rDz zZ`L+3>rl>UJul=q^QEA}9ZUlM)lr*l(0T3or`yx;56Tt4{chpB`{5inThD*~^UJ^e z+rQ~!{BSs{8tnS(uh)jEOsZgk@V@)*Tlg+QfBn;+{!}yJXurSv-S0;Atz?-EAZvjJ9MwW7niv_zo`TF#NJ}tfaqo$P_l^QoKx52COjqjZ|UR_Rm|MPWL2J+G| zLxu&Vd_->@gX=NJ98*5^sZX_Fh1Z$8L)reRuP-yV-OJ@#Ie6z1sHO*T)JfN-S6wYp z;YMeOAZu~pF_hk5okk39e;c6a1Ug#luk;mZEau)nauDcjeI5m$(Ej zXSDp~SKlfZ|KyA1#*4bu{gsaeI0J*lurGMQ3tI3h8P0vMc+%iR!uIKuSCEtuZxT4u z=gVLI@*>MU@bP#yxRzgr2OCm$ldPSs>74exb=H1uqQ0gEtiKm&ju~ z&Igw*4m%bS;ipWoZ1^$|-+uL<%Jt`bzg&N|ZNg|<)eJh$^d9Yzq=U4hiSxbdm9Knd z;n282H*xyOpe;8Fg}9dus9Ek5mX~|$3mAndy1=1sU~}#H zzbNN@{lj*m>0y`=+&P`v&nc zJ~n`4Ni3#+`st?^R%f&LDIKq54mkExW!BCI);oOq4mouK)?m~C45Ee;4JguRKsawy znI=%ZWW#aX!0A#Ia-6|5N2>Ha`N<&VTU?E@YM-h#4W6{s%DoHkEWh~R3(F0E`b{#C z&zhiM-!&2}Tzjir1wzh*BZ-a2aR^^_@3dZfAjXWj0~mS!MbbyE#&_ zhZM&4f1kacy8D*vb-qV?AXRe3XWhj6-~ayJ0?sylX~TTq`-gw{hhcYeUBk|l-~O~;WQi(X%sOYi z0chxQP=dY7ki!x3=dy z<%JhsSa@AE(r(*qNyU@yHP>9@1@-{f)gK^jENPe$;4X~!gYDn}+MdV!yS5|I;1P7R z4;qYqgz0^)9pGo@YXPO;E2xpg8F-SVM_M+%N-s30cNhR~XoOmN;^|T+#X%DPke_$% zme|rO3_!8A`kwho?E!;{7y$GoRJ`psV`lh>PV_JR#zkC=%J#cjyqvQnjc zs^bZoskDk7J*@HvhmL=T2e))em!wiww*%6)dfW@kL2o^|Oy6|7M3K*1gC{@v$$FjD zOIQUW?1{!qId5(LVqA&ufD>Ix04x9CO2$0Q(dj za>yatI*7q{QLh<>=Q27dXVx$!mRUQko3i2Da^SK5Q>NJma_kFZPdk=5ggsT8vA9`^ zHwnWA?%$n!_u<|*s5B_?b>jjgn&E6$c#VA;vQtTrW@$0?{GwA%49w1>!yM@;xx-p{ zLQHyi*0Y|arwb;$Fi>z4a_zO(>X=(@_?Q62ri<-tO1_scRe#Wp4IqOh@&Ix6&}SFR z+9(Wnlb!Z2kABlhcFSWAbF$jL@v1G{T9Acg!-({XYxG@%tHi_b@}`{n 
zr4M7#XXArK@C>0KI52Q)dV||Ga_F3=`5v+BYUR4~er6xmj|qMxlrn$*eC@xZ0m(rR zcx6%okJ(HM=Q{~^Orxc38TazsCX6DSWw#`9TlRxfyEBa)3)l)K9UnJe_`(-vwC7V6 z?M(|aXa=7KdIZZT!jlJ`41Vi2-tkf8QLp*0veqOHsFHEZ7V01(P_%=zpMiBNL1y%_ z9ydQ?Vf^W*+%`_wrDkfM&c%m(L&0v@>G;yqzEaNkjLl5b)aj0v>E%qrUlkl}@zhgK zUF@v#1j;nuLAOi>5I(rz7^V0zZEk5B-k_p??>I9~Sf$zCiA|olX?fzmeXp!FWgWGH zYUF0Y8FphP*^Qoj^aluUK(+BjMk{@E`f4;VGgs*qFwVw;@=hAZalQ%9wA4p9G+rYH zvo|>O^mo459EYQaSF8UYzxk1J!8eX~a9lY;^F=RuQ4h8%NB;MJ|5vX#(x#KOAr8t*hfc&kf)ChaFeE&_#CJq+NaXkIS!5c$G~`=w`36*(4J_ z7U#^HDrW@?ut<=n%Efiq9Ba7--7*58=q-kwI*EBqbDajk^Kw}z~K8eJ~cslujP7ZZZ z7KV|zsyB@?oM6b6{@}oKfL^^q&>JhQRWSO~?J4B?^SkrinHI=S1w1Z~xuPSCxf$Vn z7TUJ|r4cUR)klRFt z(J4D^g*$Wdl!&XnU@zE|U9x>RO-EGiGMHsbOVfrceabOy(2a%&9)CCd`M2d4A9z7; z5aBf-i(t9a9&@8F0ekPV^o~`Gt>Xz~WpeC3+rDlfZx4IoOUq0u4o~*6OIbxI}fJYiUD5$}6 z8HQVbu`COe1BV6&kGXptYBR0gXM;!AWyoA)Kr$oQss;}!6LwO~#j#sG(`+1Olm-}L zuR-s2Sec2WhYR~bdPDaoB$8s`r{S%?<*wzCFFoFNh!|JZVbElq$Y^FBRid##>8xQX z3WF(Oty`G}5R8-$=dH0$o3&Nh>FNL2X1N+~Z9p<;LowfB0E8x3E=Uv5Q@jBVHUcMu z(y=0rKK5Uo$Grhc*BogQ17!aXK2_5u4$S`?XxA#k1LAE#^oL~*D3TAY^t!*WVdAz= zdX3oR<^TTg|H}8i_q~jc`J*fW2AkHkYmZv^vIw)Y!bd*xkshm!cG$PUlEs9>09BJ-jg4hs`3>#0`qBmDW9;Cf*xse_j^W}zS+Z(SRGS) z$K2wTz~BHv2e&*rIbNv^J|#}J;+-aJ~SMFBwXxaX^FgiPp(8 z9DCjI@Mj(er|S4V#*@gH8+{4zk#}%!F}|BDOGmvOee}`2?Mc=cKd~I}kDs=ka;J4R zXmt-R^^d)%z7+riDF1e(Z9Cu;69eJFY zo*AD(`C+s&RAOn>z%7d10!%zNWFQ!C&?_tqkR4Ycr`H`;m#h?j!U$Yzl`h|jDyj)U zhUiK1(Ws-hkZ@ai@*)fpEfn+wD?KpLByL|mVsLaEIQF;NXqU$+oEJ9v8)iPhB0((#RN@C41>j(SU3ckXs(L*`@;UZ;wta08|N)xm;E7nyE; z+OX`XffE!g*t&@O>waU2>h=}`LI6_}$qZXV51zWEPpg1w0DkIep=510IjfaN*sCnI z&Pm5x6(-G$*=Ux?$o?`PKvtF~5R8pGk=8K_Wr{u0%TO&H8?8p!cEq%VNK?A3HyVo( z4NKqhY8yoGN`C3XqbI+imrV5GZ!zgRo<1;Cddq|J?es^@nm#C>Mr*S9ZGDJM|68py zH8yh_+rosJ9KB=l*R!AfY)$<=?6AX@>Eb~?$`8I;-00!^*@lQmQo&KjO(z`nj(uT- zev@KhIfuE~f!Ntd+5>28Z_?S0(=t%ta=<_~uHv;cB-t+qr%*-uR2oZ%1@z(^;hGMS zrK~l1ogq)kzxvg$h8#Zt-KyYtL%_|-Xk0f9VKmso;IL|um8uvaGi|oqa?4?`Ry+{9 z$K6SOX0A3Ok2=f~I{ATD zpE^iS`A8$&ILWW_ET(@NTZ5SN!ZTtKITOz|fIuSI@UkOttYRk*ZdPVJ@`>e&Uw%D7 
z`cOOC?Gh@{N)n)qcs;s2SCB7GQN80G?bMbi=_Y5n#RAjFWsNj3cW(HO*-NrW#=QY%BLVjT=sU(? z^4>UlcWX2-oUvLg`A%Sj4=mr?Z>N7|q?6UBSN;q{es8<>l5*miF6KxwkGc+xO%PcCK& zZ7Q!QH#!1~p75Q2y}tbTtxxD3r~A%#zO%gPO>Z)amBXEN)>&HP^FTXclhb+g<`s^- zU6fnuCy+%6c&MfGl*EEIzHJa@n+$%O&c$pOW{|O{>jyvhK~MGV4t=Q`syf-8C3bii zq`iZclhH;n4OQkJ=#_;BxQe8;a!@UyHT0nFyucqq-^dqZSS4@Ls&n~@qG#%#04ZP* zLu+?cZL?BYftE5XtS1qRBaWw#8C&gMZv4})EBePi_OWvO@iskl6>|(AJb|!93BR_? z9_*rc-}~Oz+jng385`Rgrd@1ENUx~vAmPBYIL|jR2>c4n zAwidehY)P&NM(YFctCD(4J=BJr=`%fU%o*Ii3Ld$xrHa#StgquaAf)Ow?Bq7x%a;J zz2#*uds%N^s7zZCeKt{K@L>1mZMWUFeD$keEpz6~(N4=3Uwm;n`Q($!?AfzN+yr$n z&nx`^LIdT5Jbvu_${p9@x*41ASk~EeTO;cl17@BE+lH$(-Vbg{%0}Tv3si<1jiU`a z1%wSf9|i?6I!72v-|9xAfH$~tsr131)!Dg&Yl&*(UEW+=BR}aS!}F0D?epo%D%0oe z`?THw!kPn)8(rC65qO!6d>x$O!xlMz565;T9LL!WL*o25+*4T*i2GH3WZ6WmL{-3mA(Dg>40-&a7@-<%c1Xl=R2d? zdZ_!dDe{%$!p@T1IWWTVO@=*O_@=QWg9jzD)q&5Bg2BxE=Yxl@ZY5lujK1F}yjRY4 z74#ZBg5JXEyfF;WN4hBCZ$AwXajdH32Mg$PU=vT^4gE;^XC8*oCliFrh5i<<4Hy0? z1N|)Qe1as>W^GW{s~Zfx!V}`|Y>?yJVfggRNB3t-bDsW%Kz@ zlYMn{t7z?f+YXv1(jl&~!%PFM@>Bb%DH#~zxAucfj~jE$E3k7Qsl=Eek)FH?a1(7* zPMrXSiQ3TwQ_8%9o)1xV3?M7Zj4a-|&@blh25gp@Tk4QuW}j`k>83+=-l&B!5Ll_6 zK=|m*2Ou8HmeoQl=EBdh4di5$Dm?2P-9jyfxki)1Xzfo144x-74M(U-7(yRLtwvHA zz{GeCG}6Iu9yJ~LhMNO{6IbmBgv-GRK;(rK=%s%sFrdHd3^3^-vbf2ttdSorQRI=9 z;ni*Hh>drBe7WS5&q6H6nINm$$8@&FX&pQ9E^Eg(4K~fAO|VrylU#UJ%bF5i)iT|l zyUo^ZcgTxv!Qh$>@=9y$#)5jiX|-*2oOWkbT*DBi4Pgd@15DBg4_gU*x7(g5*-`&> zDspn`cL?3?;E@K{@^5dV&=GyQ^iF>#HeU9|AbHNUC*?nX=L9=0*pC$8b@z9_``z;R z$3GrvE3~8DdE!u+TcPkO3x}PstqyMMt+y^e``OPlXksr9y`!O)@v^5Y4~oHR=m}i(=gM}W!_=DZon9_JG$MBV89r8LmzQ)U7piBtzDy4C@tkD zea0()XOayF9X#yA4PnA ztAfK;c|uM{XFi%I5N5>Oci(*~C(7!>AO7%=wIIBfpgeh9%OC~*XdOq=ZZY2v5lTD3 z@-ple0ym7uyk31%zu1&gD@|)NBk7U+l|H~zqQZVi_hi?p4QEOxqtlkO)XlX+PpF-P zhiOHf8)r`;8{68YcGlntC!DZS3?NKeIpvg73g5ms*FHDB<(OXv5T3O77Bbo!@D+Lh zA(Q{{AOBI_@|L%hyYA|?XYNdalgZZ9%5jd}+{Df?m~IE)tZq|sv8ulDVo+{YvMmeW zakEWxvjWB5Fv)BX9yXZn$e>b+a4`B}cQf+B_(eYucB6KgqF>Fp29C##jT0KXzOKMq 
z{^W&^iyeKZk(4)Vbe1!Z;jKq+F)+xbCzw}d0(beM0W02tck&`7$?=mOy&??O^m#j$ zH789k_uTG>PO+kz?IRxj=to=X%I|mw-8xPYT+~!a@L4z04hXql72h#9%9e$I+*vNE z+tiL?UTgC7I#GgJciZv$Pdma!FiW`_0M+sOe&Csxwvs+6!CAg0Xr#?F_=%Ks_zIgb zB`obzJT2YWkSJ2WI4*G1gia{QJmfixLU{(CjNV_gvJ|2j#ug0|@5@P_V2A;m&B*tXU7rM^PTRp7WgN=qOJz zGUym6wzh8?cS}(DLP{UAx+wiY~r+dUoWdOn04~m!Dc}87^VdD>0JV8 zO3i5aVZxH>Am;_#3?Qui;I#qvi{l;iq8x*4J9yBr*-(-78z1`6 zhxBS{Q9KrvzQz8}s2ywCcG^01oF>xp$?68?;o-0vTQ~px zzx5%}Vu6E)OIej~CU`Q8F;Mpn2A=dW*z`Humq{DUEw|g-2;9Pj3-w`nMStD;q$fQ| z6DOFSoKv^C2|xVs!|N79>}SYL`DolyuLEfXBank~fGtnQH;(5$?|HpV98=!9b04PT zW#{bvI5#{xFnfx~j;ysI8B7>Z+ETu^mGOl|^6s_LCj;4?8*tBf>r)9Dfy~|eF!ZEi zu;?c_;5*DiO*$XTQ_oBcTpAE^Le(Cx1P&{IIQ@fXCnELFO!xH@|3SS5^FsctOacu} z%Qa{Ipq%yTZa3rIci+9R@^@8oyvE{qQD$H*YAp=78*aE^Ego*vn7KB%4_!rwz~uF3 zmua)M)GJ%|vs!cFWVMBR@3J#U{(5b>)n3!ybmgDTf>tX}J>d-5BBWFy^(2>u1sqt! zJDUyLbL=32*6AB_Fw)>jYKF}R>Q*drrPXu?=_**TfGTg4fXlwXvUJze2{TPeq&0FO za-Nou1`3PN>4L9+uw4AZPdg*K@*0t2msb|YTN`ftaoo^wBS+p3Jn+CW+V65p{RA@j zu55X)u4Td1yWaJ#A$7Rcly%Dm&M-_*}OJ5x_{E}W_q1t7rOSBdrfn5V}`A%5{cZ>og`c&RhE4_G;*QCXF zL_q?a@<{=daSK}5C>U7e@SU5+nuKOXh#N|k@6k@8GYcx`Jz@F=WxP%2U3kk4f^!pc z&N=6lJ@(k6;*ABm5*Qpnx)cvhn6`e|;jovMEf0969j|Q1NgJ5@taa*DSifD>?+v=C zx!)#|tX*-bN7{*uf?7>82%bpvy5{D9n7fpP?>B6HII+a>l!(Km>*WC|?yQ?U1ijiY zdV3~29YlamK^eZw0!A9s<{d{mDu3jcJmIbO)C_`zPvMTHO)FsdeVj^H+aQqj8F7!TI!nihC=ZjGAD9`Nh@EK zr|e!JBQR%lZ5hZ0c`a-%%ZPB7e^22!f_3aXo9ljmDgZfVx2Z0$U%qZh1U`y!N%P9Wr=Kw7af@-tZYelN)Uj8!bGi z&3TwL%%4=6^Ugc3hT8xJfBEH?Yug#_0J)i9CJYOq*m8j%a#mKy>?%HZ4!VtY*r)9M z()X9CwmUR--_;YC8TZQUs!&GbxC&ajqZmyMNR;yfmb?TutfZmLsmp+8z@t5VQa4eE zJc!_sbOs`wt4S0*u_Tj}$R}~C98!e`bcBm&fo-J)`L)V#6-)Vv*yvy7 z8-GZ0(Cie0DCDQE$oCxe&|IYBdPkf}n-72IWLFDChSZP+kMzvw2>L63kp?RClQ!_W zGd6N8dSh^)fwDTCGr8F_ifO73z7x{kkvHenH9pXQ)9k72~~;bHO^xih#Bz*acnSxzbsly;8hXWR2~f-GIq z3Of`RA7$Fkv$5A$t^4h7f7?bCX6IUNbhyD`#vC@uO%69Vx#D{W(Sr!!3w>8GDwb>aQmo_p?D<4Y6x;SYaU*cG>(!Z^t`dwcxb zzgDJfI9KM~+G-k5TCm5{kiv-EQmScrz%9}Z2fOVWB*vE(oBU;Tt>Ccenhw2@d3+f9 
zFw|N$#Eqawk5u7x^nbI)m5nh(x@xWbkSnU59QfyJ2g|{z^p_G|1T;b(`#oa=;++{+ z;w#^8~f;wgwEo#6>{E>Vb@6(?4G&O+HOpzUl&)*xK zYh#!`nr<~)$v^)!CzN@69U8V_DUIDw)Y&0Y_+Bg2L)bHnqT<67>x!B5=_@tq-9E*u zl_{1f$@wc5^4$&%9(L*SjaSvKp|PNYE!CL-k^a^0LX_w&9OfuIh%;Z=mMVQ>7{XxN z2$sXjzWde%<;QP6FsN-nnKn)PNPX*D-zxM^e7k#~Es_{S*m;?0>hxt>Y_Ub<_4f$O zpFdyUKw7(68a~moEpapo^B#`JsJ3!gHGar@zFpRxvz;3i zjIfwAXu^%FF?0j2bj(Ml=Dxw=p*hk?lf65-V@0^8)UisgW>~`8|r`|MOPQyI# z*iV(&yFSLVV&&PDwAWkau~&zQm&Nauq`Fa^$WW+jLkNbVm#K7vM)FP{9MK10Fx*+1 zA*2rAB0ubXD2-DM!b60-OiSO>(1j3}XiJ{Zmn{^}B=n*QMO%Kn;=*nn{r>WUf84Wf zYvRx)zjDLP2W$=pV+ZgD+i~h7=YTxWD=fa5u+C^S%Q2RE09jrIzzhJ^d!*xo)}!Ba zQkngT1JlmU0J_dM!(mDC)F@!!b|qFa6t^J*(^*C%$*_=}rZcLJK?lP&hsw|N^d&y< zGAu<9v841P>hD>tassa{fT4P!N2iM-p<738p{_+qZ^a=5R~N3dx`0KRCwW>sm{9IN z{HMLjeG7-*K>#}38f&dx)?R;8Tf;Ebww#%4kk!k*ci&d-z2nw$#|@X4d++pJQ-wUj zW#4`G)sCvX>iT;)R{gTMXF65`|L1@HXGVKIuko0N|C68mqz6CX^`9(r?a8CokELd5 z%Z@DGO$Y(apEgzBfjC(AVqkPmPdo4r%LvZuQ<~A^C(P5mlO2SShD%wMx7kI)0>M0! ziM@zPgq3KZW~PTFJCDwcLZZ}CwE+%g+kQ7j=JuVb2eQ>H(A_mj&V z*IgExEgO&5L>%qRIrO7h@x5%MIu|z3WHP`=2THM2f*W`ih%ZXFrTzx8EHjq8*tVe^ zJb-h+%`6){T89C+@p)r%Tf}gb(xHZ>V15x)nupzJDn%GrgW&uOO$Ii=pc-ArKq}Rz z3S&eVS8)~dOAm%BCLgapRS;IsYe`CRI$hBzr`w9it%IEjM29%W~r%e_rms`MP9P{nJi6O?UKfe)F4kIM9PRkDpfsgR=*7 z)4Hl)6JT5I!NFtKXTPaz#%rqvk09|ZQ;N3QoDic!Ry71=4SV^+fEso%I1D0zEB+oH zc@2}xuW}oGEGPv#-fk<&-F>@Cp0GqhTxm{duvRAnX1cHqHyM%-vrFk8KzXwLisUko zz<-qyY7EFy*%;2mbvAmy0c5o`)+`feY@(ffS6_32-4xxa?cncPaHTcJ;rAnB2VA~y zee#o^)NeHVp@s5G?}UqbRm>S7>US0)?IHIB!s^Ivw%Mj!cG+cB_4T&cxg7HD zZ`qzf?j1(Wdl?~Efu7?D3 dez47OC?&o}f*C#K>(KpzH~^oG(Z&=|2&q@x>i za~Ehy=!^W;wGCEqOZG-hW9aJ-1J4`2@r$@v!aP{_=^eL|=}3e04S8iQ`ITqCya9{G z7}anJ357gweFinZ2L9y-k1V(R#b;kl-)yHcd+#G`@z#O%ZFr5YGoUnrg|qia2kX-L zMzi40zb;q(^8d;W=l?Xd>G}Kbzkgv0#U>Kvh59jKX_H6rHi3yJ`lJh;tCULbkfb3N90|yEX_jIzxBpI#XwcQsJNXtLnty;&vcnrF zgp_ygNyn9|fBUWAYGh`g>u&YPvi^4amUXt+rL4W)Yv4`_ES%I!V|QKA_wX5bi}K! 
z(R{CY$UT9uFH1Xku-4;Iul|VDj}0DHr2H@jei{s91QrP^8#GodO~!+*fY!L|%o%J* zb^ea!#^dnLP=!`G=<(QM!!@ps-gR}}l~jQ=(DrE~8;#N;Z>~j0D-8`;^#^Z6{n5`- zX>nk==jF${UW4gQqozlBDQ>AC;4B`biULlai6XRkf1GIz1JSh?o;G;WnD>|$YPuu> zJl`E}=!er^d_rf*=VGrpY1lLDcgt0ODwllkgtFj*Zh{9iY#*}AF1r-o^FA1Nee8PL zI=+DnH+X>a#y7sPH+ZZyWu3C$D{Ka0lTX!w7fAP-$7od_5@dsl7o=iJu^W-7>8LZW zv0B!FnV!>U%>As-^nns~PL3lTQn%Jvs8>)T&9R$0ZgIuLG%%RZ4Y`$>w2lU0tAWRW zR^4D?Hx?-(ic)m#w+{Hadg2^1J&c{=#;t8LtM-0cnYHIr3X3T9<9cQ-kLr_`6c4_gy-+^e+flNQeBN6-BeGhS zL4_r#($YH2ObB`vhT#PujWFvQ%*0FUa_X*DT#m;py%8arxEtI6ei!X@Hdv)69PtN~ zj|?zD20sE1s6UDsLW|SV>ur}Ms^|h&VHe?f%CLl{R1^qBYS7~qsz8lNZ>U2|)nDul zjkJZfBk%aNrwhbIWrXs$z`tulI{KYHVU?3)5`guc?@m^gyt6o0E(|A#( z=@>9Fx+Xghc*fn(uyKu1Mh%va92Be>V^6XG?YlDK9a{1|YzR;bwmRWlp zVc+B?2EFUn%4zjOkZy28Zgv*nEWG9V^5<`Suw4K9A4A@A%phbU((+hOhajGIuz9xr z$8p5%+u#0n-MFxo5c=^1!b>_m)p=slE9OG{?ylk?DsUw@6B1?|kmpf_Q_qT-^a;SPFxMdE zL(5-vmVnV${XG_jheD?U~+evf$t)Q=r0`86&ik-PdFvLr#Sy1 z`U@|aCy^PmZR_sYO8Jf>dh&ySaQ>9n>F?mcp}Znv($T{CbT)H`1Ili%KFPL`-z8be z=N&M2U#x&%UXE=eT6frChYdS;fWwn_=7BgzC~tMpbIOeuom0N@(%s5;{`K(kvyZ-} zT=0X>HJ0I>dej1Ba#{XaH~3yWlo$1wwGX@r2faaiaQG))`?L*u!Ye5-_#0oBPl|<7 zPAX$49_-{pS(Am|$t6vkf>lxtk&m4kEkAgr+{715yd0ABtxbzQ~od* z&YKo)Y;fquCj{1DF`;8o?jbXPu%dl%!Ql3`zt4CMbHX6vb&Ew5SDkPdV*g|sN3}-1 z67oW#f`cY%x1(1YBtb6*B6U8bi>u4@l#p*ESqW8s&^tFNAlZ^1cTA}ZIC=$UG*!L} z2yKEM`GfiguT)R;(#IL7IBMLLN?nav)jP6p3b+R1*XZELnJ$AheJ?v+#6h+ z#OMtp>51j5KTl}#Uu8H;=RZ3A(E^v6;HTU*?J4KsM}MGfyhneRXr6cvIph#+E+^t; zd||3)`x;=RbG2FI#iGI1?fmqA)C?GQ%f0Qozm%(g|FiPvQ@&6x|J@H%mYKIK@GR3T zXEIM%c*eMffvKF+cd1F2KBbpK7M5XIk$6K5jZ#!8AyRyj2e7O!x2bNx04MRB zKUe3OaKrGnZ%lFqS*9UeJe;iZa=oNjz+8jD@^e~YQxYA0W<uee2lqmnVbe;h2fY!be_}u?UV>cT4nOY^$O?iNm}+L}>i zSQSVT*ceC9TPPR^EMCwXp3^}R{6KB#9T^Vh!r%mJ=q)9AktBL7s?i^6SsCey1bxOv zk{`HLf5o{p@{OVE?3^v4boA}PL;uKy(xf-o$QND1i##AW;OOtP1}0^IW{^IHCNvd( zpL%$D7wP&lI44gr$ny6{l`ky^bU|ky-0{~aTR!nMWz+db2BmwfGhz^7-utq4+)46k z>a(Bytfp&nc+_x?5AE&lx>M{NiaEOz z;aAl!Qi9$+t0|Q|V80VhK8aTqt8~FH*$TnxPd7Xc>~!=^TCXA2 
z)v5H^LMxi{b@4shmC!(w3NUQMr>$1`!NWaKIQ3xu!`5jlxQ;#;7%Xo(C(WE&b~@%G zWx|YE!p#e>mH5EDtli6B{&KN=#VcN+HA%cq<%2rY%vq;&@WBUb+Y_EbMl)M+sRt0| zv@?p)IGAtGN17#ZU--fodSqK2@GN`9VIQxkoTh@Pj08~~S>M1rgC~K3BPtmJLnN-s zY{9{RG*xP(VLXzRJdCBw?jGyJHt9otX2NxGd6Jp*JQk*Wsp$;B86qEj`3pfF4*ABz z!9j0$z5Gjmv2Woom1Pqon~k3@v7{Ryr*vNEVouarjEc+e-C zmX(|}k#BXZQ5lU}>H%c%42LDTlViaO zvtnAe#r{vnzPwH>a}>N+0a#H}5%M(0aip%|-F_@Bj3U$Ff+M}sSH2y+5RAwx9*Q>H z(1(uBv!(aOSLtO)t^SUiodNn8M1Sx?C8bsUYpHm07^*{^FS1e|`BUdG4O)-@06+jq zL_t(dz??ev2oo1x#?4n56AzfYCU0tqC?#5XRaPh)+B=S;cI7YmP8P(j!*Kcw2Afts zR8rE(xn^EFoxA+k@{9aHyV+x3R%UFwU&p{g#~**Z&J*teAB4yiS6tEC3J8(&_IZ+B zJ(D-vmM^W#0Yqg(=AxJhcsW#n$ixLu3yyG>TbGnPI*GC!$cK9$o@E-k!p}jjd#iwC zRY8$%aR43>Y}3_Ob^Z zn|5-_C0kSRF@H2}sRxk3jq8?V0Qtr@zR`PxpSsazWyZWXbcjj-!$3U0(l4?Tm8Ob{ z3ZSAq?iCX#NnjP*A%aIND;gyF^h&QZpza27lP-O*TKO%UG^9CiVlJoh47Cz&h&@~p zI0p}=p5hZvBAx;%%((<4J&OzO@zigyLr8Mc`y8CU!h%=c305hNHbj5tR;iU6Xm}E7 zU8j1BYZxS?Ob8S=Z=gey-a_TlaYlYGM@CPh-ukKkJmj?j=ODDNk~%KSFEqGlaOO6< zm9=fZx77NUA+NAD_nRS9vb<#8$iLg6XJ09#+b>;)({~nLUJippbb4@3dU!O0c=r^6 z3h)m(*r)wTgY?7$u8BL~i(aW}hoTiOA!8!?%o}k)Fn6R3ET#=C^LMy$(}Ul~WTaY>&uO zjd&#yl+#FHA+K#!B0)zrD*N}aqjN+eiTAB1Xnus_p))AVYp0y4(jr0!c?x2=|TNbMk~-8`K~@Z zVdnInPsQ0Bz|OdKI(x+UTFNl`i;6VxaKRuo29%wi zTKJMnE-9=8XFbM)<=8Z_bsOzqPexvu<@Ot|vTa+sZ!>G! 
zI)t^R&QSKq57~^Yibar#zJq(nizW-mP-J&j8d;vkA?w2ZjssNKL^`iSZUCI#%iT&l zaO8>Hb?nIwHi_#wfOAWqvW!XaNeDim;qz~)mRNaiHelUkX?=q4=PF z^2sMFzPK0Gbu7tD!?|N8OGM+g2R;7~BYj_3~P570bco>>%A%I=*Lq0Nz z+F$DD;V`t&Uo_w;{y_Q*$qktD3wn4X2;Px_PUqm8@|AAdD375(1T zmE*W%Rs^rR?z#zDu710dHG($WE>2~%GPv#fE41o4x5Jp$`~H!|PhK*tV2!IBde!7* z=J0;`we{rutQNCQ%)j5tJKi7`~dX)CeRNdfJslzsFnS}Uivl5@Y zT$M_1E7~_Z_!Z^6&%QoI>v#huWAI^naUDAbFUjD;ndlG70J5~-KzPr`SrF;ivU>fk zcC8hYMkhl6X_&&#L8kJcqCpfcG+}VWH+(dsjL2!dqlq58G&B*=MuKNhbe?f4aH+$#AOm8PxL{`Uf|`l4<5IIHmlEia{S{ zgvJ27m-7rbU`Sq*i_0I(s}a@C8+J8ak)#F7>46YV{S`(g>ETjW=Vib^t{BvxV3ww^ zH$L)H|EE?GnQifBK zV>`X`Y;008bdgSAsqTdscIc$Rpo@sVk>YoQNuar)4q2JBsKHa8ETfE9s!n7pa1-d#YT#}s^z=${Q2{> z&CbRfZ>;k|*ng0jU#w7n&wJieX&xxB%)fyw?klXb&px}S-+J?QwATw097g3t1<6EJ z!Vb&IAUzv|UQ=Xci9^sw9eGs`&>N#lpHB;tgKY%8@xmyofkZkYB%gq}iQ`u*2W~Z0 zzk|%@xBI5Hy3^A5&6>7mIcV!?z2BDWSzc*V=JGX5vakAD8uaxFLLx_#OYc@@ z>fe$WZPlsjBBFrXtE>r=%dCB$6*+W|$Nt6n7J|(`?|IMDRCYE>WWC2|p2XRJkW}ftmYW^PnGO6L|2Y zafN-r0Fd}geuJlk5iV7O2f%lJ&VU9dXP(nqDj=f|4Lm)rutmpY-x{tGEYZoG8bry9 zYd}*ff_nVX0@v})bmv>qEBg#M8+A|y>YfridgtpBq(68HxJ+?~3aIikqoJ2e>8>iJ zAjiw{hdy%BAsYmC8}53jwvEpOPHbD8$9@Kki!So@8Z6FYPW^Bv*Cqe-+dbca>3CZ6 zYhG1%l$Q#|{Ky#5#3K;lESgFWyjBocQ39f|6eAFB_(_*O6iF_U5HHUF9esoYS#YF* zXuLz}khTPK=h5&INn|Q)*Ff&V7BX7pS3b}=Pf#7*;y|Nt1Pk^_4@QRJow#0Jj_g;@ z2{^c+G=Uqk5=XwY$QlSpE%%TK9slO9yUpHalaoM{<4w(yE|Ewc_)HmRFRYOkDzZL= z?WEX-XEbi92M|Usmcg^X{3)lLqQg=~BRFx|*H^%pfrZP;QC;ZLRcBT(6;G8Wt`q8t zNd%mDQ~eg~?({8U43fUJ&xEN?burXHsPy%Y-(~g9tFH&K?94o+yQVDByQ+=;&O7P7 zC>#koR7(1$g;rRXh63$i(&=1rLM8NYSo+8ViZF)>6=hoa0{R8|#3ii3*pLA$fvZPU zI~jIrPXiUW(#U9(f8wT}a*`I`>C%@_gSvdDBkbkJ2lbgdKfWbbL+b%Xh%j%SW0zY; zBVQwrTU@-E%gAhC;^K~ zzx2{ednl~JRVgP_r5_C_U|GzRHx-hOU{r5{!VmhyO`lW?Cl9Y&D!D~);{-A|byQa4 zEZ`K?uVC+jL5O}t3`b8KvVnN! 
z!ynyCVqMZ;2j4@m2#t|Nf>~aqvvtX6-yaY1=fus_-}B;SPno)NJBWz9Dx3VP%)xbh zPcs_IG+Y}7FXN!JcBXL9fg=u0ox+afO5gCWFhfA6d~KEX6sV$gT-VXnZ*}r^P_;J( zH7q9JT0k;9MY;;;`jMFW^wlR%K@Tq@mwuipFN?~jk=ig3G;mUCW%rz(JTIGVb(kPP(vgQeX!Tl0p3)0shWA3@YWC?`eUjxNGAn4%ZbL`%Rm$ zMj5|`H;~j9Uu^+;!YU%?lN?(4i9gZGVv6xo8Z>AVWR9QIu0Jrr8ytebCYe&}O&V9m ztu9TR7iq!5={#oW(pRvcUwf@J%2d-u1fjn;1+EDZPiUZbm;5+i9#T)6yz$m$!n&Iz zlDN?6X@k(9a4T12*#d2 z&jxvPAjj~{$HCLzT@!ONdSKnN$?&{c$)0FATa>hvk6#qlSL zvgtZ& zmKke%7Vo-3vMm9FU&>+r*JVqtYJJJXFRsuU}D*9-SK6Hb#hef zRkz$z{&d~l<@{^!ET>*{n_x-$fWGuga~Z%SUzYUR{WmHTY>;vjbDYZ-++9xm_4PsE z_t7tZ)JA3cM9)*=e(T&D%b8c)t};s4zMD@i`)o0-Y_s9oW!*_@miw%cUAEw^a{kq~ zmrwoTnsW4B8S<^RBwBeCPK!lzZ;=0V%790qYHiY+lB#VVMi1PWj`4a^__>mm~Myq&$4qlrm?A zZH;Zb?!E7C<-+UkC|^6{n)1DKuNSOsNL~Om(ACPp51&~cId6K|a>L1G^X*ZdB^1AjP-=@PUYHK9LE>gfRQ%8aqWU9d~@}4K$qN$$9Yg zEBhCXw&B5Z(xmPkH}QM!zC-naj@bdB8=rgSh;$jSpk=hyV4#EzlV8=Nz)r6_T;Q<3*@3|xYz6Q#5u9%XaF^FAz zSIZ?jPp7LHufPm+zH=~q2I*v-`HZslf;Am&AZ~9s;Zn;8@eRFeliXb%17Xc}DA&?Y zn7(1*gu}nx;}b3TxXmuo97V9C?rm>-n?9Cf+q}x!V~;(wtqEJEuzfpclQ1i5a3|@J z)0lMRjO=Pq4C zNA-z67`trNgvS52lD?g^ROCEt+m0mA!xEb~7+(h#HpZ3if5dQY=(Ng}deRDi5Ua{7p zv}sbG_SiDMmtyhCwR8%19-UfPD?5u_mwW6U9U9YB2W>&yc5X_yzchE8uR_Ovg$LW z?*M2aJN`*kfM-EF3q*ybq~oK>Ab^)Neu(@0O}0D;s(X`U-a%7V^gbfb@dP8wA2LQG zB~yUUPs4(4f6PUGZGBLh0nJmnmEII+vH0TT!`)VI2gqi5gvJX;&$^-4dPs?!sbyFa zORu=*YQ>X_&)=2jAsBv!o8yhBu!7_qFxi$W#h!XQ@~MW=`+&`^8`6Qm=n1C|UUu1K z;yx<*j308yA$04lw^IA|?LEn}2P?e;QAPS9$`Dl*DF+vGuDvq3NQMeGzE`{Y zbopMb-E@f0FB#s5hbd?GZADA@Q+3$%Xs7R1Z5q%&_G@pxnv+C{c;7CKxq+=qH+(pY zmKUu{?9n zWp@8VMd$VJNC)(aH{400tvkfOd`-S6*1cqHo7SfXPS}bXHz@fL>D93@-FsY5y5iNb zGBS}D(XfjZeZV#)A0vApk1*aJK9`11Un;j84A4-VF`~;OrvNbE zKVbxN+sU{%(im)68W!1Xx>qS93q~pchs0qkvlL~+_QGgfrU-!hQp@c55eD85kVU3i zR2L3qJ%9=Bh5}r=!(ND6@lax>|+~qPo=auEfy_x7-?h9ucUPk}Q7gzi<3QX}qKW%xlUNkDhbOELfiUXYRcJs|QQ|s2Pi$lwp zmo#}_I(w4HIcM}Y4j;a-7V#!RcW+o+kzcM+yXDsj#8Nk@pLr~!e4A%+Ox^k3+lEl_ zP@smPH4`>z2;))Fd=9#>ra@k%;Sex@hQt^yct`w|#!?6qp)xdIodp=YR`K8iYrFLVt!ll23#H9{wQ{VZNf1)o|>}5#-mY 
z-%#@PCuw-0>E*d%q5QxD4;0H;Ox2dP^FWy;f6|pgG8~A5#YD`b;iZF6+0K1VvP*U# zt9cl2z$jTBE=lm?xZ@GpfbNZClb%hH%UYgVy64MzG-Sp~c|d!&Z$Ovr**4ZeG^$^l zwraZ(uW78JgL^flPR*mn15C!wT}A&MG=~Iew#Oq#SvM}J71 zek)#Ex%KERdGjPMc38l4))QY$=jpiB5GNx{nZJ^5cE9q~qj;6`K@y7-igz2xlPTYnXd84Jhw{Am^d^$b^aP^x?3Y8yIS3_=zQ4BuP(m`ZMPd50hB$LmSTE2%XrDaG5 zij)U>qZz{|CV)^|{zBn#hS>U1iHb0|atxhGliz~_U;58HJFBf z@v1d+|0h$DDAbA<_Z9Z)5nsze`PaWOmZnJO!B{Nw7+%c(-_LWDM6R&xI&x=>h*&dB zBcneFvh*A`=|F}4O}%mZ>Nk#eR@%4&GarCy10Q}H_an_2`L&-UQ&>h$l^s>$IA z51>5tYEn&Qqjinq>yKwNPOp3oH{?MqAIDb7rTKZjTpCWwkNV?J>GTSGdAu4${B;g1DFe}i@+yxM#{O0$H1_GCq0%9wXcJyM_W|o9$8H}j zSj#=;;ybr69gdIN86y|Y^B)91-#rw5q8A38KFnZr=chC1zCU!MM&8%GhfYJLE~R%y za7W8TP!s6uaf8`5VLUK@*&6zOlJUdkWMi5SM=hXJckSr2G-1fW|GMyEyASAC4%seR zzYtzrspMifXg~{H;V%il@yG;LU=A1u)>lPq)=`6c(MYCg!{}aY-X{KW09g**!A@~0 zBj(eoWqj=jSzVb{W!3u2ORJx^qCMMrFg|3iotQGf{P6L|A5VAQc_-mH&o%76`|h#x zKxX{>87-L=y-T&&xn3Lgi6!quvT#qwM(19F#g%7|!>GU1J$kVWPbI;XaWpFAOk4|2 zKaq+8Vi3iVRf~0+h5~%PUrg-sK!p^2b`g|J#uIwUlvMfAkD4eY3YqeJIifJ?D&?9B z^VWAP3q6<_h7kf}w`uzPh=Qz)MlXtvI!5Jds;O-vkI_`V2oFVp-ZHltYgfj%IV3;0 zefr7>$6MZVN|1c9(1DZ|x_9p$L&ZF>xoNCOy?=6FjABK}9Mtn9u(;fqr$T&~x@Zlp zWV2F$e@_>$T2Eh0G#!iHY&@VH*dnPuY`fZFn*M_*kkPn~-4y2ED>M$|)x4i)zfSZvHC^zYwaT%Rb< z8AIbPBwF&hI5^%JcUFu`fao42u9b#0fUCXFf~x5tleI(JAxNBYLC?%h{Uu=AFw0x` z8ziL{-7yhpRBu`g;zqBYY!HMY!exFcqa{>2!I3OPXwQD3@e{?z)yLb28b5Gf9rFpryq62Br zAnLSMEjE@|%Ddgxy#4{%aeMU-K~6}jSoP}a#CTdy@H6E2HT1ap&w>g$J`e zo6w%xH}iA}>T>CTPISS4ekH8aKoO|VZ#-&&uKZERraY~Ken|GJa2Zky#=!={Z05NO zmamot`lQ9z*SvOOKqa-&#AW4bUNeF~835@n&xZYblO$Qh1^~(FE#Q1PZoWaySWJ`% z{kZZ#AzAwTc(Sl&ATsomoO3n>ncuEmyXO3y5KKwH0><}#+N47_TD@!`6)jyrZF}@} zb--h6bSSr7WcKR^`2l*LohI!4fUXU_%Ml)+iSw~|`cx{t=n1s=GdQ3{+^3fmp;0_R z;vcDiaz&xBu!kl5z*A&aEL(qx8dglgWypV&6@hSeNhVu)eretq!es>GSDcu_r_dBGSqWSPPS z09&$^l>*G1|MBV=8MT;lBUubvNXTF>?)kAop)B`I04H&OS1*Z=0E8F+3>-L+-hA`T zQd<`)U1_ez9(!!816j3bE**C7x3p}|6m~A!&B<%@WGZ5-XV{xcnJRM_dO^J-C0HF9 z1~HLbLo2t$auqz#D=@uqhw?ZrUbtx>ExjoM2ob%I_+|UqF5Q;FaU|hh`aJX*L#l$| 
z3vr@$0%55?XGUU1&xq3MRdIY74LxWKR=nX?%IF%E2Y2yExAIsCeBMSaK(coT^+gQw zBA{^&E}1-xeMe)^M?v( z6E_Fd@62l@5B{kSHTE_eKJd{5dg+_#hBYf6WtkUMBJQoV>Z5i1@v?H>)HG2rLU{f4 z*YnbOpfLC)!Zff>{jOQLlqP)kBK17*Op!%(WUhFMxN$QrmC2VBxi$Kz^3~|rb(V}H z^oAU2LyG9m1idi$13@wjoi=d70}8Y2DO##|jwORL`HFjtB7~0%NxH~y2smhPf#;|t z$(4z(@$MQxH;AAox2FGM^ zO1l1RmYoq%dQRo~%W=U*Y|$BtTE|nV3&wt>WOBuE8HEPfKXc0mQcGTE*_5rc4&SB)T{fVD?4dv>g+Dw{lX?FJ zO9M7p!8~wTvpFM0U0OAu<93RyofP|Zc2$DTAqjukr<07dkPiMY51DHp>yB`iR5HQpT_H_rvPv-)?U6Z&?L=`}2}AI|2wHyybZHRX+tsta7l z&ViivrbS|84Q17fRp;BoW~bHZoB>_wlsz^T_a&R~rokh3?nt*C-Cg67XxLuVM&<(LugO9;y`VnSu*=A4a`-KdoNA#5PQkoUTHQj5=w(By1T- zgU8``;|WT>FYeZW7Cb=Yg(VF?h8ap03>!BQo0lx#7OXN!QNS_iQPD`jf8cvGN@uR1 z=07DyfTL0e@B=CS2@k|0UWR9)^RVl03iT;2LQ6vcg4WmeeZt)Ys^~2 z;_2gP;j}T-tkahA!01g@{>k{~Y+1E?*G9Ar9~tYzVZECX{<6X6Z0fs+r@6rIv++x5 zANFz8gB>U;G~qAnvvzIEf0~kh{hW`h#rgv?Nzb$?NqLiq$j2u0OL%PqU)VlO;|+Yz zd^d;M^18*n$9AU%-fna_NxWd6&UE9ulcf$*c%R1LUl-9{TSn%`KCfcK5Z}HiBW5h4 zk9d#Ae!U{gD|AGE+P#zfW#cn~-H+f8kgd*sI>LBKGl9Y5{X5eZ(|bS9r33p!({9kg z8GCh+zf8198!>e;{WQg_+u+t{G2kn4#*y;5(vrsU<+H|t)B$`BJ^%dkRJ(Tko;ZXY z25`(Md=RN~(IQ?08S%#5wEgjaH#&mQ(qP&`IUegEkSjPGV3lg!I3SfrGo|_Rh3wsL zA#*fFS86dNts8C;BN_IF(6W^@Q$}%9SFXn#b4;;kc*Kw^Fp~c_qZE~C*|H_=x8Hs-%t@cW zhWjAE`m`XoH)Gmk-z}i|>_D%yFmZknJ^BrQY3oD5tRMd7H!&$KHII3#=qYw3VxD0q zWEy!Se_9qNm^vN5bK5jOSa^x=W|Lj)s>hnlGrV~)3D{Xapa(h(Q~s?VOrf`a&ODE= zw4aY#C@x7fj_W$;0sr}mzefg7=Z%UheUxIubC(p+Exbuk>9c7AFGmba^mrNb5Idc8OAO27wod*g- zXZ6>={xw$qxc8r+`4fhTPEAInJE+Yxyq1LJ2l)Dz#+CK0c<$@nK69Mm@{pt*3-D2r zWhr{oup!m5OnHKf!fjcmyr^7>Tb_<7@EKG#4oUgQmo#P?ySjW338_bmB$dg~JNdo( z+sDBY8~!K*kEGTQaVZyAVCmDx7#JGGHGIGb>8d|r;ADeLhN%Qh%oU8H4922KL-{=P zkq!yz=bn46ix*O$AUY5zw>C;&r3wc0(oVtvz zeRn$Vr@#hB$i@q3wt@0-5yk^${*pN+-i@6&9@!`Q+E=VMf1XF9XRi=i{$ZCX^Wz)w zEaEx+H({6U#^pS!^1z_!?7n0od+D7k?$+V#5x%fKV|VIze=?PBeSdPy=m2b%u!${X zG?1c296#&nk@U(Bvt{wzW@Nm;mHI_5j;5JBS~9OdMl@Mq)J9xhGQ6CkyEwkN-&Sv& zwTLalPJ3hsz4*L;4PouBGlzJ^gZyx)95Ewy|kXn66Y%3ZX)_~MJH zOP4NA8Ib#%2QT9Hz)WR8PJmt&5pl^e*#^hr39eKa^)?95uZDqaV*G1$VbO&Tauu{3 
z;pwFdKggEtPdOoQj2=+2EYtg+zV z3N@U~rna-$T=%;v%lKRm9;wI;HD<830KOP{G=kk)H=r$eQGGMsd5)dt+}6od75IEU zVJXevMRy&h`!emjRWsSUfHFZdnWvRT&Bm9!IoPRRS9Xn#3;-hynYN5gRa3O<=Ijo| zrXe4<<&)8D%BlN1z>ooZKz7>98UyG;CbRo+KVZ}3O)+6mmKi~f+x zeERTN9Jd~`CC4K8ip_MdW0&lP&Xf-^Q7jhAd$Y&guDr_}ySp)k1bz5JS@vbyn`L1ADjs?XcVs%@syVtaoDdwts)pPpo}%k7gD{ATZ*hk z7IvY-HuuBsJ3+C92M*Zj(4m7%%U1w8!y!*NkP1iSqQMY7+5$W7upjMy;X_o5r_%g% zj>xm*(m1L*bZN{(!=gt~gT)e^c%8WB*{jh!=y9J6ST#yS`Qoa21gWH3-pJ681`tWf zK#GKM@*y7pdXPJw!pr6{2mA+sm&eAWXdcrjy?&4wT#Eo~k1zNEjO-C_q=8;}84|+- zxUORp=D;>QBu|L+IoCYF;2+Mk<@<8X{Q4!D_SxeG8Cd)-efQmWwDZn8yR|ykJ&L=D**n`sY~eU z*T=guKytx=&UEm$ZIJEYrSE3Z<6pwE$(1NH{=@B%eZ2x7BnASH0s0aAqxI|abc~b>eY78k}qlI zc?J)qG07lIzE=?ra1BeVcYTKpqv*a{o!jE&9{6RMTfw|MdZfkpgTR8{@gT{_?8|Bp zwJwYy|KxkNNm_JTEt$ta1M=n`$4NTe!w}T z%8nWeo9%9r!7*PQc$6*Rq2-{X_p$_|BznAHK0Sz#ARsv=#6gr{%HhY>m16_897eFT ze37t;3n!o6Vi`Wl7m4PjmXO6C#y+x=u*0TJ>F)=1 z{_`L4V*TWkPZAFF!9x1>+ixE$aoh(_(9bX3Y|@}F3z0sNV0D9gAYnYSoQBHt1-S@H zK*B~}NQ%H9JHEJ&oM||70Qz$Gi_<+>lt+5Llqy`b^H4HJuPDIcOiB0CoCLN`wlJj^ zR9itq&j^rJ@bqd`;VKi!uSMZ%w5LesIo5K7gJuC2vO3wMFeks^$;ccK5(M!eYyCiv z=`4;H@&2Q6{9)sF9&OsR5yJ|@wJ?7RY6JomVIdLcD=5G>(LuL;MvYo^#Pa}MGvLQQ z$QDs=Fm9SbiQiV_OP){f-+0_q;To0UP=#ki4@~`{k>Vr95>*+^Rg&bPa`Gbs54S}y z_}~%dg4gUnf7;%7i)1US;V1+J9QM zNYm*aL+kE@<0q~#wdIsIYF4a+<$^kkJ$I5qd z7|xK@FJHnY3mF=6yi|{-F1ALdV8J!K44IS<6#i2>$uVRmgMS6jYRxZS!|V8rjWI9& zEv@qauMT~Wp?Xc)(2OshMbyD5r<_tDod*j1ddDc9>(JeI^S44YNdMQ|nF=fTl zom3HDVaq?6@Q@d!djd$heLq+llw|NsnfR3uS3=S~QhlCq6&#xmZyKV#8 z{Lss(Wsm+ecgWi``Gb31KK6Xzn9rn>Plu4>z@Pr~Cwl6srySQa&Nzb}ee_Wm|M};i z#l+n=E{bMKMCVd%~28S1t-Ozy4d$2{|*Ig4+e9WXHT!bt9& zeV-T^q{71>NuiFX$H0^?tPGJ28ZroE_{bN(y>}uDgtvJnATj$5bnj)AO|M3G#bCE|(!jfszBR+6S;{(>iUViyyk<(8v#Bk0!kfNd@YSgHaSd*ZlcoCN`UtTWv4^Zv_2OK~j ze)yr&S?xOYXz#0EU>7Gl>AjVk;1CeNP3OBUH%T7=i-0s8IY^eg5cW3XB2Tb=?IhXc zSq$Nsq(^zC7{@?Li_##I{CBb;X7Z6|%iyd3P)n*q5|)f=a1le$ysw$*O_?SS{;=Ay zb4K_ITfr_(;KqF(TcfQJ>n^P?aHBWKD@w{Wsxkb4CgqD9$?`=yOJn%i5*#0tBXu!M zL15ys@;!NMVOoD_D6D{TPpv${$Z&J~)~{R39mwUpa31aNsNbYDZF$V~v~h>-JnuY@ 
zMm%~J@A!?@j!r-Q^m31$BN<8;TD^L;oH61bT7?n(fB^$y4DfMw@WBVuq)C%v>4>yp zXN$d#x`bMF+uqij6Ip%GyJ=Jp)Q1 zu3fvvMjgNW@=I(~QWk~Gnl+2ysTW7&D%5Y>oc6o+4Qkx3i@8~xp$%{Hfdq}-Foeww z#Qo$aKsy;^05~c3Kp=s^rxzrk$^+p&fP$&%CITEhN3n8Tx$!w#`@FgO2nQ)c7TNO!DU4h2o=mYP{luThOcp7r@+j2d8$IB9kE|tzp^Ytk-T(%Ui zyqLA&9- zOQA`VCiLsCzvkuOC@sUkzV+5y;s;C9aIOwahhff}gar#0$X+S`qsqr=+iN%Ky63Uf zVcWgsTz7q*{d)nOyL+I|iKlAt(|f5`DFgQh_*h2)4*%(Wi^47uC<#6&)mtBN4<&dR zp6(L6&zx(seV!_T$SAT&Wq@MJH4lYh=`nD1vO+t0Q=5pM;|^B^Q{GYt{}=^$@)@R} zQ{4!vEg#SBsztMD&W~^L-*%}DsOP)eFZd}|o zL|N*9g+(Y6idR^@Y!MB<<1|{q)>#V4O-V>s3TMAAcN7m7GCATsR4!~EDiEj9GZ{z> znPrB81V9)E71@=~*}yUnr71ruBcex|N#_UH;d4O3=Ev}kXxxJyB{3q(06zbD;BdVR zZ4^8-(NU6I;;aCX&(oj0Fj`?SVFc4$a5z~$5)1>m4G2a$0kIT~-wg7jF)<8HnTC~- zUzi$tWx}YytJkO*_4wnB)V9xowDn1MNaum{Szo;n>pVaPGxFr&Xu>p4KUl2CLb*b5 z?kcc00`uNmZn-69si?GbM}9@$J#YcNd)XfJ!(*4z)NkITWpk&X)sA#2L90VeR|b4l z{c^zErdVDSiSC=dost#5KZdU&tBS$pbdfl`@jDq zoC@bd6M5hdjs@7Y`bO0yg0Ayo%u~T=XxJGs zjVMY|dP^?y7;+}U!*0uTX<9tw(|j95mhv-K!!<&6g4l9A`3$O|H-)9i51#V=F&p8} z5iJiS0F4_>acVL0yAcA!vJ8}7b*B0=RV_w-6prHbMMs=L`GLKJCBWgR0KwBA#rNa$ zM-8G0|Gi0*ohn53OgPeO5*zJMFYniG__HKc3D%|NPh#Z#JUZ zb?Z~3*6d@gQB$h7QDb?=WKBzM0Q6>VRdgr7DIW8qnj`lb8Jdt$0GpY>LDcElp_qVs zhBERqqDAbYP#U_IrO$?aRJiPtznhcKO&Whqu;sYoQtQMi=UVotylMfT9J)1IP~$o6K<>TwUb^tY3te5Y5Dw*RD4crgscD;O8nxbpcE9*BYPJdQ zsFu6OKVAc<4}fWCR04gJIL_#ZMfqS7HJ~QfzFfi|Jeb0TIc6JRY2>8c@#qzi(=~{I zDFvLs&M;G1A%9w0LMD&_3~8JmUrWME*qA(*CNw5Z7}&TJiAEX$O|w@?K-dLbD6M?? 
zQqyt;Er|;zUj02Bt$$3`sMJUR)t{;fO29pbEj@$Z_((~S2$Bz>7x2R6_}6b>8umtD z0e+~kSe@(RYL$P!u7d{;rfaUbrouKn=BpkxkuR+5{`R-O(OGAmRi&n^lJ~0Qt~g0i zOguS-h56)M0IX85xa7l%dF=P<`lD$25AT~+9*w8{TGa*#_^zRKjSdo#Zdw<6;Y6?T z)y7NkgbRKwFA2u!rSUcNXoekcrO~nsspLBxh@tce`H^MtJXu91`4MhljfU4XzS@W* zmot5OAuv=n-PXzLJPetZXBvF)k#zZu%LG;F*QC?G)gQS-|A%x=vN+1tSySpV?iGD;lPm7JYKXM|N8Z@&I8BE;xxdj)_I_AIARx_ z$gp9SG6gtq#|=V0)dqX{P=9W_15wSWhEQ_ zqyVLjW5d;nC!Q!43Q3wR2Ao8_kG_oRZnUv$7^Oj#*2-YJh7^q@Hz?Wwp^5ZyZIc-_ z$hI&?ZyP41Hw|Ib_-SJ+U&glKMVho>h$k9r!55i4oJ0ksEXZtSMYfi&X+mvr9Jv%K z*j6kgn1)s9RemF*FmU7=P+U*^sC-UzU;a{_sRv4lisv*J0oSOUZ1M}eqX9i4Ms}qy zTFjmb*i09;^i4YVrk*Fy=c}ep02Xa66+;epN@yBaYmA>K~ z`S#myWh(BAFTSABnLYr9002M$Nkl5rwUA6N4XWW~G7UZdni7#^g)q1#g0PM} zAcHjImd3C&^p-T@V3Qq<5mxHr$_KUxYNT-G+ccAH%jd$Bm!sh_jC^2&0?n8JX^5KY zTbVh*;96Y>bEUHp)qvmTjdM?HT9`A5*U*m76ue$+MWbdar>6pyX1CjIZ2xVjKfjGFKAH%hCB=AVV1T?kc7;H_?Mf&Og!;$+EIqIlpZe| zl$Y3jwS%7&i63-Ec(`42tt1|f!IuVDYsaLn0ru$Q@`DZyK5#9Vf=wJ@bcR{w2aTn~ zMXK+`C9eeer3}bnYUu6`$0PZ{6Ms?wUkxBd375~E%ueSVopn@HT^Gg&1{k_?Na+TZ z?(XhRL0Y;bhE_VHLy!(>loE!{mrkX-B&F-Se2c$Wx-K*4-n-A)&;C7obJ~2>+krF3 zNd4FLS7kY&Bfj>%9c8xS*wd3&*6lWiy&hL>-N4STEj+~G(h}2wcuHlUbDFqn<{JjR z%fPF}sFk35=fzc`5!9r7gvt=s#SjVaW`X`M#OzvcBr7?*i#Q*jC9;ovL9_O_w~=q+ zn4v;0$xe<3{O{IM%hzRv?vNWST=pVzZWngcs^KY-*d(82++>PPRMmTB>A!@)oF%Yu zp1c$p891=>vikm=VGq9(xaFuA#?gXtbbmSVVWdsyJL`@dv>#8c$@w;xC}_Mxc6fsR z!-l&)zUtu(&GWfnil4_?@lmqqieKYV3zL`d5$odY{hx8_+QZ~@duK#Qy9L$LZ^s(V z&Y(vXOQMGu)v*7v`D@m+8c6s(_cWjat`<(6M76KCLjzemJf@FKq|{VrB~t%Nxc2B( zo3I?OK0lrW6O{aXgD@xi+qr?MY1yCu)^VZ!*A@8`S;l(3Gt%T31G&PVb9! 
z6DOL`=Ji*%pOn5S@{1;zp-GwtyY+xK-6!(zop9D`rXXcxxSr2 zmPv4#@GUZALfKCam{ew-2HL(5|LkTU#BR*#z9pV$lEgTz>Vgn)kviro`}hY&K3D$G`Kr*?)F(fpGic=aRmYfv(C0|}>bgL4$4I}v#|;xmmF`-23xS$I2;U0n z?oS;mi=Ja(R5`u}5PeI`{r8LA{gTSsv%-_vq`|taV9kgw^zDPnnzSBY9*#dvL7T)g zMUOi(8P5m&_OCEbyvWxWtBIQSTL#OLK_JICdTCAKtmH9qT5l8ICqJQ~R?GzUks)2P%V%`)|({abzC;fqk zcb#jGrmtF2tDQS%19>CVxBoWwa=~NheK29T3GN2a+FjKY80Cjdr;c0XV3%(cE7ACN zhVAOB$XPGJ8`F?i{;;fKgd_QGMm^sjeT-dFpq#_WCP)3u4lTbof@(AEu2|4Abm*Wm zBs3kl$BTUei|kGKnAp9OV12_kB1Dg0)WHv$CSq_^-UQ;IhMwNp0B}b5LIX8bF z6ms?IZ|h-o3)efapj9+cbs(Yt^|30QT=!!c3?w>r|U1;6}?~)^4y9`1(4g&$B7v8X4)oND0}=!iG`$=g2`b ze2#-O_3S&9?H(i&GY`x62`G5kdOv?VREGW_4y}{l9m};&QxovSI>fp~X}0S!OV_Q` z8yYaFtr-5elG1m5(1B=BhHANd|vJ71eK7?Qd^_iYdJpLgeBp3^{Z{M2SvyzB9JD=BJM* zfl3_R(y+Or+s44r;iXPn$f(f4Oq3gCF0NXjU*Z6}n#C*KOt>R8i}%~LcN;Z-2-Pih zOB4!@FrjPO?ApKlFO%S2zUYE+4N~cdMq^aOg&n@3XOfE?8k?%5&k=NTi5k!DZv( zMO^96sh@A;@7ANJ&-E>k-TP;V9#kzw>8W9w|M9N|r1V3{nbzG%we4q@`L&Iz{X*lN z4gtP%6Uur0C+t@#bcxY#3`a|71Y$1`JbDq(<-tG;B0Tkx4jIvpS49viU${E@IzEpp z&;evb$`APdz2|Pgv=sIb+IJAFFXmvsLy0x;SD%&WX`yDtJwQ#pEgnU%Id`qxTGtn3 zrmbB7VXY}=?(R}X#qs_A1ZXbXYiM)!;i&!0C!Nn?rfz*iW)32#BfJLW8x+w8R(-@Z zmY;}{QO@T=){6L56baGkg>kDwkeD&CEaJd<=7DfB!UZS&>4LH{g?5NFeSakLH|%D^ zf&L*Ujm$CHh9SbK%2Jf`A1GRO2cZOQK#O~6Y`?D8$;)j%Ts(M=RU!tPH|HZF>&126A0i7h)2NA?^F;`~}37`WS|Wt+IJy<%897O-`}d?Wk6@u^L;AJvbPsb=#P>+y*| zUZSVrSzQMLSBLHy#dTFGI+?B(n71BWp6tVlYRbR^OCpqO6|=aY;v+~S=3dS!2lr*6 zl+>(2QT24!>B<_RSiZMY5_=No#f$Ktat;>bqSr;h8?!VUSo7=Z+ql?$tSkPjIselr zQAMtf8>_mGksp6tgbjHhbXE3iH_IMlf=t!}8|WAz`61|Yz;##+)(e^sDnoF@A2h~@ zuN^-Xdb}UYQEb1@!8z~lcLgt`hpW+&TjYWv#g>oY!gTK6gC*sOW|`2k32Y?WEc1Xf zWXhnM^;E-M4etqw79P;af37j{qHN2pUdqN{Fr4P8>zyv6JE^(_D_UVl@;hHZP}~}!zRz6qN4nQ? 
z{pXZGC5k$&^P%I`w#um)oyB<Bl zZ08@Dz9k&hqb%Ab6F^-`D#M{0!G0lO9b8fz0PkS(ra4GXu6EbpE2(l-+UUU%S9wXI zDP2&%sDe+gBkmC$8$qd+A??oe|0=!KLLgHgPk!wtPlOP@FWJNOAe68MWe1433M7^R z%=5;SsQeK)`;FaNs=nDP1G4p#X?5}9FdQ)hK0HPr)t)T0Ic2-{Gk)uS%uKP3i~npm z)?cE+(L{X_YV&Ge9BLZ7VKV*aK4o{Ij3WtGd|GVQk9!4Cz0^e5x?#`!SM2QMw3A8e zix{=*(24sj5k9LPK?t_ZtVDz-4TRD&F()d&yTA>O5v#q;ifEyKLR}QbJ@y_|;|$si zgh1;ETaT94zNc$e!l>S+RGTTWYoqBKphL1g1UXhkcn9_%MkRA_V}b7;4QKnQR9%|X z&4e`0Y!&*i4I~+f^HIFm6^)aPvCjii;+}uXH0cBbzVn0@HuSgURAS@5he)b~P05TA&ZF(7F~xbJLe0dCLy0lRqjX@w-FMXmp&pz$+!{&&$Qqk-e#>s8O#gde zIC-9Ky9sZRA;#&O2}3vsj!-3F35Z(r!FJ>=qrQrja1nUZ#|{$e18OSDkLJOTUI`eV z`$&BArzM#_TZg&fJm7HQngC&I%Fjv;VcKnifltvYkN{$Hn`Y~J^eLX$c0>{O6M>Xg zLJV2uuVqo>7)CBn33A;E614^Tf)jcWylanBg255?!8Zb=ZrF$YbKN70t;mGXeNvXB zZ`hV#2%W91gqX*3Z-S7YkHsup%?&~|EiQhJzuO>IUlH~F^39YhQ{n~+TOUT5p?y+F zPpa=5#85$ucF~*{d@@Z3OvJ#sf@*CjUk@rm_5f~A*QLwL;lOgq-6AbgVs^H z@-WIV5DF>J+AKyC=LXIPVKsK4~E{Nvxwa&Yicu-yRTVe!PB(x z-$!L&%Y#LB z2u*k4jYKN3yz%=SD*Wq|a2+C8=goFAvdaUO_I=DgEldsKzR6kYZ{?4vZScfXNHq_; zN7rb%yZX!K_AGzli3P5Q?}4=4xU==(S#fIom6({vyO-LFo;&5vV>M)u9Jt*AE%ZdGa_2 zbZ(8l9VgUYubvz8!?a!mE zBQEVMMZYMGY&6VT=R$nY?~ z*|xE$XfmG;C9;^o)Xp|)7l|J8#w`&@ldm6>ali)vZr<(`x zWd7KhOstBjD^A^71}^g^qIlbSIb1l7FhL^%a*seDQdLZY^A7=C8cYP7Br<1h2E5*+ zN8cKFyQBk$?lxBK99DfkV&e~OfBF2}E*S;@h5o~Lcdtc_5a2+ZyA4e?0e~yM(rTh?ZnWHE6{FGp|u0t${Yj~Z-)_^`409_CB_SJFhq%}O-u1CJ_xO_Vcoukw3;*ed7CRZ$&-{9#>8HjoS$al z#K&&f3d25Rr#`KEG40$)B*1{Py5dHFZFKJ?q>w6>M}sl@uhJVa16+9~(tK_njaT;) zHzkm=P$)cr$2*!S0W7DxkSsseuHJf67COt`G`Ik|2cF5^)92gq;ONRP23E+j_1__T zMiHD^l>$p!gGLbI);eIqNMWu02cX;<8hMYliToodrMdOpEnx9t$lLlXQ>6q235xwxv6&@osr6td+FlYuZMtD>)-Q;M8q!c%TLs8#iAZlt%sAu`98y#Ev82~)s8 zmlcNJLcv$0BN=(BoOi6QyRx-&p~yt%ZluI=xTFyjfZP9=oiH}s?BieF4Q%<*7)@ew z#!pQ|>?1TJK0-(*L`#;_F9N8qky^7}H88tI1h=!oRUQT*VrsD5L%$e{_C#18!S7z= zg`tG=JAeLz-VWy_ve1jZ_-~yP368Ogl+cL~isDd@!deXO3IYY#!~2cbXZ6jC3*&{9 z3fGOWc;Lbr+m$KbZMTX#4%XUG$63_Z@r;p%(!F?_RSVE1!P|2Z#hx^Y#AeE`Ir3q- zBkMW1vvPf=Za+fBFB3p(HqhD0m=ha8z56TZ5?&W{?5Vbv90sO}J~Eh%v750uar 
zW9qeA9Uqqa{T9p~4;Jq|tAkf6;)w=?mi&VE62rw=5hV8}(SDYFSqM8VmEe3%;1h?U zd}93$4#~z?J;91654_tiz3^6Fb3ll3bMm6aNZ{_Q8UlIfuG|;jcY^0sA4r|$rPr&e2lS2rxnIaqQGDYfPJ$fcJ zUtm}0AT^~$!`uj0CwmllTglsNr}W{xY3$pBhhLwi?hu`$$B_6bI<$=Q|Cg#Rxm5v8 z+5iThcjH|^T+8KOwz?aJEK44i>h5x@Xs*jq%SW=>?YaSt!tJ%xsuv88!u&lir_p%> zfNwu?yu14~xI;r0(;XR$5cpaj#p!~}OBU{wa%#7YiOsB&a;vKRT8;r^<(9KP|E4FY zXwI}|NnyuC?y|4Q=CuC@kGZlS7mqw9wcl1!M!f4@k`U>rCSHi@ghbP%L~*=CbI7X< z|L$;-_i|h0u0X?Ofd-4dXrdskCT#1tA!;;>QZ8?dT_|AFV-=orOPs{wUy?X7Dpbn& zWra?Z%@}B*|Mz$aQ~cMPo?3%&1awd1L72VDpdU~i)2YZ*-etKJucYth2*>KA9J$8W z0HIqoGT4ZLLay6bN;W6UkQmuA7Z>7XiQb%uFvj$X$)re!ag9b8K=ZV8OW0?>AE65; z)oKtzo+UD_2afG1zKam2X(SWr{J|iFW1v>)c2+th6vYl{&on6OlC( zRL9Q&s3|=I%*Nxl5)8-f7iwghXZy)@LY8zaaz+6QI_#9Ya0#wI&OdWKtJjc4j#j(P zXnnzQh4F9JZQws0WUS+O8N3Cl4@KZ%%MEQFV~Y`ORqChv(RKBBfE=Q32SmItEZj>3 zYu!pI35sexeexiYN5q7Ln^ZOMb^y@Q3<-xYk=K*T-r>(_xD+pBEqBvB)&XdUwpC#U zh5MZ>aYrNTol~4eSMLL7m%n4(@bUnshgb5w=YR4-Wyz}nvhQoIfxy=Ya(TsFuWG_E z>48OZc+XjzzwNy=jwro-JIaoo-@i3iH{vK9r!n>$ee=L3dVu2EjA!!Q9L3FXEt7w6 zhwKglGcBdNZ1_(&s3GxtY${FN8ZHO3fTq)R)mmx0)>H2BX$ihi9NE z+|bVkQ6<0WtY=-deO8C(Ml>5e zB_*Am+fxhTSNjd$4GAV>3dVjqiv3zRhU?Yhdkju;q9T{hM{g!_s7HHEVn~85t_ej$ zOuZ{E5v;V%gy|Dco>8(Vg~te|7gBmD9$={v=}vA0{Bxtlui%C3itl5j6bGUhg$Ta{ z1BW}oOjqy6r~4DG=EN`#L}c-YEj=*ue2B#d6qn|wv!sfa?`~M@{TNj&R8OS~h{cgJ z&Hg$mzupcud=;UiXd)N!t^LR~VYkU6<2^}dzp(iQ=_-ib&(0G807YVY-8cn7aROs) zyFK4b-uI+^PL67rc*F*Mc5>&k56I70+rtqO^c35>Y3%8-L;447vU&M+5!t<=A+&M1iOw`Im%eRe4DC~ZZG-5w@{LQ-e>g$9W?!o42(A)$v7VgRP+ zGkv+Ef2D7U{m-O;eL?o4N)+jo{>)@0GxGv9(_EWo#ePY+aMucH#3R(Ng~4Z z`+S5`p-@lU3|1wVs;wsM^)St_P%71!F$78?uAq@GgmSc*pj=Wp{JR_}k_?p1%Pc~n zx3O8i!AW1hf?W{#KeU*EWjSIWJfVZTDZGzcxMKw<=q`Aa^0vpo$v0q?Ztyyi#h~$2 z!!Z88Sf%f+1wad3HbxO*3*~yx&v;G}NaQEA^GNT5PkF#+Bzq{Xm4-!y@(IM$Hf=}s z%Kf4k)&ucG5pV_^^+nuM1cQgMMt$Ttbr1yx289wn0-bw`VXt+|MR%9;u_ItF| zH83Psz{<`1KsaJ1S(~1{Z^uPaYOR|pz-A^numASQl9+DEUqE+iwn&3L&4|!`+u5=a zP#S6VJ2eyKjvZ2K(T{q*4WaVMzZd`akfRdEv^Q&1{(d|!o^p(NKrvrH0W-T{Y{%Bf|8lj1c2{S+cP`qlGMbSm~F21)6lhsbbDD3eHHV9>J0bp(B{x; 
zveQVTf^7mi1`Ld=wO=P4_rsUKAU{p~mIEjX^**_>e7G=C0hw4p^H$deTKj_6r`{Ir zbb&bq3(Pfetf;30x+7ovM^o39i?d1kszd64Yeq$&<>gyDWD6wCK-4-?dk(_XG|^VNNr zg~c-xlxO80b8Wy6wu6LTlFUHh43>QKkoZpk`Dz49lv$e0wk?p zle_U8_+Q#woxR?eh$Lv<&Gj);`d;y62RILUJ&51!(W$Cgaz&(JB=m~08#OA7zMK5a z5Q2kEAY|}>FkyJ=nGqXwdR7ecF%TC1vgxyPvj^`M1@W}^zY?+J9G!NoZ;p?bIE)N@ z_4{)-t;)8Xe2Gn2hyUe4F>Sab6_3UQC29wp@YLpVEexksRrzjWZMe;b==-QXua{1c zFIwGocrI2eMEl}_KbH48CnI(j+jY zE$qQQU+JcM!_UZ2V>2@M56p87V7ZZ%##sa-blIXHU45a5!M8SzgWcaK*_Bzz$3(A9 z&JMwy5)Toq!9k=ky{^&-!3rX!98X~ZA}hJslRlb;hkbF7Gu!X=$#~<-G1>8N2#BTO z<{egNJy-^>4H#ooW6`R2EQbhb>}c#Pz&wea9ritf*2nlB+oZ$pL@|#&w9bC(F9}`R z&$R#a*tR)nOFf@>!iV+J*oc37AOjMfXSrwTc~@WLvEEoJ=2{q+8b@I>+s=8)J*hj3 z{2CFDMhcHcqHX&U=o1<(j(HzSJCAK^J0D{G8*sl+;t za50*EsysaKO0*rVik>GOr@?*|Eq`PI4x%_oMY##1cczxM-$PWJkeHBYMM19yck$qj z?ZX0vu6Vu>TkxCd!JqJ*T)O+qrz8qIX);2)J5tX<_ss4tT_e(8i`M&;x`!f*5md+u zE=C|T=Z9pKhDbqOegPi0vFOkfneq~P!WwpBUT)c~>GR=U>g`jM zfly774>$PAB;SMWs||FnRgFD#$qeo+>Kf?DY6objVJxqk^jH;9{_8v+X1n+n6EMSd ziV0=b!6KYN4d0#q{N4JUW`_B{H zmY%TX5e5Wr;$3iUCA6A<7*PNW-tF?()h4cokptP@ntgu05H4Q6k3R)95ay33(z24P&(R|n-oeP+tShzJDUCz9>&LvLU?v1*6)L^^hq zsOpNT720d62EA#OpcUdq=(=Jw3x@(dN+f&R#rJAgPh`M)meV$1!#7L_yIy8UC$)o1^CbOmX zJ$tJ5<9P$BiPgFxuGo`3Ev1j+nHo*PY6Jgr;1zUCZXInGMyp)qUZMXdi=j{$r`a$Fj{YlBtLu8=qPkZ^X*AU2{hdKZyOUN#)+_4=13IMwc3wC9TZu#d znlSFK*TN@1m;$}llm1O8f0(?y7L^{+>lIFp@G{n}v>p*x4SSpca~jECi(aEF-%K<# zTG{xAwfmb!A5i#W@qWni%aXJCzIWJ%C0^uF^XSuW_3VB?T|S>#YCAfjzaeUjT?$s~ z?5-0(=~gbEH48KqNe_ADBhr1uSX$@wr(Q5Op!do1ZW1z8Liwlfe7?p&24l6AC5@O? 
z5)4Q!6A}ekQRODeoxvQumj}+;7v^nxDO#>j)azpt(`(2{VGCl$?cJ{W{dO7Cq)BQj zh8VZ?_FL5)P93woqIz%B%4e2l#_LDo?Qb1oIHJPFhnS^7r*U6AY4@h940(+6%E|b5 z2dWmh*usG)v;hU(9WR$W%nE@ixsJ_c;{vpV_C8E)Q|^BOyi>0%VjWi2OH*`YZBp56r z_*{{ejoZHT?Nx6&=T~DIdB>ed50{oT1+C7y9q?xpUy zSN~hwmnfaT!K-0IJgKaO_4Tzx0Bg4J*B1JR^DXO&aGXq9G3^48Ex*#q<%K;iv3Z>$KP*3a&zAp+E(*J+HRD}TDsHzHKc)&~Z{})FRJnMkYX4v_{Uff;%H;(e- zo#87pKPXJPcPNEPKMz!}l^+7F2ys{r0m|T96UbReV=%NT|RcDf7kBIm9m7 zm?cxfeEHQ5YYp?2udZ1WYa4Z2bXvK_aL?pCH>=>^#_4lfEuifEZ4S=!vIMj|pcrYM z#3Rlk+UQDm8+OBbnfRPBeF#4BWaSba9uhWiH4Us-YH=}7s)|_l`OLTP@$Vx?lG`kY z3c!opkH`lvcUoyiq66BmRHbDWJ(l_vUgT_{@rqDhcLQ23ImZ88om`I2gNNg<}^jRbYezYp4y5S-BBW2@4Uk%g=JK_^;|U z&wYor{q~;ZY-41B5qSl0ubj$Iz=0DIymJx`vszi#N9J~u_dZ$_P;;Y?oQjgld13z==@1ZE?r6Du^ zMh{)fIy~D<4ql_rntvlzFZA_7-1<2`_=3#ptdPH(v?L$QeOD&`(Egg&`S%wnuY-<* z{d|qNItDR&qWHv5=Raw+F&!cxv(CpWasX7yIGC?d&lUFJ#L0G_1eVgoA%+0JEaG-X zGdJ-a=c`qPk7~nW^nZfPzFGD}_EjKX<53EEekzVoxSWGmybkMAs=Pe&(Rft#CpNJW zzdw2l74~*gsRpLm#Aoq35-VqOTS{d;R{`^0(kIH)VZ4Y|UhkssUs z{wM^h)k3ZBEc#=~{s9X0WV_$RppT$dr5=aFax1Ub$%^T3?A}0$Md?V)vy?qRwkZYt z9FqXPBklSJ8?+46Gs$~K-qU;12u>UW4ItEB=%(1|^X-w5f3o;=oTsQ}CAmH;f{fe6 z>1Ik3(f~yk;)U`4G$<*htnZb+=B8;pXW4LJh*^WJ>gP9IlNR_m=9lQcD=($%+wS>B z75xB(jSnw~i1W{7zDPvFi3uk0c0#l_7K8=C_s4BDu`WE~+NMEw`%Ew1Q=($6YT%v{ zHbqHQ#-QkpL**T9zbV|4Sz-wKV74+W{JcaQ9Sunn08t&?qN{e1IPkL z-Nk7ox9_P%X0pR-$1i2M--AhXrtQse0J~GV0f_<=$8BViuzuDH;|vlBB|7;q6#>YQ z6u7cKKrKeR^(SG*EVWIa`Ux%{^$($S?27qT<)$)jIa-0lAxM=Wh3{I(Ooc96mUwVS zEGu`@Hn^m(5sVO6e*x#Y)-!fy5ScKHC4C(_S8aM|0JA;c8tPMv65LY41Re|gRO)DK zXqgMt;O%>n|5{E29QwKv6S+An4iotyvD_1A`6&wks9}FFQ(mY2j5{EN z4l2Z<27aR}#Q1BGIRD+4-~Vz}KadA1)_;eqU3cTY;s!EVi&WxtOLg4YNRl#Dp|d{t=)pzpb$?7J>9;) z)#~nP>?1ZFB*BhcU_jW>e9;%lO2+dbz(>UI zc~=-sLQmcmf2vFkhcFPScUpX@fuaC31%h>-ZXU#9&)wN)hgb^QSiV>_Z!c7Yw?*%l z)59_O(U|#-A)!7zi+=R`7nC@Z3LU&hXXcG|cwI*OKuU3{wJ90!S>zNB_Pkk-8hDFe z7e(c-;M+wdN?-zru%3XDIL$k6l4AP<_2H$RJ}iaD5YsQ_P=sc<%CKSY1yTZVgV&pC=ANWc z6z-oE{NDqzG6cip<P?Ey}8gg$rw_N~%TZ^1}dUkt(RTm^2c 
z*5zhx9K$L`nn+I@UL|i{w+gI>6ZG=<6vpulhi^>P?uiB5xB`=&rgs0K>Zxv!!I_+uJt$^kV$8CX{8%<31K#;V@R@)#hG3V;r^ zioW4r^FyZW^1TapW)2egr{$J?II#R9)+|(M(QX!4(YQGx9w~j9>h!_nh;?!yN%HPV zl=g#w+&dZ3zz$){#DG%`st`) zeE3W9DuRjMcIq1~_y*Ks_zMe?nV3b=2~faI*TjJOdvY#(KEMeCk{1SlztU{GlSvc=4E6^~Yer z#MU;kr&SVrzD9>)-ngoGW8WxT_*r0W$G#oQmm;;lxjgSF0G~A_MR5nbW8iqLCU*O7 z-tR_s!}4}O{0Yyo`4LN@cI6boT#_l*Gx;mMr6=mp?PNTquo9IV(_+B(+ADcKz+F{_ z`QJ2YR5%aTjLS^y!H*Soj$f=yRi*+dpM28Oi5zA>$Z#aI$3valf(;cfLQjmP?Gb8& z{DE%ReZ#op!X;S`nw=Z_Uel`lM60^c0V2H^SKiQgB)YiI1WYA{x&;>#`%DIJUPj5q zBMw6Mxi=_|FkNNzWnXUg4Wc1UN;B3rfX5CCe|*-Lux=;$loToA&!dn&HB3af-nOfUW2!*!!^=fdZr-1<>X0ANT7ZQyro{6&Yu9>T)UG$0zSfas&vf6wJ+Bh1i z;ybV>B6s;XPnP6BQuzgZ6aW>$6w0UYe|2Ms&9UqVo>aAsP5_rm9NVpkO{tJ~B3lym z(g(~?S@-8?G;9(H#1$5FW=GW?o<_ud0J9<42?~UVQtsG1-wcRPegC<_`r{}l2o-`= zWDoPn_$hF7GIM~#Ph16-Z6@fVo+C(x@xwD#1|ml6a*{1e1WGbsXy8n@?dPdlYc$m;>(cA*B4t9}*Wa zhogi{tMkCNZP=3$<-Aa9IqH9~U;4EK*f>Z3OR7Mn4mg!`dPbgl#76CL>jZaEu+AAI z9&+9Z&ATQPflRM9@#tK1gzeaQL z*c8wb#D)aB25Uhj26T+blzX`J1@<>vP28{pWWdd5H|N&~ zabg|50f#t6mSJCU@-I!Z=x;&S0Q#83MOz$3GVx;PEOhT9!6#m-bKT7*pnW^-`7#iM zcoVI?Rt%A2#s@i|?$ z5jy$(+F0*%viv}Z8a;u<>%YJxJe85|_;Ub;c)e4E)i&1fmoUo5JeZEIN;tWs4=8za zuh$5`ThZSQxQa{sHzg6qDXg%oS!%x|RcmYiYKAWbrJGQX@%z>*tZc~oF^CSf0h-o( zDZbjsBoBGk0)49Lv6JnE*oH_1PH7LubIpPuFRjp$doMQ^3}1lI74_tDP6}6od|Uhc zA*#|(Ov@8~GgbFyzkf&KDcJMsVp6eJOq{aOP@+o+7m-^Ij(pQb?z_D+UqIdcl4SYbyT@qRZG4v<4zT^5xKFW{ z@}?TkNFO^cGL`|5LDP!>ybrGg_%KFKqry&bo=4ZlF?)!oV_2^Qgmir!m8zcuNEZ4Y zA)IzJ2~dUa#?puS92{O~hx#>y4=NX{#Wu7edYqsAl$kl4W!D~$eL5>YSvjbXEn@#* zi$Uy$Di}5rxrU*fSA}egcgl}+1myvFk#eIq@2tyIq`Jrh7>8o6I5l>^tnaIs3|myN z#GyVX-!0%f?&0v+^$>cJ^U7*0LH@9!AWP7(IT7{?+LGY!OW8f2VWIwe;6wCFtsCu}-xrl}&e+jgn5# z2_M)6X4`xuH}XTOpa!E@x5)|rS-t=6Pp+A(mp%0xd0OD_6HnHqqu%5P^}HVW@%R)G zPn4#`Uw2u8oA%pf>c!S}K3Cd}E<+|p!e7<<-ih0SPU)^HVpzWacATGRFMS?$WZPMI z=c(t#=fWwX7e}UYkv3Xz75tPiLDY>y5&BZ9j-XLW>~f%qmFO-6@FNh^pv{$$sLC6CgPVp>;V- z%8BUCdY&|x>>`BU{r$<~TYf3x+(avx^63q>FVFrn)4U(3(51~qM5&x%a8Cje3G3ao 
z1Zl{-X%z(=Xqg07RW84*lH-EoYNJ*i_M}C_U$jE(h)Cn(en-l-$yS>S`@TVSo8M^D z`m&pynqTm11pa`74IqYs{lS7#+tGJtwwLwNqg}n$(!5gd*8{bD%*lp<(!;q*<#Qx zaI(MCT29L?(_8j$w{+!y?^S%0Ssf$p2h+n0jB(&y$jJKpk;=JIR{RWZ9K+FKyCede z`=iD|;H-cFg}p4cFLo#jx&-+0w>J@sg<7@|HUFY%n;jpKDj9J~z1_w&{)ZsA>`c+B zo5=YdYwp1BJ`bf?KUlL}=z7Rl;gAcHmZ;gi#BQC-BBIfFFo4b}y;xe5rZiYC?ypbO zf#Z}{uz(OBpq<(C)}F=73nEi_6mPLjST0dc=<1q*kEps<}Kk4WrvA67W zdPc;Q&OxaZG;rFu_jQeO@1oDvu z^v!Y;CuHOB6f=NG0*E}e-B=itX4{2%QR{!dHFI|;ZJgg7)0LvPVW)SF5F4~j9gxx4 zFaCU9m;Kk}bgP>%7YdR7?vIr@ppsZmknp>)NOo!JkNn6#m|Ok9HDrzMB`CCfPAGirOf=p^+kE?{>3j@4=oFcH2$l0lOh-NXhcTzP76H8T5FCJ z%;N1A+AI~O`m?ZKn3}AxkFo1Vsc04R@$Uz?vhVZ;gT2Tn?{9JU6uorWtuA*Xco6`B z_M%s|%UiB@NBjf!{Rytl2Pwc`R?rti@-jlF9B{)l0vP>J%dXdf40)?7UssuCVYsyW zu)~YG*7^LXehz4YGJr<&Q$HDv)4U_6|D(3nk*2M;?b22%M(DiI2vCtv zMRxBhWZyr!6JA6`sy`_Px`oB8#fwF9YVbf11JKpqN0v0QhP0mVwY?jKW*t5h`3peDXgF5pLm zE(5EUz7bD13-mNFpyEG|$G(HHv+Dm~^ATV<22tcvuc@(k^^?7Sx9v8}Qd*Q3a6Z)d zkjkci=R`kBsd@m^-qm_8w}k+CQX6n&Di&&Tnz@5OxR{DEk~)_J%{_5vH$bFv!fqLV*hR5IK@lUo7PTq5uY@->FtLJ&l)~21z;@^AK6l_(&c~HsAAWSRpIkHe ztQdB9q^|~gbog}q%{ZA!AIDZaum7l5zIuu#&8We-q- zc@2n2^2Q8)-n$xIF4kbMo`FHqcHRrq86R{+H0q(z2kGpk0%)T=Z2hxC7w% z*aghLppSG`tL^0d#%%`(u@Q2B|0%6RE%d&)e;fy%OMU@rbO;XBzxsv->G|1(G?=_T zC{OLUASmZ>i0SLUiKvgOb=FZnKz_w^1{>G21UkHk^`@kzZc9>@@iBRbnr8ZipcqKq|gH-ryIm)}rArt0jgFwx56JhcZ?jnfYxK?E^;H36y~lzl$B&>J>yr;!hyF=6uwm zU!ahmDay)e>`8!#VD9qkQjbi)HUA9dMFijDvPo~!^YZ0fMB*jUvEgj`u)0#_v@@1V z?kQ=}#TPOp7@zO%9s%(!Rn4}IFLTq-?E*Uifmac+dnWxW3TK`Asw@F zR20{;=9&NW))pHB@4X2iS_JDM7gohqDZiDiX6-u;#&@nqs`Ib@IG&duw_8=tI=qP# zk%6J+h$Bq}AtTxqc!w+_gg8~|*AO>nNEPGnsvK#kh4DKl$EzZfIuztTcTbaHD2-I@_>+D*?q3YW3%-C{_A;gdw=SZRuZ(^KB zUWa6mLr#%2l4Fv?IAms=LN7TtPQ|Ekh>^%43L(;9LQy26Fw%I3#JB6m_wW1j?O%Ie zYp>_N*R!u{UC*=bwbxVM%U1(#wa%+zYv5yp>LX3@k>}JL#o%oJ8Q^N^7?CMMS zpbBsgB2-x5H{gC??82=GP#KaGRsxzo=4<(;+z5N*+I-wNEI%Nd+M5{$=_x&Z)1kwA z(wA>&b%8Hp2wZcJz9D;(%5^8Mcv$W`a7jA*b+3eLfCXAkInFH#Kcef>_NABqMb(4S zXf@2|M8nc8s+anQM!z(zC#LVYZt}F>ncGbDWEyr0 
zSo^BL)>Ija*J2m?8=0A>R^hzOQ=-%r%Bz{bUh_$lN-|}GXt4kdDN|p<0mLPyvWx^0 zh()?V$7Ma@;^`_*-bCuZVZ;QU{4OY@wJye_>l3KANj+7WWHY`5UA@N`9k6SY{TDa8 z#dIHFmaWN%-67DS@mqQ$mW5Ko%7^Af+4mv|L< zm1Zb+6{^vNin8sd;Ynejl<*z=BZ`B286iP}d!J)YHP;5<%h%KSC7U3oM#`M0@MI+2 z4slpgkZ?(=RM>=XKd+Vd{)irzi_u=`TJ8{MrC%ELlYK+8Qv$6iy?!O`)e z!vo0mUVNAJP=p?y)#PiQ*gf1pzGI*pcSb&bHZ**x14;hw;WnIdjJr825@Kq`q%3UyEcj?1xTz3qmvxTEP>U^bqvVHnfkRnhrP=K}Z7tSc_|huPZ9^@9 z6Zm){J`KP73gpzG@J2-LQ9-DPoXfl^)#!8NRfH^3%<6M8x>DM?JXAwoDVprNA^w8YTa>FZ*j_P+)M_XA z&q_8O40a2g>d23H=Df|Eg7h4?7t9tKW(IF}{2ELomvGD5AA(&D=ITD?ruf73>3ShN zoHIK7bphcM4<3`m{lRQpgi65fOZ@3Ii7|I+MKzvMV`z@+wFs-eufR#_BAw9GBPVd$ z)aj!+RwmDqdo${#K1#vwCb%G;@K%guU=tE%b-s6QPF?XIPvR6h0@H6n1zfH$T1njP z@#==!>;5-Yo%Gxn&N&;ukY{%5nTt0D_nr+j`8DB=9A)?BvQCPV%h|9W> zt238<6yZ-&_=GeX^HG)=2qjRjCKYqMx$s|)IgoJgDS_P0Rb9If>9n>wp7cq~VA&QW zeOeQzsi(OwC6vLJHFohEqe0HJWzR(9l2*jBV`rdY1xz@0AMa6y#+=`+a5XTkJGm}) zI{JsEi7xmqi((eu0}qkfmT*Ab-gL=}G|DlmMmnTZ_#;`72=K+J4W zVI|E*b5tQX&jw5|3Ox4bdT-!8h+-M&0h*EnZT@yn_FE^7?H91e5syaBN0j%AotRE> z?uj$nG~vNY#4BJ`GJB3Zr=QrkGfz4`y^!YRY|0*@=r1x}WVtn+<9y%K9?&i}CPJ#p&p+RpCnBn?(x4zxrn}kzeXjR}-u} zYZ||~k6HgzhXdrHNRLW$X{>muPJ2)Ps)JH;HotJzu_$-h2@*aH!c9!${o26mB`kb* zPKsMxobJsDpLZswB2Gs)ol0j1!u7&9crlH=+>pmX@jo5ktDOGECVcEb>o>0jJDd;I z4!2kvQdQl1 z1+%&~pjiTU5R|^lG$dEEC0^=~3HdHoCS*#xOQuF?`6kgJAxM60QA=6b^0ZH)onT<> zXVidW*<6W+v-b%PEY@d zZg-w~j1yp*GQy4vH;*tMjxEl=Et7*}ECfJ%Q$* z-N6Yc*l&M4lfkw1T-(QQu1*L?ZBLk2+S^oBeqC{msjZJ?UEf;XnpQf}Sv#faXkMj- zEZ34{FSc3_lZtW}9-TI^qS$6VLYBosgsUU!iYf;e z#6x4=2kVIbI#Ht+iE*v&3V6rnzkD*Pd?99yr<;{Nq|Ve{z3%Y6d#!56^XuoK_jUeYc)}L6Py=mR7j>!ImLn9Bo`Fyk_Z{{(Yfq3Z&#aIE&*F&SG>p-7QE9sSFP&j zNjH6ebDTj<-f@y*-A^qO^f0Hj`90LnngB}A!_^h#?GFN+;TnNJ8B+kE3ad#b+#JOC zn#jZ0#(0Y#+vL6mFxY@6Xj_0^J;T!wS;7k@81_iuFnT#jwoeoahL>ZKQ2s3KxUHCQ z@PUYqg_9vQ!qGC;02l^ie8D%zIG;D(kRV`g2O5P{8ZL#XgZE~r43M|g{M`{t6&MIY z=K;<9Z@|I`wL4&;4_k?&u8znUJsKa%-rjx`Sj}m{0EI}{uq!_1#~aJxwTyKVh>L<{ z08mOjVjyiU2qcUK3K8H8VY8C49`}bx29V%i^*kEv;Ww18l>xYZurQUw|9WQy5-E-c 
z3UeXM$-3Ds%XDXuDChua5(MkABmc(~5nb|ohzD>oX1|As0uAp7{kJ9HxNcTzd0o}c z#?gpqjF3D`7l!qloxNm=NlT5gO g8^VqGKO4LDAXi?}tBJUcGZ65yGPgCOns_Dt2mYRQ9{>OV literal 0 HcmV?d00001 diff --git a/resources/images/model_zoo.png b/resources/images/model_zoo.png new file mode 100644 index 0000000000000000000000000000000000000000..5f8e8308a76b64581e4185668d7e07648a37c918 GIT binary patch literal 81462 zcmYhiWmH_j5-ki2F!&H$2X_tb?(VJuf@>f^a0u=Y+&#hFHNoB8-5K2B}Ti>j8 zeoU`();V2W)wR3!u5cA4X;dTvBq%5-R9P7bH7F?Pxc^)T@E;@oU0kCde@v~##Z_#@ zrNtfX99`6%OwBALtiL(gnR=?QLqXBTo0*uX$uiOonwpxJ4306-A-TA#g@r|_nfMKM zqy210`GSHJKGX*n0}U-CLWROgfbHGZ-;I#ZauPVoN5$D7+hEka@TC|s7g;5d!8%s} zRrzan+nn?j7^a?x93h8?ho|muw|A8QgWd)h#G!-XLmJEmd?^uMBosNNM0iHfQIv^e zkMjWxYRQbtz^3;+00!q}#W0kmzlfi~K|7?3ZU<)e$^}E2!BkQKeidr{^0zv82^`KJ zy@|qrlQy9(qW9RRIys`n!^cZ!XZ8O}l>hXNl!p<&Z=AF@{eqo!@%;Vc=kPOi!52Ox zS|rPW|I-%&92AtmRTLDO?>Lk=RDr>%0jYFM(E(dCF7WW{GVt(Q$RdJMeb)U_CZ1AY z8hH45ulIL9tq()xjCMoZ|GvL>;l94VyMI=>CWa3IctT;5S)6kYOg%$Ei9pFph-!F1 zpBe&l!IBL^M;-UDGGuu)=m^9=C(8nnXps;CpEO{9Q7eCvMBh5X+)$7IA|4E+*Ajs~ z;uML)!4RXTioP-0cI<67|?$rV;rvy{{6o}=pVTRBBde!?~aQ>z#P<7%&OLB&HsB3r@w~%|DI0*uEzt@ zJ1Hv1Qmv=*CE2|0>~yP50vHXNRnj?asn3=hLr!)K8ooa@z4PH!NwJbkqw8oGwOe#pp&)9OU?Q-W<)8^mt>N{>SJ^KrU>MT`~Y6+ur*~-Z()T z>}ei7kY^EEIc@gbQ8+TsFTViYCbI(eDbhU1p4I`zzrej}e#Fel`xRBB`FTdw&?;Tk z&}n#q`S3{W(5UmoNNEy-R$0l1EnM!FEJrL-45{r`8V#8Yga5m%-NpL>V>VD}{O*+o zPCalgJJH<3!$}M$f;bgzH^~*Chu?O*r%p$$x6E=gG6NxYoVXKTpy*J^mm*kl9e1Zw zm@`?_jl6ZJ1)iimzwP?ey7bllxZS4TecpKQ%=Fj}zujgq`QOYqL3Uh#!Pol@mXaLb zR)tKSnsu{D%apC!*C|Wc3o@V#MOH+cO;HRMACk!if)@(jW^eyUt2&Pemr407j0use zw+J14nr5xXx?S4CA<1r1o5L?j_6>52v;BW0Z`G@=Gm6tSW&?T}n*UoG>b(F(Uno(i zfwR03macZ!K6Gi*NJCJ39*rTXAw0^~q6(}xA%_Ss6%~cc1eWoPHz8L8viqo)){KPz z4!N+H&3V_x>3pSryi)A)echeccT5tzUu6esKmKQ>=s|XB;Kf29`f4rT-Y@mt3BR{* z#3k;Bf+}}BJ1f&(2LNx4YSnHQA{ml^jVi%kZxm*v1G$!<&7W?LTpqSUuPwS0W^R3Fl?L8I zQC=S;r<<+6iCgrL=)%9iisMhnd#iNQZZ|In^f}h1-!TmA7?#NJmmX)MpM=KWHZ#3s>=EDH%C5$xdVObG6x(& zBL;%>0w)o#dwQ-u$MxD(eo+aBh9kGRj`oH?t-=>2`Br>+aTK_A_dI@LIq|RMVBCBw 
zGkz^K@Z3wH($V~%Q}>?+DB3{vu!G74Z%^wclD(HL4@KEt7`U(Yy2u}FohE)W=0^4^ z(EkcwKn;A8>xd3sQNdI85BClW3n0HM5)Jbn-+A0brs^*V3VR##xV83t3C&5gKt7;m zGp}sfC+PIhfzb;7-?R$Bi{|j?0+u{ahXs4*Z5PZfwHxaL!@$lP3ZM0WRv6V1H;x&&bXkSncWh^6CZnlDH?Cl5FR9Dl44W*lJ3cY*XHgArCy z){lK+uv-T-!+M-$P==e0{*g2l1j5hC^dOJN8M650qmFHIWq;?Bx7+x3Tl&X>5b$&t z#XcUlY{^d*So**HJ-Qc=(ThEp<-SfgK=e%kX&z$OB-%E-mGTg)JqBz!94*c9x(z36$ zS>4K#oaQ)FEYJ+wx%}S*V`2vBmhyvR`?jyZdp*i7-_xWyaL)D;Xe=|cUx5%yI^F^Q zl>t|qGHi|)B}SUpH(gbRtGn7@5>^mCUZ#i8YbW$-68ohenomH3rJtKnr`4{m4drcd zL5%U9rR$~Ez+)@;|K}e20nxrtJz_r{J2V-Qr+>?;^Fm$-i`)**Z!B=88%Fa2A*jqfj+Fa!T|L2^~EB5rg?;Mk~rqz4Zy~<1Gq-3F`nH$%$n1T6RAiH|NU<5Y9Zu|8(LrclGt}OGx(21I0^lvB^ zC`(RIP6p1TIlzm-9kL&3^duhLnuQ1ue~Hd#N*D)(4rx6nmq3i%<}}&--n(a?OTLb#lD)rfyc;4^xcu+MGXLkrO{*@E zQ6$1bEx(=XaB5eEj3p@#VokF>NuaR&MaF>#;8FZC9|04iB1w@Z1}Jp$B`{BWIUYxd zyj2u)AtmQ^aad5m$-v5KeNf}(abJ~R6y2zH-e;%9?!bV^xhcm)4%;r>>R&S!=AXgLwtX$P4 zD;d=9$PgqJ_)mkB-?A9cCEQ-(SFA+{Z=f#e|K5IUX1(g&vD2e=o>qooGL1s^HNNa~cggP2tN#}cM)v@oM7B0EoRVY9kOfGySxo~?e9AsR#gY3U z;dREAhsWE?bzkj4^v&}wzc1t(rM(&pdX0s#U1QN0*2~M3^-#^93!1)14Y4wpb|Cvj zPNBv8O90^0&!0lOc1{$kS)ngvv93~L0J)&ZdHTZ-(8S~;4Ec=jwV*tvvi3#q!cyou zO{0?SzsulJB?xiE7cugCb!*v);otoLd7%Q3ZotWnibv!cT2kX$Q$4%eEzZP)Gj^^Y z0_0)oM-zB~Qv?vgV-KD7&p(-QDM@-01t)-zYBy*D$7uACb3=v`H$DeOM}r6No57-$ z-W8KMmc9SyDI(w}8^v9q*md6-T^~Iu=$Klz<#ClP*@k;W257h> z;x9^iITOvnSpN?j8a_cUQZ@PP4rr7uqd$HCGbksQm&2|?23}9+n z`>xOFPwA+U_J6*?7GxJ^NvhLk^D&+X@;MZDSuk|(*y#btC*Q1FG(RDTw#ay@DEAmn zp{#-HRYmN^FR6P1+j`=;ttR)NiBtXgvE5Dn{L8T`)S^&!{WFMi8oKS_;UK~d3*Sxq zYF~2zl#lXzn#qyd`@cbdKMgp!McN(X-uT7hy@h-=ApGVW?Pl|VcwJXr^sP6%Y^|3n zKfHKOOY68Ub}tfl@CDy-xSejt;_jrN=wh8@){;U|L(5mHst5aXvS_mkW&qffXYW&( zU^d_@G$9roDl)u^9*F@Ag0J%Kf_H*?L5`^KDQt|?Jmj05KXG-=-)b@@`|q#R@`WSc zy_}1-9v)^tkOuWfN=!Iy-ig_)oc_8At)Oe%XqU#}xNeZnA*x@c_5lV504MfPJ-L@3 z5KC@*{tPKT^T;Fe#;y-Y$Xhv&=41*yH-tNUBfZ&qDgdv$pmg6VeF+J_w(xys| znzrb#tVN)iRE329Hqe<4A{_@DTuR6y3wbj2gV@4*H`dVyZooM~=puTdiZH|elLWDP zfA*2LIkcrHcpZ_2hHV~K5{M-5b3KjrhGSavhJDJ!3zVgJfDRd=NKKFKp}>HTAJ5{K zZ*o{Gblm7drvV90`~7g 
zS2r=hO9b$RHfR>Ew+@%^yNrl=AcIc=_6r%Q27R<;Ubp$c9OxknSHKc*DCPmmo)pdi zBt61k$6K*v{!d8}UWsk3(3BmA2u10c!F)p(ZYah9U&nVKT+0WVQ^ku06GKw{T8g&d)*hQ1`0uOkjEd&#yHul!&URuAuO%$zuV$Bu?V*qr3G z*I)7$0|gV)sTBb{)z@Nzm#D+tpz zYGjCVl{pd@o;55O8F*mji1j~fd=vw!`9Pcc;>|z$&Sql$UUmd7dAGwTc+?v|PLR_D z(x9HYd5|w8XgW@oAGJ;C&p>*W_a1{AJZLnX@8NGw(cXN|*PdzE-5wIBVsBPe5u|#L zHbRLss2yn&MU9Al(xN@6v{{Lu9&(Ha+VMwZ90;zf3PI4PU2z_(lRfT$N*E0&6Mu2m z$@T>htZCB`XGc`(ARPjEJggtqjp#-=(^Q}=jw)^NBw)+Zaw)?S~t2^6wwMG3h z&ES=Enk?6ZO%G%5%f5Ct>qDlnIiw2oj3S`#$Cl{=;rU1yEfN@&*%vJbVC$t?D&zC> z^^UgF+7ae<)JRE#&%ev*Itm!j@h)2?d@3n%iFbZGuH|jiAD$5Z{bvem+b?xO+YM!B zioYwtc0(Tf<&%5Xuyroh(q>KVWrk?KF%YcFD`IU8+cK9x^eloV5y$k{y8=&dT^0I$&wI9~S7wz`?xqD z)UO}oL8@QmcHdoA)k$ysc9pVM<;N5wNlf=+wH1wsUjOt**v-n3GHH6Oql*7)S82U#SndUJ(oS7vgI2 zijz&~@sG-nzVA%|$))>$Dn)xK#zq~#bedhGTY`~shq*g1Q6INJe$C4u?}i!7l7Oda z2CZm^nHF5VKg9AJTCb9Xq4`#^i@0Zz2nFKe5G?y*pDJGgUP2EDjrmc&C)?wnHuJv} zT`ATD$4aGm+zv#Jq9#xQW3NxQ$j)Z`v4mYb>9fxS}=q z=d&KdmutQ64!n`FY<%ansF)bTx6CBdX~tk@Db?r#I(Z?0N}X?2jgM^gme`@+u6m|pL#MT^+j_m28LR5(stl|=F5qFX9#Zs@gG;CW_{N{5Jd*SSC6?zTZ zXIz;#k|qcS4Sw&Qo2h0oTPQtK1=81KybfMp)XHZ;3E6YbH<4D9<$u zf0@D;$r+guI}YOYcJoBXB}xbJL&tj8)=fI{+U0f8N~Hxib^#raTHj#{4&g6;9E})k|LpdetW>m z4`8FKc;1g^R6ejA(`HOt!seMzhDX;V3X+Wz^y@#l8aA#>bOx=LPFx%>M|KA+ zCD^R8wk`p(=w}L{-;{H@o^I3 zMFd`aKI?javvpGn0-_5DjCMsWwA)MbZVf0~qo0KBXJOUE+tTsHoi&Ib=D0OREqf_W zN=QSKnf@`uA*SC=7dz4U#2KliGd}@!!iD4VXROcTKzfKmdMl=^zDXtJZx1v1nE)@M4&n~r} zm~RaFw^9V~MB#}2@&o=r{b-NSMiK!Khv6FQ#nCyHFTNrUQ@ctDFoh>7mHi2K5< zeMW|BH$|e0jb0x@fp`nFXAW&t>AF_`g-R<*vcKnTs{KIeIt_oXxV?ZPvg^)(DICsm z2w87L3lhp0D0B3Uh)|8e79%+-t}5`*)(u;5~B3daFhT zH>5VsjeM3}`@pAyF402H&O&M{+|Jixfycyet^^R7Wnw}U1rI;CI{VU_x2taF0b-i> zmjmIw`>yv66SlE7MbsZBharRxw5Z?;zS}r^G{J^)eJ_~h&*V7w>57*6{`#@B1pMpQ zVTHOUGN*{pnmUucVA=vYCn+T)SNeohxCV}Xdk(6V?n2=f4A|eeR&h>g?D~rJO_i|6cz@67{jrPXB9*TSR43wA3%5Z`9 z-knWgE<~8tzm5dii1|u1XlJ0{SB$l<-O2vhB2eHiZbi;Ah}h}U(ISIF7}KU1hT9QNi%qC8lWQ$!iDkJuMtuUsuY2Ik*#!UQhqBr^?pf-fCne%iszA7Jx>s_T^(X{ 
z=Z%oX{t9enui@7dF8v33iVJ^})ZJzGSS`+Hrb1ZGjAd=A!ZEVVxy&dcW4POtl-sNR zXG%T7fZ^&;udSJ8`=DDw0H&SNI5d^zHcdfQsT7);E6Z^SB=LH%DjEgziy5^)l6NAa z(qe$i&6tW)k1Z$1+`Tq@?k|S{DR-XBchxiI6Ej^*(HTc0zB0#$JYoXWw2OH%oj_D+ zr%9VetBc!)Au$mmv(syR?KoA+0-)j~jUJ7{Zo2Hejqg=h*H9QC^JJP{-*$9dq}yrd zZG|H?ed+4gGidK$Xe>DHr;G7E0uIIs$- z@p|KdDL_G{UR@nnA1>QQ7u(}0(R}e#a#!>TNvTLGf?$3`#BhMUVlLY2;h~f`^5VtN z#g#oU%*|D%i6KEu{xG&f|M1D};3<&_me(J7fW?Ui1O+zi1evzAuJEkGD`?9BciXe!3J6hW@5afyj+FLgns@ zvfIz@T=m8ZO-xY3GTnJJ>OMPY$5dTq!~!ATc{@=e-DV*|2w%;&V4&qwcXKeix3hwP zAS2tp1wMZRpW8W?DM?%^(PohMLs3(WXsufTw(XbPuaQza?m9Ma^ZrbvE3aGmQ-sdj zrtsCRmUf5e-~ASye2ITv>C7cgyd0O5ZPLQdHS@E`z&3Gwbin@Yjr`K_%+t&w{=4E zD?pd4&B~hJ`>V@l05I7kL)~;V^>b$Y&nB1a`UCupx{+G%Qi3Q z#w3De-Tlw>{uwk1Ug~gL<4?}HDcg(C`?a0lVj=DXTsn;gP;upEeKB4l z*W)sL1?IhGx{q}+hm_$}Cim<9VddR=EyadSQ{k)rx$^V|y50Yj4mqcohp=h*xEbNL zyF+_gxslyYxL~%;@sljZ{Q5N(zwXaS9Q>ZkY0eu}n2VBSeO1~D=DfTYgm9#B?jxqX z?^~HoGo2?2KijQYcZHi^j&CZUfUs=ZM>wElpoT|j%MjLzpR6`0 zk}cM?r$i{92(kbMVrN&z74*TXhEJTzG}6rssrzvWJi;h zgj^aY2BrOOH9bEH6zx+`m#r7GKgnFOUr%mEuMr4Xm+oP$qQX@hiO9i|A{q)#8vi5JEOUu<2|<(H>uvy&&pY@coW%W{@%kZOwlXi*~bbJ&<_Qb$oU&>rXT@l|7e!B;17 zBekvhGr;W9a!!f)!@!TN2i5=WyZ$Mup*xY$;GDtRxiVr6A}(ui!*|HyQ#YnPgjM=_ zv+wiPVa2WD0LaLdkN@*$A~Z(p7Z1v=wknhZm&}mgrw;`vQ;qjg*oGxrWEnbKB2A%> zVnVQz?@j@`fy8T#E9&3*cWhGZ+GG={FNyu<*0>lxX$LDV1Dat5PW9Tae-_C@rWunR z-{-1)sXUgd^Q@x*gNrq0@-BQg_*T()D&PIfYhZR~q?THudXG8+FhjmNcIDU6A*fvN zHa4Mahu~XI5_B;;4#ucy3)|N_Xia+Lm;=p;s|E5=U5m@Av6mBkQ`B{Gd9TX46V~)J zgqv^S{FHW>p~BbVI)qrDlk+N3T4>fsVB%!m^t5KKtb~m^Z3I;zb@d8*S)`b@i7VoikzmUrbyDh@Uw4 z7sMYMV3)*Y3=;TYDg!kUSuIG)F1NWjnX$5e9Mkch)z83gr1Wry5+uy?_;3Cr_q5sg zCi3a`%P1%3kQHP-F4CP=#8kr$qQ|Bdq#GKPoDYQRuOp?RY>L+lCG&%|VrquknXib{ zoxz4Xb&5w90WV?+Z$hfUwHU;gt*SD!;>es&h-8?L$Q++~aJ^_vWC9moLdl~{U6AW# z@J3NH|Ax)Es00O1eD!7xefbS|h*N$kgabC8Ep$$NpDtb?g-U~vqS{*6qakj{i2{d8 zPPW^K{;@R6rgPaM4lGt>4^KND7m(($U|U&S3!BhBtI7`iBuzQ`C1(j5h^C>fo>(bn zHx2!c_IO$13AX&DNO%7Wq*jYl``4EoOZGYGYv@iVV-^sN!p*K4ezDnkCtv4`n*wB_ zbrvrT9)bq2*FaReWgfz?aI|}|d8wounXt38 
zY1T7b5m>>SY`&5hG3*{^tcS2KeiJtF`)}mNByFQ37hkz zBl#cg@R4Ovhugs1TtuVp_AVN}X_Z!oRb_Mnm#`Aw6luMY#A5RRazmEWe0x19B+prB zf|nL>mC-aweFzXw$e7B|oiD8Ri~2^oO2EpmI~`BFneW&2O}^h-?34#Go6Y-yp?LqGgc}Iv1ryRB$ZbMnq8-_!8dNAa}jUz|-1+V~G8G*p*vt|NR*{oF? zn}_}DW$IlyjnyHY+EWd}UgO=t2|Za{3L_=2p~Y!&iuKz2!E+J6IaGoLW8j@2$Z+ z&Z=toZi50MV!Lr&`!Dp@u)o4}ZRH{lJs`IdMG728_O`Es*#y&|>7n@)lwlESc~;>$ z5OQCA-Au4_(LZxSkKc)qHZycxNeRm4is*>7s7@)MZ4KiCYz|p;Ctz%>qlkgYmfM@i z-c(r{?bi0|KTepKZL{^)lyN0a+wNh`g1FK(bgVmynn!5Beck4zq& zD>Ja<+RQzc-3#r(1fdwc4(-T2?YCTKm{TVKQyz;zG{4CUKMerJRjOg`XKEx?W-6jc6lCJsTDrcxaujAhm_kW~a z3=uGAFeO1p^`{fm7!7?bXjcUm0)wjX{-2sFk)pC*T50t8%fT*4_M zdyXOe#^Z@s2v?@KixgG5l*g_p4 zG8ovJucm!L9Hx%ZzM5#?3xcV4HMKI%UyH)C872N6*L!rg(v`?Fw@6%J=Hs04jTl)P zCFyC*j>Uu~j}C>ncTskgRe5>`(j|TwK#3V!=817RGQr^?;@*a-dd}sft7Q0>LCk|F z-W481(2w;(A9e;)9(SZko|hH&J88Ug&xj>3p$2;PxFacKGoq0r(w|{5LSX;>5&D|_=^vHCgc2Zy!@-ox@Mrj zmJ#M~OcjM1$*F*jx7KwJs%X*?a7+&0<%-*NDVe;w{j#6%YKP=~ zQK$6m>sEm2zmQ9~o(FGx`yvR02$K#H&#Q1^r2nz|s6C6`T14LTQE-_f=i9X{vg(S@ zC0kLgNqb9DMEBb$^47re60#ApKRl|Zm$L^15kogyJ)}3GLDjjuGEjziV}p!(ee^K)rSO5wx0mfcn^;=(I+(Bx8ksydq29F zA*KMBQ>6C-SYk4gaRj|;2{}<2QzrgfG}e1e8YRXg?jjhW1kAUNxn`so zGq?J1ELSM#T;WrhOnzjjTI>PxN9-GkghZ-e-v~$Fd6<){m9;7GqjbT~Y<|VygfSWH zgcF1~UC6Ij_@Thv@uA>@^e@qzT;zY`9|J?k=rA`my;zZtP-E8$bNT^^kY} z`xCJo<*VM?M~yZv2qdKAGyrYPVE>=}TJ2eE>TjC*qE11))IZ5Idlk-p@bDBXX2rf| zMiDIgaV{B%OM#OI0zfmI-=^zooWa+Yp|vu_OIl;=9 z!an*B1d`2b<^z}WS}GDQ%YZ+kLH)8V@Z~`Vd8Yb^o|JSRVmNp8KzQ>jSqDCwGi|b7 zn9ke0oOu>~oFXWEn z&$5%Yk*7t_ace&`?C=$b`!G-aAk0s@m$)&4e6lDk$jiZ`$g)vW4@nayjHr<-@?4TN z5LN-+eEup`4B4r7N31D6WtW~fe4#Ms>i8JM+X!60DPi}d={^h42rY-F1R~o_O8CwW z=Y->FY`tv?9YezxpdxLwk?wHkaozpI=#I#d+cK6xOxv7rw1Da|31IU2RG%^Y*0%BL za3tzx>2pvp!3?zBG2v*m$<6DQG@^STTUJtnawJ<4suU18fr)C~U*u8d`|8~9*xTJd zBy!a?GUP5$-(sloXZ0zkzsQ8t8LOK6S*+6(>G>P<*otf18!#)K{hAA>SVY_U8Um$d z+AVVVIN@i+dX3an2I-*jzQe1lujb_NlQOkW9 zVjKIT0-vHX-Z8Aj^QM5jsM$@gqjhN9P)o#0u$(Jby1oO$+~@c~LMC8~->}2RUzv#* z&u~JJ1Tb55n!D2{i~YR1G};Sa?iqXjPRSsm1c(G2EZe} znkAVzS04p~u 
z-zSrKvPnd>08b8ASL!>}&A4i9s7GS0!F(tRGcE4fsv87IcB;C?Vx3nyw1c24e8*+# z+a`CJ3BsZrPaF?^5ugh8p>HIlU*itM@7`0&i@n1|mh;SFT>@6esV{Rik>Tn=7cYzM z`%G>mEfwLujR2ku)_4{e&AnLrzFcq3W;08hE!**Wx01OMvtswq&u(`%nFlZq88IfX zTd2#iYbarYw;3?X3IJ>g$$C}b_~b$5vtc};s!*uN5h?uILUxFgrHkj4_#=xwK=Vlo zo;(*6#f&CEH)SNO1*h^&%FezghIcZrq?*FkGF*7EqH|2IAxX%Kh$0c($)x(SOC$UT zK5jQ!k>>j8q>B5Y;;LQiQ2IZVC0Z$W*GCKzc}(rztvD)=svG+%w4&e<+3VjgI~8ct zYJN(c%&n-&zsz4_x27iG9Hw(T+pyv=c7+unVa2Xd7C@ps;!c7phFz|GQ=s85O$=13 zD@VGO7Q)nuvl-uvJNpHz`_Am)qAwnBxu{EaJw!8X31gscYTFD7t5qOW8T)Z)ftuNM zdII@CgaN_BV($sOGln5&>t~^sa(~xFp)Z_GOr=NBduVuf3cngkpb8xB=q-%W7&-HU zdA!f%;ku>22bTzKb)7{cx$mWI*m=J6v>HbpJ$Q z^vbtWt3z}dDUz3YU6h`R9a+5B9`sg=%+0gHs11Hq^gX_CwZ8I1%-Jat6o-nb^p{`C zoQThwsmR>7^pv~Jg5`o&V430DiM?*tYwV6ZflmT#dGG~#>gKnDyR^&?&7q?7wwI#| zd89?lD`|)|(Cn$PG5R#z>mC%q*q5R;9;(2mgjpSun;3 z8b^z)dsB5p9U~5V{sEoX_2DZFVG4VP;P=AO46g8^{CF?%sQH8XsqQKNGi`KO78zOX z+{_F`gKLE0LH;cF8@`utWPHrz6B?|*W7%Ti0c-F(8vPv{>P;wWUJ2u}Ig}PRY%oz` z+;yk|EvR`3=oQ5>B{7K%_y>hca#N0SI(UdKzTKR>Dc9sZwK@$Ih)?)F4h;(!Q1*LL zU;1%U!OOl|*>+AZh!AU2Inp^X8C0k_0Cr+{g~IKL{mp;11xOYxlnElk{NT!DC&m1B zuYIzsilCW&G4qQtEkaxkJhV3MF+X z_T^^LV@*004RRUDK%uwG*pW5&jV#{gvC+;NlI>gRHATidY0K-fcO(5E3Ki#ZIqUR~>7 zNithtQkg_u5y8q7*JRKdEg4A@UTP@g0^Cv$JRT)Q0GiVbZ1WL!dUfBS2``;^+_m`l z=v3@*>PlAtBj$Oq(1(Opbv4&(R8si;<$9qtN^AzF;v4{f#%~cg>SNnJI(Zw#5MCe6;aX?YYed(b1@z>BAF8=jOgw zOo((pd?>X_IsBtK-f{4~Us|Ti8#<5AJ1QK+=cES5`%rIFtkt-g2wbC+`gK9Vx>;RX z!h_H=60hSihLsz#nP_B?=4NmAvR_qI?9*Mor{yef(;R89Ok8^2`Pp?dnY`0Tr$})I(5x8Dj|2pJm~>!w|GWO^sW7hWdix=Ct+$dD z+J7te2RhsPYN)6#W-oGoMBeW$o=eQ$m6~q zq}uReQo@{s&vQI+`v)d*hl6TK0><=32^rH=cN{ls)I`d$25Pp)(M!7q{>^}E$vSB1 zQzZL7$391ySv*U-wAm4|(%k5=@=7hAbXIOzbyszs)<8rbvU##mz^=8p@^YLW}}1cruVw&nd@I+4VUYW{*&w#TVF}9Aa05DL$$MLf+Le4^Yi-T z-;aNV5hd|@rB1m?mdw#GYwwu$O8p^@zc6W>SZQL2(S7}g3UttD<*-IFcL#)W?}V4a;I$SBx|5* zs0K*C{8WmOs_+Xp?kQq#m&lxvXrjGQ!~S9N6=*?E9#H=;;OAYBlwuw($B@H98ee|0 z=va4U5*xl0tnao8M?Cewq=wO^OkZ^|I;U;SBu(V}(}dn3n}FNCc!OG4TQ}#0BskY{Lx)6S0g7CuwQw;6jK;&cr4hO?h%ah9uR#rJ1N6d 
z7&J;`x4*&oHCspX1$uHQp;Jtd@QEkGgeYP7*&W;cYZZm|e6c{yCZEJ*^FTDtAM=>{ z%=UUn`&6Z7kbGXTg9k!EAdJTOm25@uFjVnECNmYxw0Y0As4}Sb*Sh~kq)U@v4}qMy zcX3Q4=}WEV?^jWHyU*W;%?$G}BK(=kEP3ssU*$UHuIDlLVx_m*(47s~uQ%Lq_?$uW{J^Iasq2y29A82xD&M9S zV)>3~dnj=j?Wf~rb3+fw4nHKI4c-0%XWr9&wXdIZyt z3mzwHstf?QG;V_7{IM#mVJYx6eMyOIKNo6Y&;4Bm_gCP$ebbkrfZd#z?uorIX+K*w zx5&BFnkcWcbgS@9;;*bDfcq)GySkhtN|WHAHKk685F$S=5NnLcrODE3@h$7SK7M5( zgKKDOAaLk7xP_{JtV)sYpSUXUAh1DeRNrwH(8#rp*||27Q>dse zM2F+}rxTI6q_l}Nw+K0fu-7lLMY?)b3NjMJrFJ`~6Ifn>2^y+ja4`ni2e!NWFLWTU z{C}cR#XkX=E@OwV?gUkGqRQuiIX9fQt{7LlFU7!hMwrN>vJ&Q7*r+|^gWlmXa{ zQs0EMNEPh5`&|qG#VcG2W?c@=FU}2cR0B(W-DB&x6af!X3woDDJ-xasraCAK-fBGd zfPA|#Wiu7Fy%=<~sExU(KiR_j9xqgp?7$}@3x5cc%-haBxcAZ*>f-=6u!xE>M|wf5 zP^;?&LC~P@!5ZaSV?!v%JZT+TcYm0aBb@_{2?=pcn ztvAU9%w6Kk_ovF5o?KxO(wAhay;YI#GM7` zq)`!4_By%2Y-D8|djhecx(WhrmVCJkW}khl7%hq-fNeu8J1$vLq4uVc+gU%`mY;0# zZQ_u!2UAc}pSFY8u0p;=>ok3c3KNRtQ<9(j#JE_45+^^-6f^Bq>n-07;9WG$PmTjp zjDCc(Ab;sIWSn{Xi_8M)ttNA+Zhu~$<|hy##R8dD6FL1Sr!u!?eQ&v0c`uMGkT{vU z!;9v)+m!coJmh6`T4eXYNV~+Vxs5-Jo&`uq9q^n;^h$KWsHV+7M);q))V{%bL9|0d z5@@^4)#Od%j-)pS8OjRN9J>dfo`jO{Rdh>7a;9)@9GXf4D=J|RG2SCSx_3t}n3GyC zvniOlJeI6{{KC;Tl0+s47pnhyNszQ3GyCAHkj&06N^u7})^{gHW%eVE=IBY2xat6_ znua(?Ha*Cex`R4!GNB%M!pT0h!?zdn7}*8I5h94|FIglxRrh$mp*tpcGAZhep5@R5n|VQxcca$ z+#e~J6R8yvw5^5cekl(QUeZn#4P6;P)eUyoLC=y^{1#BE49 zGa$gk?{|vfHKkG`piiB|C09Qpvo3|IBlL`naau4%$oQE`75a1QsonaM!FGww#Ugcz zK4#g#tu+7GcH zU*sAk?OEV&s=AUvE1wnbsd{JpA_yGCYPQY>qGiV8WdIiQf&4B9uy#%%N2HgDbmwq0@@H|z3S0FMJyBjnWbSIr}kS`jg+0tc%(8V zD0F3_hb%E`8Ez$ZIp;J8Lj&E7ZOTO6gW=&4-acNTU30WCLY;K1t{n59mcd$4zHe5o zsbe1-6FX1YHgLG{rG{+GAbs<`t111mw)zPpsm4B5KJ*ADSfBb<<4bImop zDxp6znkxIwJ%B0rn6T2XUq1@L%qv_yt}Qhp3Nn-c06+jqL_t*a7`)>vkT3uT{lW_` zjAGii?ATofpLmhJIjpxq8pZ1--sRDnqZOk+qzC2z7r5x4Nhq-3W5Y)V-3a`6py_!M z5Ugb0(Q+6WASdI`8#ln7LVxTH>^86h(5dB;{Q#*D)XR3R%eevMziu=^LcGU7X|#)| zg3m|^RBgsz51m^>Jcw)UhBm;ajl5GP-FH1ZxM{L2mN0nWfd}}3nH>IZ^H_}Y#1nlX zLl?H&$zPS7KYCsgooq$Da4dCCnC7t$-@p~fBgKPsx(9rypZ=L2tp3JFa+7CSZYfs@ 
zz?$^egO8b5wKS>S!Pbn=3T69&PW8|{>@xe!W*Xs^TYa3~>mU5sN2;|y>RrNX!R$6a zCg-5u+HYd;fe+m6&TjS-ukHK3i}#fkON;k|K-9L^K~;=VEQai9N}gMJFk`li@os~9z`U`u-hqC7sA3W@GGNrWJ*Xp zmQjBFEBK8DJWg2XPe9WNy>rGrEw4ZL3(mN5jR5E z?`?VO>A%VwkNlr3onQBjXsoq`la3Af0&8y7`zinRuYX-$ef3pNgurA5EM4+$(s~&{ zU;rM@oOIGjoQ+YRK~41|HNA%(Dm`}EU%GCyyR_)gO}mRXIAvgS_qJ|%6bd|L*3&82 zk-Wz_J6bavG@=F15+MnB?=`W9%#>GV=%r1_DXrOO-(kpYRisjQ3YjL1K;i(;^apw3 z1rhtHOVXX`r}?(Y@sw{gg>sO=&N7@5VWVDI&3|)@%z15uOn>2?(k!cFk7jUnfN8aO z!DxLa+@84Pl1q|UykUO)4<42dy$6`xv{TWnggFI%V7*pz%-};ydQ5{>0R+1o< zrsGUH;}3j+bN#dlgN+FwHDIol8R*eOURMkXR}ZT+Z}t^*WFZc@Xj3^~2&muw!ak}u z^Js(VRea_%YNKsvADT4ZqG@AUx_kw#GmvUmc)5QNqZ>*k^HcxxXiE_Z<}MaUd*Z+I&|ne629IB5NO9VzALV{Law^%D!$FQCX1T2=_u`V<_#`;@dY#_ zHalx$>D z=u2}EETLv~J!+maSMR<7O&4SQRZW>|ys5_E@j1~tk=oim=OGf|UD&E>MO50-5zhC^~7xKwZ zeliDF`FG&~^u;^wxP#wz!MbL6>l|sv9e3nFf_MG$6-U;lYP}91p$y9%eCu1^;u!>9 zFsjnV+e)z*5mhXM&gX?el_;jr>l^y!r>D_G-1KRmRXoU&IBY1Sm3L6W4-=I$3~$c+!bDf5@= zl}Tp5WmP`v0bPMZ*;9f!QBhU`(5r2Lv5L-554sE&&8giJ@4JatBOx1m3?i6PUi;!= zPE+OInFp|-BEIqR```aw?z!h4K3vD_D@^yrRY~otBlT(7AOlD!!Y43)al=pK=mmGn z6BFmj6O-oaZ-Gpjz527_;H~!S-vd|_69dNu7hLd;FIm^cD!kc>`>m0NeB>M%rc0GX zPysvz7taHRV1tBs!+^x-<1h_W7K}(i8pAj(3QmteW%511d(a|h1TzFT1Yo6&hvmSd z9Qrf$h60vIQk7t!MWU*D?01augRUN%D1bs3YOUymiCoK;$5yqX3>wx&T<(K(`~=Tfa-U(iSn zy=km0Sg;_$s?3J-K=z)IH$Tt)~mSWZU*DxfSV^Rs0alXQ4^OJ(3gfBqaJ z_!P-(Xoy1C0Goo}20OD0u^+4nmjr4?5tJ9QvG>6uw;y!<^j6gmPZ}gm8i(_8L95SS z|Ii!q`)8)Ap3L$*OWgou_EVhOuX&P(jkzj?m2jX@`|a?7zqkUN{^A2N>efqDSTUU# zCnF`oUbMJRpPQV2AB z*oKl-JFM5Mk9~U989bIOUnw^}@}^9fu}F5^s-1jTuTJnlJx_=2(3t~BvPnh8vB>F= zM^y^j%Gj(*ATn`r{G2rG4_X^a4B2(Z17z!C&XL#t^501?mes-9Z1~i1mEOGv#*ZJ* znQj<3@~@5Jg~QWMJB<(58=~dkFn~N;vZF=cp$y8n_r4+Tzn7F&g@3yT&@l5fV87#v zK%xi+AB#eWF^q!o0viJ&15_rVM5_$WWI@aXFdq>qLOg{b;N`*C^gD{SL zcQ)OB-#bYkn9o+`ReWgPt3wMpaNAC)9>{FjxurDK?`|z$;kKw+%Jr{rron{ex;Djo zIYs?J<0(6SmNi`Xm^^;L0sr^j^TV=0KSexcMDZPVd?gPL)bYiqdy;~9ZYQJU=;A2r3Jg}Y_ZfRrCIOLE+>fHTVvy=k6K?jg!Z|Ot1WySpN 
z#%;Su(=J;U+N<1V>4AqHdZ<*do;&Og(IkS~6qhYaZ2GACiak4~q>Z=e3{oIi!Z}xyke7_GhO_>t;;^x2y6r zY35>j#vEsI26StklQ2JH-cnhzqVSa~RUEFO;7fddxi~gFUYG?INdftQ5P6`(e!$ci z0@6SdWj}s4h^&&P`Z?o4$6YMX-&m}84d$cMsrw4}9wb@7G|d{db$n zJ?Gs@tKnZVabyY7qF;6OQF6#;T6p;3hf6iKq4n11RWk+oxO&jpwx?9_3a_Ny(sSO4DN|K zSRD7`^2*97P}6TBN9@>Dju}$-+1Pk}K7z_5dMfron5v)wgLKen&TRq=2=J1PlmG@T zNMk@RU{0X`N>E-J;NVsI^5Ra?ZSW!bBJ8~e(=C=yss@ncj&;glO}a=gzW5?1Cg34@ zJTpD;zyllfE%cDFK?jhfdIgfF7JYZki&cIz^#E2S$WwP6K#&k3Qh-5}@qhyy;1)tW zka*dGQG_G}AW$p4{_?>k1wCV@Jfp*aVoZq7%tl!W9vnNb&H%E#4hHR-i_BeO5}*u& zN^N>ztPL`F7X8_MyVpB-;A#NVgIm>Hxj@!wdv=#6C(M(X3znOWQMpVJEKj}xK7JVu zvZT|!SKsPM8XHhicce4->n$e?>tThO?JZB~qK7H@Kmft+ZRS``L}oWAa@!M6L1(fkeF`iuWmC3e#AuYdhZ?!EV3-g*hDYTs}V;Px^;QI0PY)~0^_ z`te&YANtUTYEku;H|PM;<)fENhl4&VOY~b4OD8`sEe96UI_tHj0&W%_z!^=RI`kQc z2qWnJh!6N-ffzjcKprs!du&r6VL3LWD&v&LLmc-pIK(Zl;KbgzRi_4W-a>IC;@n&F zmdluFi)EYc#ce0tuG+I-JGp1{Tyo$G!V1t(yn%rAWLBhH3Vgv5EDt1`Zr3 zc&QL~4lpwoUo6CJ=)L#ed#%Ft+-bw6rk%HtHoF`posPX&n)TRv!z!z!zvmvn*-ieA z58gg+(N^#4YtV=*hCIk2Zv#O-cxdpzKYs{*nnBVaL;5Hyh&UrJ^28fYPM-o<&Zo2w z=v7E<+N)n1+6De?ySFF>e$YPu#zL97(5@VIujlII`bVeABd^WV*;i)&s40u&^1CM2 zs*z!RZ5f|B2nV1ls0rdL120RCtlhER0R$Ix>#Jtp2q``65gQ3QaM#x zHxJnMvS5(;!WX{KAi*1a0BMjF713tnfqeEA zD%H08R>i=_g`O|@s!CMgeCPRQF>`wb{xY3$R#t*%CChAZ!i$2T6F8*iO%ZIZA}M%!d7(}nu1%fWn0&8#2_13>WHt3& z4U81^B-R;13U|=9=1m$&*S0z4OIrNah=-oa= zhW&*}mb2pnmXOp)mdu?YvtN5!KS3}`=Dq%kt|mR11Bh8tcYED+*KtZOZpYvSIDBmn z3jy})*RL*d1)&G<@;2TB!JFt?Z(aW*g%~)nlJ?qCHGr(mLlu6VJb({K<*A9znUtGFP0%2=l9qXB@D%)7G&;it?F(-% z;xmoTt&1)(xL0d^DG={?l+!=Q%%Sm+fLT@;u=2DV?^4vU)AY9GZLyqM7Z9B5i;J=@ zWn6Q83lY+3r=vQwYEmYO68M!EwAAlrm8~=4$HwaPVH2$M3JX5XkUz-;&S7q!lanH+ zE%JJ-l-7=)2}0S~6%Y~{@gr4JN8T-Ok9xS?51kqJ>bF<~5a0U7@0Y*)B?%E2@ogfk zD2u_RN`(h@+if?_zQQNT@m4z4yvh@nAi}M|{{8z)$ByOc?KxfKi{28DYF~NaU3j2% z>+%)H@+Av3Txh^xurk36@|rCI4r7!D7z7t1$KHZY1RTQwj+FQVO&BrQkU40eJ`&HD zAvoj1tfGhXVft}H^nt%8+{QX$`*J6mqQU-^sX9oKeU-jMxLokeBY*pQYkiThmA>V^ zXjvXKtxkjc)V-B7QQsm*f(^B)Q|l(Fp5eltx(vzcQW2z$0`##~^{h74L=g>(@^(5dZU^|CB%e@sF$| 
zmOE)k+8_hSiurG=Zhd>VZRdvAP|<8g9%$RPJgaE=Vtw1%BMb);MkG|&fcBiCk-u>s zvBZcX_&d9?C|@4W7@#yTCE5&WdN_l$x(i+SV<6T1 zq*+U3hn~g2i8sZM+Od=TZY27FCfKB-=FlzMN?%=XDo=QxGF{&QhdFa}FsesMAsIEm_%i}Y&b;NETJlY93fJmgQFwMhQ=KK|giYC*pUf?Upo%Px?GU49qW&Z1<09k8lte?Sd+jR?R-ln}YY2J!{T()Sw zEZ1iv3ujKz{hC+xe&UHII4yOD9d?kfeeG)vULhHat*ojPf|)b~G5KKW$9=Qh6a zjc=4{f(PjrUwkp~CB$k98)N{P|M)F3_x>BCN&D{7tk-tZcJGf#t8MmOjR_TabMgRI z0?yNt1$dXjgBdc6B#b41CIG<2I~*ukNGb&}g+c%WT`7nWVFAms#;qd2kHlbxJxhcj zo+W#bnLJD1EZ4QF26ih4`Cx$z&c1R$nDC$uD)|5c^eKDj%EK!EfibgXsa^r>wpCmC z{0Dl6sI-4j@kprgj0JUXmtiffa}VB9{`{}$y3k=`Id(`F8LF%4)=hq!Nd>__R#CuK z%C@?=;?S+i%T{#PnO~O7Bj!^Fxd6PU69YoYeRK^(n&8)Z@S*kTu3(#?2e#a@yw=(LsS{XB#x#ur7OoT&(O1KR1sR;t zYaSE@QH_Tb>o^7y2210&vdeO!B*rZ;*?@wMJ7TEc%m38DV08n?OZqOyRJ~eoCKu31 z?tXce95|@GzARW=h2VC{Y5Fxl{8pD{>45Qz$MO9QO={3&y_JP461-rTCoHp$-Di`K!7R|4cIjdi1=6A90tA zf8_Tv?PZ&?8hWtD3vyTw>Z@Py zNaG~977m=rMNX6RMlds=@dJmSYDuO&sxSXx3T}Premp!+$pUZa|ItHl$vj=bx1kAd zs{ikU_8HmWS)?oU{^RADQC#ibd|5x5b;-;(`!Ybr23*)*IsyU56@=24=}bWBc>7`$PRmT458mVva*mBrma`csV$}v~nCEFf$nzYox!;Xg?cZlNvsCz$D zJWe>4MZJK8ZZo2puKe78|BuS>i+?LeUUjeZ+xNrLsBw9az>9L1Uw%1fY`yWu8(@Cl zq*x07>8GDwFLhq;1IWrH3uMKt?4UGiEX}%ay&7}n%%N<<@N4HO<5>sEA12Gl{U9u z^S6wcDo^X@pI4VK!|Uq1C(3ND09;ThtJMz~uQL5zRKFnhoT11_2>q(LG&rcNoSjuy zSm*+i9SdMA41IxM{g$-D$2FRdo*K2_6an>~)NP6u&JU4;)xEjHm+3%NxRd{>#>LuU*{`Yws z>)qCSz2CxGHg#;trlwu{N~5N6zqN#_3Y)11cHVjC(g$m%zVejx+-d);wiX0D4%j_= zj|$QGK^%D-JV33X31`I~Oh6L@3-Xi)oJIB$UCskS()yNXxe%ZUbUOK}PQsd2i~Ze? 
z#7!X}HfggLt&pp9U8|jR5UiB@nm%KBa@<_53#Q6wtA1BH z>6@!pF<<}lI5||8Ysa&W0bN_^mjfHgo3oe5%lgvalRD_UsxKpU)ak}8^3Yq)4nN#4n8@=`t)van$trMO`?B{w5XTS23_n!pba9<<9=3zV$6R`|Ps?UmrZ~xZ`A( zU3QTkJ$gufJFi8H77beQc)foUeg2cT%d9_rJNduW;KQZs$FEN1Dzo`};O3ie<{bQ} zDeg}ncIj>DAvrs}#tZHlb&L|_A|RCm1t<((EQ&~eLr?(Yg$MsKE`pO@p?JUp1oQ|@ zgY=~m;A&>#?A+@eI0`T+ z&SC+am2uh>T`0Ly!b<1 z6aebQb7shh8!wQlFFs6?*dTcR<~P6L`y&C{9QR-R;ukrIU~P572fOe#`r6V4xB{^U z=%KiI_534T@xIyPjo>^@AM>;0}nh&_WsOwrHM|-HNN1dQH3m658X+=?@?|0eV9Y!{$YU$vkQC9fBXJ$AX zn9Bk@WY>|yI0zY<0SEPa8M~C6u&~!Ci!wCYU_n7;^+_+aBt=@v=#V$J0H{BdVF0pD zh~@Q>!cM{o0#Sjm+K`l+;9scAj^MV{3Vm@jPdk6~JQ;e@g-(l|4jvR}LQDDFXmZ3r z<9v<}os^aF6mT+glVDo0$fJ%s9Cwbi(ka3tZ@x?sHaIO?woE?qk&noI_ubc^rOXR0 zn@$d&vBhtFrS*ObD-2tyFZwK-K2Dl-8z7BamR~LlnrgrKdH^pd;a+Q`UPBI$gD$>V zi|D?!0gjQ1q|wQP5Bq?{K8KMm0-OdD#uKWr&=h!<3NXcYvQ()Pf#MGmT+6l1VeuL(#7)JPd_UQr;Yg6nV@*o6NO^%$bAI?D>dLp9f5-li5>ZIU10^E_azJSDnVr%yZ0AP@06 z%0UfM=r#Kc7Vy>Ub)gx5!9y-ofhpOg3oXXb#_IE{sC8J?`q=5&0LiRlK zJM8#?6@h`J)xeEAr%icA3Eg<$0Gt8X?{`~_@L&?O9;93&gQ!;CXqv3ycrTf);JXgun{JHUR@BL*%%d3 z@)TZk_8gz(T{+b-5B>~4?O*rE=-Z1cP0;k;?>IT&oa;bITA0F=@gW;>U=OPhFs~1I zKofM3XO%6(fw$vkv>AJuKN$i^8Lh(cOk~WRKcShix$Lsb>MZpUxJ`9GY0@OYf{Hl# z#B_VCg@sjj@ew$DCioA3_=6zsU@bAcY`L~n4Ipdt(7zdv7n|<9^G<##4EJL5ulub} zQ_y(uun+hHO@Fxem<0gZNn*hUB9upwMw#Y`v@ zXk3mb9jURgWMs4-fK+dXqh?7jISN}@Bp!TT(oCva02LMXfe`uPSmbnq$E~)LW4?2zJ}}pHt2~*Xw4nAd?>2-f1_LqdFs7sn&;YI`Umre) zL8yy`utJAI=@-EC%J#8q&ywk*9y0@Ui*=!JE35WOIuUGw84QZw{N^|1*kg~CJMOrn zG<6r%LCir+z|tmDr%qjK4XXiUtuET|1h^%IV0+U|H_4-qKDyx*)@bO`r3)V(o^i$* ztNqYS4NHTHnAw7X3Rg{$`tI`~+2`{=w4k89*E+BTiTVOq@Nu67f>MA`B+3yC5XckW zd58;PfTf6Hz@Vob@FV2l-w6>ggC}4FxZn^+e%J%C(8GOg2RZNlKN~Q41S}l z_sRpZnTq;^{e^CmoZFw{k_1&_G=*J7J(bFwYuZS%N?!ThYFhwdkIylk`E zq0%sNS6yW?s9D9pEwQVvx=OJ1HC_V5didt70bieR+&{(+sCk%t-ydY=#DB6wTF1pW zTR!_mJ_d&1O|U8s`4}KHjggMLK2Q|=rYFg91O}C2FoPWhUGO3fzKDs#;G}xumKAV5 zpO_6UAeRXmmF5_i>v`i>nY5{(i2*Lb3zXYs;>cgIAEdNN>j?PxkpqTl04$GTPQYkd zCX@UM#LI&D>bwO 
z*c+^aQl|EkRV$att1~9cyva|=+!r60Svq*noAkJ>S>1T&*W3>5$cb&8vH2P<1RRW`V`HE*>eApq7!JiC>qQP?4p7WcfKMid<-p}h zRFc(M0+jR6%PPj?WL2DO3i&h#ncRK?F+>_$tT+lXDz+hg{rXYPs0gUhzf@jTkZg2A zm?xd%JWcyxKo&q7HYOboU_MU=pX)((!jC^L3nxzqj5LfGC&ypV$N#h(tIPequ1QC5$$Dr%QINl?SiU);m4nuAN&%PzZ&%loRh=H{{Lqxy2; z=)+Ezecq1;>S0&4BU>4G_U(mDw8~@DpiwKlyc|e0Z&-zwa8VDAF9~otERb>o=V=Z? z3NYD*NNA;jO$q0ddYRv3=WUY8L4KNFvU$Ds7YZQ3EKNqO!+uaYFQTeV=@w!pE3tUx z0=3}93G(<)J^@q;$9(m-GH9pqQRP&_a!{Z9ET~mLx%^2P;jB+J3~`l?CP0Ljf#Ry^ zTpwj9RWMS0S*{8rL9TDf>{sOWuNB;OKBZAfj_gjqzuSojBx13@}9o-Xs+I3)4sud=Wd)xh(<<@YL@1Qi;RM6{57RlibwT z*nWt?RweVEz2B7Raq#Yk%28ju-q@TE8PHOy#z{7TK0zqdYy+KzygZR+Y{E>YG8M3F za#9fncUdS;=%y`>2c6?U8G6X2dO-Tv_5UR=-ScaLaKMA~C!c&W898#K%%4A>x9f1#fpGH4CztL#;MD-!=B|cTn?RPo^q9>2@AoA69k&^)6UdDz z|3*z<6$fquJ@Ld7HLIY{@SSAWx1TG+^*Kg6|29E5bl$m&4hS)Dai2h#%Q_-Dmp>D* z-U<3jC#mvlr%_*$MKfQPr*Hmmne_Kx*Sy7rTUj_m!Agz|eYa%k(xtNJo_osEPd{x2 z>9JjhIyvOC*UA9hD=QCCe!{G%X<&$Y01YNg{F5F7ObqDYab{;%8UU2rapYHCLKSBD zs8b^fTBFMJD8R*76=#H@C*%gN<-t{!4HIa;6S90RJj&`QqYK5!0`OBvR(=JbK{LA` z1A1~@CAlf1bo#eurGA@a=44s1Xtv<9#A{Y9m-cn`Dcy%S4IEQdDG>N3*J(?3c2&nI|c8D zyq-gI_rj`{5nnQl$tB0N+61zA!k=aC4PQvI>vuX#`knFJlz*co@DTcvOD^G(IjpO$ z2HZ2=q=B_{1kiPkzvK9+#b@9X&>`|`o!$dMx(;noFs^wCGfzf=COEY+<^niH@|;;|R~$(4DX!j3qJQ6W)I#ML^6j=<;XreIM=RXoe;vXIW7 zJw@7hij#y?mq@5##7A161^nxp&&jlZ-3qf9-ucdVzV08W<5>KzR=N=8Rr><5RbHo{X2A(}|w0^GS z-r+T}LBu4W6CmV+3i5RN=sf_Ci&5&B=(OqOwCE_Lb$(AXQaxeHqUn?5U%FRNb>~vh zVVyXZ9b@mH4GDkv!yn}BZ-2YmQ`{=+(5Z_YaPE&};J5<|Q8c2k1x7IkxRxgkAZ{4}vE5;($=fgg zhm1MuJ;me^&QABY4ib)SlLZ`IYtMY!JC!j79dJn7ujm++s{lE z%7VWzT&YvqMnvO>nO?{!N#aw0@far3iYU^*>VO&f*LuUtG|p84aq#$ zb)D;F*!UA@v&jzqG{g$-2sfX_U`Yr7y>8>^X^@7^q;X!tT?^_5f`f-wI#qthQx>Vo z;XFm%P_FVC1UaR{l@sLYukZGIOMTyB99y<*nf&#ye>K9kBWs|8-p~GOYt+T7_y0;( zWtY9(_B}n(s4RNc7H*&Mhfc7O-vQ`I!Efo}tPdp78A+~$$3JA^#EEjk2`4mqtF=U!W$?a9_wL>0w%cwiplajv$M2F?^}hRn z9mh)t+;3M~l5Jkjm{eJBr~|7y%89a>P_L(ihb)RzG=LBo>_+6!vMgt^Iwu!xN_7Oh z2O|YW;|&_H68X_-U-P8TMSQrq*;DPDyN$g!f8CUyA8ShATA~wvneFac=T3z z^1AbMHPxdkqqq_bJYp=z?KxeJA5( 
zQUxrXJ5}bt_*lZ=Dg+;4Y715at#%7*+W&i-teQ0`)$72I|4KRyE|!QT#8#TX%SL!u zUVYv%P?t6y_L-}szE}TZH55CQIxk0&q8pe)qcdoy=HO8{II0b2M=>baU}Ke8^aId4 zKjR92L8v+~Se5&B48YY53r!39+gf?nB~#_CfxDPZc&0!3iRgw;&wPG zX_3bBtY?1zO_}kpTLM*zJ9qCRqYgTWOUMW5OR9KJC+dZDsMJw6%U_+ZFPpBCdi}me z++KySpwk5YQ#}Nn{3;ZO!JH z$wW>I{(v(<8|FOwPkAuAjWu}iV0oi`N1YYZ#G%zDklDZdoUEEXSynHYA#0Y*mm%lh zE$!=@KbSOO-|GU5LB?jddwl|H4I`()0}6L zWs8ZZDmN;?i%Q)gl+uYn9mdsEQJ;A|q_ooKHbpw&Q2|b~X4Oi$?ehofs7tAace8!Z^XFOCXl@H*6I?#c6zJkjS%qp)-%sM(>hNSSUd6f4_zu_kA5#9a7?>U zw*dq0o6ek~v%!mzM$mNb%;E0ALwPaKIRpC;({?a^6B@Eg9X98OzM@N+Lbijpf4D6C0bAY_6LI7b2>D|-x;(JB$^DWobY@jDFo z_@)K<0e2BK?RPYBV9-!5zrnIXm+&iIohK81^2tWu>%iVsyT9|ZvdanQNPAsU?Iw^n zFC%mXj=FKhV3m}qbQ;)-I#9gTQM8rUU)6S2YGFV0SUG({2>+u2Jfyk@YqHEx;3j|W zBY9mxlT9>km+MdLNq^X{w`sroKAHc5-tP18+u#0{{u=o=a=b|D6*B686N^q4PMW;b5(H-*F*;qUzl@$q&!f{go0Hb=oJkHlRhCo5TUKuahM9Hs-DTiT z2g-nP2S{JNI_chTTOB8`4g`kzfZzoym&5QjxR|sO9u1*AX7FUjXV_c?9{f|DMc z0O3VEe3J(!PIzYmU*z0nmtFYj1T2BwT0`3evXL!|hsSt38!yO3XkXV!4*&c$GIWn4 z*pcDX$&cxk-WkAvd~{sLDUhPrff6pHTTGl$=`lJ?&}96La25WN{whC3tBJ|_FwJ1> ze89ESuhA`)`@Z`@nf=T|A%yYir=QL(TVg;fA=ijA}BFsIe2Z;-&N}NXUbWyM{N@aJHhaSj8@?k5 z`UJiWvcAwZfvm5FYT&0$o5qi_AdY}lD~EpiYJH>pSUMxUIglE|Vd0n-GwEl%Kw2I= zL1(}?f~LYx5MX(z!S5tVZ${n-7W%Uum^1(f-UNV2n$f=W5_qX073tswnCSCX< zCHDT>n1G#$@mbtj%b_<$UK@|qz|cJpmm|*mnLX>EPq}Mp!&>`V1BY)IENTz_1s5=y zs!tjZaN*}h=V}!0`~`r5lP(iZpUO`H3Vp8I`9loxRAurZG-SF0iwqIglO|0d;BmY~ zW;@cepLtOJdBsPNY*83AXb?Zr^wE!gw6sk~i-fL+8COgAIMjv)e2A_&J94*8ARAb- zxV>`NVTU!^dVT+od`re0c?O-IBGtP~4w>nx<=j9=hcMFU4Ccr=uIMo2BDv`@&Jw>f z7TsZ8ol7~2@eu;4kR>Z6Gp=4 z{1sZGw&x3qO2Kp(qq9>-4Df`d1N7m6xldG?jTFeTXbQGvR~8``G-dlO4}9ng8k~tQ zp81m8ebI>kwMxLgTG*U%{P^*$B5z$n@Y&k@dpzr+t>UU1pjPq7w@o0mYGoa;bI(0j zzW@F2mne39$ERdZ-Ij#6LXz{McJ;>&X5`6fGsj0~HMluD=ra}wFlLI1be!Q;<%TSa z!ycLe(hwY-%GgamM7zC6!Y1%19)}rA(Q$zfu}kJmkvlJXhpf;go)HEP9LTNQvHYzy0>x`FY=2vu3qK`U&ov(GyA2#AeMavgT{Z69y{~lW z)LHipTgv^zu()I%wuG5odrKIcTwQwUrSh52e8$XvjgENsh&w6$`t_5Ic~;Xlfowp- z;cWWoqkZox=Nq#7VY(CPFGTc`FT6jwvpTK!BY359Xomt(K|m?YW-#~lf|eZt_j$`5 
zvau{Uymf#)j&w{Y)bBEZV@QTNf8b$PQ~H&Ma|{GH|I8=v(Ff}v@;oK;3Iu~bCnN*4 z$`M<}U~@*q_)yNmg$p@w;PQJzLq4EvC@<(YmLGiht;^Z?oe!$12I88QC&!cISlfNppVo6kHhGUNx`{LCEG=u9+iS8QW`;LUNO ze8vIMCWjOzoT%KFOEi$9bitNzt~B7Z(nTj4SBgA-#**u&6{f!sRI<_3ua&=@cL102 zcI?_qhP>%*`a4PL8-|!%6-;*M@zQwl_2t_aW!eM3mnr(omjb(K^1%;&P=55IAFZ?f zgSIteEONoN?Rc3Nx2o_()MuV~Mjm+J0r}z=zu4?AleWqPvV79NShn_@3TuA=06+jq zL_t*D`bc{{(`jGdtJMdK)~vjZk%hqtt19x1$hr?0CU3p;R((6S5B#@qJUW!QlKZ)$ z&4Hmie3kAN8Fla6@gp6BK05$}<^1NgloxbHQP>jjBroSTqj6O2nf{Xf29K^0<$<22 z?0@>^D`etLKLAMz=p^|1Vq4ho8dyI0QJMLpGZSN{VY|t|k6xGZ)x=(V?N$1K*U&wW zkfXnJJ;~74;R^uJuNVj1yuoO+g)IxDQn@cApLKrvOYlQ#q@CYD3Eo)0%Ece#y1qyQ zp8FgK%KR=|c>yoXwKVPa$&zxCZAeg>05L(%zRRyA7cibb$^G9sLk8}y-{{%@WWCa; zccsd&1aYBmc9{iCXZGM0@1$FQB-8%+$HW^6ynuY|wbyR8F%#cJuWmqs2?Qq|SP6*- z{a9g&mxI?AHvCkrhQxU{T`V)NK1-&5_iZxu>xan7sZZCSdVPa%shLk6cxZm`xj(eY zLyHD?g(I0)`eZ=|bthxzvwOeE(cqnQUmv`KKglv)b5hVKzplYg{OAG7ACm;MlL-9G z6#Sm$?FQJY7^79A$&Y;KN2dS-JAmm66xQ~a_(@sV58K8bcaHSm>3~ql`17CtJTH|? zz_v`YYM}Z~)!OW(wyFSJ>E&O4gj4Q&=6^+tyYDC$+R@(tPi^W>!}irQyo3R?bQi>h zpO_!!J`HN8u_W24i!uxsz7+h774Pb4TciTrFghoQz4NggkOuycVEMcw!PMx4yPt7k z@yS{x#hd2%7C~Fs@ET}WS6=$Lt_cJWXY+tnfxU)qZ=i4*Z)Y|o)8+&417sOHE!!8| zZ1x~d03?*b4#rpThb&;%#SQ^bXDTCYy23Z@3K^0}ZHeqyvKi$F`#pv)gELd4dG}?4 zou@_`Z0*{2fR#yOwI76BP*iORCcyiS9WQ&FbG3BtH6Xx@zy9^F%gs06T!L+iOl$^; zg()YUbW-VEtsXsk@WXez?z*dBQ`p(FXRp_UR+>N-mK5O@*47fx=y-4*4vBL08ahVb z;Qd^=h)PGt2`8f`DT&7Bc;Mcb^CB z^iV6{MYccwv3!eU+;Mu_FxuYz%KR*L!9{!Gj5!8VPdg9ciQlwZ8!JDZ0}g_p^COk{ z9#5W}!swCF6#U;=AwJk6O_iPrpF+$4R!tj?r9(U5Ogl198^4|u(F7im;-3k&9MT3= z8Wn&vVUVf8bbI}U5m9F3H4REqBkg=HMtArfD!YE*2hve*`RCySANW9{jaYMhYozbL z|9*bk9WVPf72;dz*s^7Pp~VvjHq~3JFO#L9Gj;ne_Ci;Jy^a&V{N*nNm7a$;eeh!4 zQo}m6tspUMIB_@~9mj$6GkB1Bbn0ZmPiJ!_*6Gke3Mlzg-mYpOD)@8z$gi|8iTU~xc=OJ@VWhn(Pu(G(nP5Q8$j0T_9LfB?~%JwW*o3Cy)A554Yczw z0S4Dv{W@T+fCr-aCzHGN8=_wd+?S+m`+P9-)-u0v-*CS$zq5faPeV}QlW2V8%@TuK zCTI}n>EI_7*^x>D5-o)~KKMar{$@PpYtHLZKnr+4%le@m_A1s1!gfPAjzsWf8jx%< z+1X^^^+#UvM1KKaF^Q2c>YXT4Uf*}H>~zM(ys`-yGiT0}^Ups&vYRBv9$t6>a^Ado 
zP0F7%X%cZGM~-ZgCJ-&2K-Mg&UVUsmx35sqU;gr!qplba+xJ)*G;V)QN9PDfqu=mA zCshhKJUc_?slb^F1wX|?oc%(s=GkG8C(`K1PXW$v!dwVs6mp}((H^%U_|3G88A<*M ze)b{okym3BmGYTnIur+);lZOc`kZLj)64gEAAb1ZX1(DWa@%69M?YDKx@hVa)?Ihq z6(o#t`^qMB&9dFBUC=h>6B@^r07ys%Z!7*B8d!}jkYAJG4}>PU?n8765M@6S8XuDB z`AY@|8|)=tf;$K}4X}_g-UlHAvnEgT6k#AgI%DE@O5y zC+pxPy~_z$P$J)R+i^1N@b?FZF>XOV^;F{xFH1x%$i&qJo_FLQbqkTWLx9^{qehM5 zN81OXTVnDsww_1Vl| z)ETs8`!Q*e-6i$fc1PJ}uQwaek2uS!<-8@LwoIx4Odt&>e@z-r{f6{C?NVtt<)Vbk z6ZhP6PoCw84%!og90FZX+Xa8jq*&V$6Nv|J!qoP`4|I7TEO?R+HQ>>#Vs1V5l?4S#jWZfSR>c{eb~Z*?z@9zZ(!`t;r9h^5=C^DnH~vk%rp*h6N5O ziqmxDVaK)yo-XzMhEWC%SW;X4;95eqLgJir&XJvW-dTS7)1L~y%Y#oaAmG;L8E2d! z#~pVZ?fd38zu7EpEnddE{KZFP=2h=Y$~tYkiwr!w*mx`lEqMa#PDuPZ_%m59DhUXECfoJ9Z6IPt2>ohw z;s*`z4nv2tiNg_ww(RXfpM5NuABp>;( z9|@*oqA-UBh$l&k44)7FqW=UB1ZHV7%6g5bK~eA{XCYu9PH8vE`j6jXEh8ACeG*{8< zjSiot%~uSKKKNug=o6RI3Fwa~Q0~NZq+;MSXEDkWN;8oMO>w4gE*q_L5r3FgOFtxrQLd*EzrxMo3;TnopbaIaAMofTxC*)J!V{ttjIkB=xN+l3 zu(r&`)xbah`A^wrpM4T@!^oZWCD+?9?zt~03ND6-XVW?$X6M z=>&t%1vB*HiUF3akI0?aL zJP{ERpprZ=Scv+AKFSU!Pk0kxcEhX?s03(_bj1XIf@JcY*UHOx{M03gy!EYbZM32; zA-AAJi?2Yopt3Z}6uggshg5jBfi$+jzz^Fl<2&8>Br7iCO5f{N$vd5JwmSknzt9!V z0Z8r~8Cm#Oa#ql~PelS*vgGodF7a1!#G9Z-{F#0VhRWavE`?-B*?!_(h)2=>I6HkLi&!`SLW!>rV*~Z@=D2HWD0m&yx}#FUwwePV}1E$HIrxqR-_S| z-{2T2YuLOUjoywqoNfW1f`dkIAIs^0GFoUc`4$mM&Jg3+%#Q++Pk5osVp4=&MEiVXL1&o_p>&t|~x9KJ?H-GIi?I z_0>rueBaT#NuOamDm>1H%t7Mn0nQR>Iu<<@v?0peiT>)a+~M-Haeye}*E1G_n6rT< z=V3axEu5Kzc1Zl8KeIVGI}BDMtMaV0)@Pzjrr&6zpk+BlT?KylL8q1`5PZ!M8_I3W zCT=KITPSKEU%m@yU6-zU)E0s7WNi*y_+hjc+S2?Dv^Efe4#99HAmjv`w4lKmwfhZ~ zWfKbdLq7ySc}}Yt(l{*FQRqR+Y;kt-xL6<(9x2d7DBGVnl*i9j&Gt69_9_fxj(E32-~r|Z=5TnhhE&Pp2usdEPQ}By zNap->IOnf)xO65%UWjvkO`3xOqzx;$eR{wc*T8 zB_%cNw_>{e3bit-?65yN5Ny-5n`C&+57_l^>D;rQEMI7ExMF`XynWmDEQFQZq5~eh z=3&js72ZAy^@fOcMLRP@#25zypzW+}oy_TwBV6(B4CJ6ZXD4aSuYjb_1gH!oAWIMJ za3B%$^c%RVpEQAvL5ahkl5H-?qCzMrOII2u-sKebz^-=al*C7w5l8Zij(pvOv0f^LJhlLfx7AOQH%VM`~FQ%^lrw%u00+?fTeuzEos zq+^Hd_q^vlT(XP4h?8vGdBNmy$t9O$98D*;sw)sY^uF-I3kBaSZqC=jny!`tgN@UM 
z?~fHk{8xlJ7c*#db7?j|@#f^@c5e@7BaDPK_z5>BjIo&XdMNR0K20v@H9k;>Y|zGJ zkYMhd5z&73I4E^nK&)AmEAdzCw=x45oeNBwN5?@D0s6fFWOrPo@!jpgdmk&4Zokgx z^?2irH?~b6sWvt~f!nWnSh;L5+XZ!>+NjbVjv`y`XOX5qu|2hoqmhw--{IsX^B?j! zP4Wmb!AGd6kfGV6Lq}7nKt6O3N2j4_i53J91ytNlH97D}(3$}OBarG9&VR#`(v=x) z*pu@EX1ZLsil7j%E68b?^Spr)PN9k|`b*e|K31|H1yWi(NY5vb7hil4h!zDr7{@OH zmg{2jz($F;+;R(7dp^~>nDTi4X2L|8x{f{_8N`m-JVc~vee?Y!s%dQ0-Pq}hae46wt>4GR+>Qm@|V8| zR-IJOphIt4tb?VlaGd*VeS^P4Cu!HAqqNs8#rqup`BK&3dk%S6GG`jI(PnHP^A@(P zH%pl&6A*kBo5usq)^vaIT=I+LZb}3`a%GANzY9s~RgL`8j0n9+4!}lC`$GbJIIg6T z&TVR>pS0$WRuD)rTP;>-z-n>v{sgRR&(V!S0eMY^{I^18;KC%gV*}(JAcT^!LC2U5+~HCiEvL$862CXf*$MsSa#xbj{ycN)J}U)QmoenDSQ3E@ZDB$_VrXuNQTpb3S@CzumO z`xbCYFczgjTSuU7^1+V+YCZ+7@Gnd2ku)X&N9uRB(3jw3*PJpX*_tqJVrIhDz>HA9iqW>PhK9BkIVXp8ohEo+u8&9;c5_Ld*8cnBY} z8Z%~0lft&&etRzQ#Vs^^!eU*a#S_TNnUnMk=~cmgJ?->O?GC!jWcyBdfVXJ_Zdc*W z$wwY}q^2TF6l3+-#de3Cs$UDUbgr_V z$dSBn9`to;mbZJ?{l^^yYznvnX$wu)z?v0HO2~FioSVD0dPq8hv9ina7j^41A z2B)=O-rj%)Po5?Ze97_QbLJb-;6y_J{wW81^f&k4;0J+gCw=$}V(_{E%CIt-yx=Dd z${O@QL{Mb<<2x|M8|7kIz-We126ccRF!0m=H@@+WMp7|Ryy^X46TI+;hD?p44Hg44n5;pK9cgn8YkagV@tQz;BT$aYi-%tB zPjrUDIY6yB6O(i&e#K#X4}`?eoXe*+T9fpXR&$|0%HwP)qNQ|7*RL3(g}kVT!hTZ# zBKz^GD)_+xF)1JXphe;MU(^lg6>hZ!eHIE^!~T$I{Jlq%&wTI!B;3mC+I7=kxrU9e zr+Dr1@&wY<75>2oA1qiJ9pROK{Y}Q5_;JrhTdvCwjkIXLqTPT=aJquZ!xg!SD{wg- z(oRtL734q~{3V#3F0w`dqKaIC0Il>R)#ALAsp`7<4f zpR#DFvw*<}$(r*!2B6#?*obE`77%N>anwVCMnOPCcwmesX*6M=VnDV_?}5@um-DXh z<-E9W|JY-XadWqzYm18-Sli4ikYkQHM%%6M(b>;FEQ{w%k**DcZ2;7UiatoS|fh?2ppl%ffATLd+G*u22EHpKSGnkvogRK+T>VLfj?wH zpr&CnVUgAowjiOc#G`B`ud)3ULm{C9qVqKEAV0`LI`mtaA;GkwzhIS`hS1C9!r+Pa z(X@*QOP5T4jx2G&vxwFXSgnVp^Z2+MzD|fw*XCjL=+S~@zK0!lSPpJ7-Qo#kZDj(% zo>8qGu(AptTfujO^MD8B<3DtX>~QpZQ-iX>-AUL$0C-?SMT05~?Uy=7w5H;*HoW8@ z;AeYVi-#P-)KTG-$s05kP71anfY*%4WrtJ32_vx6sqlmY0(j`rfAX6sqoLdiztZCm zvKT1e(z*Q{D4fIyGy6lQ381t>mQm+rSr>nl1w}x*_pqI1<`Z`Y3vWkOZ^0zw8$7Wl z|K5()Mf=XRuheiRdB6b&NWLrYGq?SZ?0(vL=055R(O%XLMa812182?dw3>l?n9pSV 
zkfnbSkY*caIzk?tHqsG;k9sHo%3TMvg-(q_gUJTUAmK-p9m1TLH121a0Ct~vKcrFl zLk8@kO@%II707-WcM_sZs2mMuS2Dn8X}{MW?Jnv8(vl`+TcXdB<0V4K!)_Ssde9A; zjSbif3rlz(fBbQtxM8_4R`EgKK?fbg#Td<%-Qo#E*T0r^ZGs5|-{-AP9`(KY>$8rZ z*+W7!4u0wtm=F-yJnlqn<7hzhR6chLr({D`@D!kOcz==-AvbpbsKn#}Zu_&dEz?gH z@Vh;=@}VDbN(Z6FjV2-$%`Ro8B-zvsTc8j8ZUXt;;Q>a-?a9-@R5Di!SJ1E)Ee~BK z90gN8b$F^uW42jYdkr5~nn0d>^2x-XC$LH2%P+soZD+9c3{EDn-3;yzW5C1@dbZ|J zy@gflA$pV_FNWn4$aDYqbKb_POOuW{PqcH7$2hVC4MDi zS&|t9B#EFSkJ~Z#>3b3m2@K>d-NvLOuto;^(4jAFv&}Zr(9pmM1dA~?By1RU4asXO z6G*%E9cxf+JP7NRzxTcGl>~Hc7$k4G@PDQE;L(J$(JEs|G-Na^93;UM=8{sdC} zflDy&pU_2F83MF`Q6gk&$_vDa!8H-@aO>=#1u|)~@_E@pPq%^Ac1CP(-C}Zh;DHBt ziGAGHN%F}9kDzk#T z=DYw!xp*f7g~~#lP*QdjI$gmOb&g4J)?C8DNX6gM>S>n$ibOaPm}y^UvSf2 zWiWc@cRB8Q4vwm?X;Tkuy_J zz3fcL6KpsLf&REESvd7M`Nw6af{g+9X7OUv`X-E^Zz{%wg6$h{%M-VA-iQI;bGYDw z3sTYB>n9i9a`CO~Gu9~PYv!kxCb2dZeJgwyO~?KMxcN>aypj;++9J&!7EPbg1Ri!I z@$QVs15cF>bFoQB@J9@M&9n-mEC|yOi&zv$A0|&Vt#nmcHYuUMz&hX=p$B;;tG|pO z=#9FN3wV?dqL7~R)ZgXV-(DD`><63jdmv|>bym8ggDVg`_n13(uDtZpODt#KzI|oF zgb9Mr->xsTco}b9-(k{mn;rBwRyqzEqxWaKtxspO@F)=uqxI0 zWAaeKaJ-~RMNldRUuf10rJ88X0$bQ0{DmeXi~+#l#nI@)ezS)hW`K&;oTkZ-bQVZr z1vMz?tb+tu++GU(L0;cyn;>C~-DF!jc|Z_Wli+J>2OfB!?7Q#2f;ZjYXaTo;vCmP2 zHTr(R{r~+Ry50pRVntiWWUh@%Qu75%2QOeHAA<{k9%Lm)0XXCJB>6M>;X%QUIJipu zZa|fv;9x)|OOrq4HKEFAsveT-MqdLXeOqG?CAcfrh(%R8GavO9IPd^(6v!~81Xfo; z2hrenMvX%zD45n&tS{gB(}F=$c)tzLKsH6V>86`FdGzYli~9v)(?+a<#B-2|6DM+u z6ZFH6eB>j|Qs3e$kY;Jw2%?iuKAC%31uMRv;qeo3E8oc5HqYmMoGbvtKSfcqBuO1MVmP=tn=|t*#pAtrGN$U;KgtQC_$) z$DAd5y!!%g55ur6k%SLeOM3gnm1ImfhD8hKSD5At#t$)+;=T~FBc>8<_qdWJoV12< zcGItlVu45SgS4a{7P~LeI(NB(-UsYVM~PqIU;!+6U_+!$Hu8kxz`*Ao%2a~8ZV=@B z#$bAouIit1&-L=kT|Xrm4p>5qhugRpS(osQ#r^u%zb;K4kh9y#{~<$l^2h@k!&{LJ18-pC#b6i2^bp1ApC^g2 zw6#2CC3&6;Vvc+v8#-OE;=@_Y01BWVRGBMT-!E^{W{FLR#0SC%iBnTul@ z-&WXt_ub{oU;eUu@rz$vZ>MM^;B^SVhIn}4Hokd{B0O{JPo;_P4)1>C6*YoPx&&S6_WKx07GrM7G6EAlTEYx(j5#?e>t}Px*{D zn0Z#XP|;Xu6tt?HF<}6;*3~@VhAPgUwAR?P>H(0&KyNVm5`R4SQigCqGMRwFi4rg* zA~t&V#Nf~d*tr2?E$CgmhVRhmVy@(Fzj<9Oll?@lTd^7 
z8+QnQIJ2Gm?1M7({@=;$f0y5GU$J5Zp9|n+sT*#%LHhUaPdQuj0fPXxrNGN!5gz&Z zfAI=n`=j5N8ZqjZaWtkkJZ?Qk^+!+1{RVQuSHNsy2z|hXdo!h?6N)xl9qNLuOwJb^ z=zPv*_8ETZ{Au#cZ@!}CE1vHT7%)J-{`Idr=cb7;LEy0fZuQpMOc7eK+7SWIN7ffw z{0wA$3N{pv%@Og9-UxWtCXl`M z+KVf^8X>nb03L?orLpRk7>{26HNix(X7$RV#Hn$k{j+-3x<#OuDY|GIgX2hd@g4>V zpb0u>R{>~P{jV%7Hz`0^3P^sW@t%9`Nl_PFbWu%_Sow%8 z-(g2{9-?ni&pKV?~-{ecE#^v;VYc=B}U1TNAw^}CH#9C@0uA9bPxIZe*byIqQ8 z)j2KGZgbc~d(G%zcP1k=9xJ#V70yJ;F^~FmHAopVRLQ3WPQQ-j3(G< zo8zdTf{U|9X=06a4vzg?dF zjJ)?*mq^c{+ncYlV!bJ~BElciRkKzv+RgmS>e^Y1-z$&ph;(WrI}0aAz|tfVa@<$w zp9Kt)7UU(~iVm~KX~LL{CTwkzUwJUFQ%CTddcv%aD}euwV9|oelv$Z;z)+r4R{hf8 za}UUKf4D?eFMHLkv%LFwGBCmkp`rj`QfYQvkepor#h%twjg0B{fo zCdUJY!3TLS4=>Zi=tKZN8Tls z>@xijWcISS&JQD)H*}~^{4@u)cqo?(z#M1h6B<0!lhbHg|5JO?sG>ZsCh2EB>H@44 zdvp6ii;3#r*Pk!*UwAk~F~&+1T#@Xt#~vm4mXujJ^97lG{b!^@&w)~>+qiceyo2-@ z-}vF@`fSGFaKZ^E@YY36i|>n!KJ48x_V|xV7kws$hCs`3&|^azb+j?DUgsevaCsiE zj2(?cbkIf&V4jYAf*G4z1!Da308$N_bJ?B+wt$6RB8hNQl3>l_9xtp#fV_;K(g-$e z$jfw=^ixE|CWWndb*_H%{2G}xq4BCftTx5ULHPgX2tWJT&*VcN`jFs#kXivBo5Jcv zd>4IvVT+kSP?(V;N6M5b#Y5G-&iIn-cG4$V2DDe44WQ}iobCh2lp0Zqi7GX6!Bog9 zs)rmQ4^9MBm7nBrj<6-;&uBqVli!U7u4F${83z(YE|bY*@FxMFwLpIIA|LuK4Q)mJ zC@#?%FOOj}S&YEwOj%){rKvFxsxS29bpSn|K|IfKeTj%Y@%YgSc z-VZJjtRsO9^iDeIBre&klHpyEq5B*w!w-0eH0U!&+%jg+N<@8Ht1E={=_topuBi7C z4OvJ?vc#^9#ddOlMQkyHhtR}tfM{FfEDfIU@8CCD9#wE-9>be{vb!w~M<>*O^D*Z) z(WxGYqio|LL(q7cfLStYvdnzsPx`w>)~sy2;t&HlUh3P_Z=Hvg5#w7Q*pwDac=5;p zCsWwU1n-DobyjmHvldSvtMp;enk5TlE$*l4uWrC->7e(vH#A`76~P}ilayz{C@ z^;;gjOAbK1HxL}a+swh$LBxg>Qe}jRtK(L^KU z@DhLM&vb?~@Fe|oXejdXKq->J?@Z)rBrES+1wYx*9ctKbq8;IvF)TxzAvqJGC|gYK zx&26^45;r?dBA7}B!1A=mKFAc!a~p?7wMACIb01R4U4l@Cf#UD6R&YIb+xKK?c-#3Yzo7}-p1?|@vYSJFn-^;#FAl{&=x0d-|?a2mQ|YuGH^zSjsmyfp78VVa(z0l1|-pB7#@pmz+eY zyaf?y(=v;OF(3G>cB7hW3^QHlx=)Fq>9V)EuKKySH6x` z>oP4@SS!77;X>{Xnd7!Q?42B@IAE~z>mZ~A%=2g?7xFfc*nne180buz#fnQ43mNp_ 
z{7RPw7?%z$!EXtdO_7$y0EGa$#P3X{Mp1kWX2!03qzn6@3uPqrw=dUU=n4B-0P?{uKy&?OYF7R!3rJJMEuKK|Dcd}(U9~*VHY<(S>z6Mt_^?$;-%b{$|D6IBe3As1j3V1rJ^?>ywqXk&1S7-UJm(j{Gq zUm*bEm-Ha-lwfAohzNCB#}qQ1-}L1V{z9C~G)6xvECYw#@iuUflEe>)+$>FL&i)PN zc9$PKhJHxPiIIb}B1E41dgWRYSxOjZ)RT%JCfARK^I+v-%^0DhLSN9K-Z7z$J^4%W zuOGf3%XCR)1blA>t5lj=A0Kp^GhSVtKw5YOlG~0!46AVPks<6m_w%3soGa62%$Sjr zwQ9OcT|2q$~6mE+-G%LK{DR zykIqAoDAo6*W70^EuTR2;aeWou4ikCoHo)}36qDB`=7{c40eXop}@x$Mln`kKnP~e z1ezhvh7Kf+RJruu(!ro04k$&wNSVG6>BAQ*JGYDc1yg}nxv;a)7yA8d*b%t0D0JZb zTs8R{O_?hy9!u7!zSwlZDEDv~$ z->zy@Kdy{iM-roX+Jl4ROcu)L{Eo=$GWfxwoQ2JnQCW(qVL$nR108}3$nB4%sMN27 z9rUxXW8d**negL}Y316>T$3hElJ9)yJ3LD?L9hLB)!brH39RYR(6U=twMvA+;^?D~ z<`3O~<5a5fE)om?mwOuXw0jpELK4-)%*|CS6EME1F5pvzjedPXm z&D%dO^wk>hLdS;0kcSL4;1&a(6};AA_2PN5cIDD!Mmyc8v_tmZOhU9`0(HLg&O7thiekCQ zX7v#Jwp_#!t1Te7RL(;`eLJ>8C%>xE!2&}AemtY~vv-Vr1S0am1t8p} z;Na$0Eg(v?7)bPwgNqV(*9Un!Zv#Exv}nTArLtrtcnr2}?~ZcB_B{;m$GQ&fvz>G@J9>xWt!>Z`>D5^$koid3K+1AM zK0&A)Ph@!?{E724)HihPD94N|5ANW)=Q)#)w3Wx~FO>yI*sJ=ioji}ag$XQ+$hHk# z2%ksjG-N0Kl7X+I1otQV&H1wQiNBSs?l^Ee>Hq#8NqgO8 zxh-tI8o*17*dq;Zy<>|T>{Q*7(BcWCefPdn*MEex@6=7&>DJqI4cl&zJ!_G4_<^X|9P4` zKW9Y&pu{YTq{^Xw$it{t31*i8IS6W!u%tgqMN7`8(lN{k5)QIK z;r0<$5SaZg+pS=|1Y0>~Y14p9!$_b5IpT_dq@PGsQ8D0`N`7w^i(semTR!-~XtoX7`vm(a$6OIhsf9u2w!Jw3oWaGHF_Iv`K0m= z0QjO}|My?D$#2KC%<`?1d$-+olMjFR!+h3?;S$>e?kNQ!1C9V-5jWih!d0H8^SpjIL z$+p3tEHK}wlLXS30OCw@@Bm?Hh{Xf-0ec)P)9(LG;{VmJe$_UCq}tf{LKv}uB0HEgz3SXIRvn#Y&0^3Y@8 z2o4enhMtcH&e;HA-Cq$g$T%_JptHduNzjDzb3h?cj!bFtW58-|a7Dq<6h9{p*pFLq z9Pu*+!K@(ildepLv-o*SSIf2c&yX(qEQExr`=zb65~G zkuSwg8}^eu33kqk0Jalgi4JA4`p?_2q$gRb(5(J55oW)OHC|QXdO|0P8LO>G;{t5r zQmRb0$GZmMC$iCAmnHL7^I3b zO_iX89PGe=!9yka_`o!PH3t!pB7h%*ohyvkkmV!gDyVD6cG9`7y)4iHes#9u2h?f6 z3?m>rU9~8I+E}mrcq@DH3Y}bNKluDddk7}ve<#dMwkd$YYy^i5(hd%!^z70>I_Pqj z1xr`zl@GqFZWAo{qO47pQ)yrcB9tZ7EuhYil{CG&bmZ3X3zx5w)nM_^q}(7t4B5^D z9bG%Ay?S-HXz8j_SwVvdB4mbs=E3hcjVLc@Dca>j9TnoJlX+2oeI_<|taQ;Ovdetg zEOxHNlE=5a2nkY zc6uKaIVvbLMjB^`!5D)F1|klkLBp7%ic`0f!96?3X}jBl=#dSL-vmA9fc`Rlq3-FW 
z;rCC>=K$l(r3kCDPTQk_58=1f)k_$NR;*ehljba!KPuk?FD>Rk#{n0H(LUIs4@EUP zX2)K#i@r!Ts&5w=(8G4ToVH+vOq{utE35uCalQ@om~gy2-E?66)M3SUu3^wKGndE> z56`xN9RQU-x_=ird~6?i(}*6@w|hrU9z>~}h09jUqp!RwfBpAddF16+(_{ks*BNNP zRdGixz?(+(lB0I&D?4n{jT2PPKYz(edHBVJ^7m)w%6<9(p3PDmYN{F8uU_6geu#`6 z)J+EW?#%j}wQ#vSK4p^0OUC?hHzhxF>GE1PUU|N9HC%G@QZN_b?tckUn; z9J7sVr&}@Pp_5*Pj2&1npVQwT9-k-w^Uxf#Q*Dbg_icK1lusTyNX8DR&smw?rdKDy z@6hdg$@oz{Fr=J2t-<=n%E=p+?GVnK}o zw^x0g9I}0H{%(D8j(q?2SM;hx11i(?6daZc*+BR1+CeUU`*t#_f75l?TUT=(p_2=K zH&2);m;c{ny(+Qxs-TCaKN-?OtEuD|_8^_Fk{`31TCso9FQx=1#G zfuMkEK9${1K3Fy@A|T;E<#P;!R!ySQvtRxW4+;uL`#BX=!-PCy>%$;HBYxZ1ib% za32h#v$L;jhF?cA1{))M5W!$zNg^?LZ%n9V7v$`@jtMCAoVZILx&4`i@`_$n;Pw>4 zFumfr^n_u&-4%3A#d{3xCYPQ#T+Y9Fvdq_8IjD#a z>VM263gi6Q2M%mR3;U-n&}S1G5A|DNA!}GeXMM4+uUvgE_9aq_D$iF)hdNPS$lqsp zk49G>&(2sPe|vf!Z!_&Wv^!tqBcC6R*rAVHbMI90=kRJ8^|9CRo{c6CJoCXzfD>L> zBz1ag>ja%J`t>N^8r*wCFS+HhS*(8y-Ax77#r);G?bfAZxn1xK=MzVbkWXL#Z?+95 zcZkxmBQU84%Q8nG=|Do9Cm_N3A~peQrBlxV(!Q<}ck6}RIdkU7ym|AuH3jmUGc14U zF{)cN_hrGa=fP)fPW8>!wQ}YpojlH#)%sb7mD8V*xxe|cG@N>|c2BfJTiDWS03WWp z_10Ty*1h-MEBMt4ScSFOCXm&y&JnElUaen#TeD=o)Q>$#+Ew<*s%0c#)d^&UZY6CY z8ypD8^B55dG7NA4__*NIHn7^6r;Z`Y^0HnLTzH#p6!-B1`pE$!%X`#Z{g2t{JrE2o zWBZq1l5~yo{Oo1&-MePWw1q3Ap}vEB;mB=dROOb|us-^1#2Jnn+pE!St4S*3;=fIi znJNP#+|GmR<*RQQCU{XX!cjWF;}#XJ6q2w&ddxvk3pY-mM^wXA_e|AiDoa=|zkFzh z{P104^-8EX+i2+4QO4-)w23o{?J}w|nAhrklio_IhQB;9TfTeSiwZWIADXm4u6)n9 z5*eP8L{k-wRw9w-W-gZVZhTtiyt;xD(SIB>QjXhgV2NnY5xr&CZF|ULFE32;C^t2F z8uwM!Sl~QFMIF3cgk;$1i!JfXjW3yPHzi!9JGDP zZz+l8FHMlyKl_BNS!!?b!>%P0{w$q_?kqhHIAcpOv(1jI2Cx(Fph1H&NbO{EPXJow z3S`EW?~#1h&_SR6gLLdSqPArfX!RwQAAZ4>VN(@N51mr@+NnE zy4JZKk^M(juCnyZ{j!)mXqV^%<0W$Mq=jvV4X0x-a zJ`bsea}OUX*FQK@9@MLZB|1-CcI!*L)s@pNT8`%|>j0c@by8k;CaiCSd>>7Z|; z=M%`9MLG#Ia{?JYe0bQxc+vD1QztisO$f#I;T$E35deb%2NE3*FfiyJ1_K`*7;}sx z3=(7Huz>@R6u^&x4z!6rVVRgbZ-q?tm0Fagq)$(8dFG;KR~Mkd^NP$DM4i_ru znl9A|WLQJJwXa%bt%r7siNfjv70Jm!1+t}4HcgmMAgTED{7iAkD~-|WD!Iz{ZK|sT z!_G|^_%3B6rG+NIg?HHj>{xDNyh$M+-(sKcGj7%(IN&L|yXATgQ*W&*Ut 
zhyF5}xw(sNWmd?+fV4udD3+~UV=wtRk7)qGKt8|Ml|5bBOK)BMR1MXAt*ZFQW91ah zsTL#%Jih6%*>a#>z0|r2z%zvH26W}`_+9&P$?>0`m?J-bV45siX7w25sfe|#>c(Gp zeV~2ZF8$=N9U5eWF1>}^=7ge78&gl5X16R$`n4UNo?2es4bI`JlM|Yz<4Q9mmiXx~ zZkxA1_==MY=g-U!{tlH}uh_JmRC$>xi?%!5% z!IRq9>DH_&+q1bp0JK(jvtBXn>AX17Y1>^(`PVwr^RN#|w>?iZGupM6J|}&pZSr`n z8|5|9-}uycBMiYUtURpQfGZGubR`e7UwBMb>5jg2dM1d00cV=*^&EjfiGf`q=! zI0ge^2`64@FbVPbhmnAabSax9AkOqbqmL*Vo=H`LKVn0F#2GE{uwllc@_-a155Bml zYQCdbh*ZFNe#H}fLQP_ zMXjG(vwX31-T7$UfbgHPdf`m&9knIy8`bBe^JTRznCZUH+ofKgd$ff&W(~B;1cDuT z^ROYeu!asDDkDaWkk}Jzjh=zceSU)UAG@!Lv@^|O(D1PV0~;0Mh|X`h48#G2{2X8l zJO>&KE;WLZroe$H<%)!944pOfJ1H>GwOp44&w6#G^w+n-BP`H&J1)BQ6;S9eWI5JH z!lFnv;015380FtqpOxUbOVq_3`oR0Q|C+^#Zj8R%c(A@eh}&1V`pCnMgS*M^9T2f8P`X zXZ8Q4(S53<^75ReUSqkmto{NVu}a|8h}S8YnGBy`^=JG_ziR2ilHTefj}medWIE_W z{J!tJn37k`eMRs>;g%TMb?7Ml-~0V7(X=+3He(I6$^_DqD-e_w+X}}7g8Z~6?%@Q& z0mla?jD&GZ3vdkZ947(~n5OL^Ebh-@V9h#1X!C#}v6;w>D`ifWN

vkoX`Lg@4@~WwjhN`BH8g;(=aNX8Lm*+mN?~+7a^wg`56ZUMp)Nu0bC0Uz? zFuxj91@sTZih{n7M>`aTJc>a$iHsPu2!Jn}H`OwJtS<5>AvbXbn-JpH7TRiHORa(O z>F7pU*`a5D>7XxRVf}agPKQh95qoZsq(cuqv_wDo{+o-u_wJZH5`qIl%4xy|l+vJ) zp__tdBy!R&z2)TH^h|x6t}OB`O>%rnhfN8vj)c3& zcWsah^gWPzJ=@3g1^n=g`gvVG%YXr|R#5nCy~0_9cSW**mjw6RHd{Wdbhw>W4Npze zXC|tLlU)^wyy8~oJ0v>Iu)g)uQy;2>Q%PhXwrM$Xgky_H-$!CtSN&gl>P~XB-bx#! zAIREMUp)No89U1W-NqyjtMpdngqIiOph7O}_v(Qj*pq^1lKxUPqSWp;%CJ(uO1K(Z za(KYEtOpM+?>`o}tvO!1dZo;}=~7wpn7weeHJ7!mY%QySR=EP{JbZT<{Ds>#+L9Ab zJW)D!>?kW&+Vg@1uRJ63UVd774IQI{LHQN~l%&DHHCFQ&^#E%OHwM6QTd5ib>t0l+ z>L!T@vu>YOb1y86H%V?bXjNq~@1xoS`n=;I{X8t*7LS0{QXklV5Pt!y#^9SgXPFOi zRjlS!zg=;y)up+MFmzbq5=;3)s|H@n9pnYPhZTv`K!JNlhhUazD zMl2G@ADj==0sguV?aB=g2dLZ;{c1m0$0m@duhO8N%hxpa!TRC}V=B;c_ID3Wm8tWz zF1${GHM!B&oa^W>fn%B2UZ^LN2Ofw(w#2Xb#nYZKB0p9ac$AP^X$H5<=3M_-S@H5? zviP6>D|G`#NvAFFCC06^`d@d&YLzRHmX^Aqp+Sy1>Zp>KNq_%s8g!!b=Yw1=7zEe= zVIw;R2@J4l@WRMpD0xRH1tL(4uYSfxM{_m+dyme%C(qw66g+A}NSD$rM zLwCK^G+v)oj2_UfG`Zw-opoE19%;*%rb>}>G*06U^=tk0liE z552rt{`~KCcMR}XL+DjOL|N3BGR|~kLYuK@g?#eYFUVh>Y&;NGbxhEg{m%Q}Nis)w z0}lR1xA2?^`ay%q>GQvRPM-4jDylN`Jj!tEle6Vpw_;Fc%|&r?G9V1EMBn_&3o=nx zTdgZR^um0>bBb!%b679h=;L<@efe!K$XuNunhK9ho-ZH%$s@9OS@Hf2Y8z$dse#7$ zP@N+YC4S88;MX5cG!{&HB;&Scb0xr5nnC#&-f^|e`sqiwB~i7Q z8?vgh%G;LTx@(}FI#0RRy+-B52YiMN8z!@6>5@_pd%ou@vcsD{z#ImN^7vpKpQA!B zXi+dY`nV8Kz-sZWOf(dljXJk-BfzSiLeBt~>N%$t`XT z;W@^1y@L4rv-9~~5!wlI{V^yXo@;d0Z;?cqr_5Wb-}P9WmDlQIxIUjabWCq8TPNwO z`(5F78CHJH)$Ky?ApOpX^O(lVm~nlEx8vWub9pr#CX8azu=+nhS94*D^=*1}k$T-e zWrlvYc4PT@MGdA0`i3MM8R-*{ed#%nErx) z`TQC3$PeC~gkeINGGz+C8J&PzVPeg)MKb4x|CD7D?`~AOF1n{(-&4OK?dy9s!nFa~ zY5+B`#Y`YD=E4gvl&^f{D@Zcb_ZcY1fBP<}>s+4=NH%cT(2C&$gN8F8LdIth4k&>N zA|G6QoF{!^X99*7xWXh4}?ab`{ssWg=px#rk!`mG=#i{|7AMVpC`OhlFs zT*5%Dhf4hw6+?d1VWUv7KV+3CK|_E;1IglR26;16lR*umm9dwZ3gHoF-V0(~d&;3K{cQ{xuaChidJkQ#cHsHm@ z8CSkrchY^fP@_)P>&TBuk3&DGzKa@f3vD$}y9TzHTUam;+XvUz>tx|!>AV^8@0+gl zoK75?Dh3O8>=+rcfdVvkt0{AS?Fc3Q3`GU&ZI~nm`pr&z{y_wXF7P;0 
z#6U7_@X(R?jhT}W6dIgNA(1e^Q;{^C=v*CffV2~Pn%hYciCX01f+szTMYj)q$zoolpTfb|s z6IcJUek6F?ylwK>94+wr*0DuRAo#@Mr#|(mQjI=+^MA|YSyMPDa1bEY28S5900M-A z7V$P<#2`Vu4jeX!kdO>HKWS7-=s-+<6^tEH(+={06Djg;j$lMvv`$Ye3i+XvgRS$2 z{a`WsQ5VK<_7wayL2+@rEa+H%6F@Ve-^!08Y=ts2D?I}Oh_WmWS;iOq5NG_X6XyXG zf&30bdhXMA%ZluUJ)Fdxdg`f0ycv$G7tNLxFFjglQkypC{#PCQkB~lZ`x3WI8Tj!V zq|0{u6}Ggc+G=1OHPC7k2+rdEKYLdJ;8vBjPh^sDueVU3(BiZdm*UPY?u$Dt&LYdo z4~y*Lx{H^^ZE-D9v{+k8fl^waP#^cqWRlE(zVqIFc`uWUHJM3f?t$dpcm1Ap?z#6U zEN5F6e@0DTJV#Am{;!K+6Bk{1<&_S9^U+N zr9Gx9)8#tGwD~Cs;|-P7{4tJ^Hu{i0QH?*ci6U)IKyBpFHfsKAbfYhn%SdW`^+W-K z%$M`5`6F!YvEkaH)dn2_M%x>&MtdoZ&H?q*MrKwp z=Vxy8*7&NHYKtd^;V*L|!@OimL>|)oB6G#78lCfvH6e%7{GVDsni*-MOtqKBsspqz z`%q7`F}7k1V%!=~W=sns7$cz+Zw@Uqsb{pd#y9&%r!rURK&{Qlv-*tJ-W<_BfAlSr z&gx} zn>bb4?PYyQ!)H~{F5^hhiOcOtP@=On2I6BNX#~VuHje*D&A(C}D_p;d^~k%owTL&Z z6Hh#m_S$PNN6|OW+(?xRzp@WuY&F<06ky}vIUolc1ZHF3FtCfr49zA|54DjnDoao3 z(rS-FE;IVEZMM>pk*~%;M+ya!=jhWIM3u`x_~OB2EX*4NBTKT4wDjqCmnNttfHp>Z z(hwsG;jyn1#lgmETKd6sBrQ1Uq?4#W z4G6Eeso(e9#En8Fm}l zSM0lv(VuKYoQ+M-D2=wqCS?>$+t-9@C(dtrb!?T&?lC3{|Vb9WO zje#}7KvD@vRty0NtpNeSst^Dkf7Etp;)sRsUwb6ARIiW?0(*>X6y|1ww6*W>Mk426 z**uI~DOVNDx>gI$k9)I_9?kaXQMFeiMKg>!S>@)58|A7FKdoY09?Fq#%+J`Ujm=bY zQDQ4MFe2+VOH{9thkngHRr)hpN+Z;Z4vWnn9dLa!(=k3;SY?<8Ue!oO+RBS*FO{*6 zOMUc@#^$@xm6S<6%t$SP=RXF#9s8kl6#)CIwOjgiXdro`Fnxul{}} zE&K2hqaX8o_uY4=^Upuuk=GwDh~hW;iuX7a>{a9&Kbh7<{vo%H$1b3nr!S`n%RL;x zM5>T&J9_Kr)X|52(HMxqKvD@v>|jd7x_%&y|MM^_lrn!hyx$x|q>8msZqsR=R?+8YC#O{7z)VU(-d##q*^ zLuI2af23Dyc}Ak!`Ewt@l#DMOn4OyQk@jjF!4Jm5vZIx)_EtS~+CjBH&Z3`R{+B&h zSf!6V@(BOd(e{oCDSq+f7E%5-htr7DrxR3a`wT1uZ~Jv#r{>V=8p-OpMZ29&F57LV zQ$?MjG0;m4q?CZPSO40}>BO&o|NZyNZNy&MQpIsHu0E8Oefg$I1$d)1oh_-25A}^b zM|Kw@jXezejky__56(1i3%rM5^AV7GNEz?Zx-so#<1yM}3$pr_wE9xL80A(SbgkOQ z%pXNYJ=^%4lswz~v2o%*$;bTFMEPlN&)u1z@==$w%#BmEvp;2IHI`~E@5Y>f8ny9S z__yK9Xlm3|&FphByT&A}c?18zqjY0TKt?u4lzR3#N@ZSyJrDS>z-(Cc-TSooe^>Lr z6T`0-$`P{m#TQ?sF=NJ9)iX5#LR5UfpQ6*}pif16pHIWjdX%z8Oo-;|l*T}kFpyMN 
zCa?pkoPIWC4PVc21I;I&+>Lij%2Y$45FK~iarE@lPj~8X!tQ^dO%A(+yxDoyCL$Xu zb|7=}ur@7hmdG}DE@QLe&Bv@OPaeVAkj>Go_U6W<+8YBzH!tqkXkpvXkSDn)i`lHD zzRby1CuW5*7Usj4wkO1@`e_qqKD9bXqyd)xn-ki@*l3Ik(*7EEfO1xZU3?AORm~{xJHyvAk!cuYV%T?q}5mww#^Wms6{A@ z_Sh^j0X*Wi$Gps{Dq+kC?aYQej?^(%fHdX=NHl+IBUP(TG(LL3_~?rh(ak?*e4~O< zkH4+)Rb$oO#F^?#b&hJ7E7}=&Fi0QrP^R+L_(olmkYiM|x5nZZH3QC5Z)!|D?P)Z} zAgI?`yOMr+?G|eM&B@wsc2qFu z?zt2U233~zEZ<;j+W3$QseJvZoM&tzQfO`{DjnVUrJB@?5)-LqZc4HjNhR*W*!a{I z7Ozieg@F(N(7=S~W{7Ot#%)zIwnam8%WN;>7#$d08T}zG8?Y+pcdXQyKQzyb_Qp8T zX~;XqGrF`2=~byPUr{@vi5O&a44AZGbTZf6KV$`q7ap z=d>b5zG%^3 zxjAJd0l}~I`RAVtOKb7s#c_YMvI$$#_??fSFL(jAg3^I9nzfZqM6y1F%IVe%nFuOG7h&3u=Vw-d9{IOY^!&>93o}$ErYnYwKwwHHnU^Nd# z49%X+F|7G3Wb;$WU5u6pOWQz|^WLJBpFcyP<`_*Rt_1b)=bwL`GAURTSi^O1-A;k| zZ_6_M>uycMPlaa&4H^Rji-C+KAV6Cn5D@q8x7>0IfxQ(EAQu_2=`5H#oKPseC-mCT8prv_}_4A5xB2$u#m!s$h;`m6&6h?adZyoM`iC zeN>ThYe-|GMO_))UT$k9rRKYR{zhL`XPxFJ{aXDP3286m8FMjO9BLo2=vtW=@abQ!MUcgV{T%L_2jxd=@$g(K?W5D zGMa#>Ut;duxpc)9SI}E;y`{1eK7IKmR4{T~%IA--phN`kUVk3f#|FskAM8(R2eb8J z?q9YdsVEz!RZZ<}j-5}HMhn$uuPB+(lgvO>HlC`9GgszlY}7WH0e5EgYt^@PpprZ{ zS&LebRTl|W$;h-^UXKu(@#H@8_7HG zypyo!U{ftsFP<%*gq3u`puhlJlbmsb1=EF-oVK*WK zb}|O!mRVyqqLTeCCI3289R27Qje&u{z@WH+#3F9dqDA!3Lk|hNV&%$}vBmm*kYiwj z4K|>YPd=GWJ@r(FKed5gc5uZ4T78!@P}PWYpCaGr4LeOOxN;$_zB3w=#KUgZi}pH? 
za<|x@Z+^JbY0wxLcnqY3fP^?WPw>~T34bub<=<2O)`wH>rv1;ag~PnBzWR#Ze*0~j zF=K|LTUxY7(t-bHm)sdZC3ou7sp7$T-+lL`?YG}PxdSArH^d%qnfJQYTX({6)DJ9} zNrlr+ru-fMK<=!lGKkfQPG}4aAO=!GKp2-U-Z1_&^J{qgdzAg*cFNswmxNt}8!B0VcC3qPchzFb;s_i5zjld1-df&Rik(Js=8UtyEfz%L?v70y++Ro9?5M0WY<#*)w_ez zA+yTX>hEdj@rD=%BEbNK1NB61{&TbW;cEQgLCTwQAYU?cB)}o|Ph!w4w=>Ts2}|?j zm5`@oOtdHdqk%kRAR*G;N}%DyQaA)WD z`#ki3LkI)>@2&Bp>CCFC?YwBdbwH!aH+oX%GM%F_kRcdI2@4Br_l?_(Pgm?}-Gtx( z%R$;#{-CBJªla31dMPpzMG2qTAq`WN;rlwhJNp6D6>!-Y_2T{RJCy>{EOIH>2 zQ)3{5Fpxq55_1D#Imkdld*u?x@Y)%j10DJ$MHnd9;aF<=>`8L@@+g11BdK8fW66`R zwW?BtZryD^V<2?|gsO;Lygn*F`o^@uRfv5i*}+>j z&i<;?8Ux*80L~4Y=e|nKUpz5E7=R`+UYh$J)bniK-pUq|!_C=vcN%umf2jQUJFW7LP>||gxxspGTl^W-zH*)Qt`iyqg9!sehJ`hlel)$7 z{>4D<=KE0HYd2Cyu!THDBPnmILn(ji!4%Kc2;8-YYL-SP$iA#f4!At}u2UKVg9rnf zfDED^Q!k}=7-(bT*5=QjrBLm14iWo6!p3g)pH{rz#Y{X#Q1<#e#Esv9#-X~E)G*W8 z4;+bVW5I-|)dd-jfqo?*VGcdo_RAc~-FUYQN1L90reXkosaoc}Au71-HupNg6+d*j z(dWlwW5pmrZYee)yPIA56zyrqLSm}wSB-%|fq{M@An*qi_~8SRKUj7Fo^`>Clr?;v zL4iQMh|Iu1JKF&@efBgp&whzISRFSOnpicr^q{L^vwQsBy6961eD|)Se$hS`u$@Su zBX6KzAfo|5%|270BZPSH9Mjm7Wd+m1?^$v03GARXhelr-jj@s`YZ@%`>=z+=kkugD zIk1!wjGHLHZq+qt4D=@k>fgSLnm&0fF7d!Tg1^q?f1bQa^$`%lz%Fc;_bI>cV{fTx zhFjLMzive<5SJuFpd;K)%dXyD<{e@6dj)Wf6ltnqIn;2KrJn$%d|kFq|MMF4*ZqjSub~{8`bKZm`^E#*qJ7n37A^(J&F)|9#KK!TjtX}>lL~h} zae!u~hXn@ug@u*Ff20o8Ms@=MSJ(aJ4Y%>*M;#9BiVwJq4Zn`j{{n^(2K?ivh&vs) z@rP_8Z|Wf&YWXmBXgE5lSn#hig+`wDd_qBcZw8A58bmPfV%80`zuf-Z-Zbx2*CHTe zH||uXGQtfN6mF?!L)&pZ3ZV;`QmA?DM&C05-6u*)LGh9_MzsaTo1*E@4y7 z-E?oqW|!ku~H7{I4i2m8Z#0B7EUwGvl zzSqOV1Q2HT_I0n{EXoXOPRP$<|9NFcT+i~+b*Y&Z9crHY3-=vH5w)@mb?INGQsJ(r zON{vC*X|--0Y+c(4)O?{8Q_<-R#Q;1kS3h$lt-R%T)Hw4&-T>e0awR#FcBZ_^2c(pEbUA?@ z6b4yVSo!doLOk3B!`N7H5Av_GCGWZ+YWey#YW(=W(hE-Ss7rj(#{ta_IH*Nfw04&L-5yDId3py0>nlRd*dl0e1ORdleA9`0-;ATDq7IopOjBL*GDB zj`CCv0MS?;wP4}ZpP`tAQv|!lXnm8un5Z~04~$(uL{r>8~GD;qGeIJfy(oZi{`cc z_6@5Zzr&rn#3hR&9_Yh2W*z9=M zFPVV>F#=@vNIc?^HDZEn7QVGNPw8S{lTgj4_8=P}7Vdr)^VU!8Qtr~*$>3nsx@Z;~ 
zIzG#%c@U~q1Xf+d++clU3=JxaAjAZ|qP#Ud0oq9^&LV@tIb_!|SrZ($N$$>1+Iu`E0MAL>;`hX9&xuKiHwQuijv>ks98)o7#Tb zg}>ELTr8Wa!j;RHBMK;rfT&v!>H-it+76J7w6FS6veX8P7q$5-wh!`@QVTc&@c_yR z0B_k^@>E2~zuq?TuD;}^z%RJn#A=FJgpS#1P+))%zLnY67O+!FZ}DikVW3>C59JmC zv1yGN>su_%u@{I9;BOEbTZoq;YPLMD zRd*aF>w@rGL6rDJ#|y4n7?u67Vu09mu}~iXTaj20<4|UUt*K^QY`^;5z`&3oAbFeb zOZaF=e+-0SUChKj5oDFGO(V{JoNYQL^&8>9T!L&rQv2dR9Mb}SAMBB25HOs+gDY68 zilO}YI}EKUbF>Ci^UBBJp|RNqd1D6P6DyAivWx|wZau9(eJ<}%#kKnu zYh@NxJFDPA7K0GjfN(=4e=0<_Vs}Pq5i#>t0!=K)g)mp#ykhIFzV~<`sH3lZuamNY z45Yu-2m}45#=Azd)Qtx{2Ega9cd%9RaKRb zP_|`kN6}R%TUED5Prgkk2(U^jG?B20!XBiVW3K~LP%5{Nl~EW%)xWYm)#t*-ffiHc z1E)zI%Ixf`_SHWjO8^>6SP#Iz{&wu#FVuQ5CTvFlMSHPr4bRQaPO_io zVK68fVO$sPTkK%5*-ULPB^+=2g{_#hW)x z00hW!U5}({65aG$KjSV6^9kGzCG57uE=abQ^(F0QS%;r__^@NBed$^bLUjYPYQjcW zATGR7Lwz-ii6?H;P#Qp7_R5D&2M>xR-4RPe3ytgobXT^aL;`i)6>NZ)zunPPa^U5Z z$G*B+zJ8s@{G96Fx?Mij-1(+c%*KL{IRV;C0lK|k zi2qh#>Fs6tMiy%|p&qeD6XXav8|}&)<=;i!7w>S@qu5&@Hc)c~p}~flx8?6x4me(J zak*RUCjxBTcwpoQD{$DL6z|8x!)8+hXQ)tz2dm5O{sn&Ka6^ERCRkl|X#M$1shiJ! z$lQEm^Nrb<by2aB7NyfVt! 
zbWahmx1rC{F?_&?t`D~h5IW;#1&5d}Qx*C^DrFX<*Hj_4&rPS3gMk5yi4LA}%iP!4 z$mb3DhdcW6cdUQ#$yu82C0#K9etPXo*Ri_lCvm5UIO3&;UY*c~Qrt!<&V+v@#QOlh zfB5MS>wl{$#<%W1GcYh9EG+2KS2C|#@nV6hzz;L~&o@?P{5FGjmate>0#*AWaXK#E z|4;1mX-|80TBISo2HOWLH+8a4i;3w;XbcP>1_nfkjz7R$)761md-GQ>s-%7natwf_ zQaJ6T=qh1=w$lkT^1_$Wl6XWX%1S}Zg3+KcpfO-zVBlC->^N=pT}MWL2j0ufFpLOIQm*qb48*>IdQ*AytQ;@o-wHR~mK) zp0t|NK>Rj(OpSq_Vqid6SZZ?6D6vEH?9{QbvdYIZ{~@|u@iSyv;@an+)5*a=^W0a( zmZ7WO(n&WTHWwB5pUNI+PZC>#uI8=FGzK&V3=Bkf)B(VVxO}!5P(hs+Zl_ZLkh{PV z*AB|RmtA398!9Kp{HFv=@C`bKf1 zVb9lTjRB1TCk6)e27;-;#XG{?rkZ$QEdRPrOIcVv>)=wrOy2>)l`1&WUBmnLvw{Mw zfTIBCf91#C63s}XRG6)yD;_-6a%rxn26r3jPCS}99PDzAd0z9v)oR*l`Gk{3%(LpT z3kP(5jh{U&^Qe98rU9MDU>tnF2}pk;)muE$ijXGbvipYMuV44)6b9W`&0oFH8Hemu zwSZlHz+&4TbuOJ24dMEc>chohDubZ8C-(($w39SwcQ0OD4_kzJdq{NOGa zMr9lmJJv5!-$zJI%WU@0T6W|ORQ=?iM72A6;?4%~U}cMR2l%A|*j=`#pfIMUL?u5rY&NawZW zZn7tJu&36R`7?yomX0n;Bmy7HXxzC)a+?$P=_=wNJ12~VNNbbC?tqfzm-q0^hEpj?7Hr7OFtt6w5-%I_$5@`0T;zpmQ! zM_GV96hn(mfFX+++Lp|f7%gz$GV1af5{skqp)<%oeu~^M z8$W$QJnNzXA|T>$mb2k5(moo1Dn~ryN+y3Hcoc$O!Micm0dVIOQPG~~Q2w?y&lH^B z#CBl~@7~L?;a5mKe8Kg5;j~knu5dUXaYY(GdW6FbbH5QIEC2{s-qwdnTSRV5#{+^) zE`z_)Rt5sg@rIA4XE17q*3>hRc&J|}_T;Sc2kME9x$3{?v21CsxHOkAugrEu#68&8 z0Mw}-Hxk4dujBY3kn4a5_$O>9F4{vJq^olJIn>VXdjReNmPK%VXrC#7GJzQIaz5k) zc4(OSfYi_3oL#=NJ9dMZwHxt6-o8`Z!$UCzA$`O3tEl0_hppyT04fCkG7WeeIkIBq z!)LKa<8OFf**s4pq%FeU*1milK{-G{6AQj;IW`E02W~exd|UK#Bt%3Wtbgk^A#SQJ z=2`v3B@zz=ZsL7wZ^4V@$c@XsrP}}fU8G|8?7VbFF*zpOS^puZRJU4H%(*Yo5Kza)Ga+ z|21)p_Lg~kYv2vD^zdt~(9Nh{$q^nwRB$p!P$h5CDCsxSV#Gqn3pQ$EzhnReZvxH` zIWWk+>JY!&<@H;!Pn1jpbqn`6oAXbTxk1LT`tGAenF1cRVc!*v9B2f7LY=zTZ$?wO zMHTIRKDYI7CiqD&E7q%|ZD18H{yshe?!MLe)fTwyf~3vbL{F1<;0lkvDdVJSii4g zPtQ210_tA-7oXf`t!cL@Zx3gVh`2}W-acN$LrVw@f$*pegcTHFqV8rHRQ^_ncAB5J zaD?QkO*eW2F$hS@?3W}0V&3NaTXKpH7Q(}g3*xf7e7RI$y4}Z(Mr0A}H5;3?9VDzR zXLw4-3abl-r6P;qJe>R^!4_WE7t*9~*E8%*B@MQl0JlS|7!VbSlvq&4x0aiz>WPbK z^*zUNINn{^`RYQciqO7b(HW5vv-wtYqS1%#+h+>RAvf`st!*pO$z)+5?FdNrglz~0 
zXm)6L-#BI2^OBi{<_h9XgP*K{i3Qjn>{t-&A3L+*6p1cytR~l7WJa()TOc>+>6ssBY0y|7J zp-M)}Mvrn2+pS~`Uk6DCgld<`yPL;?VgQ8aD{nqPWFSqSKF-AK9YP)4DgYn|kc)`I z0KbEsr{*aY%&+!JL-qqArfw$oa-Cjt45Xb51WST#LDlrLt<_;8LkL_S$AOLqIPrj= z+FJm>x#{y~h5w3^kK3Osv6Xw<&tR(G&OV_k8TTsww^|!ciRCBWLq6Wz(h;EUTJzFB zI3&Q!ou&t2ZP`)(O#4j)DzN|vg5v??9RS~0L#}}M=>iGQzr}tRm0oKXzvmQ%Z;(>LEFgXtKD42FK4LkAQY!gx3-SP*#T%E8X za7Mr_JHSz8;JbGuOY!p+>!)?`EV)tG>q8B=?WfO~8hI$s7C235%hj8*gF=%Xg;PDC z9s+4rgDqs1klV~dz7n9v6Gr1822f>6R-)e3JVCG^=dc6ReW9cQfZRt6Qki(*wo=A6 zMIaU+9^ei%u?jK(!gBzORspyH;YMYLz=D~~w)|AG)I99vI-ML0q$L5t(h7HX@=80{ zKnNNl3JlM$hq6xjU3?N*sj_?ZjMY0A?G9$s*a3z~5Y)kAYo-E(jjR8CEVY@ehD7VY zA8PS_mpF={5<2{h=_!pA=@_t&FZeA*eba1@Q)wCd;VKqwpS1mF?pQ5s6^L0=9Zblf zyZ}{pXf=VQQ^iUQU@HOChUi~+st||>8`;(J?F*VyV8=GIY@_P&i==Me)I%t%Jj(x9 z!g9!4aASZ?2SD9GkPlW;{tm}TzWP!uEH$>u$L&H694xo-v0MUR-9XHff7Y<^Y$syK zV`^T!MmBcH*dUjxxc?MU&CO!&0tD<}u>r!&f&?J~VQ6PrN97~uvKsRuDMMX=+=f^t z6KSrILnP{fJ=gU4Ga@&UO8gaj7*+ZDx#k#1OBNQE1qX}p2b9^HaU?PRvQO*DUP9x5EL}WXV)4;w2 zvJ!k0K>fIbLSo?PVPU+6)LFQ4Pvz#Vd8v7;l*Cy>4Z{sD^S zi`hZTH*d1K0Tc`4Ohs{-GO zP?pU%RjJE2pzQV8kdcXioo$#iaYq_??$cu55#(DwZds6#_}88ymhm_!0&o+;-#s`B zA`{|J*9ZicH-+s(n5_r(Vz91pNB|V?cd_6E@*c!-0kJ~AFn$DKQAA8C>#w9^V8G*n z^khYFdr<=^=mT8^-TdPDVVTj+&{2AtMm1C#f;eOAMqn za~K;&EwfTq-+fF%;(_(b?Z6a-_j3JmniKy*_J({-jt6k)mT7=FJBSAsz^A&J=l{wl zv_~^$InWGRdDH&v`0(wFnX{g##=v05K-#jfuu;^$`Y)$Ge;2<-gk>Xc*4*eV;Cil% zle%l0i39}NIEet-Yw$xZcZ>J30^2ZC48DB^~-FJDi7cB|4Ko@RS9 zPhD4IU?^fB?QS60Fe)EDn_W;`+GFB@9}dQEy#?5pth((W7QPwkf!(!-06){9wF?4* zwLlFTj_kn7OC`WMyz0>ldOQSFitAXVbmeUaTf_s6MdOAK78*1LGzJD918H>wX`c6n zuvV0Fx89%!H;|q}sBRUjNnTH_tSjCd=*<|t={Hq%p*!Cb7ap*Gfdw*Rc|e;f+0f4V z|L`sg!#4uD9x_IT$w1)Q7}_p90azv@b~V^c$pri^ReYmVR@1Qj?+)NHB_8nr>uTir z&r{*9r*_wcF4q{)80ZHKq!j_d=@&ZaBQN+Lt0C^#tL4YWGDy@IMpcN_Eio1nRZ5Vb zX)=%iJEH6jz;R_2$7_HwS~38{sOr)4I6~f1t2+q8!Jg_0;J{D(Hkp-Raa_t_a)+OC zA7zypHV|F4*JT<58Uy`a&_!Nj(Wrm<}|NnlcE@fxKVvF}q&cHI781k)^%$#_)=W8WeBkD&8TfBAMI^-LrT$&f#Esm>o z$or5M2vU$0U9;pgwJ5IR8iV_JF58-pLou>1|4!_OQe({gU9F4OjQK_@XcS3rMeS{Y 
zREW&7Zy?I+j~c&T5wO7Gh47?b2*Zih!f6$2`f35)3zyzvSMc^uxB+a6%9IHT8{J0=!-Rnw= zeDPK+rRG8nh%W)R{UU>FBBw_;bv8%WCJr>2bw*IIZs%Le58O3BIvjot&~G|5r@t9E zKv#+4+kd{wFUw=n1G^$0K6oY}k*{Ce%qE_B%O*aO$3z_($!6@$RTuOWf^9;%Z#ltQ zWx01JY0V$>9~9YSi%)=(VCJ@%ncRsbrqz0MiR4;T;?U!j{W!6kpxu8dxhjzAPyYUk z{_V|({t4FfbNDd~G7V~PS`nOx>b4-5y^<$n&t~VjybG{nh?tN~u*IWqxO)Y1LT!px zn1g(+KxGfDC_4g+o!^=F*=!JP8@`8zrlAhUKz=Jwh16Zs5OTCmon&cC@2gQEZ7}Zm zt_DiALb62qMGE67C!*cyQLe%_daI*wY=LFO57R6xdNHV^?9 zo7lfG?Bo91jyMn@1!>6A8kq!F-@8TrkXSfN&vP!{$pq1{i@-pe!Sojns&wdhDC;{6 ze^vddjC0THH?U&ZmbpTzvS4Bfyjd!AfQF$$PZO_Y#Cz?*Q|V2;S5!byvZGLe+7dI7=`gKJIEkH%&-`bN~5t z;TSi}3=X6Z4VlMjIh=JB0Sqq;#AZk8cLu2uy;7~rER#hZqrDm|LZ`geT+QI{CW5`yEquT2L~ zEdE<^zl=pNik=9kD1t}1GPy)swGRTk&$TZdM`DRzpXo=*x=tO zEFeJwv?8(3<968G#zv<(F>>z$Fcb4v5X$FpZy|%!a#=43<(E-ptVh>S6Lg>#6({gy z7Tex)jZvmAS#HliT$>31Rs}Ob?>ob@tpBdE&rtVFp{x}^Le8zMtUM^dc#K7{cK)<) z{aeJMk(S0(5zDg3^@zY9x)KPx2`bU}p^x{I+a`sEWc`)u-+(w??r@GI|L<)~dZH#| z9(b13x?;@i&0BkSRbF)_xpsdr=g{Go>y;C$s;U%Zx`Ik)TU%T0QW3dtx>YtUwFykeD{&^yRWXH37-eX5#$pV>4ZAx$Cqq7%2 zb8F#+&-uzSlV-{->%7Ob_Z88Uud=)>`@Wq0%?l$ruA4Mu%@p)%3i7`-?FfqX4?b20 zu4Kqur!{+{#=F@0n%F2ot&mxaZjLu4FPTP47p*(`1nVv4k4|4IV{4={R#d-lH0S-= zN>A(en7W<*E51*Wy~xQ}_B)LAV*@wz6rdiq(d-)LCrQw86RIA02OHX}9Z6;9i8KTM zHOv$8tc`1s1rKqLC&GB^Pj5^R#U>2xWlpTkC6hf`rRg+UymW5Bv3^@b`eFNz%fqY7 z(%Rt0G!d}Yz`?2PqUS}2qF6p?wE8EbMb{WnD20O|~Qe=t=^L+}Kr;w7b`< zAJKV$*i5ncGX`T}7W!{kfV4BSnvgdQHqg$;KKE?hSW~{obWwEN7RNdul$rd|6+fo& zqb}k5kWXKCE?RywbDAup1AmL`4)|^GiH6EN<)QLjy@rw$(L|9n;3-fL4m;NGWFfy2 z5hk{i2RIw>GS6FqDqM*a=m}?oys{yd(?s`@(e0IeRi}%HIiSjBkQL^H2Jp?|?JTd7 z$d8U-omN~P^U=B)r|j)W){>_nm7nq6IkB^ZGtQEudeH_O5*!N@3H2+ZvxyngR; z#m9G?=S2&o!skD7=nBqKmsM}NKI$WD6XpaSG>X`=LO_OjW-;lO^J?&G-vBVLO8<_t zk8Qg32vghVFkx=64js9)Pedv=P+LeSHL&ZI)g|aC+bbp@+QNvt{ee^{jmeE^ZGWmD zzvEr{AHAtr$Ln)N)cAfGP@u4`^#+VsktnJO~$X(5xN^|SjfNH&gfw{o?&Ltt&0ca zNS&*a^|FxopY-3wg|tl(|8Y&%yY|?|M%8dUsu}h?pmEp}wd%F!t8DKXHhC#$(w_P3<@;N#ty?Vl8l%t_2gAmLu~gd3 z^Rc1Wto{`zx6iXXt=PV0t^o%DB=crFeY)5F(AxNl--+3+_Ke4MLXkmQc&5T~hQU`Y 
zlLxuOQk3K0W<21fkEOG5NrFy?3Le9kwXZ!Ye3@$2j1}K%D#3QT<9!sDdxU-Kb`Nq# z>lT+2FC$>dR9p1EH#DPbs`Z%NXcfAnha9Jg1pj%3(5oZ!o|;v;ChA6N%8ppa$#1nqFvmMbruHn(=zOu@Z~d? zw8n0Ln8gB8-|)Ia(VAGHXUjCq0~87f5mLv)8J%HE2#mO)HT@@NT_TAU9W9+Pd<#mO zM^EKN9gY;{x%#xbIqODwQ>bu4xwG<*x?}Q$uKBzl9q+DlkK5m+g!BP?pdc_yfoyXr zW@N$yOnu`!SbamvS8YFJUnHnhKiY+pLMh$FO7xShAxQ*{;pWut!_)+aH{p1P8G75T z?5OD|pRz7I?^f_J($;d=i zpl8vWk7a7?5WvH9dZofkbhD#=@z<3bz-Cv%7GpAiuL@VTPu}D^k%<5W;!z(%KEZG~ zteBW`{4{uS0!HFUpvS?LLxvHkm!ID{hZ}-rW8`+~4Ur&xevH!>KW0PIy`^gt{T=>6 zlT~~-7lYSz_-|64?2eH`i2OU?_$_E^;$bB0KbqGtke3#yh%|@b%|aD&OlRz006$^W z>WB2P-~ta&T+=!DwZyl)*y$b<6z-2UH?T%f;|>7tyX0F2&GoZQ)(1vm%XT3U5(vT3 zlsI)-H?pQA>`dIaFJll+bwNw%{hHOQV=kVKCSW8;45a4Rb9~orJssEYdU`F^D2?L+ zX z(?Q1UB>#mRp+kG^Zc~8krteGTu#_rksjoQ~BH}Rprz3_k?@KP&?DQ`0Y;9b>LE8d( zxWN0S!czt+^V$1)8a04K*FM{tEiv05R3PUsU$1I^5)so&U7QLHB&`$8yTREtv14{O z4`CBZLB@H#vnli0&-u~zH@nWLTc0YsHb2Y7Twa^ zu)^SF+=YjWe*ISb8Ve+Jo@$a+%tEr`9IHde9x4L5BJmvIhD#J`dte!9*K}E^1nkR@ zwb^0#Rd<%o_+ZGQEPC)&XJd0N4~?#?W%?b|_AGXH7JM{?N?z&gd`qjXA={P(X#Dbk z(2Jk==Q7fUlHPJ(bPL6soPfU)+=ht}U=*5LT9O6^UgQJjm`IrUzr{J{TPbug43<&@ zqxwxN){L-MA|JreJaW;W4yY4|0WS?!vqUc$J}hJ;QbawHGlijUNNPM(@NhT=qy1n6 zEXd;|N0SQ>RbLGCvbk;Bdo%9F*WG!)5&OZ;w>u)2=c#KyU11pJBojY@uItQ`A^MpO zd>;Nl`MObb%ZIiLk;~nGuE7WBP~)kYzdfPgt}?M1_S^Or);kG1n+ttT5_QF|$$CiG z#qPfz*TpdrS&q)cT;^0E9Da$KO2&~e2j1K*8f@lGBC4%xcFXTZv3@SMTaAES@y8Ao zsA=2jp~F z#^{(!ej>TZyys7+JN^4I?WaTw=>#r4rq=Es;P;6y+(Xz;e_Dt|)>8q8NbEOSQo1T- zV><;jR@44Zpl_=$%MJ@VD6Y7=BQHz58dn>KQ4i}hID@O;y)OR6x#Nzh#?Hfdj9bmk zvwz3C!P>EZka!60+t!Gi%ki05{M~sZACe_X=(IMXHu<~M{;=i@@_adK#&m_{!EPwO zeA-I_1s6iO=!0>U8G{Fd^Zv{R+hdJ@8*V+RT1{f{RHeOASSdIttp;X!)jZxpd7oZ2 zMXOPR6WiVqm+ABvu_KQ3RTOQ2tpGY_0#^?_1%%srdtTekS4@3GwGT3i!m#%o?_POs zEGs)Q&>v~p5ZpUO+Jq2}K@z9I@AFx{C=f`GgZ9=-0a&yPH>6NwM32;_*vgYBe&iRI zcF$%k@r2)@YsTmp88KgCa{S(ZW6F+h;G8m`q1V)qQC-OJX?eXYalye2#=^Sde{H|s z*eetS|2tuP>f$su%%C%tJzvaKm6zR~DAX7NtG^%83?eqQEC9-40@akqgo9QE-#O3c z*{wkxz(PVFQE*Pb{xew|<>;>}N51~cHU9Xg@Upu5?=$B+$n+MBO%hZ1e`aX}tHLIT 
z2|q{QpQ4qUnX#(;y}tei#9_R9ZQW0oXm@)5_D*3ong8HBUYl-*01rs^VA^u!FqW8F zz-8R5d4OhoH0{ZyCL0lpKQDg3qchid4uVHZ=UYIxa61V~g&bb_#=BbFR_4PpMbN>x z&H0l|^$On^)l*8OZsAk#D`C#qx57PJ3p9=)2u&$hH_|ORk?AZr5i<1(O(X1}tOj84 zY~;f1T&c*$On0&_F`lngH>o*wsHkk$U9V0U-wKmGllK zFY*jI<{3>M6tKp8kGo3M%<48=;X#IT%z_76R;e~B#4ILv;yAznNwRs}+oBr=H3lP- zLj7XYw5e<)f`U3N)6n<$a>{Ag77*k2{`!Qn6Y$=h4}so8w6$ry(eAOpHZd`A`?wp2 z7=_r3{z5qp4cr>4dijUOYbmT9L9vE*W%?#5{^g-!WFS}&V z(3yUm9*eTw?6a%><|4(VDkvZ-(v1L_rwE3vkB71o3Lc07g5mIebyw4S)n?Xj|0%+U zC322oV^}hJ31zfwSrBuoMK8}^C`M2G6(FV9fFTU@Bopb*ah5v?$2qCHK99U@;ezMy z)MovVvfa#vYb}701BQnv0_Fq<|mt&f0TbUCjb6v5b{fqexmEZp-vCEtim&f&9OFx{8 z(`>*2&R`wl^VwSomnr+`ir#+ZNd1TOeEPU@^E{q3FH%kUWxn8KUZCC<&)Snzj=jpn z?UQ?`1Fe&gZfBsc4g$}zp6l|L=$Bky#7~gnU_9R~rMG#9m?2`(&(O+yppE#D5Vp0U z7WNIL0+2&G$ts2_$+HKOJ3w>mz(_8uFk)Ay8(;vvE(%timalz1uY~b ziyyq1!xQgyip_`PNVi4VGoWUv{hWV8`HX~h>J9`qu!f@9GZUI$9xnDK zSxEi=+{JhNQ>r?8QmXi=*tf0^6_;z9G1|FpfMw==LL#blLXyH&emys-{!j%U{>}@~ zdB`vTAc!eMpd1Ij8C;+&98N-Rg*R>8E+bxeF8!aHW@u>ysAYL~UMKU`|H)}o$dLFw zy=Cr4atLbgw#=Vx`k%Q@``aqr0GF^d3`}~bCm=t(;p+S#RTy|7xH9xyabOJNsPa4& z3p6r%`txyY#i<7}Ap`xTz|0hQB~Y`=C}qY+bC)N-3tH3l(V1azZxUOKfA?`8H{(SaV;sg zIKh8OPx-cv^6f=-L{ES|@7az@Q8eNC%V`UJoWKBKEpyBb?+^CjfhJN(yankSgh;M; zxs3$=&j&N9sqRkwIywbJ$hYv-zpsw(%-ZGbOdg)2xVzP41u`Gsd}}MDvH)ko5)p*3 zcXwShHI{u@er%KZH|L6PAkj}xgLYrvU7B567~$qwirGE_AV2ew{3fQ5pPy?ojZGNH zvw7PEti-ad1sySg<-Wh})uQ$=t?aPW#w!vq>7sbS9Ii?f?BWBC40{Z^Bu5z@P;{hv zS3)yvO3gM-+DG2{jd;a(VCl=(@Z)mrmYcEmeuoe;cw`Y#kDsyc)ZwB4sz5Vszug6j z824DoS+fIO%PpbN`jeys9p5Mcw+v4W+wLGrcTUuL#1P~ywr;J6kkJNMeQ338XCn{p zp&ul1eg1hRf5}x4?5DjG>N&s_(#VoMV9D!i^AUVj^4g-KMRNrLEM5$|tA>ofKeaoK zTvX?;$=}zc5E|W^1MW?wvBheRD>%HTXg9zoOU)x_JiVItbh21-9;4$9gmL_V`go)# zlJ(NNJ9cjx_sg)>;=wT5A@7Dlx|2oiXq5^c`lFTJ+2H$Rfv`AUd^)0`yufFHHIRa; zu-~;v;ADP8zy5=Qxt#uT-hfW|A;^T=Sg`Xc$9;CQXB3tkZv1yBMdPNHhd0I+`#sK7 zD@&@>b;hRAogR(h?=}Is0M_dK*aI`Uny++z$nbF9k}ijaXqfKTBeeUNS-%wCfU?bL zy-3%2g8Ms=Q3D^StRUjZP~r7PWBtDK>42FQMeQ#=K4qY6BRiMRI}*1qpWt~lo_$F! 
zAonD4pyjb^_-vKnYCf{hce*!z@h1v7klpyPsdDw6I(%Ty>TsB3Kj1^VQeOgan99Ez z(|Ah>J;Zr=>8cXym-Xd-AVJJrpCOgyIPqc*8xYx@vEH>?dHnWWX6GHErsA_&?0iJE z&b~?Pz~`Ptyw2u-5+Mh>b67v8BlQ{Zk_(SX%3y%IInca$-7L8qic?Fu8JPsTX%_tI zxV1CkD$-4b@c7VE40~PK0i9!dV>W3gC#U|NWFiAg;7&P0DU7+fb$wxmS?zJp@K58+ zQNQ{0Y8B4#?hMgby*Cm4atS!;e)IjrM8UJ`^8QkOE)Xc0dB?XqacKcWFziy$4S8NU zO!8Vo0>-)Jl4=xc9#bv=QhGffzGfG_-otk8sxOTRIRN}*VtvaoUQFk96R>6+Y%dM~ z$SSr)Jo(=JsE$$gm>40)jG5A7X*|$H_hSx^3m%?y#33CDhYPNqDFF!dn;*t*a!`5eTWB0s0m;7QYi_gHlvJas9Lt!cFCu_(;XgY>@= zOX*4hHkDPoH+O6{eP)OBT0+P6))Gw3TX>uCHM(LMwPH#PWto#@ffs5aazE=1DNGg_ z9(Jb+pTDFnof1(Pzr@tKu0tPS34EWwU&myd&i5jhUT@bwF0gNoM#$5xuW|VCi%`*&<5|T3~GzpZ~THqa^GeB=p+8YX=RZn21!bEyxFc6F14W^=9pVF z7D5c2-*P?p791Wlzi$yZdpl4MbG4<@Q?q+v+*=oOi(aN9lRP_h5OZen+YX6;TVZ3G zp!7Y|0ZR0WoH44_b4bJFpO< z4CW6s;?SSPn7uPJ$;KuQRwWnsAZZOnxx4%XFr#Pyfl~gQy-jH~&@05fl~BY(QCnXdi4R?dG&=LquZGIl-cHA%q)WdM$`s~Z09<-W1Qei0wi6*iF` zJAapp#9E!4^e`9G_KUkf;z-_cYA6SDJG2Hq z_V-x^$QRuY*nGxB-N5Q$7>n$ygUBcZq*r2b&uqCvk8T z7>sW`8+{ZcKVj{05w`rkj&O9Jj}!b3!20qJgcLY5^T0WE%={t{4~}CtauMy)$nqNB z%Y4^Mb7DxbGs?1+u~>+bV)f_TLTMo!ekS!9uw%!fm7gNJ)rP~43Y^B^N+3@QynOLM zNn?%6ac~diUD{%uQYwk$rs99RNPbh_xAo}=PP;?dl+^6W!{&r$evg>~4(0H+Ar;BK zW-erEn0s>DqF>k5ZKe%!Hw0^uo0?qU#TRu=>h#kTB~bBQlEB?@f1bCI!-$eBb1B-a3-V2_YrX4iSyKP@H|VBK`*sn_~K7j zZt-EuHvRzi{pjivQILN5@)u&HFQO>|R9Cxo7}MZ*nDiJ|?XR6itnzW6mBK{UROML> zs2s6(?BAECu{GCWm*f-C5;V-;Ab!PgR z|Ev$N^=lQ|EyruUEA=&F@o&MXx;_e@YQ3c>44ZFxxQ5bYpk1Sykp}C05pb*@r_jYk z^Dd7ef%-(!;vAjS$e_}N{;t2_v{8!B4kqm9K$loujdQvoFAn`l;ScX#De-Iecx4EP z04Jt~aj&38J0X*pU&NX>)(lTi4MIDov=zuKO}?PbRgEv8*fM$Gd!^kkzW+32FZPL0 zVK?SpFL|KN)l#`ST!#cUv}k64r?_QmV%kN%!?5LhUK zuhSlK3NBnLf9{S=5XwTzb?j2CRs%n0TiUMIVtvzY9XRDUC;)ph8u9 z?eV<(H&XAILt&gloVgq&*euK$2cI7Qo5d4ZRK11~z@}_2c{88<5r5{NM6Akk1y0sW z2=}$;ekjplJoLUyP@!3a4?djmxToTs0JTm_^Ms5{x3puYI2fv5k1hI|h7_%uabpiZ zbd$4)@bGDid)oN9^*`F#<(PSr-9 zI`X{5mseHaR$);joiu+gpL?wOTG#J#xXsEX&~5F$qsA`@vFhUX5DN@TDq55a^~X}~ zZ+vS@_HW4giAi3GVY;EZ}dJem{%t+~fADAD81KL_n*0n#>y{)c%y@T)-Re 
z_-1VVv}J0fvw!iDzU1TnmB%=yDW)YmYDJ3$!^#*XkT)!15KnzbZ<058air@HzOFr( zDGf7emO?^k&Is?7={1lqkNwvo)QF5Ej?7BdAMx~?x|V#uz9M7*DXP)Saa)|R>jE-E zn+Av)R}7>WmTG1l3aJTQ4cgjIb?-dkvlEZlCH^r(9|L1G8dr zdyft+0~N7hRE55wG`_!LlLjtJC%`*Uzvx7n1svGd6_-4RTBj(_7vNr!@8XizRYnOH zucTIE27MXUf`M$gL0V7(+B~LxVF6?fh1y-e2Wkb}HY|M)?SdQ#Dl6&`182}}(FhYA z=WR<*=nn{4f*g9;uaQ`M16Mk`Qtb+S6Fu#2>;WEM>>}K*F!p^^4(Zn|48#xr58>y! z=8`nxd1xH(ZiwsOuW@nJ)$qAi3rV(&?AZuS{0yQokIRbNjp7tGW^+9<{5ZPH9<^Ev zZK)x@BfuEiU8<=avxh2>L4W8v)$HbEc#JNlAQ_L$ZqBrNJtMgN#i8-&XdrM`6)WJo zaj~e`YIKu)RhDsJd;r(F4A6NTYm%HA9TFlz!a#IAz^sR76RYst(@lx0Nxy(BE;(%h zrSe97MSbE2Fo4>nqKe4cQgEFL%j%n*riq5}ibhmp94u!jv5C3o#xt~`dqOcbIttlu z9?nk-jugI*ZY+85FzquMsY2pZm^dC8=Tj#s>#_{MGCptp;_EajkRE zF^KaZ94-dI)mFE-8fqwIj62hZ*$rf1a+U4<%dsFj($TUxoBCiWNO!LBy)c|$YVP86BU5qY!#Ye;hEN7vaugEk;dela zO5{_2$^?P>qsTC_SNL%;i;CJ_=^QO_S`#i_@Bjpyf0n-pKW+DlyoZYKVM%1hS7)NW z!HR86qC3xPh((%O2rW{?zh~N@BUq*~Y25VBj;t7nXzjcAMWbZ#)h5{M(orh8Rt~Tq zpxWs{4*bj+6YwTx!)-c$S9PW;RjsmY4_mD`5R?12sQzwF9@2}N#c!#{z zRZT~a$MIi@5a5VcwS?_2g(~`64+i{0u{M4nT~ZHx&fAC)!V3{Su`ZI?{SO;e1LP~^ zHdR?edB1WN^Pow{f?`~Z8e-E!5*@Oy7gLMoz@qQY7mwXyPq*pR;nm5b8m;7e^qIfB|iuYr`;1 zy~&Hrc>_sR04l}#?>?J2@oMo%sq8=NJkUYQ0eYtTy-4g*NLkklw;T@*m2^1>+eV@B;%av%=_?g~g`f zO_|U32ZjP#?B$Osa4ZF~t>6G8bo-*_p{xHgX~MbU8?EfLm= zo<+%Pwg7nvkonc$^I5!YFpeYvq~X(z4GTmsHbF$)DBjJqR2fwGF4dr92Z&dE%Y@5y znj1g>Z!a$yHsGi&DS)wEJ?+cmt{J1TlUt)z*vss*y z>I)=}GwHaguz%@}G3-x^bHx}Pv>R?BoZg@pd~|S%@=Dk89R7%CUaC44=UbN%M;MG^ zx>N%*hLXM9fn>pq>tHYt>@c&5^15kfzE{y2Oj^v-^&^8k9z@IBm0Z9aUN`o0^4Lgf z{U=QRdk{m?cLW%cS$0IX9+^e_1#^u?m+dPWj8fsoOxNX^8(Y23ZMnkEmFEPU6TJvx z(a~`HgF){?;gQX@6+VI#7yd8yC6KU7JYbzMTh-G3ygI`gWViu^ZUPgGHAz6K!O1+Y zXa3nlTJWU@&?btk03VESb-S3Z3LIL>?_dod8;vYRQtYl^@}{u8rAgVZxX$D@H*t+o zrh;sKe~fU3fE>poSsUbrj9%b460b^AE_=TS7I4@l%B6Ct^(hI*D?Z@!y$GUC0WnO$of-y@)>9l}UEH3%T(-Y&y3bk`xq!3D3{Xy(hSwL^>Qr-Rp)`YHQw%-?OdTZvOa}j~T zRuxpN@1CeuRvdA>a-*@~883e3u?NknnQnCN`ML5RDtOBle@!SY@Ix$ShNO;5Kg;ys z?$CXjt==Z>&XjoPGEOgNsWG37G4Me3y-5mA2O&EO-F8$#VA}1xNUvO~>}tKjcI#j@ 
z5$1nEL{lR}5jg2O8ez?6@Kq$=K4W5n-d^J4j_~Ce#|GE?pA}jAG>%@?Mn^^>c(6rn z-!;REIlGx7`;kTt-S3?QnTl^t@77v-0nJy>sF-vN?X{%CIzq)Oa%iZMDm#vGm>sxR zX7vBK7_vIfn}LEPwws@Oy(j0$czxU4)?wghKBEpspUwQVbl!XSJlEN_9Z1x~({jXq zY{=;9@4i1lx~Rvgmo17XU5cX)vu)H}j6fK>T`br?3T2yyj5?_2@0V4|b=_{kU&`00OOR#i2r%YBEq!u6#n&|FKsU8^0iGF z&va^Nluo4*(JnU{tdj2$UL0$@3ao4v4lHz)%($$Ww`G{$+zs#jvK1L(=Fj9{srR+p zlO`Yt*zwg2j@K}CbB~7A0G(Ij5VZev-yzBWO2;}frZr#WTG)b<=JCPXsg$IzBw>0+H85$ z@|gdDiYTudJp=LCAT6>QS0gkE3~kelO*m8=7gAPo6Kz@NRgg}SZDT0|GzD;m$ByM* z3KTE|!tT@I@Hx34s5b)V0e)o!mqUOJNIL z1JoIrg58Zn>hy1>idu#sk!K!(ER!|~Vy8gI24A17mbf_e+nT;1Vl!VBSI%nkC%kEn z_`O}#ho2BgWZ#*QgiVC4>DLS73j2@ z)3Hmi%<>ECe?{BrX(s1wb9Q@*jQQ+-*r~U?e@#M7!cuxjTpxyi-W0T((BAJf-^I0Z z<+g>W&h&Jqrcj&PWhgLZCfiB>WcJ$R!TXI44vmRrpAg0LQ@CJY#t)0fCU29KXvAI) z)>}$m&C<%Q#0YR7NEsR)7X$h}5uu^G?jaOpPAA6SL7@IG#vXAT5=~gB(@eN@!sqHd zu30qlTc7X;YtD#xNsj+GJ_Kfb1>zd7B{7Y-kBLi9kNhH>7zqI5d*u3|0jp8!O|uYJ zm8|z)KH@I-*~#T|bJqxymMaH?d!EM0C)lu5GnW++=zY0PUGw$FYnIn&#dDNsaTR`m zRS6rEB$|~1x+I&b?{wtW=%v$L@3nuDf$~m*!s|vh%~1Zws$#eUUjS&*WJmR@N1H-w zYj0%Po_ux>?_bqxhf7$>9C(AV@ualEJI3;I*1boj)f%3iR2=qziXq-@x&ju=sDm`x zNJkPVSoSZ0Q6$)3z|cz9h|i4|WeN)IE$55nf~(?vd)1tm;)pQGkYxt^I|Hb&vs&wc zpi{#hIY0xa&50Jo@cZ(`!8ZFQQjXlOTpYj|kx(~Ter$5K)JwH{3T*E{3oW5}Op{3B zp|h)%1h`+BJlkaX;1BV;vPQY*cDT*4eB+QJsHm2M3LYEv=wl| z#ti7Yb?`m3cVv*+Klvo{B3^GM=4I?8pWWyr79 zgU!L(#gDU!Y{SXPug{g$0G4(0jr!fu6Gjt{!%1JM;VLEZP54hS=NG$P zeQ4&h#$or};Y&MioJKd8V)zb>$E<1S1 zVg{EpFHhAZL}8%qVuA?!w~gO{>0%e#*d(zrJBe>haw1)+y%LdeL`+mYAo-L5yy=Ii zfNsSX7lNTL!v4#lp*4bRhXe@B8AjZ0wXH0o$e++$h{_Q9kh|Dqk6n~r?u>wm!$@}3 znP(Sh#=nk7k|MH7m^feL08uR0jBI46^ql<*-oG%U1RZzxo79#YC*%HZHR0T=`r^+M z5D@pVhW>Hmrwci|)8(y>Zks^rsFY7IqUH0bqKVoa5jM=#&T>wlkZk5&n?RiIJrK{{1vDEI+9p{ zh@|MHgI&nc#!bD4f#@&}~{Q5IYE!XufhogIuT0SeT}|GXl|$*l#d#<^EXLjzTi zCRi6f*)@5F%|;(vp-OybSVCks=FO%oYQi@b2=nzl1eh@gpIBAS7EnaR>8(@LGiDPj?|+3_00i#6D2Mi@)SA&Io0Y- zX7CylJjbz1lG*=~@__#I-j2#c3I!5ZonBbrzbaN{$QiKL$%@qG`>+@i#_~%*H;44S ztN=_g0`{3xO0pUdOQkw;rHMw<5*ZOnoSUWAi_VzW!;J?%=#voo1?%++T}%Ib@&Oa- 
z@?Cz?1hRA0^alASVU2cI9`-zMAVNXsR@?`+meiK>p#z)_)>2t;N|&r%C&uW;t$=w! z*1&tiIr&nKNXQZjoAs?cpLU(dblo6ne-k5X9rMT-pm^N9`*~SR&4>UgiJTV3NRn7T z{1FIjU;W8^$>4A>`X$kzE6>he!JAUv;qQuK_v$B@|xTGpR-Vq$0k^4hhp-N?64?%faM9sGvB0v94L>|3e>}n;wjh-4S z5qIrwZ%biaPn6RbX^`=yKhoJqE-cEq*=~L1b#r8i2(#!JJ4-GP%{(Ga{4>(fu>O+V zxRLwf*HLJ2Q}rWa(8-#5hWjtbnH*#JBJG|*4)cCv?4fF0TF=sRdzc#O7)x2vBgNlk zVg0MT?I)>gh9{EbnV8pF8~B7w*=gDR=7L8lzV@S)Fi4d|Ed;aEndoU4@?rE#}S$_jnO|{6~jusrn=XSEp&I=@%`bz2Z zN}lmqu?D2LQH_t=wVoNRGKLjvQC@y`Vi@DG zafy*2;>XagNAn~@BMnHAA`>o#+3|w*3BQpd-<(7)pK_Qg9TdN=u3YGFYYjG3?;6OZhE@Fu|B}zSU;7EUeQ3yFB zd@h7UGT_)p5ir=DpyX*d;Ay(az=^^|3&`OA{SzvbBbksUd<>dD7!(plcL3i1;Q?Ea Y*tp=i;eSmu!GJ%RFA5S>VupeL1KT2tUH||9 literal 0 HcmV?d00001 diff --git a/setup.py b/setup.py index 2fa199560..95e4de8ce 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,9 @@ 'extension': [ "sfaira_extension", ], + 'zenodo': [ + "requests" + ], 'docs': [ 'sphinx', 'sphinx-autodoc-typehints', diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 924940aaa..50910f0a2 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,11 +1,25 @@ +# -*- coding: utf-8 -*- +"""A Data and Model Zoo for Single-Cell Genomics.""" + from ._version import get_versions __version__ = get_versions()['version'] del get_versions - +__maintainer__ = ', '.join([ + "Leander Dony", + "David S. Fischer" +]) __author__ = ', '.join([ - 'theislab' + "Leander Dony", + "David S. Fischer" ]) __email__ = ', '.join([ - 'david.fischer@helmholtz-muenchen.de' -]) \ No newline at end of file + "leander.dony@helmholtz-muenchen.de", + "david.fischer@helmholtz-muenchen.de" +]) + +import sfaira.data +import sfaira.genomes +import sfaira.models +import sfaira.train +import sfaira.interface as ui diff --git a/sfaira/api/__init__.py b/sfaira/api/__init__.py index 2caf3b54c..e69de29bb 100644 --- a/sfaira/api/__init__.py +++ b/sfaira/api/__init__.py @@ -1,6 +0,0 @@ -from . import consts -from . import data -from . import genomes -from . import models -from . 
import train -from . import ui diff --git a/sfaira/api/data.py b/sfaira/api/data.py deleted file mode 100644 index 4ff0d90f7..000000000 --- a/sfaira/api/data.py +++ /dev/null @@ -1,3 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase, DatasetSuperGroup -from sfaira.data import mouse -from sfaira.data import human diff --git a/sfaira/api/genomes.py b/sfaira/api/genomes.py deleted file mode 100644 index 1d3783d36..000000000 --- a/sfaira/api/genomes.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.genomes import ExtractFeatureListEnsemble diff --git a/sfaira/api/models.py b/sfaira/api/models.py deleted file mode 100644 index 9565f986d..000000000 --- a/sfaira/api/models.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.models import celltype -from sfaira.models import embedding diff --git a/sfaira/api/train.py b/sfaira/api/train.py deleted file mode 100644 index 41b083cf9..000000000 --- a/sfaira/api/train.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.train import GridsearchContainer, SummarizeGridsearchEmbedding, SummarizeGridsearchCelltype -from sfaira.train import TrainModelEmbedding, TrainModelCelltype, TargetZoos diff --git a/sfaira/api/ui.py b/sfaira/api/ui.py deleted file mode 100644 index 135b63440..000000000 --- a/sfaira/api/ui.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.interface import UserInterface diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 209809e16..896eb5de2 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,3 +1,4 @@ from .base import DatasetBase, DatasetGroupBase, DatasetSuperGroup from . import mouse -from . import human \ No newline at end of file +from . 
import human +from .interactive import DatasetInteractive diff --git a/sfaira/data/base.py b/sfaira/data/base.py index cb7035a44..c6b55ac21 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -98,10 +98,11 @@ def load( self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, - celltype_version=celltype_version - ) + if ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: + self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, + celltype_version=celltype_version + ) # Remove version tag on ensembl gene ID so that different versions are superimposed downstream: if remove_gene_version: @@ -157,7 +158,7 @@ def load( elif isinstance(self.adata.X, scipy.sparse.spmatrix): x = self.adata.X.tocsc() else: - raise ValueError("data type %s not recognized" % type(self.adata.X)) + raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep data_ids = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values @@ -353,11 +354,11 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar ])) ) else: - raise ValueError("did not reccognize backed AnnData.X format %s" % type(adata_backed.X)) + raise ValueError(f"Did not reccognize backed AnnData.X format {type(adata_backed.X)}") def set_unkown_class_id(self, ids: list): """ - Sets list of custom identifiers of unkown cell types in adata.obs["cell_ontology_class"] to the target one. + Sets list of custom identifiers of unknown cell types in adata.obs["cell_ontology_class"] to the target one. :param ids: IDs in adata.obs["cell_ontology_class"] to replace. 
:return: @@ -384,7 +385,7 @@ def _set_genome(self, genome=genome ) else: - raise ValueError("genomes %s not recognised. please provide valid genomes." % genome) + raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") self.genome_container = g @@ -908,7 +909,7 @@ def get_gc( genome=genome ) else: - raise ValueError("genomes %s not recognised. please provide valid genomes." % genome) + raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") return g def ncells(self, annotated_only: bool = False): @@ -1016,7 +1017,10 @@ def load_all_tobacked( self.adata.filename = fn_backed # setting this attribute switches this anndata to a backed object # Note that setting .filename automatically redefines .X as dense, so we have to redefine it as sparse: if not as_dense: - self.adata.X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse + X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse + X.indices = X.indices.astype(np.int64) + X.indptr = X.indptr.astype(np.int64) + self.adata.X = X keys = [ ADATA_IDS_SFAIRA.author, ADATA_IDS_SFAIRA.year, diff --git a/sfaira/data/human/adipose/human_adipose.py b/sfaira/data/human/adipose/human_adipose.py index b47b77d89..9994507ab 100644 --- a/sfaira/data/human/adipose/human_adipose.py +++ b/sfaira/data/human/adipose/human_adipose.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupAdipose().datasets) + from sfaira_extension.data.human import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose().datasets) except ImportError: pass diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index 35ee5f198..cc711c8b0 
100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdipose' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adipose/hcl_AdultAdipose_1.h5ad") + fn = os.path.join(self.path, "human", "adipose", "hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland.py b/sfaira/data/human/adrenalgland/human_adrenalgland.py index 8fad6089d..234a563a9 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland.py @@ -30,7 +30,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupAdrenalgland().datasets) + from sfaira_extension.data.human import DatasetGroupAdrenalgland + self.datasets.update(DatasetGroupAdrenalgland().datasets) except ImportError: pass diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index f7ff1aea5..06db980fe 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'NeonatalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_NeonatalAdrenalGland_1.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 008f50462..fcd778b07 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_2.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index e4b975df6..0e110e5a0 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, 
fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_3.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 4a8e189ce..7772eb398 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdrenalGland' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_3.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 17c79d022..2314f8b43 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalAdrenalGland' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: 
- fn = os.path.join(self.path, "human/adrenalgland/hcl_FetalAdrenalGland_4.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 4a0eb5e51..37069e26c 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultAdrenalGland' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/adrenalgland/hcl_AdultAdrenalGland_2.h5ad") + fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/artery/human_artery.py b/sfaira/data/human/artery/human_artery.py index 93af2dd06..4575d1b6a 100644 --- a/sfaira/data/human/artery/human_artery.py +++ b/sfaira/data/human/artery/human_artery.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupArtery().datasets) + from sfaira_extension.data.human import DatasetGroupArtery + self.datasets.update(DatasetGroupArtery().datasets) except ImportError: pass diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 9b05fe785..273df1b58 100644 --- 
a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultArtery' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/artery/hcl_AdultArtery_1.h5ad") + fn = os.path.join(self.path, "human", "artery", "hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder.py b/sfaira/data/human/bladder/human_bladder.py index e9bf573ff..d31620a7f 100644 --- a/sfaira/data/human/bladder/human_bladder.py +++ b/sfaira/data/human/bladder/human_bladder.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBladder().datasets) + from sfaira_extension.data.human import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder().datasets) except ImportError: pass diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index b8969b60f..de82b63e6 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - 
fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_1.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 281512488..5d4c7e400 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bladder/hcl_AdultBladder_2.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 81fc62825..945a5205b 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bladder/hcl_AdultGallbladder_2.h5ad") + fn = os.path.join(self.path, "human", "bladder", "hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood.py b/sfaira/data/human/blood/human_blood.py index d0b16e5ca..e51216bce 100644 --- a/sfaira/data/human/blood/human_blood.py +++ b/sfaira/data/human/blood/human_blood.py @@ -36,7 +36,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBlood().datasets) + from sfaira_extension.data.human import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood().datasets) except ImportError: pass diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index b27a2bb0d..8e4749b9d 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -37,6 +37,7 @@ def __init__( self.species = "human" self.id = "human_blood_2018_10x_ica_001_unknown" self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_website_meta = None self.organ = "blood" self.sub_tissue = "umbilical_cord_blood" self.has_celltypes = False @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, (self.adata.obs['emptydrops_is_cell'] == 't').values) @@ -59,7 +60,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/blood/ica_blood.h5ad") + fn = os.path.join(self.path, "human", "blood", "ica_blood.h5ad") self.adata = anndata.read(fn) 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index 4b0da8c61..cc70f0b67 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -29,6 +29,7 @@ def __init__( self.species = "human" self.id = "human_blood_2019_10x_10xGenomics_001_unknown" self.download_website = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.download_website_meta = None self.organ = "blood" self.sub_tissue = "pbmcs" self.has_celltypes = False @@ -43,7 +44,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = '10x Genomics' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 9395c471d..8c2f78fe2 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_3.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' 
diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index 3ea9979f5..ae11c35a7 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_PeripheralBlood_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 333cf7daa..7ab28023a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBlood' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBlood_2.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index fd54c0efa..9fb895ef0 100644 --- 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPeripheralBlood' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_AdultPeripheralBlood_4.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 66faeca5c..770fb5c5c 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBloodCD34P' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index 3f60960fb..36a2d5662 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBloodCD34P' 
self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBloodCD34P_2.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index e54565913..bbf76f52a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'CordBlood' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/blood/hcl_CordBlood_1.h5ad") + fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bone/human_bone.py b/sfaira/data/human/bone/human_bone.py index 68e5f868f..4cc46ad77 100644 --- a/sfaira/data/human/bone/human_bone.py +++ b/sfaira/data/human/bone/human_bone.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBone().datasets) + from sfaira_extension.data.human import DatasetGroupBone + self.datasets.update(DatasetGroupBone().datasets) except ImportError: pass diff --git 
a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index dc5e2c45e..68b08699f 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -37,6 +37,7 @@ def __init__( self.species = "human" self.id = "human_bone_2018_10x_ica_unknown" self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_website_meta = None self.organ = "bone" self.sub_tissue = "bone_marrow" self.has_celltypes = False @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bone/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'bone marrow').values, (self.adata.obs['emptydrops_is_cell'] == 't').values) @@ -59,7 +60,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/bone/ica_bone.h5ad") + fn = os.path.join(self.path, "human", "bone", "ica_bone.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index e2bfc7cfe..027972939 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBoneMarrow' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if 
fn is None: - fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_1.h5ad") + fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index fa2ee77d1..00d11f26f 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultBoneMarrow' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/bone/hcl_BoneMarrow_2.h5ad") + fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain.py b/sfaira/data/human/brain/human_brain.py index 4d15fdf2f..3f2eedda9 100644 --- a/sfaira/data/human/brain/human_brain.py +++ b/sfaira/data/human/brain/human_brain.py @@ -32,7 +32,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupBrain().datasets) + from sfaira_extension.data.human import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain().datasets) except ImportError: pass diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index b2fc47640..2ae25a98c 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ 
b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" self.download_website = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" + self.download_website_meta = None self.organ = "brain" self.sub_tissue = "hippocampus, prefrontal cortex" self.has_celltypes = True @@ -57,7 +58,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/habib17.processed.h5ad") + fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index 2fa168dfc..b257f59af 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_4.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 430ad3f0d..31a52e85d 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ 
b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_5.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 8c9d115f5..9ab01e020 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_3.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index c35ac3d57..5dcb7bd39 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultTemporalLobe' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + 
self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_AdultTemporalLobe_1.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index d31e0665d..24b5636fd 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalBrain' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/brain/hcl_FetalBrain_6.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index fc030ab12..a812123a9 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultCerebellum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, 
"human/brain/hcl_AdultCerebellum_1.h5ad") + fn = os.path.join(self.path, "human", "brain", "hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/calvaria/human_calvaria.py b/sfaira/data/human/calvaria/human_calvaria.py index 431b6ba10..3a101f22e 100644 --- a/sfaira/data/human/calvaria/human_calvaria.py +++ b/sfaira/data/human/calvaria/human_calvaria.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupCalvaria().datasets) + from sfaira_extension.data.human import DatasetGroupCalvaria + self.datasets.update(DatasetGroupCalvaria().datasets) except ImportError: pass diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 62a347869..d370f75b5 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalCalvaria' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/calvaria/hcl_FetalCalvaria_1.h5ad") + fn = os.path.join(self.path, "human", "calvaria", "hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/cervix/human_cervix.py b/sfaira/data/human/cervix/human_cervix.py index 14116d928..eb65f1a1f 100644 --- a/sfaira/data/human/cervix/human_cervix.py +++ b/sfaira/data/human/cervix/human_cervix.py @@ -20,7 +20,7 @@ def __init__( 
self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupCervix().datasets) + from sfaira_extension.data.human import DatasetGroupCervix + self.datasets.update(DatasetGroupCervix().datasets) except ImportError: pass diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index c3cad3af5..65e54254f 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultCervix' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/cervix/hcl_AdultCervix_1.h5ad") + fn = os.path.join(self.path, "human", "cervix", "hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py index 8ce4d6c0f..5fcebd370 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupChorionicvillus().datasets) + from sfaira_extension.data.human import DatasetGroupChorionicvillus + self.datasets.update(DatasetGroupChorionicvillus().datasets) except ImportError: pass diff --git 
a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index c4fe7b110..cdb2c119a 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'ChorionicVillus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/chorionicvillus/hcl_ChorionicVillus_1.h5ad") + fn = os.path.join(self.path, "human", "chorionicvillus", "hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon.py b/sfaira/data/human/colon/human_colon.py index 8dc44ab6f..d86d094f4 100644 --- a/sfaira/data/human/colon/human_colon.py +++ b/sfaira/data/human/colon/human_colon.py @@ -34,7 +34,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupColon().datasets) + from sfaira_extension.data.human import DatasetGroupColon + self.datasets.update(DatasetGroupColon().datasets) except ImportError: pass diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index e5406c54a..8d3f7d7ea 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -64,6 +64,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" 
self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" + self.download_website_meta = 'private' self.organ = "colon" self.sub_tissue = "lamina propria of mucosa of colon" self.has_celltypes = True @@ -94,9 +95,9 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/colon/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.path, "human/colon/uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.path, "human/colon/hc_meta_data_stromal_with_donor.txt") + os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") ] adata = anndata.read_loom(fn[0]) ctuc = pd.read_csv(fn[1], sep='\t') @@ -125,7 +126,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/colon/kinchenetal.h5ad") + fn = os.path.join(self.path, "human", "colon", "kinchenetal.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Simmons' diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 3c1f2a1d5..279e8a85b 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -28,6 +28,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" self.download_website = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colonic epithelium" self.has_celltypes = True @@ -81,7 +82,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = 
os.path.join(self.path, "human/colon/smillie19_epi.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index 086bd76f1..e50d96f1f 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" + self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colon" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/wang20_colon.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index e40f0105f..641a95723 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -28,6 +28,7 @@ def __init__( self.species = "human" self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" self.download_website = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" + self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colonic immune cells" self.has_celltypes = True @@ -68,7 +69,7 @@ 
def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/james20.processed.h5ad") + fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index cc0c3e9bb..72aa77936 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultAscendingColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultAscendingColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index d0e1eaead..11de7f12f 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultTransverseColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + 
self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 6d5225af4..d26e5a99a 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultTransverseColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index cab2ed356..a94dd02d8 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Colon' self.sub_tissue = 'AdultSigmoidColon' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta 
= None self.has_celltypes = True self.class_maps = { @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/colon/hcl_AdultColon_1.h5ad") + fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/duodenum/human_duodenum.py b/sfaira/data/human/duodenum/human_duodenum.py index fcb3b5ccf..367138896 100644 --- a/sfaira/data/human/duodenum/human_duodenum.py +++ b/sfaira/data/human/duodenum/human_duodenum.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupDuodenum().datasets) + from sfaira_extension.data.human import DatasetGroupDuodenum + self.datasets.update(DatasetGroupDuodenum().datasets) except ImportError: pass diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 31b38f35e..d3005d603 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultDuodenum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/duodenum/hcl_AdultDuodenum_1.h5ad") + fn = os.path.join(self.path, "human", "duodenum", "hcl_AdultDuodenum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/epityphlon/human_epityphlon.py 
b/sfaira/data/human/epityphlon/human_epityphlon.py index dc49920c8..bbf2297e9 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon.py +++ b/sfaira/data/human/epityphlon/human_epityphlon.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupEpityphlon().datasets) + from sfaira_extension.data.human import DatasetGroupEpityphlon + self.datasets.update(DatasetGroupEpityphlon().datasets) except ImportError: pass diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index 0612889dd..ec43b2e98 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultEpityphlon' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/epityphlon/hcl_AdultEpityphlon_1.h5ad") + fn = os.path.join(self.path, "human", "epityphlon", "hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/esophagus/human_esophagus.py b/sfaira/data/human/esophagus/human_esophagus.py index 7fe9c574f..b1df21e7a 100644 --- a/sfaira/data/human/esophagus/human_esophagus.py +++ b/sfaira/data/human/esophagus/human_esophagus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - 
self.datasets.update(sfairae.data.human.DatasetGroupEsophagus().datasets) + from sfaira_extension.data.human import DatasetGroupEsophagus + self.datasets.update(DatasetGroupEsophagus().datasets) except ImportError: pass diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 563b423e1..22376fc4f 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_esophagus_2019_10x_madissoon_001_10.1101/741405" self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.download_website_meta = None self.organ = "esophagus" self.sub_tissue = "esophagus" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/oesophagus.cellxgene.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") self.adata = anndata.read(fn) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index 0e1a6b012..ad524c341 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Esophagus' self.sub_tissue = 'AdultEsophagus' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -66,7 +67,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_1.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 6d2242d04..efdb0d499 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Esophagus' self.sub_tissue = 'AdultEsophagus' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -66,7 +67,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/esophagus/hcl_AdultEsophagus_2.h5ad") + fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/eye/human_eye.py b/sfaira/data/human/eye/human_eye.py index 42876bd4f..227bda330 100644 --- a/sfaira/data/human/eye/human_eye.py +++ b/sfaira/data/human/eye/human_eye.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupEye().datasets) + from sfaira_extension.data.human import DatasetGroupEye 
+ self.datasets.update(DatasetGroupEye().datasets) except ImportError: pass diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index e01cee387..2c74fd6b1 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" self.download_website = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/lukowski19.processed.h5ad") + fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index f9c5d497f..621447908 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" self.download_website = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/menon19.processed.h5ad") + fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") self.adata = anndata.read(fn) 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Hafler' diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 0c0cb96c9..7aa0c2591 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" self.download_website = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" + self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" self.has_celltypes = True @@ -52,7 +53,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/voigt19.processed.h5ad") + fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index bb71f0c01..041fc2c4c 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Eye' self.sub_tissue = 'FetalEyes' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -65,7 +66,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/eye/hcl_FetalEyes_1.h5ad") + fn = os.path.join(self.path, "human", "eye", "hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube.py 
b/sfaira/data/human/fallopiantube/human_fallopiantube.py index e8718df1e..739e221a0 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupFallopiantube().datasets) + from sfaira_extension.data.human import DatasetGroupFallopiantube + self.datasets.update(DatasetGroupFallopiantube().datasets) except ImportError: pass diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 6961b28c9..c78a1a42c 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultFallopiantube' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/fallopiantube/hcl_AdultFallopiantube_1.h5ad") + fn = os.path.join(self.path, "human", "fallopiantube", "hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/femalegonad/human_femalegonad.py b/sfaira/data/human/femalegonad/human_femalegonad.py index 14897f050..a4b4745fb 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad.py +++ b/sfaira/data/human/femalegonad/human_femalegonad.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import 
sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupFemalegonad().datasets) + from sfaira_extension.data.human import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 051be7563..bafe66bdf 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalFemaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/femalegonad/hcl_FetalFemaleGonad_2.h5ad") + fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 98bb43dc8..e5d3f22d3 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalFemaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, 
"human/femalegonad/hcl_FetalFemaleGonad_1.h5ad") + fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/gallbladder/human_gallbladder.py b/sfaira/data/human/gallbladder/human_gallbladder.py index 4fd59206e..8d5e660f5 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder.py +++ b/sfaira/data/human/gallbladder/human_gallbladder.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupGallbladder().datasets) + from sfaira_extension.data.human import DatasetGroupGallbladder + self.datasets.update(DatasetGroupGallbladder().datasets) except ImportError: pass diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 75c1faa80..40a6406d9 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultGallbladder' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/gallbladder/hcl_AdultGallbladder_1.h5ad") + fn = os.path.join(self.path, "human", "gallbladder", "hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart.py b/sfaira/data/human/heart/human_heart.py index aced6ac11..5aff6fdce 100644 --- a/sfaira/data/human/heart/human_heart.py 
+++ b/sfaira/data/human/heart/human_heart.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupHeart().datasets) + from sfaira_extension.data.human import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart().datasets) except ImportError: pass diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index 9e7ef8589..c4ee874db 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalHeart' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_2.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 6664c0bd7..2da149e62 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultHeart' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, 
"human/heart/hcl_AdultHeart_2.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 8c5058ac2..430f8c92e 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultHeart' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_AdultHeart_1.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 76d54f986..2b73cebe0 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalHeart' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/heart/hcl_FetalHeart_1.h5ad") + fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/hesc/human_hesc.py 
b/sfaira/data/human/hesc/human_hesc.py index 3855c509d..3292ff6f3 100644 --- a/sfaira/data/human/hesc/human_hesc.py +++ b/sfaira/data/human/hesc/human_hesc.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupHesc().datasets) + from sfaira_extension.data.human import DatasetGroupHesc + self.datasets.update(DatasetGroupHesc().datasets) except ImportError: pass diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index d7ee8d9c7..70a0816aa 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'HESC' self.dev_stage = 'HESC' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/hesc/hcl_HESC_1.h5ad") + fn = os.path.join(self.path, "human", "hesc", "hcl_HESC_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/ileum/human_ileum.py b/sfaira/data/human/ileum/human_ileum.py index b5ade8f37..636d508f2 100644 --- a/sfaira/data/human/ileum/human_ileum.py +++ b/sfaira/data/human/ileum/human_ileum.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupIleum().datasets) + from sfaira_extension.data.human import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum().datasets) except ImportError: pass diff --git 
a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 6378752c5..9b4e2d89e 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" self.download_website = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" + self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" self.has_celltypes = True @@ -62,7 +63,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/martin19.processed.h5ad") + fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 792ad33b2..255803190 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" + self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" self.has_celltypes = True @@ -49,7 +50,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/wang20_ileum.processed.h5ad") + fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 87a9ab8f9..5726c3f77 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultIleum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -68,7 +69,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ileum/hcl_AdultIleum_2.h5ad") + fn = os.path.join(self.path, "human", "ileum", "hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/jejunum/human_jejunum.py b/sfaira/data/human/jejunum/human_jejunum.py index 0fa39a272..3839f321b 100644 --- a/sfaira/data/human/jejunum/human_jejunum.py +++ b/sfaira/data/human/jejunum/human_jejunum.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupJejunum().datasets) + from sfaira_extension.data.human import DatasetGroupJejunum + self.datasets.update(DatasetGroupJejunum().datasets) except ImportError: pass diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 6085adc05..e9d7712b4 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultJejunum' self.dev_stage = 'Adult' 
self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/jejunum/hcl_AdultJejunum_2.h5ad") + fn = os.path.join(self.path, "human", "jejunum", "hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney.py b/sfaira/data/human/kidney/human_kidney.py index 9e175cd6f..6cc159f97 100644 --- a/sfaira/data/human/kidney/human_kidney.py +++ b/sfaira/data/human/kidney/human_kidney.py @@ -38,7 +38,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupKidney().datasets) + from sfaira_extension.data.human import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney().datasets) except ImportError: pass diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index f4d630a4a..867feb38b 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -70,8 +70,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/kidney/GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.path, 'human/kidney/GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz') + os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), + os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], 
sep='\t').T) annot = pd.read_csv(fn[1], index_col=0, dtype='category') diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index d37963426..8d87c833f 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -29,6 +29,7 @@ def __init__( 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad', 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad' ] + self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "renal medulla, renal pelvis, ureter, cortex of kidney" self.has_celltypes = True @@ -115,8 +116,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/kidney/Mature_Full_v2.1.h5ad"), - os.path.join(self.path, 'human/kidney/Fetal_full.h5ad') + os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), + os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") ] adult = anndata.read(fn[0]) fetal = anndata.read(fn[1]) diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index bd4590027..ee78c0032 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -56,6 +56,7 @@ def __init__( self.species = "human" self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" + self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "kidney" self.has_celltypes = False @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/GSE131685_RAW.tar") + fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") 
adatas = [] with tarfile.open(fn) as tar: for member in tar.getmembers(): @@ -93,7 +94,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/kidney/GSE131685.h5ad") + fn = os.path.join(self.path, "human", "kidney", "GSE131685.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 791af3883..68203f9b9 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_2.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index f623c4c1c..ca7c14f2c 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_3.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 7f3e36f62..b354034d0 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'AdultKidney' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_AdultKidney_4.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index f66bc7db9..7d404820e 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps 
= { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_3.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 68c304254..1b9b34ef1 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_4.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 83c035039..2f1a0c45f 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 
+92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_5.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 8fce0aa76..36a28c728 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Kidney' self.sub_tissue = 'FetalKidney' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -91,7 +92,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/kidney/hcl_FetalKidney_6.h5ad") + fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver.py b/sfaira/data/human/liver/human_liver.py index b88736cf1..bfb0e8dbb 100644 --- a/sfaira/data/human/liver/human_liver.py +++ b/sfaira/data/human/liver/human_liver.py @@ -36,7 +36,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupLiver().datasets) + from sfaira_extension.data.human import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver().datasets) except ImportError: pass diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py 
b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index 2e8cd9ba7..c71297051 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE115469" + self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "caudate lobe" self.has_celltypes = True @@ -63,8 +64,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/liver/GSE115469.csv.gz"), - os.path.join(self.path, 'human/liver/GSE115469_labels.txt') + os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"), + os.path.join(self.path, "human", "liver", "GSE115469_labels.txt") ] self.adata = anndata.read_csv(fn[0]).T celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 0e2e55aad..4560abe11 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" self.download_website = "https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-7407/" + self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "liver" self.has_celltypes = True @@ -67,7 +68,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/fetal_liver_alladata_.h5ad") + fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 
'Haniffa' diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index 1c4c192ba..a2584a38f 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -42,6 +42,7 @@ def __init__( self.species = "human" self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" self.download_website = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" + self.download_website_meta = None self.organ = "liver" self.sub_tissue = "liver" self.has_celltypes = True @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/ramachandran.h5ad") + fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Henderson' diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 656dde868..1cc865cb2 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -82,8 +82,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/liver/GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.path, 'human/liver/GSE124395_clusterpartition.txt.gz') + os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) celltype_df = pd.read_csv(fn[1], sep=' ') diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 27dc3c2ed..404b75a83 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_1.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 9146e4339..277efb10f 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_2.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index a48826384..16b465ea4 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'AdultLiver' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_AdultLiver_4.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index c749ebcc7..ed870da4b 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'FetalLiver' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_Liver_1.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_Liver_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py 
b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 8a4ecf706..48157a9f8 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Liver' self.sub_tissue = 'FetalLiver' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/liver/hcl_Liver_2.h5ad") + fn = os.path.join(self.path, "human", "liver", "hcl_Liver_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung.py b/sfaira/data/human/lung/human_lung.py index 274cfaeff..9297fd7e9 100644 --- a/sfaira/data/human/lung/human_lung.py +++ b/sfaira/data/human/lung/human_lung.py @@ -47,7 +47,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupLung().datasets) + from sfaira_extension.data.human import DatasetGroupLung + self.datasets.update(DatasetGroupLung().datasets) except ImportError: pass diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index 81933a02f..e37dce7a1 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" self.download_website = "https://covid19.cog.sanger.ac.uk/" \ "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" + 
self.download_website_meta = None self.organ = "lung" self.sub_tissue = "alveoli, parenchyma" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index b24891994..35517ce68 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -27,6 +27,7 @@ def __init__( self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" self.download_website = "https://covid19.cog.sanger.ac.uk/" \ "vieira19_Bronchi_anonymised.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "bronchi" self.has_celltypes = True @@ -60,7 +61,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/vieira19_Bronchi_anonymised.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index 62c0dd849..ed83f3c95 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -25,6 +25,7 @@ def __init__( self.species = "human" self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" self.download_website = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + 
self.download_website_meta = None self.organ = "lung" self.sub_tissue = "parenchyma" self.has_celltypes = True @@ -65,7 +66,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/madissoon19_lung.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Meyer' diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index 280ff991a..b4928a0e2 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -56,8 +56,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/lung/GSE130148_raw_counts.csv.gz"), - os.path.join(self.path, "human/lung/GSE130148_barcodes_cell_types.txt.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), ] self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 22326b4f6..075f59ce8 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -91,10 +91,10 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/lung/GSE135893_matrix.mtx.gz"), - os.path.join(self.path, "human/lung/GSE135893_genes.tsv.gz"), - os.path.join(self.path, "human/lung/GSE135893_barcodes.tsv.gz"), - os.path.join(self.path, "human/lung/GSE135893_IPF_metadata.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), + 
os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), ] self.adata = anndata.read_mtx(fn[0]).T self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=['ids']) @@ -104,7 +104,7 @@ def _load(self, fn=None): self.adata.obs = obs else: if fn is None: - fn = os.path.join(self.path, "human/lung/habermann_processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "habermann_processed.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Kropski' diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index a72e5caba..119833343 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "lung" self.has_celltypes = True @@ -51,7 +52,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/lukassen20_lung_orig.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index 3bae58de9..24b52c8ca 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ 
b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "bronchial epithelial cells" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/lukassen20_airway_orig.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index a01e2cc95..a868b5420 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" self.download_website = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "fetal lung" self.has_celltypes = True @@ -67,7 +68,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/miller20.processed.h5ad") + fn = os.path.join(self.path, "human", "lung", "miller20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py 
b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index d1a3c8551..f2cf531e2 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -33,8 +33,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "human" - self.id = "human_lung_2020_10x_travaglini_001_10.1101/742320" + self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" self.has_celltypes = True @@ -107,7 +108,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") self.adata = anndata.read(fn) self.adata.X = scipy.sparse.csc_matrix(self.adata.X) self.adata.X = np.expm1(self.adata.X) @@ -116,7 +117,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 627199ddf..ab1e02530 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalLung' self.dev_stage = 'Fetus' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_FetalLung_1.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index 5efd59b81..f52f73078 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_AdultLung_3.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index cfcf99f45..50fd78529 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, 
"human/lung/hcl_AdultLung_2.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 96d16219d..1f3715724 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultLung' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_AdultLung_1.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index b65ea2a8c..223d3a409 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalLung' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -92,7 +93,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/hcl_FetalLung_2.h5ad") + fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py 
b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 8147dc611..77f4c67e0 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -33,8 +33,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "human" - self.id = "human_lung_2020_smartseq2_travaglini_002_10.1101/742320" + self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" self.has_celltypes = True @@ -94,7 +95,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/lung/facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") self.adata = anndata.read(fn) self.adata.X = scipy.sparse.csc_matrix(self.adata.X) self.adata.X = np.expm1(self.adata.X) @@ -103,7 +104,7 @@ def _load(self, fn=None): self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/742320" + self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'smartseq2' self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue diff --git a/sfaira/data/human/malegonad/human_malegonad.py b/sfaira/data/human/malegonad/human_malegonad.py index 508259f2a..ea7e995df 100644 --- a/sfaira/data/human/malegonad/human_malegonad.py +++ b/sfaira/data/human/malegonad/human_malegonad.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as 
sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMalegonad().datasets) + from sfaira_extension.data.human import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 0ebc7036e..03362c90a 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2" self.download_website = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" + self.download_website_meta = None self.organ = "malegonad" self.sub_tissue = "testis" self.has_celltypes = True @@ -53,7 +54,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/malegonad/guo18_donor.processed.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index 372074230..f530fbc85 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: 
- fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_1.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 74b0d6eb1..ca8a98733 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMaleGonad' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -70,7 +71,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/malegonad/hcl_FetalMaleGonad_2.h5ad") + fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/mixed/human_mixed.py b/sfaira/data/human/mixed/human_mixed.py index d46f9e03d..cbce1da35 100644 --- a/sfaira/data/human/mixed/human_mixed.py +++ b/sfaira/data/human/mixed/human_mixed.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMixed().datasets) + from sfaira_extension.data.human import DatasetGroupMixed + self.datasets.update(DatasetGroupMixed().datasets) except ImportError: pass diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 2559948d0..924c3a978 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ 
b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -82,6 +82,7 @@ def __init__( self.species = "human" self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3" self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" + self.download_website_meta = 'private' self.organ = "mixed" self.sub_tissue = "Bone Marrow, Lung, Lymph Node" self.has_celltypes = True @@ -98,9 +99,9 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/mixed/GSE126030_RAW.tar"), - os.path.join(self.path, "human/mixed/donor1.annotation.txt"), - os.path.join(self.path, "human/mixed/donor2.annotation.txt"), + os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), + os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), + os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"), ] adatas = [] with tarfile.open(fn[0]) as tar: @@ -147,7 +148,7 @@ def _load(self, fn=None): self.adata.X = scipy.sparse.csc_matrix(self.adata.X) else: if fn is None: - fn = os.path.join(self.path, "human/mixed/GSE126030.h5ad") + fn = os.path.join(self.path, "human", "mixed", "GSE126030.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sims" diff --git a/sfaira/data/human/muscle/human_muscle.py b/sfaira/data/human/muscle/human_muscle.py index a96850a70..3ccea8560 100644 --- a/sfaira/data/human/muscle/human_muscle.py +++ b/sfaira/data/human/muscle/human_muscle.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupMuscle().datasets) + from sfaira_extension.data.human import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle().datasets) except ImportError: pass diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py 
b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index c24351538..e4defc46f 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalMuscle' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/muscle/hcl_FetalMuscle_1.h5ad") + fn = os.path.join(self.path, "human", "muscle", "hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 5f8f4c2d5..c92719054 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultMuscle' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/muscle/hcl_AdultMuscle_1.h5ad") + fn = os.path.join(self.path, "human", "muscle", "hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum.py b/sfaira/data/human/omentum/human_omentum.py index a1d1879b7..56f49739e 100644 --- a/sfaira/data/human/omentum/human_omentum.py +++ b/sfaira/data/human/omentum/human_omentum.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, 
datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupOmentum().datasets) + from sfaira_extension.data.human import DatasetGroupOmentum + self.datasets.update(DatasetGroupOmentum().datasets) except ImportError: pass diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 3275d67ea..2550ea8ed 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_2.h5ad") + fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index aeaf8842f..88614e843 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_3.h5ad") + fn = os.path.join(self.path, 
"human", "omentum", "hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index d2ff1416e..e9fe7fff4 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultOmentum' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/omentum/hcl_AdultOmentum_1.h5ad") + fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas.py b/sfaira/data/human/pancreas/human_pancreas.py index 9252cbf37..84bc010fd 100644 --- a/sfaira/data/human/pancreas/human_pancreas.py +++ b/sfaira/data/human/pancreas/human_pancreas.py @@ -32,7 +32,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPancreas().datasets) + from sfaira_extension.data.human import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas().datasets) except ImportError: pass diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index b8f672703..f7b378c33 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -27,6 +27,7 @@ 
def __init__( self.species = "human" self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" self.download_website = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" + self.download_website_meta = None self.organ = "pancreas" self.sub_tissue = "pancreas" self.has_celltypes = True @@ -56,7 +57,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/baron16.processed.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index 33d382dd0..f60cea9a0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/pancreas/E-MTAB-5061.processed.1.zip"), - os.path.join(self.path, "human/pancreas/E-MTAB-5061.sdrf.txt") + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") ] df = pd.read_csv(fn[0], sep='\t') df.index = df.index.get_level_values(0) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index d9cfa8d4b..a65b155fd 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -93,8 +93,8 @@ def _load(self, fn=None): if self._load_raw: if fn is None: fn = [ - 
os.path.join(self.path, "human/pancreas/GSE81547_RAW.tar"), - os.path.join(self.path, "human/pancreas/GSE81547_series_matrix.txt.gz") + os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), + os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") ] dfs = [] with tarfile.open(fn[0]) as tar: @@ -126,7 +126,7 @@ def _load(self, fn=None): else: if fn is None: - fn = os.path.join(self.path, "human/pancreas/GSE81547.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "GSE81547.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 63dfb2c74..7d79c6021 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'AdultPancreas' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_AdultPancreas_1.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 009adbdfa..126a23ce5 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( 
self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_1.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 3932c1255..70e7187bb 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_2.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 2abd51705..6f3ef7db0 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Pancreas' self.sub_tissue = 'FetalPancreas' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -80,7 +81,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pancreas/hcl_FetalPancreas_3.h5ad") + fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/placenta/human_placenta.py b/sfaira/data/human/placenta/human_placenta.py index 5d8df2fed..03a23584f 100644 --- a/sfaira/data/human/placenta/human_placenta.py +++ b/sfaira/data/human/placenta/human_placenta.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPlacenta().datasets) + from sfaira_extension.data.human import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta().datasets) except ImportError: pass diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 72c453c24..22f6d9e57 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -75,8 +75,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/placenta/E-MTAB-6701.processed.1.zip"), - os.path.join(self.path, "human/placenta/E-MTAB-6701.processed.2.zip"), + os.path.join(self.path, 
"human", "placenta", "E-MTAB-6701.processed.1.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) df = pd.read_csv(fn[1], sep='\t') diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 22d461ad2..b09596a10 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -75,8 +75,8 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: fn = [ - os.path.join(self.path, "human/placenta/E-MTAB-6678.processed.1.zip"), - os.path.join(self.path, "human/placenta/E-MTAB-6678.processed.2.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"), + os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) df = pd.read_csv(fn[1], sep='\t') diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index fde6c3d50..607feae07 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Placenta' self.sub_tissue = 'Placenta' self.dev_stage = 'Fetus' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -73,7 +74,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/placenta/hcl_Placenta_1.h5ad") + fn = 
os.path.join(self.path, "human", "placenta", "hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/pleura/human_pleura.py b/sfaira/data/human/pleura/human_pleura.py index 853035a55..db3707fda 100644 --- a/sfaira/data/human/pleura/human_pleura.py +++ b/sfaira/data/human/pleura/human_pleura.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupPleura().datasets) + from sfaira_extension.data.human import DatasetGroupPleura + self.datasets.update(DatasetGroupPleura().datasets) except ImportError: pass diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index b50155990..39412e70b 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultPleura' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/pleura/hcl_AdultPleura_1.h5ad") + fn = os.path.join(self.path, "human", "pleura", "hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/prostate/human_prostate.py b/sfaira/data/human/prostate/human_prostate.py index a6d15c2e2..cf3a5485b 100644 --- a/sfaira/data/human/prostate/human_prostate.py +++ b/sfaira/data/human/prostate/human_prostate.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: 
try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupProstate().datasets) + from sfaira_extension.data.human import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate().datasets) except ImportError: pass diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index 82a775f96..fb8c5907b 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -27,6 +27,7 @@ def __init__( self.species = "human" self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" self.download_website = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" + self.download_website_meta = None self.organ = "prostate" self.sub_tissue = "prostate" self.has_celltypes = True @@ -50,7 +51,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/prostate/henry18_0.processed.h5ad") + fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 322236187..7e1c44559 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultProstate' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -62,7 +63,7 @@ def _load(self, fn=None): if self._load_raw or not 
self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/prostate/hcl_AdultProstate_1.h5ad") + fn = os.path.join(self.path, "human", "prostate", "hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rectum/human_rectum.py b/sfaira/data/human/rectum/human_rectum.py index 236ccca09..be4385732 100644 --- a/sfaira/data/human/rectum/human_rectum.py +++ b/sfaira/data/human/rectum/human_rectum.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupRectum().datasets) + from sfaira_extension.data.human import DatasetGroupRectum + self.datasets.update(DatasetGroupRectum().datasets) except ImportError: pass diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index f92d2981a..9bbb7957d 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -49,7 +49,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rectum/wang20_rectum.processed.h5ad") + fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index e8f80e4d9..c1a2a0009 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultRectum' self.dev_stage = 
'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -57,7 +58,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rectum/hcl_AdultRectum_1.h5ad") + fn = os.path.join(self.path, "human", "rectum", "hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rib/human_rib.py b/sfaira/data/human/rib/human_rib.py index 0b7e9a07a..60f2c6df8 100644 --- a/sfaira/data/human/rib/human_rib.py +++ b/sfaira/data/human/rib/human_rib.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupRib().datasets) + from sfaira_extension.data.human import DatasetGroupRib + self.datasets.update(DatasetGroupRib().datasets) except ImportError: pass diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 4de6aa18b..e5e1a4e65 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalRib' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rib/hcl_FetalRib_2.h5ad") + fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py 
b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index 5eac488d3..421ad2efc 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalRib' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/rib/hcl_FetalRib_3.h5ad") + fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/skin/human_skin.py b/sfaira/data/human/skin/human_skin.py index b13b7c39c..16887b9dd 100644 --- a/sfaira/data/human/skin/human_skin.py +++ b/sfaira/data/human/skin/human_skin.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSkin().datasets) + from sfaira_extension.data.human import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin().datasets) except ImportError: pass diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index af794cb5c..aba00a706 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSkin' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -71,7 +72,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn 
is None: - fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_2.h5ad") + fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 0bcf79a5e..5b1772b9f 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSkin' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -71,7 +72,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/skin/hcl_FetalSkin_3.h5ad") + fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spinalcord/human_spinalcord.py b/sfaira/data/human/spinalcord/human_spinalcord.py index 246497a33..b56b23b34 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord.py +++ b/sfaira/data/human/spinalcord/human_spinalcord.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSpinalcord().datasets) + from sfaira_extension.data.human import DatasetGroupSpinalcord + self.datasets.update(DatasetGroupSpinalcord().datasets) except ImportError: pass diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index e9a0e0561..3a276955e 100644 --- 
a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalSpinalCord' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spinalcord/hcl_FetalSpinalCord_1.h5ad") + fn = os.path.join(self.path, "human", "spinalcord", "hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spleen/human_spleen.py b/sfaira/data/human/spleen/human_spleen.py index 783e2922c..0ca8ab386 100644 --- a/sfaira/data/human/spleen/human_spleen.py +++ b/sfaira/data/human/spleen/human_spleen.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupSpleen().datasets) + from sfaira_extension.data.human import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen().datasets) except ImportError: pass diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 631710bb9..7517ae6bc 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_spleen_2019_10x_madissoon_001_10.1101/741405" self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.download_website_meta = None self.organ = "spleen" self.sub_tissue = "spleen" 
self.has_celltypes = True @@ -69,7 +70,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/spleen.cellxgene.h5ad") + fn = os.path.join(self.path, "human", "spleen", "spleen.cellxgene.h5ad") self.adata = anndata.read(fn) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index ca36b269a..047d8e8a1 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Spleen' self.sub_tissue = 'AdultSpleen' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -63,7 +64,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleenParenchyma_1.h5ad") + fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 08126ca29..fbe16555f 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -28,7 +28,8 @@ def __init__( self.organ = 'Spleen' self.sub_tissue = 'AdultSpleen' self.dev_stage = 'Adult' - self.download_website = "https://figshare.com/articles/HCL_DGE_Data/7235471" + self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -63,7 +64,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/spleen/hcl_AdultSpleen_1.h5ad") + fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach.py b/sfaira/data/human/stomach/human_stomach.py index 64d5fdd76..0121077f9 100644 --- a/sfaira/data/human/stomach/human_stomach.py +++ b/sfaira/data/human/stomach/human_stomach.py @@ -38,7 +38,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupStomach().datasets) + from sfaira_extension.data.human import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach().datasets) except ImportError: pass diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 9b994d4bb..abccb93fc 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index a4f315103..aaf8690d6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalStomach' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index 0ddfd91a3..d88265402 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_1.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index d756c8490..3d85156cc 100644 --- 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntetsine_3.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntetsine_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index 61e958ae6..da1bdd129 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalStomach' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalStomach_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 8895c9ff4..74f7f340b 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 
'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 368acf00a..2f44f8e40 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_5.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 320653c45..b0cd3ba45 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True 
self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_3.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 623a6ccd7..638af9954 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultStomach' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_AdultStomach_2.h5ad") + fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index 2e5fcdf2b..7034a3e71 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalIntestine' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/stomach/hcl_FetalIntestine_4.h5ad") 
+ fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thymus/human_thymus.py b/sfaira/data/human/thymus/human_thymus.py index 22e5d45fc..1e592a837 100644 --- a/sfaira/data/human/thymus/human_thymus.py +++ b/sfaira/data/human/thymus/human_thymus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupThymus().datasets) + from sfaira_extension.data.human import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus().datasets) except ImportError: pass diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index 2d2dce074..b70663e7f 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -26,6 +26,7 @@ def __init__( self.species = "human" self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" self.download_website = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_website_meta = None self.organ = "thymus" self.sub_tissue = "fetal thymus" self.has_celltypes = True @@ -85,7 +86,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/park20.processed.h5ad") + fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 020dc1314..4aef4a0ee 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ 
b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalThymus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -55,7 +56,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_2.h5ad") + fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index c48987fc9..853191631 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'FetalThymus' self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -55,7 +56,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thymus/hcl_FetalThymus_1.h5ad") + fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thyroid/human_thyroid.py b/sfaira/data/human/thyroid/human_thyroid.py index 39a261a91..0a8cc4e0d 100644 --- a/sfaira/data/human/thyroid/human_thyroid.py +++ b/sfaira/data/human/thyroid/human_thyroid.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupThyroid().datasets) 
+ from sfaira_extension.data.human import DatasetGroupThyroid + self.datasets.update(DatasetGroupThyroid().datasets) except ImportError: pass diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 708a50506..1dff2bf3f 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultThyroid' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_2.h5ad") + fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index db2477eea..64d7fa4da 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultThyroid' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/thyroid/hcl_AdultThyroid_1.h5ad") + fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git 
a/sfaira/data/human/trachea/human_trachea.py b/sfaira/data/human/trachea/human_trachea.py index 1860bd810..6ba918535 100644 --- a/sfaira/data/human/trachea/human_trachea.py +++ b/sfaira/data/human/trachea/human_trachea.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupTrachea().datasets) + from sfaira_extension.data.human import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea().datasets) except ImportError: pass diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index d28142b4b..b263a8e5c 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultTrachea' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/trachea/hcl_AdultTrachea_2.h5ad") + fn = os.path.join(self.path, "human", "trachea", "hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/ureter/human_ureter.py b/sfaira/data/human/ureter/human_ureter.py index c7e721f78..452153557 100644 --- a/sfaira/data/human/ureter/human_ureter.py +++ b/sfaira/data/human/ureter/human_ureter.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupUreter().datasets) + from 
sfaira_extension.data.human import DatasetGroupUreter + self.datasets.update(DatasetGroupUreter().datasets) except ImportError: pass diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 170a2257c..1bf9da65a 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 'AdultUreter' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/ureter/hcl_AdultUreter_1.h5ad") + fn = os.path.join(self.path, "human", "ureter", "hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/human/uterus/human_uterus.py b/sfaira/data/human/uterus/human_uterus.py index 742026585..4d8789bff 100644 --- a/sfaira/data/human/uterus/human_uterus.py +++ b/sfaira/data/human/uterus/human_uterus.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.human.DatasetGroupUterus().datasets) + from sfaira_extension.data.human import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus().datasets) except ImportError: pass diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index eec59d718..8d00b14cf 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -29,6 +29,7 @@ def __init__( self.sub_tissue = 
'AdultUterus' self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' + self.download_website_meta = None self.has_celltypes = True self.class_maps = { @@ -41,7 +42,7 @@ def _load(self, fn=None): if self._load_raw or not self._load_raw: if fn is None: - fn = os.path.join(self.path, "human/uterus/hcl_AdultUterus_1.h5ad") + fn = os.path.join(self.path, "human", "uterus", "hcl_AdultUterus_1.h5ad") self.adata = anndata.read(fn) self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' diff --git a/sfaira/data/interactive/__init__.py b/sfaira/data/interactive/__init__.py new file mode 100644 index 000000000..2c6ea4905 --- /dev/null +++ b/sfaira/data/interactive/__init__.py @@ -0,0 +1 @@ +from .loader import DatasetInteractive diff --git a/sfaira/data/mouse/fat/external.py b/sfaira/data/interactive/external.py similarity index 100% rename from sfaira/data/mouse/fat/external.py rename to sfaira/data/interactive/external.py diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py new file mode 100644 index 000000000..45e6b6d8a --- /dev/null +++ b/sfaira/data/interactive/loader.py @@ -0,0 +1,44 @@ +import anndata +from typing import Union +from .external import DatasetBase + + +class DatasetInteractive(DatasetBase): + + def __init__( + self, + data: anndata.AnnData, + species: str, + organ: str, + gene_symbol_col: Union[str, None] = 'index', + gene_ens_col: Union[str, None] = None, + class_maps: dict = {}, + dataset_id: str = "interactive", + **kwargs + ): + """ + + :param data: + :param species: + :param organ: + :param class_maps: + :param id: + :param kwargs: + """ + DatasetBase.__init__(self=self, path=None, meta_path=None, **kwargs) + self.adata = data + self.species = species + self.id = dataset_id + self.organ = organ + + self.gene_symbol_col = gene_symbol_col + self.gene_ensg_col = gene_ens_col + + self.class_maps = class_maps + + def _load(self, fn=None): + self._convert_and_set_var_names( + 
symbol_col=self.gene_symbol_col, + ensembl_col=self.gene_ensg_col, + new_index='ensembl' + ) diff --git a/sfaira/data/mouse/__init__.py b/sfaira/data/mouse/__init__.py index 69c58a155..f1063a851 100644 --- a/sfaira/data/mouse/__init__.py +++ b/sfaira/data/mouse/__init__.py @@ -1,27 +1,27 @@ from .bladder import DatasetGroupBladder from .brain import DatasetGroupBrain from .diaphragm import DatasetGroupDiaphragm -from .fat import DatasetGroupFat +from .adipose import DatasetGroupAdipose from .heart import DatasetGroupHeart from .kidney import DatasetGroupKidney -from .large_intestine import DatasetGroupLargeintestine -from .limb_muscle import DatasetGroupLimbmuscle +from .colon import DatasetGroupColon +from .muscle import DatasetGroupMuscle from .liver import DatasetGroupLiver from .lung import DatasetGroupLung -from .mammary_gland import DatasetGroupMammaryGland -from .marrow import DatasetGroupMarrow -from .ovary import DatasetGroupOvary +from .mammarygland import DatasetGroupMammaryGland +from .bone import DatasetGroupBone +from .femalegonad import DatasetGroupFemalegonad from .pancreas import DatasetGroupPancreas from .placenta import DatasetGroupPlacenta -from .peripheral_blood import DatasetGroupPeripheralBlood +from .blood import DatasetGroupBlood from .prostate import DatasetGroupProstate from .rib import DatasetGroupRib -from .small_intestine import DatasetGroupSmallintestine +from .ileum import DatasetGroupIleum from .skin import DatasetGroupSkin from .spleen import DatasetGroupSpleen from .stomach import DatasetGroupStomach -from .testis import DatasetGroupTestis +from .malegonad import DatasetGroupMalegonad from .thymus import DatasetGroupThymus from .tongue import DatasetGroupTongue -from .trachae import DatasetGroupTrachea +from .trachea import DatasetGroupTrachea from .uterus import DatasetGroupUterus diff --git a/sfaira/data/mouse/adipose/__init__.py b/sfaira/data/mouse/adipose/__init__.py new file mode 100644 index 000000000..c23acef29 --- 
/dev/null +++ b/sfaira/data/mouse/adipose/__init__.py @@ -0,0 +1 @@ +from .mouse_adipose import DatasetGroupAdipose \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/external.py b/sfaira/data/mouse/adipose/external.py similarity index 100% rename from sfaira/data/mouse/large_intestine/external.py rename to sfaira/data/mouse/adipose/external.py diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py b/sfaira/data/mouse/adipose/mouse_adipose.py similarity index 54% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py rename to sfaira/data/mouse/adipose/mouse_adipose.py index 6177b0115..67b4ba1c2 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood.py +++ b/sfaira/data/mouse/adipose/mouse_adipose.py @@ -3,14 +3,14 @@ from .external import DatasetGroupBase -from .mouse_peripheral_blood_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_peripheral_blood_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_peripheral_blood_2018_microwell_han_003 import Dataset as Dataset0003 -from .mouse_peripheral_blood_2018_microwell_han_004 import Dataset as Dataset0004 -from .mouse_peripheral_blood_2018_microwell_han_005 import Dataset as Dataset0005 +from .mouse_adipose_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 +from .mouse_adipose_2019_smartseq2_pisco_003 import Dataset as Dataset0004 +from .mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 -class DatasetGroupPeripheralBlood (DatasetGroupBase): +class DatasetGroupAdipose(DatasetGroupBase): def __init__( self, @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - 
self.datasets.update(sfairae.data.mouse.DatasetGroupPeripheralBlood().datasets) + from sfaira_extension.data.mouse import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py index 375b0e14a..09aea8e6c 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_004_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/Fat_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") else: raise 
ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py index 299f40343..57cc116d0 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_004_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/scat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git 
a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py index a3482f3f9..bfdf3fe1e 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py @@ -7,8 +7,6 @@ class Dataset(DatasetBase): - id: str - def __init__( self, path: Union[str, None] = None, @@ -18,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_001_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/bat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git 
a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py rename to sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py index 5a94230e4..46fd2d683 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_003_10.1101/661728" + self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "adipose" + self.sub_tissue = "adipose" self.has_celltypes = True self.class_maps = { @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/mat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py 
b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py new file mode 100644 index 000000000..e74a9c555 --- /dev/null +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py @@ -0,0 +1,71 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + id: str + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" + self.source = source + if self.source == "aws": + self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + self.organ = "adipose" + self.sub_tissue = "adipose" + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + self.adata.uns["lab"] = "Quake" + self.adata.uns["year"] = "2019" + self.adata.uns["doi"] = "10.1101/661728" + self.adata.uns["protocol"] = "smartseq2" + self.adata.uns["organ"] = 
self.organ + self.adata.uns["subtissue"] = self.sub_tissue + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'norm' + # self.adata.obs["cell_ontology_class"] is already set + self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/bladder/mouse_bladder.py b/sfaira/data/mouse/bladder/mouse_bladder.py index 605ec696c..2e2e655a6 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder.py +++ b/sfaira/data/mouse/bladder/mouse_bladder.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupBladder().datasets) + from sfaira_extension.data.mouse import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 95964f1e1..5eeccb976 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Bladder_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") + fn_meta = os.path.join(self.path, 
"mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index 03bcf78c9..3f752594a 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/bladder/tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/bladder/Bladder_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "Bladder_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index 33199af3a..624b84d65 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/bladder/tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/bladder/Bladder_facs.h5ad") + fn = 
os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/blood/__init__.py b/sfaira/data/mouse/blood/__init__.py new file mode 100644 index 000000000..6b0e27f4f --- /dev/null +++ b/sfaira/data/mouse/blood/__init__.py @@ -0,0 +1 @@ +from .mouse_blood import DatasetGroupBlood \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/external.py b/sfaira/data/mouse/blood/external.py similarity index 100% rename from sfaira/data/mouse/limb_muscle/external.py rename to sfaira/data/mouse/blood/external.py diff --git a/sfaira/data/mouse/fat/mouse_fat.py b/sfaira/data/mouse/blood/mouse_blood.py similarity index 59% rename from sfaira/data/mouse/fat/mouse_fat.py rename to sfaira/data/mouse/blood/mouse_blood.py index 670227066..6098a0870 100644 --- a/sfaira/data/mouse/fat/mouse_fat.py +++ b/sfaira/data/mouse/blood/mouse_blood.py @@ -3,14 +3,14 @@ from .external import DatasetGroupBase -from .mouse_fat_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_fat_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_fat_2019_smartseq2_pisco_002 import Dataset as Dataset0003 -from .mouse_fat_2019_smartseq2_pisco_003 import Dataset as Dataset0004 -from .mouse_fat_2019_smartseq2_pisco_004 import Dataset as Dataset0005 +from .mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 +from .mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 +from .mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 -class DatasetGroupFat(DatasetGroupBase): +class DatasetGroupBlood (DatasetGroupBase): def __init__( self, @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import 
sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupFat().datasets) + from sfaira_extension.data.mouse import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py index d467164e8..8f8b87a90 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) 
diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py index e735ba7b4..a27d7691d 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py new file mode 100644 index 000000000..9d8ef9088 --- /dev/null +++ 
b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py @@ -0,0 +1,85 @@ +import anndata +import numpy as np +import os +import pandas +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + id: str + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + self.sub_tissue = "blood" + self.has_celltypes = True + + self.class_maps = { + "0": { + 'B cell_Igha high(Peripheral_Blood)': 'B cell', + 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', + 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', + 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', + 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', + 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', + 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', + 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', + 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', + 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', + 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', + 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', + 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', + 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', + 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', + 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', + 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', + 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', + 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', + 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', + 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' + }, + } + + def _load(self, fn=None): + if 
fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + celltypes = pandas.read_csv(fn_meta, index_col=1) + celltypes = celltypes.drop(['Unnamed: 0'], axis=1) + + data = pandas.read_csv(fn, sep=' ', header=0) + self.adata = anndata.AnnData(data.T) + self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() + self.adata.obs = celltypes.loc[self.adata.obs_names, :] + + self.adata.uns["lab"] = "Guo" + self.adata.uns["year"] = "2018" + self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" + self.adata.uns["protocol"] = "microwell-seq" + self.adata.uns["organ"] = self.organ + self.adata.uns["subtissue"] = self.sub_tissue # TODO + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'raw' + self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.set_unkown_class_id(ids=[np.nan, "nan"]) + self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py index 98332ad7d..fcbb9fa42 100644 --- 
a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py similarity index 91% rename from sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py rename to sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py index 0d716fc5b..f48e2108c 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, 
meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" + self.organ = "blood" + self.sub_tissue = "blood" self.has_celltypes = True self.class_maps = { @@ -55,8 +55,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/bone/__init__.py b/sfaira/data/mouse/bone/__init__.py new file mode 100644 index 000000000..9b6ccd006 --- /dev/null +++ b/sfaira/data/mouse/bone/__init__.py @@ -0,0 +1 @@ +from .mouse_bone import DatasetGroupBone \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/external.py b/sfaira/data/mouse/bone/external.py similarity index 100% rename from sfaira/data/mouse/mammary_gland/external.py rename to sfaira/data/mouse/bone/external.py diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py b/sfaira/data/mouse/bone/mouse_bone.py similarity index 59% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py rename to sfaira/data/mouse/bone/mouse_bone.py index f37eec502..02fe69705 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle.py +++ b/sfaira/data/mouse/bone/mouse_bone.py @@ -3,12 
+3,12 @@ from .external import DatasetGroupBase -from .mouse_limb_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_limb_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_limb_muscle_2018_microwell_han_001 import Dataset as Dataset0003 +from .mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_bone_2018_microwell_001 import Dataset as Dataset0003 -class DatasetGroupLimbmuscle(DatasetGroupBase): +class DatasetGroupBone(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLimbmuscle().datasets) + from sfaira_extension.data.mouse import DatasetGroupBone + self.datasets.update(DatasetGroupBone().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py similarity index 91% rename from sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py rename to sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py index 2a2704d75..93695c333 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2018_microwell_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py @@ -17,9 +17,9 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -49,8 +49,8 @@ def _load(self, 
fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/BoneMarrow1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py similarity index 90% rename from sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py rename to sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py index 9916020f8..b6126a2f6 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,7 +26,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/marrow/tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") + fn = 
os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/marrow/Marrow_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py similarity index 90% rename from sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py index cf4e02f27..2d3dc7975 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_marrow_2019_smartseq2_pisco_001_10.1101/661728" + self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,7 +26,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "marrow" + self.organ = "bone" self.sub_tissue = "marrow" self.has_celltypes = True @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/marrow/tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") elif self.source == "figshare": - fn = 
os.path.join(self.path, "mouse/marrow/Marrow_facs.h5ad") + fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/brain/mouse_brain.py b/sfaira/data/mouse/brain/mouse_brain.py index 6a4147b06..4e09daddc 100644 --- a/sfaira/data/mouse/brain/mouse_brain.py +++ b/sfaira/data/mouse/brain/mouse_brain.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupBrain().datasets) + from sfaira_extension.data.mouse import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index fd33e496b..3a692df7a 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -48,8 +48,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Brain1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index 74aaf387b..9a8d0e629 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -48,8 +48,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Brain2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 084e66ab0..0444530fd 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -38,10 +38,10 @@ def __init__( def _load(self, fn=None): if fn is None: - fn = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/matrix.mtx") - fn_barcodes = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/barcodes.tsv") - fn_var = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/genes.tsv") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_brain_atlas/annot_fullAggr.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "matrix.mtx") + fn_barcodes = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "barcodes.tsv") + fn_var = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "genes.tsv") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "annot_fullAggr.csv") self.adata = anndata.read_mtx(fn) self.adata = anndata.AnnData(self.adata.X.T) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index 6ca638f84..f9b5f16e7 
100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/brain/tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/brain/Brain_Non-Myeloid_facs.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index 15c487af3..6aae2b9a4 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/brain/tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/brain/Brain_Myeloid_facs.h5ad") + fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/colon/__init__.py b/sfaira/data/mouse/colon/__init__.py new file mode 100644 index 000000000..8e57ba03e --- /dev/null 
+++ b/sfaira/data/mouse/colon/__init__.py @@ -0,0 +1 @@ +from .mouse_colon import DatasetGroupColon \ No newline at end of file diff --git a/sfaira/data/mouse/marrow/external.py b/sfaira/data/mouse/colon/external.py similarity index 100% rename from sfaira/data/mouse/marrow/external.py rename to sfaira/data/mouse/colon/external.py diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine.py b/sfaira/data/mouse/colon/mouse_colon.py similarity index 60% rename from sfaira/data/mouse/large_intestine/mouse_large_intestine.py rename to sfaira/data/mouse/colon/mouse_colon.py index ebf9c14cc..3666f396a 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine.py +++ b/sfaira/data/mouse/colon/mouse_colon.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_large_intestine_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_large_intestine_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -class DatasetGroupLargeintestine(DatasetGroupBase): +class DatasetGroupColon(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLargeintestine().datasets) + from sfaira_extension.data.mouse import DatasetGroupColon + self.datasets.update(DatasetGroupColon().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py similarity index 85% rename from sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py rename to sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py index 17c8372db..17d315b2a 100644 --- 
a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py @@ -18,7 +18,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_large_intestine_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -26,8 +26,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "large_intestine" - self.sub_tissue = "large_intestine" + self.organ = "colon" + self.sub_tissue = "colon" self.has_celltypes = True self.class_maps = { @@ -38,11 +38,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/large_intestine/tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..299b15ede 
--- /dev/null +++ b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py @@ -0,0 +1,72 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + id: str + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" + self.source = source + if self.source == "aws": + self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + self.organ = "colon" + self.sub_tissue = "colon" + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + self.adata.uns["lab"] = "Quake" + self.adata.uns["year"] = "2019" + self.adata.uns["doi"] = "10.1101/661728" + self.adata.uns["protocol"] = "smartseq2" + self.adata.uns["organ"] = self.organ + 
self.adata.uns["subtissue"] = self.sub_tissue + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'norm' + # self.adata.obs["cell_ontology_class"] is already set + self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py index 777a341bc..f8fc4d5f6 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupDiaphragm().datasets) + from sfaira_extension.data.mouse import DatasetGroupDiaphragm + self.datasets.update(DatasetGroupDiaphragm().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 02e25518e..665983cb3 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/diaphragm/tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") + fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") elif self.source 
== "figshare": - fn = os.path.join(self.path, "mouse/diaphragm/Diaphragm_facs.h5ad") + fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/__init__.py b/sfaira/data/mouse/fat/__init__.py deleted file mode 100644 index b4ea53fe0..000000000 --- a/sfaira/data/mouse/fat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_fat import DatasetGroupFat \ No newline at end of file diff --git a/sfaira/data/mouse/femalegonad/__init__.py b/sfaira/data/mouse/femalegonad/__init__.py new file mode 100644 index 000000000..6cca0c4d4 --- /dev/null +++ b/sfaira/data/mouse/femalegonad/__init__.py @@ -0,0 +1 @@ +from .mouse_femalegonad import DatasetGroupFemalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/ovary/external.py b/sfaira/data/mouse/femalegonad/external.py similarity index 100% rename from sfaira/data/mouse/ovary/external.py rename to sfaira/data/mouse/femalegonad/external.py diff --git a/sfaira/data/mouse/testis/mouse_testis.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py similarity index 61% rename from sfaira/data/mouse/testis/mouse_testis.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad.py index 7f7586088..fc35c3ef3 100644 --- a/sfaira/data/mouse/testis/mouse_testis.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_testis_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_testis_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 -class DatasetGroupTestis(DatasetGroupBase): +class DatasetGroupFemalegonad(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions 
from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTestis().datasets) + from sfaira_extension.data.mouse import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py index 31406e47b..0729ef941 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_001.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ovary" - self.sub_tissue = "ovary" + self.organ = "femalegonad" + self.sub_tissue = "femalegonad" self.has_celltypes = True self.class_maps = { @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Ovary1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git 
a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py similarity index 90% rename from sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py rename to sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py index a033284b0..24b794523 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary_2018_microwell_han_002.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ovary" - self.sub_tissue = "ovary" + self.organ = "femalegonad" + self.sub_tissue = "femalegonad" self.has_celltypes = True self.class_maps = { @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Ovary2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/heart/mouse_heart.py b/sfaira/data/mouse/heart/mouse_heart.py index ad32d3946..ca7e6af3d 100644 --- a/sfaira/data/mouse/heart/mouse_heart.py +++ b/sfaira/data/mouse/heart/mouse_heart.py @@ -25,7 +25,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from 
extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupHeart().datasets) + from sfaira_extension.data.mouse import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index f80ec2445..d5b05893c 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/heart/Heart_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index d7f35b3ab..f5a2eb17a 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") elif self.source == "figshare": - 
fn = os.path.join(self.path, "mouse/heart/Heart_facs.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index e1e2b287e..170b815f2 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -39,7 +39,7 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/heart/tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") elif self.source == "figshare": raise ValueError("not defined") else: diff --git a/sfaira/data/mouse/ileum/__init__.py b/sfaira/data/mouse/ileum/__init__.py new file mode 100644 index 000000000..89c13450a --- /dev/null +++ b/sfaira/data/mouse/ileum/__init__.py @@ -0,0 +1 @@ +from .mouse_ileum import DatasetGroupIleum \ No newline at end of file diff --git a/sfaira/data/mouse/peripheral_blood/external.py b/sfaira/data/mouse/ileum/external.py similarity index 100% rename from sfaira/data/mouse/peripheral_blood/external.py rename to sfaira/data/mouse/ileum/external.py diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine.py b/sfaira/data/mouse/ileum/mouse_ileum.py similarity index 57% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine.py rename to sfaira/data/mouse/ileum/mouse_ileum.py index b7f86a1e0..f56d2c46e 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine.py +++ b/sfaira/data/mouse/ileum/mouse_ileum.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from 
.mouse_small_intestine_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_small_intestine_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_small_intestine_2018_microwell_han_003 import Dataset as Dataset0003 +from .mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 -class DatasetGroupSmallintestine(DatasetGroupBase): +class DatasetGroupIleum(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSmallintestine().datasets) + from sfaira_extension.data.mouse import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py index 6dbb2d9c2..69c3b3c91 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = 
"ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py index 2cadc4d9e..6dc73705b 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = "ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = 
os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py similarity index 91% rename from sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py rename to sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py index 4de9bb758..3fefaf0b5 100644 --- a/sfaira/data/mouse/small_intestine/mouse_small_intestine_2018_microwell_han_003.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_small_intestine_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "small_intestine" - self.sub_tissue = "small_intestine" + self.organ = "ileum" + self.sub_tissue = "ileum" self.has_celltypes = True self.class_maps = { @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/SmallIntestine3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"SmallIntestine3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney.py b/sfaira/data/mouse/kidney/mouse_kidney.py index d5e121da9..ac9d34cfc 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney.py +++ b/sfaira/data/mouse/kidney/mouse_kidney.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupKidney().datasets) + from sfaira_extension.data.mouse import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index f813d68ea..41dd1438b 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -34,8 +34,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Kidney1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index 033fa731e..7ee5882e9 100644 --- 
a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -65,8 +65,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Kidney2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index c69d74681..3cbdd2ac0 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -42,9 +42,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/kidney/tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/kidney/Kidney_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index ae9e88206..ceff6e506 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -41,9 +41,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/kidney/tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/kidney/Kidney_facs.h5ad") + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/large_intestine/__init__.py b/sfaira/data/mouse/large_intestine/__init__.py deleted file mode 100644 index dac54df27..000000000 --- a/sfaira/data/mouse/large_intestine/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_large_intestine import DatasetGroupLargeintestine \ No newline at end of file diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py index 766e6a5fa..e69de29bb 100644 --- a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_large_intestine_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source 
== "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "large_intestine" - self.sub_tissue = "large_intestine" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/large_intestine/tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/large_intestine/Large_Intestine_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/limb_muscle/__init__.py b/sfaira/data/mouse/limb_muscle/__init__.py deleted file mode 100644 index 9a3be10fb..000000000 --- a/sfaira/data/mouse/limb_muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_limb_muscle import DatasetGroupLimbmuscle \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py index 0515158ae..e69de29bb 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_limb_muscle_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is 
None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/limb_muscle/tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/limb_muscle/Limb_Muscle_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/liver/mouse_liver.py b/sfaira/data/mouse/liver/mouse_liver.py index a6df91c00..a78f4af58 100644 --- a/sfaira/data/mouse/liver/mouse_liver.py +++ b/sfaira/data/mouse/liver/mouse_liver.py @@ -26,7 +26,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from 
extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLiver().datasets) + from sfaira_extension.data.mouse import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index edd19a92e..f01bcd10b 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -52,8 +52,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Liver1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 24f0a03aa..03a83405d 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -46,8 +46,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Liver2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index 6131ea70c..b75e14106 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/liver/tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/liver/Liver_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index 245ac729d..e8750f2b0 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/liver/tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/liver/Liver_facs.h5ad") + fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") else: raise ValueError("source %s not 
recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/lung/mouse_lung.py b/sfaira/data/mouse/lung/mouse_lung.py index ca93dd5b8..aa5a6f0ec 100644 --- a/sfaira/data/mouse/lung/mouse_lung.py +++ b/sfaira/data/mouse/lung/mouse_lung.py @@ -28,7 +28,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupLung().datasets) + from sfaira_extension.data.mouse import DatasetGroupLung + self.datasets.update(DatasetGroupLung().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 1ca4e5964..1d198b276 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 2a8bea9df..43102d566 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in 
constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index 6d0f14686..f12abcc17 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -66,8 +66,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Lung3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 015eb167a..3590bce81 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/lung/tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") + fn = os.path.join(self.path, "mouse", "lung", 
"tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/lung/Lung_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index 10c5e48a2..db656e485 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/lung/tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/lung/Lung_facs.h5ad") + fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/malegonad/__init__.py b/sfaira/data/mouse/malegonad/__init__.py new file mode 100644 index 000000000..a56dbc2f4 --- /dev/null +++ b/sfaira/data/mouse/malegonad/__init__.py @@ -0,0 +1 @@ +from .mouse_malegonad import DatasetGroupMalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/small_intestine/external.py b/sfaira/data/mouse/malegonad/external.py similarity index 100% rename from sfaira/data/mouse/small_intestine/external.py rename to sfaira/data/mouse/malegonad/external.py diff --git a/sfaira/data/mouse/ovary/mouse_ovary.py b/sfaira/data/mouse/malegonad/mouse_malegonad.py similarity index 61% rename from sfaira/data/mouse/ovary/mouse_ovary.py rename to 
sfaira/data/mouse/malegonad/mouse_malegonad.py index 91f6ca71b..da9610f39 100644 --- a/sfaira/data/mouse/ovary/mouse_ovary.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad.py @@ -3,11 +3,11 @@ from .external import DatasetGroupBase -from .mouse_ovary_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_ovary_2018_microwell_han_002 import Dataset as Dataset0002 +from .mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from .mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 -class DatasetGroupOvary(DatasetGroupBase): +class DatasetGroupMalegonad(DatasetGroupBase): def __init__( self, @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupOvary().datasets) + from sfaira_extension.data.mouse import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py similarity index 91% rename from sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py rename to sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py index 31ba920d7..c6cf5653d 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_001.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "testis" - self.sub_tissue = "testis" + 
self.organ = "malegonad" + self.sub_tissue = "malegonad" self.has_celltypes = True self.class_maps = { @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Testis1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py similarity index 91% rename from sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py rename to sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py index 9cc44c15b..ca536a683 100644 --- a/sfaira/data/mouse/testis/mouse_testis_2018_microwell_han_002.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "testis" - self.sub_tissue = "testis" + self.organ = "malegonad" + self.sub_tissue = "malegonad" self.has_celltypes = True self.class_maps = { @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, 
"mouse/temp_mouse_atlas/500more_dge/Testis2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/__init__.py b/sfaira/data/mouse/mammary_gland/__init__.py deleted file mode 100644 index 0c53ff90f..000000000 --- a/sfaira/data/mouse/mammary_gland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_mammary_gland import DatasetGroupMammaryGland \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py index 012bcde2c..e69de29bb 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammary_gland_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammary_gland" - 
self.sub_tissue = "mammary_gland" - self.has_celltypes = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse/mammary_gland/tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/mammary_gland/Mammary_Gland_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/mammarygland/__init__.py b/sfaira/data/mouse/mammarygland/__init__.py new file mode 100644 index 
000000000..6a42b03d9 --- /dev/null +++ b/sfaira/data/mouse/mammarygland/__init__.py @@ -0,0 +1 @@ +from .mouse_mammarygland import DatasetGroupMammaryGland \ No newline at end of file diff --git a/sfaira/data/mouse/testis/external.py b/sfaira/data/mouse/mammarygland/external.py similarity index 100% rename from sfaira/data/mouse/testis/external.py rename to sfaira/data/mouse/mammarygland/external.py diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py similarity index 57% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland.py index 9374cee4e..d7cdbd797 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from .mouse_mammary_gland_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_mammary_gland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_mammary_gland_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_mammary_gland_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_mammary_gland_2018_microwell_han_003 import Dataset as Dataset0005 -from .mouse_mammary_gland_2018_microwell_han_004 import Dataset as Dataset0006 +from .mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 +from .mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 +from .mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 +from .mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 class DatasetGroupMammaryGland(DatasetGroupBase): @@ -30,7 +30,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if 
available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupMammaryGland().datasets) + from sfaira_extension.data.mouse import DatasetGroupMammaryGland + self.datasets.update(DatasetGroupMammaryGland().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py index 3ffe43c9b..0a01aff3d 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) @@ -79,4 +79,3 @@ def _load(self, fn=None): self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) - diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py index 2806abd02..661d1ff65 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", 
"temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py index 52d749e52..a23535a32 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git 
a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py similarity index 90% rename from sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py index 42674693a..a677d911b 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py @@ -17,10 +17,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -50,8 +50,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/MammaryGland.Virgin2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py similarity index 87% rename from 
sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py rename to sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py index ed91fbb7b..f90cef8ff 100644 --- a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_mammary_gland_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "mammary_gland" - self.sub_tissue = "mammary_gland" + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/mammary_gland/tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/mammary_gland/Mammary_Gland_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..b442c7f20 --- /dev/null +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py @@ -0,0 +1,69 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) + self.species = "mouse" + self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" + self.source = source + if self.source == "aws": + self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + self.organ = "mammarygland" + self.sub_tissue = "mammarygland" + self.has_celltypes = True + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + self.adata.uns["lab"] = "Quake" + self.adata.uns["year"] = "2019" + self.adata.uns["doi"] = "10.1101/661728" + 
self.adata.uns["protocol"] = "smartseq2" + self.adata.uns["organ"] = self.organ + self.adata.uns["subtissue"] = self.sub_tissue + self.adata.uns["animal"] = "mouse" + self.adata.uns["id"] = self.id + self.adata.uns["wget_download"] = self.download_website + self.adata.uns["has_celltypes"] = self.has_celltypes + self.adata.uns["counts"] = 'norm' + # self.adata.obs["cell_ontology_class"] is already set + self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() + self.adata.obs["healthy"] = True + self.adata.obs["state_exact"] = "healthy" + + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') diff --git a/sfaira/data/mouse/marrow/__init__.py b/sfaira/data/mouse/marrow/__init__.py deleted file mode 100644 index b2b203505..000000000 --- a/sfaira/data/mouse/marrow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_marrow import DatasetGroupMarrow \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/__init__.py b/sfaira/data/mouse/muscle/__init__.py new file mode 100644 index 000000000..fa8cb5cfd --- /dev/null +++ b/sfaira/data/mouse/muscle/__init__.py @@ -0,0 +1 @@ +from .mouse_muscle import DatasetGroupMuscle \ No newline at end of file diff --git a/sfaira/data/mouse/trachae/external.py b/sfaira/data/mouse/muscle/external.py similarity index 100% rename from sfaira/data/mouse/trachae/external.py rename to sfaira/data/mouse/muscle/external.py diff --git a/sfaira/data/mouse/marrow/mouse_marrow.py b/sfaira/data/mouse/muscle/mouse_muscle.py similarity index 61% rename from sfaira/data/mouse/marrow/mouse_marrow.py rename to sfaira/data/mouse/muscle/mouse_muscle.py index 4a28ce069..d3eb6f583 100644 --- a/sfaira/data/mouse/marrow/mouse_marrow.py +++ b/sfaira/data/mouse/muscle/mouse_muscle.py @@ -3,12 +3,12 @@ from .external import DatasetGroupBase -from .mouse_marrow_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_marrow_2019_smartseq2_pisco_001 import Dataset as 
Dataset0002 -from .mouse_marrow_2018_microwell_001 import Dataset as Dataset0003 +from .mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 +from .mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from .mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 -class DatasetGroupMarrow(DatasetGroupBase): +class DatasetGroupMuscle(DatasetGroupBase): def __init__( self, @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupMarrow().datasets) + from sfaira_extension.data.mouse import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py similarity index 90% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py rename to sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py index 7dac35d7f..ce3b6f01e 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py @@ -19,10 +19,10 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_limb_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -51,8 +51,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise 
ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Muscle_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py similarity index 87% rename from sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py rename to sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py index f538e283c..c88b0e4a3 100644 --- a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_limb_muscle_2019_10x_pisco_001_10.1101/661728" + self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "limb_muscle" - self.sub_tissue = "limb_muscle" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, 
"mouse/limb_muscle/tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/limb_muscle/Limb_Muscle_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py similarity index 88% rename from sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py rename to sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py index a320624bd..b71e072d1 100644 --- a/sfaira/data/mouse/fat/mouse_fat_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py @@ -16,7 +16,7 @@ def __init__( ): DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) self.species = "mouse" - self.id = "mouse_fat_2019_smartseq2_pisco_002_10.1101/661728" + self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" self.source = source if self.source == "aws": self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -24,8 +24,8 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" else: raise ValueError("source %s not recognized" % self.source) - self.organ = "fat" - self.sub_tissue = "fat" + self.organ = "muscle" + self.sub_tissue = "muscle" self.has_celltypes = True self.class_maps = { @@ -37,9 +37,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/fat/tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") + 
fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/fat/gat_facs.h5ad") + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/ovary/__init__.py b/sfaira/data/mouse/ovary/__init__.py deleted file mode 100644 index 20d9cccfc..000000000 --- a/sfaira/data/mouse/ovary/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_ovary import DatasetGroupOvary \ No newline at end of file diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas.py b/sfaira/data/mouse/pancreas/mouse_pancreas.py index 2818069a1..b036de4b3 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas.py @@ -40,7 +40,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupPancreas().datasets) + from sfaira_extension.data.mouse import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index dd6814c75..34848662f 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -56,8 +56,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Pancreas_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", 
"temp_mouse_atlas/500more_dge", "Pancreas_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 19590fe66..85ef8fb2d 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/pancreas/tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/pancreas/Pancreas_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 18c2574f7..671eba3cc 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308545_NOD_08w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308545_NOD_08w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") + fn_meta = 
os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index dc7ba1914..76df51fb8 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308547_NOD_08w_C") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308547_NOD_08w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index 307ca856e..a027e8176 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308548_NOD_14w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308548_NOD_14w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py 
b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 90b0a7147..5f2e685a0 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308549_NOD_14w_B") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308549_NOD_14w_B_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 18c413c0a..3b43226ec 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308550_NOD_14w_C") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308550_NOD_14w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index c3bb0281d..6735a21ad 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -47,8 +47,8 @@ def 
_load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308551_NOD_16w_A") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308551_NOD_16w_A_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 570a9596d..447776c5a 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308552_NOD_16w_B") - fn_meta = os.path.join(self.path, "mouse/pancreas/GSM3308552_NOD_16w_B_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index ba12aa485..1fde77cb5 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -47,8 +47,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/pancreas/GSM3308553_NOD_16w_C") - fn_meta = os.path.join(self.path, 
"mouse/pancreas/GSM3308553_NOD_16w_C_annotation.csv") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") celltypes = pandas.read_csv(fn_meta, index_col=0) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index d78535bfe..7b97fa833 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -41,9 +41,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/pancreas/tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/pancreas/Pancreas_facs.h5ad") + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/peripheral_blood/__init__.py b/sfaira/data/mouse/peripheral_blood/__init__.py deleted file mode 100644 index 51ba0f4ab..000000000 --- a/sfaira/data/mouse/peripheral_blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_peripheral_blood import DatasetGroupPeripheralBlood \ No newline at end of file diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py index fece325ad..e69de29bb 100644 --- a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py +++ 
b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py @@ -1,86 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_peripheral_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "peripheral_blood" - self.sub_tissue = "peripheral_blood" - self.has_celltypes = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T 
cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PeripheralBlood5_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/placenta/mouse_placenta.py b/sfaira/data/mouse/placenta/mouse_placenta.py index 
da38d98d6..412dda22b 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta.py +++ b/sfaira/data/mouse/placenta/mouse_placenta.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupPlacenta().datasets) + from sfaira_extension.data.mouse import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 636fa1f15..71e6c67e4 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -62,8 +62,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/PlacentaE14.1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 076cb182b..55bf9197d 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -62,8 +62,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, 
"mouse/temp_mouse_atlas/500more_dge/PlacentaE14.2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/prostate/mouse_prostate.py b/sfaira/data/mouse/prostate/mouse_prostate.py index ebf3dd5b3..68354a363 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate.py +++ b/sfaira/data/mouse/prostate/mouse_prostate.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupProstate().datasets) + from sfaira_extension.data.mouse import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index 9ccac2ac9..99040cb76 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Prostate1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = 
celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 46ddc0a83..7f6022c77 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Prostate2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib.py b/sfaira/data/mouse/rib/mouse_rib.py index 64ec6b62f..c2a80b1a0 100644 --- a/sfaira/data/mouse/rib/mouse_rib.py +++ b/sfaira/data/mouse/rib/mouse_rib.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupRib().datasets) + from sfaira_extension.data.mouse import DatasetGroupRib + self.datasets.update(DatasetGroupRib().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index 17ab250bc..504019f64 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") 
- fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 676c15725..65718f86f 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index 0268b6e81..b8d5ff7de 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -57,8 +57,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/NeonatalRib3_dge.txt.gz") - fn_meta = os.path.join(self.path, 
"mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/skin/mouse_skin.py b/sfaira/data/mouse/skin/mouse_skin.py index 3ed4c307c..b8b33a0e2 100644 --- a/sfaira/data/mouse/skin/mouse_skin.py +++ b/sfaira/data/mouse/skin/mouse_skin.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSkin().datasets) + from sfaira_extension.data.mouse import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin().datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 262885440..65b961511 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/skin/tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/skin/Skin_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index b582e2011..04ddff85c 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -38,11 +38,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/skin/Skin_facs.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/skin/tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/skin/Skin_facs.h5ad") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/small_intestine/__init__.py b/sfaira/data/mouse/small_intestine/__init__.py deleted file mode 100644 index 87c890041..000000000 --- a/sfaira/data/mouse/small_intestine/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_small_intestine import DatasetGroupSmallintestine \ No newline at end of file diff --git a/sfaira/data/mouse/spleen/mouse_spleen.py b/sfaira/data/mouse/spleen/mouse_spleen.py index 43d4cd15a..27ab1559e 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen.py +++ b/sfaira/data/mouse/spleen/mouse_spleen.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupSpleen().datasets) + from sfaira_extension.data.mouse import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen().datasets) except ImportError: pass diff --git 
a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 5e31c0eea..1b5174b1f 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -45,8 +45,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Spleen_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 451baa207..84118862e 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/spleen/tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/spleen/Spleen_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 9bde5f267..00ae4b975 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -39,9 +39,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/spleen/tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/spleen/Spleen_facs.h5ad") + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/stomach/mouse_stomach.py b/sfaira/data/mouse/stomach/mouse_stomach.py index 9cee2ea17..cabeaf994 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach.py +++ b/sfaira/data/mouse/stomach/mouse_stomach.py @@ -20,7 +20,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupStomach().datasets) + from sfaira_extension.data.mouse import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 56ab7c683..64501c484 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -51,8 +51,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn 
= os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Stomach_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", "Stomach_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/testis/__init__.py b/sfaira/data/mouse/testis/__init__.py deleted file mode 100644 index cbd4fa1e7..000000000 --- a/sfaira/data/mouse/testis/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_testis import DatasetGroupTestis \ No newline at end of file diff --git a/sfaira/data/mouse/thymus/mouse_thymus.py b/sfaira/data/mouse/thymus/mouse_thymus.py index 3f7ed534f..fe707f8fd 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus.py +++ b/sfaira/data/mouse/thymus/mouse_thymus.py @@ -24,7 +24,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupThymus().datasets) + from sfaira_extension.data.mouse import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index 125b2ad69..15f68b11f 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -40,8 +40,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Thymus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = 
os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index 188d26cb5..f0f1e370d 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -38,9 +38,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/thymus/tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/thymus/Thymus_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index c8f960e2a..11f5971fd 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -38,9 +38,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/thymus/tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") elif self.source == 
"figshare": - fn = os.path.join(self.path, "mouse/thymus/Thymus_facs.h5ad") + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/tongue/mouse_tongue.py b/sfaira/data/mouse/tongue/mouse_tongue.py index 392933740..382e06f5e 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue.py +++ b/sfaira/data/mouse/tongue/mouse_tongue.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTongue().datasets) + from sfaira_extension.data.mouse import DatasetGroupTongue + self.datasets.update(DatasetGroupTongue().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index 2db82fdfb..d2fcce167 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/tongue/tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/tongue/Tongue_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index a168f163e..c412c6d00 
100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/tongue/tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/tongue/Tongue_facs.h5ad") + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/trachae/__init__.py b/sfaira/data/mouse/trachea/__init__.py similarity index 100% rename from sfaira/data/mouse/trachae/__init__.py rename to sfaira/data/mouse/trachea/__init__.py diff --git a/sfaira/data/mouse/trachea/external.py b/sfaira/data/mouse/trachea/external.py new file mode 100644 index 000000000..9f4e3db68 --- /dev/null +++ b/sfaira/data/mouse/trachea/external.py @@ -0,0 +1 @@ +from sfaira.data import DatasetBase, DatasetGroupBase diff --git a/sfaira/data/mouse/trachae/mouse_trachea.py b/sfaira/data/mouse/trachea/mouse_trachea.py similarity index 84% rename from sfaira/data/mouse/trachae/mouse_trachea.py rename to sfaira/data/mouse/trachea/mouse_trachea.py index 2ef2426a6..bb578a632 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea.py +++ b/sfaira/data/mouse/trachea/mouse_trachea.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupTrachea().datasets) + from sfaira_extension.data.mouse import DatasetGroupTrachea + 
self.datasets.update(DatasetGroupTrachea().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py similarity index 90% rename from sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py rename to sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py index df3dd8cb1..f19ec1043 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py @@ -39,11 +39,11 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/trachea/Trachea_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") if self.source == "aws": - fn = os.path.join(self.path, "mouse/trachea/tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/trachea/Trachea_droplet.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py similarity index 93% rename from sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py rename to sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py index 39b5c48c2..2e7a16097 100644 --- a/sfaira/data/mouse/trachae/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py @@ -40,9 +40,9 @@ def _load(self, fn=None): if self.path is None: raise ValueError("provide 
either fn in load or path in constructor") if self.source == "aws": - fn = os.path.join(self.path, "mouse/trachea/tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") elif self.source == "figshare": - fn = os.path.join(self.path, "mouse/trachea/Trachea_facs.h5ad") + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") else: raise ValueError("source %s not recognized" % self.source) self.adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/mouse/uterus/mouse_uterus.py b/sfaira/data/mouse/uterus/mouse_uterus.py index 9b6e6e9c2..b7a7ea6fc 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus.py +++ b/sfaira/data/mouse/uterus/mouse_uterus.py @@ -22,7 +22,7 @@ def __init__( self.datasets = dict(zip(keys, datasets)) # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - self.datasets.update(sfairae.data.mouse.DatasetGroupUterus().datasets) + from sfaira_extension.data.mouse import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus().datasets) except ImportError: pass diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index 5ffd95ce4..327445518 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Uterus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index db2724715..5bd723063 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -53,8 +53,8 @@ def _load(self, fn=None): if fn is None: if self.path is None: raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse/temp_mouse_atlas/500more_dge/Uterus2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse/temp_mouse_atlas/MCA_CellAssignments.csv") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") celltypes = pandas.read_csv(fn_meta, index_col=1) celltypes = celltypes.drop(['Unnamed: 0'], axis=1) diff --git a/sfaira/data/utils/create_meta_mouse.py b/sfaira/data/utils/create_meta_mouse.py index a56678c74..1634af953 100644 --- a/sfaira/data/utils/create_meta_mouse.py +++ b/sfaira/data/utils/create_meta_mouse.py @@ -15,29 +15,29 @@ "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "fat": mouse.DatasetGroupFat(path=path, meta_path=path_meta), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=path_meta), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=path_meta), + "colon": mouse.DatasetGroupColon(path=path, 
meta_path=path_meta), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=path_meta), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=path_meta), + "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=path_meta), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=path_meta), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=path_meta), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta) } for k in list(ds_dict.keys()): diff --git a/sfaira/data/utils/write_backed_human.py b/sfaira/data/utils/write_backed_human.py 
index a90bc8c4e..1788f5e36 100644 --- a/sfaira/data/utils/write_backed_human.py +++ b/sfaira/data/utils/write_backed_human.py @@ -1,6 +1,6 @@ import sys import tensorflow as tf -import sfaira.api as sfaira +import sfaira import os from sfaira.data import human diff --git a/sfaira/data/utils/write_backed_mouse.py b/sfaira/data/utils/write_backed_mouse.py index fc6ff9c5a..a408380e9 100644 --- a/sfaira/data/utils/write_backed_mouse.py +++ b/sfaira/data/utils/write_backed_mouse.py @@ -1,6 +1,6 @@ import sys import tensorflow as tf -import sfaira.api as sfaira +import sfaira import os from sfaira.data import mouse @@ -21,29 +21,29 @@ "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "fat": mouse.DatasetGroupFat(path=path, meta_path=path_meta), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=path_meta), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=path_meta), + "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=path_meta), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=path_meta), + "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - 
"peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=path_meta), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=path_meta), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=path_meta), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta), } ds = sfaira.data.DatasetSuperGroup( diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 08e7714d8..3e27959ef 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ from sfaira.versions.celltype_versions import SPECIES_DICT, CelltypeVersionsBase from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel +from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 188d39b05..960a091b8 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -45,7 +45,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], 
weights_md5: Union[str, None] = None, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): self.data = data self.obs_train = None @@ -78,52 +78,65 @@ def load_pretrained_weights(self): """ Loads model weights from local directory or zenodo. """ - if self.model_dir.endswith('/'): - self.model_dir += '/' - if self.model_dir.startswith('http'): # Remote repo if not os.path.exists(self.cache_path): os.makedirs(self.cache_path) import urllib.request + from urllib.parse import urljoin from urllib.error import HTTPError try: - urllib.request.urlretrieve(self.model_dir + self.model_id + '_weights.h5', - self.cache_path + self.model_id + '_weights.h5') + urllib.request.urlretrieve(self.model_dir, + os.path.join(self.cache_path, os.path.basename(self.model_dir)) + ) + fn = os.path.join(self.cache_path, os.path.basename(self.model_dir)) except HTTPError: try: - urllib.request.urlretrieve(self.model_dir + self.model_id + '_weights.data-00000-of-00001', - self.cache_path + self.model_id + '_weights.data-00000-of-00001') + urllib.request.urlretrieve(urljoin(self.model_dir, f'{self.model_id}_weights.h5'), + os.path.join(self.cache_path, f'{self.model_id}_weights.h5') + ) + fn = os.path.join(self.cache_path, f"{self.model_id}_weights.h5") except HTTPError: - raise FileNotFoundError(f'cannot find remote weightsfile: {self.model_dir + self.model_id}') - - fn = self.cache_path + self.model_id + "_weights" + try: + urllib.request.urlretrieve(urljoin(self.model_dir, f'{self.model_id}_weights.data-00000-of-00001'), + os.path.join(self.cache_path, f'{self.model_id}_weights.data-00000-of-00001') + ) + fn = os.path.join(self.cache_path, f"{self.model_id}_weights.data-00000-of-00001") + except HTTPError: + raise FileNotFoundError(f'cannot find remote weightsfile') else: # Local repo if not self.model_dir: raise ValueError('the model_id is set but the path to the model is empty') - fn = self.model_dir + self.model_id + "_weights" + if os.path.isfile(self.model_dir) \ 
+ and not self.model_dir.endswith(".h5") \ + and not self.model_dir.endswith(".data-00000-of-00001"): + raise ValueError('weights files saved in h5 format need to have an h5 file extension') + + if os.path.isfile(self.model_dir): + fn = self.model_dir + elif os.path.isfile(os.path.join(self.model_dir, f"{self.model_id}_weights.data-00000-of-00001")): + fn = os.path.join(self.model_dir, f"{self.model_id}_weights.data-00000-of-00001") + elif os.path.isfile(os.path.join(self.model_dir, f"{self.model_id}_weights.h5")): + fn = os.path.join(self.model_dir, f"{self.model_id}_weights.h5") + else: + raise ValueError('the weightsfile could not be found') - if os.path.exists(fn+'.h5'): - self._assert_md5_sum(fn+'.h5', self.md5) - self.model.training_model.load_weights(fn+'.h5') - elif os.path.exists(fn + ".data-00000-of-00001"): - self._assert_md5_sum(fn + ".data-00000-of-00001", self.md5) - self.model.training_model.load_weights(fn) - elif os.path.exists(fn): - raise ValueError('weights files saved in h5 format need to have an h5 file extension') + self._assert_md5_sum(fn, self.md5) + if fn.endswith(".data-00000-of-00001"): + self.model.training_model.load_weights(".".join(fn.split(".")[:-1])) else: - raise ValueError(f'the weightsfile {fn} could not be found') + self.model.training_model.load_weights(fn) def save_weights_to_cache(self): - if not os.path.exists(self.cache_path+'weights/'): - os.makedirs(self.cache_path+'weights/') - fn = self.cache_path + 'weights/' + str(self.model_id) + "_weights_cache.h5" + if not os.path.exists(os.path.join(self.cache_path, 'weights')): + os.makedirs(os.path.join(self.cache_path, 'weights')) + fn = os.path.join(self.cache_path, 'weights', f"{self.model_id}_weights_cache.h5") self.model.training_model.save_weights(fn) def load_weights_from_cache(self): - fn = self.cache_path + 'weights/' + str(self.model_id) + "_weights_cache.h5" + fn = os.path.join(self.cache_path, 'weights', f"{self.model_id}_weights_cache.h5") 
self.model.training_model.load_weights(fn) def init_model(self, clear_weight_cache=True, override_hyperpar=None): @@ -132,9 +145,9 @@ def init_model(self, clear_weight_cache=True, override_hyperpar=None): :return: """ if clear_weight_cache: - if os.path.exists(self.cache_path+'weights/'): - for file in os.listdir(self.cache_path+'weights/'): - file_path = os.path.join(self.cache_path+'weights/', file) + if os.path.exists(os.path.join(self.cache_path, 'weights')): + for file in os.listdir(os.path.join(self.cache_path, 'weights')): + file_path = os.path.join(os.path.join(self.cache_path, 'weights'), file) os.remove(file_path) def _assert_md5_sum( @@ -466,7 +479,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], weights_md5: Union[str, None] = None, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): super(EstimatorKerasEmbedding, self).__init__( data=data, @@ -891,7 +904,7 @@ def __init__( model_type: Union[str, None], model_topology: Union[str, None], weights_md5: Union[str, None] = None, - cache_path: str = 'cache/', + cache_path: str = os.path.join('cache', ''), max_class_weight: float = 1e3 ): super(EstimatorKerasCelltype, self).__init__( @@ -1047,7 +1060,7 @@ def generator(): return dataset - elif mode == 'eval' or mode == 'predict': + elif mode == 'eval': weights, y = self._get_celltype_out(idx=idx) if not weighted: weights = np.ones_like(weights) @@ -1064,6 +1077,19 @@ def generator(): return x, y, weights + elif mode == 'predict': + # Prepare data reading according to whether anndata is backed or not: + if self.data.isbacked: + # Need to supply sorted indices to backed anndata: + x = self.data.X[np.sort(idx), :] + # Sort back in original order of indices. + x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] + else: + x = self._prepare_data_matrix(idx=idx) + x = x.toarray() + + return x, None, None + else: raise ValueError(f'Mode {mode} not recognised. 
Should be "train", "eval" or" predict"') @@ -1092,7 +1118,7 @@ def predict(self): prediction """ if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y, _ = self._get_dataset( + x, _, _ = self._get_dataset( idx=self.idx_test, batch_size=None, mode='predict' diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py index 9bc2dad49..5e70f72b4 100644 --- a/sfaira/interface/__init__.py +++ b/sfaira/interface/__init__.py @@ -1,2 +1 @@ from sfaira.interface.user_interface import UserInterface -from sfaira.interface.model_zoo import ModelZooEmbedding, ModelZooCelltype, ModelZoo diff --git a/sfaira/interface/external.py b/sfaira/interface/external.py index f4e9a8a8f..fdb52e721 100644 --- a/sfaira/interface/external.py +++ b/sfaira/interface/external.py @@ -1,5 +1,5 @@ from sfaira.estimators import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype -from sfaira.preprocessing import gene_filter, cell_filter, tpm_normalize import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies +from sfaira.data.interactive import DatasetInteractive diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index 98783060a..ef0e1ad70 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -7,8 +7,9 @@ import pandas as pd import os from typing import List, Union +import warnings -from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype +from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype, DatasetInteractive from .model_zoo import ModelZooEmbedding, ModelZooCelltype @@ -17,7 +18,7 @@ class UserInterface: This class performs data set handling and coordinates estimators for the different model types. 
Example code to obtain a UMAP embedding plot of the embedding created from your data with cell-type labels: ``` - import sfaira.api as sfaira + import sfaira import anndata import scanpy @@ -50,31 +51,24 @@ def __init__( self, custom_repo: Union[list, str, None] = None, sfaira_repo: bool = False, - cache_path: str = 'cache/' + cache_path: str = os.path.join('cache', '') ): self.model_kipoi_embedding = None self.model_kipoi_celltype = None self.estimator_embedding = None self.estimator_celltype = None self.use_sfaira_repo = sfaira_repo - - if cache_path.endswith("/"): - self.cache_path = cache_path - else: - self.cache_path = cache_path + "/" - - if custom_repo is not None and not custom_repo.endswith("/"): - custom_repo += "/" + self.cache_path = os.path.join(cache_path, '') if sfaira_repo: # check if public sfaira repository should be accessed - self.model_lookuptable = self._load_lookuptable("https://sandbox.zenodo.org/record/647061/files/") #TODO: this still points to zenodo sandbox + self.model_lookuptable = self._load_lookuptable("https://zenodo.org/record/4304660/files/") if custom_repo: if isinstance(custom_repo, str): custom_repo = [custom_repo] for repo in custom_repo: - if os.path.exists(repo) and not os.path.exists(repo + 'model_lookuptable.csv'): + if os.path.exists(repo) and not os.path.exists(os.path.join(repo, 'model_lookuptable.csv')): self.write_lookuptable(repo) if hasattr(self, 'model_lookuptable'): @@ -89,6 +83,9 @@ def __init__( raise ValueError("please either provide a custom folder/repository with model weights or specify " "`sfaira_repo=True` to access the public weight repository") + # TODO: workaround to deal with model ids bearing file endings in model lookuptable (as is the case in first sfaira model repo upload) + self.model_lookuptable['model_id'] = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in self.model_lookuptable['model_id']] + self.zoo_embedding = ModelZooEmbedding(self.model_lookuptable) self.zoo_celltype = 
ModelZooCelltype(self.model_lookuptable) @@ -103,7 +100,7 @@ def _load_lookuptable( :param repo_path: :return: model_lookuptable """ - model_lookuptable = pd.read_csv(repo_path + 'model_lookuptable.csv', header=0, index_col=0) + model_lookuptable = pd.read_csv(os.path.join(repo_path, 'model_lookuptable.csv'), header=0, index_col=0) # check for any duplicated model_ids if hasattr(self, 'model_lookuptable'): @@ -120,50 +117,190 @@ def write_lookuptable( repo_path: str ): """ - checks if there is a txt file that lists the model_id and path of models in the directory - adds model_index that connects model_id with the link to the model - :param repo_path: :return: """ import hashlib - files = [ - os.path.join(repo_path, f) for f in os.listdir(repo_path) - if (os.path.isfile(os.path.join(repo_path, f)) - and (f.endswith('_weights.h5') or f.endswith('_weights.data-00000-of-00001')) - and (f.startswith('embedding') or f.startswith('celltype')) - ) - ] - - if files: - file_names = [f.split('/')[-1] for f in files] - s = [i.split('_')[0:7] for i in file_names] - ids = ['_'.join(i) for i in s] - md5 = [] + file_names = [] + model_paths = [] + file_paths = [] + md5 = [] + for subdir, dirs, files in os.walk(repo_path): for file in files: - with open(file, 'rb') as f: - md5.append(hashlib.md5(f.read()).hexdigest()) - + if os.path.isfile(os.path.join(subdir, file)) and ( + file.endswith('.h5') or file.endswith('.data-00000-of-00001')) and ( + file.startswith('embedding_') or file.startswith('celltype_')): + model_paths.append(os.path.join(subdir, "")) + file_paths.append(os.path.join(subdir, file)) + file_names.append(file) + with open(os.path.join(subdir, file), 'rb') as f: + md5.append(hashlib.md5(f.read()).hexdigest()) + s = [i.split('_')[0:7] for i in file_names] + ids = ['_'.join(i) for i in s] + ids_cleaned = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in ids] # remove file extensions from ids + + if ids: pd.DataFrame( - list(zip(ids, [repo_path for i in 
files], md5)), - columns=['model_id', 'model_path', 'md5'] - ).to_csv(repo_path + 'model_lookuptable.csv') + list(zip(ids_cleaned, model_paths, file_paths, md5)), + columns=['model_id', 'model_path', 'model_file_path', 'md5'] + )\ + .sort_values('model_id')\ + .reset_index(drop=True)\ + .to_csv(os.path.join(repo_path, 'model_lookuptable.csv')) else: - raise ValueError('No model weights found in {}.' + raise ValueError(f'No model weights found in {repo_path} ' 'Weights need to have .h5 or .data-00000-of-00001 extension' - 'to be recognised'.format(repo_path) + 'to be recognised' ) - def load_data( + def deposit_zenodo( self, - data: anndata.AnnData + zenodo_access_token: str, + title: str, + authors: list, + description: str, + metadata: dict = {}, + publish: bool = False, + sandbox: bool = False ): """ + Deposit all models in model lookup table on Zenodo. If publish is set to false, files will be uploaded to a + deposition draft, which can be further edited (additional metadata, files etc.). Returns the DOI link if + publish=True or a link to the deposition draft if publish=False. + + :param zenodo_access_token: Your personal Zenodo API access token. Create one here: https://zenodo.org/account/settings/applications/tokens/new/ + :param title: Title of the Zenodo deposition + :param authors: List of dicts, where each dict defines one author (dict keys: name: Name of creator in the format "Family name, Given names", affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), gnd: GND identifier of creator (optional) + :param description: Description of the Zenodo deposition. + :param metadata: Dictionary with further metadata attributes of the deposit. See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation + :param publish: Set this to True to directly publish the weights on Zenodo. When set to False a draft will be created, which can be edited in the browser before publishing. 
+ :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. + """ + + import requests + import json + headers = {"Content-Type": "application/json"} + params = {'access_token': zenodo_access_token} + sandbox = 'sandbox.' if sandbox else '' + + # Verify access token + r = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions', params=params) + if r.status_code != 200: + raise ValueError( + "Your Zenodo access token was not accepted by the API. Please provide a valid access token.") + + # Create empty deposition + r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions', + params=params, + json={}, + headers=headers) + + # Obtain bucket URL and deposition ID + bucket_url = r.json()["links"]["bucket"] + deposition_id = r.json()['id'] + + # Loop over files in model lookup table and upload them one by one + for i, weight_path in enumerate(self.model_lookuptable['model_file_path']): + filename = os.path.basename(weight_path) + with open(weight_path, "rb") as fp: + r = requests.put( + f"{bucket_url}/{filename}", + data=fp, + params=params, + ) + # Verify checksum after upload + if r.json()['checksum'][4:] != self.model_lookuptable['md5'][i]: + warnings.warn(f"The md5 checksum in your model_lookuptable for {self.model_lookuptable['model_id'][i]} " + f"does not match the md5 checksum of the uploaded file.") + + # Add model lookup table to zenodo + df = self.model_lookuptable.copy() + df['model_path'] = f"https://{sandbox}zenodo.org/record/{deposition_id}/files/" + df['model_file_path'] = [f"https://{sandbox}zenodo.org/record/{deposition_id}/files/{os.path.basename(f)}" for f + in self.model_lookuptable['model_file_path']] + r = requests.put( + f"{bucket_url}/model_lookuptable.csv", + data=df.to_csv(), + params=params, + ) - :return: + # Add metadata + meta_core = { + 
'title': title, + 'creators': authors, + 'description': description, + 'license': 'cc-by-4.0', + 'upload_type': 'dataset', + 'access_right': 'open' + } + meta = {**meta_core, **metadata} + r = requests.put(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}', + params=params, + data=json.dumps({ + 'metadata': meta + }), + headers=headers) + + if not publish: + print(f"Zenodo deposition draft has been created: {r.json()['links']['latest_draft_html']}") + return r.json()['links']['latest_draft_html'] + else: + # Publish the deposition + r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/actions/publish', + params=params) + if r.status_code == 202: + if sandbox: + print(f"Weights referenced in model_lookuptable have been sucessfully published on Zenodo: " + f"{r.json()['links']['latest_html']}") + return r.json()['links']['latest_html'] + else: + print(f"Weights referenced in model_lookuptable have been sucessfully published on Zenodo: " + f"{r.json()['links']['conceptdoi']}") + return r.json()['links']['conceptdoi'] + else: + try: + m = r.json()['message'] + except KeyError: + m = f"Submission failed with html status code {r.status_code}" + raise ValueError(m) + + def load_data( + self, + data: anndata.AnnData, + gene_symbol_col: Union[str, None] = None, + gene_ens_col: Union[str, None] = None + ): """ - self.data = data + Loads the provided AnnData object into sfaira. + If genes in the provided AnnData object are annotated as gene symbols, please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. + If genes in the provided AnnData object are annotated as ensembl ids, please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. + You need to provide at least one of the two. 
+ :param data: AnnData object to load + :param gene_symbol_col: Var column name (or 'index') which contains gene symbols + :param gene_ens_col: ar column name (or 'index') which contains ensembl ids + """ + if self.zoo_embedding.species is not None: + species = self.zoo_embedding.species + organ = self.zoo_embedding.organ + elif self.zoo_celltype.species is not None: + species = self.zoo_celltype.species + organ = self.zoo_celltype.organ + else: + raise ValueError("Please first set which model_id to use via the model zoo before loading the data") + + if gene_ens_col is None and gene_symbol_col is None: + raise ValueError("Please provide either the gene_ens_col or the gene_symbol_col argument.") + + dataset = DatasetInteractive( + data=data, + species=species, + organ=organ, + gene_symbol_col=gene_symbol_col, + gene_ens_col=gene_ens_col + ) + dataset.load() + self.data = dataset.adata def filter_cells(self): """ @@ -183,9 +320,7 @@ def load_model_embedding(self): :return: Model ID loaded. """ assert self.zoo_embedding.model_id is not None, "choose embedding model first" - model_dir = self.model_lookuptable.model_path[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] - if not model_dir.endswith("/"): - model_dir += "/" + model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] self.estimator_embedding = EstimatorKerasEmbedding( data=self.data, @@ -210,9 +345,7 @@ def load_model_celltype(self): :return: Model ID loaded. 
""" assert self.zoo_celltype.model_id is not None, "choose cell type model first" - model_dir = self.model_lookuptable.model_path[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] - if not model_dir.endswith("/"): - model_dir += "/" + model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] self.estimator_celltype = EstimatorKerasCelltype( data=self.data, diff --git a/sfaira/models/celltype/external.py b/sfaira/models/celltype/external.py index cbee67fd8..46629b4d4 100644 --- a/sfaira/models/celltype/external.py +++ b/sfaira/models/celltype/external.py @@ -1,3 +1,4 @@ import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel, PreprocInput +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/external.py b/sfaira/models/embedding/external.py index 7656c708e..c831cdb04 100644 --- a/sfaira/models/embedding/external.py +++ b/sfaira/models/embedding/external.py @@ -1,2 +1,4 @@ from sfaira.versions.topology_versions import Topologies -from sfaira.models import BasicModel, PreprocInput, MaskingDense \ No newline at end of file +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput +from sfaira.models.made import MaskingDense diff --git a/sfaira/preprocessing.py b/sfaira/preprocessing.py deleted file mode 100644 index cf12b6e20..000000000 --- a/sfaira/preprocessing.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np - -def cell_filter(): - pass - - -def gene_filter(): - pass - - -def tpm_normalize(): - pass \ No newline at end of file diff --git a/sfaira/train/external.py b/sfaira/train/external.py index 7fb82b713..158904c07 100644 --- a/sfaira/train/external.py +++ 
b/sfaira/train/external.py @@ -1,5 +1,5 @@ from sfaira.versions.celltype_versions import SPECIES_DICT from sfaira.data import DatasetGroupBase, DatasetSuperGroup from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype from sfaira.data import mouse, human diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index c42fa6779..86119aa17 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -135,7 +135,7 @@ def load_gs( :param gs_ids: :return: """ - res_dirs = [self.source_path + x + "/results/" for x in gs_ids] + res_dirs = [os.path.join(self.source_path, x, "results", "") for x in gs_ids] run_ids = [ np.sort(np.unique([ x.split("_history.pickle")[0] @@ -152,33 +152,36 @@ def load_gs( gs_keys = [] for i, indir in enumerate(res_dirs): for x in run_ids[i]: - fn_history = indir + x + "_history.pickle" + fn_history = os.path.join(indir, f"{x}_history.pickle") if os.path.isfile(fn_history): with open(fn_history, 'rb') as f: histories[x] = pickle.load(f) else: - print("file %s not found" % (x + "_history.pickle")) - fn_eval = indir + x + "_evaluation.pickle" + print(f"file {x}_history.pickle not found") + + fn_eval = os.path.join(indir, f"{x}_evaluation.pickle") if os.path.isfile(fn_eval): with open(fn_eval, 'rb') as f: evals[x] = pickle.load(f) else: - print("file %s not found" % (x + "_evaluation.pickle")) - fn_hp = indir + x + "_hyperparam.pickle" + print(f"file {x}_evaluation.pickle not found") + + fn_hp = os.path.join(indir, f"{x}_hyperparam.pickle") if os.path.isfile(fn_hp): with open(fn_hp, 'rb') as f: hyperpars[x] = pickle.load(f) else: - print("file %s not found" % (x + "_hyperparam.pickle")) - fn_mhp = indir + x + "_model_hyperparam.pickle" + print(f"file {x}_hyperparam.pickle not found") + + fn_mhp = os.path.join(indir, 
f"{x}_model_hyperparam.pickle") if os.path.isfile(fn_mhp): with open(fn_mhp, 'rb') as f: model_hyperpars[x] = pickle.load(f) else: - pass - #TODO add: print("file %s not found" % (x + "_model_hyperparam.pickle")) + print(f"file {x}_model_hyperparam.pickle not found") + run_ids_proc.append(x) - gs_keys.append(indir.split("/")[-3]) + gs_keys.append(os.path.normpath(indir).split(os.path.sep)[-2]) self.run_ids = run_ids_proc self.gs_keys = dict(zip(run_ids_proc, gs_keys)) @@ -192,7 +195,7 @@ def load_y( hat_or_true: str, run_id: str ): - fn = self.source_path + self.gs_keys[run_id] + "/results/" + run_id + f"_y{hat_or_true}.npy" + fn = os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_y{hat_or_true}.npy") return np.load(fn) def best_model_by_partition( @@ -273,7 +276,7 @@ def get_best_model_ids( if partition_select not in ["test", "val", "train"]: raise ValueError("partition %s not recognised" % partition_select) - metric_select = partition_select + "_" + metric_select + metric_select = f"{partition_select}_{metric_select}" if cv_mode.lower() == "mean": best_model = tab.groupby("run", as_index=False)[metric_select].mean().\ @@ -337,8 +340,8 @@ def save_best_weight( subset=subset, ) shutil.copyfile( - self.source_path + self.gs_keys[model_id] + "/results/" + model_id + "_weights.h5", - path + model_id + "_weights.h5" + os.path.join(self.source_path, self.gs_keys[model_id], "results", f"{model_id}_weights.h5"), + os.path.join(path, f"{model_id}_weights.h5") ) def plot_completions( @@ -438,7 +441,7 @@ def plot_best_model_by_hyperparam( for i, organ in enumerate(organs): summary_table = summary_table_param.loc[summary_table_param["organ"].values == organ, :] # Plot each metric: - ycol = partition_show + "_" + metric_select + ycol = f"{partition_show}_{metric_select}" if len(organs) == 1 and len(params) == 1: ax = np.array([ax]) sns.boxplot( @@ -506,12 +509,12 @@ def plot_training_history( ).tolist(): sns_data_temp = 
pandas.DataFrame(self.histories[run]) sns_data_temp["epoch"] = np.arange(0, sns_data_temp.shape[0]) - sns_data_temp["cv"] = run.split("_")[-1] + sns_data_temp["cv"] = int(run.split("_")[-1]) sns_data.append(sns_data_temp) sns_data = pandas.concat(sns_data, axis=0) else: cv = cv_key - sns_data = pandas.DataFrame(self.histories[model_gs_id + "_" + cv]) + sns_data = pandas.DataFrame(self.histories[f"{model_gs_id}_{cv}"]) sns_data["epoch"] = np.arange(0, sns_data.shape[0]) sns_data["cv"] = cv @@ -531,13 +534,13 @@ def plot_training_history( # metric if metric_show not in sns_data.columns: - raise ValueError("metric %s not found in %s" % (metric_show, str(sns_data.columns))) + raise ValueError(f"metric {metric_show} not found in {sns_data.columns}") sns_data_metric = pandas.concat([pandas.DataFrame({ "epoch": sns_data["epoch"].values, "cv": sns_data["cv"].values, metric_show: sns_data[metric_show].values, "partition": x - }) for i, x in enumerate([metric_show, "val_" + metric_show])]) + }) for i, x in enumerate([metric_show, f"val_{metric_show}"])]) sns.lineplot( x="epoch", y=metric_show, style="partition", hue="cv", data=sns_data_metric, ax=ax[i, 1] @@ -580,26 +583,26 @@ def write_best_hyparam( self.source_path, self.gs_keys[best_model_id], 'results', - best_model_id + best_model_id, ) else: file_path_base = os.path.join( self.source_path, - self.gs_keys[best_model_id + "_cv" + str(cvs[0])], + self.gs_keys[f"{best_model_id}_cv{cvs[0]}"], 'results', - best_model_id + "_cv" + str(cvs[0]) + f"{best_model_id}_cv{cvs[0]}", ) # Read model hyperparameter - with open(file_path_base + "_model_hyperparam.pickle", 'rb') as file: + with open(f"{file_path_base}_model_hyperparam.pickle", 'rb') as file: hyparam_model = pickle.load(file) # Read optimizer hyperparameter - with open(file_path_base + "_hyperparam.pickle", 'rb') as file: + with open(f"{file_path_base}_hyperparam.pickle", 'rb') as file: hyparam_optim = pickle.load(file) # Write both hyperparameter dicts - with 
open(os.path.join(write_path, best_model_id[:-12] + "_best_hyperparam.txt"), 'w') as file: + with open(os.path.join(write_path, f"{best_model_id[:-12]}_best_hyperparam.txt"), 'w') as file: file.write(json.dumps({"model": hyparam_model, "optimizer": hyparam_optim})) return @@ -630,7 +633,7 @@ def load_ontology_names( :param run_id: :return: """ - fn = self.source_path + self.gs_keys[run_id] + "/results/" + run_id + "_ontology_names.pickle" + fn = os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_ontology_names.pickle") if not os.path.isfile(fn): raise FileNotFoundError(f"file {run_id}_ontology_names.pickle not found") with open(fn, 'rb') as f: @@ -663,15 +666,15 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], + "model": 
["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], "model_type": [ "linear" if (id_i.split("_")[3] == "mlp" and id_i.split("_")[5].split(".")[1] == "0") else id_i.split("_")[3] @@ -705,12 +708,12 @@ def best_model_celltype( if model_id is not None: if cvs is not None: fns = [ - self.source_path + self.gs_keys[model_id + "_cv" + str(x)] + "/results/" + model_id + "_cv" + str(x) + os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") for x in cvs ] else: - fns = [self.source_path + self.gs_keys[model_id] + "/results/" + model_id] - covar = [pandas.read_csv(x + "_covar.csv") for x in fns] + fns = [os.path.join(self.source_path, self.gs_keys[model_id], "results", model_id)] + covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, covar else: return None, [None] @@ -777,7 +780,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), partition_show + "_" + metric_show + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -809,7 +812,7 @@ def plot_best( annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, - cbar_kws={'label': partition_show + "_" + metric_show}, + cbar_kws={'label': f"{partition_show}_{metric_show}"}, cmap=None ) return fig, axs, sns_data_heatmap @@ -910,12 +913,13 @@ def plot_best_classwise_heatmap( elif metric_show == "f1": m = f1(yhat, ytrue) else: - raise ValueError("did not recognize metric_show %s" % metric_show) + raise ValueError(f"did not recognize metric_show {metric_show}") vals.append(m) - sns_tab[metric_show + "_classwise"] = vals + sns_tab[f"{metric_show}_classwise"] = vals # Build figure. 
model_types = sns_tab["model_type"].unique() + model_types.sort() classes = self.load_ontology_names(run_id=sns_tab["run"].values[0]) if 'unknown' not in classes and 'Unknown' not in classes: classes = classes + ['Unknown'] @@ -923,7 +927,7 @@ def plot_best_classwise_heatmap( hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): - data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, metric_show + "_classwise"].values) + data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, f"{metric_show}_classwise"].values) if data_temp.shape[0] > 0: if self.cv: if collapse_cv == "mean": @@ -943,7 +947,7 @@ def plot_best_classwise_heatmap( if c in cell_counts.keys(): n_cells.append(np.round(cell_counts[c])) else: - warnings.warn(f"Celltype {c} from cell ontology now found in {organism} {organ} dataset") + warnings.warn(f"Celltype {c} from cell ontology not found in {organism} {organ} dataset") n_cells.append(np.nan) n_cells = np.array(n_cells)[:, None] sns_data_heatmap = pandas.DataFrame( @@ -961,7 +965,7 @@ def plot_best_classwise_heatmap( annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, - cbar_kws={'label': "test_" + metric_show}, + cbar_kws={'label': f"test_{metric_show}"}, cmap=None ) axs = sns.heatmap( @@ -1076,7 +1080,7 @@ def plot_best_classwise_scatter( else: raise ValueError("did not recognize metric_show %s" % metric_show) vals.append(m) - sns_tab[metric_show + "_classwise"] = vals + sns_tab[f"{metric_show}_classwise"] = vals # Build figure. 
model_types = sns_tab["model_type"].unique() @@ -1087,7 +1091,7 @@ def plot_best_classwise_scatter( hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): - data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, metric_show + "_classwise"].values) + data_temp = np.vstack(sns_tab.loc[sns_tab["model_type"].values == m, f"{metric_show}_classwise"].values) if data_temp.shape[0] > 0: if self.cv: if collapse_cv == "mean": @@ -1107,7 +1111,7 @@ def plot_best_classwise_scatter( if c in cell_counts.keys(): n_cells.append(np.round(cell_counts[c])) else: - warnings.warn(f"Celltype {c} from cell ontology now found in {organism} {organ} dataset") + warnings.warn(f"Celltype {c} from cell ontology not found in {organism} {organ} dataset") n_cells.append(np.nan) n_cells = np.array(n_cells)[:, None] sns_data_scatter = pandas.DataFrame( @@ -1171,23 +1175,23 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": 
[id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "model_type": [id_i.split("_")[3] for id_i in self.run_ids], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids, }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("all_" + m, [self.evals[x]["all"][m] for x in self.run_ids]) for m in metrics]).items()) + list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() else self.evals[x]["train"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() else self.evals[x]["test"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() else self.evals[x]["val"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("all_" + m, [self.evals[x]["all"][m] 
if m in self.evals[x]["all"].keys() else self.evals[x]["all"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models )) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models @@ -1223,13 +1227,13 @@ def best_model_embedding( if model_id is not None: if cvs is not None: fns = [ - self.source_path + self.gs_keys[model_id + "_cv" + str(x)] + "/results/" + model_id + "_cv" + str(x) + os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") for x in cvs ] else: - fns = [self.source_path + self.gs_keys[model_id] + "/results/" + model_id] - embedding = [np.load(x + "_embedding.npy") for x in fns] - covar = [pandas.read_csv(x + "_covar.csv") for x in fns] + fns = [os.path.join(self.source_path, self.gs_keys[model_id], "results", model_id)] + embedding = [np.load(f"{x}_embedding.npy") for x in fns] + covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, embedding, covar else: return None, [None], [None] @@ -1288,7 +1292,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), partition_show + "_" + metric_show + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -1319,7 +1323,7 @@ def plot_best( annot=True, fmt=".2f", ax=axs, xticklabels=True, yticklabels=True, - cbar_kws={'label': partition_show + "_" + metric_show} + cbar_kws={'label': f"{partition_show}_{metric_show}"} ) return fig, axs, sns_data_heatmap @@ -1362,11 +1366,11 @@ def get_gradients_by_celltype( ) # check cached file - resultspath = os.path.join(self.source_path, self.gs_keys[model_id], 'results') + resultspath = os.path.join(self.source_path, self.gs_keys[model_id], 'results', '') - if os.path.isfile(os.path.join(resultspath, model_id + '_grads.pickle')) and not ignore_cache: + if os.path.isfile(os.path.join(resultspath, 
f'{model_id}_grads.pickle')) and not ignore_cache: print('Load gradients from cached file...') - with open(os.path.join(resultspath, model_id + '_grads.pickle'), 'rb') as f: + with open(os.path.join(resultspath, f'{model_id}_grads.pickle'), 'rb') as f: gradients_raw = pickle.load(f) else: print('Compute gradients (1/3): load data') @@ -1394,12 +1398,12 @@ def get_gradients_by_celltype( model_topology=model_id.split('_')[5] ) embedding.init_model() - embedding.model.training_model.load_weights(os.path.join(resultspath, model_id + '_weights.h5')) + embedding.model.training_model.load_weights(os.path.join(resultspath, f'{model_id}_weights.h5')) # compute gradients print('Compute gradients (3/3): cumulate gradients') gradients_raw = embedding.compute_gradients_input(test_data=test_data, batch_size=256, per_celltype=True) - with open(os.path.join(resultspath, model_id + '_grads.pickle'), 'wb') as f: + with open(os.path.join(resultspath, f'{model_id}_grads.pickle'), 'wb') as f: pickle.dump(gradients_raw, f, pickle.HIGHEST_PROTOCOL) print('Gradients saved to cache file!') @@ -1581,7 +1585,6 @@ def plot_gradient_cor( if by_type: v = avg_grads[model_type[0]] celltypes_coord = celltypes[model_type[0]] - cell_names = [str(i) for i in range(v.shape[0])] cormat = pandas.DataFrame( np.corrcoef(v), index=celltypes_coord, @@ -1594,4 +1597,119 @@ def plot_gradient_cor( plt.tight_layout() if save is not None: plt.savefig(save) - plt.show() \ No newline at end of file + plt.show() + + def plot_npc( + self, + organ, + topology_version, + cvs=None + ): + """ + Plots the explained variance ration that accumulates explained variation of the latent space’s ordered + principal components. + If an embedding file is found that contains z, z_mean, z_var (eg. output from predict_variational() function) + the model will use z, and not z_mean. 
+ """ + import matplotlib.pyplot as plt + if self.summary_tab is None: + self.create_summary_tab() + models = np.unique(self.summary_tab["model_type"]).tolist() + self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] + + with plt.style.context("seaborn-whitegrid"): + plt.figure(figsize=(12, 6)) + for model in models: + model_id, embedding, covar = self.best_model_embedding( + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) + if len(embedding[0].shape) == 3: + z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z + else: + z = embedding[0] + cov = np.cov(z.T) + eig_vals, eig_vecs = np.linalg.eig(cov) + eig_sum = sum(eig_vals) + var_exp = [(i / eig_sum) for i in sorted(eig_vals, reverse=True)] + cum_var_exp = np.cumsum([0] + var_exp) + plt.step(range(0, eig_vals.shape[0]+1), cum_var_exp, where="post", linewidth=3, + label="%s cumulative explained variance (95%%: %s / 99%%: %s)" % (model, np.sum(cum_var_exp < .95), np.sum(cum_var_exp < .99))) + plt.yticks([0.0, .25, .50, .75, .95, .99]) + plt.ylabel("Explained variance ratio", fontsize=16) + plt.xlabel("Principal components", fontsize=16) + plt.legend(loc="best", fontsize=16, frameon=True) + plt.tight_layout() + plt.show() + + def plot_active_latent_units( + self, + organ, + topology_version, + cvs=None + ): + """ + Plots latent unit activity measured by empirical variance of the expected latent space. + See: https://arxiv.org/abs/1509.00519 + If an embedding file is found that contains z, z_mean, z_var (eg. output from predict_variational() function) + the model will use z, and not z_mean. 
+ """ + + colors = ['red', 'blue', 'green', 'cyan', 'magenta', 'yellow', 'darkgreen', 'lime', 'navy', 'royalblue', 'pink', 'peru'] + + def active_latent_units_mask(z): + var_x = np.diagonal(np.cov(z.T)) + min_var_x = 0.01 + active_units_mask = var_x > min_var_x + return active_units_mask + + import matplotlib.pyplot as plt + if self.summary_tab is None: + self.create_summary_tab() + models = np.unique(self.summary_tab["model_type"]).tolist() + self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] + + with plt.style.context("seaborn-whitegrid"): + plt.figure(figsize=(12, 6)) + plt.axhline(np.log(0.01), color="k", linestyle='dashed', linewidth=2, label="active unit threshold") + for i, model in enumerate(models): + model_id, embedding, covar = self.best_model_embedding( + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) + if len(embedding[0].shape) == 3: + z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z + else: + z = embedding[0] + latent_dim = z.shape[1] + var = np.sort(np.diagonal(np.cov(z.T)))[::-1] + log_var = np.log(var) + active_units = np.log(var[active_latent_units_mask(z)]) + + plt.plot(range(1,log_var.shape[0]+1), log_var, color=colors[i], alpha=1.0, linewidth=3, + label="%s active units: %i" % (model, len(active_units))) + # to plot vertical lines + log_var_cut = var.copy() + log_var_cut[~active_latent_units_mask(z)] = 0 + log_var_cut = np.log(log_var_cut) + num_active = np.argmax(log_var_cut) + if num_active > 0: + plt.vlines(num_active, ymin = -.15, ymax = 0.15, color=colors[i], linestyle='solid', linewidth=3) + if model == "vaevamp": + z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))),2) + plt.plot(range(1, int(latent_dim/2)+1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_2$ active units: %i" % (model, len(z2[z2>np.log(0.01)])), linestyle='dashed', + linewidth=3) + 
plt.plot(range(1, int(latent_dim/2)+1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_1$ active units: %i" % (model, len(z1[z1 > np.log(0.01)])), + linestyle='dotted', linewidth=3) + plt.xlabel(r'Latent unit $i$', fontsize=16) + plt.ylabel(r'$\log\,{(A_{\bf z})}_i$', fontsize=16) + plt.title(r"Latent unit activity", fontsize=16) + plt.legend(loc="upper right", frameon=True, fontsize=12) + plt.tight_layout() + plt.show() diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index e3ee5fcfd..c6125d400 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -13,6 +13,16 @@ class TargetZoos: + """ + Class that provides access to all available dataset groups in sfaira. + + Parameters + ---------- + path : str + The name of the animal + meta_path : str + The sound the animal makes + """ def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): if path is not None: @@ -20,29 +30,29 @@ def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path), "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path), - "fat": mouse.DatasetGroupFat(path=path, meta_path=meta_path), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path), "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path), - "largeintestine": mouse.DatasetGroupLargeintestine(path=path, meta_path=meta_path), - "limbmuscle": mouse.DatasetGroupLimbmuscle(path=path, meta_path=meta_path), + "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path), "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path), "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path), "mammarygland": 
mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path), - "marrow": mouse.DatasetGroupMarrow(path=path, meta_path=meta_path), - "ovary": mouse.DatasetGroupOvary(path=path, meta_path=meta_path), + "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path), - "peripheralblood": mouse.DatasetGroupPeripheralBlood(path=path, meta_path=meta_path), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path), "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path), "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path), "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path), - "smallintestine": mouse.DatasetGroupSmallintestine(path=path, meta_path=meta_path), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path), - "testis": mouse.DatasetGroupTestis(path=path, meta_path=meta_path), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path), - "trachae": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), "uterus": mouse.DatasetGroupUterus(path=path) } self.data_human = { diff --git a/sfaira/unit_tests/external.py b/sfaira/unit_tests/external.py index 21b7266ef..6e2cfddc3 100644 --- a/sfaira/unit_tests/external.py +++ b/sfaira/unit_tests/external.py @@ -1,6 +1,6 @@ from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo, ModelZooCelltype, ModelZooEmbedding, 
UserInterface -from sfaira.preprocessing import gene_filter, cell_filter, tpm_normalize +from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.interface.user_interface import UserInterface import sfaira.versions.celltype_versions as celltype_versions from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies diff --git a/sfaira/unit_tests/test_models.py b/sfaira/unit_tests/test_models.py index 1a3937827..02281d44d 100644 --- a/sfaira/unit_tests/test_models.py +++ b/sfaira/unit_tests/test_models.py @@ -7,7 +7,7 @@ from sfaira.estimators.metrics import custom_mse import sfaira.models as models -from sfaira.models import BasicModel +from sfaira.models.base import BasicModel class _TestModel: diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py index 482315305..3e4990909 100644 --- a/sfaira/versions/celltype_versions/__init__.py +++ b/sfaira/versions/celltype_versions/__init__.py @@ -8,16 +8,27 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - mouse_e = sfairae.versions.celltype_versions.SPECIES_DICT["mouse"] - human_e = sfairae.versions.celltype_versions.SPECIES_DICT["human"] - for k in mouse.keys(): - if k in mouse_e.keys(): - mouse[k].celltype_universe.update(mouse_e[k]) - mouse[k].ontology.update(mouse_e[k]) - if k in mouse_e.keys(): - human[k].celltype_universe.update(human_e[k]) - human[k].ontology.update(human_e[k]) + from sfaira_extension.versions.celltype_versions import SPECIES_DICT as SPECIES_DICT_EXTENSION + + for organ in mouse.keys(): + if organ in SPECIES_DICT_EXTENSION["mouse"].keys(): + for v in SPECIES_DICT_EXTENSION["mouse"][organ].versions: + if v in mouse[organ].celltype_universe.keys(): + raise ValueError(f'Celltype version {v} already defined for mouse organ {organ} in base sfaira. 
' + f'Please define a new version in sfaira_extension.') + else: + mouse[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["mouse"][organ].celltype_universe[v] + mouse[organ].ontology[v] = SPECIES_DICT_EXTENSION["mouse"][organ].ontology[v] + + for organ in human.keys(): + if organ in SPECIES_DICT_EXTENSION["human"].keys(): + for v in SPECIES_DICT_EXTENSION["human"][organ].versions: + if v in human[organ].celltype_universe.keys(): + raise ValueError(f'Celltype version {v} already defined for human organ {organ} in base sfaira. ' + f'Please define a new version in sfaira_extension.') + else: + human[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["human"][organ].celltype_universe[v] + human[organ].ontology[v] = SPECIES_DICT_EXTENSION["human"][organ].ontology[v] except ImportError: pass diff --git a/sfaira/versions/celltype_versions/mouse/__init__.py b/sfaira/versions/celltype_versions/mouse/__init__.py index 1b764769d..e19c15aea 100644 --- a/sfaira/versions/celltype_versions/mouse/__init__.py +++ b/sfaira/versions/celltype_versions/mouse/__init__.py @@ -1,57 +1,57 @@ from .bladder import CelltypeVersionsMouseBladder from .brain import CelltypeVersionsMouseBrain from .diaphragm import CelltypeVersionsMouseDiaphragm -from .fat import CelltypeVersionsMouseFat +from .adipose import CelltypeVersionsMouseAdipose from .heart import CelltypeVersionsMouseHeart from .kidney import CelltypeVersionsMouseKidney -from .large_intestine import CelltypeVersionsMouseLargeintestine -from .limb_muscle import CelltypeVersionsMouseLimbmuscle +from .colon import CelltypeVersionsMouseColon +from .muscle import CelltypeVersionsMouseMuscle from .liver import CelltypeVersionsMouseLiver from .lung import CelltypeVersionsMouseLung -from .mammary_gland import CelltypeVersionsMouseMammarygland -from .marrow import CelltypeVersionsMouseMarrow -from .ovary import CelltypeVersionsMouseOvary -from .peripheral_blood import CelltypeVersionsMousePeripheralblood +from .mammarygland import 
CelltypeVersionsMouseMammarygland +from .bone import CelltypeVersionsMouseBone +from .femalegonad import CelltypeVersionsMouseFemalegonad +from .blood import CelltypeVersionsMouseBlood from .placenta import CelltypeVersionsMousePlacenta from .pancreas import CelltypeVersionsMousePancreas from .prostate import CelltypeVersionsMouseProstate from .rib import CelltypeVersionsMouseRib from .skin import CelltypeVersionsMouseSkin -from .small_intestine import CelltypeVersionsMouseSmallintestine +from .ileum import CelltypeVersionsMouseIleum from .spleen import CelltypeVersionsMouseSpleen from .stomach import CelltypeVersionsMouseStomach -from .testis import CelltypeVersionsMouseTestis +from .malegonad import CelltypeVersionsMouseMalegonad from .thymus import CelltypeVersionsMouseThymus from .tongue import CelltypeVersionsMouseTongue -from .trachae import CelltypeVersionsMouseTrachae +from .trachea import CelltypeVersionsMouseTrachea from .uterus import CelltypeVersionsMouseUterus ORGAN_DICT = { "bladder": CelltypeVersionsMouseBladder(), "brain": CelltypeVersionsMouseBrain(), "diaphragm": CelltypeVersionsMouseDiaphragm(), - "fat": CelltypeVersionsMouseFat(), + "adipose": CelltypeVersionsMouseAdipose(), "heart": CelltypeVersionsMouseHeart(), "kidney": CelltypeVersionsMouseKidney(), - "largeintestine": CelltypeVersionsMouseLargeintestine(), - "limbmuscle": CelltypeVersionsMouseLimbmuscle(), + "colon": CelltypeVersionsMouseColon(), + "muscle": CelltypeVersionsMouseMuscle(), "liver": CelltypeVersionsMouseLiver(), "lung": CelltypeVersionsMouseLung(), "mammarygland": CelltypeVersionsMouseMammarygland(), - "marrow": CelltypeVersionsMouseMarrow(), - "ovary": CelltypeVersionsMouseOvary(), - "peripheralblood": CelltypeVersionsMousePeripheralblood(), + "bone": CelltypeVersionsMouseBone(), + "femalegonad": CelltypeVersionsMouseFemalegonad(), + "blood": CelltypeVersionsMouseBlood(), "placenta": CelltypeVersionsMousePlacenta(), "pancreas": CelltypeVersionsMousePancreas(), "prostate": 
CelltypeVersionsMouseProstate(), "rib": CelltypeVersionsMouseRib(), "skin": CelltypeVersionsMouseSkin(), - "smallintestine": CelltypeVersionsMouseSmallintestine(), + "ileum": CelltypeVersionsMouseIleum(), "spleen": CelltypeVersionsMouseSpleen(), "stomach": CelltypeVersionsMouseStomach(), - "testis": CelltypeVersionsMouseTestis(), + "malegonad": CelltypeVersionsMouseMalegonad(), "thymus": CelltypeVersionsMouseThymus(), "tongue": CelltypeVersionsMouseTongue(), - "trachae": CelltypeVersionsMouseTrachae(), + "trachea": CelltypeVersionsMouseTrachea(), "uterus": CelltypeVersionsMouseUterus() } diff --git a/sfaira/versions/celltype_versions/mouse/fat.py b/sfaira/versions/celltype_versions/mouse/adipose.py similarity index 76% rename from sfaira/versions/celltype_versions/mouse/fat.py rename to sfaira/versions/celltype_versions/mouse/adipose.py index 8df7c99a1..5b390523d 100644 --- a/sfaira/versions/celltype_versions/mouse/fat.py +++ b/sfaira/versions/celltype_versions/mouse/adipose.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_FAT_V0 = [ +CELLTYPES_MOUSE_ADIPOSE_V0 = [ ["B cell", "CL:0000236"], ["CD4-positive, alpha-beta T cell", "nan"], ["CD8-positive, alpha-beta T cell", "nan"], @@ -14,7 +14,7 @@ ["NK cell", "CL:0000623"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_FAT_V0 = { +ONTOLOGIES_MOUSE_ADIPOSE_V0 = { "names": { "lymphocyte": [ "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", @@ -26,13 +26,13 @@ } -class CelltypeVersionsMouseFat(CelltypeVersionsBase): +class CelltypeVersionsMouseAdipose(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_FAT_V0 + "0": CELLTYPES_MOUSE_ADIPOSE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_FAT_V0 + "0": ONTOLOGIES_MOUSE_ADIPOSE_V0 } - super(CelltypeVersionsMouseFat, self).__init__(**kwargs) + super(CelltypeVersionsMouseAdipose, self).__init__(**kwargs) diff --git 
a/sfaira/versions/celltype_versions/mouse/peripheral_blood.py b/sfaira/versions/celltype_versions/mouse/blood.py similarity index 60% rename from sfaira/versions/celltype_versions/mouse/peripheral_blood.py rename to sfaira/versions/celltype_versions/mouse/blood.py index a381cd5ae..cc4613157 100644 --- a/sfaira/versions/celltype_versions/mouse/peripheral_blood.py +++ b/sfaira/versions/celltype_versions/mouse/blood.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_PERIPHERALBLOOD_V0 = [ +CELLTYPES_MOUSE_BLOOD_V0 = [ ["B cell", "CL:0000236"], ["macrophage", "CL:0000235"], ["T cell", "CL:0000084"], @@ -13,19 +13,19 @@ ["basophil", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_PERIPHERALBLOOD_V0 = { +ONTOLOGIES_MOUSE_BLOOD_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMousePeripheralblood(CelltypeVersionsBase): +class CelltypeVersionsMouseBlood(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_PERIPHERALBLOOD_V0 + "0": CELLTYPES_MOUSE_BLOOD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_PERIPHERALBLOOD_V0 + "0": ONTOLOGIES_MOUSE_BLOOD_V0 } - super(CelltypeVersionsMousePeripheralblood, self).__init__(**kwargs) + super(CelltypeVersionsMouseBlood, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/marrow.py b/sfaira/versions/celltype_versions/mouse/bone.py similarity index 83% rename from sfaira/versions/celltype_versions/mouse/marrow.py rename to sfaira/versions/celltype_versions/mouse/bone.py index a9f2f2afc..8cadbb0a2 100644 --- a/sfaira/versions/celltype_versions/mouse/marrow.py +++ b/sfaira/versions/celltype_versions/mouse/bone.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_MARROW_V0 = [ +CELLTYPES_MOUSE_BONE_V0 = [ ["basophil", "CL:0000767"], ["CD4-positive, alpha-beta T cell", "nan"], ["dendritic cell", "nan"], @@ -31,7 +31,7 @@ ["promonocyte", "CL:0000559"], ["unknown", "nan"] ] 
-ONTOLOGIES_MOUSE_MARROW_V0 = { +ONTOLOGIES_MOUSE_BONE_V0 = { "names": { "granulocyte": ["basophil", "neutrophil", "mast cell"], "mature alpha-beta T cell": ["CD4-positive, alpha-beta T cell"] @@ -40,13 +40,13 @@ } -class CelltypeVersionsMouseMarrow(CelltypeVersionsBase): +class CelltypeVersionsMouseBone(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_MARROW_V0 + "0": CELLTYPES_MOUSE_BONE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_MARROW_V0 + "0": ONTOLOGIES_MOUSE_BONE_V0 } - super(CelltypeVersionsMouseMarrow, self).__init__(**kwargs) + super(CelltypeVersionsMouseBone, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/large_intestine.py b/sfaira/versions/celltype_versions/mouse/colon.py similarity index 67% rename from sfaira/versions/celltype_versions/mouse/large_intestine.py rename to sfaira/versions/celltype_versions/mouse/colon.py index 16248610e..c901104ce 100644 --- a/sfaira/versions/celltype_versions/mouse/large_intestine.py +++ b/sfaira/versions/celltype_versions/mouse/colon.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_LARGEINTESTINE_V0 = [ +CELLTYPES_MOUSE_COLON_V0 = [ ["Brush cell of epithelium proper of large intestine", "CL:0002203"], ["enterocyte of epithelium of large intestine", "CL:0002071"], ["enteroendocrine cell", "CL:0000164"], @@ -12,19 +12,19 @@ ["secretory cell", "CL:0000151"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_LARGEINTESTINE_V0 = { +ONTOLOGIES_MOUSE_COLON_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMouseLargeintestine(CelltypeVersionsBase): +class CelltypeVersionsMouseColon(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_LARGEINTESTINE_V0 + "0": CELLTYPES_MOUSE_COLON_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_LARGEINTESTINE_V0 + "0": ONTOLOGIES_MOUSE_COLON_V0 } - super(CelltypeVersionsMouseLargeintestine, 
self).__init__(**kwargs) + super(CelltypeVersionsMouseColon, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/ovary.py b/sfaira/versions/celltype_versions/mouse/femalegonad.py similarity index 67% rename from sfaira/versions/celltype_versions/mouse/ovary.py rename to sfaira/versions/celltype_versions/mouse/femalegonad.py index b2c10f074..19278ed31 100644 --- a/sfaira/versions/celltype_versions/mouse/ovary.py +++ b/sfaira/versions/celltype_versions/mouse/femalegonad.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_OVARY_V0 = [ +CELLTYPES_MOUSE_FEMALEGONAD_V0 = [ ["cumulus cell", "nan"], ["granulosa cell", "nan"], ["large luteal cell", "nan"], @@ -13,7 +13,7 @@ ["thecal cell", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_OVARY_V0 = { +ONTOLOGIES_MOUSE_FEMALEGONAD_V0 = { "names": { 'luteal cell': ['small luteal cell', 'large luteal cell'], }, @@ -21,13 +21,13 @@ } -class CelltypeVersionsMouseOvary(CelltypeVersionsBase): +class CelltypeVersionsMouseFemalegonad(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_OVARY_V0 + "0": CELLTYPES_MOUSE_FEMALEGONAD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_OVARY_V0 + "0": ONTOLOGIES_MOUSE_FEMALEGONAD_V0 } - super(CelltypeVersionsMouseOvary, self).__init__(**kwargs) + super(CelltypeVersionsMouseFemalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/small_intestine.py b/sfaira/versions/celltype_versions/mouse/ileum.py similarity index 64% rename from sfaira/versions/celltype_versions/mouse/small_intestine.py rename to sfaira/versions/celltype_versions/mouse/ileum.py index c350d7f46..1f190bd5c 100644 --- a/sfaira/versions/celltype_versions/mouse/small_intestine.py +++ b/sfaira/versions/celltype_versions/mouse/ileum.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_SMALLINTESTINE_V0 = [ +CELLTYPES_MOUSE_ILEUM_V0 = [ ["B cell", 
"CL:0000236"], ["macrophage", "CL:0000235"], ["T cell", "CL:0000084"], @@ -15,19 +15,19 @@ ["erythroblast", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_SMALLINTESTINE_V0 = { +ONTOLOGIES_MOUSE_ILEUM_V0 = { "names": {}, "ontology_ids": {}, } -class CelltypeVersionsMouseSmallintestine(CelltypeVersionsBase): +class CelltypeVersionsMouseIleum(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_SMALLINTESTINE_V0 + "0": CELLTYPES_MOUSE_ILEUM_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_SMALLINTESTINE_V0 + "0": ONTOLOGIES_MOUSE_ILEUM_V0 } - super(CelltypeVersionsMouseSmallintestine, self).__init__(**kwargs) + super(CelltypeVersionsMouseIleum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/testis.py b/sfaira/versions/celltype_versions/mouse/malegonad.py similarity index 66% rename from sfaira/versions/celltype_versions/mouse/testis.py rename to sfaira/versions/celltype_versions/mouse/malegonad.py index 78b9b8b61..f311320a7 100644 --- a/sfaira/versions/celltype_versions/mouse/testis.py +++ b/sfaira/versions/celltype_versions/mouse/malegonad.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_TESTIS_V0 = [ +CELLTYPES_MOUSE_MALEGONAD_V0 = [ ["macrophage", "CL:0000235"], ["leydig cell", "nan"], ["elongating spermatid", "nan"], @@ -14,20 +14,20 @@ ["spermatid", "nan"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_TESTIS_V0 = { +ONTOLOGIES_MOUSE_MALEGONAD_V0 = { "names": { }, "ontology_ids": {}, } -class CelltypeVersionsMouseTestis(CelltypeVersionsBase): +class CelltypeVersionsMouseMalegonad(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_TESTIS_V0 + "0": CELLTYPES_MOUSE_MALEGONAD_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_TESTIS_V0 + "0": ONTOLOGIES_MOUSE_MALEGONAD_V0 } - super(CelltypeVersionsMouseTestis, self).__init__(**kwargs) + super(CelltypeVersionsMouseMalegonad, self).__init__(**kwargs) diff 
--git a/sfaira/versions/celltype_versions/mouse/mammary_gland.py b/sfaira/versions/celltype_versions/mouse/mammarygland.py similarity index 100% rename from sfaira/versions/celltype_versions/mouse/mammary_gland.py rename to sfaira/versions/celltype_versions/mouse/mammarygland.py diff --git a/sfaira/versions/celltype_versions/mouse/limb_muscle.py b/sfaira/versions/celltype_versions/mouse/muscle.py similarity index 72% rename from sfaira/versions/celltype_versions/mouse/limb_muscle.py rename to sfaira/versions/celltype_versions/mouse/muscle.py index 8ac490914..b2ffdc66d 100644 --- a/sfaira/versions/celltype_versions/mouse/limb_muscle.py +++ b/sfaira/versions/celltype_versions/mouse/muscle.py @@ -1,7 +1,7 @@ from .external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_LIMBMUSCLE_V0 = [ +CELLTYPES_MOUSE_MUSCLE_V0 = [ ["B cell", "CL:0000236"], ["dendritic cell", "nan"], ["endothelial cell", "CL:0000115"], @@ -19,20 +19,20 @@ ["T cell", "CL:0000084"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_LIMBMUSCLE_V0 = { +ONTOLOGIES_MOUSE_MUSCLE_V0 = { "names": { }, "ontology_ids": {}, } -class CelltypeVersionsMouseLimbmuscle(CelltypeVersionsBase): +class CelltypeVersionsMouseMuscle(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_LIMBMUSCLE_V0 + "0": CELLTYPES_MOUSE_MUSCLE_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_LIMBMUSCLE_V0 + "0": ONTOLOGIES_MOUSE_MUSCLE_V0 } - super(CelltypeVersionsMouseLimbmuscle, self).__init__(**kwargs) + super(CelltypeVersionsMouseMuscle, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/trachae.py b/sfaira/versions/celltype_versions/mouse/trachea.py similarity index 77% rename from sfaira/versions/celltype_versions/mouse/trachae.py rename to sfaira/versions/celltype_versions/mouse/trachea.py index 68d58e0d5..57ad18f33 100644 --- a/sfaira/versions/celltype_versions/mouse/trachae.py +++ b/sfaira/versions/celltype_versions/mouse/trachea.py @@ -1,7 +1,7 @@ from 
.external import CelltypeVersionsBase # Version 0 -CELLTYPES_MOUSE_TRACHAE_V0 = [ +CELLTYPES_MOUSE_TRACHEA_V0 = [ ["basal epithelial cell of tracheobronchial tree", "CL:0002329"], ["chondrocyte", "CL:0000138"], ["ciliated columnar cell of tracheobronchial tree", "CL:0002145"], @@ -19,7 +19,7 @@ ["T cell", "CL:0000084"], ["unknown", "nan"] ] -ONTOLOGIES_MOUSE_TRACHAE_V0 = { +ONTOLOGIES_MOUSE_TRACHEA_V0 = { "names": { 'blood cell': ["granulocyte", "macrophage", "T cell"] }, @@ -27,13 +27,13 @@ } -class CelltypeVersionsMouseTrachae(CelltypeVersionsBase): +class CelltypeVersionsMouseTrachea(CelltypeVersionsBase): def __init__(self, **kwargs): self.celltype_universe = { - "0": CELLTYPES_MOUSE_TRACHAE_V0 + "0": CELLTYPES_MOUSE_TRACHEA_V0 } self.ontology = { - "0": ONTOLOGIES_MOUSE_TRACHAE_V0 + "0": ONTOLOGIES_MOUSE_TRACHEA_V0 } - super(CelltypeVersionsMouseTrachae, self).__init__(**kwargs) + super(CelltypeVersionsMouseTrachea, self).__init__(**kwargs) diff --git a/sfaira/versions/genome_versions/class_interface.py b/sfaira/versions/genome_versions/class_interface.py index a7ac20546..9b28a5994 100644 --- a/sfaira/versions/genome_versions/class_interface.py +++ b/sfaira/versions/genome_versions/class_interface.py @@ -14,11 +14,30 @@ def __init__( ): self.species = species if self.species == "human": - from .human import GenomeContainer + try: + from sfaira_extension.versions.genome_versions.human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + from .human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") + except ImportError: + from .human import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") elif self.species == "mouse": - from .mouse import GenomeContainer + try: + from sfaira_extension.versions.genome_versions.mouse import GenomeContainer + if genome not in 
GenomeContainer.available_genomes: + from .mouse import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") + except ImportError: + from .mouse import GenomeContainer + if genome not in GenomeContainer.available_genomes: + raise ValueError(f"Genome {genome} not recognised.") else: - raise ValueError("species %s not recognized" % species) + raise ValueError(f"Species {species} not recognised.") + self.gc = GenomeContainer() self.set_genome(genome=genome) diff --git a/sfaira/versions/genome_versions/human/genome_container.py b/sfaira/versions/genome_versions/human/genome_container.py index 1065497f2..80e2bd8a3 100644 --- a/sfaira/versions/genome_versions/human/genome_container.py +++ b/sfaira/versions/genome_versions/human/genome_container.py @@ -5,6 +5,7 @@ class GenomeContainer: + available_genomes = ["Homo_sapiens_GRCh38_97"] def __init__(self): self.genomes = { @@ -15,4 +16,4 @@ def __init__(self): } def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) \ No newline at end of file + return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/mouse/genome_container.py b/sfaira/versions/genome_versions/mouse/genome_container.py index 7425c9421..ce9d047ab 100644 --- a/sfaira/versions/genome_versions/mouse/genome_container.py +++ b/sfaira/versions/genome_versions/mouse/genome_container.py @@ -5,6 +5,7 @@ class GenomeContainer: + available_genomes = ["Mus_musculus_GRCm38_97"] def __init__(self): self.genomes = { diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py b/sfaira/versions/topology_versions/human/celltype/celltypemarker.py index 83971dd50..a31807448 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py +++ b/sfaira/versions/topology_versions/human/celltype/celltypemarker.py @@ -15,10 
+15,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.celltype.CELLTYPEMARKER_TOPOLOGIES - for k in CELLTYPEMARKER_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMARKER_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.celltype import CELLTYPEMARKER_TOPOLOGIES as CELLTYPEMARKER_TOPOLOGIES_EXTENSION + CELLTYPEMARKER_TOPOLOGIES = { + **CELLTYPEMARKER_TOPOLOGIES, + **CELLTYPEMARKER_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py b/sfaira/versions/topology_versions/human/celltype/celltypemlp.py index e184b2cfa..827ffba91 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py +++ b/sfaira/versions/topology_versions/human/celltype/celltypemlp.py @@ -67,10 +67,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.celltype.CELLTYPEMLP_TOPOLOGIES - for k in CELLTYPEMLP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMLP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.celltype import CELLTYPEMLP_TOPOLOGIES as CELLTYPEMLP_TOPOLOGIES_EXTENSION + CELLTYPEMLP_TOPOLOGIES = { + **CELLTYPEMLP_TOPOLOGIES, + **CELLTYPEMLP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/ae.py b/sfaira/versions/topology_versions/human/embedding/ae.py index 0a4956afb..225100769 100644 --- a/sfaira/versions/topology_versions/human/embedding/ae.py +++ b/sfaira/versions/topology_versions/human/embedding/ae.py @@ -62,10 +62,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.AE_TOPOLOGIES - for k in 
AE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - AE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import AE_TOPOLOGIES as AE_TOPOLOGIES_EXTENSION + AE_TOPOLOGIES = { + **AE_TOPOLOGIES, + **AE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/linear.py b/sfaira/versions/topology_versions/human/embedding/linear.py index 5b16800a7..80f9edeca 100644 --- a/sfaira/versions/topology_versions/human/embedding/linear.py +++ b/sfaira/versions/topology_versions/human/embedding/linear.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.LINEAR_TOPOLOGIES - for k in LINEAR_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - LINEAR_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import LINEAR_TOPOLOGIES as LINEAR_TOPOLOGIES_EXTENSION + LINEAR_TOPOLOGIES = { + **LINEAR_TOPOLOGIES, + **LINEAR_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/nmf.py b/sfaira/versions/topology_versions/human/embedding/nmf.py index 74975ddd0..d006be9cb 100644 --- a/sfaira/versions/topology_versions/human/embedding/nmf.py +++ b/sfaira/versions/topology_versions/human/embedding/nmf.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.NMF_TOPOLOGIES - for k in NMF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - NMF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import NMF_TOPOLOGIES as NMF_TOPOLOGIES_EXTENSION + NMF_TOPOLOGIES = { + **NMF_TOPOLOGIES, + **NMF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git 
a/sfaira/versions/topology_versions/human/embedding/vae.py b/sfaira/versions/topology_versions/human/embedding/vae.py index 8906ace58..535a907c8 100644 --- a/sfaira/versions/topology_versions/human/embedding/vae.py +++ b/sfaira/versions/topology_versions/human/embedding/vae.py @@ -58,10 +58,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAE_TOPOLOGIES - for k in VAE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAE_TOPOLOGIES as VAE_TOPOLOGIES_EXTENSION + VAE_TOPOLOGIES = { + **VAE_TOPOLOGIES, + **VAE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py index db0f678cd..0602ac457 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py @@ -31,10 +31,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAEIAF_TOPOLOGIES - for k in VAEIAF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEIAF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAEIAF_TOPOLOGIES as VAEIAF_TOPOLOGIES_EXTENSION + VAEIAF_TOPOLOGIES = { + **VAEIAF_TOPOLOGIES, + **VAEIAF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/human/embedding/vaevamp.py b/sfaira/versions/topology_versions/human/embedding/vaevamp.py index 7b4d1585c..d4fff9f69 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaevamp.py +++ b/sfaira/versions/topology_versions/human/embedding/vaevamp.py @@ -7,8 +7,8 @@ "l2_coef": 0., "dropout_rate": 0., 
"batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", + "activation": "selu", + "init": "lecun_normal", "output_layer": "nb_shared_disp" } }, @@ -20,8 +20,8 @@ "l2_coef": 0., "dropout_rate": 0., "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", + "activation": "selu", + "init": "lecun_normal", "output_layer": "nb_shared_disp" } } @@ -29,10 +29,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.human.embedding.VAEVAMP_TOPOLOGIES - for k in VAEVAMP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEVAMP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.human.embedding import VAEVAMP_TOPOLOGIES as VAEVAMP_TOPOLOGIES_EXTENSION + VAEVAMP_TOPOLOGIES = { + **VAEVAMP_TOPOLOGIES, + **VAEVAMP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py b/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py index 6548bd428..8043c48ad 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py +++ b/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py @@ -15,10 +15,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.celltype.CELLTYPEMARKER_TOPOLOGIES - for k in CELLTYPEMARKER_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMARKER_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.celltype import CELLTYPEMARKER_TOPOLOGIES as CELLTYPEMARKER_TOPOLOGIES_EXTENSION + CELLTYPEMARKER_TOPOLOGIES = { + **CELLTYPEMARKER_TOPOLOGIES, + **CELLTYPEMARKER_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py b/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py 
index 9117eb2cb..97d029fb9 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py +++ b/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py @@ -67,10 +67,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.celltype.CELLTYPEMLP_TOPOLOGIES - for k in CELLTYPEMLP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - CELLTYPEMLP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.celltype import CELLTYPEMLP_TOPOLOGIES as CELLTYPEMLP_TOPOLOGIES_EXTENSION + CELLTYPEMLP_TOPOLOGIES = { + **CELLTYPEMLP_TOPOLOGIES, + **CELLTYPEMLP_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/ae.py b/sfaira/versions/topology_versions/mouse/embedding/ae.py index f10ab4e4b..4c628642a 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/ae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/ae.py @@ -62,10 +62,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.AE_TOPOLOGIES - for k in AE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - AE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import AE_TOPOLOGIES as AE_TOPOLOGIES_EXTENSION + AE_TOPOLOGIES = { + **AE_TOPOLOGIES, + **AE_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/linear.py b/sfaira/versions/topology_versions/mouse/embedding/linear.py index da9bff3af..cd07f0366 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/linear.py +++ b/sfaira/versions/topology_versions/mouse/embedding/linear.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = 
sfairae.versions.topology_versions.mouse.embedding.LINEAR_TOPOLOGIES - for k in LINEAR_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - LINEAR_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import LINEAR_TOPOLOGIES as LINEAR_TOPOLOGIES_EXTENSION + LINEAR_TOPOLOGIES = { + **LINEAR_TOPOLOGIES, + **LINEAR_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/nmf.py b/sfaira/versions/topology_versions/mouse/embedding/nmf.py index 913139b12..65b2b44a3 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/nmf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/nmf.py @@ -35,10 +35,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.NMF_TOPOLOGIES - for k in NMF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - NMF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import NMF_TOPOLOGIES as NMF_TOPOLOGIES_EXTENSION + NMF_TOPOLOGIES = { + **NMF_TOPOLOGIES, + **NMF_TOPOLOGIES_EXTENSION + } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/vae.py b/sfaira/versions/topology_versions/mouse/embedding/vae.py index c23307f56..49b45b01f 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vae.py @@ -58,10 +58,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAE_TOPOLOGIES - for k in VAE_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAE_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAE_TOPOLOGIES as VAE_TOPOLOGIES_EXTENSION + VAE_TOPOLOGIES = { + **VAE_TOPOLOGIES, + **VAE_TOPOLOGIES_EXTENSION 
+ } except ImportError: pass diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py index 06dd826ce..d6dd458b2 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py @@ -31,10 +31,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAEIAF_TOPOLOGIES - for k in VAEIAF_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEIAF_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAEIAF_TOPOLOGIES as VAEIAF_TOPOLOGIES_EXTENSION + VAEIAF_TOPOLOGIES = { + **VAEIAF_TOPOLOGIES, + **VAEIAF_TOPOLOGIES_EXTENSION + } except ImportError: - pass + pass \ No newline at end of file diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py b/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py index f88b3488b..33e488224 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py @@ -29,10 +29,10 @@ # Load versions from extension if available: try: - import sfaira_extension.api as sfairae - ADD_TOPOLOGIES = sfairae.versions.topology_versions.mouse.embedding.VAEVAMP_TOPOLOGIES - for k in VAEVAMP_TOPOLOGIES.keys(): - if k in ADD_TOPOLOGIES.keys(): - VAEVAMP_TOPOLOGIES.update(ADD_TOPOLOGIES) + from sfaira_extension.versions.topology_versions.mouse.embedding import VAEVAMP_TOPOLOGIES as VAEVAMP_TOPOLOGIES_EXTENSION + VAEVAMP_TOPOLOGIES = { + **VAEVAMP_TOPOLOGIES, + **VAEVAMP_TOPOLOGIES_EXTENSION + } except ImportError: pass From ee92ef83c4247f1804b1468b59a7087b9cb2749d Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 10 Dec 2020 12:05:19 +0100 Subject: [PATCH 016/161] Dev merge fix (#28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add plot_npc and plot_active_latent_units (#9) * add plot_npc and plot_active_latent_units * make sure handling of z and z_mean is consistent for VAE embeddings * clean up and documentation * formatting Co-authored-by: Martin König Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> * added data loader for interactive workflows with unprocessed data * made cell type loading optional in dataset .load() * enabled usage of type estimator on data without labels in prediction mode * recursively search custom model repo for weights files * sort model lookuptable alphabetically before writing it * make sure mode_path is set correctly in model_lookuptable when recursive weights loading is used * fix os.path.join usage in dataloaders * replace path handling through string concatenations with os.path.join and f-strings * fix bug in lookup table writing * add model file path to lookup table * reset index in model lookuptable before saving * add method to user interface for pushing local model weights to zenodo * fix bug in user interface * fix bug in summaries.py * use absolute model paths when model_lookuptable is used * fix bug in pretrained weights loading * fix bug in pretrained weights loading * automatically create an InteractiveDataset when loading data through the UI * fix bug in UI data loading * Explicitly cast indices and indptr of final backed file to int64.
(#17) For the background on this: https://github.com/theislab/anndata/issues/453 * update human lung dataset doi * align mouse organ names with human organ names * fix typo in trachea organ naming in mouse * rename mouse ovary organ to femalegonad * rename mouse ovary organ to femalegonad * sort by model type in classwise f1 heatmap plot * another hacky solution to ensure a summary tab can be created when both vae and other models are loaded at once * allow custom metadata in zenodo submission * do not return doi but deposit url after depositing to zenodo sandbox as dois don't work on sandbox * updated model zoo description * recognise all .h5 and .data-0000... files as sfaira weights when constructing lookuptable * Update README.rst * Add selu activation and lecun_normal weight_init scheme for human VAEVAMP. (#19) * update sfaira repo url and handle .h5 extension in model lookuptable id * add meta_data download information to all human dataloaders * updated docs * updated reference to README in docs * updated index * included reference to svensson et al data base in docs * fixed typo in docs * fixed typos in docs * restructured docs * fixed bug in reference roadmap in docs * updated data and model zoo description * added summary picture into index of docs * fixed typo in docs * updated summary panel * add badges to readme and docs index * updated summary panel (#20) * Doc updates (#21) * updated summary panel * fixed concept figure references * Doc updates (#22) * updated zoo panels * move from `import sfaira.api as sfaira` to `import sfaira` and from `import sfaira_extension.api as sfairae` to `import sfaira_extension` * add custom genomes to sfaira_extension * fix loading of custom topology versions from sfaira_extension * fix circular imports between sfaira_extension and sfaira * fix dataloader * fix celltype versioning through sfaira_extension * fix celltype versioning through sfaira_extension * formatting * Doc updates (#25) * added mention of download scripts
into docs Co-authored-by: mk017 Co-authored-by: Martin König Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> Co-authored-by: Abdul Moeed From af3e575991f848e5d4d486714512c5799bfff5f2 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 10 Dec 2020 15:48:37 +0100 Subject: [PATCH 017/161] fixed missing import in mouse trachea (#29) * fixed missing import in mouse trachea * fixed meta data accession and added automatic subsetting of datasetgroups during loading subsets to available data sets * depreceated api and added consts into __init__ api --- sfaira/__init__.py | 1 + sfaira/api/__init__.py | 0 sfaira/api/consts.py | 2 - sfaira/consts/adata_fields.py | 7 + sfaira/data/base.py | 238 +++++++++++++++----------- sfaira/data/mouse/trachea/external.py | 1 + 6 files changed, 145 insertions(+), 104 deletions(-) delete mode 100644 sfaira/api/__init__.py delete mode 100644 sfaira/api/consts.py diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 50910f0a2..319b4e98f 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -18,6 +18,7 @@ "david.fischer@helmholtz-muenchen.de" ]) +import sfaira.consts as consts import sfaira.data import sfaira.genomes import sfaira.models diff --git a/sfaira/api/__init__.py b/sfaira/api/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/api/consts.py b/sfaira/api/consts.py deleted file mode 100644 index 00cc5cb17..000000000 --- a/sfaira/api/consts.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 3c777daec..a9488c018 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -21,6 +21,7 @@ class ADATA_IDS_BASE: _gene_id_names: str _healthy: str _id: str + _ncells: str _normalization: str _organ: str _protocol: str @@ -80,6 +81,10 @@ def 
healthy(self): def id(self): return self._id + @property + def ncells(self): + return self._ncells + @property def normalization(self): return self._normalization @@ -154,6 +159,7 @@ def __init__(self): self._has_celltypes = "has_celltypes" self._healthy = "healthy" self._id = "id" + self._ncells = "ncells" self._normalization = "normalization" self._lab = "lab" self._organ = "organ" @@ -190,6 +196,7 @@ def __init__(self): self._has_celltypes = "" # TODO self._healthy = None # is inferred from _disease self._id = "" # TODO + self._ncells = "ncells" self._normalization = None # is always "counts" self._lab = "" # TODO self._organ = "" # TODO diff --git a/sfaira/data/base.py b/sfaira/data/base.py index c6b55ac21..d5b597650 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -24,17 +24,17 @@ class DatasetBase(abc.ABC): id: Union[None, str] genome: Union[None, str] - _annotated: str - _author: str - _doi: str - _download: str - _id: str - _ncells: str - _normalization: str - _organ: str - _protocol: str - _species: str - _year: str + _annotated: Union[None, bool] + _author: Union[None, str] + _doi: Union[None, str] + _download: Union[None, str] + _id: Union[None, str] + _ncells: Union[None, int] + _normalization: Union[None, str] + _organ: Union[None, str] + _protocol: Union[None, str] + _species: Union[None, str] + _year: Union[None, str] def __init__( self, @@ -50,6 +50,18 @@ def __init__( self.meta_path = meta_path self._load_raw = None + self._annotated = None + self._author = None + self._doi = None + self._download = None + self._id = None + self._ncells = None + self._normalization = None + self._organ = None + self._protocol = None + self._species = None + self._year = None + @abc.abstractmethod def _load(self, fn): pass @@ -72,7 +84,6 @@ def load( :param load_raw: Loads unprocessed version of data if available in data loader. 
:return: """ - self._load_raw = load_raw if match_to_reference and not remove_gene_version: @@ -479,148 +490,160 @@ def write_meta( if self.adata is None: self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) meta = pandas.DataFrame({ - "author": self.adata.uns[ADATA_IDS_SFAIRA.author], - "annotated": self.adata.uns[ADATA_IDS_SFAIRA.annotated], - "doi": self.adata.uns[ADATA_IDS_SFAIRA.doi], - "download": self.adata.uns[ADATA_IDS_SFAIRA.download], - "id": self.adata.uns[ADATA_IDS_SFAIRA.id], - "ncells": self.adata.n_obs, - "normalization": self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - "organ": self.adata.uns[ADATA_IDS_SFAIRA.organ], - "protocol": self.adata.uns[ADATA_IDS_SFAIRA.protocol], - "species": self.adata.uns[ADATA_IDS_SFAIRA.species], - "year": self.adata.uns[ADATA_IDS_SFAIRA.year], + ADATA_IDS_SFAIRA.annotated: self.adata.uns[ADATA_IDS_SFAIRA.annotated], + ADATA_IDS_SFAIRA.author: self.adata.uns[ADATA_IDS_SFAIRA.author], + ADATA_IDS_SFAIRA.doi: self.adata.uns[ADATA_IDS_SFAIRA.doi], + ADATA_IDS_SFAIRA.download: self.adata.uns[ADATA_IDS_SFAIRA.download], + ADATA_IDS_SFAIRA.id: self.adata.uns[ADATA_IDS_SFAIRA.id], + ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, + ADATA_IDS_SFAIRA.normalization: self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, + ADATA_IDS_SFAIRA.organ: self.adata.uns[ADATA_IDS_SFAIRA.organ], + ADATA_IDS_SFAIRA.protocol: self.adata.uns[ADATA_IDS_SFAIRA.protocol], + ADATA_IDS_SFAIRA.species: self.adata.uns[ADATA_IDS_SFAIRA.species], + ADATA_IDS_SFAIRA.year: self.adata.uns[ADATA_IDS_SFAIRA.year], }, index=range(1)) meta.to_csv(fn_meta) @property - def author(self): + def annotated(self) -> bool: + if self._annotated is not None: + return self._annotated + else: + if self.meta is None: + self.load_meta(fn=None) + return self.meta[ADATA_IDS_SFAIRA.annotated] + + @annotated.setter + def 
annotated(self, x: bool): + self._annotated = x + + @property + def author(self) -> str: if self._author is not None: return self._author else: if self.meta is None: self.load_meta(fn=None) - return self.meta["author"] + return self.meta[ADATA_IDS_SFAIRA.author] @author.setter - def author(self, x): + def author(self, x: str): self._author = x @property - def doi(self): + def doi(self) -> str: if self._doi is not None: return self._doi else: if self.meta is None: self.load_meta(fn=None) - return self.meta["doi"] + return self.meta[ADATA_IDS_SFAIRA.doi] @doi.setter - def doi(self, x): + def doi(self, x: str): self._doi = x @property - def download(self): + def download(self) -> str: if self._download is not None: return self._download else: if self.meta is None: self.load_meta(fn=None) - return self.meta["download"] + return self.meta[ADATA_IDS_SFAIRA.download] @download.setter - def download(self, x): + def download(self, x: str): self._download = x @property - def annotated(self): - if self._annotated is not None: - return self._annotated + def id(self) -> str: + if self._id is not None: + return self._id else: if self.meta is None: self.load_meta(fn=None) - return self.meta["annotated"] + return self.meta[ADATA_IDS_SFAIRA.id] - @annotated.setter - def annotated(self, x): - self._annotated = x + @id.setter + def id(self, x: str): + self._id = x @property - def id(self): - if self._id is not None: - return self._id + def ncells(self) -> int: + if self.adata is not None: + x = self.adata.n_obs + elif self._ncells is not None: + x = self._ncells else: if self.meta is None: self.load_meta(fn=None) - return self.meta["id"] - - @id.setter - def id(self, x): - self._id = x + x = self.meta[ADATA_IDS_SFAIRA.ncells] + return int(x) @property - def normalization(self): + def normalization(self) -> str: if self._normalization is not None: return self._normalization else: if self.meta is None: self.load_meta(fn=None) - return self.meta["normalization"] + return 
self.meta[ADATA_IDS_SFAIRA.normalization] @normalization.setter - def normalization(self, x): + def normalization(self, x: str): self._normalization = x @property - def organ(self): + def organ(self) -> str: if self._organ is not None: return self._organ else: if self.meta is None: self.load_meta(fn=None) - return self.meta["organ"] + return self.meta[ADATA_IDS_SFAIRA.organ] @organ.setter - def organ(self, x): + def organ(self, x: str): self._organ = x @property - def protocol(self): + def protocol(self) -> str: if self._protocol is not None: return self._protocol else: if self.meta is None: self.load_meta(fn=None) - return self.meta["protocol"] + return self.meta[ADATA_IDS_SFAIRA.protocol] @protocol.setter - def protocol(self, x): + def protocol(self, x: str): self._protocol = x @property - def species(self): + def species(self) -> str: if self._species is not None: return self._species else: if self.meta is None: self.load_meta(fn=None) - return self.meta["species"] + return self.meta[ADATA_IDS_SFAIRA.species] @species.setter - def species(self, x): + def species(self, x: str): self._species = x @property - def year(self): + def year(self) -> str: if self._year is not None: return self._year else: if self.meta is None: self.load_meta(fn=None) - return self.meta["year"] + return self.meta[ADATA_IDS_SFAIRA.year] @year.setter - def year(self, x): + def year(self, x: str): self._year = x @@ -638,7 +661,6 @@ class DatasetGroupBase(abc.ABC): """ datasets: Dict - def subset_organs(self, subset: Union[None, List]): for i in self.ids: if self.datasets[i].organ == "mixed": @@ -656,6 +678,8 @@ def load_all( ): """ + Subsets self.datasets to the data sets that were found. + :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param annotated_only: :param remove_gene_version: @@ -663,20 +687,25 @@ def load_all( :param load_raw: Loads unprocessed version of data if available in data loader. 
:return: """ - for i in self.ids: - if self.datasets[i].annotated or not annotated_only: - self.datasets[i].load( - celltype_version=self.format_type_version(celltype_version), - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw - ) + for x in self.ids: + try: + if self.datasets[x].annotated or not annotated_only: + self.datasets[x].load( + celltype_version=self.format_type_version(celltype_version), + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=load_raw + ) + except FileNotFoundError: + del self.datasets[x] def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: List[np.ndarray], annotated_only: bool = False, celltype_version: Union[str, None] = None): """ Loads data set group into slice of backed anndata object. + Subsets self.datasets to the data sets that were found. + :param adata_backed: :param genome: Genome container target genomes loaded. :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a @@ -687,13 +716,16 @@ def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: Lis :return: New row index for next element to be written into backed anndata. 
""" i = 0 - for ident in self.ids: + for x in self.ids: # if this is for celltype prediction, only load the data with have celltype annotation - if self.datasets[ident].annotated or not annotated_only: - self.datasets[ident].load_tobacked( - adata_backed=adata_backed, genome=genome, idx=idx[i], - celltype_version=self.format_type_version(celltype_version)) - i += 1 + try: + if self.datasets[x].annotated or not annotated_only: + self.datasets[x].load_tobacked( + adata_backed=adata_backed, genome=genome, idx=idx[i], + celltype_version=self.format_type_version(celltype_version)) + i += 1 + except FileNotFoundError: + del self.datasets[x] @property def ids(self): @@ -804,22 +836,21 @@ def obs_concat(self, keys: Union[list, None] = None): )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat - def ncells(self, annotated_only: bool = False): - cells = [] - for ident in self.ids: - # if this is for celltype prediction, only load the data with have celltype annotation - if self.datasets[ident].has_celltypes or not annotated_only: - cells.append(self.datasets[ident].ncells) - return sum(cells) - - def ncells_bydataset(self, annotated_only: bool = False): + def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: cells = [] - for ident in self.ids: + for x in self.ids: # if this is for celltype prediction, only load the data with have celltype annotation - if self.datasets[ident].has_celltypes or not annotated_only: - cells.append(self.datasets[ident].ncells) + try: + if self.datasets[x].has_celltypes or not annotated_only: + cells.append(self.datasets[x].ncells) + except FileNotFoundError: + del self.datasets[x] return cells + def ncells(self, annotated_only: bool = False): + cells = self.ncells_bydataset(annotated_only=annotated_only) + return np.sum(cells) + def assert_celltype_version_key( self, celltype_version @@ -912,9 +943,6 @@ def get_gc( raise ValueError(f"Genome {genome} not recognised. 
Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") return g - def ncells(self, annotated_only: bool = False): - return sum([x.ncells(annotated_only=annotated_only) for x in self.dataset_groups]) - def ncells_bydataset(self, annotated_only: bool = False): """ List of list of length of all data sets by data set group. @@ -927,7 +955,10 @@ def ncells_bydataset_flat(self, annotated_only: bool = False): Flattened list of length of all data sets. :return: """ - return [xx for x in self.dataset_groups for xx in x.ncells_bydataset(annotated_only=annotated_only)] + return [xx for x in self.ncells_bydataset(annotated_only=annotated_only) for xx in x] + + def ncells(self, annotated_only: bool = False): + return np.sum(self.ncells_bydataset(annotated_only=annotated_only)) def set_dataset_groups(self, dataset_groups: List[DatasetGroupBase]): self.dataset_groups = dataset_groups @@ -1022,17 +1053,17 @@ def load_all_tobacked( X.indptr = X.indptr.astype(np.int64) self.adata.X = X keys = [ + ADATA_IDS_SFAIRA.annotated, ADATA_IDS_SFAIRA.author, - ADATA_IDS_SFAIRA.year, - ADATA_IDS_SFAIRA.protocol, - ADATA_IDS_SFAIRA.organ, - ADATA_IDS_SFAIRA.subtissue, + ADATA_IDS_SFAIRA.dataset, ADATA_IDS_SFAIRA.cell_ontology_class, - ADATA_IDS_SFAIRA.state_exact, - ADATA_IDS_SFAIRA.normalization, ADATA_IDS_SFAIRA.dev_stage, - ADATA_IDS_SFAIRA.annotated, - ADATA_IDS_SFAIRA.dataset + ADATA_IDS_SFAIRA.normalization, + ADATA_IDS_SFAIRA.organ, + ADATA_IDS_SFAIRA.protocol, + ADATA_IDS_SFAIRA.state_exact, + ADATA_IDS_SFAIRA.subtissue, + ADATA_IDS_SFAIRA.year, ] if scatter_update: self.adata.obs = pandas.DataFrame({ @@ -1047,7 +1078,10 @@ def load_all_tobacked( np.random.shuffle(idx_vector) idx_ls = [] row = 0 - for x in self.ncells_bydataset(annotated_only=annotated_only): + ncells = self.ncells_bydataset(annotated_only=annotated_only) + if np.all([len(x) == 0 for x in ncells]): + raise ValueError("no datasets found") + for x in ncells: temp_ls = [] for y in x: 
temp_ls.append(idx_vector[row:(row+y)]) diff --git a/sfaira/data/mouse/trachea/external.py b/sfaira/data/mouse/trachea/external.py index 9f4e3db68..cc51e6fda 100644 --- a/sfaira/data/mouse/trachea/external.py +++ b/sfaira/data/mouse/trachea/external.py @@ -1 +1,2 @@ from sfaira.data import DatasetBase, DatasetGroupBase +from sfaira.consts import ADATA_IDS_SFAIRA From 273d7b6688f34fc72db1650ef29c61982ccbfa34 Mon Sep 17 00:00:00 2001 From: le-ander <20015434+le-ander@users.noreply.github.com> Date: Thu, 10 Dec 2020 16:29:19 +0100 Subject: [PATCH 018/161] pass paths correctly to extension datasets --- sfaira/data/human/adipose/human_adipose.py | 2 +- sfaira/data/human/adrenalgland/human_adrenalgland.py | 2 +- sfaira/data/human/artery/human_artery.py | 2 +- sfaira/data/human/bladder/human_bladder.py | 2 +- sfaira/data/human/blood/human_blood.py | 2 +- sfaira/data/human/bone/human_bone.py | 2 +- sfaira/data/human/brain/human_brain.py | 2 +- sfaira/data/human/calvaria/human_calvaria.py | 2 +- sfaira/data/human/cervix/human_cervix.py | 2 +- sfaira/data/human/chorionicvillus/human_chorionicvillus.py | 2 +- sfaira/data/human/colon/human_colon.py | 2 +- sfaira/data/human/duodenum/human_duodenum.py | 2 +- sfaira/data/human/epityphlon/human_epityphlon.py | 2 +- sfaira/data/human/esophagus/human_esophagus.py | 2 +- sfaira/data/human/eye/human_eye.py | 2 +- sfaira/data/human/fallopiantube/human_fallopiantube.py | 2 +- sfaira/data/human/femalegonad/human_femalegonad.py | 2 +- sfaira/data/human/gallbladder/human_gallbladder.py | 2 +- sfaira/data/human/heart/human_heart.py | 2 +- sfaira/data/human/hesc/human_hesc.py | 2 +- sfaira/data/human/ileum/human_ileum.py | 2 +- sfaira/data/human/jejunum/human_jejunum.py | 2 +- sfaira/data/human/kidney/human_kidney.py | 2 +- sfaira/data/human/liver/human_liver.py | 2 +- sfaira/data/human/lung/human_lung.py | 2 +- sfaira/data/human/malegonad/human_malegonad.py | 2 +- sfaira/data/human/mixed/human_mixed.py | 2 +- 
sfaira/data/human/muscle/human_muscle.py | 2 +- sfaira/data/human/omentum/human_omentum.py | 2 +- sfaira/data/human/pancreas/human_pancreas.py | 2 +- sfaira/data/human/placenta/human_placenta.py | 2 +- sfaira/data/human/pleura/human_pleura.py | 2 +- sfaira/data/human/prostate/human_prostate.py | 2 +- sfaira/data/human/rectum/human_rectum.py | 2 +- sfaira/data/human/rib/human_rib.py | 2 +- sfaira/data/human/skin/human_skin.py | 2 +- sfaira/data/human/spinalcord/human_spinalcord.py | 2 +- sfaira/data/human/spleen/human_spleen.py | 2 +- sfaira/data/human/stomach/human_stomach.py | 2 +- sfaira/data/human/thymus/human_thymus.py | 2 +- sfaira/data/human/thyroid/human_thyroid.py | 2 +- sfaira/data/human/trachea/human_trachea.py | 2 +- sfaira/data/human/ureter/human_ureter.py | 2 +- sfaira/data/human/uterus/human_uterus.py | 2 +- sfaira/data/mouse/adipose/mouse_adipose.py | 2 +- sfaira/data/mouse/bladder/mouse_bladder.py | 2 +- sfaira/data/mouse/blood/mouse_blood.py | 2 +- sfaira/data/mouse/bone/mouse_bone.py | 2 +- sfaira/data/mouse/brain/mouse_brain.py | 2 +- sfaira/data/mouse/colon/mouse_colon.py | 2 +- sfaira/data/mouse/diaphragm/mouse_diaphragm.py | 2 +- sfaira/data/mouse/femalegonad/mouse_femalegonad.py | 2 +- sfaira/data/mouse/heart/mouse_heart.py | 2 +- sfaira/data/mouse/ileum/mouse_ileum.py | 2 +- sfaira/data/mouse/kidney/mouse_kidney.py | 2 +- sfaira/data/mouse/liver/mouse_liver.py | 2 +- sfaira/data/mouse/lung/mouse_lung.py | 2 +- sfaira/data/mouse/malegonad/mouse_malegonad.py | 2 +- sfaira/data/mouse/mammarygland/mouse_mammarygland.py | 2 +- sfaira/data/mouse/muscle/mouse_muscle.py | 2 +- sfaira/data/mouse/pancreas/mouse_pancreas.py | 2 +- sfaira/data/mouse/placenta/mouse_placenta.py | 2 +- sfaira/data/mouse/prostate/mouse_prostate.py | 2 +- sfaira/data/mouse/rib/mouse_rib.py | 2 +- sfaira/data/mouse/skin/mouse_skin.py | 2 +- sfaira/data/mouse/spleen/mouse_spleen.py | 2 +- sfaira/data/mouse/stomach/mouse_stomach.py | 2 +- 
sfaira/data/mouse/thymus/mouse_thymus.py | 2 +- sfaira/data/mouse/tongue/mouse_tongue.py | 2 +- sfaira/data/mouse/trachea/mouse_trachea.py | 2 +- sfaira/data/mouse/uterus/mouse_uterus.py | 2 +- 71 files changed, 71 insertions(+), 71 deletions(-) diff --git a/sfaira/data/human/adipose/human_adipose.py b/sfaira/data/human/adipose/human_adipose.py index 9994507ab..ca12c0d40 100644 --- a/sfaira/data/human/adipose/human_adipose.py +++ b/sfaira/data/human/adipose/human_adipose.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose().datasets) + self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland.py b/sfaira/data/human/adrenalgland/human_adrenalgland.py index 234a563a9..af942823d 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland.py @@ -31,6 +31,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupAdrenalgland - self.datasets.update(DatasetGroupAdrenalgland().datasets) + self.datasets.update(DatasetGroupAdrenalgland(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/artery/human_artery.py b/sfaira/data/human/artery/human_artery.py index 4575d1b6a..7e51a6d0c 100644 --- a/sfaira/data/human/artery/human_artery.py +++ b/sfaira/data/human/artery/human_artery.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupArtery - self.datasets.update(DatasetGroupArtery().datasets) + self.datasets.update(DatasetGroupArtery(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/bladder/human_bladder.py 
b/sfaira/data/human/bladder/human_bladder.py index d31620a7f..6efc756b8 100644 --- a/sfaira/data/human/bladder/human_bladder.py +++ b/sfaira/data/human/bladder/human_bladder.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder().datasets) + self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/blood/human_blood.py b/sfaira/data/human/blood/human_blood.py index e51216bce..1e439ff31 100644 --- a/sfaira/data/human/blood/human_blood.py +++ b/sfaira/data/human/blood/human_blood.py @@ -37,6 +37,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood().datasets) + self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/bone/human_bone.py b/sfaira/data/human/bone/human_bone.py index 4cc46ad77..3a8842e1f 100644 --- a/sfaira/data/human/bone/human_bone.py +++ b/sfaira/data/human/bone/human_bone.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupBone - self.datasets.update(DatasetGroupBone().datasets) + self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/brain/human_brain.py b/sfaira/data/human/brain/human_brain.py index 3f2eedda9..016c8fce5 100644 --- a/sfaira/data/human/brain/human_brain.py +++ b/sfaira/data/human/brain/human_brain.py @@ -33,6 +33,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain().datasets) + self.datasets.update(DatasetGroupBrain(path=path, 
meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/calvaria/human_calvaria.py b/sfaira/data/human/calvaria/human_calvaria.py index 3a101f22e..bd37cd357 100644 --- a/sfaira/data/human/calvaria/human_calvaria.py +++ b/sfaira/data/human/calvaria/human_calvaria.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupCalvaria - self.datasets.update(DatasetGroupCalvaria().datasets) + self.datasets.update(DatasetGroupCalvaria(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/cervix/human_cervix.py b/sfaira/data/human/cervix/human_cervix.py index eb65f1a1f..9468a333b 100644 --- a/sfaira/data/human/cervix/human_cervix.py +++ b/sfaira/data/human/cervix/human_cervix.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupCervix - self.datasets.update(DatasetGroupCervix().datasets) + self.datasets.update(DatasetGroupCervix(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py index 5fcebd370..030412ab7 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupChorionicvillus - self.datasets.update(DatasetGroupChorionicvillus().datasets) + self.datasets.update(DatasetGroupChorionicvillus(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/colon/human_colon.py b/sfaira/data/human/colon/human_colon.py index d86d094f4..fa2642d40 100644 --- a/sfaira/data/human/colon/human_colon.py +++ b/sfaira/data/human/colon/human_colon.py @@ -35,6 +35,6 @@ 
def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupColon - self.datasets.update(DatasetGroupColon().datasets) + self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/duodenum/human_duodenum.py b/sfaira/data/human/duodenum/human_duodenum.py index 367138896..c4d3b8bba 100644 --- a/sfaira/data/human/duodenum/human_duodenum.py +++ b/sfaira/data/human/duodenum/human_duodenum.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupDuodenum - self.datasets.update(DatasetGroupDuodenum().datasets) + self.datasets.update(DatasetGroupDuodenum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/epityphlon/human_epityphlon.py b/sfaira/data/human/epityphlon/human_epityphlon.py index bbf2297e9..3330e03bf 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon.py +++ b/sfaira/data/human/epityphlon/human_epityphlon.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupEpityphlon - self.datasets.update(DatasetGroupEpityphlon().datasets) + self.datasets.update(DatasetGroupEpityphlon(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/esophagus/human_esophagus.py b/sfaira/data/human/esophagus/human_esophagus.py index b1df21e7a..2e3df391e 100644 --- a/sfaira/data/human/esophagus/human_esophagus.py +++ b/sfaira/data/human/esophagus/human_esophagus.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupEsophagus - self.datasets.update(DatasetGroupEsophagus().datasets) + self.datasets.update(DatasetGroupEsophagus(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git 
a/sfaira/data/human/eye/human_eye.py b/sfaira/data/human/eye/human_eye.py index 227bda330..5a3c43f93 100644 --- a/sfaira/data/human/eye/human_eye.py +++ b/sfaira/data/human/eye/human_eye.py @@ -27,6 +27,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupEye - self.datasets.update(DatasetGroupEye().datasets) + self.datasets.update(DatasetGroupEye(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube.py b/sfaira/data/human/fallopiantube/human_fallopiantube.py index 739e221a0..cff1f8131 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupFallopiantube - self.datasets.update(DatasetGroupFallopiantube().datasets) + self.datasets.update(DatasetGroupFallopiantube(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/femalegonad/human_femalegonad.py b/sfaira/data/human/femalegonad/human_femalegonad.py index a4b4745fb..790322e71 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad.py +++ b/sfaira/data/human/femalegonad/human_femalegonad.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad().datasets) + self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/gallbladder/human_gallbladder.py b/sfaira/data/human/gallbladder/human_gallbladder.py index 8d5e660f5..aa015fe75 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder.py +++ b/sfaira/data/human/gallbladder/human_gallbladder.py @@ -21,6 +21,6 @@ def __init__( # Load versions from 
extension if available: try: from sfaira_extension.data.human import DatasetGroupGallbladder - self.datasets.update(DatasetGroupGallbladder().datasets) + self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/heart/human_heart.py b/sfaira/data/human/heart/human_heart.py index 5aff6fdce..a1c2195a7 100644 --- a/sfaira/data/human/heart/human_heart.py +++ b/sfaira/data/human/heart/human_heart.py @@ -27,6 +27,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart().datasets) + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/hesc/human_hesc.py b/sfaira/data/human/hesc/human_hesc.py index 3292ff6f3..ffc258d70 100644 --- a/sfaira/data/human/hesc/human_hesc.py +++ b/sfaira/data/human/hesc/human_hesc.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupHesc - self.datasets.update(DatasetGroupHesc().datasets) + self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/ileum/human_ileum.py b/sfaira/data/human/ileum/human_ileum.py index 636d508f2..53c884c69 100644 --- a/sfaira/data/human/ileum/human_ileum.py +++ b/sfaira/data/human/ileum/human_ileum.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum().datasets) + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/jejunum/human_jejunum.py b/sfaira/data/human/jejunum/human_jejunum.py index 3839f321b..0d5dba57e 100644 --- a/sfaira/data/human/jejunum/human_jejunum.py +++ 
b/sfaira/data/human/jejunum/human_jejunum.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupJejunum - self.datasets.update(DatasetGroupJejunum().datasets) + self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/kidney/human_kidney.py b/sfaira/data/human/kidney/human_kidney.py index 6cc159f97..9d4b2e4d5 100644 --- a/sfaira/data/human/kidney/human_kidney.py +++ b/sfaira/data/human/kidney/human_kidney.py @@ -39,6 +39,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney().datasets) + self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/liver/human_liver.py b/sfaira/data/human/liver/human_liver.py index bfb0e8dbb..278a6a4dc 100644 --- a/sfaira/data/human/liver/human_liver.py +++ b/sfaira/data/human/liver/human_liver.py @@ -37,6 +37,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver().datasets) + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/lung/human_lung.py b/sfaira/data/human/lung/human_lung.py index 9297fd7e9..fc29e35a3 100644 --- a/sfaira/data/human/lung/human_lung.py +++ b/sfaira/data/human/lung/human_lung.py @@ -48,6 +48,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupLung - self.datasets.update(DatasetGroupLung().datasets) + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/malegonad/human_malegonad.py 
b/sfaira/data/human/malegonad/human_malegonad.py index ea7e995df..681f1f334 100644 --- a/sfaira/data/human/malegonad/human_malegonad.py +++ b/sfaira/data/human/malegonad/human_malegonad.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad().datasets) + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/mixed/human_mixed.py b/sfaira/data/human/mixed/human_mixed.py index cbce1da35..9041ae787 100644 --- a/sfaira/data/human/mixed/human_mixed.py +++ b/sfaira/data/human/mixed/human_mixed.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupMixed - self.datasets.update(DatasetGroupMixed().datasets) + self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/muscle/human_muscle.py b/sfaira/data/human/muscle/human_muscle.py index 3ccea8560..30cdb789b 100644 --- a/sfaira/data/human/muscle/human_muscle.py +++ b/sfaira/data/human/muscle/human_muscle.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle().datasets) + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/omentum/human_omentum.py b/sfaira/data/human/omentum/human_omentum.py index 56f49739e..f16e94458 100644 --- a/sfaira/data/human/omentum/human_omentum.py +++ b/sfaira/data/human/omentum/human_omentum.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupOmentum - self.datasets.update(DatasetGroupOmentum().datasets) + 
self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/pancreas/human_pancreas.py b/sfaira/data/human/pancreas/human_pancreas.py index 84bc010fd..e609c44fe 100644 --- a/sfaira/data/human/pancreas/human_pancreas.py +++ b/sfaira/data/human/pancreas/human_pancreas.py @@ -33,6 +33,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas().datasets) + self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/placenta/human_placenta.py b/sfaira/data/human/placenta/human_placenta.py index 03a23584f..4700e8083 100644 --- a/sfaira/data/human/placenta/human_placenta.py +++ b/sfaira/data/human/placenta/human_placenta.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta().datasets) + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/pleura/human_pleura.py b/sfaira/data/human/pleura/human_pleura.py index db3707fda..da4bba12c 100644 --- a/sfaira/data/human/pleura/human_pleura.py +++ b/sfaira/data/human/pleura/human_pleura.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupPleura - self.datasets.update(DatasetGroupPleura().datasets) + self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/prostate/human_prostate.py b/sfaira/data/human/prostate/human_prostate.py index cf3a5485b..71f2a1991 100644 --- a/sfaira/data/human/prostate/human_prostate.py +++ b/sfaira/data/human/prostate/human_prostate.py @@ -23,6 +23,6 @@ 
def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate().datasets) + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/rectum/human_rectum.py b/sfaira/data/human/rectum/human_rectum.py index be4385732..bc246d917 100644 --- a/sfaira/data/human/rectum/human_rectum.py +++ b/sfaira/data/human/rectum/human_rectum.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupRectum - self.datasets.update(DatasetGroupRectum().datasets) + self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/rib/human_rib.py b/sfaira/data/human/rib/human_rib.py index 60f2c6df8..1a5481a0b 100644 --- a/sfaira/data/human/rib/human_rib.py +++ b/sfaira/data/human/rib/human_rib.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupRib - self.datasets.update(DatasetGroupRib().datasets) + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/skin/human_skin.py b/sfaira/data/human/skin/human_skin.py index 16887b9dd..db470536f 100644 --- a/sfaira/data/human/skin/human_skin.py +++ b/sfaira/data/human/skin/human_skin.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin().datasets) + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/spinalcord/human_spinalcord.py b/sfaira/data/human/spinalcord/human_spinalcord.py index b56b23b34..386ee4d15 100644 --- 
a/sfaira/data/human/spinalcord/human_spinalcord.py +++ b/sfaira/data/human/spinalcord/human_spinalcord.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupSpinalcord - self.datasets.update(DatasetGroupSpinalcord().datasets) + self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/spleen/human_spleen.py b/sfaira/data/human/spleen/human_spleen.py index 0ca8ab386..5a6f95804 100644 --- a/sfaira/data/human/spleen/human_spleen.py +++ b/sfaira/data/human/spleen/human_spleen.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen().datasets) + self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/stomach/human_stomach.py b/sfaira/data/human/stomach/human_stomach.py index 0121077f9..537ea659a 100644 --- a/sfaira/data/human/stomach/human_stomach.py +++ b/sfaira/data/human/stomach/human_stomach.py @@ -39,6 +39,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach().datasets) + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/thymus/human_thymus.py b/sfaira/data/human/thymus/human_thymus.py index 1e592a837..c8d5da0ad 100644 --- a/sfaira/data/human/thymus/human_thymus.py +++ b/sfaira/data/human/thymus/human_thymus.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus().datasets) + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) 
except ImportError: pass diff --git a/sfaira/data/human/thyroid/human_thyroid.py b/sfaira/data/human/thyroid/human_thyroid.py index 0a8cc4e0d..a30ad5c92 100644 --- a/sfaira/data/human/thyroid/human_thyroid.py +++ b/sfaira/data/human/thyroid/human_thyroid.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupThyroid - self.datasets.update(DatasetGroupThyroid().datasets) + self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/trachea/human_trachea.py b/sfaira/data/human/trachea/human_trachea.py index 6ba918535..f6b9578c2 100644 --- a/sfaira/data/human/trachea/human_trachea.py +++ b/sfaira/data/human/trachea/human_trachea.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea().datasets) + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/ureter/human_ureter.py b/sfaira/data/human/ureter/human_ureter.py index 452153557..7d3615eff 100644 --- a/sfaira/data/human/ureter/human_ureter.py +++ b/sfaira/data/human/ureter/human_ureter.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupUreter - self.datasets.update(DatasetGroupUreter().datasets) + self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/human/uterus/human_uterus.py b/sfaira/data/human/uterus/human_uterus.py index 4d8789bff..9feee712d 100644 --- a/sfaira/data/human/uterus/human_uterus.py +++ b/sfaira/data/human/uterus/human_uterus.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.human import DatasetGroupUterus 
- self.datasets.update(DatasetGroupUterus().datasets) + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/adipose/mouse_adipose.py b/sfaira/data/mouse/adipose/mouse_adipose.py index 67b4ba1c2..3493d3d21 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose.py +++ b/sfaira/data/mouse/adipose/mouse_adipose.py @@ -29,6 +29,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose().datasets) + self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/bladder/mouse_bladder.py b/sfaira/data/mouse/bladder/mouse_bladder.py index 2e2e655a6..93d08570a 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder.py +++ b/sfaira/data/mouse/bladder/mouse_bladder.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder().datasets) + self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/blood/mouse_blood.py b/sfaira/data/mouse/blood/mouse_blood.py index 6098a0870..e1dfaf511 100644 --- a/sfaira/data/mouse/blood/mouse_blood.py +++ b/sfaira/data/mouse/blood/mouse_blood.py @@ -29,6 +29,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood().datasets) + self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/bone/mouse_bone.py b/sfaira/data/mouse/bone/mouse_bone.py index 02fe69705..ff7f50125 100644 --- a/sfaira/data/mouse/bone/mouse_bone.py +++ b/sfaira/data/mouse/bone/mouse_bone.py @@ -25,6 +25,6 @@ def 
__init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupBone - self.datasets.update(DatasetGroupBone().datasets) + self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/brain/mouse_brain.py b/sfaira/data/mouse/brain/mouse_brain.py index 4e09daddc..e62393153 100644 --- a/sfaira/data/mouse/brain/mouse_brain.py +++ b/sfaira/data/mouse/brain/mouse_brain.py @@ -27,6 +27,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain().datasets) + self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/colon/mouse_colon.py b/sfaira/data/mouse/colon/mouse_colon.py index 3666f396a..3a64a819a 100644 --- a/sfaira/data/mouse/colon/mouse_colon.py +++ b/sfaira/data/mouse/colon/mouse_colon.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupColon - self.datasets.update(DatasetGroupColon().datasets) + self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py index f8fc4d5f6..0a78a5bfc 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupDiaphragm - self.datasets.update(DatasetGroupDiaphragm().datasets) + self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py 
b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py index fc35c3ef3..e8b6fecf6 100644 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad().datasets) + self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/heart/mouse_heart.py b/sfaira/data/mouse/heart/mouse_heart.py index ca7e6af3d..11b15636e 100644 --- a/sfaira/data/mouse/heart/mouse_heart.py +++ b/sfaira/data/mouse/heart/mouse_heart.py @@ -26,6 +26,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart().datasets) + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/ileum/mouse_ileum.py b/sfaira/data/mouse/ileum/mouse_ileum.py index f56d2c46e..0d01e26df 100644 --- a/sfaira/data/mouse/ileum/mouse_ileum.py +++ b/sfaira/data/mouse/ileum/mouse_ileum.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum().datasets) + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/kidney/mouse_kidney.py b/sfaira/data/mouse/kidney/mouse_kidney.py index ac9d34cfc..9f20d3dbb 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney.py +++ b/sfaira/data/mouse/kidney/mouse_kidney.py @@ -27,6 +27,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney().datasets) + 
self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/liver/mouse_liver.py b/sfaira/data/mouse/liver/mouse_liver.py index a78f4af58..0d166852e 100644 --- a/sfaira/data/mouse/liver/mouse_liver.py +++ b/sfaira/data/mouse/liver/mouse_liver.py @@ -27,6 +27,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver().datasets) + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/lung/mouse_lung.py b/sfaira/data/mouse/lung/mouse_lung.py index aa5a6f0ec..9aea13353 100644 --- a/sfaira/data/mouse/lung/mouse_lung.py +++ b/sfaira/data/mouse/lung/mouse_lung.py @@ -29,6 +29,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupLung - self.datasets.update(DatasetGroupLung().datasets) + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad.py b/sfaira/data/mouse/malegonad/mouse_malegonad.py index da9610f39..e1818bc07 100644 --- a/sfaira/data/mouse/malegonad/mouse_malegonad.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad().datasets) + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py index d7cdbd797..71fca2ff3 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py @@ -31,6 +31,6 @@ def __init__( 
# Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupMammaryGland - self.datasets.update(DatasetGroupMammaryGland().datasets) + self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path).datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/mouse_muscle.py b/sfaira/data/mouse/muscle/mouse_muscle.py index d3eb6f583..1bf0eddcc 100644 --- a/sfaira/data/mouse/muscle/mouse_muscle.py +++ b/sfaira/data/mouse/muscle/mouse_muscle.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle().datasets) + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas.py b/sfaira/data/mouse/pancreas/mouse_pancreas.py index b036de4b3..ea87d9d50 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas.py @@ -41,6 +41,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas().datasets) + self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/placenta/mouse_placenta.py b/sfaira/data/mouse/placenta/mouse_placenta.py index 412dda22b..3c885a94f 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta.py +++ b/sfaira/data/mouse/placenta/mouse_placenta.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta().datasets) + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git 
a/sfaira/data/mouse/prostate/mouse_prostate.py b/sfaira/data/mouse/prostate/mouse_prostate.py index 68354a363..bd16b0e46 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate.py +++ b/sfaira/data/mouse/prostate/mouse_prostate.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate().datasets) + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/rib/mouse_rib.py b/sfaira/data/mouse/rib/mouse_rib.py index c2a80b1a0..1320a5e8c 100644 --- a/sfaira/data/mouse/rib/mouse_rib.py +++ b/sfaira/data/mouse/rib/mouse_rib.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupRib - self.datasets.update(DatasetGroupRib().datasets) + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/skin/mouse_skin.py b/sfaira/data/mouse/skin/mouse_skin.py index b8b33a0e2..dba84de3a 100644 --- a/sfaira/data/mouse/skin/mouse_skin.py +++ b/sfaira/data/mouse/skin/mouse_skin.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin().datasets) + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) except ImportError: pass \ No newline at end of file diff --git a/sfaira/data/mouse/spleen/mouse_spleen.py b/sfaira/data/mouse/spleen/mouse_spleen.py index 27ab1559e..05ff9439c 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen.py +++ b/sfaira/data/mouse/spleen/mouse_spleen.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen().datasets) + 
self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/stomach/mouse_stomach.py b/sfaira/data/mouse/stomach/mouse_stomach.py index cabeaf994..3fc8ba1ba 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach.py +++ b/sfaira/data/mouse/stomach/mouse_stomach.py @@ -21,6 +21,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach().datasets) + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/thymus/mouse_thymus.py b/sfaira/data/mouse/thymus/mouse_thymus.py index fe707f8fd..b74e325c1 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus.py +++ b/sfaira/data/mouse/thymus/mouse_thymus.py @@ -25,6 +25,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus().datasets) + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/tongue/mouse_tongue.py b/sfaira/data/mouse/tongue/mouse_tongue.py index 382e06f5e..a88388371 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue.py +++ b/sfaira/data/mouse/tongue/mouse_tongue.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupTongue - self.datasets.update(DatasetGroupTongue().datasets) + self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/trachea/mouse_trachea.py b/sfaira/data/mouse/trachea/mouse_trachea.py index bb578a632..b83539438 100644 --- a/sfaira/data/mouse/trachea/mouse_trachea.py +++ b/sfaira/data/mouse/trachea/mouse_trachea.py @@ -23,6 +23,6 @@ def __init__( # Load versions from 
extension if available: try: from sfaira_extension.data.mouse import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea().datasets) + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) except ImportError: pass diff --git a/sfaira/data/mouse/uterus/mouse_uterus.py b/sfaira/data/mouse/uterus/mouse_uterus.py index b7a7ea6fc..aca5ad69d 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus.py +++ b/sfaira/data/mouse/uterus/mouse_uterus.py @@ -23,6 +23,6 @@ def __init__( # Load versions from extension if available: try: from sfaira_extension.data.mouse import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus().datasets) + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) except ImportError: pass From 6a37f0be1e32c52409e6d452df02893fe88c0ae5 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 10 Dec 2020 18:12:01 +0100 Subject: [PATCH 019/161] Dataloading fix (#33) fixed unit tests and fixed adata field related bugs * updated order of data sets into meta loading dict to be alphabetic * fixed bug in setting pandas dataframe index * deprecated kipoi test * fixed vae unit test * many: field related bugs --- .gitignore | 1 + sfaira/consts/adata_fields.py | 78 ++++---- sfaira/data/base.py | 187 ++++++++++-------- .../human_adipose_2020_microwell_han_001.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_001.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_002.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_003.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_004.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_005.py | 33 ++-- ...man_adrenalgland_2020_microwell_han_006.py | 33 ++-- .../human_artery_2020_microwell_han_001.py | 33 ++-- .../human_bladder_2020_microwell_han_001.py | 33 ++-- .../human_bladder_2020_microwell_han_002.py | 33 ++-- .../human_bladder_2020_microwell_han_003.py | 33 ++-- .../blood/human_blood_2018_10x_ica_001.py | 35 ++--
.../human_blood_2019_10x_10xGenomics_001.py | 35 ++-- .../human_blood_2020_microwell_han_001.py | 33 ++-- .../human_blood_2020_microwell_han_002.py | 33 ++-- .../human_blood_2020_microwell_han_003.py | 33 ++-- .../human_blood_2020_microwell_han_004.py | 33 ++-- .../human_blood_2020_microwell_han_005.py | 33 ++-- .../human_blood_2020_microwell_han_006.py | 33 ++-- .../human_blood_2020_microwell_han_007.py | 33 ++-- .../human/bone/human_bone_2018_10x_ica_001.py | 35 ++-- .../bone/human_bone_2020_microwell_han_001.py | 33 ++-- .../bone/human_bone_2020_microwell_han_002.py | 33 ++-- .../human_brain_2017_DroNcSeq_habib_001.py | 35 ++-- .../human_brain_2020_microwell_han_001.py | 31 ++- .../human_brain_2020_microwell_han_002.py | 31 ++- .../human_brain_2020_microwell_han_003.py | 31 ++- .../human_brain_2020_microwell_han_004.py | 31 ++- .../human_brain_2020_microwell_han_005.py | 31 ++- .../human_brain_2020_microwell_han_006.py | 31 ++- .../human_calvaria_2020_microwell_han_001.py | 33 ++-- .../human_cervix_2020_microwell_han_001.py | 33 ++-- ..._chorionicvillus_2020_microwell_han_001.py | 33 ++-- .../colon/human_colon_2019_10x_kinchen_001.py | 39 ++-- .../colon/human_colon_2019_10x_smilie_001.py | 35 ++-- .../colon/human_colon_2019_10x_wang_001.py | 35 ++-- .../colon/human_colon_2020_10x_james_001.py | 35 ++-- .../human_colon_2020_microwell_han_001.py | 31 ++- .../human_colon_2020_microwell_han_002.py | 31 ++- .../human_colon_2020_microwell_han_003.py | 31 ++- .../human_colon_2020_microwell_han_004.py | 31 ++- .../human_duodenum_2020_microwell_han_001.py | 33 ++-- ...human_epityphlon_2020_microwell_han_001.py | 33 ++-- .../human_esophagus_2019_10x_madissoon_001.py | 33 ++-- .../human_esophagus_2020_microwell_han_001.py | 31 ++- .../human_esophagus_2020_microwell_han_002.py | 31 ++- .../eye/human_eye_2019_10x_lukowski_001.py | 35 ++-- .../human/eye/human_eye_2019_10x_menon_001.py | 35 ++-- .../human/eye/human_eye_2019_10x_voigt_001.py | 35 ++-- 
.../eye/human_eye_2020_microwell_han_001.py | 31 ++- ...an_fallopiantube_2020_microwell_han_001.py | 33 ++-- ...uman_femalegonad_2020_microwell_han_001.py | 31 ++- ...uman_femalegonad_2020_microwell_han_002.py | 31 ++- ...uman_gallbladder_2020_microwell_han_001.py | 31 ++- .../human_heart_2020_microwell_han_001.py | 31 ++- .../human_heart_2020_microwell_han_002.py | 31 ++- .../human_heart_2020_microwell_han_003.py | 31 ++- .../human_heart_2020_microwell_han_004.py | 31 ++- .../hesc/human_hesc_2020_microwell_han_001.py | 31 ++- .../ileum/human_ileum_2019_10x_martin_001.py | 35 ++-- .../ileum/human_ileum_2019_10x_wang_001.py | 35 ++-- .../human_ileum_2020_microwell_han_001.py | 31 ++- .../human_jejunum_2020_microwell_han_001.py | 33 ++-- .../human_kidney_2019_10xSn_lake_001.py | 35 ++-- .../human_kidney_2019_10x_stewart_001.py | 35 ++-- .../kidney/human_kidney_2020_10x_liao_001.py | 35 ++-- .../human_kidney_2020_microwell_han_001.py | 31 ++- .../human_kidney_2020_microwell_han_002.py | 31 ++- .../human_kidney_2020_microwell_han_003.py | 31 ++- .../human_kidney_2020_microwell_han_004.py | 31 ++- .../human_kidney_2020_microwell_han_005.py | 31 ++- .../human_kidney_2020_microwell_han_006.py | 31 ++- .../human_kidney_2020_microwell_han_007.py | 31 ++- .../human_liver_2018_10x_macparland_001.py | 35 ++-- .../liver/human_liver_2019_10x_popescu_001.py | 35 ++-- .../human_liver_2019_10x_ramachandran_001.py | 35 ++-- .../human_liver_2019_mCELSeq2_aizarani_001.py | 33 ++-- .../human_liver_2020_microwell_han_001.py | 31 ++- .../human_liver_2020_microwell_han_002.py | 31 ++- .../human_liver_2020_microwell_han_003.py | 31 ++- .../human_liver_2020_microwell_han_004.py | 31 ++- .../human_liver_2020_microwell_han_005.py | 31 ++- .../lung/human_lung_2019_10x_braga_001.py | 35 ++-- .../lung/human_lung_2019_10x_braga_002.py | 35 ++-- .../lung/human_lung_2019_10x_madissoon_001.py | 35 ++-- .../lung/human_lung_2019_dropseq_braga_003.py | 35 ++-- 
.../lung/human_lung_2020_10x_habermann_001.py | 35 ++-- .../lung/human_lung_2020_10x_lukassen_001.py | 35 ++-- .../lung/human_lung_2020_10x_lukassen_002.py | 35 ++-- .../lung/human_lung_2020_10x_miller_001.py | 35 ++-- .../human_lung_2020_10x_travaglini_001.py | 37 ++-- .../lung/human_lung_2020_microwell_han_001.py | 31 ++- .../lung/human_lung_2020_microwell_han_002.py | 31 ++- .../lung/human_lung_2020_microwell_han_003.py | 31 ++- .../lung/human_lung_2020_microwell_han_004.py | 31 ++- .../lung/human_lung_2020_microwell_han_005.py | 31 ++- ...uman_lung_2020_smartseq2_travaglini_002.py | 37 ++-- .../human_malegonad_2018_10x_guo_001.py | 35 ++-- .../human_malegonad_2020_microwell_han_001.py | 31 ++- .../human_malegonad_2020_microwell_han_002.py | 31 ++- .../mixed/human_mixed_2019_10x_szabo_001.py | 33 ++-- .../human_muscle_2020_microwell_han_001.py | 31 ++- .../human_muscle_2020_microwell_han_002.py | 33 ++-- .../human_omentum_2020_microwell_han_001.py | 31 ++- .../human_omentum_2020_microwell_han_002.py | 31 ++- .../human_omentum_2020_microwell_han_003.py | 31 ++- .../human_pancreas_2016_indrop_baron_001.py | 35 ++-- ...pancreas_2016_smartseq2_segerstolpe_001.py | 37 ++-- .../human_pancreas_2017_smartseq2_enge_001.py | 35 ++-- .../human_pancreas_2020_microwell_han_001.py | 31 ++- .../human_pancreas_2020_microwell_han_002.py | 31 ++- .../human_pancreas_2020_microwell_han_003.py | 31 ++- .../human_pancreas_2020_microwell_han_004.py | 31 ++- .../human_placenta_2018_10x_ventotormo_001.py | 33 ++-- ..._placenta_2018_smartseq2_ventotormo_001.py | 33 ++-- .../human_placenta_2020_microwell_han_001.py | 31 ++- .../human_pleura_2020_microwell_han_001.py | 31 ++- .../human_prostate_2018_10x_henry_001.py | 35 ++-- .../human_prostate_2020_microwell_han_001.py | 31 ++- .../rectum/human_rectum_2019_10x_wang_001.py | 35 ++-- .../human_rectum_2020_microwell_han_001.py | 31 ++- .../rib/human_rib_2020_microwell_han_001.py | 33 ++-- .../rib/human_rib_2020_microwell_han_002.py | 31 ++- 
.../skin/human_skin_2020_microwell_han_001.py | 31 ++- .../skin/human_skin_2020_microwell_han_002.py | 31 ++- ...human_spinalcord_2020_microwell_han_001.py | 31 ++- .../human_spleen_2019_10x_madissoon_001.py | 33 ++-- .../human_spleen_2020_microwell_han_001.py | 31 ++- .../human_spleen_2020_microwell_han_002.py | 31 ++- .../human_stomach_2020_microwell_han_001.py | 31 ++- .../human_stomach_2020_microwell_han_002.py | 31 ++- .../human_stomach_2020_microwell_han_003.py | 31 ++- .../human_stomach_2020_microwell_han_004.py | 31 ++- .../human_stomach_2020_microwell_han_005.py | 31 ++- .../human_stomach_2020_microwell_han_006.py | 31 ++- .../human_stomach_2020_microwell_han_007.py | 31 ++- .../human_stomach_2020_microwell_han_008.py | 31 ++- .../human_stomach_2020_microwell_han_009.py | 31 ++- .../human_stomach_2020_microwell_han_010.py | 31 ++- .../thymus/human_thymus_2020_10x_park_001.py | 35 ++-- .../human_thymus_2020_microwell_han_001.py | 31 ++- .../human_thymus_2020_microwell_han_002.py | 31 ++- .../human_thyroid_2020_microwell_han_001.py | 33 ++-- .../human_thyroid_2020_microwell_han_002.py | 33 ++-- .../human_trachea_2020_microwell_han_001.py | 31 ++- .../human_ureter_2020_microwell_han_001.py | 31 ++- .../human_uterus_2020_microwell_han_001.py | 34 ++-- .../mouse_adipose_2019_10x_pisco_001.py | 35 ++-- .../mouse_adipose_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_adipose_2019_smartseq2_pisco_002.py | 35 ++-- .../mouse_adipose_2019_smartseq2_pisco_003.py | 35 ++-- .../mouse_adipose_2019_smartseq2_pisco_004.py | 34 ++-- .../mouse_bladder_2018_microwell_han_001.py | 35 ++-- .../mouse_bladder_2019_10x_pisco_001.py | 35 ++-- .../mouse_bladder_2019_smartseq2_pisco_001.py | 33 ++-- .../mouse_blood_2018_microwell_han_001.py | 35 ++-- .../mouse_blood_2018_microwell_han_002.py | 35 ++-- .../mouse_blood_2018_microwell_han_003.py | 34 ++-- .../mouse_blood_2018_microwell_han_004.py | 35 ++-- .../mouse_blood_2018_microwell_han_005.py | 35 ++-- 
.../bone/mouse_bone_2018_microwell_001.py | 35 ++-- .../bone/mouse_bone_2019_10x_pisco_001.py | 35 ++-- .../mouse_bone_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_brain_2018_microwell_han_001.py | 35 ++-- .../mouse_brain_2018_microwell_han_002.py | 35 ++-- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 35 ++-- .../mouse_brain_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_brain_2019_smartseq2_pisco_002.py | 35 ++-- .../colon/mouse_colon_2019_10x_pisco_001.py | 35 ++-- .../mouse_colon_2019_smartseq2_pisco_001.py | 35 ++-- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 33 ++-- ...ouse_femalegonad_2018_microwell_han_001.py | 35 ++-- ...ouse_femalegonad_2018_microwell_han_002.py | 35 ++-- .../heart/mouse_heart_2019_10x_pisco_001.py | 35 ++-- .../mouse_heart_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_heart_2019_smartseq2_pisco_002.py | 35 ++-- .../mouse_ileum_2018_microwell_han_001.py | 35 ++-- .../mouse_ileum_2018_microwell_han_002.py | 35 ++-- .../mouse_ileum_2018_microwell_han_003.py | 35 ++-- .../mouse_kidney_2018_microwell_han_001.py | 35 ++-- .../mouse_kidney_2018_microwell_han_002.py | 35 ++-- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 35 ++-- .../mouse_kidney_2019_smartseq2_pisco_001.py | 35 ++-- ...arge_intestine_2019_smartseq2_pisco_001.py | 0 ...se_limb_muscle_2019_smartseq2_pisco_001.py | 0 .../mouse_liver_2018_microwell_han_001.py | 35 ++-- .../mouse_liver_2018_microwell_han_002.py | 35 ++-- .../liver/mouse_liver_2019_10x_pisco_001.py | 35 ++-- .../mouse_liver_2019_smartseq2_pisco_001.py | 35 ++-- .../lung/mouse_lung_2018_microwell_han_001.py | 35 ++-- .../lung/mouse_lung_2018_microwell_han_002.py | 35 ++-- .../lung/mouse_lung_2018_microwell_han_003.py | 35 ++-- .../lung/mouse_lung_2019_10x_pisco_001.py | 35 ++-- .../mouse_lung_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_malegonad_2018_microwell_han_001.py | 35 ++-- .../mouse_malegonad_2018_microwell_han_002.py | 35 ++-- ..._mammary_gland_2019_smartseq2_pisco_001.py | 0 
...use_mammarygland_2018_microwell_han_001.py | 35 ++-- ...use_mammarygland_2018_microwell_han_002.py | 35 ++-- ...use_mammarygland_2018_microwell_han_003.py | 35 ++-- ...use_mammarygland_2018_microwell_han_004.py | 35 ++-- .../mouse_mammarygland_2019_10x_pisco_001.py | 35 ++-- ...e_mammarygland_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_muscle_2018_microwell_han_001.py | 35 ++-- .../muscle/mouse_muscle_2019_10x_pisco_001.py | 35 ++-- .../mouse_muscle_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_pancreas_2018_microwell_han_001.py | 35 ++-- .../mouse_pancreas_2019_10x_pisco_001.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_001.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_002.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_003.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_004.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_005.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_006.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_007.py | 35 ++-- .../mouse_pancreas_2019_10x_thompson_008.py | 35 ++-- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_placenta_2018_microwell_han_001.py | 35 ++-- .../mouse_placenta_2018_microwell_han_002.py | 35 ++-- .../mouse_prostate_2018_microwell_han_001.py | 35 ++-- .../mouse_prostate_2018_microwell_han_002.py | 35 ++-- .../rib/mouse_rib_2018_microwell_han_001.py | 35 ++-- .../rib/mouse_rib_2018_microwell_han_002.py | 35 ++-- .../rib/mouse_rib_2018_microwell_han_003.py | 35 ++-- .../skin/mouse_skin_2019_10x_pisco_001.py | 35 ++-- .../mouse_skin_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_spleen_2018_microwell_han_001.py | 35 ++-- .../spleen/mouse_spleen_2019_10x_pisco_001.py | 35 ++-- .../mouse_spleen_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_stomach_2018_microwell_han_001.py | 35 ++-- .../mouse_thymus_2018_microwell_han_001.py | 35 ++-- .../thymus/mouse_thymus_2019_10x_pisco_001.py | 35 ++-- .../mouse_thymus_2019_smartseq2_pisco_001.py | 35 ++-- 
.../tongue/mouse_tongue_2019_10x_pisco_001.py | 35 ++-- .../mouse_tongue_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_trachea_2019_10x_pisco_001.py | 35 ++-- .../mouse_trachea_2019_smartseq2_pisco_001.py | 35 ++-- .../mouse_uterus_2018_microwell_han_001.py | 35 ++-- .../mouse_uterus_2018_microwell_han_002.py | 35 ++-- sfaira/data/utils/create_meta_mouse.py | 16 +- sfaira/unit_tests/test_dataset.py | 27 ++- sfaira/unit_tests/test_models.py | 2 +- sfaira/unit_tests/test_userinterface.py | 2 +- 246 files changed, 4013 insertions(+), 4197 deletions(-) delete mode 100644 sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py diff --git a/.gitignore b/.gitignore index a4099d8f9..974bd87ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +sfaira/unit_tests/test_data git abuild cache sfaira.egg-info diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index a9488c018..fdac269bc 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -18,6 +18,7 @@ class ADATA_IDS_BASE: _dataset: str _dataset_group: str _gene_id_ensembl: str + _gene_id_index: str _gene_id_names: str _healthy: str _id: str @@ -30,83 +31,91 @@ class ADATA_IDS_BASE: _year: str @property - def annotated(self): + def annotated(self) -> str: return self._annotated @property - def author(self): + def author(self) -> str: return self._author @property - def cell_types_original(self): + def cell_types_original(self) -> str: return self._cell_types_original @property - def cell_ontology_class(self): + def cell_ontology_class(self) -> str: return self._cell_ontology_class @property - def cell_ontology_id(self): + def cell_ontology_id(self) -> str: return self._cell_ontology_id @property - def dataset(self): + def dataset(self) -> str: return 
self._dataset @property - def dataset_group(self): + def dataset_group(self) -> str: return self._dataset_group @property - def doi(self): + def doi(self) -> str: return self._doi @property - def download(self): + def download(self) -> str: return self._download @property - def gene_id_ensembl(self): + def gene_id_ensembl(self) -> str: return self._gene_id_ensembl @property - def gene_id_names(self): + def gene_id_index(self) -> str: + return self._gene_id_index + + @gene_id_index.setter + def gene_id_index(self, x: str): + self._gene_id_index = x + + @property + def gene_id_names(self) -> str: return self._gene_id_names @property - def healthy(self): + def healthy(self) -> str: return self._healthy @property - def id(self): + def id(self) -> str: return self._id @property - def ncells(self): + def ncells(self) -> str: return self._ncells @property - def normalization(self): + def normalization(self) -> str: return self._normalization @property - def protocol(self): + def protocol(self) -> str: return self._protocol @property - def organ(self): + def organ(self) -> str: return self._organ @property - def species(self): + def species(self) -> str: return self._species @property - def subtissue(self): + def subtissue(self) -> str: return self._subtissue @property - def year(self): + def year(self) -> str: return self._year @@ -121,23 +130,23 @@ class ADATA_IDS_EXTENDED(ADATA_IDS_BASE): _state_exact: str @property - def age(self): + def age(self) -> str: return self._age @property - def dev_stage(self): + def dev_stage(self) -> str: return self._dev_stage @property - def ethnicity(self): + def ethnicity(self) -> str: return self._ethnicity @property - def sex(self): + def sex(self) -> str: return self._sex @property - def state_exact(self): + def state_exact(self) -> str: return self._state_exact @@ -147,25 +156,26 @@ class ADATA_IDS_SFAIRA(ADATA_IDS_EXTENDED): """ def __init__(self): - self._animal = "animal" + self._annotated = "annotated" + self._author = "author" 
self._cell_types_original = "cell_types_original" self._cell_ontology_class = "cell_ontology_class" self._cell_ontology_id = "cell_ontology_id" self._doi = "doi" self._dataset = "dataset" self._dataset_group = "dataset_group" + self._download = "download" self._gene_id_ensembl = "ensembl" + self._gene_id_index = "ensembl" self._gene_id_names = "names" - self._has_celltypes = "has_celltypes" self._healthy = "healthy" self._id = "id" self._ncells = "ncells" self._normalization = "normalization" - self._lab = "lab" self._organ = "organ" self._protocol = "protocol" + self._species = "organism" self._subtissue = "subtissue" - self._wget_download = "wget_download" self._year = "year" self._age = "age" @@ -184,25 +194,25 @@ class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): _disease_state_healthy: str def __init__(self): - self._animal = "organism" self._cell_types_original = "free_annotation" self._cell_ontology_class = "cell_type" self._cell_ontology_id = "cell_type_ontology_term_id" self._doi = "" # TODO self._dataset = "dataset" self._dataset_group = "dataset_group" + self._download = "" # TODO self._gene_id_ensembl = "" # TODO + self._gene_id_index = "ensembl" self._gene_id_names = "" # TODO self._has_celltypes = "" # TODO self._healthy = None # is inferred from _disease self._id = "" # TODO self._ncells = "ncells" self._normalization = None # is always "counts" - self._lab = "" # TODO self._organ = "" # TODO self._protocol = "assay" + self._species = "organism" self._subtissue = "" # TODO - self._wget_download = "" # TODO self._year = "" # TODO self._age = "age" @@ -217,9 +227,9 @@ def __init__(self): self._author_names = "names" @property - def author_names(self): + def author_names(self) -> str: return self._author_names @property - def disease_state_healthy(self): + def disease_state_healthy(self) -> str: return self._disease_state_healthy diff --git a/sfaira/data/base.py b/sfaira/data/base.py index d5b597650..996dbdb94 100644 --- a/sfaira/data/base.py +++ 
b/sfaira/data/base.py @@ -62,6 +62,9 @@ def __init__( self._species = None self._year = None + self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + self._META_DATA_FIELDS = META_DATA_FIELDS + @abc.abstractmethod def _load(self, fn): pass @@ -105,13 +108,13 @@ def load( self._load(fn=fn) - if ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = None + if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - if ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values, + if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, celltype_version=celltype_version ) @@ -155,8 +158,8 @@ def load( self.adata.obs_names = obs_names self.adata.var_names = new_index_collapsed new_index = new_index_collapsed - self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index - self.adata.var.index = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index + self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values # Match feature space to a genomes provided with sfaira if match_to_reference: @@ -172,7 +175,7 @@ def load( raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep - data_ids = self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].values + data_ids = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] 
idx_feature_map = np.array([self.genome_container.ensembl.index(x) for x in data_ids[idx_feature_kept]]) @@ -199,7 +202,7 @@ def load( obs=self.adata.obs, obsm=self.adata.obsm, var=pd.DataFrame(data={'names': self.genome_container.names, - ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, + self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, index=self.genome_container.ensembl), uns=self.adata.uns ) @@ -210,30 +213,29 @@ def _convert_and_set_var_names( self, symbol_col: str = None, ensembl_col: str = None, - new_index: str = ADATA_IDS_SFAIRA.gene_id_ensembl ): if symbol_col and ensembl_col: if symbol_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS_SFAIRA.gene_id_names}, + {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {symbol_col: ADATA_IDS_SFAIRA.gene_id_names}, + {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) if ensembl_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS_SFAIRA.gene_id_ensembl}, + {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {ensembl_col: ADATA_IDS_SFAIRA.gene_id_ensembl}, + {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) @@ -243,12 +245,12 @@ def _convert_and_set_var_names( if symbol_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS_SFAIRA.gene_id_names}, + {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {symbol_col: ADATA_IDS_SFAIRA.gene_id_names}, + {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) @@ -256,39 +258,39 @@ def _convert_and_set_var_names( # match it straight away, if it is not in there we try to match 
everything in front of the first period in # the gene name with a dictionary that was modified in the same way, if there is still no match we append na ensids = [] - for n in self.adata.var[ADATA_IDS_SFAIRA.gene_id_names]: + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: if n in id_dict.keys(): ensids.append(id_dict[n]) elif n.split(".")[0] in id_strip_dict.keys(): ensids.append(id_strip_dict[n.split(".")[0]]) else: ensids.append('n/a') - self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids elif ensembl_col: id_dict = self.genome_container.id_to_names_dict if ensembl_col == 'index': self.adata.var.index.name = 'index' self.adata.var = self.adata.var.reset_index().rename( - {'index': ADATA_IDS_SFAIRA.gene_id_ensembl}, + {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) else: self.adata.var = self.adata.var.rename( - {ensembl_col: ADATA_IDS_SFAIRA.gene_id_names}, + {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_names}, axis='columns' ) - self.adata.var[ADATA_IDS_SFAIRA.gene_id_names] = [ + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl] + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] ] else: raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' 'the name of the var column containing gene symbols') - self.adata.var.index = self.adata.var[new_index].tolist() + self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() self.adata.var_names_make_unique() def subset_organs(self, subset: Union[None, List]): @@ -336,8 +338,8 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar adata_backed.X[np.sort(idx), :] = x_new[np.argsort(idx), :] for k in adata_backed.obs.columns: - if k == 
ADATA_IDS_SFAIRA.dataset: - adata_backed.obs.loc[np.sort(idx), ADATA_IDS_SFAIRA.dataset] = [self.id for i in range(len(idx))] + if k == self._ADATA_IDS_SFAIRA.dataset: + adata_backed.obs.loc[np.sort(idx), self._ADATA_IDS_SFAIRA.dataset] = [self.id for i in range(len(idx))] elif k in self.adata.obs.columns: adata_backed.obs.loc[np.sort(idx), k] = self.adata.obs[k].values[np.argsort(idx)] elif k in list(self.adata.uns.keys()): @@ -357,7 +359,7 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar adata_backed._n_obs = adata_backed.X.shape[0] # not automatically updated after append adata_backed.obs = adata_backed.obs.append( # .obs was not broadcasted to the right shape! pandas.DataFrame(dict([ - (k, [self.id for i in range(len(idx))]) if k == ADATA_IDS_SFAIRA.dataset + (k, [self.id for i in range(len(idx))]) if k == self._ADATA_IDS_SFAIRA.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns else (k, [self.adata.uns[k] for i in range(len(idx))]) if k in list(self.adata.uns.keys()) else (k, ["key_not_found" for i in range(len(idx))]) @@ -377,9 +379,9 @@ def set_unkown_class_id(self, ids: list): target_id = "unknown" ontology_classes = [ x if x not in ids else target_id - for x in self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].tolist() + for x in self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].tolist() ] - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ontology_classes + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ontology_classes def _set_genome(self, genome: str @@ -473,7 +475,7 @@ def load_meta(self, fn: Union[PathLike, str]): else: if isinstance(fn, str): fn = os.path.normpath(fn) - self.meta = pandas.read_csv(fn, usecols=META_DATA_FIELDS) + self.meta = pandas.read_csv(fn, usecols=self.META_DATA_FIELDS) def write_meta( self, @@ -490,17 +492,17 @@ def write_meta( if self.adata is None: self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) 
meta = pandas.DataFrame({ - ADATA_IDS_SFAIRA.annotated: self.adata.uns[ADATA_IDS_SFAIRA.annotated], - ADATA_IDS_SFAIRA.author: self.adata.uns[ADATA_IDS_SFAIRA.author], - ADATA_IDS_SFAIRA.doi: self.adata.uns[ADATA_IDS_SFAIRA.doi], - ADATA_IDS_SFAIRA.download: self.adata.uns[ADATA_IDS_SFAIRA.download], - ADATA_IDS_SFAIRA.id: self.adata.uns[ADATA_IDS_SFAIRA.id], - ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, - ADATA_IDS_SFAIRA.normalization: self.adata.uns[ADATA_IDS_SFAIRA.normalization] if ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - ADATA_IDS_SFAIRA.organ: self.adata.uns[ADATA_IDS_SFAIRA.organ], - ADATA_IDS_SFAIRA.protocol: self.adata.uns[ADATA_IDS_SFAIRA.protocol], - ADATA_IDS_SFAIRA.species: self.adata.uns[ADATA_IDS_SFAIRA.species], - ADATA_IDS_SFAIRA.year: self.adata.uns[ADATA_IDS_SFAIRA.year], + self._ADATA_IDS_SFAIRA.annotated: self.adata.uns[self._ADATA_IDS_SFAIRA.annotated], + self._ADATA_IDS_SFAIRA.author: self.adata.uns[self._ADATA_IDS_SFAIRA.author], + self._ADATA_IDS_SFAIRA.doi: self.adata.uns[self._ADATA_IDS_SFAIRA.doi], + self._ADATA_IDS_SFAIRA.download: self.adata.uns[self._ADATA_IDS_SFAIRA.download], + self._ADATA_IDS_SFAIRA.id: self.adata.uns[self._ADATA_IDS_SFAIRA.id], + self._ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, + self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] if self._ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, + self._ADATA_IDS_SFAIRA.organ: self.adata.uns[self._ADATA_IDS_SFAIRA.organ], + self._ADATA_IDS_SFAIRA.protocol: self.adata.uns[self._ADATA_IDS_SFAIRA.protocol], + self._ADATA_IDS_SFAIRA.species: self.adata.uns[self._ADATA_IDS_SFAIRA.species], + self._ADATA_IDS_SFAIRA.year: self.adata.uns[self._ADATA_IDS_SFAIRA.year], }, index=range(1)) meta.to_csv(fn_meta) @@ -511,7 +513,7 @@ def annotated(self) -> bool: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.annotated] + return 
self.meta[self._ADATA_IDS_SFAIRA.annotated] @annotated.setter def annotated(self, x: bool): @@ -524,7 +526,7 @@ def author(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.author] + return self.meta[self._ADATA_IDS_SFAIRA.author] @author.setter def author(self, x: str): @@ -537,7 +539,7 @@ def doi(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.doi] + return self.meta[self._ADATA_IDS_SFAIRA.doi] @doi.setter def doi(self, x: str): @@ -550,7 +552,7 @@ def download(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.download] + return self.meta[self._ADATA_IDS_SFAIRA.download] @download.setter def download(self, x: str): @@ -563,7 +565,7 @@ def id(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.id] + return self.meta[self._ADATA_IDS_SFAIRA.id] @id.setter def id(self, x: str): @@ -578,7 +580,7 @@ def ncells(self) -> int: else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[ADATA_IDS_SFAIRA.ncells] + x = self.meta[self._ADATA_IDS_SFAIRA.ncells] return int(x) @property @@ -588,7 +590,7 @@ def normalization(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.normalization] + return self.meta[self._ADATA_IDS_SFAIRA.normalization] @normalization.setter def normalization(self, x: str): @@ -601,7 +603,7 @@ def organ(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.organ] + return self.meta[self._ADATA_IDS_SFAIRA.organ] @organ.setter def organ(self, x: str): @@ -614,7 +616,7 @@ def protocol(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.protocol] + return self.meta[self._ADATA_IDS_SFAIRA.protocol] @protocol.setter def protocol(self, x: str): @@ -627,7 +629,7 @@ def species(self) -> str: else: if 
self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.species] + return self.meta[self._ADATA_IDS_SFAIRA.species] @species.setter def species(self, x: str): @@ -640,7 +642,7 @@ def year(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - return self.meta[ADATA_IDS_SFAIRA.year] + return self.meta[self._ADATA_IDS_SFAIRA.year] @year.setter def year(self, x: str): @@ -661,6 +663,9 @@ class DatasetGroupBase(abc.ABC): """ datasets: Dict + def __init__(self): + self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + def subset_organs(self, subset: Union[None, List]): for i in self.ids: if self.datasets[i].organ == "mixed": @@ -696,7 +701,8 @@ def load_all( match_to_reference=match_to_reference, load_raw=load_raw ) - except FileNotFoundError: + except FileNotFoundError as e: + print(e) del self.datasets[x] def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: List[np.ndarray], @@ -747,15 +753,15 @@ def adata(self): adata_ls = self.adata_ls # Save uns attributes that are fixed for entire data set to .obs to retain during concatenation: for adata in adata_ls: - adata.obs[ADATA_IDS_SFAIRA.author] = adata.uns[ADATA_IDS_SFAIRA.author] - adata.obs[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_SFAIRA.year] - adata.obs[ADATA_IDS_SFAIRA.protocol] = adata.uns[ADATA_IDS_SFAIRA.protocol] - adata.obs[ADATA_IDS_SFAIRA.subtissue] = adata.uns[ADATA_IDS_SFAIRA.subtissue] - if ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): - adata.obs[ADATA_IDS_SFAIRA.normalization] = adata.uns[ADATA_IDS_SFAIRA.normalization] - if ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: - adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.uns[ADATA_IDS_SFAIRA.dev_stage] - adata.obs[ADATA_IDS_SFAIRA.annotated] = adata.uns[ADATA_IDS_SFAIRA.annotated] + adata.obs[self._ADATA_IDS_SFAIRA.author] = adata.uns[self._ADATA_IDS_SFAIRA.author] + adata.obs[self._ADATA_IDS_SFAIRA.year] = adata.uns[self._ADATA_IDS_SFAIRA.year] + adata.obs[self._ADATA_IDS_SFAIRA.protocol] = 
adata.uns[self._ADATA_IDS_SFAIRA.protocol] + adata.obs[self._ADATA_IDS_SFAIRA.subtissue] = adata.uns[self._ADATA_IDS_SFAIRA.subtissue] + if self._ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): + adata.obs[self._ADATA_IDS_SFAIRA.normalization] = adata.uns[self._ADATA_IDS_SFAIRA.normalization] + if self._ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: + adata.obs[self._ADATA_IDS_SFAIRA.dev_stage] = adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] + adata.obs[self._ADATA_IDS_SFAIRA.annotated] = adata.uns[self._ADATA_IDS_SFAIRA.annotated] # Workaround related to anndata bugs: # TODO remove this in future. for adata in adata_ls: # Fix 1: @@ -765,13 +771,13 @@ def adata(self): if adata.uns is not None: keys_to_keep = [ 'neighbors', - ADATA_IDS_SFAIRA.author, - ADATA_IDS_SFAIRA.year, - ADATA_IDS_SFAIRA.protocol, - ADATA_IDS_SFAIRA.subtissue, - ADATA_IDS_SFAIRA.normalization, - ADATA_IDS_SFAIRA.dev_stage, - ADATA_IDS_SFAIRA.annotated, + self._ADATA_IDS_SFAIRA.author, + self._ADATA_IDS_SFAIRA.year, + self._ADATA_IDS_SFAIRA.protocol, + self._ADATA_IDS_SFAIRA.subtissue, + self._ADATA_IDS_SFAIRA.normalization, + self._ADATA_IDS_SFAIRA.dev_stage, + self._ADATA_IDS_SFAIRA.annotated, "mapped_features" ] for k in list(adata.uns.keys()): @@ -784,7 +790,7 @@ def adata(self): # To preserve gene names in .var, the target gene names are copied into var_names and are then copied # back into .var. for adata in adata_ls: - adata.var.index = adata.var[ADATA_IDS_SFAIRA.gene_id_ensembl].tolist() + adata.var.index = adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].tolist() if len(adata_ls) > 1: # TODO: need to keep this? -> yes, still catching errors here (March 2020) # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. 
@@ -792,18 +798,18 @@ def adata(self): adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=ADATA_IDS_SFAIRA.dataset, + batch_key=self._ADATA_IDS_SFAIRA.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) except ValueError: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=ADATA_IDS_SFAIRA.dataset, + batch_key=self._ADATA_IDS_SFAIRA.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) - adata_concat.var[ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index + adata_concat.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index if len(set([a.uns['mapped_features'] for a in adata_ls])) == 1: adata_concat.uns['mapped_features'] = adata_ls[0].uns['mapped_features'] @@ -811,7 +817,7 @@ def adata(self): adata_concat.uns['mapped_features'] = False else: adata_concat = adata_ls[0] - adata_concat.obs[ADATA_IDS_SFAIRA.dataset] = self.ids[0] + adata_concat.obs[self._ADATA_IDS_SFAIRA.dataset] = self.ids[0] adata_concat.var_names_make_unique() return adata_concat @@ -832,7 +838,7 @@ def obs_concat(self, keys: Union[list, None] = None): (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns else (k, ["nan" for i in range(self.datasets[x].adata.obs.shape[0])]) for k in keys - ] + [(ADATA_IDS_SFAIRA.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] + ] + [(self._ADATA_IDS_SFAIRA.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat @@ -841,7 +847,7 @@ def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: for x in self.ids: # if this is for celltype prediction, only load the data with have celltype annotation try: - if self.datasets[x].has_celltypes or not annotated_only: + if self.datasets[x].annotated or not annotated_only: cells.append(self.datasets[x].ncells) except FileNotFoundError: 
del self.datasets[x] @@ -925,6 +931,8 @@ def __init__(self, dataset_groups: Union[None, List[DatasetGroupBase]]): self.fn_backed = None self.set_dataset_groups(dataset_groups=dataset_groups) + self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + def get_gc( self, genome: str = None @@ -981,6 +989,11 @@ def load_all( :param celltype_version: Version of cell type ontology to use. Uses most recent within each DatasetGroup if None. + :param match_to_reference: + :param remove_gene_version: + :param annotated_only: + :param load_raw: + :return: """ for x in self.dataset_groups: x.load_all( @@ -997,7 +1010,7 @@ def load_all( self.adata = self.dataset_groups[i].adata.concatenate( *[x.adata for x in self.dataset_groups[1:] if x is not None], join="outer", - batch_key=ADATA_IDS_SFAIRA.dataset_group + batch_key=self._ADATA_IDS_SFAIRA.dataset_group ) def load_all_tobacked( @@ -1053,17 +1066,17 @@ def load_all_tobacked( X.indptr = X.indptr.astype(np.int64) self.adata.X = X keys = [ - ADATA_IDS_SFAIRA.annotated, - ADATA_IDS_SFAIRA.author, - ADATA_IDS_SFAIRA.dataset, - ADATA_IDS_SFAIRA.cell_ontology_class, - ADATA_IDS_SFAIRA.dev_stage, - ADATA_IDS_SFAIRA.normalization, - ADATA_IDS_SFAIRA.organ, - ADATA_IDS_SFAIRA.protocol, - ADATA_IDS_SFAIRA.state_exact, - ADATA_IDS_SFAIRA.subtissue, - ADATA_IDS_SFAIRA.year, + self._ADATA_IDS_SFAIRA.annotated, + self._ADATA_IDS_SFAIRA.author, + self._ADATA_IDS_SFAIRA.dataset, + self._ADATA_IDS_SFAIRA.cell_ontology_class, + self._ADATA_IDS_SFAIRA.dev_stage, + self._ADATA_IDS_SFAIRA.normalization, + self._ADATA_IDS_SFAIRA.organ, + self._ADATA_IDS_SFAIRA.protocol, + self._ADATA_IDS_SFAIRA.state_exact, + self._ADATA_IDS_SFAIRA.subtissue, + self._ADATA_IDS_SFAIRA.year, ] if scatter_update: self.adata.obs = pandas.DataFrame({ diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index cc711c8b0..e56668222 100644 --- 
a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adipose", "hcl_AdultAdipose_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index 06db980fe..ddaba9b20 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_NeonatalAdrenalGland_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index fcd778b07..15d77b3d7 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True 
self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index 0e110e5a0..fbb336335 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index 7772eb398..d32708200 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 2314f8b43..84ed1cde8 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( 
self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 37069e26c..7cb38578f 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index 273df1b58..be3a67f34 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "artery", "hcl_AdultArtery_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = 
self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index de82b63e6..535cb4ea0 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ 
def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index 5d4c7e400..a16cce5fd 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 945a5205b..553e135b5 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bladder", "hcl_AdultGallbladder_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py index 8e4749b9d..4d733a8b5 100644 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -40,7 +39,7 @@ def __init__( self.download_website_meta = None self.organ = 
"blood" self.sub_tissue = "umbilical_cord_blood" - self.has_celltypes = False + self.annotated = False self.class_maps = { "0": {}, @@ -63,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "ica_blood.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py index cc70f0b67..72581d7ee 100644 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -32,7 +31,7 @@ def __init__( self.download_website_meta = None self.organ = "blood" self.sub_tissue = "pbmcs" - self.has_celltypes = False + self.annotated = False self.class_maps = { "0": {}, @@ -47,20 +46,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = '10x Genomics' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = '10x Genomics' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 8c2f78fe2..39b050986 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index ae11c35a7..023d9467c 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ 
-1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_PeripheralBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 7ab28023a..8f9fd060b 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index 9fb895ef0..b8b782ed7 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 770fb5c5c..470ac2681 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from 
typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index 36a2d5662..dfb7748b8 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index bbf76f52a..bb9f21a0a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py index 68b08699f..85b019bbe 100644 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external 
import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -40,7 +39,7 @@ def __init__( self.download_website_meta = None self.organ = "bone" self.sub_tissue = "bone_marrow" - self.has_celltypes = False + self.annotated = False self.class_maps = { "0": {}, @@ -63,20 +62,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bone", "ica_bone.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = 
True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index 027972939..801d901bf 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 00d11f26f..50025c102 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py index 2ae25a98c..6610531c7 100644 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external 
import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "brain" self.sub_tissue = "hippocampus, prefrontal cortex" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -64,20 +63,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/nmeth.4407" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/nmeth.4407" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index b257f59af..c19438a2e 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index 31a52e85d..ca5284f80 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index 9ab01e020..c952327cc 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -1,8 
+1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index 5dcb7bd39..f595e62b1 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_AdultTemporalLobe_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index 24b5636fd..370158a58 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index a812123a9..b1d3936cc 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import 
ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,18 +76,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "brain", "hcl_AdultCerebellum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index d370f75b5..641315e86 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "calvaria", "hcl_FetalCalvaria_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index 65e54254f..d0840bca6 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "cervix", "hcl_AdultCervix_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 
"microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index cdb2c119a..d63c79975 100644 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external 
import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "chorionicvillus", "hcl_ChorionicVillus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index 8d3f7d7ea..ff1f38dcc 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -67,7 +66,7 @@ def __init__( self.download_website_meta = 'private' self.organ = "colon" self.sub_tissue = "lamina propria of mucosa of colon" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -129,23 +128,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "colon", "kinchenetal.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Simmons' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.08.067" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [line == 'normal' for line in + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Simmons' + 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.08.067" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [line == 'normal' for line in self.adata.obs['donor_organism.diseases.ontology_label']] - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[ADATA_IDS_SFAIRA.state_exact]\ + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact]\ .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col='Accession') diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py index 279e8a85b..3f5493506 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py @@ -1,8 +1,7 @@ +import anndata 
import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -31,7 +30,7 @@ def __init__( self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colonic epithelium" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -88,20 +87,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.06.029" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.06.029" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py index e50d96f1f..6ab9ba0bd 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colon" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -56,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py index 641a95723..0bd47f057 100644 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -31,7 +30,7 @@ def __init__( self.download_website_meta = None self.organ = "colon" self.sub_tissue = "colonic immune cells" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -75,20 +74,20 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41590-020-0602-z" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41590-020-0602-z" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index 72aa77936..571b8a40d 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "colon", "hcl_AdultAscendingColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index 11de7f12f..b6ce69993 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index d26e5a99a..28df255c4 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index a94dd02d8..31cac84b1 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,17 +72,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index d3005d603..7276b2985 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "duodenum", "hcl_AdultDuodenum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index ec43b2e98..1b47ad8cc 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 
'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "epityphlon", "hcl_AdultEpityphlon_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py index 22376fc4f..14c097816 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "esophagus" self.sub_tissue = "esophagus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,21 +65,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7413619', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) \ No newline at end of file diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index ad524c341..b7dc9fc33 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -70,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index efdb0d499..0b6cf55db 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import 
DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -70,17 +69,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py index 2c74fd6b1..f73d4e388 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -63,20 +62,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Wong' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.15252/embj.2018100811' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Wong' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.15252/embj.2018100811' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
'10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py index 621447908..5701ac731 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -53,20 +52,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Hafler' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
'10.1038/s41467-019-12780-8' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Hafler' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-12780-8' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py index 7aa0c2591..31dfce8a7 100644 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ 
b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = None self.organ = "eye" self.sub_tissue = "retina" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -57,20 +56,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mullins' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1073/pnas.1914143116' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mullins' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1073/pnas.1914143116' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index 041fc2c4c..d2494261a 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -69,18 +68,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "eye", "hcl_FetalEyes_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = 
self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index c78a1a42c..04b317c65 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "fallopiantube", 
"hcl_AdultFallopiantube_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index bafe66bdf..479c0cc77 100644 
--- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index e5d3f22d3..6845a9e8d 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = 
self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 40a6406d9..6483dee51 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def 
_load(self, fn=None): fn = os.path.join(self.path, "human", "gallbladder", "hcl_AdultGallbladder_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py 
b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index c4ee874db..1be4fda43 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index 2da149e62..f44e804da 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 430f8c92e..33388a4b3 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, 
fn=None): fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 
2b73cebe0..7eefedef5 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index 70a0816aa..f245679da 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'HESC' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "hesc", "hcl_HESC_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py index 9b4e2d89e..1de123a07 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -70,20 +69,20 @@ def _load(self, fn=None): .multiply(1/10000) self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Kenigsberg" - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.08.008" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Kenigsberg" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.08.008" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py 
b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py index 255803190..9e9a6a79f 100644 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "ileum" self.sub_tissue = "ileum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -56,20 +55,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 5726c3f77..235186829 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -72,18 +71,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "ileum", "hcl_AdultIleum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = 
"human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index e9d7712b4..142be26aa 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' 
self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,17 +44,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "jejunum", "hcl_AdultJejunum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py index 867feb38b..6cdc98279 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,20 +76,20 @@ def _load(self, fn=None): annot = pd.read_csv(fn[1], index_col=0, dtype='category') self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Jain' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-10861-2' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10xSn' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Jain' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-10861-2' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10xSn' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = 
self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py index 8d87c833f..3c97f7a87 100644 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -32,7 +31,7 @@ def __init__( self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "renal medulla, renal pelvis, ureter, cortex of kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -126,21 +125,21 @@ def _load(self, fn=None): self.adata = adult.concatenate(fetal) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Clatworthy' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = 
'10.1126/science.aat5031' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Clatworthy' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1126/science.aat5031' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] self.adata.obs["cell_ontology_id"] = None - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID') diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index ee78c0032..2dd817b38 100644 --- 
a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd import scipy.io import gzip @@ -59,7 +58,7 @@ def __init__( self.download_website_meta = None self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = False + self.annotated = False self.class_maps = { "0": {}, @@ -97,20 +96,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "GSE131685.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Mo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41597-019-0351-8' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41597-019-0351-8' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 68203f9b9..4fcaa6872 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index ca7c14f2c..a6f6cfe25 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + 
self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index b354034d0..5d766eab9 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = 
"human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 7d404820e..7a7c469b0 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 1b9b34ef1..3c997f3a2 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + 
self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index 2f1a0c45f..d538f2567 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = 
"human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 36a28c728..4c6342f9d 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -95,17 +94,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_6.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py index c71297051..5828651a0 100644 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "caudate lobe" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -71,20 +70,20 
@@ def _load(self, fn=None): celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'McGilvray' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-018-06318-7' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'McGilvray' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-018-06318-7' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py index 4560abe11..d335ed46d 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( self.download_website_meta = 'private' self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -71,20 +70,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Haniffa' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1652-y' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Haniffa' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1652-y' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py index a2584a38f..0e9623afa 100644 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -45,7 +44,7 @@ def __init__( self.download_website_meta = None self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,20 +72,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Henderson' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1631-3' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Henderson' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1631-3' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py index 1cc865cb2..19e9e2783 100644 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -90,20 +89,20 @@ def _load(self, fn=None): self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Gruen' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1373-2' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Gruen' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1373-2' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index 404b75a83..a617648e9 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index 277efb10f..ba9e76661 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class 
Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 16b465ea4..258924659 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index ed870da4b..98df6120e 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "hcl_Liver_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index 48157a9f8..a7fd93836 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 
'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,17 +73,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "liver", "hcl_Liver_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py index e37dce7a1..a9119aaa8 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "alveoli, parenchyma" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -65,21 +64,21 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py index 35517ce68..2ce4619c1 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "bronchi" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -65,21 +64,21 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py index ed83f3c95..4327033ee 100644 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing 
import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "parenchyma" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -69,21 +68,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Meyer' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1186/s13059-019-1906-x" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Meyer' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1186/s13059-019-1906-x" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] 
self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734') diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py index b4928a0e2..6fadd1a90 100644 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" self.organ = "lung" self.sub_tissue = "parenchymal lung and distal airway specimens" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -62,21 +61,21 @@ def _load(self, fn=None): self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'dropseq' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = [self.download_website, 
self.download_website_meta] - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'dropseq' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = [self.download_website, self.download_website_meta] + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'uninvolved areas of tumour resection material' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'uninvolved areas of tumour resection material' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py index 075f59ce8..5f5872577 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from 
.external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -46,7 +45,7 @@ def __init__( self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" self.organ = "lung" self.sub_tissue = "parenchyma" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -107,21 +106,21 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "habermann_processed.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Kropski' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/753806" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Kropski' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/753806" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] 
self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [i == 'Control' for i in self.adata.obs['Status']] - self.adata.obs['state_exact'] = self.adata.obs['Diagnosis'].astype('category') + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Control' for i in self.adata.obs['Status']] + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Diagnosis'].astype('category') - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py index 119833343..8ad57e976 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py index 24b52c8ca..7ecfbaf49 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta 
= None self.organ = "lung" self.sub_tissue = "bronchial epithelial cells" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -63,21 +62,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py index a868b5420..955e85b9e 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "fetal lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,21 +73,21 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Spence' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.devcel.2020.01.033" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Spence' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.devcel.2020.01.033" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py index f2cf531e2..630f99090 100644 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import scipy.sparse import numpy as np @@ -38,7 +37,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -115,22 +114,22 @@ def _load(self, fn=None): self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ .multiply(1 / 10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') 
self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index ab1e02530..4d83d01b0 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -96,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index f52f73078..ee208054a 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -96,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_3.h5ad") self.adata = anndata.read(fn) - 
self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 50fd78529..03a9d9783 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -96,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index 1f3715724..b6b48c97f 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -96,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 223d3a409..088f12d59 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -96,18 +95,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py index 77f4c67e0..d347e5766 100644 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ 
b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import scipy.sparse import numpy as np @@ -38,7 +37,7 @@ def __init__( self.download_website_meta = None self.organ = "lung" self.sub_tissue = "proximal, medial, distal, blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -102,22 +101,22 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ .multiply(1 / 1000000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'smartseq2' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'smartseq2' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs['state_exact'] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py index 03362c90a..0b4b7ab2f 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "malegonad" self.sub_tissue = "testis" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -60,20 +59,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Cairns" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41422-018-0099-2" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Cairns" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41422-018-0099-2" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index f530fbc85..f961a5a0b 
100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,18 +73,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index ca8a98733..5e0745e4a 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -74,18 +73,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py index 924c3a978..dde7c0605 100644 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import tarfile import pandas as pd import scipy.sparse @@ -85,7 +84,7 @@ def __init__( self.download_website_meta = 'private' self.organ = "mixed" self.sub_tissue = "Bone Marrow, Lung, Lymph Node" - self.has_celltypes = True + self.annotated = True self.loaded = False self.class_maps = { @@ -151,23 +150,23 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "mixed", 
"GSE126030.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sims" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41467-019-12464-3" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sims" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41467-019-12464-3" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs["subtissue"] = self.adata.obs["organ"] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession', new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession') # If the subset_organs() method has been run before, subset to specified organs if "organsubset" in self.__dict__: diff --git 
a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index e4defc46f..95b631139 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "muscle", "hcl_FetalMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = 
"human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index c92719054..958b96a4d 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "muscle", "hcl_AdultMuscle_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index 2550ea8ed..bf4e80d49 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True 
+ self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 88614e843..2ead8c196 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index e9fe7fff4..62112e74e 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py index f7b378c33..7afe0094b 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "pancreas" self.sub_tissue = "pancreas" - 
self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -63,19 +62,19 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Yanai" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cels.2016.08.011" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'inDrop' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Yanai" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cels.2016.08.011" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'inDrop' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py index f60cea9a0..44d2d183e 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import pandas as pd @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -69,21 +68,21 @@ def _load(self, fn=None): # filter observations which are not cells (empty wells, low quality cells etc.) 
self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Sandberg" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2016.08.020" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sandberg" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2016.08.020" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[ADATA_IDS_SFAIRA.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in 
self.adata.obs['Characteristics[disease]']] + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py index a65b155fd..6bdd8965e 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import tarfile import gzip from io import StringIO @@ -72,7 +71,7 @@ def __init__( self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" self.organ = "pancreas" self.sub_tissue = "islet of Langerhans" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -129,20 +128,20 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pancreas", "GSE81547.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2017.09.004" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2017.09.004" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index 7d79c6021..a1d603cbd 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -84,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pancreas", "hcl_AdultPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 126a23ce5..53a907ca7 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -84,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 70e7187bb..94ac26a0d 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -84,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_2.h5ad") self.adata = 
anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 6f3ef7db0..3c2ee550f 100644 --- 
a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -84,17 +83,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 22f6d9e57..5c47f3567 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -1,7 +1,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA import pandas as pd import anndata @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip' self.organ = "placenta" self.sub_tissue = "placenta, decidua, blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -83,30 +82,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', 
'-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index b09596a10..2645c60ac 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -1,7 +1,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA import pandas as pd import anndata @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.2.zip' self.organ = "placenta" self.sub_tissue = "placenta, decidua, blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -83,30 +82,30 @@ def _load(self, fn=None): for i in df.columns: self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "Smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "Smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index 607feae07..262758097 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ 
b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,17 +76,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "placenta", "hcl_Placenta_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 39412e70b..3c2bc16dc 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "pleura", "hcl_AdultPleura_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage 
+ self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py index fb8c5907b..a0d2ed360 100644 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -30,7 +29,7 @@ def __init__( self.download_website_meta = None self.organ = "prostate" self.sub_tissue = "prostate" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -57,20 +56,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Strand" - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.celrep.2018.11.086" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Strand" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.celrep.2018.11.086" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py 
b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index 7e1c44559..e841798d1 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,18 +65,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "prostate", "hcl_AdultProstate_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py index 9bbb7957d..8ef1d79b6 100644 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np import scipy.sparse @@ -29,7 +28,7 @@ def __init__( self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad" self.organ = "rectum" self.sub_tissue = "rectum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -55,20 +54,20 @@ def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index c1a2a0009..8281d0d45 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 
'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,17 +60,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "rectum", "hcl_AdultRectum_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index e5e1a4e65..84089275e 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
'10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index 421ad2efc..7b09e8b4f 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index aba00a706..77de8ec43 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - 
self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -75,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git 
a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index 5b1772b9f..ab5b62e29 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -75,18 +74,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 3a276955e..9b69be1fe 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "spinalcord", "hcl_FetalSpinalCord_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py index 7517ae6bc..ab6dc0b09 100644 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import scipy.sparse @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = None self.organ = "spleen" self.sub_tissue = "spleen" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -75,22 +74,22 @@ 
def _load(self, fn=None): self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ .multiply(1/10000) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] self.set_unkown_class_id(ids=["Unknown"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7463846', 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index 047d8e8a1..e855953f2 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -67,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleenParenchyma_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index fbe16555f..9f03b1467 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -67,17 +66,17 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleen_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index abccb93fc..1c68613f3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' 
self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) 
diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index aaf8690d6..21367d1b3 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index d88265402..6e598f4f1 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index 3d85156cc..e795169b0 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' 
self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntetsine_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index da1bdd129..c159287ad 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 74f7f340b..695c0b8cb 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" 
- self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index 2f44f8e40..d46ff6f9e 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' 
self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_5.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index b0cd3ba45..12177da7e 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_3.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index 638af9954..bb63d705c 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index 7034a3e71..da8d07986 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' 
self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_4.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py index b70663e7f..295656f14 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata import numpy as np @@ -29,7 +28,7 @@ def __init__( self.download_website_meta = None self.organ = "thymus" self.sub_tissue = "fetal thymus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -90,20 +89,20 @@ def _load(self, fn=None): self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1126/science.aay3224" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1126/science.aay3224" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = 'healthy' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 4aef4a0ee..954804147 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -59,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index 853191631..3686909cd 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( 
self.dev_stage = 'Fetus' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -59,18 +58,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 1dff2bf3f..82a1c1194 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 64d7fa4da..20bbbf689 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index b263a8e5c..a433a3fe2 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( 
self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "trachea", "hcl_AdultTrachea_2.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 1bf9da65a..4c1102ff6 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -1,8 +1,7 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata class Dataset(DatasetBase): @@ -30,7 +29,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +44,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "ureter", "hcl_AdultUreter_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = 
self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 8d00b14cf..1af51d4d3 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -1,8 +1,8 @@ +import anndata import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA -import anndata + class Dataset(DatasetBase): @@ -30,7 +30,7 @@ def __init__( self.dev_stage = 'Adult' self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' self.download_website_meta = None - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -45,18 +45,18 @@ def _load(self, fn=None): fn = os.path.join(self.path, "human", "uterus", "hcl_AdultUterus_1.h5ad") self.adata = anndata.read(fn) - self.adata.uns[ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
- self.adata.uns[ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage + + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py index 09aea8e6c..7c72f9c4e 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "adipose" self.sub_tissue = "adipose" 
- self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py index 57cc116d0..faad974eb 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "adipose" self.sub_tissue = "adipose" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py index bfdf3fe1e..1e386e00c 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "adipose" self.sub_tissue = "adipose" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -51,20 +50,20 @@ 
def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py index 46fd2d683..782caa53a 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "adipose" self.sub_tissue = "adipose" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py index e74a9c555..656d4e504 100644 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py +++ b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py @@ -27,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "adipose" self.sub_tissue = "adipose" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -52,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = 
"smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py index 5eeccb976..011fa3759 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ 
b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "bladder" self.sub_tissue = "bladder" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,21 +60,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py index 3f752594a..c1483afba 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "bladder" self.sub_tissue = "bladder" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py index 624b84d65..a1e1f6500 100644 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "bladder" self.sub_tissue = "bladder" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -51,19 +50,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py index 8f8b87a90..aa373428b 100644 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.sub_tissue = "blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,21 +65,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py index a27d7691d..9088bd83a 100644 --- 
a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.sub_tissue = "blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,21 +65,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py index 9d8ef9088..fcd971a98 100644 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py @@ -22,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.sub_tissue = "blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -65,21 +65,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns["lab"] = "Guo" - self.adata.uns["year"] = "2018" - self.adata.uns["doi"] = "10.1016/j.cell.2018.02.001" - self.adata.uns["protocol"] = "microwell-seq" - self.adata.uns["organ"] = self.organ - 
self.adata.uns["subtissue"] = self.sub_tissue # TODO - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.obs["cell_ontology_class"] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs["cell_types_original"] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py index fcbb9fa42..204613af0 100644 --- 
a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.sub_tissue = "blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,21 +65,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py index f48e2108c..33efff1d5 100644 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py +++ b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.sub_tissue = "blood" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -66,21 +65,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" 
- self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py index 93695c333..f6e14e5cf 100644 --- a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "bone" self.sub_tissue = "marrow" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -60,22 +59,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] 
= "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs['Annotation'] - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs['Annotation'] + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py index b6126a2f6..0eaae87b9 100644 --- a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "bone" self.sub_tissue = "marrow" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,21 +52,21 @@ def _load(self, 
fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = 
True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py index 2d3dc7975..6e72e85db 100644 --- a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "bone" self.sub_tissue = "marrow" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py index 3a692df7a..eda8e1088 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "brain" self.sub_tissue = "brain" - self.has_celltypes = True + self.annotated = True 
self.class_maps = { "0": { @@ -59,21 +58,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py index 9a8d0e629..31f7dafdc 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "brain" self.sub_tissue = "brain" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -59,21 +58,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 0444530fd..5749d53e3 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -4,7 +4,6 @@ import 
pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( "www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" self.organ = "brain" self.sub_tissue = "brain" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -65,23 +64,23 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names self.adata.var = var - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Movahedi" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1038/s41593-019-0393-4" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Movahedi" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41593-019-0393-4" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py index f9b5f16e7..543ff2c65 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py @@ -3,7 +3,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "brain" self.sub_tissue = "brain" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py index 6aae2b9a4..16bed27d5 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py @@ -3,7 +3,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "brain" self.sub_tissue = "brain" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py index 17d315b2a..e7f4077d6 100644 --- a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "colon" self.sub_tissue = "colon" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py index 299b15ede..2f925accc 100644 --- a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py @@ -27,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "colon" self.sub_tissue = "colon" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.has_celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already 
set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ + self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py index 665983cb3..061e65439 100644 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "diaphragm" self.sub_tissue = "diaphragm" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,19 +52,19 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py index 0729ef941..e69c4b146 100644 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "femalegonad" self.sub_tissue = "femalegonad" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -57,21 +56,21 @@ def _load(self, fn=None): self.adata = 
self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py index 24b794523..776162f34 100644 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py +++ b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "femalegonad" self.sub_tissue = "femalegonad" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -57,21 +56,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py index d5b05893c..e10649e05 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from 
.external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "heart" self.sub_tissue = "heart" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py index f5a2eb17a..721e03cee 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "heart" self.sub_tissue = "heart" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py index 170b815f2..53900692f 100644 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 
+27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "heart" self.sub_tissue = "heart" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py index 69c3b3c91..5363e094c 100644 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.sub_tissue = "ileum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py index 6dc73705b..c96944ea9 100644 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing 
import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.sub_tissue = "ileum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py index 3fefaf0b5..b3e7390aa 100644 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py +++ b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.sub_tissue = "ileum" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,22 +67,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = 
"microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py index 41dd1438b..7088e667f 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -45,21 +44,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
"microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py index 7ee5882e9..86ace4b56 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ 
-76,21 +75,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py index 3cbdd2ac0..67cd214f8 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py @@ -3,7 +3,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -27,7 +26,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -56,21 +55,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py index ceff6e506..2e05e00b5 100644 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % 
self.source) self.organ = "kidney" self.sub_tissue = "kidney" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -55,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan", 
"kidney cell"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/large_intestine/mouse_large_intestine_2019_smartseq2_pisco_001.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/limb_muscle/mouse_limb_muscle_2019_smartseq2_pisco_001.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py index f01bcd10b..5a856484d 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -63,21 +62,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - 
self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py index 03a83405d..c1f4f3b2c 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -57,21 +56,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
"microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py index b75e14106..e8366603e 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns 
= {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py index e8750f2b0..73067787a 100644 --- a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "liver" self.sub_tissue = "liver" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] 
= "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py index 1d198b276..62bebd82c 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,22 +76,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = 
celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py index 43102d566..bff170a68 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,22 +76,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py index f12abcc17..a45337414 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( 
self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -77,22 +76,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py index 3590bce81..eabf196af 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -53,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py index db656e485..77b2fa934 100644 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import 
DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "lung" self.sub_tissue = "lung" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -53,21 +52,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py index c6cf5653d..acc94d1ed 100644 --- a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "malegonad" self.sub_tissue = "malegonad" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -64,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = 
"mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py 
index ca536a683..7e5d1feea 100644 --- a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py +++ b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "malegonad" self.sub_tissue = "malegonad" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -64,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammary_gland/mouse_mammary_gland_2019_smartseq2_pisco_001.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py index 0a01aff3d..c9c33b8b7 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ 
= "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,21 +60,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py index 661d1ff65..63d95e9d1 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,22 +60,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - 
self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py 
index a23535a32..59e75d075 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,22 +60,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py index a677d911b..85f714899 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -61,22 +60,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in 
celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py index f90cef8ff..ba044e9f2 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -51,20 +50,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - 
self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py index b442c7f20..acddb5514 100644 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py @@ -25,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "mammarygland" self.sub_tissue = "mammarygland" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -50,20 +50,21 @@ def _load(self, fn=None): self.adata.varm = {} 
self.adata.uns = {} - self.adata.uns["lab"] = "Quake" - self.adata.uns["year"] = "2019" - self.adata.uns["doi"] = "10.1101/661728" - self.adata.uns["protocol"] = "smartseq2" - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = "mouse" - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'norm' - # self.adata.obs["cell_ontology_class"] is already set - self.adata.obs["cell_types_original"] = self.adata.obs["cell_ontology_class"].values.tolist() - self.adata.obs["healthy"] = True - self.adata.obs["state_exact"] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ + self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index='ensembl') + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py 
b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py index ce3b6f01e..53ad4269d 100644 --- a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "muscle" self.sub_tissue = "muscle" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -62,22 +61,22 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # 
TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py index c88b0e4a3..e3a9b589b 100644 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "muscle" self.sub_tissue = "muscle" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -51,20 +50,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py index b71e072d1..e16e3911c 100644 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "muscle" self.sub_tissue = "muscle" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -51,20 +50,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py index 34848662f..71901248d 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -67,21 +66,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - 
self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py index 85ef8fb2d..fd4e43323 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -26,7 +25,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py index 671eba3cc..433ff3de1 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): 
self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py index 76df51fb8..378006b63 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
"10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py index a027e8176..f7773faf1 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] 
- self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, 
new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py index 5f2e685a0..f3117cba6 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py index 3b43226ec..a30f30d8b 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - 
self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py index 6735a21ad..3c1873c96 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py index 447776c5a..d619bf262 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py index 1fde77cb5..5747594ff 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -58,21 +57,21 @@ def _load(self, fn=None): self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" 
+ self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "diabetic" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py index 7b97fa833..8f17750bf 100644 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "pancreas" self.sub_tissue = "pancreas" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -55,20 +54,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - 
self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py index 71e6c67e4..a1b4c6a35 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "placenta" self.sub_tissue = "placenta" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -73,21 +72,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py index 55bf9197d..871354a1d 100644 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "placenta" self.sub_tissue = "placenta" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { 
@@ -73,21 +72,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py index 99040cb76..486ba9b5c 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "prostate" self.sub_tissue = "prostate" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -51,21 +50,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py index 7f6022c77..9c5a357d7 100644 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ 
b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "prostate" self.sub_tissue = "prostate" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -51,21 +50,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py index 504019f64..358d4b053 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" self.sub_tissue = "rib" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - 
self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py index 65718f86f..1099a3f6c 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" self.sub_tissue = "rib" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py index b8d5ff7de..e1a4f8264 100644 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" 
self.sub_tissue = "rib" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -68,21 +67,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py index 65b961511..6d1889244 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "skin" self.sub_tissue = "skin" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is 
already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py index 04ddff85c..6b8f05078 100644 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % 
self.source) self.organ = "skin" self.sub_tissue = "skin" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py index 1b5174b1f..3eac25aa7 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "spleen" self.sub_tissue = "spleen" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -56,21 +55,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py index 84118862e..3042be5fc 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "spleen" self.sub_tissue = "spleen" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py index 00ae4b975..d386c4609 100644 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -28,7 +27,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "spleen" self.sub_tissue = "spleen" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -53,20 +52,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is 
already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py index 64501c484..87cce385b 100644 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "stomach" self.sub_tissue = "stomach" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -62,21 +61,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py index 15f68b11f..602777280 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -21,7 +20,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "thymus" self.sub_tissue = "thymus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -51,21 +50,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - 
self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py 
b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py index f0f1e370d..12bfbbe3e 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -27,7 +26,7 @@ def __init__( self.organ = "spleen" self.organ = "thymus" self.sub_tissue = "thymus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -52,21 +51,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py index 11f5971fd..aed1fac7e 100644 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -27,7 +26,7 @@ def __init__( self.organ = "spleen" self.organ = "thymus" self.sub_tissue = "thymus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -52,21 +51,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] 
= self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py index d2fcce167..0721142b2 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ 
b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( self.organ = "spleen" self.organ = "tongue" self.sub_tissue = "tongue" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py index c412c6d00..b076e08cf 100644 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py @@ -2,7 +2,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( self.organ = "spleen" self.organ = "tongue" self.sub_tissue = "tongue" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,20 +53,20 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py index f19ec1043..fda917f76 100644 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py @@ -3,7 +3,6 @@ import os from typing import Union from .external import DatasetBase -from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "trachea" self.sub_tissue = "trachea" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -55,21 +54,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py index 2e7a16097..e18bb8b6a 100644 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py @@ -3,7 +3,6 @@ import os from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -29,7 +28,7 @@ def __init__( raise ValueError("source %s not recognized" % self.source) self.organ = "trachea" self.sub_tissue = "trachea" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": {}, @@ -54,21 +53,21 @@ def _load(self, fn=None): self.adata.varm = {} self.adata.uns = {} - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'norm' - # 
self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' + # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py index 327445518..58985d2a0 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import 
ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "uterus" self.sub_tissue = "uterus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -64,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = 
self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py index 5bd723063..dad5f7194 100644 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py @@ -4,7 +4,6 @@ import pandas from typing import Union from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA class Dataset(DatasetBase): @@ -23,7 +22,7 @@ def __init__( self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "uterus" self.sub_tissue = "uterus" - self.has_celltypes = True + self.annotated = True self.class_maps = { "0": { @@ -64,21 +63,21 @@ def _load(self, fn=None): self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = celltypes.loc[self.adata.obs_names, :] - self.adata.uns[ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.has_celltypes - self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" + self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" + self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ + self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO + self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = "healthy" + self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True + self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None, new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/utils/create_meta_mouse.py b/sfaira/data/utils/create_meta_mouse.py index 1634af953..c2ab7c4ca 100644 --- a/sfaira/data/utils/create_meta_mouse.py +++ b/sfaira/data/utils/create_meta_mouse.py @@ -12,29 +12,29 @@ path_meta = str(sys.argv[2]) ds_dict = { + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), + "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), + "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), "prostate": 
mouse.DatasetGroupProstate(path=path, meta_path=path_meta), "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), diff --git a/sfaira/unit_tests/test_dataset.py b/sfaira/unit_tests/test_dataset.py index 17937e696..9f745322e 100644 --- a/sfaira/unit_tests/test_dataset.py +++ b/sfaira/unit_tests/test_dataset.py @@ -6,9 +6,9 @@ from sfaira.data import mouse, DatasetSuperGroup -class TestDatasets(unittest.TestCase): - dir_data: str = "." - dir_meta: str = "." 
+class TestDatasetGroups(unittest.TestCase): + dir_data: str = "./test_data" + dir_meta: str = "./test_data/meta" def test_load(self): ds = mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) @@ -18,6 +18,27 @@ def test_adata(self): ds = mouse.DatasetGroupBladder(path=self.dir_data, meta_path=self.dir_meta) _ = ds.adata + +class TestDatasetSuperGroups(unittest.TestCase): + dir_data: str = "./test_data" + dir_meta: str = "./test_data/meta" + + def test_load(self): + ds = DatasetSuperGroup( + dataset_groups=[ + mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) + ] + ) + ds.load_all() + + def test_adata(self): + ds = DatasetSuperGroup( + dataset_groups=[ + mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) + ] + ) + _ = ds.adata + def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): ds = DatasetSuperGroup( dataset_groups=[ diff --git a/sfaira/unit_tests/test_models.py b/sfaira/unit_tests/test_models.py index 02281d44d..a2762fd9d 100644 --- a/sfaira/unit_tests/test_models.py +++ b/sfaira/unit_tests/test_models.py @@ -104,7 +104,7 @@ def test_for_fatal(self): self.train() # (_,_), (_,sf) is dummy for kl loss _ = self.model.training_model.evaluate(x=(self.data, self.sf), y=(self.data, self.sf)) - embedding = self.model.predict_embedding(x=(self.data, self.sf))[0] + embedding = self.model.predict_embedding(x=(self.data, self.sf)) assert embedding.shape[0] == self.data.shape[0], embedding.shape denoised = self.model.predict_reconstructed(x=(self.data, self.sf)) assert denoised.shape == self.data.shape, (denoised.shape, self.data.shape) diff --git a/sfaira/unit_tests/test_userinterface.py b/sfaira/unit_tests/test_userinterface.py index 7548153b3..f7b8dbc90 100644 --- a/sfaira/unit_tests/test_userinterface.py +++ b/sfaira/unit_tests/test_userinterface.py @@ -37,7 +37,7 @@ def test_basic(self): temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), 'test_data') self.ui = 
UserInterface(custom_repo=temp_fn, sfaira_repo=False) - def test_kipoi(self): + def _test_kipoi(self): """ Test all kipoi_experimental model methods. From 198350ef64d0dbcdb8853fa563344a6ce86b218e Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 10 Dec 2020 18:20:36 +0100 Subject: [PATCH 020/161] self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) uses strings now (#35) --- .../data/human/adipose/human_adipose_2020_microwell_han_001.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_001.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_002.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_003.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_004.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_005.py | 2 +- .../adrenalgland/human_adrenalgland_2020_microwell_han_006.py | 2 +- sfaira/data/human/artery/human_artery_2020_microwell_han_001.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_001.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_002.py | 2 +- .../data/human/bladder/human_bladder_2020_microwell_han_003.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_001.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_002.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_003.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_004.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_005.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_006.py | 2 +- sfaira/data/human/blood/human_blood_2020_microwell_han_007.py | 2 +- sfaira/data/human/bone/human_bone_2020_microwell_han_001.py | 2 +- sfaira/data/human/bone/human_bone_2020_microwell_han_002.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_001.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_002.py | 2 +- 
sfaira/data/human/brain/human_brain_2020_microwell_han_003.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_004.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_005.py | 2 +- sfaira/data/human/brain/human_brain_2020_microwell_han_006.py | 2 +- .../human/calvaria/human_calvaria_2020_microwell_han_001.py | 2 +- sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py | 2 +- .../human_chorionicvillus_2020_microwell_han_001.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_001.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_002.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_003.py | 2 +- sfaira/data/human/colon/human_colon_2020_microwell_han_004.py | 2 +- .../human/duodenum/human_duodenum_2020_microwell_han_001.py | 2 +- .../human/epityphlon/human_epityphlon_2020_microwell_han_001.py | 2 +- .../human/esophagus/human_esophagus_2020_microwell_han_001.py | 2 +- .../human/esophagus/human_esophagus_2020_microwell_han_002.py | 2 +- sfaira/data/human/eye/human_eye_2020_microwell_han_001.py | 2 +- .../fallopiantube/human_fallopiantube_2020_microwell_han_001.py | 2 +- .../femalegonad/human_femalegonad_2020_microwell_han_001.py | 2 +- .../femalegonad/human_femalegonad_2020_microwell_han_002.py | 2 +- .../gallbladder/human_gallbladder_2020_microwell_han_001.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_001.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_002.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_003.py | 2 +- sfaira/data/human/heart/human_heart_2020_microwell_han_004.py | 2 +- sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py | 2 +- sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py | 2 +- .../data/human/jejunum/human_jejunum_2020_microwell_han_001.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py | 2 +- 
sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py | 2 +- sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_001.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_002.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_003.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_004.py | 2 +- sfaira/data/human/liver/human_liver_2020_microwell_han_005.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_001.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_002.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_003.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_004.py | 2 +- sfaira/data/human/lung/human_lung_2020_microwell_han_005.py | 2 +- .../human/malegonad/human_malegonad_2020_microwell_han_001.py | 2 +- .../human/malegonad/human_malegonad_2020_microwell_han_002.py | 2 +- sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py | 2 +- sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_001.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_002.py | 2 +- .../data/human/omentum/human_omentum_2020_microwell_han_003.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_001.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_002.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_003.py | 2 +- .../human/pancreas/human_pancreas_2020_microwell_han_004.py | 2 +- .../human/placenta/human_placenta_2018_10x_ventotormo_001.py | 2 +- .../placenta/human_placenta_2018_smartseq2_ventotormo_001.py | 2 +- 
.../human/placenta/human_placenta_2020_microwell_han_001.py | 2 +- sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py | 2 +- .../human/prostate/human_prostate_2020_microwell_han_001.py | 2 +- sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py | 2 +- sfaira/data/human/rib/human_rib_2020_microwell_han_001.py | 2 +- sfaira/data/human/rib/human_rib_2020_microwell_han_002.py | 2 +- sfaira/data/human/skin/human_skin_2020_microwell_han_001.py | 2 +- sfaira/data/human/skin/human_skin_2020_microwell_han_002.py | 2 +- .../human/spinalcord/human_spinalcord_2020_microwell_han_001.py | 2 +- sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py | 2 +- sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_001.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_002.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_003.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_004.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_005.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_006.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_007.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_008.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_009.py | 2 +- .../data/human/stomach/human_stomach_2020_microwell_han_010.py | 2 +- sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py | 2 +- sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py | 2 +- .../data/human/thyroid/human_thyroid_2020_microwell_han_001.py | 2 +- .../data/human/thyroid/human_thyroid_2020_microwell_han_002.py | 2 +- .../data/human/trachea/human_trachea_2020_microwell_han_001.py | 2 +- sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py | 2 +- sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py | 2 +- 
.../data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py | 2 +- 109 files changed, 109 insertions(+), 109 deletions(-) diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py index e56668222..d72bb0627 100644 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py index ddaba9b20..e9db32eb1 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py index 15d77b3d7..ec1f52bdc 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py index fbb336335..0deb5571f 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py index d32708200..310c19e76 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py index 84ed1cde8..120868718 100644 
--- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py index 7cb38578f..437aab383 100644 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py index be3a67f34..3ea6ffc35 100644 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py index 535cb4ea0..5bb5d35d8 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py index a16cce5fd..21ba1594d 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py index 553e135b5..29312ed94 100644 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) 
+ self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py index 39b050986..9745f5970 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py index 023d9467c..ef055d7ba 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py index 8f9fd060b..78c553a83 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py index b8b782ed7..1e80922d4 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py index 470ac2681..0ae2a490e 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py index dfb7748b8..b745cb32a 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py index bb9f21a0a..a96dc2fb5 100644 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py index 801d901bf..eace2ce68 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py index 50025c102..3d7d21c78 100644 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' 
self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py index c19438a2e..a3da7a14e 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py index ca5284f80..ffa89b995 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py index c952327cc..d06a74c5e 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py @@ -89,5 +89,5 @@ def _load(self, fn=None): 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py index f595e62b1..a5e5d3798 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py index 370158a58..da1d294fc 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py @@ -89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py index b1d3936cc..0add7c561 100644 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py @@ 
-89,5 +89,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py index 641315e86..6c18f11c7 100644 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py index d0840bca6..400d4808f 100644 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py index d63c79975..1c9bfb424 100644 --- 
a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py index 571b8a40d..c83fd925f 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py index b6ce69993..3cd8d5469 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py 
b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py index 28df255c4..56d1f309a 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py index 31cac84b1..acdcef798 100644 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py @@ -85,4 +85,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py index 7276b2985..be0e66b06 100644 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py index 1b47ad8cc..e68044b9e 100644 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py index b7dc9fc33..860dabe23 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py @@ -82,4 +82,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py index 0b6cf55db..ab5d04b70 100644 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py @@ -82,4 +82,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py index d2494261a..88f2468c5 100644 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py @@ -81,5 +81,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py index 04b317c65..cd3d107e2 100644 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py index 479c0cc77..78b0e1cc5 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, 
fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py index 6845a9e8d..45c00bf50 100644 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py index 6483dee51..3d685e0f3 100644 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py index 1be4fda43..325d4e08e 100644 --- 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py index f44e804da..bc8eeb41a 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py index 33388a4b3..01fbee187 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py 
b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py index 7eefedef5..24b48e6bf 100644 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py index f245679da..4553eedae 100644 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py index 235186829..db65ce9bb 100644 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py @@ -84,5 +84,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py index 142be26aa..48507f945 100644 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py @@ -57,4 +57,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py index 2dd817b38..99a147d75 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py @@ -112,4 +112,4 @@ def _load(self, fn=None): self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py index 4fcaa6872..ffea57f1b 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py index a6f6cfe25..353dae669 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py index 5d766eab9..94c6bf3b8 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py index 7a7c469b0..264ec8857 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py index 3c997f3a2..68079fd52 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py index d538f2567..10699d7a8 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py index 4c6342f9d..91667d873 100644 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py @@ -107,4 +107,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 
'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py index a617648e9..ed46c5da4 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py index ba9e76661..ba74db0ba 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py index 258924659..9ecdc5456 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py @@ -86,4 +86,4 @@ def _load(self, fn=None): 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py index 98df6120e..6f8003ab9 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py index a7fd93836..a4909b27f 100644 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py @@ -86,4 +86,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py index 4d83d01b0..99e467b2e 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py @@ -108,5 
+108,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py index ee208054a..1da535072 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py index 03a9d9783..9e1566a2f 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py index b6b48c97f..af7875309 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py index 088f12d59..8233627f0 100644 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py @@ -108,5 +108,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py index f961a5a0b..045e0ba9c 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py @@ -86,5 +86,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py index 
5e0745e4a..3df75da7f 100644 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py @@ -86,5 +86,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py index 95b631139..c8da3462a 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py index 958b96a4d..032d37ce1 100644 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py index bf4e80d49..fa911836d 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py index 2ead8c196..3fb5c9d11 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py index 62112e74e..7cda691f9 100644 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) 
+ self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py index a1d603cbd..bcc01b053 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py index 53a907ca7..22ff8326d 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py index 94ac26a0d..7bd2e3004 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py index 3c2ee550f..c218c072c 100644 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py @@ -96,4 +96,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py index 5c47f3567..e5c833dac 100644 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py @@ -105,7 +105,7 @@ def _load(self, fn=None): self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py index 2645c60ac..8de479d29 100644 --- 
a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py @@ -105,7 +105,7 @@ def _load(self, fn=None): self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") self.adata = self.adata[:, ~self.adata.var.index.isin( ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py index 262758097..a87c4a0a8 100644 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py @@ -89,4 +89,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py index 3c2bc16dc..80e79ecd1 100644 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, 
ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py index e841798d1..ef194dee4 100644 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py @@ -78,5 +78,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py index 8281d0d45..702e630a7 100644 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py @@ -73,4 +73,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py index 84089275e..c2d3a7c5b 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py index 7b09e8b4f..a909d5890 100644 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py index 77de8ec43..1e17922c3 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py @@ -87,5 +87,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py index ab5b62e29..f05d0e8e8 100644 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py @@ -87,5 +87,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = 
self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py index 9b69be1fe..afa0de19e 100644 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py index e855953f2..dbcc0eb87 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py @@ -79,4 +79,4 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py index 9f03b1467..4c44116aa 100644 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py @@ -79,4 +79,4 @@ def _load(self, fn=None): 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py index 1c68613f3..d9100c776 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py index 21367d1b3..b5e9fe7e0 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py index 6e598f4f1..c1a2d80e0 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ 
b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py index e795169b0..5dc7e5944 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py index c159287ad..9c779b667 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py index 
695c0b8cb..90f12d3c8 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py index d46ff6f9e..c583b2bf7 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py index 12177da7e..041004ec7 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py index bb63d705c..935271988 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py index da8d07986..5cc789fa6 100644 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py index 954804147..e50d8bf2d 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py @@ -71,5 +71,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + 
self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py index 3686909cd..b0a97e9de 100644 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py @@ -71,5 +71,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py index 82a1c1194..65472f513 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py index 20bbbf689..69cc04769 100644 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - 
self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py index a433a3fe2..25af6f305 100644 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py index 4c1102ff6..4a9af9e70 100644 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py index 1af51d4d3..35a9432e0 100644 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py @@ -58,5 +58,5 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 
'raw' self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py index 5749d53e3..04c86cb19 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -64,7 +64,7 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names self.adata.var = var - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col=ADATA_IDS_SFAIRA.gene_id_ensembl) + self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) From ac24ca99c03a929a2bdaba4d0c8963b8ca03926d Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 10 Dec 2020 18:23:38 +0100 Subject: [PATCH 021/161] fixed remaining instance of has_celltypes (#36) --- sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py index 2f925accc..f1a8d3d71 100644 --- a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py +++ b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py @@ -62,7 +62,7 @@ def _load(self, fn=None): self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.has_celltypes + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ From 135685639aef1f06e41007e3524d3b0e66edcb4a Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 10 Dec 2020 18:28:12 +0100 Subject: [PATCH 022/161] Has celltypes bug (#37) * fixed remaining instance of has_celltypes * fixed fields reference --- sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py index ff1f38dcc..616dc5927 100644 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py @@ -147,4 +147,4 @@ def _load(self, fn=None): self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact]\ .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - self._convert_and_set_var_names(symbol_col=ADATA_IDS_SFAIRA.gene_id_names, ensembl_col='Accession') + self._convert_and_set_var_names(symbol_col="names", ensembl_col='Accession') From 582e6c83ef9246ee01dcd6e9deedbe45f6c5d13f Mon Sep 17 00:00:00 2001 From: Abdul Moeed Date: Wed, 30 Dec 2020 14:47:22 +0100 Subject: [PATCH 023/161] Data generator for model evaluation and prediction. (#46) * Refactor __get_dataset() to always return data generator. Modify functions to handle generator. * Remove unnecessary sparse tensor conversion. 
--- sfaira/estimators/keras.py | 141 ++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 82 deletions(-) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 960a091b8..759eb3d85 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -522,8 +522,11 @@ def init_model( ) @staticmethod - def _get_output_dim(n_features, model_type): - if model_type == "vae": + def _get_output_dim(n_features, model_type, mode='train'): + if mode == 'predict': # Output shape is same for predict mode regardless of model type + output_types = (tf.float32, tf.float32) + output_shapes = (n_features, ()) + elif model_type == "vae": output_types = ((tf.float32, tf.float32), (tf.float32, tf.float32)) output_shapes = ((n_features, ()), (n_features, ())) else: @@ -556,75 +559,43 @@ def _get_dataset( if idx is None: idx = np.arange(0, self.data.n_obs) - if mode == 'train' or mode == 'train_val': + if mode in ['train', 'train_val', 'eval', 'predict']: # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: - n_features = self.data.X.shape[1] - output_types, output_shapes = self._get_output_dim(n_features, model_type) - - if model_type == "vae": - def generator(): - sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) - for i in idx: - # (_,_), (_,sf) is dummy for kl loss - x = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() - sf = self._prepare_sf(x=x)[0] - yield (x, sf), (x, sf) - else: - def generator(): - sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) - for i in idx: - x = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() - sf = self._prepare_sf(x=x)[0] - yield (x, sf), x - else: - x = self._prepare_data_matrix(idx=idx) - sf = self._prepare_sf(x=x) - n_features = x.shape[1] - output_types, output_shapes = self._get_output_dim(n_features, model_type) - - if model_type == "vae": - def generator(): - for i in 
range(x.shape[0]): - # (_,_), (_,sf) is dummy for kl loss - yield (x[i, :].toarray().flatten(), sf[i]), (x[i, :].toarray().flatten(), sf[i]) - else: - def generator(): - for i in range(x.shape[0]): - yield (x[i, :].toarray().flatten(), sf[i]), x[i, :].toarray().flatten() + x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + + def generator(): + is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) + indices = idx if self.data.isbacked else range(x.shape[0]) + for i in indices: + x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() + sf = self._prepare_sf(x=x_sample)[0] + if mode == 'predict': # If predicting, only return X regardless of model type + yield x_sample, sf + elif model_type == "vae": + yield (x_sample, sf), (x_sample, sf) + else: + yield (x_sample, sf), x_sample + + n_features = x.shape[1] + n_samples = x.shape[0] + output_types, output_shapes = self._get_output_dim(n_features, model_type, mode=mode) dataset = tf.data.Dataset.from_generator( generator=generator, output_types=output_types, output_shapes=output_shapes ) - if mode == 'train': + # Only shuffle in train modes + if mode in ['train', 'train_val']: dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(self.data.X.shape[0], shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True - ).batch(batch_size).prefetch(prefetch) + dataset = dataset.shuffle( + buffer_size=min(n_samples, shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True) + dataset = dataset.batch(batch_size).prefetch(prefetch) return dataset - elif mode == 'eval' or mode == 'predict': - # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: - # Need to supply sorted indices to backed anndata: - x = self.data.X[np.sort(idx), :] - # Sort back in original order of indices. 
- x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] - else: - x = self._prepare_data_matrix(idx=idx) - x = x.toarray() - - sf = self._prepare_sf(x=x) - if self.model_type[:3] == "vae": - return (x, sf), (x, sf) - else: - return (x, sf), x - elif mode == 'gradient_method': # Prepare data reading according to whether anndata is backed or not: if self.data.isbacked: @@ -644,9 +615,9 @@ def generator(): sf = self._prepare_sf(x=x) cell_to_class = self._get_class_dict() y = self.data.obs['cell_ontology_class'][idx] # for gradients per celltype in compute_gradients_input() - n_features = x.shape[1] + n_features = x.shape[1] output_types, output_shapes = self._get_output_dim(n_features, 'vae') - + def generator(): for i in range(x.shape[0]): yield (x[i, :].toarray().flatten(), sf[i]), (x[i, :].toarray().flatten(), cell_to_class[y[i]]) @@ -705,27 +676,31 @@ def _metrics(self): return {"neg_ll": [custom_mse, custom_negll]} - def evaluate_any(self, idx): + def evaluate_any(self, idx, batch_size=64, max_steps=20): """ Evaluate the custom model on any local data. :param idx: Indices of observations to evaluate on. Evaluates on all observations if None. + :param batch_size: Batch size for evaluation. + :param max_steps: Maximum steps before evaluation round is considered complete. :return: Dictionary of metric names and values. 
""" - if idx is None or idx.any(): # true if the array is not empty or if the passed value is None - x, y = self._get_dataset( + if idx is None or idx.any(): # true if the array is not empty or if the passed value is None + idx = np.arange(0, self.data.n_obs) if idx is None else idx + dataset = self._get_dataset( idx=idx, - batch_size=None, + batch_size=batch_size, mode='eval' ) + steps = min(max(len(idx) // batch_size, 1), max_steps) results = self.model.training_model.evaluate( - x=x, y=y + x=dataset, steps=steps ) return dict(zip(self.model.training_model.metrics_names, results)) else: return {} - def evaluate(self): + def evaluate(self, batch_size=64, max_steps=20): """ Evaluate the custom model on local data. @@ -733,14 +708,16 @@ def evaluate(self): :return: Dictionary of metric names and values. """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y = self._get_dataset( - idx=self.idx_test, - batch_size=None, + if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None + idx = np.arange(0, self.data.n_obs) if self.idx_test is None else self.idx_test + dataset = self._get_dataset( + idx=idx, + batch_size=batch_size, mode='eval' ) + steps = min(max(len(idx) // batch_size, 1), max_steps) results = self.model.training_model.evaluate( - x=x, y=y + x=dataset, steps=steps ) return dict(zip(self.model.training_model.metrics_names, results)) else: @@ -753,10 +730,10 @@ def predict(self): :return: prediction """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y = self._get_dataset( + if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None + x = self._get_dataset( idx=self.idx_test, - batch_size=None, + batch_size=64, mode='predict' ) return self.model.predict_reconstructed( @@ -772,10 +749,10 @@ def 
predict_embedding(self): :return: latent space """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y = self._get_dataset( + if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None + x = self._get_dataset( idx=self.idx_test, - batch_size=None, + batch_size=64, mode='predict' ) return self.model.predict_embedding( @@ -792,10 +769,10 @@ def predict_embedding_variational(self): :return: sample of latent space, mean of latent space, variance of latent space """ - if self.idx_test is None or self.idx_test: # true if the array is not empty or if the passed value is None - x, y = self._get_dataset( + if self.idx_test is None or self.idx_test: # true if the array is not empty or if the passed value is None + x = self._get_dataset( idx=self.idx_test, - batch_size=None, + batch_size=64, mode='predict' ) return self.model.predict_embedding( From 91b8a1b91fd7979e6f779b967023d5a4306c9b7c Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Tue, 19 Jan 2021 09:46:16 +0100 Subject: [PATCH 024/161] Adjust data loaders to directory-wise set-up (#38) * improved and modified meta data usage * fixed download website to be able to contain multiple entries in meta data saving * added support for variable uns and obs setting of data meta data that can be cell specific * added support to check for equality of column entries to condition string to define healthy into super method * moved _mapped_features into constants * introduced cell ontology obs keys into base class * updated cellxgene data loader to new constant field standards * coupled .annotated to obs keys that indicate presence of cell types * added dev stage and age meta data * refactored UNS_STRING_META_IN_OBS as constant * updated cellxgene data loader to new data loading format * updated data loaders to new format * updated interactive data set * grouped data loaders by study * made all data loaders raw data loaders * added new auto caching * added download method into base class * removed separate caching from data loaders * use "organism" instead of "species" across sfaira and optimise imports * fixed example code block * excluded data loader tests from git path * took out external.py files from data loaders to make directories leaner * added directory oriented, automated data set groups * added sfaira wide super groups and super groups nesting * enabled parallelised loading * refactored unknown cell type identification * added xlrd dependency * added github workflows * homogenized string style to "" * enabled rapid raw loading of groups saved in one object * remove superfluous raw loading docstrings Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> Co-authored-by: Lukas Heumos --- .bandit.yml | 7 + .github/ISSUE_TEMPLATE/bug_report.md | 28 + .github/ISSUE_TEMPLATE/feature_request.md | 17 + .github/ISSUE_TEMPLATE/general_question.md | 13 + .github/dependabot.yml | 25 + .github/pull_request_template.md | 19 +
.github/workflows/build_package.yml | 42 + .../pr_to_master_from_patch_release_only.yml | 34 + .github/workflows/publish_docs.yml | 42 + .github/workflows/publish_package.yml | 31 + .github/workflows/run_bandit.yml | 30 + .github/workflows/run_flake8_linting.yml | 31 + .gitignore | 138 +- .readthedocs.yml | 13 +- README.rst | 19 +- docs/Makefile | 20 + docs/api/sfaira.data.DatasetBase.rst | 75 + docs/api/sfaira.data.DatasetInteractive.rst | 75 + docs/api/sfaira.data.DatasetSuperGroup.rst | 35 + docs/api/sfaira.data.human.rst | 23 + docs/api/sfaira.data.mouse.rst | 23 + ...ira.genomes.ExtractFeatureListEnsemble.rst | 26 + docs/api/sfaira.models.celltype.rst | 23 + docs/api/sfaira.models.embedding.rst | 23 + docs/api/sfaira.train.GridsearchContainer.rst | 37 + ...aira.train.SummarizeGridsearchCelltype.rst | 43 + ...ira.train.SummarizeGridsearchEmbedding.rst | 47 + docs/api/sfaira.train.TargetZoos.rst | 24 + docs/api/sfaira.train.TrainModelCelltype.rst | 36 + docs/api/sfaira.train.TrainModelEmbedding.rst | 36 + docs/api/sfaira.ui.UserInterface.rst | 38 + docs/changelog.rst | 21 + docs/data.rst | 238 ++- docs/index.rst | 4 +- docs/release-latest.rst | 6 - docs/release-notes.rst | 11 - docs/requirements.txt | 7 + requirements.txt | 4 +- setup.cfg | 15 + setup.py | 38 +- sfaira/__init__.py | 13 +- sfaira/consts/__init__.py | 2 +- sfaira/consts/adata_fields.py | 72 +- sfaira/consts/meta_data_files.py | 30 +- sfaira/data/__init__.py | 8 +- sfaira/data/base.py | 1419 ++++++++++++++--- sfaira/data/databases/__init__.py | 1 - sfaira/data/databases/cellxgene/__init__.py | 2 - .../databases/cellxgene/cellxgene_group.py | 30 - .../databases/cellxgene/cellxgene_loader.py | 71 - sfaira/data/databases/cellxgene/external.py | 3 - sfaira/data/dataloaders/__init__.py | 4 + .../dataloaders/anatomical_groups/__init__.py | 2 + .../anatomical_groups/human/__init__.py | 44 + .../anatomical_groups/human/external.py | 2 + .../anatomical_groups/human/human_adipose.py | 26 + 
.../human/human_adrenalgland.py | 36 + .../anatomical_groups/human/human_artery.py | 26 + .../anatomical_groups/human/human_bladder.py | 30 + .../anatomical_groups/human/human_blood.py | 42 + .../anatomical_groups/human/human_bone.py | 30 + .../anatomical_groups/human/human_brain.py | 38 + .../anatomical_groups/human/human_calvaria.py | 26 + .../anatomical_groups/human/human_cervix.py | 26 + .../human/human_chorionicvillus.py | 26 + .../anatomical_groups/human/human_colon.py | 40 + .../anatomical_groups/human/human_duodenum.py | 26 + .../human/human_epityphlon.py | 26 + .../human/human_esophagus.py | 30 + .../anatomical_groups/human/human_eye.py | 32 + .../human/human_fallopiantube.py | 26 + .../human/human_femalegonad.py | 28 + .../human/human_gallbladder.py | 26 + .../anatomical_groups/human/human_heart.py | 32 + .../anatomical_groups/human/human_hesc.py | 26 + .../anatomical_groups/human/human_ileum.py | 30 + .../anatomical_groups/human/human_jejunum.py | 26 + .../anatomical_groups/human/human_kidney.py | 44 + .../anatomical_groups/human/human_liver.py | 42 + .../anatomical_groups/human/human_lung.py | 54 + .../human/human_malegonad.py | 30 + .../anatomical_groups/human/human_mixed.py | 26 + .../anatomical_groups/human/human_muscle.py | 28 + .../anatomical_groups/human/human_omentum.py | 30 + .../anatomical_groups/human/human_pancreas.py | 38 + .../anatomical_groups/human/human_placenta.py | 30 + .../anatomical_groups/human/human_pleura.py | 26 + .../anatomical_groups/human/human_prostate.py | 28 + .../anatomical_groups/human/human_rectum.py | 28 + .../anatomical_groups/human/human_rib.py | 28 + .../anatomical_groups/human/human_skin.py | 28 + .../human/human_spinalcord.py | 26 + .../anatomical_groups/human/human_spleen.py | 30 + .../anatomical_groups/human/human_stomach.py | 44 + .../anatomical_groups/human/human_thymus.py | 30 + .../anatomical_groups/human/human_thyroid.py | 28 + .../anatomical_groups/human/human_trachea.py | 26 + 
.../anatomical_groups/human/human_ureter.py | 26 + .../anatomical_groups/human/human_uterus.py | 26 + .../anatomical_groups/mouse/__init__.py | 27 + .../anatomical_groups/mouse/external.py | 2 + .../anatomical_groups/mouse/mouse_adipose.py | 35 + .../anatomical_groups/mouse/mouse_bladder.py | 31 + .../anatomical_groups/mouse/mouse_blood.py | 35 + .../anatomical_groups/mouse/mouse_bone.py | 31 + .../anatomical_groups/mouse/mouse_brain.py | 33 + .../anatomical_groups/mouse/mouse_colon.py | 29 + .../mouse/mouse_diaphragm.py | 27 + .../mouse/mouse_femalegonad.py | 29 + .../anatomical_groups/mouse/mouse_heart.py | 32 + .../anatomical_groups/mouse/mouse_ileum.py | 31 + .../anatomical_groups/mouse/mouse_kidney.py | 33 + .../anatomical_groups/mouse/mouse_liver.py | 33 + .../anatomical_groups/mouse/mouse_lung.py | 35 + .../mouse/mouse_malegonad.py | 29 + .../mouse/mouse_mammarygland.py | 37 + .../anatomical_groups/mouse/mouse_muscle.py | 31 + .../anatomical_groups/mouse/mouse_pancreas.py | 47 + .../anatomical_groups/mouse/mouse_placenta.py | 29 + .../anatomical_groups/mouse/mouse_prostate.py | 29 + .../anatomical_groups/mouse/mouse_rib.py | 31 + .../anatomical_groups/mouse/mouse_skin.py | 29 + .../anatomical_groups/mouse/mouse_spleen.py | 31 + .../anatomical_groups/mouse/mouse_stomach.py | 27 + .../anatomical_groups/mouse/mouse_thymus.py | 31 + .../anatomical_groups/mouse/mouse_tongue.py | 29 + .../anatomical_groups/mouse/mouse_trachea.py | 29 + .../anatomical_groups/mouse/mouse_uterus.py | 29 + sfaira/data/dataloaders/databases/__init__.py | 1 + .../databases/cellxgene/__init__.py | 2 + .../databases/cellxgene/cellxgene_group.py | 26 + .../databases/cellxgene/cellxgene_loader.py | 70 + .../databases/cellxgene/external.py | 3 + .../data/dataloaders/databases/super_group.py | 22 + sfaira/data/dataloaders/loaders/__init__.py | 1 + .../d10_1016_j_cell_2017_09_004/__init__.py | 1 + .../human_pancreas_2017_smartseq2_enge_001.py | 87 + .../d10_1016_j_cell_2018_02_001/__init__.py 
| 1 + .../d10_1016_j_cell_2018_02_001/base.py | 43 + .../mouse_bladder_2018_microwell_han_001.py | 48 + .../mouse_blood_2018_microwell_han_001.py | 53 + .../mouse_blood_2018_microwell_han_002.py | 53 + .../mouse_blood_2018_microwell_han_003.py | 53 + .../mouse_blood_2018_microwell_han_004.py | 53 + .../mouse_blood_2018_microwell_han_005.py | 53 + .../mouse_bone_2018_microwell_001.py | 50 + .../mouse_brain_2018_microwell_han_001.py | 46 + .../mouse_brain_2018_microwell_han_002.py | 46 + ...ouse_femalegonad_2018_microwell_han_001.py | 46 + ...ouse_femalegonad_2018_microwell_han_002.py | 46 + .../mouse_ileum_2018_microwell_han_001.py | 55 + .../mouse_ileum_2018_microwell_han_002.py | 55 + .../mouse_ileum_2018_microwell_han_003.py | 55 + .../mouse_kidney_2018_microwell_han_001.py | 35 + .../mouse_kidney_2018_microwell_han_002.py | 66 + .../mouse_liver_2018_microwell_han_001.py | 53 + .../mouse_liver_2018_microwell_han_002.py | 47 + .../mouse_lung_2018_microwell_han_001.py | 64 + .../mouse_lung_2018_microwell_han_002.py | 64 + .../mouse_lung_2018_microwell_han_003.py | 64 + .../mouse_malegonad_2018_microwell_han_001.py | 51 + .../mouse_malegonad_2018_microwell_han_002.py | 51 + ...use_mammarygland_2018_microwell_han_001.py | 50 + ...use_mammarygland_2018_microwell_han_002.py | 50 + ...use_mammarygland_2018_microwell_han_003.py | 50 + ...use_mammarygland_2018_microwell_han_004.py | 50 + .../mouse_muscle_2018_microwell_han_001.py | 49 + .../mouse_pancreas_2018_microwell_han_001.py | 54 + .../mouse_placenta_2018_microwell_han_001.py | 60 + .../mouse_placenta_2018_microwell_han_002.py | 60 + .../mouse_prostate_2018_microwell_han_001.py | 38 + .../mouse_prostate_2018_microwell_han_002.py | 38 + .../mouse_rib_2018_microwell_han_001.py | 57 + .../mouse_rib_2018_microwell_han_002.py | 57 + .../mouse_rib_2018_microwell_han_003.py | 57 + .../mouse_spleen_2018_microwell_han_001.py | 43 + .../mouse_stomach_2018_microwell_han_001.py | 49 + .../mouse_thymus_2018_microwell_han_001.py 
| 40 + .../mouse_uterus_2018_microwell_han_001.py | 51 + .../mouse_uterus_2018_microwell_han_002.py | 51 + .../d10_1016_j_cell_2018_08_067/__init__.py | 1 + .../human_colon_2019_10x_kinchen_001.py | 89 ++ .../d10_1016_j_cell_2019_06_029/__init__.py | 1 + .../human_colon_2019_10x_smilie_001.py | 88 + .../d10_1016_j_cell_2019_08_008/__init__.py | 1 + .../human_ileum_2019_10x_martin_001.py | 70 + .../d10_1016_j_celrep_2018_11_086/__init__.py | 1 + .../human_prostate_2018_10x_henry_001.py | 58 + .../d10_1016_j_cels_2016_08_011/__init__.py | 1 + .../human_pancreas_2016_indrop_baron_001.py | 64 + .../d10_1016_j_cmet_2016_08_020/__init__.py | 1 + ...pancreas_2016_smartseq2_segerstolpe_001.py | 72 + .../d10_1016_j_cmet_2019_01_021/__init__.py | 1 + .../d10_1016_j_cmet_2019_01_021/base.py | 59 + .../mouse_pancreas_2019_10x_thompson_001.py | 26 + .../mouse_pancreas_2019_10x_thompson_002.py | 26 + .../mouse_pancreas_2019_10x_thompson_003.py | 26 + .../mouse_pancreas_2019_10x_thompson_004.py | 26 + .../mouse_pancreas_2019_10x_thompson_005.py | 26 + .../mouse_pancreas_2019_10x_thompson_006.py | 26 + .../mouse_pancreas_2019_10x_thompson_007.py | 26 + .../mouse_pancreas_2019_10x_thompson_008.py | 26 + .../d10_1016_j_devcel_2020_01_033/__init__.py | 1 + .../human_lung_2020_10x_miller_001.py | 77 + .../loaders/d10_1038_nmeth_4407/__init__.py | 1 + .../human_brain_2017_DroNcSeq_habib_001.py | 64 + .../d10_1038_s41422_018_0099_2/__init__.py | 1 + .../human_malegonad_2018_10x_guo_001.py | 61 + .../d10_1038_s41467_018_06318_7/__init__.py | 1 + .../human_liver_2018_10x_macparland_001.py | 71 + .../d10_1038_s41467_019_10861_2/__init__.py | 1 + .../human_kidney_2019_10xSn_lake_001.py | 80 + .../d10_1038_s41467_019_12464_3/__init__.py | 1 + .../human_mixed_2019_10x_szabo_001.py | 117 ++ .../d10_1038_s41467_019_12780_8/__init__.py | 1 + .../human_eye_2019_10x_menon_001.py | 54 + .../d10_1038_s41586_018_0698_6/__init__.py | 1 + .../human_placenta_2018_10x_ventotormo_001.py | 92 ++ 
..._placenta_2018_smartseq2_ventotormo_001.py | 92 ++ .../d10_1038_s41586_019_1373_2/__init__.py | 1 + .../human_liver_2019_mCELSeq2_aizarani_001.py | 91 ++ .../d10_1038_s41586_019_1631_3/__init__.py | 1 + .../human_liver_2019_10x_ramachandran_001.py | 83 + .../d10_1038_s41586_019_1652_y/__init__.py | 1 + .../human_liver_2019_10x_popescu_001.py | 72 + .../d10_1038_s41586_020_2157_4/__init__.py | 1 + .../d10_1038_s41586_020_2157_4/base.py | 156 ++ .../human_adipose_2020_microwell_han_001.py | 22 + ...man_adrenalgland_2020_microwell_han_001.py | 22 + ...man_adrenalgland_2020_microwell_han_002.py | 22 + ...man_adrenalgland_2020_microwell_han_003.py | 22 + ...man_adrenalgland_2020_microwell_han_004.py | 22 + ...man_adrenalgland_2020_microwell_han_005.py | 22 + ...man_adrenalgland_2020_microwell_han_006.py | 22 + .../human_artery_2020_microwell_han_001.py | 22 + .../human_bladder_2020_microwell_han_001.py | 22 + .../human_bladder_2020_microwell_han_002.py | 22 + .../human_bladder_2020_microwell_han_003.py | 22 + .../human_blood_2020_microwell_han_001.py | 22 + .../human_blood_2020_microwell_han_002.py | 22 + .../human_blood_2020_microwell_han_003.py | 22 + .../human_blood_2020_microwell_han_004.py | 22 + .../human_blood_2020_microwell_han_005.py | 22 + .../human_blood_2020_microwell_han_006.py | 22 + .../human_blood_2020_microwell_han_007.py | 22 + .../human_bone_2020_microwell_han_001.py | 22 + .../human_bone_2020_microwell_han_002.py | 22 + .../human_brain_2020_microwell_han_001.py | 54 + .../human_brain_2020_microwell_han_002.py | 54 + .../human_brain_2020_microwell_han_003.py | 54 + .../human_brain_2020_microwell_han_004.py | 54 + .../human_brain_2020_microwell_han_005.py | 54 + .../human_brain_2020_microwell_han_006.py | 54 + .../human_calvaria_2020_microwell_han_001.py | 22 + .../human_cervix_2020_microwell_han_001.py | 22 + ..._chorionicvillus_2020_microwell_han_001.py | 22 + .../human_colon_2020_microwell_han_001.py | 50 + 
.../human_colon_2020_microwell_han_002.py | 50 + .../human_colon_2020_microwell_han_003.py | 50 + .../human_colon_2020_microwell_han_004.py | 50 + .../human_duodenum_2020_microwell_han_001.py | 22 + ...human_epityphlon_2020_microwell_han_001.py | 22 + .../human_esophagus_2020_microwell_han_001.py | 47 + .../human_esophagus_2020_microwell_han_002.py | 47 + .../human_eye_2020_microwell_han_001.py | 46 + ...an_fallopiantube_2020_microwell_han_001.py | 22 + ...uman_femalegonad_2020_microwell_han_001.py | 22 + ...uman_femalegonad_2020_microwell_han_002.py | 22 + ...uman_gallbladder_2020_microwell_han_001.py | 22 + .../human_heart_2020_microwell_han_001.py | 22 + .../human_heart_2020_microwell_han_002.py | 22 + .../human_heart_2020_microwell_han_003.py | 22 + .../human_heart_2020_microwell_han_004.py | 22 + .../human_hesc_2020_microwell_han_001.py | 19 + .../human_ileum_2020_microwell_han_001.py | 49 + .../human_jejunum_2020_microwell_han_001.py | 22 + .../human_kidney_2020_microwell_han_001.py | 72 + .../human_kidney_2020_microwell_han_002.py | 72 + .../human_kidney_2020_microwell_han_003.py | 72 + .../human_kidney_2020_microwell_han_004.py | 72 + .../human_kidney_2020_microwell_han_005.py | 72 + .../human_kidney_2020_microwell_han_006.py | 72 + .../human_kidney_2020_microwell_han_007.py | 72 + .../human_liver_2020_microwell_han_001.py | 51 + .../human_liver_2020_microwell_han_002.py | 51 + .../human_liver_2020_microwell_han_003.py | 51 + .../human_liver_2020_microwell_han_004.py | 51 + .../human_liver_2020_microwell_han_005.py | 51 + .../human_lung_2020_microwell_han_001.py | 73 + .../human_lung_2020_microwell_han_002.py | 73 + .../human_lung_2020_microwell_han_003.py | 73 + .../human_lung_2020_microwell_han_004.py | 73 + .../human_lung_2020_microwell_han_005.py | 73 + .../human_malegonad_2020_microwell_han_001.py | 51 + .../human_malegonad_2020_microwell_han_002.py | 51 + .../human_muscle_2020_microwell_han_001.py | 22 + .../human_muscle_2020_microwell_han_002.py | 22 
+ .../human_omentum_2020_microwell_han_001.py | 22 + .../human_omentum_2020_microwell_han_002.py | 22 + .../human_omentum_2020_microwell_han_003.py | 22 + .../human_pancreas_2020_microwell_han_001.py | 61 + .../human_pancreas_2020_microwell_han_002.py | 61 + .../human_pancreas_2020_microwell_han_003.py | 61 + .../human_pancreas_2020_microwell_han_004.py | 61 + .../human_placenta_2020_microwell_han_001.py | 54 + .../human_pleura_2020_microwell_han_001.py | 22 + .../human_prostate_2020_microwell_han_001.py | 43 + .../human_rectum_2020_microwell_han_001.py | 38 + .../human_rib_2020_microwell_han_001.py | 22 + .../human_rib_2020_microwell_han_002.py | 22 + .../human_skin_2020_microwell_han_001.py | 52 + .../human_skin_2020_microwell_han_002.py | 52 + ...human_spinalcord_2020_microwell_han_001.py | 22 + .../human_spleen_2020_microwell_han_001.py | 44 + .../human_spleen_2020_microwell_han_002.py | 44 + .../human_stomach_2020_microwell_han_001.py | 22 + .../human_stomach_2020_microwell_han_002.py | 22 + .../human_stomach_2020_microwell_han_003.py | 22 + .../human_stomach_2020_microwell_han_004.py | 22 + .../human_stomach_2020_microwell_han_005.py | 22 + .../human_stomach_2020_microwell_han_006.py | 22 + .../human_stomach_2020_microwell_han_007.py | 22 + .../human_stomach_2020_microwell_han_008.py | 22 + .../human_stomach_2020_microwell_han_009.py | 22 + .../human_stomach_2020_microwell_han_010.py | 22 + .../human_thymus_2020_microwell_han_001.py | 36 + .../human_thymus_2020_microwell_han_002.py | 36 + .../human_thyroid_2020_microwell_han_001.py | 22 + .../human_thyroid_2020_microwell_han_002.py | 22 + .../human_trachea_2020_microwell_han_001.py | 22 + .../human_ureter_2020_microwell_han_001.py | 22 + .../human_uterus_2020_microwell_han_001.py | 22 + .../d10_1038_s41586_020_2922_4/__init__.py | 1 + .../human_lung_2020_10x_travaglini_001.py | 196 +++ ...uman_lung_2020_smartseq2_travaglini_002.py | 162 ++ .../d10_1038_s41590_020_0602_z/__init__.py | 1 + 
.../human_colon_2020_10x_james_001.py | 76 + .../d10_1038_s41591_019_0468_5/__init__.py | 1 + .../human_lung_2019_10x_braga_001.py | 67 + .../human_lung_2019_10x_braga_002.py | 67 + .../human_lung_2019_dropseq_braga_003.py | 65 + .../d10_1038_s41593_019_0393_4/__init__.py | 1 + ...mouse_brain_2019_mouse_brain_atlas_temp.py | 49 +- .../d10_1038_s41597_019_0351_8/__init__.py | 1 + .../human_kidney_2020_10x_liao_001.py | 65 + .../d10_1073_pnas_1914143116/__init__.py | 1 + .../human_eye_2019_10x_voigt_001.py | 58 + .../loaders/d10_1084_jem_20191130/__init__.py | 1 + .../human_colon_2019_10x_wang_001.py | 57 + .../human_ileum_2019_10x_wang_001.py | 57 + .../human_rectum_2019_10x_wang_001.py | 56 + .../d10_1101_2020_03_13_991455/__init__.py | 1 + .../human_lung_2020_10x_lukassen_001.py | 61 + .../human_lung_2020_10x_lukassen_002.py | 66 + .../loaders/d10_1101_661728/__init__.py | 1 + .../loaders/d10_1101_661728/base.py | 56 + .../mouse_adipose_2019_10x_pisco_001.py | 35 + .../mouse_adipose_2019_smartseq2_pisco_001.py | 35 + .../mouse_adipose_2019_smartseq2_pisco_002.py | 35 + .../mouse_adipose_2019_smartseq2_pisco_003.py | 35 + .../mouse_adipose_2019_smartseq2_pisco_004.py | 35 + .../mouse_bladder_2019_10x_pisco_001.py | 35 + .../mouse_bladder_2019_smartseq2_pisco_001.py | 35 + .../mouse_bone_2019_10x_pisco_001.py | 36 + .../mouse_bone_2019_smartseq2_pisco_001.py | 36 + .../mouse_brain_2019_smartseq2_pisco_001.py | 37 + .../mouse_brain_2019_smartseq2_pisco_002.py | 37 + .../mouse_colon_2019_10x_pisco_001.py | 34 + .../mouse_colon_2019_smartseq2_pisco_001.py | 35 + ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 35 + .../mouse_heart_2019_10x_pisco_001.py | 35 + .../mouse_heart_2019_smartseq2_pisco_001.py | 35 + .../mouse_heart_2019_smartseq2_pisco_002.py | 35 + .../mouse_kidney_2019_10x_pisco_001.py | 39 + .../mouse_kidney_2019_smartseq2_pisco_001.py | 39 + .../mouse_liver_2019_10x_pisco_001.py | 35 + .../mouse_liver_2019_smartseq2_pisco_001.py | 35 + 
.../mouse_lung_2019_10x_pisco_001.py | 36 + .../mouse_lung_2019_smartseq2_pisco_001.py | 36 + .../mouse_mammarygland_2019_10x_pisco_001.py | 35 + ...e_mammarygland_2019_smartseq2_pisco_001.py | 35 + .../mouse_muscle_2019_10x_pisco_001.py | 35 + .../mouse_muscle_2019_smartseq2_pisco_001.py | 35 + .../mouse_pancreas_2019_10x_pisco_001.py | 36 + ...mouse_pancreas_2019_smartseq2_pisco_001.py | 36 + .../mouse_skin_2019_10x_pisco_001.py | 33 + .../mouse_skin_2019_smartseq2_pisco_001.py | 34 + .../mouse_spleen_2019_10x_pisco_001.py | 35 + .../mouse_spleen_2019_smartseq2_pisco_001.py | 33 + .../mouse_thymus_2019_10x_pisco_001.py | 35 + .../mouse_thymus_2019_smartseq2_pisco_001.py | 35 + .../mouse_tongue_2019_10x_pisco_001.py | 35 + .../mouse_tongue_2019_smartseq2_pisco_001.py | 35 + .../mouse_trachea_2019_10x_pisco_001.py | 36 + .../mouse_trachea_2019_smartseq2_pisco_001.py | 36 + .../loaders/d10_1101_753806/__init__.py | 1 + .../human_lung_2020_10x_habermann_001.py | 94 ++ .../d10_1126_science_aat5031/__init__.py | 1 + .../human_kidney_2019_10x_stewart_001.py | 128 ++ .../d10_1126_science_aay3224/__init__.py | 1 + .../human_thymus_2020_10x_park_001.py | 91 ++ .../d10_1186_s13059_019_1906_x/__init__.py | 1 + .../human_esophagus_2019_10x_madissoon_001.py | 68 + .../human_lung_2019_10x_madissoon_001.py | 73 + .../human_spleen_2019_10x_madissoon_001.py | 77 + .../d10_15252_embj_2018100811/__init__.py | 1 + .../human_eye_2019_10x_lukowski_001.py | 65 + .../dataloaders/loaders/d_nan/__init__.py | 1 + .../d_nan/human_blood_2018_10x_ica_001.py | 48 + .../human_blood_2019_10x_10xGenomics_001.py | 55 + .../d_nan/human_bone_2018_10x_ica_001.py | 48 + .../data/dataloaders/loaders/super_group.py | 45 + sfaira/data/dataloaders/super_group.py | 35 + ...et_and_preprocess_HumanCellLandscape.ipynb | 411 ----- sfaira/data/human/__init__.py | 44 - sfaira/data/human/adipose/__init__.py | 1 - sfaira/data/human/adipose/external.py | 2 - sfaira/data/human/adipose/human_adipose.py | 26 - 
.../human_adipose_2020_microwell_han_001.py | 61 - sfaira/data/human/adrenalgland/__init__.py | 1 - sfaira/data/human/adrenalgland/external.py | 2 - .../human/adrenalgland/human_adrenalgland.py | 36 - ...man_adrenalgland_2020_microwell_han_001.py | 61 - ...man_adrenalgland_2020_microwell_han_002.py | 61 - ...man_adrenalgland_2020_microwell_han_003.py | 61 - ...man_adrenalgland_2020_microwell_han_004.py | 61 - ...man_adrenalgland_2020_microwell_han_005.py | 61 - ...man_adrenalgland_2020_microwell_han_006.py | 61 - sfaira/data/human/artery/__init__.py | 1 - sfaira/data/human/artery/external.py | 2 - sfaira/data/human/artery/human_artery.py | 26 - .../human_artery_2020_microwell_han_001.py | 61 - sfaira/data/human/bladder/__init__.py | 1 - sfaira/data/human/bladder/external.py | 2 - sfaira/data/human/bladder/human_bladder.py | 30 - .../human_bladder_2020_microwell_han_001.py | 61 - .../human_bladder_2020_microwell_han_002.py | 61 - .../human_bladder_2020_microwell_han_003.py | 61 - sfaira/data/human/blood/__init__.py | 1 - sfaira/data/human/blood/external.py | 2 - sfaira/data/human/blood/human_blood.py | 42 - .../blood/human_blood_2018_10x_ica_001.py | 81 - .../human_blood_2019_10x_10xGenomics_001.py | 65 - .../human_blood_2020_microwell_han_001.py | 61 - .../human_blood_2020_microwell_han_002.py | 61 - .../human_blood_2020_microwell_han_003.py | 61 - .../human_blood_2020_microwell_han_004.py | 61 - .../human_blood_2020_microwell_han_005.py | 61 - .../human_blood_2020_microwell_han_006.py | 61 - .../human_blood_2020_microwell_han_007.py | 61 - sfaira/data/human/bone/__init__.py | 1 - sfaira/data/human/bone/external.py | 2 - sfaira/data/human/bone/human_bone.py | 30 - .../human/bone/human_bone_2018_10x_ica_001.py | 81 - .../bone/human_bone_2020_microwell_han_001.py | 61 - .../bone/human_bone_2020_microwell_han_002.py | 61 - sfaira/data/human/brain/__init__.py | 1 - sfaira/data/human/brain/external.py | 2 - sfaira/data/human/brain/human_brain.py | 38 - 
.../human_brain_2017_DroNcSeq_habib_001.py | 82 - .../human_brain_2020_microwell_han_001.py | 93 -- .../human_brain_2020_microwell_han_002.py | 93 -- .../human_brain_2020_microwell_han_003.py | 93 -- .../human_brain_2020_microwell_han_004.py | 93 -- .../human_brain_2020_microwell_han_005.py | 93 -- .../human_brain_2020_microwell_han_006.py | 93 -- sfaira/data/human/calvaria/__init__.py | 1 - sfaira/data/human/calvaria/external.py | 2 - sfaira/data/human/calvaria/human_calvaria.py | 26 - .../human_calvaria_2020_microwell_han_001.py | 61 - sfaira/data/human/cervix/__init__.py | 1 - sfaira/data/human/cervix/external.py | 2 - sfaira/data/human/cervix/human_cervix.py | 26 - .../human_cervix_2020_microwell_han_001.py | 61 - sfaira/data/human/chorionicvillus/__init__.py | 1 - sfaira/data/human/chorionicvillus/external.py | 2 - .../chorionicvillus/human_chorionicvillus.py | 26 - ..._chorionicvillus_2020_microwell_han_001.py | 61 - sfaira/data/human/colon/__init__.py | 1 - sfaira/data/human/colon/external.py | 2 - sfaira/data/human/colon/human_colon.py | 40 - .../colon/human_colon_2019_10x_kinchen_001.py | 150 -- .../colon/human_colon_2019_10x_smilie_001.py | 106 -- .../colon/human_colon_2019_10x_wang_001.py | 74 - .../colon/human_colon_2020_10x_james_001.py | 93 -- .../human_colon_2020_microwell_han_001.py | 88 - .../human_colon_2020_microwell_han_002.py | 88 - .../human_colon_2020_microwell_han_003.py | 88 - .../human_colon_2020_microwell_han_004.py | 88 - sfaira/data/human/duodenum/__init__.py | 1 - sfaira/data/human/duodenum/external.py | 2 - sfaira/data/human/duodenum/human_duodenum.py | 26 - .../human_duodenum_2020_microwell_han_001.py | 61 - sfaira/data/human/epityphlon/__init__.py | 1 - sfaira/data/human/epityphlon/external.py | 2 - .../data/human/epityphlon/human_epityphlon.py | 26 - ...human_epityphlon_2020_microwell_han_001.py | 61 - sfaira/data/human/esophagus/__init__.py | 1 - sfaira/data/human/esophagus/external.py | 2 - 
.../data/human/esophagus/human_esophagus.py | 30 - .../human_esophagus_2019_10x_madissoon_001.py | 85 - .../human_esophagus_2020_microwell_han_001.py | 85 - .../human_esophagus_2020_microwell_han_002.py | 85 - sfaira/data/human/eye/__init__.py | 1 - sfaira/data/human/eye/external.py | 2 - sfaira/data/human/eye/human_eye.py | 32 - .../eye/human_eye_2019_10x_lukowski_001.py | 81 - .../human/eye/human_eye_2019_10x_menon_001.py | 71 - .../human/eye/human_eye_2019_10x_voigt_001.py | 75 - .../eye/human_eye_2020_microwell_han_001.py | 85 - sfaira/data/human/fallopiantube/__init__.py | 1 - sfaira/data/human/fallopiantube/external.py | 2 - .../fallopiantube/human_fallopiantube.py | 26 - ...an_fallopiantube_2020_microwell_han_001.py | 61 - sfaira/data/human/femalegonad/__init__.py | 1 - sfaira/data/human/femalegonad/external.py | 2 - .../human/femalegonad/human_femalegonad.py | 28 - ...uman_femalegonad_2020_microwell_han_001.py | 61 - ...uman_femalegonad_2020_microwell_han_002.py | 61 - sfaira/data/human/gallbladder/__init__.py | 1 - sfaira/data/human/gallbladder/external.py | 2 - .../human/gallbladder/human_gallbladder.py | 26 - ...uman_gallbladder_2020_microwell_han_001.py | 61 - sfaira/data/human/heart/__init__.py | 1 - sfaira/data/human/heart/external.py | 2 - sfaira/data/human/heart/human_heart.py | 32 - .../human_heart_2020_microwell_han_001.py | 61 - .../human_heart_2020_microwell_han_002.py | 61 - .../human_heart_2020_microwell_han_003.py | 61 - .../human_heart_2020_microwell_han_004.py | 61 - sfaira/data/human/hesc/__init__.py | 1 - sfaira/data/human/hesc/external.py | 2 - sfaira/data/human/hesc/human_hesc.py | 26 - .../hesc/human_hesc_2020_microwell_han_001.py | 61 - sfaira/data/human/ileum/__init__.py | 1 - sfaira/data/human/ileum/external.py | 2 - sfaira/data/human/ileum/human_ileum.py | 30 - .../ileum/human_ileum_2019_10x_martin_001.py | 88 - .../ileum/human_ileum_2019_10x_wang_001.py | 74 - .../human_ileum_2020_microwell_han_001.py | 88 - 
sfaira/data/human/jejunum/__init__.py | 1 - sfaira/data/human/jejunum/external.py | 2 - sfaira/data/human/jejunum/human_jejunum.py | 26 - .../human_jejunum_2020_microwell_han_001.py | 60 - sfaira/data/human/kidney/__init__.py | 1 - sfaira/data/human/kidney/external.py | 2 - sfaira/data/human/kidney/human_kidney.py | 44 - .../human_kidney_2019_10xSn_lake_001.py | 95 -- .../human_kidney_2019_10x_stewart_001.py | 145 -- .../kidney/human_kidney_2020_10x_liao_001.py | 115 -- .../human_kidney_2020_microwell_han_001.py | 110 -- .../human_kidney_2020_microwell_han_002.py | 110 -- .../human_kidney_2020_microwell_han_003.py | 110 -- .../human_kidney_2020_microwell_han_004.py | 110 -- .../human_kidney_2020_microwell_han_005.py | 110 -- .../human_kidney_2020_microwell_han_006.py | 110 -- .../human_kidney_2020_microwell_han_007.py | 110 -- sfaira/data/human/liver/__init__.py | 1 - sfaira/data/human/liver/external.py | 2 - sfaira/data/human/liver/human_liver.py | 42 - .../human_liver_2018_10x_macparland_001.py | 89 -- .../liver/human_liver_2019_10x_popescu_001.py | 89 -- .../human_liver_2019_10x_ramachandran_001.py | 91 -- .../human_liver_2019_mCELSeq2_aizarani_001.py | 108 -- .../human_liver_2020_microwell_han_001.py | 89 -- .../human_liver_2020_microwell_han_002.py | 89 -- .../human_liver_2020_microwell_han_003.py | 89 -- .../human_liver_2020_microwell_han_004.py | 89 -- .../human_liver_2020_microwell_han_005.py | 89 -- sfaira/data/human/lung/__init__.py | 1 - sfaira/data/human/lung/external.py | 2 - sfaira/data/human/lung/human_lung.py | 53 - .../lung/human_lung_2019_10x_braga_001.py | 84 - .../lung/human_lung_2019_10x_braga_002.py | 84 - .../lung/human_lung_2019_10x_madissoon_001.py | 88 - .../lung/human_lung_2019_dropseq_braga_003.py | 81 - .../lung/human_lung_2020_10x_habermann_001.py | 126 -- .../lung/human_lung_2020_10x_lukassen_001.py | 77 - .../lung/human_lung_2020_10x_lukassen_002.py | 82 - .../lung/human_lung_2020_10x_miller_001.py | 93 -- 
.../human_lung_2020_10x_travaglini_001.py | 135 -- .../lung/human_lung_2020_microwell_han_001.py | 112 -- .../lung/human_lung_2020_microwell_han_002.py | 112 -- .../lung/human_lung_2020_microwell_han_003.py | 112 -- .../lung/human_lung_2020_microwell_han_004.py | 112 -- .../lung/human_lung_2020_microwell_han_005.py | 112 -- ...uman_lung_2020_smartseq2_travaglini_002.py | 122 -- sfaira/data/human/malegonad/__init__.py | 1 - sfaira/data/human/malegonad/external.py | 2 - .../data/human/malegonad/human_malegonad.py | 30 - .../human_malegonad_2018_10x_guo_001.py | 78 - .../human_malegonad_2020_microwell_han_001.py | 90 -- .../human_malegonad_2020_microwell_han_002.py | 90 -- sfaira/data/human/mixed/__init__.py | 1 - sfaira/data/human/mixed/external.py | 2 - sfaira/data/human/mixed/human_mixed.py | 26 - .../mixed/human_mixed_2019_10x_szabo_001.py | 189 --- sfaira/data/human/muscle/__init__.py | 1 - sfaira/data/human/muscle/external.py | 2 - sfaira/data/human/muscle/human_muscle.py | 28 - .../human_muscle_2020_microwell_han_001.py | 61 - .../human_muscle_2020_microwell_han_002.py | 61 - sfaira/data/human/omentum/__init__.py | 1 - sfaira/data/human/omentum/external.py | 2 - sfaira/data/human/omentum/human_omentum.py | 30 - .../human_omentum_2020_microwell_han_001.py | 61 - .../human_omentum_2020_microwell_han_002.py | 61 - .../human_omentum_2020_microwell_han_003.py | 61 - sfaira/data/human/pancreas/__init__.py | 1 - sfaira/data/human/pancreas/external.py | 2 - sfaira/data/human/pancreas/human_pancreas.py | 38 - .../human_pancreas_2016_indrop_baron_001.py | 80 - ...pancreas_2016_smartseq2_segerstolpe_001.py | 88 - .../human_pancreas_2017_smartseq2_enge_001.py | 147 -- .../human_pancreas_2020_microwell_han_001.py | 99 -- .../human_pancreas_2020_microwell_han_002.py | 99 -- .../human_pancreas_2020_microwell_han_003.py | 99 -- .../human_pancreas_2020_microwell_han_004.py | 99 -- sfaira/data/human/placenta/__init__.py | 1 - sfaira/data/human/placenta/external.py | 2 - 
sfaira/data/human/placenta/human_placenta.py | 30 - .../human_placenta_2018_10x_ventotormo_001.py | 111 -- ..._placenta_2018_smartseq2_ventotormo_001.py | 111 -- .../human_placenta_2020_microwell_han_001.py | 92 -- sfaira/data/human/pleura/__init__.py | 1 - sfaira/data/human/pleura/external.py | 2 - sfaira/data/human/pleura/human_pleura.py | 26 - .../human_pleura_2020_microwell_han_001.py | 61 - sfaira/data/human/prostate/__init__.py | 1 - sfaira/data/human/prostate/external.py | 2 - sfaira/data/human/prostate/human_prostate.py | 28 - .../human_prostate_2018_10x_henry_001.py | 75 - .../human_prostate_2020_microwell_han_001.py | 82 - sfaira/data/human/rectum/__init__.py | 1 - sfaira/data/human/rectum/external.py | 2 - sfaira/data/human/rectum/human_rectum.py | 28 - .../rectum/human_rectum_2019_10x_wang_001.py | 73 - .../human_rectum_2020_microwell_han_001.py | 76 - sfaira/data/human/rib/__init__.py | 1 - sfaira/data/human/rib/external.py | 2 - sfaira/data/human/rib/human_rib.py | 28 - .../rib/human_rib_2020_microwell_han_001.py | 61 - .../rib/human_rib_2020_microwell_han_002.py | 61 - sfaira/data/human/skin/__init__.py | 1 - sfaira/data/human/skin/external.py | 2 - sfaira/data/human/skin/human_skin.py | 28 - .../skin/human_skin_2020_microwell_han_001.py | 91 -- .../skin/human_skin_2020_microwell_han_002.py | 91 -- sfaira/data/human/spinalcord/__init__.py | 1 - sfaira/data/human/spinalcord/external.py | 2 - .../data/human/spinalcord/human_spinalcord.py | 26 - ...human_spinalcord_2020_microwell_han_001.py | 61 - sfaira/data/human/spleen/__init__.py | 1 - sfaira/data/human/spleen/external.py | 2 - sfaira/data/human/spleen/human_spleen.py | 30 - .../human_spleen_2019_10x_madissoon_001.py | 95 -- .../human_spleen_2020_microwell_han_001.py | 82 - .../human_spleen_2020_microwell_han_002.py | 82 - sfaira/data/human/stomach/__init__.py | 1 - sfaira/data/human/stomach/external.py | 2 - sfaira/data/human/stomach/human_stomach.py | 44 - 
.../human_stomach_2020_microwell_han_001.py | 61 - .../human_stomach_2020_microwell_han_002.py | 61 - .../human_stomach_2020_microwell_han_003.py | 61 - .../human_stomach_2020_microwell_han_004.py | 61 - .../human_stomach_2020_microwell_han_005.py | 61 - .../human_stomach_2020_microwell_han_006.py | 61 - .../human_stomach_2020_microwell_han_007.py | 61 - .../human_stomach_2020_microwell_han_008.py | 61 - .../human_stomach_2020_microwell_han_009.py | 61 - .../human_stomach_2020_microwell_han_010.py | 61 - sfaira/data/human/thymus/__init__.py | 1 - sfaira/data/human/thymus/external.py | 2 - sfaira/data/human/thymus/human_thymus.py | 30 - .../thymus/human_thymus_2020_10x_park_001.py | 108 -- .../human_thymus_2020_microwell_han_001.py | 75 - .../human_thymus_2020_microwell_han_002.py | 75 - sfaira/data/human/thyroid/__init__.py | 1 - sfaira/data/human/thyroid/external.py | 2 - sfaira/data/human/thyroid/human_thyroid.py | 28 - .../human_thyroid_2020_microwell_han_001.py | 61 - .../human_thyroid_2020_microwell_han_002.py | 61 - sfaira/data/human/trachea/__init__.py | 1 - sfaira/data/human/trachea/external.py | 2 - sfaira/data/human/trachea/human_trachea.py | 26 - .../human_trachea_2020_microwell_han_001.py | 61 - sfaira/data/human/ureter/__init__.py | 1 - sfaira/data/human/ureter/external.py | 2 - sfaira/data/human/ureter/human_ureter.py | 26 - .../human_ureter_2020_microwell_han_001.py | 61 - sfaira/data/human/uterus/__init__.py | 1 - sfaira/data/human/uterus/external.py | 2 - sfaira/data/human/uterus/human_uterus.py | 26 - .../human_uterus_2020_microwell_han_001.py | 62 - sfaira/data/interactive/external.py | 2 - sfaira/data/interactive/loader.py | 75 +- sfaira/data/mouse/__init__.py | 27 - sfaira/data/mouse/adipose/__init__.py | 1 - sfaira/data/mouse/adipose/external.py | 2 - sfaira/data/mouse/adipose/mouse_adipose.py | 34 - .../mouse_adipose_2019_10x_pisco_001.py | 71 - .../mouse_adipose_2019_smartseq2_pisco_001.py | 71 - .../mouse_adipose_2019_smartseq2_pisco_002.py 
| 69 - .../mouse_adipose_2019_smartseq2_pisco_003.py | 71 - .../mouse_adipose_2019_smartseq2_pisco_004.py | 71 - sfaira/data/mouse/bladder/__init__.py | 1 - sfaira/data/mouse/bladder/external.py | 2 - sfaira/data/mouse/bladder/mouse_bladder.py | 30 - .../mouse_bladder_2018_microwell_han_001.py | 80 - .../mouse_bladder_2019_10x_pisco_001.py | 71 - .../mouse_bladder_2019_smartseq2_pisco_001.py | 68 - sfaira/data/mouse/blood/__init__.py | 1 - sfaira/data/mouse/blood/external.py | 2 - sfaira/data/mouse/blood/mouse_blood.py | 34 - .../mouse_blood_2018_microwell_han_001.py | 85 - .../mouse_blood_2018_microwell_han_002.py | 85 - .../mouse_blood_2018_microwell_han_003.py | 85 - .../mouse_blood_2018_microwell_han_004.py | 85 - .../mouse_blood_2018_microwell_han_005.py | 85 - sfaira/data/mouse/bone/__init__.py | 1 - sfaira/data/mouse/bone/external.py | 2 - sfaira/data/mouse/bone/mouse_bone.py | 30 - .../bone/mouse_bone_2018_microwell_001.py | 80 - .../bone/mouse_bone_2019_10x_pisco_001.py | 72 - .../mouse_bone_2019_smartseq2_pisco_001.py | 72 - sfaira/data/mouse/brain/__init__.py | 1 - sfaira/data/mouse/brain/external.py | 2 - sfaira/data/mouse/brain/mouse_brain.py | 32 - .../mouse_brain_2018_microwell_han_001.py | 78 - .../mouse_brain_2018_microwell_han_002.py | 78 - .../mouse_brain_2019_smartseq2_pisco_001.py | 73 - .../mouse_brain_2019_smartseq2_pisco_002.py | 73 - sfaira/data/mouse/colon/__init__.py | 1 - sfaira/data/mouse/colon/external.py | 2 - sfaira/data/mouse/colon/mouse_colon.py | 28 - .../colon/mouse_colon_2019_10x_pisco_001.py | 72 - .../mouse_colon_2019_smartseq2_pisco_001.py | 73 - sfaira/data/mouse/diaphragm/__init__.py | 1 - sfaira/data/mouse/diaphragm/external.py | 2 - .../data/mouse/diaphragm/mouse_diaphragm.py | 26 - ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 70 - sfaira/data/mouse/femalegonad/__init__.py | 1 - sfaira/data/mouse/femalegonad/external.py | 2 - .../mouse/femalegonad/mouse_femalegonad.py | 28 - 
...ouse_femalegonad_2018_microwell_han_001.py | 76 - ...ouse_femalegonad_2018_microwell_han_002.py | 76 - sfaira/data/mouse/heart/__init__.py | 1 - sfaira/data/mouse/heart/external.py | 2 - sfaira/data/mouse/heart/mouse_heart.py | 31 - .../heart/mouse_heart_2019_10x_pisco_001.py | 72 - .../mouse_heart_2019_smartseq2_pisco_001.py | 71 - .../mouse_heart_2019_smartseq2_pisco_002.py | 71 - sfaira/data/mouse/ileum/__init__.py | 1 - sfaira/data/mouse/ileum/external.py | 2 - sfaira/data/mouse/ileum/mouse_ileum.py | 30 - .../mouse_ileum_2018_microwell_han_001.py | 87 - .../mouse_ileum_2018_microwell_han_002.py | 87 - .../mouse_ileum_2018_microwell_han_003.py | 88 - sfaira/data/mouse/kidney/__init__.py | 1 - sfaira/data/mouse/kidney/external.py | 2 - sfaira/data/mouse/kidney/mouse_kidney.py | 32 - .../mouse_kidney_2018_microwell_han_001.py | 64 - .../mouse_kidney_2018_microwell_han_002.py | 95 -- .../kidney/mouse_kidney_2019_10x_pisco_001.py | 75 - .../mouse_kidney_2019_smartseq2_pisco_001.py | 74 - sfaira/data/mouse/liver/__init__.py | 1 - sfaira/data/mouse/liver/external.py | 2 - sfaira/data/mouse/liver/mouse_liver.py | 32 - .../mouse_liver_2018_microwell_han_001.py | 82 - .../mouse_liver_2018_microwell_han_002.py | 76 - .../liver/mouse_liver_2019_10x_pisco_001.py | 71 - .../mouse_liver_2019_smartseq2_pisco_001.py | 71 - sfaira/data/mouse/lung/__init__.py | 1 - sfaira/data/mouse/lung/external.py | 2 - sfaira/data/mouse/lung/mouse_lung.py | 34 - .../lung/mouse_lung_2018_microwell_han_001.py | 97 -- .../lung/mouse_lung_2018_microwell_han_002.py | 97 -- .../lung/mouse_lung_2018_microwell_han_003.py | 97 -- .../lung/mouse_lung_2019_10x_pisco_001.py | 72 - .../mouse_lung_2019_smartseq2_pisco_001.py | 72 - sfaira/data/mouse/malegonad/__init__.py | 1 - sfaira/data/mouse/malegonad/external.py | 2 - .../data/mouse/malegonad/mouse_malegonad.py | 28 - .../mouse_malegonad_2018_microwell_han_001.py | 83 - .../mouse_malegonad_2018_microwell_han_002.py | 83 - 
sfaira/data/mouse/mammarygland/__init__.py | 1 - sfaira/data/mouse/mammarygland/external.py | 2 - .../mouse/mammarygland/mouse_mammarygland.py | 36 - ...use_mammarygland_2018_microwell_han_001.py | 80 - ...use_mammarygland_2018_microwell_han_002.py | 81 - ...use_mammarygland_2018_microwell_han_003.py | 81 - ...use_mammarygland_2018_microwell_han_004.py | 81 - .../mouse_mammarygland_2019_10x_pisco_001.py | 69 - ...e_mammarygland_2019_smartseq2_pisco_001.py | 70 - sfaira/data/mouse/muscle/__init__.py | 1 - sfaira/data/mouse/muscle/external.py | 2 - sfaira/data/mouse/muscle/mouse_muscle.py | 30 - .../mouse_muscle_2018_microwell_han_001.py | 82 - .../muscle/mouse_muscle_2019_10x_pisco_001.py | 69 - .../mouse_muscle_2019_smartseq2_pisco_001.py | 69 - sfaira/data/mouse/pancreas/__init__.py | 1 - sfaira/data/mouse/pancreas/external.py | 2 - sfaira/data/mouse/pancreas/mouse_pancreas.py | 46 - .../mouse_pancreas_2018_microwell_han_001.py | 86 - .../mouse_pancreas_2019_10x_pisco_001.py | 71 - .../mouse_pancreas_2019_10x_thompson_001.py | 77 - .../mouse_pancreas_2019_10x_thompson_002.py | 77 - .../mouse_pancreas_2019_10x_thompson_003.py | 77 - .../mouse_pancreas_2019_10x_thompson_004.py | 77 - .../mouse_pancreas_2019_10x_thompson_005.py | 77 - .../mouse_pancreas_2019_10x_thompson_006.py | 77 - .../mouse_pancreas_2019_10x_thompson_007.py | 77 - .../mouse_pancreas_2019_10x_thompson_008.py | 77 - ...mouse_pancreas_2019_smartseq2_pisco_001.py | 73 - ...peripheral_blood_2018_microwell_han_005.py | 0 sfaira/data/mouse/placenta/__init__.py | 1 - sfaira/data/mouse/placenta/external.py | 2 - sfaira/data/mouse/placenta/mouse_placenta.py | 28 - .../mouse_placenta_2018_microwell_han_001.py | 92 -- .../mouse_placenta_2018_microwell_han_002.py | 92 -- sfaira/data/mouse/prostate/__init__.py | 1 - sfaira/data/mouse/prostate/external.py | 2 - sfaira/data/mouse/prostate/mouse_prostate.py | 28 - .../mouse_prostate_2018_microwell_han_001.py | 70 - .../mouse_prostate_2018_microwell_han_002.py | 
70 - sfaira/data/mouse/rib/__init__.py | 1 - sfaira/data/mouse/rib/external.py | 2 - sfaira/data/mouse/rib/mouse_rib.py | 30 - .../rib/mouse_rib_2018_microwell_han_001.py | 87 - .../rib/mouse_rib_2018_microwell_han_002.py | 87 - .../rib/mouse_rib_2018_microwell_han_003.py | 87 - sfaira/data/mouse/skin/__init__.py | 1 - sfaira/data/mouse/skin/external.py | 2 - sfaira/data/mouse/skin/mouse_skin.py | 28 - .../skin/mouse_skin_2019_10x_pisco_001.py | 71 - .../mouse_skin_2019_smartseq2_pisco_001.py | 72 - sfaira/data/mouse/spleen/__init__.py | 1 - sfaira/data/mouse/spleen/external.py | 2 - sfaira/data/mouse/spleen/mouse_spleen.py | 30 - .../mouse_spleen_2018_microwell_han_001.py | 75 - .../spleen/mouse_spleen_2019_10x_pisco_001.py | 71 - .../mouse_spleen_2019_smartseq2_pisco_001.py | 71 - sfaira/data/mouse/stomach/__init__.py | 1 - sfaira/data/mouse/stomach/external.py | 2 - sfaira/data/mouse/stomach/mouse_stomach.py | 26 - .../mouse_stomach_2018_microwell_han_001.py | 81 - sfaira/data/mouse/thymus/__init__.py | 1 - sfaira/data/mouse/thymus/external.py | 2 - sfaira/data/mouse/thymus/mouse_thymus.py | 30 - .../mouse_thymus_2018_microwell_han_001.py | 70 - .../thymus/mouse_thymus_2019_10x_pisco_001.py | 71 - .../mouse_thymus_2019_smartseq2_pisco_001.py | 71 - sfaira/data/mouse/tongue/__init__.py | 1 - sfaira/data/mouse/tongue/external.py | 2 - sfaira/data/mouse/tongue/mouse_tongue.py | 28 - .../tongue/mouse_tongue_2019_10x_pisco_001.py | 72 - .../mouse_tongue_2019_smartseq2_pisco_001.py | 72 - sfaira/data/mouse/trachea/__init__.py | 1 - sfaira/data/mouse/trachea/external.py | 2 - sfaira/data/mouse/trachea/mouse_trachea.py | 28 - .../mouse_trachea_2019_10x_pisco_001.py | 74 - .../mouse_trachea_2019_smartseq2_pisco_001.py | 73 - sfaira/data/mouse/uterus/__init__.py | 1 - sfaira/data/mouse/uterus/external.py | 2 - sfaira/data/mouse/uterus/mouse_uterus.py | 28 - .../mouse_uterus_2018_microwell_han_001.py | 83 - .../mouse_uterus_2018_microwell_han_002.py | 83 - 
sfaira/data/utils/create_meta.py | 34 + sfaira/data/utils/create_meta_human.py | 62 - sfaira/data/utils/create_meta_mouse.py | 45 - sfaira/data/utils/write_backed_human.py | 59 +- sfaira/data/utils/write_backed_mouse.py | 42 +- sfaira/estimators/__init__.py | 3 +- sfaira/estimators/callbacks.py | 8 +- sfaira/estimators/external.py | 2 +- sfaira/estimators/keras.py | 82 +- sfaira/estimators/losses.py | 2 +- sfaira/estimators/metrics.py | 12 +- sfaira/genomes/generate_feature_list.py | 8 +- sfaira/interface/__init__.py | 1 + sfaira/interface/external.py | 5 - sfaira/interface/model_zoo.py | 164 +- sfaira/interface/user_interface.py | 78 +- sfaira/models/celltype/marker.py | 12 +- sfaira/models/celltype/mlp.py | 11 +- sfaira/models/embedding/ae.py | 13 +- sfaira/models/embedding/linear.py | 5 +- sfaira/models/embedding/output_layers.py | 3 +- sfaira/models/embedding/vae.py | 6 +- sfaira/models/embedding/vaeiaf.py | 34 +- sfaira/models/embedding/vaevamp.py | 5 +- sfaira/models/made.py | 1 + sfaira/train/external.py | 5 - sfaira/train/summaries.py | 143 +- sfaira/train/train_model.py | 186 +-- sfaira/unit_tests/test_data_template.py | 49 + sfaira/unit_tests/test_dataset.py | 47 +- sfaira/unit_tests/test_estimator.py | 12 +- sfaira/unit_tests/test_userinterface.py | 4 +- sfaira/unit_tests/test_zoo.py | 6 +- sfaira/versions/__init__.py | 3 + sfaira/versions/celltype_versions/__init__.py | 24 +- sfaira/versions/celltype_versions/base.py | 5 +- .../versions/celltype_versions/human/brain.py | 4 +- .../versions/celltype_versions/human/eye.py | 4 +- .../celltype_versions/human/kidney.py | 12 +- .../versions/celltype_versions/human/liver.py | 4 +- .../versions/celltype_versions/human/mixed.py | 1 + .../celltype_versions/human/rectum.py | 2 +- .../versions/celltype_versions/human/skin.py | 5 +- .../celltype_versions/human/spleen.py | 2 +- .../celltype_versions/mouse/adipose.py | 2 +- .../celltype_versions/mouse/bladder.py | 2 +- .../celltype_versions/mouse/kidney.py | 2 +- 
.../celltype_versions/mouse/pancreas.py | 2 +- .../celltype_versions/mouse/spleen.py | 2 +- .../genome_versions/class_interface.py | 12 +- .../genome_versions/human/genome_sizes.py | 2 +- .../genome_versions/mouse/genome_container.py | 2 +- .../genome_versions/mouse/genome_sizes.py | 2 +- .../topology_versions/class_interface.py | 26 +- sfaira/versions/topology_versions/external.py | 2 +- .../topology_versions/human/embedding/ae.py | 72 +- .../human/embedding/linear.py | 30 +- .../topology_versions/human/embedding/nmf.py | 30 +- .../topology_versions/human/embedding/vae.py | 64 +- .../human/embedding/vaeiaf.py | 36 +- .../topology_versions/mouse/embedding/ae.py | 72 +- .../mouse/embedding/linear.py | 30 +- .../topology_versions/mouse/embedding/nmf.py | 30 +- .../topology_versions/mouse/embedding/vae.py | 64 +- .../mouse/embedding/vaeiaf.py | 38 +- versioneer.py | 6 +- 933 files changed, 17139 insertions(+), 23174 deletions(-) create mode 100644 .bandit.yml create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/general_question.md create mode 100644 .github/dependabot.yml create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/build_package.yml create mode 100644 .github/workflows/pr_to_master_from_patch_release_only.yml create mode 100644 .github/workflows/publish_docs.yml create mode 100644 .github/workflows/publish_package.yml create mode 100644 .github/workflows/run_bandit.yml create mode 100644 .github/workflows/run_flake8_linting.yml create mode 100644 docs/Makefile create mode 100644 docs/api/sfaira.data.DatasetBase.rst create mode 100644 docs/api/sfaira.data.DatasetInteractive.rst create mode 100644 docs/api/sfaira.data.DatasetSuperGroup.rst create mode 100644 docs/api/sfaira.data.human.rst create mode 100644 docs/api/sfaira.data.mouse.rst create mode 100644 docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst create mode 
100644 docs/api/sfaira.models.celltype.rst create mode 100644 docs/api/sfaira.models.embedding.rst create mode 100644 docs/api/sfaira.train.GridsearchContainer.rst create mode 100644 docs/api/sfaira.train.SummarizeGridsearchCelltype.rst create mode 100644 docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst create mode 100644 docs/api/sfaira.train.TargetZoos.rst create mode 100644 docs/api/sfaira.train.TrainModelCelltype.rst create mode 100644 docs/api/sfaira.train.TrainModelEmbedding.rst create mode 100644 docs/api/sfaira.ui.UserInterface.rst create mode 100644 docs/changelog.rst delete mode 100644 docs/release-latest.rst delete mode 100644 docs/release-notes.rst create mode 100644 docs/requirements.txt delete mode 100644 sfaira/data/databases/__init__.py delete mode 100644 sfaira/data/databases/cellxgene/__init__.py delete mode 100644 sfaira/data/databases/cellxgene/cellxgene_group.py delete mode 100644 sfaira/data/databases/cellxgene/cellxgene_loader.py delete mode 100644 sfaira/data/databases/cellxgene/external.py create mode 100644 sfaira/data/dataloaders/__init__.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/__init__.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/__init__.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/external.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_artery.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_blood.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_bone.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_brain.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py create 
mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_colon.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_eye.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_heart.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_liver.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_lung.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py create mode 100644 
sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_rib.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_skin.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/external.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py create mode 100644 
sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py create mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py create mode 100644 sfaira/data/dataloaders/databases/__init__.py create mode 100644 sfaira/data/dataloaders/databases/cellxgene/__init__.py create mode 100644 sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py create mode 100644 sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py create mode 100644 sfaira/data/dataloaders/databases/cellxgene/external.py create mode 100644 sfaira/data/dataloaders/databases/super_group.py create mode 100644 sfaira/data/dataloaders/loaders/__init__.py 
create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_mCELSeq2_aizarani_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py create 
mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py create mode 
100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py rename sfaira/data/{mouse/brain 
=> dataloaders/loaders/d10_1038_s41593_019_0393_4}/mouse_brain_2019_mouse_brain_atlas_temp.py (59%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/base.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py create mode 100644 sfaira/data/dataloaders/loaders/d_nan/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py create mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py create mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py create mode 100644 sfaira/data/dataloaders/loaders/super_group.py create mode 100644 sfaira/data/dataloaders/super_group.py delete mode 100644 sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb delete mode 100644 sfaira/data/human/__init__.py delete mode 100644 sfaira/data/human/adipose/__init__.py delete mode 100644 sfaira/data/human/adipose/external.py delete mode 100644 sfaira/data/human/adipose/human_adipose.py delete mode 100644 sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/adrenalgland/__init__.py delete mode 100644 sfaira/data/human/adrenalgland/external.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py delete mode 
100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py delete mode 100644 sfaira/data/human/artery/__init__.py delete mode 100644 sfaira/data/human/artery/external.py delete mode 100644 sfaira/data/human/artery/human_artery.py delete mode 100644 sfaira/data/human/artery/human_artery_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/bladder/__init__.py delete mode 100644 sfaira/data/human/bladder/external.py delete mode 100644 sfaira/data/human/bladder/human_bladder.py delete mode 100644 sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/blood/__init__.py delete mode 100644 sfaira/data/human/blood/external.py delete mode 100644 sfaira/data/human/blood/human_blood.py delete mode 100644 sfaira/data/human/blood/human_blood_2018_10x_ica_001.py delete mode 100644 sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/blood/human_blood_2020_microwell_han_006.py delete mode 100644 
sfaira/data/human/blood/human_blood_2020_microwell_han_007.py delete mode 100644 sfaira/data/human/bone/__init__.py delete mode 100644 sfaira/data/human/bone/external.py delete mode 100644 sfaira/data/human/bone/human_bone.py delete mode 100644 sfaira/data/human/bone/human_bone_2018_10x_ica_001.py delete mode 100644 sfaira/data/human/bone/human_bone_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/bone/human_bone_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/brain/__init__.py delete mode 100644 sfaira/data/human/brain/external.py delete mode 100644 sfaira/data/human/brain/human_brain.py delete mode 100644 sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/brain/human_brain_2020_microwell_han_006.py delete mode 100644 sfaira/data/human/calvaria/__init__.py delete mode 100644 sfaira/data/human/calvaria/external.py delete mode 100644 sfaira/data/human/calvaria/human_calvaria.py delete mode 100644 sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/cervix/__init__.py delete mode 100644 sfaira/data/human/cervix/external.py delete mode 100644 sfaira/data/human/cervix/human_cervix.py delete mode 100644 sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/chorionicvillus/__init__.py delete mode 100644 sfaira/data/human/chorionicvillus/external.py delete mode 100644 sfaira/data/human/chorionicvillus/human_chorionicvillus.py delete mode 100644 
sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/colon/__init__.py delete mode 100644 sfaira/data/human/colon/external.py delete mode 100644 sfaira/data/human/colon/human_colon.py delete mode 100644 sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py delete mode 100644 sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py delete mode 100644 sfaira/data/human/colon/human_colon_2019_10x_wang_001.py delete mode 100644 sfaira/data/human/colon/human_colon_2020_10x_james_001.py delete mode 100644 sfaira/data/human/colon/human_colon_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/colon/human_colon_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/colon/human_colon_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/colon/human_colon_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/duodenum/__init__.py delete mode 100644 sfaira/data/human/duodenum/external.py delete mode 100644 sfaira/data/human/duodenum/human_duodenum.py delete mode 100644 sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/epityphlon/__init__.py delete mode 100644 sfaira/data/human/epityphlon/external.py delete mode 100644 sfaira/data/human/epityphlon/human_epityphlon.py delete mode 100644 sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/esophagus/__init__.py delete mode 100644 sfaira/data/human/esophagus/external.py delete mode 100644 sfaira/data/human/esophagus/human_esophagus.py delete mode 100644 sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/eye/__init__.py delete mode 100644 sfaira/data/human/eye/external.py delete mode 
100644 sfaira/data/human/eye/human_eye.py delete mode 100644 sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py delete mode 100644 sfaira/data/human/eye/human_eye_2019_10x_menon_001.py delete mode 100644 sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py delete mode 100644 sfaira/data/human/eye/human_eye_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/fallopiantube/__init__.py delete mode 100644 sfaira/data/human/fallopiantube/external.py delete mode 100644 sfaira/data/human/fallopiantube/human_fallopiantube.py delete mode 100644 sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/femalegonad/__init__.py delete mode 100644 sfaira/data/human/femalegonad/external.py delete mode 100644 sfaira/data/human/femalegonad/human_femalegonad.py delete mode 100644 sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/gallbladder/__init__.py delete mode 100644 sfaira/data/human/gallbladder/external.py delete mode 100644 sfaira/data/human/gallbladder/human_gallbladder.py delete mode 100644 sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/heart/__init__.py delete mode 100644 sfaira/data/human/heart/external.py delete mode 100644 sfaira/data/human/heart/human_heart.py delete mode 100644 sfaira/data/human/heart/human_heart_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/heart/human_heart_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/heart/human_heart_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/heart/human_heart_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/hesc/__init__.py delete mode 100644 sfaira/data/human/hesc/external.py delete mode 100644 sfaira/data/human/hesc/human_hesc.py delete mode 100644 
sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/ileum/__init__.py delete mode 100644 sfaira/data/human/ileum/external.py delete mode 100644 sfaira/data/human/ileum/human_ileum.py delete mode 100644 sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py delete mode 100644 sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py delete mode 100644 sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/jejunum/__init__.py delete mode 100644 sfaira/data/human/jejunum/external.py delete mode 100644 sfaira/data/human/jejunum/human_jejunum.py delete mode 100644 sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/kidney/__init__.py delete mode 100644 sfaira/data/human/kidney/external.py delete mode 100644 sfaira/data/human/kidney/human_kidney.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py delete mode 100644 sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py delete mode 100644 sfaira/data/human/liver/__init__.py delete mode 100644 sfaira/data/human/liver/external.py delete mode 100644 sfaira/data/human/liver/human_liver.py delete mode 100644 sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py delete mode 100644 
sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py delete mode 100644 sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py delete mode 100644 sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py delete mode 100644 sfaira/data/human/liver/human_liver_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/liver/human_liver_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/liver/human_liver_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/liver/human_liver_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/liver/human_liver_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/lung/__init__.py delete mode 100644 sfaira/data/human/lung/external.py delete mode 100644 sfaira/data/human/lung/human_lung.py delete mode 100644 sfaira/data/human/lung/human_lung_2019_10x_braga_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2019_10x_braga_002.py delete mode 100644 sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_10x_miller_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py delete mode 100644 
sfaira/data/human/malegonad/__init__.py delete mode 100644 sfaira/data/human/malegonad/external.py delete mode 100644 sfaira/data/human/malegonad/human_malegonad.py delete mode 100644 sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py delete mode 100644 sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/mixed/__init__.py delete mode 100644 sfaira/data/human/mixed/external.py delete mode 100644 sfaira/data/human/mixed/human_mixed.py delete mode 100644 sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py delete mode 100644 sfaira/data/human/muscle/__init__.py delete mode 100644 sfaira/data/human/muscle/external.py delete mode 100644 sfaira/data/human/muscle/human_muscle.py delete mode 100644 sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/omentum/__init__.py delete mode 100644 sfaira/data/human/omentum/external.py delete mode 100644 sfaira/data/human/omentum/human_omentum.py delete mode 100644 sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/pancreas/__init__.py delete mode 100644 sfaira/data/human/pancreas/external.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py delete mode 100644 
sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/placenta/__init__.py delete mode 100644 sfaira/data/human/placenta/external.py delete mode 100644 sfaira/data/human/placenta/human_placenta.py delete mode 100644 sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py delete mode 100644 sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py delete mode 100644 sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/pleura/__init__.py delete mode 100644 sfaira/data/human/pleura/external.py delete mode 100644 sfaira/data/human/pleura/human_pleura.py delete mode 100644 sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/prostate/__init__.py delete mode 100644 sfaira/data/human/prostate/external.py delete mode 100644 sfaira/data/human/prostate/human_prostate.py delete mode 100644 sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py delete mode 100644 sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/rectum/__init__.py delete mode 100644 sfaira/data/human/rectum/external.py delete mode 100644 sfaira/data/human/rectum/human_rectum.py delete mode 100644 sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py delete mode 100644 sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/rib/__init__.py delete mode 100644 sfaira/data/human/rib/external.py delete mode 100644 sfaira/data/human/rib/human_rib.py delete mode 100644 sfaira/data/human/rib/human_rib_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/rib/human_rib_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/skin/__init__.py 
delete mode 100644 sfaira/data/human/skin/external.py delete mode 100644 sfaira/data/human/skin/human_skin.py delete mode 100644 sfaira/data/human/skin/human_skin_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/skin/human_skin_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/spinalcord/__init__.py delete mode 100644 sfaira/data/human/spinalcord/external.py delete mode 100644 sfaira/data/human/spinalcord/human_spinalcord.py delete mode 100644 sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/spleen/__init__.py delete mode 100644 sfaira/data/human/spleen/external.py delete mode 100644 sfaira/data/human/spleen/human_spleen.py delete mode 100644 sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/stomach/__init__.py delete mode 100644 sfaira/data/human/stomach/external.py delete mode 100644 sfaira/data/human/stomach/human_stomach.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py delete mode 100644 sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py 
delete mode 100644 sfaira/data/human/thymus/__init__.py delete mode 100644 sfaira/data/human/thymus/external.py delete mode 100644 sfaira/data/human/thymus/human_thymus.py delete mode 100644 sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py delete mode 100644 sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/thyroid/__init__.py delete mode 100644 sfaira/data/human/thyroid/external.py delete mode 100644 sfaira/data/human/thyroid/human_thyroid.py delete mode 100644 sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py delete mode 100644 sfaira/data/human/trachea/__init__.py delete mode 100644 sfaira/data/human/trachea/external.py delete mode 100644 sfaira/data/human/trachea/human_trachea.py delete mode 100644 sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/ureter/__init__.py delete mode 100644 sfaira/data/human/ureter/external.py delete mode 100644 sfaira/data/human/ureter/human_ureter.py delete mode 100644 sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py delete mode 100644 sfaira/data/human/uterus/__init__.py delete mode 100644 sfaira/data/human/uterus/external.py delete mode 100644 sfaira/data/human/uterus/human_uterus.py delete mode 100644 sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py delete mode 100644 sfaira/data/interactive/external.py delete mode 100644 sfaira/data/mouse/__init__.py delete mode 100644 sfaira/data/mouse/adipose/__init__.py delete mode 100644 sfaira/data/mouse/adipose/external.py delete mode 100644 sfaira/data/mouse/adipose/mouse_adipose.py delete mode 100644 sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py delete mode 100644 
sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py delete mode 100644 sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py delete mode 100644 sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py delete mode 100644 sfaira/data/mouse/bladder/__init__.py delete mode 100644 sfaira/data/mouse/bladder/external.py delete mode 100644 sfaira/data/mouse/bladder/mouse_bladder.py delete mode 100644 sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/blood/__init__.py delete mode 100644 sfaira/data/mouse/blood/external.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py delete mode 100644 sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py delete mode 100644 sfaira/data/mouse/bone/__init__.py delete mode 100644 sfaira/data/mouse/bone/external.py delete mode 100644 sfaira/data/mouse/bone/mouse_bone.py delete mode 100644 sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py delete mode 100644 sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/brain/__init__.py delete mode 100644 sfaira/data/mouse/brain/external.py delete mode 100644 sfaira/data/mouse/brain/mouse_brain.py delete mode 100644 sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py delete mode 100644 
sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py delete mode 100644 sfaira/data/mouse/colon/__init__.py delete mode 100644 sfaira/data/mouse/colon/external.py delete mode 100644 sfaira/data/mouse/colon/mouse_colon.py delete mode 100644 sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/diaphragm/__init__.py delete mode 100644 sfaira/data/mouse/diaphragm/external.py delete mode 100644 sfaira/data/mouse/diaphragm/mouse_diaphragm.py delete mode 100644 sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/femalegonad/__init__.py delete mode 100644 sfaira/data/mouse/femalegonad/external.py delete mode 100644 sfaira/data/mouse/femalegonad/mouse_femalegonad.py delete mode 100644 sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/heart/__init__.py delete mode 100644 sfaira/data/mouse/heart/external.py delete mode 100644 sfaira/data/mouse/heart/mouse_heart.py delete mode 100644 sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py delete mode 100644 sfaira/data/mouse/ileum/__init__.py delete mode 100644 sfaira/data/mouse/ileum/external.py delete mode 100644 sfaira/data/mouse/ileum/mouse_ileum.py delete mode 100644 sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py delete mode 100644 
sfaira/data/mouse/kidney/__init__.py delete mode 100644 sfaira/data/mouse/kidney/external.py delete mode 100644 sfaira/data/mouse/kidney/mouse_kidney.py delete mode 100644 sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/liver/__init__.py delete mode 100644 sfaira/data/mouse/liver/external.py delete mode 100644 sfaira/data/mouse/liver/mouse_liver.py delete mode 100644 sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/lung/__init__.py delete mode 100644 sfaira/data/mouse/lung/external.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/malegonad/__init__.py delete mode 100644 sfaira/data/mouse/malegonad/external.py delete mode 100644 sfaira/data/mouse/malegonad/mouse_malegonad.py delete mode 100644 sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/mammarygland/__init__.py delete mode 100644 
sfaira/data/mouse/mammarygland/external.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/muscle/__init__.py delete mode 100644 sfaira/data/mouse/muscle/external.py delete mode 100644 sfaira/data/mouse/muscle/mouse_muscle.py delete mode 100644 sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/pancreas/__init__.py delete mode 100644 sfaira/data/mouse/pancreas/external.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py delete mode 100644 
sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py delete mode 100644 sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py delete mode 100644 sfaira/data/mouse/placenta/__init__.py delete mode 100644 sfaira/data/mouse/placenta/external.py delete mode 100644 sfaira/data/mouse/placenta/mouse_placenta.py delete mode 100644 sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/prostate/__init__.py delete mode 100644 sfaira/data/mouse/prostate/external.py delete mode 100644 sfaira/data/mouse/prostate/mouse_prostate.py delete mode 100644 sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/rib/__init__.py delete mode 100644 sfaira/data/mouse/rib/external.py delete mode 100644 sfaira/data/mouse/rib/mouse_rib.py delete mode 100644 sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py delete mode 100644 sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py delete mode 100644 sfaira/data/mouse/skin/__init__.py delete mode 100644 sfaira/data/mouse/skin/external.py delete mode 100644 sfaira/data/mouse/skin/mouse_skin.py delete mode 100644 sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/spleen/__init__.py delete mode 100644 sfaira/data/mouse/spleen/external.py delete mode 100644 sfaira/data/mouse/spleen/mouse_spleen.py delete mode 100644 
sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/stomach/__init__.py delete mode 100644 sfaira/data/mouse/stomach/external.py delete mode 100644 sfaira/data/mouse/stomach/mouse_stomach.py delete mode 100644 sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/thymus/__init__.py delete mode 100644 sfaira/data/mouse/thymus/external.py delete mode 100644 sfaira/data/mouse/thymus/mouse_thymus.py delete mode 100644 sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/tongue/__init__.py delete mode 100644 sfaira/data/mouse/tongue/external.py delete mode 100644 sfaira/data/mouse/tongue/mouse_tongue.py delete mode 100644 sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/trachea/__init__.py delete mode 100644 sfaira/data/mouse/trachea/external.py delete mode 100644 sfaira/data/mouse/trachea/mouse_trachea.py delete mode 100644 sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py delete mode 100644 sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/mouse/uterus/__init__.py delete mode 100644 sfaira/data/mouse/uterus/external.py delete mode 100644 sfaira/data/mouse/uterus/mouse_uterus.py delete mode 100644 sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py delete mode 100644 sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py create mode 100644 sfaira/data/utils/create_meta.py delete mode 100644 
sfaira/data/utils/create_meta_human.py delete mode 100644 sfaira/data/utils/create_meta_mouse.py delete mode 100644 sfaira/interface/external.py delete mode 100644 sfaira/train/external.py create mode 100644 sfaira/unit_tests/test_data_template.py diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 000000000..21892526a --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,7 @@ +### profile may optionally select or skip tests + +# (optional) list included tests here: +tests: [] + +# (optional) list skipped tests here: +skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310'] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..c08bbb0fd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a bug report to help us improve +title: 'Bug Summary' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. ... +2. ... +3. ... + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**System [please complete the following information]:** + - OS: e.g. [Ubuntu 20.04] + - Language Version: [e.g. Python 3.9] + - Virtual environment: [e.g. Conda] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..071044f83 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest a new feature +title: 'Feature Request Summary' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when ... 
+ +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/general_question.md b/.github/ISSUE_TEMPLATE/general_question.md new file mode 100644 index 000000000..9b429f568 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/general_question.md @@ -0,0 +1,13 @@ +--- +name: General question +about: Ask a question about anything related to this project +title: 'Question' +labels: '' +assignees: '' + +--- + +**Question** + +Please ask your question here. It can be about the usage of this project, the internals, the implementation or whatever interests you. +Please use the BUG template for bugs, and the FEATURE REQUEST template for feature requests. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..8ace0c2b2 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +version: 2 +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + time: "04:00" + open-pull-requests-limit: 99 + target-branch: development + labels: + - dependabot + commit-message: + prefix: "[DEPENDABOT]" + +- package-ecosystem: github-actions + directory: "/" + schedule: + interval: daily + time: "04:00" + open-pull-requests-limit: 99 + target-branch: development + labels: + - dependabot + commit-message: + prefix: "[DEPENDABOT]" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..c017fb887 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,19 @@ +Many thanks for contributing to sfaira! + +**PR Checklist** +Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs). 
+ + - [ ] This comment contains a description of changes (with reason) + - [ ] Referenced issue is linked + - [ ] If you've fixed a bug or added code that should be tested, add tests! + - [ ] Documentation in `docs` is updated + - [ ] `docs/release-notes.rst` is updated + +**Description of changes** +Please state what you've changed and how it might affect the user. + +**Technical details** +Please state any technical details such as limitations, reasons for additional dependencies, benchmarks etc. here. + +**Additional context** +Add any other context or screenshots here. diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml new file mode 100644 index 000000000..ec199d5a6 --- /dev/null +++ b/.github/workflows/build_package.yml @@ -0,0 +1,42 @@ +name: Build sfaira Package + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + strategy: + matrix: + os: [macos-latest, ubuntu-latest, windows-latest] + python: [3.7, 3.8] + env: + PYTHONIOENCODING: utf-8 + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: ${{ matrix.python }} + + - name: Upgrade and install pip + run: python -m pip install --upgrade pip + + - name: Build sfaira + run: pip install . 
+ + - name: Import sfaira + run: python -c "import sfaira" + + # Verify that the package does adhere to PyPI's standards + - name: Install required twine packaging dependencies + run: pip install setuptools wheel twine + + - name: Build twine package + run: python setup.py sdist bdist_wheel + + - name: Check twine package + run: twine check dist/* diff --git a/.github/workflows/pr_to_master_from_patch_release_only.yml b/.github/workflows/pr_to_master_from_patch_release_only.yml new file mode 100644 index 000000000..c1d9467ab --- /dev/null +++ b/.github/workflows/pr_to_master_from_patch_release_only.yml @@ -0,0 +1,34 @@ +name: PR to master branch from patch/release branch only + +on: + pull_request: + branches: + - master + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the repository master branch are only ok if coming from any patch or release branch + - name: Check PRs + run: | + { [[ $GITHUB_HEAD_REF = *"release"* ]]; } || [[ $GITHUB_HEAD_REF == *"patch"* ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this may not work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. + The `master` branch should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from any ${{github.event.pull_request.head.repo.full_name}} `release` or `patch` branch. + + You do not need to close this PR, you can change the target branch to `development` by clicking the _"Edit"_ button at the top of this page. + + Thanks again for your contribution!
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml new file mode 100644 index 000000000..8fba4ee58 --- /dev/null +++ b/.github/workflows/publish_docs.yml @@ -0,0 +1,42 @@ +name: Build Documentation + +on: + push: + paths: + - "docs/**" + pull_request: + paths: + - "docs/**" + +jobs: + build: + + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.8 + + - name: Install pip + run: python -m pip install --upgrade pip + + - name: Install doc dependencies + run: pip install -r docs/requirements.txt + + - name: Build docs + run: | + cd docs + make html + + - name: Deploy + if: ${{ github.ref == 'refs/heads/master' && github.event_name == 'push' }} + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/_build/html diff --git a/.github/workflows/publish_package.yml b/.github/workflows/publish_package.yml new file mode 100644 index 000000000..409a1b10e --- /dev/null +++ b/.github/workflows/publish_package.yml @@ -0,0 +1,31 @@ +name: Publish sfaira to PyPI + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Set up Python + uses: actions/setup-python@v2.1.4 + with: + python-version: '3.9' + + - name: Install pip, setuptools, wheel, twine + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: 
${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.github/workflows/run_bandit.yml b/.github/workflows/run_bandit.yml new file mode 100644 index 000000000..95719c26f --- /dev/null +++ b/.github/workflows/run_bandit.yml @@ -0,0 +1,30 @@ +name: Run bandit + +on: + push: + paths: + - "**/*.py" + pull_request: + paths: + - "**/*.py" + +jobs: + build: + + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.9 + + - name: Install bandit + run: pip install bandit + + - name: Run bandit + run: bandit -r sfaira -c .bandit.yml diff --git a/.github/workflows/run_flake8_linting.yml b/.github/workflows/run_flake8_linting.yml new file mode 100644 index 000000000..737f7b80e --- /dev/null +++ b/.github/workflows/run_flake8_linting.yml @@ -0,0 +1,31 @@ +name: Run flake8 linting + +on: + push: + paths: + - "**/*.py" + pull_request: + paths: + - "**/*.py" + +jobs: + lint: + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: 3.9 + + - name: Install pip + run: python -m pip install --upgrade pip + + - name: Lint with flake8 + run: | + pip install flake8 + flake8 . 
diff --git a/.gitignore b/.gitignore index 974bd87ce..362e74721 100644 --- a/.gitignore +++ b/.gitignore @@ -1,17 +1,149 @@ -sfaira/unit_tests/test_data +sfaira/unit_tests/test_data_loaders/* +sfaira/unit_tests/test_data/* +sfaira/unit_tests/test_data_template.py git abuild cache sfaira.egg-info config.ini .metadata -.idea .Rhistory playground/* venv/* -**/__pycache__ *.ipynb_checkpoints/ */*.ipynb_checkpoints/ **/.DS_Store docs/_templates/ dist/ !**/.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains IDE +.idea/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 6a8f1a146..b5013855b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,4 +1,15 @@ +version: 2 + build: image: latest + +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + python: - version: 3.8 + version: 3.9 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/README.rst b/README.rst index cf758b0c7..a116498a7 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,25 @@ -|Stars| |PyPI| |PyPIDownloads| +|Build| |Documentation| |Stars| |PyPI| |PyPIDownloads| + + +.. |Build| image:: https://github.com/theislab/sfaira/workflows/Build%20sfaira%20Package/badge.svg + :target: https://github.com/theislab/sfaira/workflows/Build%20sfaira%20Package/badge.svg + :alt: Github Workflow Build sfaira Status + +.. |Documentation| image:: https://readthedocs.org/projects/sfaira/badge/?version=latest + :target: https://sfaira.readthedocs.io/en/latest/ + :alt: Documentation Status .. |Stars| image:: https://img.shields.io/github/stars/theislab/sfaira?logo=GitHub&color=yellow :target: https://github.com/theislab/sfaira/stargazers + :alt: Github Stars + .. |PyPI| image:: https://img.shields.io/pypi/v/sfaira?logo=PyPI :target: https://pypi.org/project/sfaira + :alt: PyPI Version + .. 
|PyPIDownloads| image:: https://pepy.tech/badge/sfaira :target: https://pepy.tech/project/sfaira + :alt: Number of downloads sfaira - data and model repository for single-cell data @@ -15,7 +29,7 @@ sfaira - data and model repository for single-cell data :width: 1000px :align: center -sfaira_ is a model and a data repository in a single python package. +sfaira_ is a model and a data repository in a single python package (preprint_). We provide an interactive overview of the current state of the zoos on sfaira-site_. Its data zoo gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. @@ -26,6 +40,7 @@ sfaira integrates into scanpy_ workflows. .. _scanpy: https://github.com/theislab/scanpy .. _sfaira: https://sfaira.readthedocs.io +.. _preprint: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 .. _DCA: https://github.com/theislab/dca .. _scArches: https://github.com/theislab/scarches .. _sfaira-site: https://theislab.github.io/sfaira-site/index.html diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..4a52c31ee --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = system_intelligence +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst new file mode 100644 index 000000000..22b767b80 --- /dev/null +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -0,0 +1,75 @@ +sfaira.data.DatasetBase +======================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetBase + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetBase.__init__ + ~DatasetBase.assert_celltype_version_key + ~DatasetBase.clear + ~DatasetBase.load + ~DatasetBase.load_meta + ~DatasetBase.load_tobacked + ~DatasetBase.map_ontology_class + ~DatasetBase.set_default_type_version + ~DatasetBase.set_unkown_class_id + ~DatasetBase.subset_organs + ~DatasetBase.write_meta + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetBase.age + ~DatasetBase.annotated + ~DatasetBase.author + ~DatasetBase.available_type_versions + ~DatasetBase.citation + ~DatasetBase.dev_stage + ~DatasetBase.doi + ~DatasetBase.doi_cleaned_id + ~DatasetBase.download + ~DatasetBase.download_meta + ~DatasetBase.ethnicity + ~DatasetBase.healthy + ~DatasetBase.healthy_state_healthy + ~DatasetBase.id + ~DatasetBase.meta + ~DatasetBase.meta_fn + ~DatasetBase.ncells + ~DatasetBase.normalization + ~DatasetBase.obs_key_age + ~DatasetBase.obs_key_cellontology_id + ~DatasetBase.obs_key_cellontology_original + ~DatasetBase.obs_key_dev_stage + ~DatasetBase.obs_key_ethnicity + ~DatasetBase.obs_key_healthy + ~DatasetBase.obs_key_organ + ~DatasetBase.obs_key_organism + ~DatasetBase.obs_key_protocol + ~DatasetBase.obs_key_sex + ~DatasetBase.obs_key_state_exact + ~DatasetBase.organ + ~DatasetBase.organism + ~DatasetBase.protocol + ~DatasetBase.sex + ~DatasetBase.source + ~DatasetBase.state_exact + ~DatasetBase.var_ensembl_col + ~DatasetBase.var_symbol_col + ~DatasetBase.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst 
b/docs/api/sfaira.data.DatasetInteractive.rst new file mode 100644 index 000000000..d1bda2a3f --- /dev/null +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -0,0 +1,75 @@ +sfaira.data.DatasetInteractive +============================== + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetInteractive + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetInteractive.__init__ + ~DatasetInteractive.assert_celltype_version_key + ~DatasetInteractive.clear + ~DatasetInteractive.load + ~DatasetInteractive.load_meta + ~DatasetInteractive.load_tobacked + ~DatasetInteractive.map_ontology_class + ~DatasetInteractive.set_default_type_version + ~DatasetInteractive.set_unkown_class_id + ~DatasetInteractive.subset_organs + ~DatasetInteractive.write_meta + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetInteractive.age + ~DatasetInteractive.annotated + ~DatasetInteractive.author + ~DatasetInteractive.available_type_versions + ~DatasetInteractive.citation + ~DatasetInteractive.dev_stage + ~DatasetInteractive.doi + ~DatasetInteractive.doi_cleaned_id + ~DatasetInteractive.download + ~DatasetInteractive.download_meta + ~DatasetInteractive.ethnicity + ~DatasetInteractive.healthy + ~DatasetInteractive.healthy_state_healthy + ~DatasetInteractive.id + ~DatasetInteractive.meta + ~DatasetInteractive.meta_fn + ~DatasetInteractive.ncells + ~DatasetInteractive.normalization + ~DatasetInteractive.obs_key_age + ~DatasetInteractive.obs_key_cellontology_id + ~DatasetInteractive.obs_key_cellontology_original + ~DatasetInteractive.obs_key_dev_stage + ~DatasetInteractive.obs_key_ethnicity + ~DatasetInteractive.obs_key_healthy + ~DatasetInteractive.obs_key_organ + ~DatasetInteractive.obs_key_organism + ~DatasetInteractive.obs_key_protocol + ~DatasetInteractive.obs_key_sex + ~DatasetInteractive.obs_key_state_exact + ~DatasetInteractive.organ + ~DatasetInteractive.organism + ~DatasetInteractive.protocol + ~DatasetInteractive.sex + 
~DatasetInteractive.source + ~DatasetInteractive.state_exact + ~DatasetInteractive.var_ensembl_col + ~DatasetInteractive.var_symbol_col + ~DatasetInteractive.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst new file mode 100644 index 000000000..cdbb18916 --- /dev/null +++ b/docs/api/sfaira.data.DatasetSuperGroup.rst @@ -0,0 +1,35 @@ +sfaira.data.DatasetSuperGroup +============================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetSuperGroup + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetSuperGroup.__init__ + ~DatasetSuperGroup.delete_backed + ~DatasetSuperGroup.extend_dataset_groups + ~DatasetSuperGroup.flatten + ~DatasetSuperGroup.get_gc + ~DatasetSuperGroup.load_all + ~DatasetSuperGroup.load_all_tobacked + ~DatasetSuperGroup.load_cached_backed + ~DatasetSuperGroup.ncells + ~DatasetSuperGroup.ncells_bydataset + ~DatasetSuperGroup.ncells_bydataset_flat + ~DatasetSuperGroup.set_dataset_groups + ~DatasetSuperGroup.subset + ~DatasetSuperGroup.subset_organs + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.data.human.rst b/docs/api/sfaira.data.human.rst new file mode 100644 index 000000000..bddab197c --- /dev/null +++ b/docs/api/sfaira.data.human.rst @@ -0,0 +1,23 @@ +sfaira.data.human +================= + +.. automodule:: sfaira.data.human + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.data.mouse.rst b/docs/api/sfaira.data.mouse.rst new file mode 100644 index 000000000..af3c07453 --- /dev/null +++ b/docs/api/sfaira.data.mouse.rst @@ -0,0 +1,23 @@ +sfaira.data.mouse +================= + +.. 
automodule:: sfaira.data.mouse + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst new file mode 100644 index 000000000..415c01979 --- /dev/null +++ b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst @@ -0,0 +1,26 @@ +sfaira.genomes.ExtractFeatureListEnsemble +========================================= + +.. currentmodule:: sfaira.genomes + +.. autoclass:: ExtractFeatureListEnsemble + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ExtractFeatureListEnsemble.__init__ + ~ExtractFeatureListEnsemble.from_ensemble_gtf + ~ExtractFeatureListEnsemble.reduce_types + ~ExtractFeatureListEnsemble.reduce_types_protein_coding + ~ExtractFeatureListEnsemble.write_gene_table_to_csv + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.models.celltype.rst b/docs/api/sfaira.models.celltype.rst new file mode 100644 index 000000000..48b1f74e2 --- /dev/null +++ b/docs/api/sfaira.models.celltype.rst @@ -0,0 +1,23 @@ +sfaira.models.celltype +====================== + +.. automodule:: sfaira.models.celltype + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.models.embedding.rst b/docs/api/sfaira.models.embedding.rst new file mode 100644 index 000000000..2446b787d --- /dev/null +++ b/docs/api/sfaira.models.embedding.rst @@ -0,0 +1,23 @@ +sfaira.models.embedding +======================= + +.. automodule:: sfaira.models.embedding + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.train.GridsearchContainer.rst b/docs/api/sfaira.train.GridsearchContainer.rst new file mode 100644 index 000000000..ae233fecf --- /dev/null +++ b/docs/api/sfaira.train.GridsearchContainer.rst @@ -0,0 +1,37 @@ +sfaira.train.GridsearchContainer +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: GridsearchContainer + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~GridsearchContainer.__init__ + ~GridsearchContainer.best_model_by_partition + ~GridsearchContainer.get_best_model_ids + ~GridsearchContainer.load_gs + ~GridsearchContainer.load_y + ~GridsearchContainer.plot_best_model_by_hyperparam + ~GridsearchContainer.plot_completions + ~GridsearchContainer.plot_training_history + ~GridsearchContainer.save_best_weight + ~GridsearchContainer.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~GridsearchContainer.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst new file mode 100644 index 000000000..eeb1cb5a9 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst @@ -0,0 +1,43 @@ +sfaira.train.SummarizeGridsearchCelltype +======================================== + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SummarizeGridsearchCelltype.__init__ + ~SummarizeGridsearchCelltype.best_model_by_partition + ~SummarizeGridsearchCelltype.best_model_celltype + ~SummarizeGridsearchCelltype.create_summary_tab + ~SummarizeGridsearchCelltype.get_best_model_ids + ~SummarizeGridsearchCelltype.load_gs + ~SummarizeGridsearchCelltype.load_ontology_names + ~SummarizeGridsearchCelltype.load_y + ~SummarizeGridsearchCelltype.plot_best + ~SummarizeGridsearchCelltype.plot_best_classwise_heatmap + ~SummarizeGridsearchCelltype.plot_best_classwise_scatter + ~SummarizeGridsearchCelltype.plot_best_model_by_hyperparam + ~SummarizeGridsearchCelltype.plot_completions + ~SummarizeGridsearchCelltype.plot_training_history + ~SummarizeGridsearchCelltype.save_best_weight + ~SummarizeGridsearchCelltype.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~SummarizeGridsearchCelltype.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst new file mode 100644 index 000000000..5ef0ddf33 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst @@ -0,0 +1,47 @@ +sfaira.train.SummarizeGridsearchEmbedding +========================================= + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SummarizeGridsearchEmbedding.__init__ + ~SummarizeGridsearchEmbedding.best_model_by_partition + ~SummarizeGridsearchEmbedding.best_model_embedding + ~SummarizeGridsearchEmbedding.create_summary_tab + ~SummarizeGridsearchEmbedding.get_best_model_ids + ~SummarizeGridsearchEmbedding.get_gradients_by_celltype + ~SummarizeGridsearchEmbedding.load_gs + ~SummarizeGridsearchEmbedding.load_y + ~SummarizeGridsearchEmbedding.plot_active_latent_units + ~SummarizeGridsearchEmbedding.plot_best + ~SummarizeGridsearchEmbedding.plot_best_model_by_hyperparam + ~SummarizeGridsearchEmbedding.plot_completions + ~SummarizeGridsearchEmbedding.plot_gradient_cor + ~SummarizeGridsearchEmbedding.plot_gradient_distr + ~SummarizeGridsearchEmbedding.plot_npc + ~SummarizeGridsearchEmbedding.plot_training_history + ~SummarizeGridsearchEmbedding.save_best_weight + ~SummarizeGridsearchEmbedding.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~SummarizeGridsearchEmbedding.List + ~SummarizeGridsearchEmbedding.Union + ~SummarizeGridsearchEmbedding.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TargetZoos.rst b/docs/api/sfaira.train.TargetZoos.rst new file mode 100644 index 000000000..bbf18cd74 --- /dev/null +++ b/docs/api/sfaira.train.TargetZoos.rst @@ -0,0 +1,24 @@ +sfaira.train.TargetZoos +======================= + +.. 
currentmodule:: sfaira.train + +.. autoclass:: TargetZoos + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TargetZoos.__init__ + ~TargetZoos.write_celltypes_tocsv_human + ~TargetZoos.write_celltypes_tocsv_mouse + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst new file mode 100644 index 000000000..cde6646e2 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelCelltype.rst @@ -0,0 +1,36 @@ +sfaira.train.TrainModelCelltype +=============================== + +.. currentmodule:: sfaira.train + +.. autoclass:: TrainModelCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelCelltype.__init__ + ~TrainModelCelltype.human_target + ~TrainModelCelltype.init_estim + ~TrainModelCelltype.mouse_target + ~TrainModelCelltype.save + ~TrainModelCelltype.save_eval + ~TrainModelCelltype.set_data + ~TrainModelCelltype.write_celltypes_tocsv_human + ~TrainModelCelltype.write_celltypes_tocsv_mouse + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~TrainModelCelltype.adata + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst new file mode 100644 index 000000000..e7c1b6be8 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelEmbedding.rst @@ -0,0 +1,36 @@ +sfaira.train.TrainModelEmbedding +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: TrainModelEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelEmbedding.__init__ + ~TrainModelEmbedding.human_target + ~TrainModelEmbedding.init_estim + ~TrainModelEmbedding.mouse_target + ~TrainModelEmbedding.save + ~TrainModelEmbedding.save_eval + ~TrainModelEmbedding.set_data + ~TrainModelEmbedding.write_celltypes_tocsv_human + ~TrainModelEmbedding.write_celltypes_tocsv_mouse + + + + + + .. 
rubric:: Attributes + + .. autosummary:: + + ~TrainModelEmbedding.adata + + \ No newline at end of file diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst new file mode 100644 index 000000000..d14d56879 --- /dev/null +++ b/docs/api/sfaira.ui.UserInterface.rst @@ -0,0 +1,38 @@ +sfaira.ui.UserInterface +======================= + +.. currentmodule:: sfaira + +.. autoclass:: ui.UserInterface + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ui.UserInterface.__init__ + ~ui.UserInterface.celltype_summary + ~ui.UserInterface.compute_all + ~ui.UserInterface.compute_all_kipoi + ~ui.UserInterface.compute_celltype + ~ui.UserInterface.compute_celltype_kipoi + ~ui.UserInterface.compute_denoised_expression + ~ui.UserInterface.compute_denoised_expression_kipoi + ~ui.UserInterface.compute_embedding + ~ui.UserInterface.compute_embedding_kipoi + ~ui.UserInterface.deposit_zenodo + ~ui.UserInterface.filter_cells + ~ui.UserInterface.get_references + ~ui.UserInterface.load_data + ~ui.UserInterface.load_model_celltype + ~ui.UserInterface.load_model_embedding + ~ui.UserInterface.write_lookuptable + + + + + + \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 000000000..598481d8d --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,21 @@ +Changelog +========== + +.. role:: small +.. role:: smaller + +This project adheres to `Semantic Versioning `_. + +0.2.1 :small:`2020-09-7` +~~~~~~~~~~~~~~~~~~~~~~~~ + +**Added** + +* Initial release with online documentation. + +**Fixed** + +**Dependencies** + +**Deprecated** + diff --git a/docs/data.rst b/docs/data.rst index 21ac5972c..1cf1f118a 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -12,11 +12,11 @@ Build a repository structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. Choose a directory to dedicate to the data base, called root in the following. - 2. 
Make subfolders in root for each organism for which you want to build a data base. - 3. Make subfolders for each organ whithin each organism for which you want to build a data base. + 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. -We maintain a couple of download scripts that automatise this process, which have to be executed in a shell once to download specific subsets of the full data zoo. -These scripts can be found in sfaira.data.download_scripts. +Note that the automated download is a feature of sfaira but not the core purpose of the package: +Sfaira allows you to efficiently interact with such a local data repository. +Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. Use 3rd party repositories ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -31,26 +31,40 @@ Contact us for support of any other repositories. Add data sets ~~~~~~~~~~~~~ - 4. For each species and organ combination, choose the data sets that you want to use. - 5. Identify the raw files as indicated in the data loader classes and copy them into the folder. Use processed data - using the described processing if this is required: This is usually done to speed up loading for file - formats that are difficult to access. - -Data loaders ------------- + 1. Write a data loader as outlined below. + 2. Identify the raw files as indicated in the data loader classes and copy them into your directory structure as required by your data loader. + 3. You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the dataloader contributed to sfaira would use this download link.
Use data loaders on existing data repository -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------------------------- You only want to use data sets with existing data loaders and have adapted your directory structure as above? In that case, you can immediately start using the data loader functions, you just need to supply the root directory of the directory structure as `path to the constructor of the class that you are using. -Depending on the functionalities you want to use, you need to create a directory with data set meta data first. This -can be easily done via the data set api itself, example python scripts are under benchmarks/data_preparation. This -meta information is necessary to anticipate file sizes for backing merged adata objects for example. +Depending on the functionalities you want to use, you would often want to create a directory with cached meta data +first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to +anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. + +Write data loaders +------------------ + +The study-centric data loader module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the sfaira code, data loaders are organised into directories, which correspond to publications. +All data loaders corresponding to data sets of one study are grouped into this directory. +This directory contains an `__init__.py` file which makes these data loaders visible to sfaira: + +.. code-block:: python -Contribute data loaders -~~~~~~~~~~~~~~~~~~~~~~~ + FILE_PATH = __file__ + + +Next, each data set is represented by one data loader python file in this directory. +See below for more complex set ups with repetitive data loader code. + +The data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. 
Each such class is in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class @@ -68,14 +82,51 @@ before it is loaded into memory: meta_path: Union[str, None] = None, **kwargs ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = x # your-species - self.id = x # "organism_organ_year_protocoll_first-author_doi" - self.download_website = x # link to raw data - self.organ = x #y ourorgan - self.sub_tissue = x # sub-tissue name, otherwise organ - self.dev_stage = x # developmental stage of organism - self.has_celltypes = x # if cell type annotation is available + super().__init__(path=path, meta_path=meta_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi).
+ + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download = x # download website(s) of data files + self.download_meta = x # download website(s) of meta data files + + self.age = x # (*, optional) age of sample + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.protocol = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out. 
+ self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contain streamlined cell ontology cell type classes: + self.obs_key_cellontology_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and + # directory as the python file of this data loader (see below). # A dictionary of dictionaries with: # One item for each annotation label that is not contained in the ontology. @@ -93,34 +144,38 @@ before it is loaded into memory: .. code-block:: python def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "human", "eye", "my_data.h5ad") defined file in streamlined directory structure self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad + # Some times, you need to load multiple files (e.g. counts and annotation), all of this code would be here. 
- self.adata.uns["lab"] = x # load the adata.uns with meta data - self.adata.uns["year"] = x - self.adata.uns["doi"] = x - self.adata.uns["protocol"] = x # e.g. 10x, microwell, seqwell... - self.adata.uns["organ"] = self.organ - self.adata.uns["subtissue"] = self.sub_tissue - self.adata.uns["animal"] = x - self.adata.uns["id"] = self.id - self.adata.uns["wget_download"] = self.download_website - self.adata.uns["has_celltypes"] = self.has_celltypes - self.adata.uns["counts"] = 'raw' - self.adata.uns["dev_stage"] = self.dev_stage - # Class expects unprocessed cell type labels in self.adata.obs["cell_ontology_class"] - self.adata.obs["cell_ontology_class"] = self.adata.obs['CellType'] - # You can additional set self.adata.obs["cell_ontology_id"] if you have streamlined ontology IDs. This are also - # defined in the cell type universe lists. - self.adata.obs["healthy"] = x # boolean tissue sample healthy or diseased / treated - self.adata.obs["state_exact"] = x # exact tissue state as string, e.g. "tumor" or "healthy" +In summary, a simple example data loader for a mouse lung data set could look like this: - self._convert_and_set_var_names(symbol_col='names', ensembl_col='ensembl', new_index='ensembl') +.. code-block:: python + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, **kwargs) + self.author = "me" + self.doi = "my preprint" + self.download = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! 
+ self.organ = "lung" + self.organism = "mouse" + self.protocol = "smart-seq2" + self.year = "2020" + + self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here + + def _load(self, fn=None): + # assuming that i uploaded an h5ad somewhere (in self.download) + if fn is None: + fn = os.path.join(self.path, "mouse", "lung", "my.h5ad") + self.adata = anndata.read(fn) Data loaders can be added into a copy of the sfaira repository and can be used locally before they are contributed to @@ -130,43 +185,51 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. -Ontology management -------------------- +Map cell type labels to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with +the same name and directory as the python file in which the data loader is located. +This .csv contains two columns with one row for each unique cell type label and their free text identifiers in the first +column, and the corresponding ontology term in the second column. +You could write this file entirely from scratch. +Sfaira also allows you to generate a first guess of this file using fuzzy string matching via ToDo. +Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which +ontology term in the case of conflicts. +Still, this first guess usually drastically speeds up this annotation harmonization. -Sfaira maintains versioned cell type universes and ontologies by species and organ. -A cell type universe is a list of the unique, most fine-grained cell type definitions available. -These cell types can be referred to by a human readable cell type name or a structure identifier within an ontology, -an ontology ID. 
-Often, one is also interested in access to more coarse grained groups of cell types, for example if the data quality -does not allow to distinguish between T cell subtypes. -To allow coarser type definition, sfaira maintains hierarchies of cell types, in which each hierarchical level is again -defined by a cell type identifier. -Such a hierarchy can be writted as directed acyclic graph which has the cell type universe as its leave nodes. -Intuitively, the cell type hierarchy graph depends on the cell type universe. -Accordingly, both are versioned together in sfaira: -Updates in the cell type universe, such as discovery of a new cell type, lead to an update of the ontology and an -incrementation in both of their versions. -These versioned changes materialise as a distinct list (universe) and dictionary (ontology) for each version in the -file that harbors the species- and organ-specific class that inherits from CelltypeVersionsBase and thus are available -even after updates. -This versioning without depreceation of the old objects allows sfaira to execute and train models that were designed -for older cell type universes and thus ensures reproducibility. - -Contribute cell types to ontologies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To contibute new cell types or change existing cell type universe entries, the cell type universe version has to be -incremented and the new entry can simply be added to the list or modified in the list. -We do not increment the universe version if a change does not influence the identity of a leave node with respect to -the other types in the universe, ie if it simply changes the spelling of a cell type or if an onology ID is added to -a type that previously did not have one. 
- -Contribute hierarchies to ontologies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To contribute a term to a cell type ontology, one just has to add a dictionary item that defines the new term as a set -of the leave nodes (cell type universe) of the corresponding universe version. +Repetitive data loader code +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are instances in which you find yourself copying code between data loader files corresponding to one study. +In most of these cases, you can avoid the copy operations and share the code more efficiently. + +If you have multiple data files which each correspond to a data set and are structured similarly, you can define a super +class which contains the shared constructor and `_load()` code, from which each data set specific loader inherits. +ToDo: Example. + +If you have a single file which contains the data from multiple data sets which belong to a data loader each, +because of different meta data or batches for example, +you can set up a `group.py` file which defines a DatasetGroup for this study, which controls the generation of Datasets. +ToDo: Example. + +Cell type ontology management +----------------------------- + +Sfaira maintains a wrapper of the Cell Ontology as a class which allows additions to this ontology. +This allows us to use the core ontology used in the community as a backbone and to keep up with newly identified cell types on our own. +We require all extensions of the core ontology not to break the directed acyclic graph that is the ontology: +Usually, such extensions would be additional leaf nodes. + +Second, we maintain cell type universes for anatomic structures. +These are dedicated for cell type-dependent models which require a defined set of cell types. +Such a universe is a set of nodes in the ontology. + +Contribute cell types to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Please open an issue on the sfaira repo with a description of what type of cell type you want to add. 
Using ontologies to train cell type classifiers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -176,6 +239,17 @@ cross-entropy as a loss and aggregate accuracy as a metric. The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. +Metadata management +------------------- + +We constrain meta data by ontologies where possible. The current restrictions are: + + - .organism must be either mouse or human. + +Follow this issue_ for details on upcoming ontology integrations. + +.. _issue: https://github.com/theislab/sfaira/issues/16 + Genome management ----------------- diff --git a/docs/index.rst b/docs/index.rst index c699c8c1d..497452180 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,7 +17,7 @@ sfaira - data and model repository for single-cell data sfaira_ is a model and a data repository in a single python package. We provide an interactive overview of the current state of the zoos on sfaira-site_. -.. _sfaira: https://sfaira.readthedocs.io +.. _sfaira: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 .. _sfaira-site: https://theislab.github.io/sfaira-site/index.html .. include:: environment_brief.rst @@ -43,4 +43,4 @@ Latest additions models ecosystem roadmap - release-notes + changelog diff --git a/docs/release-latest.rst b/docs/release-latest.rst deleted file mode 100644 index 913476eb3..000000000 --- a/docs/release-latest.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. role:: small -.. role:: smaller - -0.2.1 :small:`2020-09-7` -~~~~~~~~~~~~~~~~~~~~~~~~ -Initial release with online documentation. diff --git a/docs/release-notes.rst b/docs/release-notes.rst deleted file mode 100644 index 7f8561271..000000000 --- a/docs/release-notes.rst +++ /dev/null @@ -1,11 +0,0 @@ -Release Notes -============= - -.. role:: small -.. role:: smaller - - -Version 0.2 ------------ - -.. 
include:: release-latest.rst diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..5a833b05d --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,7 @@ +sphinx +matplotlib +sphinx_rtd_theme +sphinx-autodoc-typehints +jinja2 +docutils +-r ../requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2ecab3f17..cee7a6606 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,12 +4,14 @@ jinja2 loompy numpy>=1.14.0 matplotlib +openpyxl pandas scanpy scipy seaborn sphinx -sphinx-autodoc-typehints sphinx_rtd_theme +tensorflow # TODO remove as soon as # 70 is solved tqdm requests +xlrd==1.* diff --git a/setup.cfg b/setup.cfg index 9320da5cc..2b1c2738b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,6 @@ +[bdist_wheel] +universal = 1 + [versioneer] VCS = git style = pep440 @@ -7,3 +10,15 @@ tag_prefix = [build_ext] inplace = 1 + +[flake8] +ignore=F401, W504 +exclude = docs +max-line-length = 160 + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] \ No newline at end of file diff --git a/setup.py b/setup.py index 95e4de8ce..71270b465 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,10 @@ author = 'theislab' author_email = 'david.fischer@helmholtz-muenchen.de' -description = "" +description = "sfaira is a model and a data repository for single-cell data in a single python package." 
with open("README.rst", "r") as fh: - long_description = fh.read() + long_description = fh.read() setup( name='sfaira', @@ -15,6 +15,15 @@ description=description, long_description=long_description, long_description_content_type="text/markdown", + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], packages=find_packages(), install_requires=[ 'anndata>=0.7', @@ -22,12 +31,12 @@ 'numpy>=1.16.4', 'pandas', 'scipy>=1.2.1', - 'tqdm' + 'tqdm', + 'tensorflow>=2.0.0' # TODO Remove and add to tensorflow profile ], extras_require={ 'tensorflow': [ - 'tensorflow>=2.0.0', - 'tensorflow-gpu>=2.0.0' + # 'tensorflow>=2.0.0' # TODO Add Tensorflow here again ], 'kipoi': [ 'kipoi', @@ -38,25 +47,16 @@ "matplotlib", "sklearn" ], - 'scanpy': [ - "scanpy" - ], - 'loompy': [ + 'data': [ + "scanpy", "loompy", + "requests", + "xlrd==1.*", + "openpyxl", ], 'extension': [ "sfaira_extension", ], - 'zenodo': [ - "requests" - ], - 'docs': [ - 'sphinx', - 'sphinx-autodoc-typehints', - 'sphinx_rtd_theme', - 'jinja2', - 'docutils', - ], }, version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 319b4e98f..feb4fbd59 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- """A Data and Model Zoo for Single-Cell Genomics.""" +import sfaira.interface as ui +import sfaira.train +import sfaira.models +import sfaira.genomes +import sfaira.data +import sfaira.consts as consts from ._version import get_versions __version__ = get_versions()['version'] @@ -17,10 +23,3 @@ "leander.dony@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de" ]) - -import sfaira.consts as consts -import sfaira.data -import sfaira.genomes -import 
sfaira.models -import sfaira.train -import sfaira.interface as ui diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 15d95e718..1505f6ec2 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,3 +1,3 @@ -from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED +from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA from sfaira.consts.meta_data_files import META_DATA_FIELDS diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index fdac269bc..1eee5e775 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,3 +1,6 @@ +import numpy as np +from typing import List + """ The classes in this file are containers of field names and element entries that are used in streamlined adata objects in sfaira and in associated data bases. @@ -15,6 +18,7 @@ class ADATA_IDS_BASE: _cell_ontology_id: str _doi: str _download: str + _download_meta: str _dataset: str _dataset_group: str _gene_id_ensembl: str @@ -25,9 +29,8 @@ class ADATA_IDS_BASE: _ncells: str _normalization: str _organ: str + _organism: str _protocol: str - _species: str - _subtissue: str _year: str @property @@ -66,6 +69,10 @@ def doi(self) -> str: def download(self) -> str: return self._download + @property + def download_meta(self) -> str: + return self._download_meta + @property def gene_id_ensembl(self) -> str: return self._gene_id_ensembl @@ -98,21 +105,17 @@ def ncells(self) -> str: def normalization(self) -> str: return self._normalization - @property - def protocol(self) -> str: - return self._protocol - @property def organ(self) -> str: return self._organ @property - def species(self) -> str: - return self._species + def organism(self) -> str: # TODO refactor into organism + return self._organism @property - def subtissue(self) -> str: - return self._subtissue + def protocol(self) -> str: + return 
self._protocol @property def year(self) -> str: @@ -165,6 +168,7 @@ def __init__(self): self._dataset = "dataset" self._dataset_group = "dataset_group" self._download = "download" + self._download_meta = "download_meta" self._gene_id_ensembl = "ensembl" self._gene_id_index = "ensembl" self._gene_id_names = "names" @@ -173,9 +177,8 @@ def __init__(self): self._ncells = "ncells" self._normalization = "normalization" self._organ = "organ" + self._organism = "organism" self._protocol = "protocol" - self._species = "organism" - self._subtissue = "subtissue" self._year = "year" self._age = "age" @@ -184,6 +187,39 @@ def __init__(self): self._sex = "sex" self._state_exact = "state_exact" + self._load_raw = "load_raw" + self._mapped_features = "mapped_features" + self._remove_gene_version = "remove_gene_version" + + # Allowed field values: + self.age_allowed_entries = None + self.dev_stage_allowed_entries = None + self.ethnicity_allowed_entries = None + self.normalization_allowed_entries = None + self.organ_allowed_entries = None + self.organism_allowed_entries = ["mouse", "human"] + self.protocol_allowed_entries = None + self.sex_allowed_entries = None + self.subtissue_allowed_entries = None + self.year_allowed_entries = list(range(2000, 3000)) + # Free fields that are not constrained: + # _author, _download, _download_meta, _doi, _id, _state_exact + + self.unknown_celltype_name = "unknown" + self.unknown_celltype_identifiers = ["nan", "none", "unknown", np.nan, None] + + @property + def load_raw(self) -> str: + return self._load_raw + + @property + def mapped_features(self) -> str: + return self._mapped_features + + @property + def remove_gene_version(self) -> str: + return self._remove_gene_version + class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): """ @@ -192,6 +228,7 @@ class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): """ _author_names: str _disease_state_healthy: str + accepted_file_names: List[str] def __init__(self): self._cell_types_original = "free_annotation" @@ 
-201,6 +238,7 @@ def __init__(self): self._dataset = "dataset" self._dataset_group = "dataset_group" self._download = "" # TODO + self._download_meta = "" # never necessary as we interface via anndata objects self._gene_id_ensembl = "" # TODO self._gene_id_index = "ensembl" self._gene_id_names = "" # TODO @@ -208,11 +246,10 @@ def __init__(self): self._healthy = None # is inferred from _disease self._id = "" # TODO self._ncells = "ncells" - self._normalization = None # is always "counts" + self._normalization = "" # is always "raw" self._organ = "" # TODO + self._organism = "organism" self._protocol = "assay" - self._species = "organism" - self._subtissue = "" # TODO self._year = "" # TODO self._age = "age" @@ -226,6 +263,11 @@ def __init__(self): self._disease_state_healthy = "normal" self._author_names = "names" + # accepted file names + self.accepted_file_names = [ + "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad", + ] + @property def author_names(self) -> str: return self._author_names diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 8478aba28..7adfd59d8 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -2,16 +2,20 @@ The classes contains constants related to sfaira streamlined meta data files. 
""" -META_DATA_FIELDS = [ - "annotated", - "author", - "doi", - "download", - "id", - "ncells", - "normalization", - "organ", - "protocol", - "species", - "year", -] +META_DATA_FIELDS = { + "annotated": bool, + "author": str, + "cell_ontology_class": str, + "doi": str, + "download": str, + "download_meta": str, + "healthy": bool, + "id": str, + "ncells": str, + "normalization": str, + "organ": str, + "protocol": str, + "organism": str, + "state_exact": str, + "year": int, +} diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 896eb5de2..ac96ab48f 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,6 @@ -from .base import DatasetBase, DatasetGroupBase, DatasetSuperGroup -from . import mouse -from . import human +from .base import DatasetBase, DatasetBaseGroupLoading, \ + DatasetGroup, DatasetGroupDirectoryOriented, \ + DatasetSuperGroup +from . import dataloaders +from .dataloaders import DatasetSuperGroupSfaira from .interactive import DatasetInteractive diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 996dbdb94..73e071aaf 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1,81 +1,235 @@ +from __future__ import annotations + import abc import anndata import h5py +import multiprocessing import numpy as np import pandas as pd import os from os import PathLike import pandas +import pydoc import scipy.sparse -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import warnings from .external import SuperGenomeContainer from .external import ADATA_IDS_SFAIRA, META_DATA_FIELDS +UNS_STRING_META_IN_OBS = "__obs__" + + +def map_fn(inputs): + ds, formatted_version, remove_gene_version, match_to_reference, load_raw, allow_caching, func, \ + kwargs_func = inputs + try: + ds.load( + celltype_version=formatted_version, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=load_raw, + allow_caching=allow_caching, + ) + if func is not None: + x = 
func(ds, **kwargs_func) + ds.clear() + return x + else: + return None + except FileNotFoundError as e: + return ds.id, e, + class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict - meta: Union[None, pandas.DataFrame] - download_website_meta: Union[None, str] + _meta: Union[None, pandas.DataFrame] path: Union[None, str] + meta_path: Union[None, str] + cache_path: Union[None, str] id: Union[None, str] genome: Union[None, str] - _annotated: Union[None, bool] + _age: Union[None, str] _author: Union[None, str] + _dev_stage: Union[None, str] _doi: Union[None, str] - _download: Union[None, str] + _download: Union[Tuple[List[None]], Tuple[List[str]]] + _download_meta: Union[Tuple[List[None]], Tuple[List[str]]] + _ethnicity: Union[None, str] + _healthy: Union[None, bool] _id: Union[None, str] _ncells: Union[None, int] _normalization: Union[None, str] _organ: Union[None, str] + _organism: Union[None, str] _protocol: Union[None, str] - _species: Union[None, str] - _year: Union[None, str] + _sex: Union[None, str] + _source: Union[None, str] + _state_exact: Union[None, str] + _year: Union[None, int] + + _obs_key_age: Union[None, str] + _obs_key_cellontology_id: Union[None, str] + _obs_key_cellontology_original: Union[None, str] + _obs_key_dev_stage: Union[None, str] + _obs_key_ethnicity: Union[None, str] + _obs_key_healthy: Union[None, str] + _obs_key_healthy: Union[None, str] + _obs_key_organ: Union[None, str] + _obs_key_organism: Union[None, str] + _obs_key_protocol: Union[None, str] + _obs_key_sex: Union[None, str] + _obs_key_state_exact: Union[None, str] + + _healthy_state_healthy: Union[None, str] + + _var_symbol_col: Union[None, str] + _var_ensembl_col: Union[None, str] def __init__( self, path: Union[str, None] = None, meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, **kwargs ): + self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + self._META_DATA_FIELDS = META_DATA_FIELDS + self.adata = None - self.download_website_meta = 
None self.meta = None self.genome = None self.path = path self.meta_path = meta_path - self._load_raw = None + self.cache_path = cache_path - self._annotated = None + self._age = None self._author = None + self._dev_stage = None self._doi = None self._download = None + self._download_meta = None + self._ethnicity = None + self._healthy = None self._id = None self._ncells = None self._normalization = None self._organ = None + self._organism = None self._protocol = None - self._species = None + self._sex = None + self._source = None + self._state_exact = None self._year = None - self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - self._META_DATA_FIELDS = META_DATA_FIELDS + self._obs_key_age = None + self._obs_key_cellontology_id = None + self._obs_key_cellontology_original = None + self._obs_key_dev_stage = None + self._obs_key_ethnicity = None + self._obs_key_healthy = None + self._obs_key_organ = None + self._obs_key_organism = None + self._obs_key_protocol = None + self._obs_key_sex = None + self._obs_key_state_exact = None + + self._healthy_state_healthy = None + + self._var_symbol_col = None + self._var_ensembl_col = None + + self.class_maps = {"0": {}} + self._unknown_celltype_identifiers = self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers @abc.abstractmethod def _load(self, fn): pass + def _download(self, fn): + pass + + @property + def _directory_formatted_doi(self) -> str: + return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) + + @property + def _directory_formatted_id(self) -> str: + return "_".join("_".join(self.id.split("/")).split(".")) + + def clear(self): + """ + Remove loaded .adata to reduce memory footprint. + + :return: + """ + import gc + self.adata = None + gc.collect() + + def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None) -> bool: + """ + Only relevant for DatasetBaseGroupLoading but has to be a method of this class + because it is used in DatasetGroup. 
+ + :param fn: + :param adata_group: + :return: Whether group loading is used. + """ + return False + + def _load_cached( + self, + fn: str, + load_raw: bool, + allow_caching: bool, + ): + """ + Wraps data set specific load and allows for caching. + + Cache is written into director named after doi and h5ad named after data set id. + + :param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. + :return: + """ + if fn is None and self.path is None: + raise ValueError("provide either fn in load or path in constructor") + + assert self.cache_path is not None, "set self.cache_path first" + assert self._directory_formatted_doi is not None, "set self.doi first" + assert self._directory_formatted_id is not None, "set self.id first" + fn_cache = os.path.join( + self.cache_path, + self._directory_formatted_doi, + self._directory_formatted_id + ".h5ad" + ) + # Check if raw loader has to be called: + if load_raw or not os.path.exists(fn_cache): + self._load(fn=fn) + else: + assert self.cache_path is not None, "set cache_path to use caching" + assert os.path.exists(fn_cache), f"did not find cache file {fn_cache}, consider caching first" + self.adata = anndata.read_h5ad(fn_cache) + # Check if file needs to be cached: + if allow_caching and not os.path.exists(fn_cache): + assert self.cache_path is not None, "set cache_path to use caching" + dir_cache = os.path.dirname(fn_cache) + if not os.path.exists(dir_cache): + os.makedirs(dir_cache) + self.adata.write_h5ad(fn_cache) + def load( self, celltype_version: Union[str, None] = None, fn: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, ): """ @@ -85,50 +239,124 @@ def load( data sets are superimposed. :param match_to_reference: Reference genomes name. 
:param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. :return: """ - self._load_raw = load_raw - if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" "while not removing gene versions. this can lead to very poor matching performance") - # set default genomes if none provided + # Set default genomes per organism if none provided: if match_to_reference: genome = match_to_reference - self._set_genome(genome=genome) - elif self.species == "human": + elif self.organism == "human": genome = "Homo_sapiens_GRCh38_97" warnings.warn(f"using default genomes {genome}") - self._set_genome(genome=genome) - elif self.species == "mouse": + elif self.organism == "mouse": genome = "Mus_musculus_GRCm38_97" warnings.warn(f"using default genomes {genome}") - self._set_genome(genome=genome) + else: + raise ValueError(f"genome was not supplied and organism {self.organism} " + f"was not matched to a default choice") + self._set_genome(genome=genome) + + # Run data set-specific loading script: + self._load_cached(fn=fn, load_raw=load_raw, allow_caching=allow_caching) + # Set data-specific meta data in .adata: + self._set_metadata_in_adata(celltype_version=celltype_version) + # Set loading hyper-parameter-specific meta data: + self.adata.uns[self._ADATA_IDS_SFAIRA.load_raw] = load_raw + self.adata.uns[self._ADATA_IDS_SFAIRA.mapped_features] = match_to_reference + self.adata.uns[self._ADATA_IDS_SFAIRA.remove_gene_version] = remove_gene_version + # Streamline feature space: + self._convert_and_set_var_names() + self._collapse_gene_versions(remove_gene_version=remove_gene_version) + self._match_features_to_reference(match_to_reference=match_to_reference) + + def _convert_and_set_var_names( + self, + symbol_col: str = None, + ensembl_col: str = None, + ): + # Use defaults defined in 
data loader if none given to this function. + if symbol_col is None: + symbol_col = self.var_symbol_col + if ensembl_col is None: + ensembl_col = self.var_ensembl_col + if not ensembl_col and not symbol_col: + raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' + 'the name of the var column containing gene symbols') + # Process given gene names: Full gene names ("symbol") or ENSEMBL IDs ("ensembl"). + # Below the .var column that contain the target IDs are renamed to follow streamlined naming. + # If the IDs were contained in the index, a new column is added to .var. + if symbol_col: + if symbol_col == 'index': + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = self.adata.var.index.values.tolist() + else: + assert symbol_col in self.adata.var.columns, f"symbol_col {symbol_col} not found in .var" + self.adata.var = self.adata.var.rename( + {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, + axis='columns' + ) + if ensembl_col: + if ensembl_col == 'index': + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = self.adata.var.index.values.tolist() + else: + assert ensembl_col in self.adata.var.columns, f"ensembl_col {ensembl_col} not found in .var" + self.adata.var = self.adata.var.rename( + {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, + axis='columns' + ) + # If only symbol or ensembl was supplied, the other one is inferred ia a genome mapping dictionary. 
+ if not ensembl_col: + id_dict = self.genome_container.names_to_id_dict + id_strip_dict = self.genome_container.strippednames_to_id_dict + # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, + # match it straight away, if it is not in there we try to match everything in front of the first period in + # the gene name with a dictionary that was modified in the same way, if there is still no match we append na + ensids = [] + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: + if n in id_dict.keys(): + ensids.append(id_dict[n]) + elif n.split(".")[0] in id_strip_dict.keys(): + ensids.append(id_strip_dict[n.split(".")[0]]) + else: + ensids.append('n/a') + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids - self._load(fn=fn) + if not symbol_col: + id_dict = self.genome_container.id_to_names_dict + self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ + id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' + for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] + ] - if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None + # Lastly, the index of .var is set to ensembl IDs. 
+ try: # debugging + self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() + except KeyError as e: + raise KeyError(e) - # Map cell type names from raw IDs to ontology maintained ones:: - if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, - celltype_version=celltype_version - ) + self.adata.var_names_make_unique() - # Remove version tag on ensembl gene ID so that different versions are superimposed downstream: + def _collapse_gene_versions(self, remove_gene_version): + """ + Remove version tag on ensembl gene ID so that different versions are superimposed downstream. + + :param remove_gene_version: + :return: + """ if remove_gene_version: new_index = [x.split(".")[0] for x in self.adata.var_names.tolist()] # Collapse if necessary: new_index_collapsed = list(np.unique(new_index)) if len(new_index_collapsed) < self.adata.n_vars: - raise ValueError("duplicate features detected after removing gene versions." - "the code to collapse these features is implemented but not tested.") + print("WARNING: duplicate features detected after removing gene versions." + "the code to collapse these features is implemented but not tested.") idx_map = np.array([new_index_collapsed.index(x) for x in new_index]) # Need reverse sorting to find index of last element in sorted list to split array using list index(). - idx_map_sorted_rev = np.argsort(idx_map)[::-1] + idx_map_sorted_fwd = np.argsort(idx_map) + idx_map_sorted_rev = idx_map_sorted_fwd[::-1].tolist() n_genes = len(idx_map_sorted_rev) # 1. Sort array in non-reversed order: idx_map_sorted_rev[::-1] # 2. 
Split into chunks based on blocks of identical entries in idx_map, using the occurrence of the @@ -138,10 +366,10 @@ def load( counts = np.concatenate([ np.sum(x, axis=1, keepdims=True) for x in np.split( - self.adata[:, idx_map_sorted_rev[::-1]].X, # forward ordered data + self.adata[:, idx_map_sorted_fwd].X, # forward ordered data indices_or_sections=[ n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order - for x in np.arange(0, len(new_index_collapsed)-1) # -1: do not need end of last partition + for x in np.arange(0, len(new_index_collapsed) - 1) # -1: do not need end of last partition ], axis=1 ) @@ -161,7 +389,13 @@ def load( self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values - # Match feature space to a genomes provided with sfaira + def _match_features_to_reference(self, match_to_reference): + """ + Match feature space to a genomes provided with sfaira + + :param match_to_reference: + :return: + """ if match_to_reference: # Convert data matrix to csc matrix if isinstance(self.adata.X, np.ndarray): @@ -198,100 +432,80 @@ def load( x_new = x_new.tocsr() self.adata = anndata.AnnData( - X=x_new, - obs=self.adata.obs, - obsm=self.adata.obsm, - var=pd.DataFrame(data={'names': self.genome_container.names, - self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, - index=self.genome_container.ensembl), - uns=self.adata.uns + X=x_new, + obs=self.adata.obs, + obsm=self.adata.obsm, + var=pd.DataFrame(data={'names': self.genome_container.names, + self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, + index=self.genome_container.ensembl), + uns=self.adata.uns ) - self.adata.uns['mapped_features'] = match_to_reference - - def _convert_and_set_var_names( - self, - symbol_col: str = None, - ensembl_col: str = None, - ): - if symbol_col and ensembl_col: - if symbol_col == 'index': - 
self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - - if ensembl_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) - - elif symbol_col: - id_dict = self.genome_container.names_to_id_dict - id_strip_dict = self.genome_container.strippednames_to_id_dict - if symbol_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - else: - self.adata.var = self.adata.var.rename( - {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) + def _set_metadata_in_adata(self, celltype_version): + """ + Copy meta data from dataset class in .anndata. 
- # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, - # match it straight away, if it is not in there we try to match everything in front of the first period in - # the gene name with a dictionary that was modified in the same way, if there is still no match we append na - ensids = [] - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: - if n in id_dict.keys(): - ensids.append(id_dict[n]) - elif n.split(".")[0] in id_strip_dict.keys(): - ensids.append(id_strip_dict[n.split(".")[0]]) + :param celltype_version: + :return: + """ + # Set data set-wide attributes (.uns): + self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated + self.adata.uns[self._ADATA_IDS_SFAIRA.author] = self.author + self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = self.doi + self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download + self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta] = self.download_meta + self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id + self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = self.normalization + self.adata.uns[self._ADATA_IDS_SFAIRA.year] = self.year + + # Set cell-wise or data set-wide attributes (.uns / .obs): + # These are saved in .uns if they are data set wide to save memory. 
+ for x, y, z in ( + [self.age, self._ADATA_IDS_SFAIRA.age, self.obs_key_age], + [self.dev_stage, self._ADATA_IDS_SFAIRA.dev_stage, self.obs_key_dev_stage], + [self.ethnicity, self._ADATA_IDS_SFAIRA.ethnicity, self.obs_key_ethnicity], + [self.healthy, self._ADATA_IDS_SFAIRA.healthy, self.obs_key_healthy], + [self.organ, self._ADATA_IDS_SFAIRA.organ, self.obs_key_organ], + [self.protocol, self._ADATA_IDS_SFAIRA.protocol, self.obs_key_protocol], + [self.sex, self._ADATA_IDS_SFAIRA.sex, self.obs_key_sex], + [self.organism, self._ADATA_IDS_SFAIRA.organism, self.obs_key_organism], + [self.state_exact, self._ADATA_IDS_SFAIRA.state_exact, self.obs_key_state_exact], + ): + if x is None and z is None: + self.adata.uns[y] = None + elif x is not None and z is not None: + raise ValueError(f"attribute {y} of data set {self.id} was set both for full data set and per cell, " + f"only set one of the two or neither.") + elif x is not None and z is None: + # Attribute supplied per data set: Write into .uns. + self.adata.uns[y] = x + elif x is None and z is not None: + # Attribute supplied per cell: Write into .obs. + # Search for direct match of the sought-after column name or for attribute specific obs key. + if z not in self.adata.obs.keys(): + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. 
+ self.adata.uns[y] = None + print(f"WARNING: attribute {y} of data set {self.id} was not found in column {z}") # debugging else: - ensids.append('n/a') - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids - - elif ensembl_col: - id_dict = self.genome_container.id_to_names_dict - if ensembl_col == 'index': - self.adata.var.index.name = 'index' - self.adata.var = self.adata.var.reset_index().rename( - {'index': self._ADATA_IDS_SFAIRA.gene_id_ensembl}, - axis='columns' - ) + # Include flag in .uns that this attribute is in .obs: + self.adata.uns[y] = UNS_STRING_META_IN_OBS + # Remove potential pd.Categorical formatting: + self.adata.obs[y] = self.adata.obs[z].values.tolist() else: - self.adata.var = self.adata.var.rename( - {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_names}, - axis='columns' - ) - - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ - id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] - ] - - else: - raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' - 'the name of the var column containing gene symbols') - - self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() - self.adata.var_names_make_unique() + assert False, "switch option should not occur" + # Set cell-wise attributes (.obs): + # None so far other than celltypes. 
+ # Set cell types: + if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None + # Map cell type names from raw IDs to ontology maintained ones:: + if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( + raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, + celltype_version=celltype_version + ) def subset_organs(self, subset: Union[None, List]): if self.organ == "mixed": @@ -302,8 +516,16 @@ def subset_organs(self, subset: Union[None, List]): warnings.warn("You are trying to subset organs after loading the dataset." "This will have no effect unless the dataset is loaded again.") - def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndarray, fn: Union[None, str] = None, - celltype_version: Union[str, None] = None): + def load_tobacked( + self, + adata_backed: anndata.AnnData, + genome: str, + idx: np.ndarray, + fn: Union[None, str] = None, + celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True + ): """ Loads data set into slice of backed anndata object. @@ -317,13 +539,17 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar :param keys: :param fn: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param load_raw: See .load(). + :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. 
""" self.load( fn=fn, celltype_version=celltype_version, remove_gene_version=True, - match_to_reference=genome + match_to_reference=genome, + load_raw=load_raw, + allow_caching=allow_caching ) # Check if writing to sparse or dense matrix: if isinstance(adata_backed.X, np.ndarray) or \ @@ -346,7 +572,7 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar adata_backed.obs.loc[np.sort(idx), k] = [self.adata.uns[k] for i in range(len(idx))] else: # Need to fill this instead of throwing an exception as this condition can trigger for one element - # within a loop over multiple data sets (ie in data set groups). + # within a loop over multiple data sets (ie in data set human). adata_backed.obs.loc[idx, k] = ["key_not_found" for i in range(len(idx))] elif isinstance(adata_backed.X, anndata._core.sparse_dataset.SparseDataset): # backed sparse # cannot scatter update on backed sparse yet! assert that updated block is meant to be appended: @@ -361,40 +587,36 @@ def load_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: np.ndar pandas.DataFrame(dict([ (k, [self.id for i in range(len(idx))]) if k == self._ADATA_IDS_SFAIRA.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns - else (k, [self.adata.uns[k] for i in range(len(idx))]) if k in list(self.adata.uns.keys()) - else (k, ["key_not_found" for i in range(len(idx))]) + else (k, [self.adata.uns[k] for _ in range(len(idx))]) if k in list(self.adata.uns.keys()) + else (k, ["key_not_found" for _ in range(len(idx))]) for k in adata_backed.obs.columns ])) ) + self.clear() else: - raise ValueError(f"Did not reccognize backed AnnData.X format {type(adata_backed.X)}") + raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def set_unkown_class_id(self, ids: list): + def set_unkown_class_id(self, ids: List[str]): """ - Sets list of custom identifiers of unknown cell types in adata.obs["cell_ontology_class"] to the target 
one. + Sets list of custom identifiers of unknown cell types data annotation. - :param ids: IDs in adata.obs["cell_ontology_class"] to replace. + :param ids: IDs in cell type name column to replace by "unknown identifier. :return: """ - target_id = "unknown" - ontology_classes = [ - x if x not in ids else target_id - for x in self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].tolist() - ] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ontology_classes + self._unknown_celltype_identifiers.extend( + [x for x in ids if x not in self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers] + ) - def _set_genome(self, - genome: str - ): + def _set_genome(self, genome: str): if genome.lower().startswith("homo_sapiens"): g = SuperGenomeContainer( - species="human", + organism="human", genome=genome ) elif genome.lower().startswith("mus_musculus"): g = SuperGenomeContainer( - species="mouse", + organism="mouse", genome=genome ) else: @@ -436,7 +658,6 @@ def map_ontology_class( """ :param raw_ids: - :param class_maps: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. 
:return: """ @@ -444,7 +665,8 @@ def map_ontology_class( celltype_version = self.set_default_type_version() self.assert_celltype_version_key(celltype_version=celltype_version) return [ - self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() else x + self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() + else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x for x in raw_ids ] @@ -467,7 +689,7 @@ def meta_fn(self): else: return os.path.join(self.meta_path, self.doi_cleaned_id + "_meta.csv") - def load_meta(self, fn: Union[PathLike, str]): + def load_meta(self, fn: Union[PathLike, str, None]): if fn is None: if self.meta_fn is None: raise ValueError("provide either fn in load or path in constructor") @@ -475,49 +697,130 @@ def load_meta(self, fn: Union[PathLike, str]): else: if isinstance(fn, str): fn = os.path.normpath(fn) - self.meta = pandas.read_csv(fn, usecols=self.META_DATA_FIELDS) + # Only load meta data if file exists: + if os.path.isfile(fn): + meta = pandas.read_csv( + fn, usecols=list(self._META_DATA_FIELDS.keys()), dtype=str, + ) + # Formatting: All are read as string to allow dealing wth None entries: + # Make sure bool entries are bool: + for k, v in self._META_DATA_FIELDS.items(): + if v == bool: + meta[k] = [ + True if x == "True" else + False if x == "False" else None + for x in meta[k].values.tolist() + ] + else: + # Make sure None entries are formatted as None and not as string "None": + meta[k] = [None if x == "None" else x for x in meta[k].values.tolist()] + self.meta = meta def write_meta( self, fn_meta: Union[None, str] = None, - fn_data: Union[None, str] = None, dir_out: Union[None, str] = None, + fn_data: Union[None, str] = None, ): - if fn_meta is None: - if self.path is None and dir_out is None: - raise ValueError("provide either fn in load or path in constructor") - if dir_out is None: - dir_out = self.meta_path + """ + 
Write meta data object for data set. + + Does not cache data and attempts to load raw data. + + :param fn_meta: File to write to, selects automatically based on self.meta_path and self.id otherwise. + :param dir_out: Path to write to, file name is selected automatically based on self.id. + :param fn_data: See .load() + :return: + """ + if fn_meta is not None and dir_out is not None: + raise ValueError("supply either fn_meta or dir_out but not both") + elif fn_meta is None and dir_out is None: + if self.meta_fn is None: + raise ValueError("provide either fn in load or via constructor (meta_path)") fn_meta = self.meta_fn + elif fn_meta is None and dir_out is not None: + fn_meta = os.path.join(dir_out, self.doi_cleaned_id + "_meta.csv") + elif fn_meta is not None and dir_out is None: + pass # fn_meta is used + else: + assert False, "bug in switch" + if self.adata is None: - self.load(fn=fn_data, remove_gene_version=False, match_to_reference=None) + self.load( + fn=fn_data, + remove_gene_version=False, + match_to_reference=None, + load_raw=True, + allow_caching=False, + ) + # Add data-set wise meta data into table: meta = pandas.DataFrame({ self._ADATA_IDS_SFAIRA.annotated: self.adata.uns[self._ADATA_IDS_SFAIRA.annotated], self._ADATA_IDS_SFAIRA.author: self.adata.uns[self._ADATA_IDS_SFAIRA.author], self._ADATA_IDS_SFAIRA.doi: self.adata.uns[self._ADATA_IDS_SFAIRA.doi], self._ADATA_IDS_SFAIRA.download: self.adata.uns[self._ADATA_IDS_SFAIRA.download], + self._ADATA_IDS_SFAIRA.download_meta: self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta], self._ADATA_IDS_SFAIRA.id: self.adata.uns[self._ADATA_IDS_SFAIRA.id], self._ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, - self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] if self._ADATA_IDS_SFAIRA.normalization in self.adata.uns.keys() else None, - self._ADATA_IDS_SFAIRA.organ: self.adata.uns[self._ADATA_IDS_SFAIRA.organ], - self._ADATA_IDS_SFAIRA.protocol: 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol], - self._ADATA_IDS_SFAIRA.species: self.adata.uns[self._ADATA_IDS_SFAIRA.species], + self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization], self._ADATA_IDS_SFAIRA.year: self.adata.uns[self._ADATA_IDS_SFAIRA.year], }, index=range(1)) + # Expand table by variably cell-wise or data set-wise meta data: + for x in [ + self._ADATA_IDS_SFAIRA.age, + self._ADATA_IDS_SFAIRA.dev_stage, + self._ADATA_IDS_SFAIRA.ethnicity, + self._ADATA_IDS_SFAIRA.healthy, + self._ADATA_IDS_SFAIRA.organ, + self._ADATA_IDS_SFAIRA.protocol, + self._ADATA_IDS_SFAIRA.sex, + self._ADATA_IDS_SFAIRA.organism, + self._ADATA_IDS_SFAIRA.state_exact, + ]: + if self.adata.uns[x] == UNS_STRING_META_IN_OBS: + meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) + else: + meta[x] = self.adata.uns[x] + # Add cell types into table if available: + if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.keys(): + meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = str(( + np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values)), + )) + else: + meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = " " meta.to_csv(fn_meta) + # Properties: + @property - def annotated(self) -> bool: - if self._annotated is not None: - return self._annotated + def age(self) -> Union[None, str]: + if self._age is not None: + return self._age else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.annotated] + if self.meta is not None and self._ADATA_IDS_SFAIRA.age in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.age] + else: + return None - @annotated.setter - def annotated(self, x: bool): - self._annotated = x + @age.setter + def age(self, x: str): + self.__erasing_protection(attr="age", val_old=self._age, val_new=x) + self.__value_protection(attr="age", allowed=self._ADATA_IDS_SFAIRA.age_allowed_entries, attempted=x) + self._age = x + + @property + def 
annotated(self) -> bool: + if self.obs_key_cellontology_id is not None or self.obs_key_cellontology_original is not None: + return True + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.annotated in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.annotated] + else: + return None @property def author(self) -> str: @@ -526,12 +829,33 @@ def author(self) -> str: else: if self.meta is None: self.load_meta(fn=None) + if self.meta is None or self._ADATA_IDS_SFAIRA.author not in self.meta.columns: + raise ValueError("author must be set but was neither set in constructor nor in meta data") return self.meta[self._ADATA_IDS_SFAIRA.author] @author.setter def author(self, x: str): + self.__erasing_protection(attr="author", val_old=self._author, val_new=x) self._author = x + @property + def dev_stage(self) -> Union[None, str]: + if self._dev_stage is not None: + return self._dev_stage + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.dev_stage in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.dev_stage] + else: + return None + + @dev_stage.setter + def dev_stage(self, x: str): + self.__erasing_protection(attr="dev_stage", val_old=self._dev_stage, val_new=x) + self.__value_protection(attr="dev_stage", allowed=self._ADATA_IDS_SFAIRA.dev_stage_allowed_entries, attempted=x) + self._dev_stage = x + @property def doi(self) -> str: if self._doi is not None: @@ -539,24 +863,121 @@ def doi(self) -> str: else: if self.meta is None: self.load_meta(fn=None) + if self.meta is None or self._ADATA_IDS_SFAIRA.healthy not in self.meta.columns: + raise ValueError("doi must be set but was neither set in constructor nor in meta data") return self.meta[self._ADATA_IDS_SFAIRA.doi] @doi.setter def doi(self, x: str): + self.__erasing_protection(attr="doi", val_old=self._doi, val_new=x) self._doi = x @property - def download(self) -> str: + def 
download(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + """ + Data download website(s). + + Save as tuple with single element, which is a list of all download websites relevant to dataset. + :return: + """ if self._download is not None: - return self._download + x = self._download else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.download] + x = self.meta[self._ADATA_IDS_SFAIRA.download] + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + return x @download.setter - def download(self, x: str): - self._download = x + def download(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download", val_old=self._download, val_new=x) + # Formats to tuple with single element, which is a list of all download websites relevant to dataset, + # which can be used as a single element column in a pandas data frame. + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + self._download = (x,) + + @property + def download_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + """ + Meta data download website(s). + + Save as tuple with single element, which is a list of all download websites relevant to dataset. 
+ :return: + """ + x = self._download_meta + # if self._download_meta is not None: # TODO add this back in once download_meta is routineyl set in datasets + # x = self._download_meta + # else: + # if self.meta is None: + # self.load_meta(fn=None) + # x = self.meta[self._ADATA_IDS_SFAIRA.download_meta] + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + return x + + @download_meta.setter + def download_meta(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download_meta", val_old=self._download_meta, val_new=x) + # Formats to tuple with single element, which is a list of all download websites relevant to dataset, + # which can be used as a single element column in a pandas data frame. + if isinstance(x, str) or x is None: + x = [x] + if isinstance(x, list): + x = (x,) + self._download_meta = (x,) + + @property + def ethnicity(self) -> Union[None, str]: + if self._ethnicity is not None: + return self._ethnicity + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.ethnicity in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.ethnicity] + else: + return None + + @ethnicity.setter + def ethnicity(self, x: str): + self.__erasing_protection(attr="ethnicity", val_old=self._ethnicity, val_new=x) + self.__value_protection(attr="ethnicity", allowed=self._ADATA_IDS_SFAIRA.ethnicity_allowed_entries, attempted=x) + self._ethnicity = x + + @property + def healthy(self) -> Union[None, bool]: + if self._healthy is not None: + return self._healthy + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.healthy in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.healthy] + else: + return None + + @healthy.setter + def healthy(self, x: bool): + self.__erasing_protection(attr="healthy", val_old=self._healthy, val_new=x) + self._healthy = x + + @property + 
def healthy_state_healthy(self) -> str: + return self._healthy_state_healthy + + @healthy_state_healthy.setter + def healthy_state_healthy(self, x: str): + self.__erasing_protection(attr="healthy_state_healthy", val_old=self._healthy_state_healthy, val_new=x) + self._healthy_state_healthy = x @property def id(self) -> str: @@ -569,8 +990,29 @@ def id(self) -> str: @id.setter def id(self, x: str): + self.__erasing_protection(attr="id", val_old=self._id, val_new=x) self._id = x + @property + def meta(self) -> Union[None, pd.DataFrame]: + return self._meta + + @meta.setter + def meta(self, x: Union[None, pd.DataFrame]): + # Make sure formatting is correct: + if x is not None: + for k, v in x.items(): + v = v.tolist() # avoid numpy data types + if k not in self._META_DATA_FIELDS.keys(): + raise ValueError(f"did not find {k} in format look up table") + else: + if x[k] is not None: # None is always allowed. + if not isinstance(v[0], self._META_DATA_FIELDS[k]): + raise ValueError(f"key {k} of signature {str(v[0])} " + f"in meta data table did not match signature " + f"{str(self._META_DATA_FIELDS[k])}") + self._meta = x + @property def ncells(self) -> int: if self.adata is not None: @@ -584,78 +1026,369 @@ def ncells(self) -> int: return int(x) @property - def normalization(self) -> str: + def normalization(self) -> Union[None, str]: if self._normalization is not None: return self._normalization else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.normalization] + if self.meta is not None and self._ADATA_IDS_SFAIRA.normalization in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.normalization] + else: + return None @normalization.setter def normalization(self, x: str): + self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) + self.__value_protection(attr="normalization", allowed=self._ADATA_IDS_SFAIRA.normalization_allowed_entries, + attempted=x) self._normalization = x @property - def 
organ(self) -> str: + def obs_key_age(self) -> str: + return self._obs_key_age + + @obs_key_age.setter + def obs_key_age(self, x: str): + self.__erasing_protection(attr="obs_key_age", val_old=self._obs_key_age, val_new=x) + self._obs_key_age = x + + @property + def obs_key_cellontology_id(self) -> str: + return self._obs_key_cellontology_id + + @obs_key_cellontology_id.setter + def obs_key_cellontology_id(self, x: str): + self.__erasing_protection(attr="obs_key_cellontology_id", val_old=self._obs_key_cellontology_id, val_new=x) + self._obs_key_cellontology_id = x + + @property + def obs_key_cellontology_original(self) -> str: + return self._obs_key_cellontology_original + + @obs_key_cellontology_original.setter + def obs_key_cellontology_original(self, x: str): + self.__erasing_protection(attr="obs_key_cellontology_original", val_old=self._obs_key_cellontology_original, + val_new=x) + self._obs_key_cellontology_original = x + + @property + def obs_key_dev_stage(self) -> str: + return self._obs_key_dev_stage + + @obs_key_dev_stage.setter + def obs_key_dev_stage(self, x: str): + self.__erasing_protection(attr="obs_key_dev_stage", val_old=self._obs_key_dev_stage, val_new=x) + self._obs_key_dev_stage = x + + @property + def obs_key_ethnicity(self) -> str: + return self._obs_key_ethnicity + + @obs_key_ethnicity.setter + def obs_key_ethnicity(self, x: str): + self.__erasing_protection(attr="obs_key_ethnicity", val_old=self._obs_key_ethnicity, val_new=x) + self._obs_key_ethnicity = x + + @property + def obs_key_healthy(self) -> str: + return self._obs_key_healthy + + @obs_key_healthy.setter + def obs_key_healthy(self, x: str): + self.__erasing_protection(attr="obs_key_healthy", val_old=self._obs_key_healthy, val_new=x) + self._obs_key_healthy = x + + @property + def obs_key_organ(self) -> str: + return self._obs_key_organ + + @obs_key_organ.setter + def obs_key_organ(self, x: str): + self.__erasing_protection(attr="obs_key_organ", val_old=self._obs_key_organ, val_new=x) + 
self._obs_key_organ = x + + @property + def obs_key_organism(self) -> str: + return self._obs_key_organism + + @obs_key_organism.setter + def obs_key_organism(self, x: str): + self.__erasing_protection(attr="obs_key_organism", val_old=self._obs_key_organism, val_new=x) + self._obs_key_organism = x + + @property + def obs_key_protocol(self) -> str: + return self._obs_key_protocol + + @obs_key_protocol.setter + def obs_key_protocol(self, x: str): + self.__erasing_protection(attr="obs_key_protocol", val_old=self._obs_key_protocol, val_new=x) + self._obs_key_protocol = x + + @property + def obs_key_sex(self) -> str: + return self._obs_key_sex + + @obs_key_sex.setter + def obs_key_sex(self, x: str): + self.__erasing_protection(attr="obs_key_sex", val_old=self._obs_key_sex, val_new=x) + self._obs_key_sex = x + + @property + def obs_key_state_exact(self) -> str: + return self._obs_key_state_exact + + @obs_key_state_exact.setter + def obs_key_state_exact(self, x: str): + self.__erasing_protection(attr="obs_key_state_exact", val_old=self._obs_key_state_exact, val_new=x) + self._obs_key_state_exact = x + + @property + def organ(self) -> Union[None, str]: if self._organ is not None: return self._organ else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.organ] + if self.meta is not None and self._ADATA_IDS_SFAIRA.organ in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.organ] + else: + return None @organ.setter def organ(self, x: str): + self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) + self.__value_protection(attr="organ", allowed=self._ADATA_IDS_SFAIRA.organ_allowed_entries, attempted=x) self._organ = x @property - def protocol(self) -> str: + def organism(self) -> Union[None, str]: + if self._organism is not None: + return self._organism + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.organism in self.meta.columns: + return 
self.meta[self._ADATA_IDS_SFAIRA.organism] + else: + return None + + @organism.setter + def organism(self, x: str): + self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) + self.__value_protection(attr="organism", allowed=self._ADATA_IDS_SFAIRA.organism_allowed_entries, attempted=x) + self._organism = x + + @property + def protocol(self) -> Union[None, str]: if self._protocol is not None: return self._protocol else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.protocol] + if self.meta is not None and self._ADATA_IDS_SFAIRA.protocol in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.protocol] + else: + return None @protocol.setter def protocol(self, x: str): + self.__erasing_protection(attr="protocol", val_old=self._protocol, val_new=x) + self.__value_protection(attr="protocol", allowed=self._ADATA_IDS_SFAIRA.protocol_allowed_entries, attempted=x) self._protocol = x @property - def species(self) -> str: - if self._species is not None: - return self._species + def sex(self) -> Union[None, str]: + if self._sex is not None: + return self._sex + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._ADATA_IDS_SFAIRA.sex in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.sex] + else: + return None + + @sex.setter + def sex(self, x: str): + self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) + self.__value_protection(attr="sex", allowed=self._ADATA_IDS_SFAIRA.sex_allowed_entries, attempted=x) + self._sex = x + + @property + def source(self) -> str: + return self._source + + @source.setter + def source(self, x: Union[str, None]): + self.__erasing_protection(attr="source", val_old=self._source, val_new=x) + self._source = x + + @property + def state_exact(self) -> Union[None, str]: + if self._state_exact is not None: + return self._state_exact else: if self.meta is None: self.load_meta(fn=None) - return 
self.meta[self._ADATA_IDS_SFAIRA.species] + if self.meta is not None and self._ADATA_IDS_SFAIRA.state_exact in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.state_exact] + else: + return None + + @state_exact.setter + def state_exact(self, x: str): + self.__erasing_protection(attr="state_exact", val_old=self._state_exact, val_new=x) + self._state_exact = x + + @property + def var_ensembl_col(self) -> str: + return self._var_ensembl_col + + @var_ensembl_col.setter + def var_ensembl_col(self, x: str): + self.__erasing_protection(attr="var_ensembl_col", val_old=self._var_ensembl_col, val_new=x) + self._var_ensembl_col = x + + @property + def var_symbol_col(self) -> str: + return self._var_symbol_col - @species.setter - def species(self, x: str): - self._species = x + @var_symbol_col.setter + def var_symbol_col(self, x: str): + self.__erasing_protection(attr="var_symbol_col", val_old=self._var_symbol_col, val_new=x) + self._var_symbol_col = x @property - def year(self) -> str: + def year(self) -> Union[None, int]: if self._year is not None: return self._year else: if self.meta is None: self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.year] + if self.meta is not None and self._ADATA_IDS_SFAIRA.year in self.meta.columns: + return self.meta[self._ADATA_IDS_SFAIRA.year] + else: + return None @year.setter - def year(self, x: str): + def year(self, x: int): + self.__erasing_protection(attr="year", val_old=self._year, val_new=x) + self.__value_protection(attr="year", allowed=self._ADATA_IDS_SFAIRA.year_allowed_entries, attempted=x) self._year = x + # Private methods: + + def __erasing_protection(self, attr, val_old, val_new): + """ + This is called when a erasing protected attribute is set to check whether it was set before. + + :param attr: Attribute to be set. + :param val_old: Old value for attribute to be set. + :param val_new: New value for attribute to be set. 
+ """ + if val_old is not None: + raise ValueError(f"attempted to set erasing protected attribute {attr}: " + f"previously was {str(val_old)}, attempted to set {str(val_new)}") + + def __value_protection(self, attr, allowed, attempted): + """ + Check whether value is from set of allowed values. + + Does not check if allowed is None. + + :param attr: + :param allowed: + :param attempted: + :return: + """ + if allowed is not None: + if not isinstance(attempted, list) and not isinstance(attempted, tuple): + attempted = [attempted] + for x in attempted: + if x not in allowed: + raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + + +class DatasetBaseGroupLoading(DatasetBase): + """ + Container class specific to datasets which come in groups and require specialised loading. + """ + _unprocessed_full_group_object: bool + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self._unprocessed_full_group_object = False + + @abc.abstractmethod + def _load_full_group_object(self, fn=None) -> Union[None, anndata.AnnData]: + """ + Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. + + Override this method in the Dataset if this is relevant. 
+ :return: adata_group + """ + pass + + def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None): + if self.adata is None and adata_group is not None: + self.adata = adata_group + elif self.adata is None and adata_group is not None: + self.adata = self._load_full_group_object(fn=fn) + elif self.adata is not None and self._unprocessed_full_group_object: + pass + else: + assert False, "switch error" + self._unprocessed_full_group_object = True + return True + + def _load_from_group(self): + """ + Sets .adata based on a raw anndata object that correponds to a superset of the data belonging to this Dataset, + including subsetting. + + Override this method in the Dataset if this is relevant. + """ + pass + + def _subset_from_group( + self, + subset_items: dict, + ): + """ + Subsets a raw anndata object to the data corresponding to this Dataset. + + :param subset_items: Key-value pairs for subsetting: Keys are columns in .obs, values are entries that should + be kept. If the dictionary has multiple entries, these are sequentially subsetted (AND-gate). + :return: + """ + assert self.adata is not None, "this method should only be called if .adata is not None" + for k, v in subset_items: + self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] -class DatasetGroupBase(abc.ABC): + def _load(self, fn): + _ = self.set_raw_full_group_object(fn=fn, adata_group=None) + if self._unprocessed_full_group_object: + self._load_from_group() + self._unprocessed_full_group_object = False + + +class DatasetGroup: """ + Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through + wrapping them. 
Example: - #query human lung - #from sfaira.dev.data.human.lung import DatasetGroupLung as DatasetGroup + #query loaders lung + #from sfaira.dev.data.loaders.lung import DatasetGroupLung as DatasetGroup #dsg_humanlung = DatasetGroupHuman(path='path/to/data') #dsg_humanlung.load_all(match_to_reference='Homo_sapiens_GRCh38_97') #dsg_humanlung[some_id] @@ -663,62 +1396,116 @@ class DatasetGroupBase(abc.ABC): """ datasets: Dict - def __init__(self): + def __init__(self, datasets: dict): + self.datasets = datasets self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - def subset_organs(self, subset: Union[None, List]): - for i in self.ids: - if self.datasets[i].organ == "mixed": - self.datasets[i].subset_organs(subset) - else: - raise ValueError("Only data that contain multiple organs can be subset.") + def _load_group(self, load_raw: bool): + """ - def load_all( + :param load_raw: See .load(). + :return: + """ + return None + + def load( self, - celltype_version: Union[str, None] = None, annotated_only: bool = False, + celltype_version: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, + func=None, + kwargs_func: Union[None, dict] = None, ): """ + Load all datasets in group (option for temporary loading). - Subsets self.datasets to the data sets that were found. + Note: This method automatically subsets to the group to the data sets for which input files were found. + + This method also allows temporarily loading data sets to execute function on loaded data sets (supply func). + In this setting, datasets are removed from memory after the function has been executed. - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param annotated_only: - :param remove_gene_version: - :param match_to_reference: - :param load_raw: Loads unprocessed version of data if available in data loader. 
+ :param celltype_version: See .load(). + :param remove_gene_version: See .load(). + :param match_to_reference: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset + instance. The return can be empty: + + def func(dataset, **kwargs_func): + # code manipulating dataset and generating output x. + return x + :param kwargs_func: Kwargs of func. :return: """ - for x in self.ids: - try: - if self.datasets[x].annotated or not annotated_only: - self.datasets[x].load( - celltype_version=self.format_type_version(celltype_version), - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw - ) - except FileNotFoundError as e: - print(e) - del self.datasets[x] + formatted_version = self.format_type_version(celltype_version) + args = [ + formatted_version, + remove_gene_version, + match_to_reference, + load_raw, + allow_caching, + func, + kwargs_func + ] - def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: List[np.ndarray], - annotated_only: bool = False, celltype_version: Union[str, None] = None): + if processes > 1 and len(self.datasets.items()) > 1: # multiprocessing parallelisation + print(f"using python multiprocessing (processes={processes}), " + f"for easier debugging revert to sequential execution (processes=1)") + with multiprocessing.Pool(processes=processes) as pool: + res = pool.starmap(map_fn, [ + (tuple([v] + args),) + for k, v in self.datasets.items() if v.annotated or not annotated_only + ]) + # Clear data sets that were not successfully loaded because of missing data: + for x in res: + if x is not None: + print(x[1]) + del self.datasets[x[0]] + else: # for loop + adata_group = None + for k, v in self.datasets.items(): + print(f"loading {k}") 
+ group_loading = v.set_raw_full_group_object(fn=None, adata_group=adata_group) + if adata_group is None and group_loading: # cache full adata object for subsequent Datasets + adata_group = v.adata.copy() + x = map_fn(tuple([v] + args)) + # Clear data sets that were not successfully loaded because of missing data: + if x is not None: + print(x[1]) + del self.datasets[x[0]] + del adata_group + + def load_tobacked( + self, + adata_backed: anndata.AnnData, + genome: str, + idx: List[np.ndarray], + annotated_only: bool = False, + celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True, + ): """ Loads data set group into slice of backed anndata object. - Subsets self.datasets to the data sets that were found. + Subsets self.datasets to the data sets that were found. Note that feature space is automatically formatted as + this is necessary for concatenation. - :param adata_backed: + :param adata_backed: Anndata instance to load into. :param genome: Genome container target genomes loaded. :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a shuffled object. This has to be a list of the length of self.data, one index array for each dataset. - :param keys: :param annotated_only: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param celltype_version: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. 
""" i = 0 @@ -727,8 +1514,13 @@ def load_all_tobacked(self, adata_backed: anndata.AnnData, genome: str, idx: Lis try: if self.datasets[x].annotated or not annotated_only: self.datasets[x].load_tobacked( - adata_backed=adata_backed, genome=genome, idx=idx[i], - celltype_version=self.format_type_version(celltype_version)) + adata_backed=adata_backed, + genome=genome, + idx=idx[i], + celltype_version=self.format_type_version(celltype_version), + load_raw=load_raw, + allow_caching=allow_caching + ) i += 1 except FileNotFoundError: del self.datasets[x] @@ -756,7 +1548,6 @@ def adata(self): adata.obs[self._ADATA_IDS_SFAIRA.author] = adata.uns[self._ADATA_IDS_SFAIRA.author] adata.obs[self._ADATA_IDS_SFAIRA.year] = adata.uns[self._ADATA_IDS_SFAIRA.year] adata.obs[self._ADATA_IDS_SFAIRA.protocol] = adata.uns[self._ADATA_IDS_SFAIRA.protocol] - adata.obs[self._ADATA_IDS_SFAIRA.subtissue] = adata.uns[self._ADATA_IDS_SFAIRA.subtissue] if self._ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): adata.obs[self._ADATA_IDS_SFAIRA.normalization] = adata.uns[self._ADATA_IDS_SFAIRA.normalization] if self._ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: @@ -774,11 +1565,10 @@ def adata(self): self._ADATA_IDS_SFAIRA.author, self._ADATA_IDS_SFAIRA.year, self._ADATA_IDS_SFAIRA.protocol, - self._ADATA_IDS_SFAIRA.subtissue, self._ADATA_IDS_SFAIRA.normalization, self._ADATA_IDS_SFAIRA.dev_stage, self._ADATA_IDS_SFAIRA.annotated, - "mapped_features" + self._ADATA_IDS_SFAIRA.mapped_features, ] for k in list(adata.uns.keys()): if k not in keys_to_keep: @@ -811,10 +1601,11 @@ def adata(self): adata_concat.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index - if len(set([a.uns['mapped_features'] for a in adata_ls])) == 1: - adata_concat.uns['mapped_features'] = adata_ls[0].uns['mapped_features'] + if len(set([a.uns[self._ADATA_IDS_SFAIRA.mapped_features] for a in adata_ls])) == 1: + adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = \ + 
adata_ls[0].uns[self._ADATA_IDS_SFAIRA.mapped_features] else: - adata_concat.uns['mapped_features'] = False + adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = False else: adata_concat = adata_ls[0] adata_concat.obs[self._ADATA_IDS_SFAIRA.dataset] = self.ids[0] @@ -851,7 +1642,7 @@ def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: cells.append(self.datasets[x].ncells) except FileNotFoundError: del self.datasets[x] - return cells + return np.asarray(cells) def ncells(self, annotated_only: bool = False): cells = self.ncells_bydataset(annotated_only=annotated_only) @@ -886,7 +1677,7 @@ def format_type_version(self, version): versions = np.array(list(versions)) return versions[np.argmax([int(x) for x in versions])] else: - self.assert_celltype_version_key() + self.assert_celltype_version_key(celltype_version=version) return version def subset(self, key, values): @@ -915,36 +1706,111 @@ def subset(self, key, values): for x in ids_del: del self.datasets[x] + def subset_organs(self, subset: Union[None, List]): + for i in self.ids: + if self.datasets[i].organ == "mixed": + self.datasets[i].subset_organs(subset) + else: + raise ValueError("Only data that contain multiple organs can be subset.") + + +class DatasetGroupDirectoryOriented(DatasetGroup): + + def __init__( + self, + file_base: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Automatically collects Datasets from python files in directory. + + Uses a pre-built DatasetGroup if this is defined in a group.py file, otherwise, the DatasetGroup is initialised + here. + + :param file_base: + :param path: + :param meta_path: + :param cache_path: + """ + # Collect all data loaders from files in directory: + datasets = [] + cwd = os.path.dirname(file_base) + dataset_module = str(cwd.split("/")[-1]) + if "group.py" in os.listdir(cwd): + DatasetGroupFound = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." 
+ dataset_module + ".group.DatasetGroup") + dsg = DatasetGroupFound(path=path, meta_path=meta_path, cache_path=cache_path) + datasets.extend(list(dsg.datasets.values)) + else: + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + DatasetFound = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." + dataset_module + "." + file_module + ".Dataset") + datasets.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + class DatasetSuperGroup: """ Container for multiple DatasetGroup instances. - Can be used to grid_searches models across organs. Supports backed anndata objects. + Used to manipulate structured dataset collections. Primarly designed for this manipulation, convert to DatasetGroup + via flatten() for more functionalities. 
""" adata: Union[None, anndata.AnnData] fn_backed: Union[None, PathLike] - dataset_groups: List[DatasetGroupBase] + dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]] - def __init__(self, dataset_groups: Union[None, List[DatasetGroupBase]]): + def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetSuperGroup]]): self.adata = None self.fn_backed = None self.set_dataset_groups(dataset_groups=dataset_groups) self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + def set_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups = dataset_groups + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups = dataset_groups_proc + else: + assert False + + def extend_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups.extend(dataset_groups) + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups.extend(dataset_groups_proc) + else: + assert False + def get_gc( self, genome: str = None ): if genome.lower().startswith("homo_sapiens"): g = SuperGenomeContainer( - species="human", + organism="human", genome=genome ) elif genome.lower().startswith("mus_musculus"): g = SuperGenomeContainer( - species="mouse", + organism="mouse", genome=genome ) else: @@ -968,40 +1834,52 @@ def ncells_bydataset_flat(self, annotated_only: bool = False): def ncells(self, annotated_only: bool = False): return np.sum(self.ncells_bydataset(annotated_only=annotated_only)) - def set_dataset_groups(self, dataset_groups: List[DatasetGroupBase]): - 
self.dataset_groups = dataset_groups + def flatten(self) -> DatasetGroup: + """ + Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. - def subset_organs(self, subset: Union[None, List]): + :return: + """ + ds = {} for x in self.dataset_groups: - if x.datasets[0].organ == "mixed": - x.subset_organs(subset) + for k, v in x.datasets.items(): + assert k not in ds.keys(), f"{k} was duplicated in super group, purge duplicates before flattening" + ds[k] = v + return DatasetGroup(datasets=ds) def load_all( self, celltype_version: Union[str, None] = None, + annotated_only: bool = False, match_to_reference: Union[str, None] = None, remove_gene_version: bool = True, - annotated_only: bool = False, - load_raw: bool = False + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, ): """ - Loads data set groups into anndata object. + Loads data set human into anndata object. :param celltype_version: Version of cell type ontology to use. Uses most recent within each DatasetGroup if None. - :param match_to_reference: - :param remove_gene_version: :param annotated_only: - :param load_raw: + :param match_to_reference: See .load(). + :param remove_gene_version: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + Note: parallelises loading of each dataset group, but not across groups. 
:return: """ for x in self.dataset_groups: - x.load_all( + x.load( annotated_only=annotated_only, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, celltype_version=celltype_version, - load_raw=load_raw + load_raw=load_raw, + allow_caching=allow_caching, + processes=processes, ) # making sure that concatenate is not used on a None adata object resulting from organ filtering for i in range(len(self.dataset_groups)): @@ -1021,9 +1899,11 @@ def load_all_tobacked( as_dense: bool = False, annotated_only: bool = False, celltype_version: Union[str, None] = None, + load_raw: bool = False, + allow_caching: bool = True, ): """ - Loads data set groups into backed anndata object. + Loads data set human into backed anndata object. Example usage: @@ -1039,9 +1919,11 @@ def load_all_tobacked( :param fn_backed: File name to save backed anndata to temporarily. :param genome: ID of target genomes. :param shuffled: Whether to shuffle data when writing to backed. - :param as_dense: + :param as_dense: Whether to load into dense count matrix. :param annotated_only: :param celltype_version: Version of cell type ontology to use. Uses most recent if None. + :param load_raw: See .load(). + :param allow_caching: See .load(). 
""" if shuffled and not as_dense: raise ValueError("cannot write backed shuffled and sparse") @@ -1075,7 +1957,6 @@ def load_all_tobacked( self._ADATA_IDS_SFAIRA.organ, self._ADATA_IDS_SFAIRA.protocol, self._ADATA_IDS_SFAIRA.state_exact, - self._ADATA_IDS_SFAIRA.subtissue, self._ADATA_IDS_SFAIRA.year, ] if scatter_update: @@ -1097,23 +1978,30 @@ def load_all_tobacked( for x in ncells: temp_ls = [] for y in x: - temp_ls.append(idx_vector[row:(row+y)]) + temp_ls.append(idx_vector[row:(row + y)]) row += y idx_ls.append(temp_ls) print("checking expected and received data set sizes, rerun meta data generation if mismatch is found:") print(self.ncells_bydataset(annotated_only=annotated_only)) print([[len(x) for x in xx] for xx in idx_ls]) for i, x in enumerate(self.dataset_groups): - x.load_all_tobacked(adata_backed=self.adata, genome=genome, idx=idx_ls[i], annotated_only=annotated_only, - celltype_version=celltype_version) + x.load_tobacked( + adata_backed=self.adata, + genome=genome, + idx=idx_ls[i], + annotated_only=annotated_only, + celltype_version=celltype_version, + load_raw=load_raw, + allow_caching=allow_caching, + ) # If the sparse non-shuffled approach is used, make sure that self.adata.obs.index is unique() before saving if not scatter_update: self.adata.obs.index = pd.RangeIndex(0, len(self.adata.obs.index)) # Explicitly write backed file to disk again to make sure that obs are included and that n_obs is set correctly self.adata.write() # Saving obs separately below is therefore no longer required (hence commented out) - #fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" - #self.adata.obs.to_csv(fn_backed_obs) + # fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" + # self.adata.obs.to_csv(fn_backed_obs) def delete_backed(self): del self.adata @@ -1136,3 +2024,8 @@ def subset(self, key, values): """ for x in self.dataset_groups: x.subset(key=key, values=values) + + def subset_organs(self, subset: Union[None, List]): + 
for x in self.dataset_groups: + if x.datasets[0].organ == "mixed": + x.subset_organs(subset) diff --git a/sfaira/data/databases/__init__.py b/sfaira/data/databases/__init__.py deleted file mode 100644 index 89402624a..000000000 --- a/sfaira/data/databases/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.data.databases.cellxgene import DatasetCellxgene, DatasetGroupCellxgene diff --git a/sfaira/data/databases/cellxgene/__init__.py b/sfaira/data/databases/cellxgene/__init__.py deleted file mode 100644 index ac116b424..000000000 --- a/sfaira/data/databases/cellxgene/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data.databases.cellxgene.cellxgene_loader import DatasetCellxgene -from sfaira.data.databases.cellxgene.cellxgene_group import DatasetGroupCellxgene \ No newline at end of file diff --git a/sfaira/data/databases/cellxgene/cellxgene_group.py b/sfaira/data/databases/cellxgene/cellxgene_group.py deleted file mode 100644 index 4fd786d51..000000000 --- a/sfaira/data/databases/cellxgene/cellxgene_group.py +++ /dev/null @@ -1,30 +0,0 @@ -import pandas as pd -import os -from typing import Union - -from .external import DatasetGroupBase - -from .cellxgene_loader import DatasetCellxgene - - -class DatasetGroupCellxgene(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - fn_ls = os.listdir(path) - fn_ls = [x for x in fn_ls if x in self.accepted_file_names] - datasets = [ - DatasetCellxgene(path=path, fn=x, meta_path=meta_path) - for x in fn_ls - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - - @property - def accepted_file_names(self): - return [ - "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad" - ] diff --git a/sfaira/data/databases/cellxgene/cellxgene_loader.py b/sfaira/data/databases/cellxgene/cellxgene_loader.py deleted file mode 100644 index ba9bd42df..000000000 --- a/sfaira/data/databases/cellxgene/cellxgene_loader.py +++ /dev/null @@ 
-1,71 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE, META_DATA_FIELDS_CELLXGENE - - -class DatasetCellxgene(DatasetBase): - """ - This is a dataloader for downloaded h5ad from cellxgene. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None], - fn: str, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.fn = fn - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - fn = os.path.join(self.path, self.fn) - adata = anndata.read(fn) - adata.X = adata.raw.X - - self.adata.uns[ADATA_IDS_SFAIRA.author] = adata.uns[ADATA_IDS_CELLXGENE.author][ADATA_IDS_CELLXGENE.author_names] - self.adata.uns[ADATA_IDS_SFAIRA.year] = adata.uns[ADATA_IDS_CELLXGENE.year] - self.adata.uns[ADATA_IDS_SFAIRA.doi] = adata.uns[ADATA_IDS_CELLXGENE.doi] - if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.species].values)) > 1: - raise Warning("found multiple assay in data set %s" % self.fn) - self.adata.uns[ADATA_IDS_SFAIRA.protocol] = adata.obs[ADATA_IDS_CELLXGENE.protocol].values[0] - # Select tissue: blood is handled as a separate tissue in .obs - #if len(np.unique(adata.obs["tissue"].values)) > 1: - # raise Warning("found multiple tissue in data set %s" % self.fn) - #self.adata.uns["organ"] = adata.obs["tissue"].values[0] - self.adata.uns[ADATA_IDS_SFAIRA.organ] = str(self.fn).split("_")[3] - if len(np.unique(adata.obs[ADATA_IDS_SFAIRA.species].values)) > 1: - raise Warning("found multiple organisms in data set %s" % self.fn) - self.adata.uns[ADATA_IDS_SFAIRA.species] = adata.obs[ADATA_IDS_CELLXGENE.species].values[0] - self.adata.uns[ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[ADATA_IDS_SFAIRA.download] = self.download - self.adata.uns[ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[ADATA_IDS_SFAIRA.subtissue] = adata.obs[ADATA_IDS_CELLXGENE.subtissue].values - self.adata.obs[ADATA_IDS_SFAIRA.dev_stage] = adata.obs[ADATA_IDS_CELLXGENE.dev_stage].values - self.adata.obs[ADATA_IDS_SFAIRA.sex] = adata.obs[ADATA_IDS_CELLXGENE.sex].values - self.adata.obs[ADATA_IDS_SFAIRA.ethnicity] = adata.obs[ADATA_IDS_CELLXGENE.ethnicity].values - self.adata.obs[ADATA_IDS_SFAIRA.healthy] = adata.obs[ADATA_IDS_CELLXGENE.disease].values == ADATA_IDS_CELLXGENE.disease_state_healthy - self.adata.obs[ADATA_IDS_SFAIRA.state_exact] = adata.obs[ADATA_IDS_CELLXGENE.disease].values - - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_id] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_id].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] = adata.obs[ADATA_IDS_CELLXGENE.cell_ontology_class].values.tolist() - self.adata.obs[ADATA_IDS_SFAIRA.cell_types_original] = adata.obs[ADATA_IDS_CELLXGENE.cell_types_original].values.tolist() - - self._convert_and_set_var_names( - symbol_col=ADATA_IDS_CELLXGENE.gene_id_names, - ensembl_col=ADATA_IDS_CELLXGENE.gene_id_ensembl, - new_index=ADATA_IDS_CELLXGENE.gene_id_ensembl - ) - diff --git a/sfaira/data/databases/cellxgene/external.py b/sfaira/data/databases/cellxgene/external.py deleted file mode 100644 index 11aaeafb8..000000000 --- a/sfaira/data/databases/cellxgene/external.py +++ /dev/null @@ -1,3 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE -from sfaira.consts import META_DATA_FIELDS, META_DATA_FIELDS_CELLXGENE diff --git a/sfaira/data/dataloaders/__init__.py b/sfaira/data/dataloaders/__init__.py new file mode 100644 index 000000000..1df580fb0 --- /dev/null +++ b/sfaira/data/dataloaders/__init__.py @@ -0,0 +1,4 @@ +from . import anatomical_groups +from . import databases +from . 
import loaders +from .super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/__init__.py b/sfaira/data/dataloaders/anatomical_groups/__init__.py new file mode 100644 index 000000000..739fab642 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/__init__.py @@ -0,0 +1,2 @@ +from . import human +from . import mouse diff --git a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py new file mode 100644 index 000000000..c4dfd5b7c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py @@ -0,0 +1,44 @@ +from .human_adipose import DatasetGroupAdipose +from .human_adrenalgland import DatasetGroupAdrenalgland +from .human_mixed import DatasetGroupMixed +from .human_artery import DatasetGroupArtery +from .human_bladder import DatasetGroupBladder +from .human_blood import DatasetGroupBlood +from .human_bone import DatasetGroupBone +from .human_brain import DatasetGroupBrain +from .human_calvaria import DatasetGroupCalvaria +from .human_cervix import DatasetGroupCervix +from .human_chorionicvillus import DatasetGroupChorionicvillus +from .human_colon import DatasetGroupColon +from .human_duodenum import DatasetGroupDuodenum +from .human_epityphlon import DatasetGroupEpityphlon +from .human_esophagus import DatasetGroupEsophagus +from .human_eye import DatasetGroupEye +from .human_fallopiantube import DatasetGroupFallopiantube +from .human_femalegonad import DatasetGroupFemalegonad +from .human_gallbladder import DatasetGroupGallbladder +from .human_heart import DatasetGroupHeart +from .human_hesc import DatasetGroupHesc +from .human_ileum import DatasetGroupIleum +from .human_jejunum import DatasetGroupJejunum +from .human_kidney import DatasetGroupKidney +from .human_liver import DatasetGroupLiver +from .human_lung import DatasetGroupLung +from .human_malegonad import DatasetGroupMalegonad +from .human_muscle import 
DatasetGroupMuscle +from .human_omentum import DatasetGroupOmentum +from .human_pancreas import DatasetGroupPancreas +from .human_placenta import DatasetGroupPlacenta +from .human_pleura import DatasetGroupPleura +from .human_prostate import DatasetGroupProstate +from .human_rectum import DatasetGroupRectum +from .human_rib import DatasetGroupRib +from .human_skin import DatasetGroupSkin +from .human_spinalcord import DatasetGroupSpinalcord +from .human_spleen import DatasetGroupSpleen +from .human_stomach import DatasetGroupStomach +from .human_thymus import DatasetGroupThymus +from .human_thyroid import DatasetGroupThyroid +from .human_trachea import DatasetGroupTrachea +from .human_ureter import DatasetGroupUreter +from .human_uterus import DatasetGroupUterus diff --git a/sfaira/data/dataloaders/anatomical_groups/human/external.py b/sfaira/data/dataloaders/anatomical_groups/human/external.py new file mode 100644 index 000000000..413092483 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/external.py @@ -0,0 +1,2 @@ +from sfaira.data import DatasetBase, DatasetGroup +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py new file mode 100644 index 000000000..4a531d920 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adipose_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupAdipose(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, 
datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py new file mode 100644 index 000000000..0c6ab1bfa --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -0,0 +1,36 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_004 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_005 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_006 import Dataset as Dataset0006 + + +class DatasetGroupAdrenalgland(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, 
cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupAdrenalgland + self.datasets.update(DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py new file mode 100644 index 000000000..0aa3abedf --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_artery_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupArtery(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupArtery + self.datasets.update(DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py new file mode 100644 index 000000000..f39d8a55a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupBladder(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py new file mode 100644 index 000000000..a63658118 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -0,0 +1,42 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d_nan.human_blood_2018_10x_ica_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d_nan.human_blood_2019_10x_10xGenomics_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_002 import Dataset as Dataset0004 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_003 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_004 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_005 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_006 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_007 import Dataset as Dataset0009 + + +class DatasetGroupBlood(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py new file mode 100644 index 
000000000..0decbe187 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d_nan.human_bone_2018_10x_ica_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bone_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bone_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupBone(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupBone + self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py new file mode 100644 index 000000000..1f81ae71c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -0,0 +1,38 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_nmeth_4407.human_brain_2017_DroNcSeq_habib_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_001 import Dataset as Dataset0002 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_003 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_004 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_005 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_006 import Dataset as Dataset0007 + + +class DatasetGroupBrain(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py new file mode 100644 index 000000000..24a8c4c6a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -0,0 +1,26 @@ +from typing import Union + 
+from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_calvaria_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupCalvaria(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupCalvaria + self.datasets.update(DatasetGroupCalvaria(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py new file mode 100644 index 000000000..07b677bd2 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_cervix_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupCervix(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupCervix + self.datasets.update(DatasetGroupCervix(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py new file mode 100644 index 000000000..6098b30a2 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_chorionicvillus_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupChorionicvillus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupChorionicvillus + self.datasets.update(DatasetGroupChorionicvillus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py new file mode 100644 index 000000000..ab40519be --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -0,0 +1,40 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_08_067.human_colon_2019_10x_kinchen_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2019_06_029.human_colon_2019_10x_smilie_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_colon_2019_10x_wang_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41590_020_0602_z.human_colon_2020_10x_james_001 import Dataset as Dataset0004 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_001 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_002 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_003 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_004 import Dataset as Dataset0008 + + +class DatasetGroupColon(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupColon + self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py new file mode 100644 index 000000000..f7ce00833 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_duodenum_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupDuodenum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupDuodenum + self.datasets.update(DatasetGroupDuodenum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py new file mode 100644 index 000000000..21f9cae8f --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_epityphlon_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupEpityphlon(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupEpityphlon + self.datasets.update(DatasetGroupEpityphlon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py new file mode 100644 index 000000000..c3300c274 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_esophagus_2019_10x_madissoon_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_esophagus_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_esophagus_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupEsophagus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupEsophagus + self.datasets.update(DatasetGroupEsophagus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py new file mode 100644 index 000000000..68ee322cb --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -0,0 +1,32 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_15252_embj_2018100811.human_eye_2019_10x_lukowski_001 import Dataset as Dataset0001 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41467_019_12780_8.human_eye_2019_10x_menon_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1073_pnas_1914143116.human_eye_2019_10x_voigt_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_eye_2020_microwell_han_001 import Dataset as Dataset0004 + + +class DatasetGroupEye(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupEye + self.datasets.update(DatasetGroupEye(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py new file mode 100644 index 000000000..3ed7986a2 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_fallopiantube_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupFallopiantube(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for 
x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupFallopiantube + self.datasets.update(DatasetGroupFallopiantube(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py new file mode 100644 index 000000000..237ad73e0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_femalegonad_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_femalegonad_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupFemalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py new file mode 100644 index 000000000..0e0a033f4 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -0,0 +1,26 @@ +from typing 
import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_gallbladder_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupGallbladder(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupGallbladder + self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py new file mode 100644 index 000000000..2425637cb --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -0,0 +1,32 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_004 import Dataset as Dataset0004 + + +class DatasetGroupHeart(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, 
meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py new file mode 100644 index 000000000..c32bd0730 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_hesc_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupHesc(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupHesc + self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py new file mode 100644 index 000000000..93d9ad6b0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2019_08_008.human_ileum_2019_10x_martin_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_ileum_2019_10x_wang_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ileum_2020_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupIleum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py new file mode 100644 index 000000000..d9a7fb8e6 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_jejunum_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupJejunum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions 
from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupJejunum + self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py new file mode 100644 index 000000000..2a6f1bd94 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -0,0 +1,44 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_10861_2.human_kidney_2019_10xSn_lake_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1126_science_aat5031.human_kidney_2019_10x_stewart_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41597_019_0351_8.human_kidney_2020_10x_liao_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_002 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_004 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_005 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_006 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_007 import Dataset as Dataset0010 + + +class DatasetGroupKidney(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + 
meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py new file mode 100644 index 000000000..4674aedc0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -0,0 +1,42 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_018_06318_7.human_liver_2018_10x_macparland_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1652_y.human_liver_2019_10x_popescu_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1631_3.human_liver_2019_10x_ramachandran_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1373_2.human_liver_2019_mCELSeq2_aizarani_001 import 
Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_001 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_002 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_003 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_004 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_005 import Dataset as Dataset0009 + + +class DatasetGroupLiver(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py new file mode 
100644 index 000000000..2d312fd3e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -0,0 +1,54 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_lung_2019_10x_madissoon_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_dropseq_braga_003 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1101_753806.human_lung_2020_10x_habermann_001 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_001 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_002 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1016_j_devcel_2020_01_033.human_lung_2020_10x_miller_001 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_10x_travaglini_001 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_001 import Dataset as Dataset0010 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_002 import Dataset as Dataset0011 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_003 import Dataset as Dataset0012 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_004 import Dataset as Dataset0013 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_005 import Dataset as Dataset0014 +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_smartseq2_travaglini_002 import Dataset as Dataset0015 + + +class DatasetGroupLung(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0012(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0013(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0014(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0015(path=path, meta_path=meta_path, cache_path=cache_path), + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupLung + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py new file mode 100644 index 000000000..0b607e309 --- /dev/null +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41422_018_0099_2.human_malegonad_2018_10x_guo_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupMalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py b/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py new file mode 100644 index 000000000..018f0c413 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_12464_3.human_mixed_2019_10x_szabo_001 import Dataset as Dataset0001 + + +class DatasetGroupMixed(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + 
Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMixed + self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py new file mode 100644 index 000000000..6ca10dad9 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupMuscle(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py new file mode 100644 index 000000000..1af19a624 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -0,0 
+1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupOmentum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupOmentum + self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py new file mode 100644 index 000000000..08b067dd7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -0,0 +1,38 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cels_2016_08_011.human_pancreas_2016_indrop_baron_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2016_08_020.human_pancreas_2016_smartseq2_segerstolpe_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2017_09_004.human_pancreas_2017_smartseq2_enge_001 import Dataset as 
Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_002 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_004 import Dataset as Dataset0007 + + +class DatasetGroupPancreas(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPancreas + self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py new file mode 100644 index 000000000..106b9cd20 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_smartseq2_ventotormo_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_10x_ventotormo_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_placenta_2020_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupPlacenta(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py new file mode 100644 index 000000000..61a2f6be7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pleura_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupPleura(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, 
datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupPleura + self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py new file mode 100644 index 000000000..3ed1f9a44 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_celrep_2018_11_086.human_prostate_2018_10x_henry_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_prostate_2020_microwell_han_001 import Dataset as Dataset0002 + + +class DatasetGroupProstate(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py new file mode 100644 index 000000000..67ee06c82 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_rectum_2019_10x_wang_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rectum_2020_microwell_han_001 import Dataset as Dataset0002 + + +class DatasetGroupRectum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupRectum + self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py new file mode 100644 index 000000000..a39e0646a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupRib(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if 
available: + try: + from sfaira_extension.data.human import DatasetGroupRib + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py new file mode 100644 index 000000000..30985fe65 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupSkin(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py new file mode 100644 index 000000000..4434146c8 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spinalcord_2020_microwell_han_001 import Dataset as 
Dataset0001 + + +class DatasetGroupSpinalcord(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupSpinalcord + self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py new file mode 100644 index 000000000..fa36e2bf7 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_spleen_2019_10x_madissoon_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupSpleen(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import 
DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py new file mode 100644 index 000000000..b6030d318 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -0,0 +1,44 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_004 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_005 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_006 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_007 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_008 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_009 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_010 import Dataset as Dataset0010 + + +class DatasetGroupStomach(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: 
Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py new file mode 100644 index 000000000..9ece40261 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -0,0 +1,30 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1126_science_aay3224.human_thymus_2020_10x_park_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_002 import Dataset as Dataset0003 + + +class DatasetGroupThymus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, 
None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py new file mode 100644 index 000000000..e521b4f37 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -0,0 +1,28 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupThyroid(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupThyroid + self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py new file mode 100644 index 000000000..1fb26ad18 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_trachea_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupTrachea(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py new file mode 100644 index 000000000..143f88545 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ureter_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupUreter(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupUreter + 
self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py new file mode 100644 index 000000000..303e1ed50 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -0,0 +1,26 @@ +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_uterus_2020_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupUterus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.human import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py b/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py new file mode 100644 index 000000000..8d8c1569d --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py @@ -0,0 +1,27 @@ +from .mouse_bladder import DatasetGroupBladder +from .mouse_brain import DatasetGroupBrain +from .mouse_diaphragm import DatasetGroupDiaphragm +from .mouse_adipose import DatasetGroupAdipose +from .mouse_heart import DatasetGroupHeart +from .mouse_kidney import DatasetGroupKidney +from .mouse_colon import DatasetGroupColon +from .mouse_muscle import DatasetGroupMuscle +from .mouse_liver import DatasetGroupLiver +from .mouse_lung import DatasetGroupLung +from .mouse_mammarygland 
import DatasetGroupMammaryGland +from .mouse_bone import DatasetGroupBone +from .mouse_femalegonad import DatasetGroupFemalegonad +from .mouse_pancreas import DatasetGroupPancreas +from .mouse_placenta import DatasetGroupPlacenta +from .mouse_blood import DatasetGroupBlood +from .mouse_prostate import DatasetGroupProstate +from .mouse_rib import DatasetGroupRib +from .mouse_ileum import DatasetGroupIleum +from .mouse_skin import DatasetGroupSkin +from .mouse_spleen import DatasetGroupSpleen +from .mouse_stomach import DatasetGroupStomach +from .mouse_malegonad import DatasetGroupMalegonad +from .mouse_thymus import DatasetGroupThymus +from .mouse_tongue import DatasetGroupTongue +from .mouse_trachea import DatasetGroupTrachea +from .mouse_uterus import DatasetGroupUterus diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/external.py b/sfaira/data/dataloaders/anatomical_groups/mouse/external.py new file mode 100644 index 000000000..413092483 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/external.py @@ -0,0 +1,2 @@ +from sfaira.data import DatasetBase, DatasetGroup +from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py new file mode 100644 index 000000000..f7532d03c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_003 import Dataset 
as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 + + +class DatasetGroupAdipose(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupAdipose + self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py new file mode 100644 index 000000000..6576b2987 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bladder_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupBladder(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + 
Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBladder + self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py new file mode 100644 index 000000000..35638cefa --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 + + +class DatasetGroupBlood (DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + 
Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBlood + self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py new file mode 100644 index 000000000..f2135c456 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bone_2018_microwell_001 import Dataset as Dataset0003 + + +class DatasetGroupBone(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBone + self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py new file mode 100644 index 000000000..5b932749e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupBrain(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupBrain + self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py new file mode 100644 index 000000000..6507cd3fd --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -0,0 +1,29 @@ +import os +from typing import 
Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupColon(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupColon + self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py new file mode 100644 index 000000000..7c69001d0 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -0,0 +1,27 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_diaphragm_2019_smartseq2_pisco_001 import Dataset as Dataset0001 + + +class DatasetGroupDiaphragm(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupDiaphragm + 
self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py new file mode 100644 index 000000000..b2d687412 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupFemalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupFemalegonad + self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py new file mode 100644 index 000000000..50458cd02 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -0,0 +1,32 @@ +import os +from typing import Union + +from .external import DatasetGroup + + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_10x_pisco_001 import Dataset as Dataset0001 +from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_002 import Dataset as Dataset0003 + + +class DatasetGroupHeart(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupHeart + self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py new file mode 100644 index 000000000..bcd9fd9ca --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupIleum(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, 
cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupIleum + self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py new file mode 100644 index 000000000..ce6788cd6 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupKidney(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + 
try: + from sfaira_extension.data.mouse import DatasetGroupKidney + self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py new file mode 100644 index 000000000..e9915b36b --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -0,0 +1,33 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_002 import Dataset as Dataset0004 + + +class DatasetGroupLiver(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupLiver + self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py new file mode 100644 index 000000000..b6e6c9e5c --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -0,0 +1,35 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_003 import Dataset as Dataset0005 + + +class DatasetGroupLung(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupLung + self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py new file mode 100644 index 000000000..8e62116b5 --- 
/dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupMalegonad(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMalegonad + self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py new file mode 100644 index 000000000..e5bd9eb2e --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -0,0 +1,37 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 + + +class DatasetGroupMammaryGland(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMammaryGland + self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py new file mode 100644 index 000000000..6e1deee58 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 
+from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupMuscle(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupMuscle + self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py new file mode 100644 index 000000000..a70918270 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -0,0 +1,47 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_pancreas_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_001 import Dataset as Dataset0004 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_002 import Dataset as Dataset0005 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_003 import Dataset as Dataset0006 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_004 import Dataset as Dataset0007 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_005 import Dataset as Dataset0008 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_006 import Dataset as Dataset0009 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_007 import Dataset as Dataset0010 +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_008 import Dataset as Dataset0011 + + +class DatasetGroupPancreas(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupPancreas + 
self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py new file mode 100644 index 000000000..04a87566d --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupPlacenta(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupPlacenta + self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py new file mode 100644 index 000000000..a816076de --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_001 import Dataset as Dataset0001 +from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupProstate(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupProstate + self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py new file mode 100644 index 000000000..069b179df --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_003 import Dataset as Dataset0003 + + +class DatasetGroupRib(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in 
datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupRib + self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py new file mode 100644 index 000000000..d0a012add --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupSkin(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupSkin + self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py new file mode 100644 index 000000000..8bd62249a --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_spleen_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupSpleen(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupSpleen + self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py new file mode 100644 index 000000000..15b2fcd63 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -0,0 +1,27 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_stomach_2018_microwell_han_001 import Dataset as Dataset0001 + + +class DatasetGroupStomach(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load 
versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupStomach + self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py new file mode 100644 index 000000000..a2f30d3df --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -0,0 +1,31 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_thymus_2018_microwell_han_001 import Dataset as Dataset0003 + + +class DatasetGroupThymus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupThymus + self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py new file mode 100644 index 000000000..695f666de --- /dev/null +++ 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupTongue(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupTongue + self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py new file mode 100644 index 000000000..eaff5c910 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_10x_pisco_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_smartseq2_pisco_001 import Dataset as Dataset0002 + + +class DatasetGroupTrachea(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, 
meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupTrachea + self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py new file mode 100644 index 000000000..7513bf339 --- /dev/null +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -0,0 +1,29 @@ +import os +from typing import Union + +from .external import DatasetGroup + +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_002 import Dataset as Dataset0002 + + +class DatasetGroupUterus(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + datasets = [ + Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), + Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) + ] + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + # Load versions from extension if available: + try: + from sfaira_extension.data.mouse import DatasetGroupUterus + self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) + except ImportError: + pass diff --git a/sfaira/data/dataloaders/databases/__init__.py b/sfaira/data/dataloaders/databases/__init__.py new file mode 100644 index 000000000..328e18fc8 --- /dev/null +++ b/sfaira/data/dataloaders/databases/__init__.py @@ -0,0 +1 @@ +from .super_group import DatasetSuperGroupDatabases diff 
--git a/sfaira/data/dataloaders/databases/cellxgene/__init__.py b/sfaira/data/dataloaders/databases/cellxgene/__init__.py new file mode 100644 index 000000000..2d6a4a900 --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/__init__.py @@ -0,0 +1,2 @@ +from sfaira.data.dataloaders.databases.cellxgene.cellxgene_group import DatasetGroup +from sfaira.data.dataloaders.databases.cellxgene.cellxgene_loader import Dataset diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py new file mode 100644 index 000000000..41328d40c --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -0,0 +1,26 @@ +import os +from typing import Union + +from .external import ADATA_IDS_CELLXGENE, DatasetGroup + +from .cellxgene_loader import Dataset + + +class DatasetGroup(DatasetGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None + ): + self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() + + fn_ls = os.listdir(path) + fn_ls = [x for x in fn_ls if x in self._ADATA_IDS_CELLXGENE.accepted_file_names] + datasets = [ + Dataset(path=path, fn=x, meta_path=meta_path, cache_path=cache_path) + for x in fn_ls + ] + keys = [x.id for x in datasets] + super().__init__(dict(zip(keys, datasets))) diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py new file mode 100644 index 000000000..48429f7ea --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -0,0 +1,70 @@ +import anndata +import os +from typing import Union +from .external import DatasetBase +from .external import ADATA_IDS_CELLXGENE + + +class Dataset(DatasetBase): + """ + This is a dataloader for downloaded h5ad from cellxgene. 
+ + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None], + fn: str, + meta_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, **kwargs) + self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() + self.fn = fn + + self.obs_key_cellontology_class = self._ADATA_IDS_CELLXGENE.cell_ontology_class + self.obs_key_cellontology_id = self._ADATA_IDS_CELLXGENE.cell_ontology_id + self.obs_key_cellontology_original = self._ADATA_IDS_CELLXGENE.cell_types_original + self.obs_key_dev_stage = self._ADATA_IDS_CELLXGENE.dev_stage + self.obs_key_ethnicity = self._ADATA_IDS_CELLXGENE.ethnicity + self.obs_key_healthy = self._ADATA_IDS_CELLXGENE.healthy + self.obs_key_sex = self._ADATA_IDS_CELLXGENE.sex + self.obs_key_organism = self._ADATA_IDS_CELLXGENE.organism + self.obs_key_state_exact = self._ADATA_IDS_CELLXGENE.state_exact + + self.healthy_state_healthy = self._ADATA_IDS_CELLXGENE.disease_state_healthy + + self.var_ensembl_col = self._ADATA_IDS_CELLXGENE.gene_id_ensembl + self.var_symbol_col = self._ADATA_IDS_CELLXGENE.gene_id_names + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + """ + Note that in contrast to data set specific data loaders, here, the core attributes are only identified from + the data in this function and are not already set in the constructor. These attributes can still be + used through meta data containers after the data was loaded once. + + :param fn: + :return: + """ + fn = os.path.join(self.path, self.fn) + adata = anndata.read(fn) + adata.X = adata.raw.X + # TODO delete raw? 
+ + self.author = adata.uns[self._ADATA_IDS_CELLXGENE.author][self._ADATA_IDS_CELLXGENE.author_names] + self.doi = adata.uns[self._ADATA_IDS_CELLXGENE.doi] + self.download = self.download + self.id = self.id + self.normalization = 'raw' + self.organ = str(self.fn).split("_")[3] # TODO interface this properly + # self.organ = adata.obs["tissue"].values[0] + self.organism = adata.obs[self._ADATA_IDS_CELLXGENE.organism].values[0] + self.protocol = adata.obs[self._ADATA_IDS_CELLXGENE.protocol].values[0] + self.year = adata.uns[self._ADATA_IDS_CELLXGENE.year] diff --git a/sfaira/data/dataloaders/databases/cellxgene/external.py b/sfaira/data/dataloaders/databases/cellxgene/external.py new file mode 100644 index 000000000..c7a6982b7 --- /dev/null +++ b/sfaira/data/dataloaders/databases/cellxgene/external.py @@ -0,0 +1,3 @@ +from sfaira.data import DatasetBase, DatasetGroup +from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE +from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/data/dataloaders/databases/super_group.py b/sfaira/data/dataloaders/databases/super_group.py new file mode 100644 index 000000000..df0605579 --- /dev/null +++ b/sfaira/data/dataloaders/databases/super_group.py @@ -0,0 +1,22 @@ +from typing import Union + +from sfaira.data import DatasetSuperGroup +from sfaira.data.dataloaders.databases.cellxgene import DatasetGroup as DatasetGroupCellxgene + + +class DatasetSuperGroupDatabases(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + dataset_groups = [] + # List all data bases here: + dataset_groups.append(DatasetGroupCellxgene( + path=path, + meta_path=meta_path, + cache_path=cache_path + )) + super().__init__(dataset_groups=dataset_groups) diff --git a/sfaira/data/dataloaders/loaders/__init__.py b/sfaira/data/dataloaders/loaders/__init__.py new file mode 100644 index 000000000..cf0bdc722 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/__init__.py @@ -0,0 +1 @@ +from .super_group import DatasetSuperGroupLoaders diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py new file mode 100644 index 000000000..75444b20e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -0,0 +1,87 @@ +import anndata +import os +from typing import Union +import tarfile +import gzip +from io import StringIO +import anndata as ad +import pandas as pd +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" + self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" + + self.author = "Quake" + self.doi = "10.1016/j.cell.2017.09.004" + self.healthy = True + self.normalization = "raw" + self.protocol = "Smartseq2" + self.organ = "pancreas" # ToDo: "islet of Langerhans" + self.organism = "human" + self.state_exact = "healthy" + self.year = 2017 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = 
"celltype" + + self.class_maps = { + "0": { + "alpha": "Alpha cell", + "acinar": "Acinar cell", + "ductal": "Ductal cell", + "beta": "Beta cell", + "unsure": "Unknown", + "delta": "Delta cell", + "mesenchymal": "Mesenchymal Cell" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), + os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") + ] + dfs = [] + with tarfile.open(fn[0]) as tar: + for member in tar.getmembers(): + d = pd.read_csv(tar.extractfile(member), compression="gzip", header=None, sep="\t", index_col=0, + names=[member.name.split("_")[0]]) + dfs.append(d) + self.adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + with gzip.open(fn[1]) as f: + file_content = [i.decode("utf-8") for i in f.readlines()] + inputstring = "" + for line in file_content: + if "ID_REF" in line: + inputstring += line + if "!Sample_title" in line: + inputstring += line[1:] + if "!Sample_characteristics_ch1\t\"inferred_cell_type: alpha" in line: + inputstring += line[1:] + data = StringIO(inputstring) + d = pd.read_csv(data, sep="\t").T + d.columns = d.iloc[0] + d.drop("Sample_title", inplace=True) + d = d.reset_index().set_index("ID_REF") + d.columns.name = None + d.index.name = None + self.adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in self.adata.obs.index] + self.adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in self.adata.obs.index] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py new file mode 100644 index 000000000..e397eecf3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py @@ -0,0 +1,43 @@ +import anndata +import numpy as np +import pandas +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1016_j_cell_2018_02_001(DatasetBase): + """ + This is a dataloader template for mca data. + """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.obs_key_cellontology_class = "Annotation" + self.obs_key_cellontology_original = "Annotation" + + self.author = "Guo" + self.doi = "10.1016/j.cell.2018.02.001" + self.normalization = "raw" + self.healthy = True + self.organism = "mouse" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + + def _load_generalized(self, fn, fn_meta): + celltypes = pandas.read_csv(fn_meta, index_col=1) + celltypes = celltypes.drop(["Unnamed: 0"], axis=1) + + data = pandas.read_csv(fn, sep=" ", header=0) + self.adata = anndata.AnnData(data.T) + self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() + self.adata.obs = celltypes.loc[self.adata.obs_names, :] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py new file mode 100644 index 000000000..347120a7d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -0,0 +1,48 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + 
def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "bladder" + + self.class_maps = { + "0": { + "Endothelial cell_Ly6c1 high(Bladder)": "endothelial cell", + "Vascular endothelial cell(Bladder)": "endothelial cell", + "Urothelium(Bladder)": "bladder urothelial cell", + "Dendritic cell_Cd74 high(Bladder)": "dendritic cell", + "Dendritic cell_Lyz2 high(Bladder)": "dendritic cell", + "Macrophage_Pf4 high(Bladder)": "macrophage", + "NK cell(Bladder)": "NK cell", + "Basal epithelial cell(Bladder)": "basal epithelial cell", + "Epithelial cell_Upk3a high(Bladder)": "epithelial cell", + "Epithelial cell_Gm23935 high(Bladder)": "epithelial cell", + "Mesenchymal stromal cell(Bladder)": "mesenchymal stromal cell", + "Stromal cell_Dpt high(Bladder)": "stromal cell", + "Stromal cell_Car3 high(Bladder)": "stromal cell", + "Smooth muscle cell(Bladder)": "smooth muscle cell", + "Vascular smooth muscle progenitor cell(Bladder)": "smooth muscle cell", + "Umbrella cell(Bladder)": "umbrella cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py new file mode 100644 index 
000000000..4c4aa21e8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, 
fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py new file mode 100644 index 000000000..980024416 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 
high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py new file mode 100644 index 000000000..10069693f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps 
= { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py new file mode 100644 index 000000000..c0602d28a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py @@ -0,0 +1,53 @@ 
+import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, 
"mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py new file mode 100644 index 000000000..ae8f80e2f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "blood" + + self.class_maps = { + "0": { + "B cell_Igha high(Peripheral_Blood)": "B cell", + "B cell_Ly6d high(Peripheral_Blood)": "B cell", + "B cell_Rps27rt high(Peripheral_Blood)": "B cell", + "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", + "Basophil_Prss34 high(Peripheral_Blood)": "basophil", + "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", + "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", + "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", + "Macrophage_Ace high(Peripheral_Blood)": "macrophage", + "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", + "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", + "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", + "Monocyte_Elane high(Peripheral_Blood)": "monocyte", + "Monocyte_F13a1 
high(Peripheral_Blood)": "monocyte", + "NK cell_Gzma high(Peripheral_Blood)": "NK cell", + "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", + "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", + "T cell_Gm14303 high(Peripheral_Blood)": "T cell", + "T cell_Trbc2 high(Peripheral_Blood)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py new file mode 100644 index 000000000..baa4cb60e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "bone" + + self.class_maps = { + "0": { + "B cell_Igkc high(Bone-Marrow)": "naive B cell", + "Dendritic cell_H2-Eb1 high(Bone-Marrow)": "dendritic cell", + "Dendritic cell_Siglech high(Bone-Marrow)": "dendritic 
cell", + "Macrophage_Ms4a6c high(Bone-Marrow)": "macrophage", + "Macrophage_S100a4 high(Bone-Marrow)": "macrophage", + "Erythroblast(Bone-Marrow)": "erythroid progenitor", + "Mast cell(Bone-Marrow)": "mast cell", + "Monocyte_Mif high(Bone-Marrow)": "monocyte", + "Monocyte_Prtn3 high(Bone-Marrow)": "monocyte", + "Neutrophil progenitor(Bone-Marrow)": "neutrophil progenitor", + "Neutrophil_Cebpe high(Bone-Marrow)": "neutrophil", + "Neutrophil_Fcnb high(Bone-Marrow)": "neutrophil", + "Neutrophil_Mmp8 high(Bone-Marrow)": "neutrophil", + "Neutrophil_Ngp high(Bone-Marrow)": "neutrophil", + "Hematopoietic stem progenitor cell(Bone-Marrow)": "hematopoietic precursor cell", + "Pre-pro B cell(Bone-Marrow)": "early pro-B cell", + "T cell_Ms4a4b high(Bone-Marrow)": "CD4-positive, alpha-beta T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py new file mode 100644 index 000000000..a96602c7c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "brain" + + self.class_maps = { + "0": { + "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial cell", + "Astrocyte_Atp1b2 high(Brain)": "astrocyte", + "Astrocyte_Mfe8 high(Brain)": "astrocyte", + "Astrocyte_Pla2g7 high(Brain)": "astrocyte", + "Granulocyte_Ngp high(Brain)": "granulocyte", + "Hypothalamic ependymal cell(Brain)": "ependymal cell", + "Macrophage_Klf2 high(Brain)": "macrophage", + "Macrophage_Lyz2 high(Brain)": "macrophage", + "Microglia(Brain)": "microglial cell", + "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", + "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", + "Neuron(Brain)": "neuron", + "Pan-GABAergic(Brain)": "GABAergic cell", + "Schwann cell(Brain)": "schwann cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py new file mode 100644 index 000000000..7d188840b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs 
+ ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "brain" + + self.class_maps = { + "0": { + "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial cell", + "Astrocyte_Atp1b2 high(Brain)": "astrocyte", + "Astrocyte_Mfe8 high(Brain)": "astrocyte", + "Astrocyte_Pla2g7 high(Brain)": "astrocyte", + "Granulocyte_Ngp high(Brain)": "granulocyte", + "Hypothalamic ependymal cell(Brain)": "ependymal cell", + "Macrophage_Klf2 high(Brain)": "macrophage", + "Macrophage_Lyz2 high(Brain)": "macrophage", + "Microglia(Brain)": "microglial cell", + "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", + "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", + "Neuron(Brain)": "neuron", + "Pan-GABAergic(Brain)": "GABAergic cell", + "Schwann cell(Brain)": "schwann cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py new file mode 100644 index 000000000..db4d0801e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + 
path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "femalegonad" + + self.class_maps = { + "0": { + "Cumulus cell_Car14 high(Ovary)": "cumulus cell", + "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", + "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", + "Granulosa cell_Inhba high(Ovary)": "granulosa cell", + "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", + "Large luteal cell(Ovary)": "large luteal cell", + "Macrophage_Lyz2 high(Ovary)": "macrophage", + "Marcrophage_Cd74 high(Ovary)": "macrophage", + "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", + "Ovarian vascular surface endothelium cell(Ovary)": "endothelial cell of ovarian surface", + "Small luteal cell(Ovary)": "small luteal cell", + "Stroma cell (Ovary)": "stromal cell", + "Thecal cell(Ovary)": "thecal cell", + "luteal cells(Ovary)": "luteal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py new file mode 100644 index 000000000..affa74f21 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py @@ -0,0 +1,46 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "femalegonad" + + self.class_maps = { + "0": { + "Cumulus cell_Car14 high(Ovary)": "cumulus cell", + "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", + "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", + "Granulosa cell_Inhba high(Ovary)": "granulosa cell", + "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", + "Large luteal cell(Ovary)": "large luteal cell", + "Macrophage_Lyz2 high(Ovary)": "macrophage", + "Marcrophage_Cd74 high(Ovary)": "macrophage", + "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", + "Ovarian vascular surface endothelium cell(Ovary)": "endothelial cell of ovarian surface", + "Small luteal cell(Ovary)": "small luteal cell", + "Stroma cell (Ovary)": "stromal cell", + "Thecal cell(Ovary)": "thecal cell", + "luteal cells(Ovary)": "luteal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py new file mode 100644 index 000000000..2f817c510 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + 
"Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py new file mode 100644 index 000000000..61fb53c0e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 
high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py new file mode 100644 index 000000000..08303f3a1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py @@ -0,0 +1,55 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "ileum" + + self.class_maps = { + "0": { + "B cell_Ighd high(Small-Intestine)": "B cell", + "B cell_Igkv12-46 high(Small-Intestine)": "B cell", + "B cell_Jchain high(Small-Intestine)": "B cell", + "B cell_Ms4a1 high(Small-Intestine)": "B cell", + "Columnar epithelium(Small-Intestine)": "epithelial cell", + "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", + "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", + "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", + "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", + "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", + "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", + "Macrophage_Apoe high(Small-Intestine)": "macrophage", + "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", + "Paneth cell(Small-Intestine)": "paneth cell", + "S 
cell_Chgb high(Small-Intestine)": "enteroendocrine cell", + "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", + "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", + "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", + "T cell_Ccl5 high(Small-Intestine)": "T cell", + "T cell_Icos high(Small-Intestine)": "T cell", + "T cell_Cd7 high(Small-Intestine)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py new file mode 100644 index 000000000..365e62c50 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "kidney" + + self.class_maps = { + "0": { + "Cell in cell cycle(Fetal_Kidney)": "fetal proliferative cell", + "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell" + }, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py new file mode 100644 index 000000000..6b1aa65cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py @@ -0,0 +1,66 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "kidney" + + self.class_maps = { + "0": { + "Adipocyte(Fetal_Kidney)": "fetal adipocyte", + "B cell(Kidney)": "B cell", + "Dendritic cell_Ccr7 high(Kidney)": "dendritic cell", + "Dendritic cell_Cst3 high(Kidney)": "dendritic cell", + "Distal collecting duct principal cell_Cldn4 high(Kidney)": "kidney collecting duct principal cell", + "Distal collecting duct principal cell_Hsd11b2 high(Kidney)": "kidney collecting duct principal cell", + "Distal convoluted tubule_Pvalb high(Kidney)": "kidney distal convoluted tubule epithelial cell", + "Distal convoluted tubule_S100g high(Kidney)": "kidney distal convoluted tubule epithelial cell", + "Endothelial cell(Kidney)": 
"fenestrated cell", + "Epithelial cell_Cryab high(Kidney)": "epithelial cell", + "Fenestrated endothelial cell_Plvap high(Kidney)": "fenestrated cell", + "Fenestrated endothelial cell_Tm4sf1 high(Kidney)": "fenestrated cell", + "Glomerular epithelial cell_Aldh1a2 high(Fetal_Kidney)": "glomerular epithelial cell", + "Intercalated cells of collecting duct_Aqp6 high(Kidney)": "kidney collecting duct epithelial cell", + "Intercalated cells of collecting duct_Slc26a4 high(Kidney)": "kidney collecting duct epithelial cell", + "Macrophage_Ccl4 high (Kidney)": "macrophage", + "Macrophage_Lyz2 high(Kidney)": "macrophage", + "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell", + "Neutrophil progenitor_S100a8 high(Kidney)": "neutrophil progenitor", + "Proximal tubule brush border cell(Kidney)": "brush cell", + "Proximal tubule cell_Cyp4a14 high(Kidney)": "epithelial cell of proximal tubule", + "Proximal tubule cell_Osgin1 high(Kidney)": "epithelial cell of proximal tubule", + "S1 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", + "S3 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", + "Stromal cell_Ankrd1 high(Kidney)": "fibroblast", + "Stromal cell_Cxcl10 high(Kidney)": "fibroblast", + "Stromal cell_Dcn high(Kidney)": "fibroblast", + "Stromal cell_Mgp high(Fetal_Kidney)": "fibroblast", + "Stromal cell_Mgp high(Kidney)": "fibroblast", + "Stromal cell_Ptgds high(Kidney)": "fibroblast", + "T cell(Kidney)": "T cell", + "Thick ascending limb of the loop of Henle(Kidney)": "kidney loop of Henle ascending limb epithelial cell", + "Ureteric epithelium(Kidney)": "ureteric epithelial cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + 
self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py new file mode 100644 index 000000000..345d8a0eb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py @@ -0,0 +1,53 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "liver" + + self.class_maps = { + "0": { + "B cell_Fcmr high(Liver)": "B cell", + "B cell_Jchain high(Liver)": "B cell", + "Dendritic cell_Cst3 high(Liver)": "dendritic cell", + "Dendritic cell_Siglech high(Liver)": "dendritic cell", + "Endothelial cell(Liver)": "endothelial cell of hepatic sinusoid", + "Epithelial cell(Liver)": "duct epithelial cell", + "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", + "Erythroblast_Hbb-bs high(Liver)": "erythroblast", + "Erythroblast_Hbb-bt high(Liver)": "erythroblast", + "Granulocyte(Liver)": "granulocyte", + "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", + "Hepatocyte_mt-Nd4 high(Liver)": "hepatocyte", + "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", + "Periportal (PP) hepatocyte(Liver)": "hepatocyte", + "Kuppfer cell(Liver)": "Kupffer cell", + "Macrophage_Chil3 high(Liver)": "macrophage", + "Neutrophil_Ngp high(Liver)": "neutrophil", + "Stromal cell(Liver)": "stromal cell", + "T cell_Gzma high(Liver)": 
"T cell", + "T cell_Trbc2 high(Liver)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py new file mode 100644 index 000000000..e9223074a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py @@ -0,0 +1,47 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "liver" + + self.class_maps = { + "0": { + "B cell_Jchain high(Liver)": "B cell", + "Dendritic cell_Cst3 high(Liver)": "dendritic cell", + "Dendritic cell_Siglech high(Liver)": "dendritic cell", + "Epithelial cell(Liver)": "duct epithelial cell", + "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", + "Erythroblast_Hbb-bs high(Liver)": "erythroblast", + "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", + "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", + "Periportal (PP) hepatocyte(Liver)": "hepatocyte", + "Kuppfer cell(Liver)": "Kupffer cell", + "Macrophage_Chil3 
high(Liver)": "macrophage", + "Stromal cell(Liver)": "stromal cell", + "T cell_Gzma high(Liver)": "T cell", + "T cell_Trbc2 high(Liver)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py new file mode 100644 index 000000000..8926dbf10 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py @@ -0,0 +1,64 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", 
+ "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + "Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py new file mode 100644 index 000000000..3f82240d9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py @@ -0,0 +1,64 
@@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", + "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + 
"Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py new file mode 100644 index 000000000..19b8775f3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py @@ -0,0 +1,64 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "lung" + + self.class_maps = { + "0": { + "AT1 Cell(Lung)": "alveolar epithelial cell type I", + "AT2 Cell(Lung)": "alveolar epithelial cell type II", + "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", + "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", + "Alveolar macrophage_Pclaf 
high(Lung)": "alveolar macrophage", + "B Cell(Lung)": "B cell", + "Basophil(Lung)": "basophil", + "Ciliated cell(Lung)": "ciliated cell", + "Clara Cell(Lung)": "clara cell", + "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", + "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", + "Dendritic cell_Naaa high(Lung)": "dendritic cell", + "Dividing T cells(Lung)": "T cell", + "Dividing cells(Lung)": "unknown", + "Dividing dendritic cells(Lung)": "dendritic cell", + "Endothelial cell_Kdr high(Lung)": "endothelial cell", + "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", + "Endothelial cells_Vwf high(Lung)": "endothelial cell", + "Eosinophil granulocyte(Lung)": "eosinophil", + "Ig−producing B cell(Lung)": "B cell", + "Interstitial macrophage(Lung)": "lung macrophage", + "Monocyte progenitor cell(Lung)": "monocyte progenitor", + "NK Cell(Lung)": "NK cell", + "Neutrophil granulocyte(Lung)": "neutrophil", + "Nuocyte(Lung)": "nuocyte", + "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", + "Stromal cell_Acta2 high(Lung)": "stromal cell", + "Stromal cell_Dcn high(Lung)": "stromal cell", + "Stromal cell_Inmt high(Lung)": "stromal cell", + "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py new file mode 100644 index 000000000..32b6e3f18 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "malegonad" + + self.class_maps = { + "0": { + "Elongating spermatid(Testis)": "elongating spermatid", + "Erythroblast_Hbb-bs high(Testis)": "erythroblast", + "Leydig cell(Testis)": "leydig cell", + "Macrophage_Lyz2 high(Testis)": "macrophage", + "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", + "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", + "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", + "Sertoli cell(Testis)": "sertoli cell", + "Spermatids_1700016P04Rik high(Testis)": "spermatid", + "Spermatids_Cst13 high(Testis)": "spermatid", + "Spermatids_Hmgb4 high(Testis)": "spermatid", + "Spermatids_Tnp1 high(Testis)": "spermatid", + "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", + "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", + "Spermatocyte_Calm2 high(Testis)": "spermatocyte", + "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", + "Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", + "Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", + "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" + }, + } + + def _load(self, fn=None): + if fn is 
None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py new file mode 100644 index 000000000..67da67428 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "malegonad" + + self.class_maps = { + "0": { + "Elongating spermatid(Testis)": "elongating spermatid", + "Erythroblast_Hbb-bs high(Testis)": "erythroblast", + "Leydig cell(Testis)": "leydig cell", + "Macrophage_Lyz2 high(Testis)": "macrophage", + "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", + "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", + "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", + "Sertoli cell(Testis)": "sertoli cell", + "Spermatids_1700016P04Rik high(Testis)": "spermatid", + "Spermatids_Cst13 high(Testis)": "spermatid", + "Spermatids_Hmgb4 high(Testis)": "spermatid", + "Spermatids_Tnp1 high(Testis)": 
"spermatid", + "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", + "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", + "Spermatocyte_Calm2 high(Testis)": "spermatocyte", + "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", + "Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", + "Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", + "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py new file mode 100644 index 000000000..350514e31 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain 
high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py new file mode 100644 index 000000000..a8e2bca14 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): 
+ + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", 
"MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py new file mode 100644 index 000000000..5f3bab9bd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor 
cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py new file mode 100644 index 000000000..b6c0351df --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py @@ -0,0 +1,50 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "mammarygland" + + self.class_maps = { + "0": { + "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", + "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", + "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", + "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": 
"dendritic cell", + "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", + "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", + "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", + "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", + "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", + "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", + "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", + "NK cell(Mammary-Gland-Virgin)": "NK cell", + "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", + "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", + "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", + "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", + "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", + "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py new file mode 100644 index 000000000..34860727d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py @@ -0,0 +1,49 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + 
cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "muscle" + + self.class_maps = { + "0": { + "B cell_Jchain high(Muscle)": "B cell", + "B cell_Vpreb3 high(Muscle)": "B cell", + "Dendritic cell(Muscle)": "dendritic cell", + "Endothelial cell(Muscle)": "endothelial cell", + "Erythroblast_Car1 high(Muscle)": "erythroblast", + "Erythroblast_Car2 high(Muscle)": "erythroblast", + "Granulocyte monocyte progenitor cell(Muscle)": "monocyte progenitor", + "Macrophage_Ms4a6c high(Muscle)": "macrophage", + "Macrophage_Retnla high(Muscle)": "macrophage", + "Muscle cell_Tnnc1 high(Muscle)": "muscle cell", + "Muscle cell_Tnnc2 high(Muscle)": "muscle cell", + "Muscle progenitor cell(Muscle)": "skeletal muscle satellite cell", + "Neutrophil_Camp high(Muscle)": "neutrophil", + "Neutrophil_Prg2 high(Muscle)": "neutrophil", + "Neutrophil_Retnlg high(Muscle)": "neutrophil", + "Stromal cell(Muscle)": "stromal cell", + "T cell(Muscle)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py new file mode 100644 index 000000000..58acfa317 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py 
@@ -0,0 +1,54 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "Acinar cell(Pancreas)": "pancreatic acinar cell", + "Dendrtic cell(Pancreas)": "dendritic cell", + "Ductal cell(Pancreas)": "pancreatic ductal cell", + "Endocrine cell(Pancreas)": "endocrine cell", + "Dividing cell(Pancreas)": "endocrine cell", + "Endothelial cell_Fabp4 high(Pancreas)": "endothelial cell", + "Endothelial cell_Lrg1 high(Pancreas)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Pancreas)": "endothelial cell", + "Erythroblast_Hbb-bt high(Pancreas)": "erythroblast", + "Erythroblast_Igkc high(Pancreas)": "erythroblast", + "Granulocyte(Pancreas)": "granulocyte", + "Macrophage_Ly6c2 high(Pancreas)": "macrophage", + "Macrophage(Pancreas)": "macrophage", + "Glial cell(Pancreas)": "glial cell", + "Smooth muscle cell_Acta2 high(Pancreas)": "smooth muscle cell", + "Smooth muscle cell_Rgs5 high(Pancreas)": "smooth muscle cell", + "Stromal cell_Fn1 high(Pancreas)": "stromal cell", + "Stromal cell_Mfap4 high(Pancreas)": "stromal cell", + "Stromal cell_Smoc2 high(Pancreas)": "stromal cell", + "T cell(Pancreas)": "t cell", + "B cell(Pancreas)": "b cell", + "β-cell(Pancreas)": "pancreatic B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"Pancreas_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py new file mode 100644 index 000000000..12e4fcd7a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py @@ -0,0 +1,60 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "placenta" + + self.class_maps = { + "0": { + "B cell(Placenta)": "B cell", + "Basophil(Placenta)": "basophil", + "Decidual stromal cell(Placenta)": "decidual stromal cell", + "Dendritic cell(Placenta)": "dendritic cell", + "Endodermal cell_Afp high(Placenta)": "endodermal cell", + "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", + "Erythroblast_Hbb-y high(Placenta)": "erythroblast", + "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", + "Granulocyte_Neat1 high(Placenta)": "granulocyte", + "Granulocyte_S100a9 high(Placenta)": "granulocyte", + "HSPC_Lmo2 high(Placenta)": "HSPC", + "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", + "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", + "Macrophage_Apoe high(Placenta)": "macrophage", + "Macrophage_Spp1 high(Placenta)": 
"macrophage", + "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", + "Monocyte(Placenta)": "monocyte", + "NK cell(Placenta)": "NK cell", + "NKT cell(Placenta)": "NKT cell", + "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", + "PE lineage cell_S100g high(Placenta)": "PE lineage cell", + "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", + "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", + "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", + "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", + "Stromal cell(Placenta)": "stromal cell", + "Stromal cell_Acta2 high(Placenta)": "stromal cell", + "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py new file mode 100644 index 000000000..e62aaa0af --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py @@ -0,0 +1,60 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "placenta" + + self.class_maps = { + "0": { + "B cell(Placenta)": "B cell", + "Basophil(Placenta)": "basophil", + "Decidual stromal cell(Placenta)": "decidual stromal cell", + "Dendritic cell(Placenta)": "dendritic cell", + "Endodermal cell_Afp high(Placenta)": "endodermal cell", + "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", + "Erythroblast_Hbb-y high(Placenta)": "erythroblast", + "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", + "Granulocyte_Neat1 high(Placenta)": "granulocyte", + "Granulocyte_S100a9 high(Placenta)": "granulocyte", + "HSPC_Lmo2 high(Placenta)": "HSPC", + "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", + "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", + "Macrophage_Apoe high(Placenta)": "macrophage", + "Macrophage_Spp1 high(Placenta)": "macrophage", + "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", + "Monocyte(Placenta)": "monocyte", + "NK cell(Placenta)": "NK cell", + "NKT cell(Placenta)": "NKT cell", + "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", + "PE lineage cell_S100g high(Placenta)": "PE lineage cell", + "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", + "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", + "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", + "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", + "Stromal cell(Placenta)": "stromal cell", + "Stromal cell_Acta2 high(Placenta)": "stromal cell", + "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, 
"mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py new file mode 100644 index 000000000..52baaaa0b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py @@ -0,0 +1,38 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "prostate" + + self.class_maps = { + "0": { + "Dendritic cell(Prostate)": "dendritic cell", + "Epithelial cell(Prostate)": "epithelial cell", + "Glandular epithelium(Prostate)": "glandular epithelial cell", + "Prostate gland cell(Prostate)": "glandular cell", + "Stromal cell(Prostate)": "stromal cell", + "T cell(Prostate)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py new file mode 100644 index 000000000..ddcaa51ec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py @@ -0,0 +1,38 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "prostate" + + self.class_maps = { + "0": { + "Dendritic cell(Prostate)": "dendritic cell", + "Epithelial cell(Prostate)": "epithelial cell", + "Glandular epithelium(Prostate)": "glandular epithelial cell", + "Prostate gland cell(Prostate)": "glandular cell", + "Stromal cell(Prostate)": "stromal cell", + "T cell(Prostate)": "T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py new file mode 100644 index 000000000..7947f5881 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 
high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py new file mode 100644 index 000000000..9dbbab288 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 
high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py new file mode 100644 index 000000000..d1461dd33 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py @@ -0,0 +1,57 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "rib" + + self.class_maps = { + "0": { + "B cell(Neonatal-Rib)": "B cell", + "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", + "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", + "Dividing cell(Neonatal-Rib)": "proliferative cell", + "Endothelial cell(Neonatal-Rib)": "endothelial cell", + "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", + "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", + "Granulocyte(Neonatal-Rib)": "granulocyte", + "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", + "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", + "Muscle cell(Neonatal-Rib)": "muscle cell", + "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", + "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", + "Neuron_Mpz high(Neonatal-Rib)": "neuron", + "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", + "Neutrophil(Neonatal-Rib)": "neutrophil", + "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", + "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", + "Osteoblast(Neonatal-Rib)": "osteoblast", + "Osteoclast(Neonatal-Rib)": "osteoclast", + "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", + "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, 
fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py new file mode 100644 index 000000000..dba49eeff --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py @@ -0,0 +1,43 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "spleen" + + self.class_maps = { + "0": { + "Erythroblast(Spleen)": "proerythroblast", + "Dendritic cell_S100a4 high(Spleen)": "dendritic cell", + "Dendritic cell_Siglech high(Spleen)": "dendritic cell", + "Granulocyte(Spleen)": "granulocyte", + "Macrophage(Spleen)": "macrophage", + "Monocyte(Spleen)": "monocyte", + "NK cell(Spleen)": "NK cell", + "Neutrophil(Spleen)": "neutrophil", + "Plasma cell(Spleen)": "plasma cell", + "T cell(Spleen)": "T cell", + "Marginal zone B cell(Spleen)": "B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py new file mode 100644 index 000000000..184f6cb13 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py @@ -0,0 +1,49 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "stomach" + + self.class_maps = { + "0": { + "Antral mucous cell (Stomach)": "antral mucous cell", + "Dendritic cell(Stomach)": "dendritic cell", + "Dividing cell(Stomach)": "proliferative cell", + "Epithelial cell_Gkn3 high(Stomach)": "epithelial cell", + "Epithelial cell_Krt20 high(Stomach)": "epithelial cell", + "Epithelial cell_Pla2g1b high(Stomach)": "epithelial cell", + "G cell(Stomach)": "G cell", + "Gastric mucosal cell(Stomach)": "gastric mucosal cell", + "Macrophage(Stomach)": "macrophage", + "Muscle cell(Stomach)": "muscle cell", + "Parietal cell (Stomach)": "parietal cell", + "Pit cell_Gm26917 high(Stomach)": "pit cell", + "Pit cell_Ifrd1 high(Stomach)": "pit cell", + "Stomach cell_Gkn2 high(Stomach)": "stomach cell", + "Stomach cell_Mt2 high(Stomach)": "stomach cell", + "Stomach cell_Muc5ac high(Stomach)": "stomach cell", + "Tuft cell(Stomach)": "tuft cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", 
"Stomach_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py new file mode 100644 index 000000000..4f5f041a5 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py @@ -0,0 +1,40 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "thymus" + + self.class_maps = { + "0": { + "abT cell(Thymus)": "abT cell", + "B cell(Thymus)": "B cell", + "DPT cell(Thymus)": "double positive T cell", + "gdT cell (Thymus)": "gdT cell", + "Pre T cell(Thymus)": "immature T cell", + "Proliferating thymocyte(Thymus)": "immature T cell", + "T cell_Id2 high(Thymus)": "abT cell", # TODO check, not sure about this gene + "T cell_Ms4a4b high(Thymus)": "abT cell" # TODO check, not sure about this gene + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py new file mode 100644 index 000000000..fd148575f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "uterus" + + self.class_maps = { + "0": { + "B cell(Uterus)": "B cell", + "Dendritic cell(Uterus)": "dendritic cell", + "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", + "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", + "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", + "Granulocyte(Uterus)": "granulocyte", + "Keratinocyte(Uterus)": "keratinocyte", + "Macrophage(Uterus)": "macrophage", + "Monocyte(Uterus)": "monocyte", + "Muscle cell_Mgp high(Uterus)": "muscle cell", + "Muscle cell_Pcp4 high(Uterus)": "muscle cell", + "Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", + "NK cell(Uterus)": "NK cell", + "Stromal cell_Ccl11 high(Uterus)": "stromal cell", + "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", + "Stromal cell_Gm23935 high(Uterus)": "stromal cell", + "Stromal cell_Has1 high(Uterus)": "stromal cell", + "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", + }, + } + + def 
_load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py new file mode 100644 index 000000000..b9c6ae41c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py @@ -0,0 +1,51 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cell_2018_02_001 + + +class Dataset(Dataset_d10_1016_j_cell_2018_02_001): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.organ = "uterus" + + self.class_maps = { + "0": { + "B cell(Uterus)": "B cell", + "Dendritic cell(Uterus)": "dendritic cell", + "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", + "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", + "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", + "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", + "Granulocyte(Uterus)": "granulocyte", + "Keratinocyte(Uterus)": "keratinocyte", + "Macrophage(Uterus)": "macrophage", + "Monocyte(Uterus)": "monocyte", + "Muscle cell_Mgp high(Uterus)": "muscle cell", + "Muscle cell_Pcp4 high(Uterus)": "muscle cell", + 
"Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", + "NK cell(Uterus)": "NK cell", + "Stromal cell_Ccl11 high(Uterus)": "stromal cell", + "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", + "Stromal cell_Gm23935 high(Uterus)": "stromal cell", + "Stromal cell_Has1 high(Uterus)": "stromal cell", + "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") + fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") + + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py new file mode 100644 index 000000000..e40cb5c55 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -0,0 +1,89 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" + + self.download = 
"https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" + self.download_meta = "private" + + self.author = "Simmons" + self.doi = "10.1016/j.cell.2018.08.067" + self.normalization = "raw" + self.organ = "colon" # ToDo: "lamina propria of mucosa of colon" + self.organism = "human" + self.protocol = "10x" + self.year = 2019 + + self.var_symbol_col = "names" + self.var_ensembl_col = "Accession" + + self.obs_key_state_exact = "donor_organism.diseases.ontology_label" + self.obs_key_healthy = self.obs_key_state_exact + self.healthy_state_healthy = "normal" + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Endothelial 1": "Endothelial", + "Endothelial 2": "Endothelial", + "Glial": "Glial cells", + "Myofibroblasts": "Myofibroblasts", + "Pericyte 1": "Pericytes", + "Pericyte 2": "Pericytes", + "Pericytes": "Pericytes", + "Plasma Cells": "Plasma Cells", + "Smooth Muscle": "Smooth Muscle", + "Stromal 1": "Stromal", + "Stromal 2a": "Stromal", + "Stromal 2b": "Stromal", + "Stromal 3": "Stromal", + "Stromal 4": "Stromal", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") + ] + adata = anndata.read_loom(fn[0]) + ctuc = pd.read_csv(fn[1], sep="\t") + cthealthy = pd.read_csv(fn[2], sep="\t") + adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() + adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() + uc = adata[adata.obs["donor_organism.diseases.ontology_label"] == "ulcerative colitis (disease)"].copy() + bcuc = [i.split("-")[0] for i in ctuc["Barcode"]] + seluc = [] + for i in uc.obs["barcode"]: + seluc.append((uc.obs["barcode"].str.count(i).sum() == 1) and i in bcuc) + uc = 
uc[seluc].copy() + ctuc.index = [i.split("-")[0] for i in ctuc["Barcode"]] + uc.obs["celltype"] = [ctuc.loc[i]["Cluster"] for i in uc.obs["barcode"]] + uc.var = uc.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") + healthy = adata[adata.obs["donor_organism.diseases.ontology_label"] == "normal"].copy() + bchealthy = [i.split("-")[0] for i in cthealthy["Barcode"]] + selhealthy = [] + for i in healthy.obs["barcode"]: + selhealthy.append((healthy.obs["barcode"].str.count(i).sum() == 1) and i in bchealthy) + healthy = healthy[selhealthy].copy() + cthealthy.index = [i.split("-")[0] for i in cthealthy["Barcode"]] + healthy.obs["celltype"] = [cthealthy.loc[i]["Cluster"] for i in healthy.obs["barcode"]] + healthy.var = healthy.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") + self.adata = healthy.concatenate(uc) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py new file mode 100644 index 000000000..9c4f27a3e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -0,0 +1,88 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" + + self.download = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_meta = None + + self.author = "Regev" + self.doi = "10.1016/j.cell.2019.06.029" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" # ToDo: "colonic epithelium" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Cycling TA": "Cycling TA", + "TA 1": "TA 1", + "TA 2": "TA 2", + "Immature Enterocytes 2": "Immature Enterocytes 2", + "Immature Enterocytes 1": "Immature Enterocytes 1", + "Enterocyte Progenitors": "Enterocyte Progenitors", + "Immature Goblet": "Immature Goblet", + "Enterocytes": "Enterocytes", + "Secretory TA": "Secretory TA", + "Best4+ Enterocytes": "Best4+ Enterocytes", + "CD8+ IELs": "CD8+ IELs", + "Goblet": "Goblet cells", + "Stem": "Stem cells", + "Tuft": "Tuft", + "Follicular": "Follicular", + "Enteroendocrine": "Enteroendocrine cells", + "Plasma": "Plasma Cells", + "CD4+ Memory": "CD4+ Memory", + "CD8+ LP": "CD8+ LP", + "CD69- Mast": "CD69- Mast", + "Macrophages": "Macrophage", + "GC": "Glial cells", + "Cycling B": "B cell cycling", + "CD4+ Activated Fos-hi": "CD4+ T Activated Fos-hi", + "CD4+ Activated Fos-lo": "CD4+ T Activated Fos-lo", + "NKs": "NK", + "Cycling T": "Cycling T", + "M cells": "M cells", + "CD69+ Mast": "CD69+ Mast", + "MT-hi": "MT-hi", + "CD8+ IL17+": "CD8+ IL17+", + "CD4+ PD1+": "CD4+ PD1+", + "DC2": "DC2", + "Treg": "Treg", + "ILCs": "ILC", + "DC1": "DC1", + "WNT2B+ Fos-lo 1": "WNT2B+ Fos-lo 1", + "WNT5B+ 2": "WNT5B+ 2", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py new file mode 100644 index 000000000..bba8ea11c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -0,0 +1,70 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" + + self.download = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" + self.download_meta = None + + self.author = "Kenigsberg" + self.doi = "v" + self.healthy = True + self.normalization = "raw" + self.organ = "ileum" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "T cells": "T cells", + "Plasma cells": "Plasma Cells", + "B cells": "B cells", + "MNP": "MNP", + "ILC": "ILC", + "Enterocytes": "Enterocytes", + "Fibs": "Fibroblasts", + "CD36+ endothelium": "CD36+ 
endothelium", + "Progenitors": "Progenitors", + "Goblets": "Goblet cells", + "Glial cells": "Glial cells", + "Cycling": "Cycling", + "ACKR1+ endothelium": "ACKR1+ endothelium", + "Pericytes": "Pericytes", + "Lymphatics": "Lymphatics", + "Mast cells": "Mast cells", + "SM": "Smooth muscle cell", + "TA": "TA", + "Paneth cells": "Paneth cells", + "Enteroendocrines": "Enteroendocrine cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + self.adata = self.adata[self.adata.obs["CellType"] != "Doublets"].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py new file mode 100644 index 000000000..c5222d562 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -0,0 +1,58 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" + + self.download = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" + self.download_meta = None + + self.author = "Strand" + self.doi = "10.1016/j.celrep.2018.11.086" + self.healthy = True + self.normalization = "raw" + self.state_exact = "healthy" + self.organ = "prostate" + self.organism = "human" + self.protocol = "10x" + self.year = 2018 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Basal": "Basal cell", + "Hillock": "Hillock", + "Luminal": "Luminal", + "Endothelia": "Endothelial cell", + "Club": "Club", + "Fibroblast": "Fibroblast", + "Smooth muscle": "Smooth muscle cell", + "Leukocytes": "Leukocytes", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py new file mode 100644 index 000000000..6b28e777a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -0,0 +1,64 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class 
Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" + + self.download = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" + self.download_meta = None + + self.author = "Yanai" + self.doi = "10.1016/j.cels.2016.08.011" + self.healthy = True + self.normalization = "raw" + self.organ = "pancreas" + self.organism = "human" + self.protocol = "inDrop" + self.state_exact = "healthy" + self.year = 2016 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "t_cell": "T cell", + "quiescent_stellate": "Quiescent Stellate cell", + "mast": "Mast cell", + "delta": "Delta cell", + "beta": "Beta cell", + "endothelial": "Endothelial cell", + "macrophage": "Macrophage", + "epsilon": "Epsilon cell", + "activated_stellate": "Activated Stellate cell", + "acinar": "Acinar cell", + "alpha": "Alpha cell", + "ductal": "Ductal cell", + "schwann": "Schwann cell", + "gamma": "Gamma cell", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py new file mode 100644 index 000000000..d03786716 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -0,0 +1,72 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" + + self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" + self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" + + self.author = "Sandberg" + self.doi = "10.1016/j.cmet.2016.08.020" + self.normalization = "raw" + self.organ = "pancreas" + self.organism = "human" + self.protocol = "Smartseq2" + self.year = 2016 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Characteristics[cell type]" + self.obs_key_state_exact = "Characteristics[disease]" + self.obs_key_healthy = self.obs_key_state_exact + + self.healthy_state_healthy = "normal" + + self.class_maps = { + "0": { + "alpha cell": "Alpha cell", + "ductal cell": "Ductal cell", + "beta cell": "Beta cell", + "gamma cell": "Gamma cell", + "acinar cell": "Acinar cell", + "delta cell": "Delta cell", + "PSC cell": "PSC cell", + "unclassified endocrine cell": "Unclassified endocrine cell", + "co-expression cell": "Co-expression cell", + "endothelial cell": "Endothelial cell", + "epsilon cell": "Epsilon cell", + "mast cell": "Mast cell", + 
"MHC class II cell": "MHC class II cell", + "unclassified cell": "Unknown", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), + os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") + ] + df = pd.read_csv(fn[0], sep="\t") + df.index = df.index.get_level_values(0) + df = df.drop("#samples", axis=1) + df = df.T.iloc[:, :26178] + self.adata = anndata.AnnData(df) + self.adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[self.adata.obs.index] + # filter observations which are not cells (empty wells, low quality cells etc.) + self.adata = self.adata[self.adata.obs["Characteristics[cell type]"] != "not applicable"].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py new file mode 100644 index 000000000..2c89b9310 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py @@ -0,0 +1,59 @@ +import anndata +import numpy as np +import os +import pandas +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1016_j_cmet_2019_01_021(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" + + self.author = "Bhushan" + self.doi = "10.1016/j.cmet.2019.01.021" + self.healthy = False + 
self.normalization = "raw" + self.organ = "pancreas" + self.organism = "mouse" + self.protocol = "10x" + self.state_exact = "diabetic" + self.year = 2019 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "acinar": "pancreatic acinar cell", + "ductal": "pancreatic ductal cell", + "leukocyte": "leukocyte", + "T cell(Pancreas)": "t cell", + "B cell(Pancreas)": "b cell", + "beta": "pancreatic B cell", + "alpha": "pancreatic A cell", + "delta": "pancreatic D cell", + "pp": "pancreatic PP cell", + "smooth_muscle": "smooth muscle cell", + "stellate cell": "pancreatic stellate cell", + "fibroblast": "stromal cell", + "endothelial": "endothelial cell" + }, + } + + def _load_generalized(self, fn, fn_meta): + celltypes = pandas.read_csv(fn_meta, index_col=0) + + self.adata = anndata.read_mtx(fn + "_matrix.mtx.gz").transpose() + self.adata.var_names = np.genfromtxt(fn + "_genes.tsv.gz", dtype=str)[:, 1] + self.adata.obs_names = np.genfromtxt(fn + "_barcodes.tsv.gz", dtype=str) + self.adata.var_names_make_unique() + self.adata = self.adata[celltypes.index] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py new file mode 100644 index 000000000..5128278fd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"mouse_pancreas_2019_10x_thompson_001_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py new file mode 100644 index 000000000..142e0f759 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_002_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py new file mode 
100644 index 000000000..e8ba6f466 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_003_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py new file mode 100644 index 000000000..50d2cf114 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_004_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise 
ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py new file mode 100644 index 000000000..06b60eaaa --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_005_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py new file mode 100644 index 000000000..e9c96a3e5 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_006_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py new file mode 100644 index 000000000..efc3678ed --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_007_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in 
constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py new file mode 100644 index 000000000..b64c76432 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py @@ -0,0 +1,26 @@ +import os +from typing import Union +from .base import Dataset_d10_1016_j_cmet_2019_01_021 + + +class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "mouse_pancreas_2019_10x_thompson_008_10.1016/j.cmet.2019.01.021" + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") + fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") + else: + fn_meta = os.path.join(fn, "_annotation.csv") + self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git 
# ---- file: sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py ----
import anndata
import os
from typing import Union
import numpy as np
import scipy.sparse

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human lung 10x data set published under DOI 10.1016/j.devcel.2020.01.033.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default file below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033"

        # Where the files come from.
        self.download = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad"
        self.download_meta = None

        # Data-set-wide meta data.
        self.author = "Spence"
        self.doi = "10.1016/j.devcel.2020.01.033"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "lung"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2020

        # Column names inside the loaded object.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "Cell_type"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "Airway Smooth Muscle": "Airway smooth muscle",
                "Basal cell": "Basal",
                "Bud tip adjacent": "Fetal airway progenitors",
                "Bud tip progenitor": "Fetal airway progenitors",
                "Cartilage": "Cartilage",
                "Club-like secretory": "Secretory",
                "Endothelial": "1_Endothelial",
                "Epithelial": "1_Epithelial",
                "Goblet-like secretory": "Secretory",
                "Hematopoietic, B Cells": "B cell lineage",
                "Hematopoietic, Macrophage": "Macrophages",
                "Hematopoietic, Natural Killer Cell": "Innate lymphoid cells",
                "Hematopoietic, T Cells": "T cell lineage",
                "Immune": "1_Immune",
                "Intermediate ciliated": "Multiciliated lineage",
                "Mesenchyme RSPO2+": "1_Stroma",
                "Mesenchyme SERPINF1-high": "1_Stroma",
                "Multiciliated cell": "Multiciliated lineage",
                "Multiciliated precursor": "Multiciliated lineage",
                "Neuroendocrine": "Rare",
                "Pericyte": "Fibroblasts",
                "RBC": "Erythrocytes",
                "Secretory progenitor": "Secretory",
                "Submucosal gland": "Submucosal Secretory",
                "Submucosal gland basal": "Submucosal Secretory",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` and rescale ``X``.

        :param fn: Optional path of the h5ad file; defaults to
            ``<self.path>/human/lung/miller20.processed.h5ad``.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "lung", "miller20.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        # exp(x) - 1 on every entry, then each row is multiplied by that cell's obs["nUMI"] / 10000.
        # NOTE(review): presumably this reverts a log1p / per-10,000 normalisation so that X matches
        # normalization == "raw" - confirm against the source of the file.
        self.adata.X = np.expm1(self.adata.X)
        umi_per_cell = scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None])
        row_scaled = self.adata.X.multiply(umi_per_cell)
        self.adata.X = row_scaled.multiply(1 / 10000)

        self.set_unkown_class_id(ids=["1_Unicorns and artifacts"])


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py ----
import anndata
import os
from typing import Union
import numpy as np
import scipy.sparse

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human brain DroNc-seq data set published under DOI 10.1038/nmeth.4407.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default file below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407"
        self.download = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad"
        self.download_meta = None

        # Data-set-wide meta data.
        self.author = "Regev"
        self.doi = "10.1038/nmeth.4407"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "brain"
        self.organism = "human"
        self.protocol = "DroNcSeq"
        self.state_exact = "healthy"
        self.year = 2017

        # Column names inside the loaded object.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "exPFC1": "Glutamatergic neurons from the PFC 1",
                "exPFC2": "Glutamatergic neurons from the PFC 2",
                "exDG": "Granule neurons from the hip dentate gyrus region",
                "GABA1": "GABAergic interneurons 1",
                "GABA2": "GABAergic interneurons 2",
                "exCA3": "Pyramidal neurons from the hip CA region 1",
                "exCA1": "Pyramidal neurons from the hip CA region 2",
                "ODC1": "Oligodendrocytes",
                "ASC1": "Astrocytes 1",
                "OPC": "Oligodendrocyte precursors",
                "ASC2": "Astrocytes 2",
                "Unclassified": "Unknown",
                "MG": "Microglia",
                "NSC": "Neuronal stem cells",
                "END": "Endothelial cells",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` and rescale ``X``.

        :param fn: Optional path of the h5ad file; defaults to
            ``<self.path>/human/brain/habib17.processed.h5ad``.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "brain", "habib17.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        # exp(x) - 1 on every entry, then each row is multiplied by that cell's obs["n_counts"] / 10000.
        # NOTE(review): presumably this reverts a log1p / per-10,000 normalisation - confirm.
        self.adata.X = np.expm1(self.adata.X)
        counts_per_cell = scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None])
        row_scaled = self.adata.X.multiply(counts_per_cell)
        self.adata.X = row_scaled.multiply(1 / 10000)


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/__init__.py ----
FILE_PATH = __file__
# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py ----
import anndata
import os
from typing import Union
import numpy as np
import scipy.sparse

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human male gonad 10x data set published under DOI 10.1038/s41422-018-0099-2.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default file below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2"

        # Where the files come from.
        self.download = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad"
        self.download_meta = None

        # Data-set-wide meta data.
        self.author = "Cairns"
        self.doi = "10.1038/s41422-018-0099-2"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "malegonad"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        # Column names inside the loaded object.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Version "0" of the label mapping; every label of this data set maps onto itself.
        self.class_maps = {
            "0": {
                "Elongated Spermatids": "Elongated Spermatids",
                "Leydig cells": "Leydig cells",
                "Early Primary Spermatocytes": "Early Primary Spermatocytes",
                "Round Spermatids": "Round Spermatids",
                "Endothelial cells": "Endothelial cells",
                "Macrophages": "Macrophages",
                "Myoid cells": "Myoid cells",
                "Differentiating Spermatogonia": "Differentiating Spermatogonia",
                "Late primary Spermatocytes": "Late primary Spermatocytes",
                "Spermatogonial Stem cell": "Spermatogonial Stem cell",
                "Sertoli cells": "Sertoli cells",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` and rescale ``X``.

        :param fn: Optional path of the h5ad file; defaults to
            ``<self.path>/human/malegonad/guo18_donor.processed.h5ad``.
        """
        h5ad_path = fn if fn is not None else os.path.join(
            self.path, "human", "malegonad", "guo18_donor.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
        # exp(x) - 1 on every entry, then each row is multiplied by that cell's obs["n_counts"] / 10000.
        # NOTE(review): presumably this reverts a log1p / per-10,000 normalisation - confirm.
        self.adata.X = np.expm1(self.adata.X)
        counts_per_cell = scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None])
        row_scaled = self.adata.X.multiply(counts_per_cell)
        self.adata.X = row_scaled.multiply(1 / 10000)
# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py ----
import anndata
import os
from typing import Union
import pandas as pd

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human liver 10x data set published under DOI 10.1038/s41467-018-06318-7.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7"

        self.download = "private"
        self.download_meta = "private"

        self.author = "McGilvray"
        self.doi = "10.1038/s41467-018-06318-7"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "liver"  # ToDo: "caudate lobe"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "celltype"

        # Version "0" of the mapping; keys are the cluster numbers from the label file, stored as strings.
        self.class_maps = {
            "0": {
                "1": "Hepatocyte 1",
                "2": "Alpha beta T cells",
                "3": "Hepatocyte 2",
                "4": "Inflammatory macrophages",
                "5": "Hepatocyte 3",
                "6": "Hepatocyte 4",
                "7": "Plasma cells",
                "8": "NK cell",
                "9": "Gamma delta T cells 1",
                "10": "Non inflammatory macrophages",
                "11": "Periportal LSECs",
                "12": "Central venous LSECs",
                "13": "Endothelial cell",
                "14": "Hepatocyte 5",
                "15": "Hepatocyte 6",
                "16": "Mature B cells",
                "17": "Cholangiocytes",
                "18": "Gamma delta T cells 2",
                "19": "Erythroid cells",
                "20": "Hepatic stellate cells"
            },
        }

    def _load(self, fn=None):
        """
        Read the expression table and attach the cluster number of every cell as ``obs["celltype"]``.

        :param fn: Optional two-element sequence ``[expression csv, label table]``; defaults to
            ``GSE115469.csv.gz`` and ``GSE115469_labels.txt`` below ``<self.path>/human/liver``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"),
                os.path.join(self.path, "human", "liver", "GSE115469_labels.txt")
            ]
        # The csv is transposed after reading so that the rows of the AnnData are the csv's columns.
        self.adata = anndata.read_csv(fn[0]).T
        celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName")
        # Labels are stored as strings because the keys of class_maps are strings.
        self.adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in self.adata.obs.index]


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py ----
import anndata
import os
from typing import Union
import pandas as pd

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human kidney single-nucleus 10x data set published under DOI 10.1038/s41467-019-10861-2.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2"

        self.download = (
            "https://ftp.ncbi.nlm.nih.gov/geo/series/"
            "GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz"
        )
        self.download_meta = (
            "https://ftp.ncbi.nlm.nih.gov/geo/series/"
            "GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz"
        )

        self.author = "Jain"
        self.doi = "10.1038/s41467-019-10861-2"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "kidney"
        self.organism = "human"
        self.protocol = "10xSn"
        self.state_exact = "healthy"
        self.year = 2019

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "celltype"

        # Version "0" of the label mapping. Some keys carry stray blanks ("... DVR ", "(S3 )"); they are
        # reproduced as found because they must match the labels in the annotation file exactly.
        self.class_maps = {
            "0": {
                "Collecting Duct - Intercalated Cells Type A (cortex)": "Collecting Duct - Intercalated Cells Type A (cortex)",
                "Collecting Duct - Intercalated Cells Type A (medulla)": "Collecting Duct - Intercalated Cells Type A (medulla)",
                "Collecting Duct - Intercalated Cells Type B": "Collecting Duct - Intercalated Cells Type B",
                "Collecting Duct - PCs - Stressed Dissoc Subset": "Collecting Duct - PCs - Stressed Dissoc Subset",
                "Collecting Duct - Principal Cells (cortex)": "Collecting Duct - Principal Cells (cortex)",
                "Collecting Duct - Principal Cells (medulla)": "Collecting Duct - Principal Cells (medulla)",
                "Connecting Tubule": "Connecting tubule",
                "Decending Limb": "Decending Limb",
                "Distal Convoluted Tubule": "Distal Convoluted Tubule",
                "Endothelial Cells (unassigned)": "Endothelial Cells (unassigned)",
                "Endothelial Cells - AEA & DVR ": "Endothelial Cells - AEA & DVR",
                "Endothelial Cells - AVR": "Endothelial Cells - AVR",
                "Endothelial Cells - glomerular capillaries": "Endothelial Cells - glomerular capillaries",
                "Epithelial Cells (unassigned)": "Epithelial Cells (unassigned)",
                "Immune Cells - Macrophages": "Macrophage",
                "Interstitium": "Interstitium",
                "Mesangial Cells": "Mesangial Cells",
                "Podocytes": "Podocyte",
                "Proximal Tubule Epithelial Cells (S1)": "Proximal Tubule Epithelial Cells (S1)",
                "Proximal Tubule Epithelial Cells (S2)": "Proximal Tubule Epithelial Cells (S2)",
                "Proximal Tubule Epithelial Cells (S3)": "Proximal Tubule Epithelial Cells (S3)",
                "Proximal Tubule Epithelial Cells - Fibrinogen+ (S3 )": "Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)",
                "Proximal Tubule Epithelial Cells - Stress/Inflam": "Proximal Tubule Epithelial Cells - Stress/Inflam",
                "Thick Ascending Limb": "Thick ascending limb of Loop of Henle",
                "Thin ascending limb": "Thin ascending limb",
                "Unknown - Novel PT CFH+ Subpopulation (S2)": "Unknown - Novel PT CFH+ Subpopulation (S2)",
                "Vascular Smooth Muscle Cells and pericytes": "Vascular Smooth Muscle Cells and pericytes",
            },
        }

    def _load(self, fn=None):
        """
        Read the UMI table and attach the cluster annotation of every cell as ``obs["celltype"]``.

        :param fn: Optional two-element sequence ``[UMI matrix tsv, cluster annotation csv]``; defaults to the two
            GSE121862 files below ``<self.path>/human/kidney``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"),
                os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz")
            ]
        self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T)
        annot = pd.read_csv(fn[1], index_col=0, dtype="category")
        # The lookup key is the part of the cell name before the first "_" with its first character removed.
        self.adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in self.adata.obs.index]


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py ----
import anndata
import os
from typing import Union
import tarfile
import pandas as pd
import scipy.sparse

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the multi-organ human 10x data set published under DOI 10.1038/s41467-019-12464-3.

    The organ of every cell is kept in ``obs["organ"]`` (see ``obs_key_organ``) rather than in a data-set-wide
    attribute.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    # Sample tag contained in the tar member name -> (donor, organ) written to .obs.
    # Tags are tested in this order; members matching none of them are skipped.
    _SAMPLE_ANNOTATION = {
        "PP001": ("Donor1", "Lung"),
        "PP002": ("Donor1", "Lung"),
        "PP003": ("Donor1", "Bone Marrow"),
        "PP004": ("Donor1", "Bone Marrow"),
        "PP005": ("Donor1", "Lymph Node"),
        "PP006": ("Donor1", "Lymph Node"),
        "PP009": ("Donor2", "Lung"),
        "PP010": ("Donor2", "Lung"),
        "PP011": ("Donor2", "Bone Marrow"),
        "PP012": ("Donor2", "Bone Marrow"),
        "PP013": ("Donor2", "Lymph Node"),
        "PP014": ("Donor2", "Lymph Node"),
    }

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3"

        self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar"
        self.download_meta = "private"

        self.author = "Sims"
        self.doi = "10.1038/s41467-019-12464-3"
        self.healthy = True
        self.normalization = "raw"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        self.var_symbol_col = "Gene"
        self.var_ensembl_col = "Accession"

        self.obs_key_cellontology_original = "cell_ontology_class"
        self.obs_key_organ = "organ"

        self.loaded = False  # TODO do this differently?

        self.class_maps = {
            "0": {},
        }

    def _load(self, fn=None):
        """
        Build ``self.adata`` from the per-sample tables in the tar archive and attach the cell type labels.

        :param fn: Optional three-element sequence ``[tar archive, donor 1 annotation, donor 2 annotation]``;
            defaults to ``GSE126030_RAW.tar``, ``donor1.annotation.txt`` and ``donor2.annotation.txt`` below
            ``<self.path>/human/mixed``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"),
                os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"),
                os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"),
            ]
        samples = []
        with tarfile.open(fn[0]) as tar:
            for member in tar.getmembers():
                annotation = next(
                    (value for tag, value in self._SAMPLE_ANNOTATION.items() if tag in member.name),
                    None,
                )
                if annotation is None:
                    # Not one of the annotated samples: skip it before spending time on parsing.
                    continue
                df = pd.read_csv(tar.extractfile(member.name), compression="gzip", sep="\t")
                # Index genes by their accession without the part after the first ".".
                df.index = [i.split(".")[0] for i in df["Accession"]]
                # axis has to be passed by keyword: current pandas rejects it as a positional argument.
                var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], axis=1)
                if df.columns[-1].startswith("Un"):
                    df.drop(df.columns[-1], axis=1, inplace=True)
                sample = anndata.AnnData(df.T)
                sample.var = var
                sample.obs["donor"] = annotation[0]
                sample.obs["organ"] = annotation[1]
                # Prefix the cell names with a sample-specific tag derived from the member name.
                sample.obs.index = member.name.split("_")[1].split("s")[0] + "nskept." + sample.obs.index
                samples.append(sample)
        self.adata = samples[0].concatenate(samples[1:], index_unique=None)
        self.adata.obs.drop("batch", axis=1, inplace=True)
        # Drop genes without any count.
        self.adata = self.adata[:, self.adata.X.sum(axis=0) > 0].copy()

        self.adata.obs["cell_ontology_class"] = "Unknown"
        df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None)
        df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None)
        # Column 1 of the annotation tables holds the label. Later rows win over earlier ones and donor 2 over
        # donor 1, which is the order in which the labels used to be written one by one.
        labels = pd.concat([df1[1], df2[1]])
        labels = labels[~labels.index.duplicated(keep="last")]
        # Write through a single .loc call on the frame: assigning via obs[col].loc[...] is chained indexing
        # and is not guaranteed to reach the frame. Cells without an annotation keep "Unknown".
        has_label = self.adata.obs.index.isin(labels.index)
        self.adata.obs.loc[has_label, "cell_ontology_class"] = labels.reindex(
            self.adata.obs.index[has_label]).values
        self.adata.X = scipy.sparse.csc_matrix(self.adata.X)

        # TODO we should move this code into the base class
        # If the subset_organs() method has been run before, subset to specified organs
        if "organsubset" in self.__dict__:
            self.adata = self.adata[self.adata.obs["organ"].isin(self.organsubset)]
        # If adata object is empty, set it to None
        if not len(self.adata):
            self.adata = None
        self.loaded = True

    @property
    def ncells(self):
        """
        Number of cells, honouring an organ subset if one has been set.

        With an ``organsubset`` attribute present, the data are loaded on first access and the number of
        remaining cells is returned (0 if nothing is left); otherwise the base class value is returned.
        """
        if "organsubset" not in self.__dict__:
            return super().ncells
        if not self.loaded:
            self._load()
        if self.adata is None:
            return 0
        return self.adata.n_obs
# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py ----
import anndata
import os
from typing import Union

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human eye 10x data set published under DOI 10.1038/s41467-019-12780-8.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default file below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8"

        # Where the files come from.
        self.download = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad"
        self.download_meta = None

        # Data-set-wide meta data.
        self.author = "Hafler"
        self.doi = "10.1038/s41467-019-12780-8"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "eye"  # ToDo: "retina"
        self.organism = "human"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2019

        # Column names inside the loaded object.
        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "ACs": "Amacrine cell",
                "BPs": "BPs",
                "Cones": "Retinal cone cell",
                "Endo": "Endothelial cell",
                "HCs": "Horizontal cells",
                "Macroglia": "Macroglia",
                "Microglia": "Microglia",
                "RGCs": "Retinal ganglion cell",
                "Rods": "Rods",
            },
        }

    def _load(self, fn=None):
        """
        Read the h5ad file into ``self.adata`` without further processing.

        :param fn: Optional path of the h5ad file; defaults to
            ``<self.path>/human/eye/menon19.processed.h5ad``.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "eye", "menon19.processed.h5ad")
        self.adata = anndata.read(h5ad_path)
# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py ----
import os
from typing import Union
import pandas as pd
import anndata

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the 10x part (E-MTAB-6701) of the data set published under DOI 10.1038/s41586-018-0698-6.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        # NOTE(review): unlike the loader file name, this ID carries no "_001" running number before the
        # DOI - confirm whether that is intended.
        self.id = "human_placenta_2018_10x_ventotormo_10.1038/s41586-018-0698-6"

        self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.1.zip"
        self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip"

        self.author = "Teichmann"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "placenta,decidua,blood"  # ToDo: move this into .obs_key_organ?
        self.organism = "human"
        self.doi = "10.1038/s41586-018-0698-6"
        self.protocol = "10x"
        self.state_exact = "healthy"
        self.year = 2018

        self.var_symbol_col = "names"
        self.var_ensembl_col = "ensembl"

        self.obs_key_cellontology_original = "annotation"
        # ToDo: further anatomical information for subtissue in "location"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "DC1": "Dendritic Cells 1",
                "DC2": "Dendritic Cells 2",
                "EVT": "Extravillous Trophoblasts",
                "Endo (f)": "Endothelial Cells f",
                "Endo (m)": "Endothelial Cells m",
                "Endo L": "Endothelial Cells L",
                "Epi1": "Epithelial Glandular Cells 1",
                "Epi2": "Epithelial Glandular Cells 2",
                "Granulocytes": "Granulocytes",
                "HB": "Hofbauer Cells",
                "ILC3": "ILC3",
                "MO": "Monocyte",
                "NK CD16+": "NK Cells CD16+",
                "NK CD16-": "NK Cells CD16-",
                "Plasma": "B cell (Plasmocyte)",
                "SCT": "Syncytiotrophoblasts",
                "Tcells": "T cell",
                "VCT": "Villous Cytotrophoblasts",
                "dM1": "Decidual Macrophages 1",
                "dM2": "Decidual Macrophages 2",
                "dM3": "Decidual Macrophages 3",
                "dNK p": "Decidual NK Cells p",
                "dNK1": "Decidual NK Cells 1",
                "dNK2": "Decidual NK Cells 2",
                "dNK3": "Decidual NK Cells 3",
                "dP1": "Perivascular Cells 1",
                "dP2": "Perivascular Cells 2",
                "dS1": "Decidual Stromal Cells 1",
                "dS2": "Decidual Stromal Cells 2",
                "dS3": "Decidual Stromal Cells 3",
                "fFB1": "Fibroblasts 1",
                "fFB2": "Fibroblasts 2",
            },
        }

    def _load(self, fn=None):
        """
        Read the expression table and the cell meta data and assemble ``self.adata``.

        :param fn: Optional two-element sequence ``[expression table, meta data table]``; defaults to
            ``E-MTAB-6701.processed.1.zip`` and ``E-MTAB-6701.processed.2.zip`` below
            ``<self.path>/human/placenta``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.1.zip"),
                os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"),
            ]
        expression = pd.read_csv(fn[0], sep="\t", index_col="Gene")
        self.adata = anndata.AnnData(expression.T)
        cell_meta = pd.read_csv(fn[1], sep="\t")
        # Copy every meta data column into .obs, looked up cell by cell.
        for column in cell_meta.columns:
            self.adata.obs[column] = [cell_meta.loc[cell][column] for cell in self.adata.obs.index]

        # Gene identifiers are split at "_": field 1 goes to "ensembl", field 0 to "names".
        self.adata.var["ensembl"] = [gene.split("_")[1] for gene in self.adata.var.index]
        self.adata.var["names"] = [gene.split("_")[0] for gene in self.adata.var.index]
        reindexed_var = self.adata.var.reset_index().reset_index().drop("index", axis=1)
        self.adata.var = reindexed_var
        # NOTE(review): at this point var carries the index produced by reset_index() above, so the names
        # below are compared against that new index rather than against the gene identifiers - confirm intent.
        excluded = ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"]
        keep = ~self.adata.var.index.isin(excluded)
        self.adata = self.adata[:, keep].copy()


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py ----
import os
from typing import Union
import pandas as pd
import anndata

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the Smart-seq2 part (E-MTAB-6678) of the data set published under DOI 10.1038/s41586-018-0698-6.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        # NOTE(review): unlike the loader file name, this ID carries no "_001" running number before the
        # DOI - confirm whether that is intended.
        self.id = "human_placenta_2018_smartseq2_ventotormo_10.1038/s41586-018-0698-6"

        self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.1.zip"
        self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.2.zip"

        self.author = "Teichmann"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "placenta,decidua,blood"  # ToDo: move this into .obs_key_organ?
        self.organism = "human"
        self.doi = "10.1038/s41586-018-0698-6"
        self.protocol = "Smartseq2"
        self.state_exact = "healthy"
        self.year = 2018

        self.var_symbol_col = "names"
        self.var_ensembl_col = "ensembl"

        self.obs_key_cellontology_original = "annotation"
        # ToDo: further anatomical information for subtissue in "location"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "DC1": "Dendritic Cells 1",
                "DC2": "Dendritic Cells 2",
                "EVT": "Extravillous Trophoblasts",
                "Endo (f)": "Endothelial Cells f",
                "Endo (m)": "Endothelial Cells m",
                "Endo L": "Endothelial Cells L",
                "Epi1": "Epithelial Glandular Cells 1",
                "Epi2": "Epithelial Glandular Cells 2",
                "Granulocytes": "Granulocytes",
                "HB": "Hofbauer Cells",
                "ILC3": "ILC3",
                "MO": "Monocyte",
                "NK CD16+": "NK Cells CD16+",
                "NK CD16-": "NK Cells CD16-",
                "Plasma": "B cell (Plasmocyte)",
                "SCT": "Syncytiotrophoblasts",
                "Tcells": "T cell",
                "VCT": "Villous Cytotrophoblasts",
                "dM1": "Decidual Macrophages 1",
                "dM2": "Decidual Macrophages 2",
                "dM3": "Decidual Macrophages 3",
                "dNK p": "Decidual NK Cells p",
                "dNK1": "Decidual NK Cells 1",
                "dNK2": "Decidual NK Cells 2",
                "dNK3": "Decidual NK Cells 3",
                "dP1": "Perivascular Cells 1",
                "dP2": "Perivascular Cells 2",
                "dS1": "Decidual Stromal Cells 1",
                "dS2": "Decidual Stromal Cells 2",
                "dS3": "Decidual Stromal Cells 3",
                "fFB1": "Fibroblasts 1",
                "fFB2": "Fibroblasts 2",
            },
        }

    def _load(self, fn=None):
        """
        Read the expression table and the cell meta data and assemble ``self.adata``.

        :param fn: Optional two-element sequence ``[expression table, meta data table]``; defaults to
            ``E-MTAB-6678.processed.1.zip`` and ``E-MTAB-6678.processed.2.zip`` below
            ``<self.path>/human/placenta``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"),
                os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"),
            ]
        expression = pd.read_csv(fn[0], sep="\t", index_col="Gene")
        self.adata = anndata.AnnData(expression.T)
        cell_meta = pd.read_csv(fn[1], sep="\t")
        # Copy every meta data column into .obs, looked up cell by cell.
        for column in cell_meta.columns:
            self.adata.obs[column] = [cell_meta.loc[cell][column] for cell in self.adata.obs.index]

        # Gene identifiers are split at "_": field 1 goes to "ensembl", field 0 to "names".
        self.adata.var["ensembl"] = [gene.split("_")[1] for gene in self.adata.var.index]
        self.adata.var["names"] = [gene.split("_")[0] for gene in self.adata.var.index]
        reindexed_var = self.adata.var.reset_index().reset_index().drop("index", axis=1)
        self.adata.var = reindexed_var
        # NOTE(review): at this point var carries the index produced by reset_index() above, so the names
        # below are compared against that new index rather than against the gene identifiers - confirm intent.
        excluded = ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"]
        keep = ~self.adata.var.index.isin(excluded)
        self.adata = self.adata[:, keep].copy()


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_mCELSeq2_aizarani_001.py ----
import anndata
import os
from typing import Union
import pandas as pd

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human liver mCEL-Seq2 data set published under DOI 10.1038/s41586-019-1373-2.

    :param path: Forwarded to the base constructor; ``_load`` resolves its default files below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2"

        self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz"
        self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz"

        self.author = "Gruen"
        self.doi = "10.1038/s41586-019-1373-2"
        self.healthy = True
        self.normalization = "raw"
        self.organ = "liver"
        self.organism = "human"
        self.protocol = "mCEL-Seq2"
        self.state_exact = "healthy"
        self.year = 2019

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "CellType"

        # Version "0" of the mapping; keys are the cluster numbers from the partition file, stored as strings.
        self.class_maps = {
            "0": {
                "1": "NK, NKT and T cells",
                "2": "Kupffer Cell",
                "3": "NK, NKT and T cells",
                "4": "Cholangiocytes",
                "5": "NK, NKT and T cells",
                "6": "Kupffer Cell",
                "7": "Cholangiocytes",
                "8": "B Cell",
                "9": "Liver sinusoidal endothelial cells",
                "10": "Macrovascular endothelial cells",
                "11": "Hepatocyte",
                "12": "NK, NKT and T cells",
                "13": "Liver sinusoidal endothelial cells",
                "14": "Hepatocyte",
                "15": "Other endothelial cells",
                "16": "Unknown",
                "17": "Hepatocyte",
                "18": "NK, NKT and T cells",
                "19": "Unknown",
                "20": "Liver sinusoidal endothelial cells",
                "21": "Macrovascular endothelial cells",
                "22": "B Cell",
                "23": "Kupffer Cell",
                "24": "Cholangiocytes",
                "25": "Kupffer Cell",
                "26": "Other endothelial cells",
                "27": "Unknown",
                "28": "NK, NKT and T cells",
                "29": "Macrovascular endothelial cells",
                "30": "Hepatocyte",
                "31": "Kupffer Cell",
                "32": "Liver sinusoidal endothelial cells",
                "33": "Hepatic stellate cells",
                "34": "B Cell",
                "35": "Other endothelial cells",
                "36": "Unknown",
                "37": "Unknown",
                "38": "B Cell",
                "39": "Cholangiocytes"
            },
        }

    def _load(self, fn=None):
        """
        Read the expression table, keep the cells listed in the partition file and attach their cluster number.

        :param fn: Optional two-element sequence ``[expression table, cluster partition table]``; defaults to
            the two GSE124395 files below ``<self.path>/human/liver``.
        """
        if fn is None:
            fn = [
                os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"),
                os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz")
            ]
        expression = pd.read_csv(fn[0], sep="\t")
        self.adata = anndata.AnnData(expression.T)
        celltype_df = pd.read_csv(fn[1], sep=" ")
        # Only cells that appear in the partition table are kept.
        in_partition = [cell in celltype_df.index for cell in self.adata.obs.index]
        self.adata = self.adata[in_partition].copy()
        # Cluster numbers are stored as strings because the keys of class_maps are strings.
        self.adata.obs["CellType"] = [str(celltype_df.loc[cell]["sct@cpart"]) for cell in self.adata.obs.index]
# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py ----
import anndata
import os
from typing import Union

from sfaira.data import DatasetBase


class Dataset(DatasetBase):
    """
    Loader for the human liver 10x data set published under DOI 10.1038/s41586-019-1631-3.

    The Rdata file behind the link in the `download` attribute of this class has to be converted by hand before
    this loader can read it. The snippet below uses the rpy2 and anndata2ri python packages to turn the R object
    into anndata (pip install anndata2ri); run it in a jupyter notebook:

    ## Notebook Cell 1
    import anndata2ri
    anndata2ri.activate()
    %load_ext rpy2.ipython

    ## Notebook Cell 2
    %%R -o sce
    library(Seurat)
    load("tissue.rdata")
    new_obj = CreateSeuratObject(counts = tissue@raw.data)
    new_obj@meta.data = tissue@meta.data
    sce <- as.SingleCellExperiment(new_obj)

    ## Notebook cell 3
    sce.write("ramachandran.h5ad")

    :param path: Forwarded to the base constructor; ``_load`` resolves its default file below ``self.path``.
    :param meta_path: Forwarded unchanged to the base constructor.
    :param cache_path: Forwarded unchanged to the base constructor.
    :param kwargs: Forwarded unchanged to the base constructor.
    """

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3"

        self.download = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata"
        self.download_meta = None

        self.author = "Henderson"
        self.doi = "10.1038/s41586-019-1631-3"
        self.normalization = "raw"
        self.organ = "liver"
        self.organism = "human"
        self.protocol = "10x"
        self.year = 2019

        self.var_symbol_col = "index"

        self.obs_key_cellontology_original = "annotation_lineage"
        # Health status is not a data-set-wide constant here: both the exact state and the healthy flag are
        # keyed on the "condition" column, with "Uninjured" as the healthy value.
        self.obs_key_state_exact = "condition"
        self.obs_key_healthy = self.obs_key_state_exact
        self.healthy_state_healthy = "Uninjured"

        # Version "0" of the mapping from the labels found in the file to the harmonised label set.
        self.class_maps = {
            "0": {
                "MPs": "MP",
                "Tcells": "Tcells",
                "ILCs": "ILC",
                "Endothelia": "Endothelia",
                "Bcells": "Bcells",
                "pDCs": "pDCs",
                "Plasma Bcells": "Plasma B cell",
                "Mast cells": "Mast cell",
                "Mesenchyme": "Mesenchyme",
                "Cholangiocytes": "Cholangiocytes",
                "Hepatocytes": "Hepatocytes",
                "Mesothelia": "Mesothelia",
            },
        }

    def _load(self, fn=None):
        """
        Read the converted h5ad file into ``self.adata`` without further processing.

        :param fn: Optional path of the h5ad file; defaults to ``<self.path>/human/liver/ramachandran.h5ad``.
        """
        h5ad_path = fn if fn is not None else os.path.join(self.path, "human", "liver", "ramachandran.h5ad")
        self.adata = anndata.read(h5ad_path)


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/__init__.py ----
FILE_PATH = __file__


# ---- file: sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py ----
import anndata
import os
from typing import Union

from sfaira.data import DatasetBase


class Dataset(DatasetBase):

    def __init__(
        self,
        path: Union[str, None] = None,
        meta_path: Union[str, None] = None,
        cache_path: Union[str, None] = None,
        **kwargs
    ):
        super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs)
        self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y"

        self.download = "private"
        self.download_meta = "private"

        self.author = "Haniffa"
+ self.doi = "10.1038/s41586-019-1652-y" + self.healthy = True + self.normalization = "raw" + self.organ = "liver" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "cell.labels" + + self.class_maps = { + "0": { + "B cell": "Mature B cells", + "DC1": "Dendritic cell 1", + "DC2": "Dendritic cell 2", + "DC precursor": "Dendritic cell precursor", + "Early Erythroid": "Early Erythroid", + "Early lymphoid_T lymphocyte": "Early lymphoid T lymphocyte", + "Endothelial cell": "Endothelial cell", + "Fibroblast": "Fibroblast", + "HSC_MPP": "HSC MPP", + "Hepatocyte": "Hepatocyte", + "ILC precursor": "ILC precursor", + "Kupffer Cell": "Kupffer Cell", + "Late Erythroid": "Late Erythroid", + "MEMP": "MEMP", + "Mast cell": "Mast cell", + "Megakaryocyte": "Megakaryocyte", + "Mid Erythroid": "Mid Erythroid", + "Mono-Mac": "Mono Macrophage", + "Monocyte": "Monocyte", + "Monocyte precursor": "Monocyte precursor", + "NK": "NK cell", + "Neutrophil-myeloid progenitor": "Neutrophil myeloid progenitor", + "Pre pro B cell": "Pre pro B cell", + "VCAM1+ EI macrophage": "VCAM1pos EI macrophage", + "pDC precursor": "pDendritic cell precursor", + "pre-B cell": "pre B cell", + "pro-B cell": "pro B cell" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") + self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py new file mode 100644 index 
000000000..75b926aa6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -0,0 +1,156 @@ +import anndata +import numpy as np +import os +import pandas as pd +import scipy.sparse +from typing import Union +import urllib.request +import zipfile + +from sfaira.data import DatasetBase + + +class Dataset_d10_1038_s41586_020_2157_4(DatasetBase): + """ + This is a dataloader template for loaders cell landscape data. + """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + self.download = "https://ndownloader.figshare.com/files/17727365" + self.download_meta = [ + "https://ndownloader.figshare.com/files/21758835", + "https://ndownloader.figshare.com/files/22447898", + ] + + self.author = "Guo" + self.doi = "10.1038/s41586-020-2157-4" + self.healthy = True + self.normalization = "raw" + self.organism = "human" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2020 + + self.obs_key_cellontology_original = "cell_ontology_class" + self.obs_key_dev_stage = "dev_stage" + self.obs_key_sex = "gender" + self.obs_key_age = "age" + + self.var_symbol_col = "index" + + def _download(self): + # download required files from loaders cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471 + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/17727365", + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad") + )) + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/21758835", + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx") + )) + + print(urllib.request.urlretrieve( + "https://ndownloader.figshare.com/files/22447898", + os.path.join(self.path, "human", self._directory_formatted_doi, 
"annotation_rmbatch_data_revised417.zip") + )) + # extract the downloaded zip archive + with zipfile.ZipFile( + os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417.zip"), + "r" + ) as zip_ref: + zip_ref.extractall(os.path.join(self.path, self._directory_formatted_doi)) + + def _load_generalized(self, fn, sample_id: str): + """ + Attempt to find file, cache entire HCL if file was not found. + + :param fn: + :return: + """ + adata = anndata.read(os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad")) + # convert to sparse matrix + adata.X = scipy.sparse.csr_matrix(adata.X).copy() + + # harmonise annotations + for col in ["batch", "tissue"]: + adata.obs[col] = adata.obs[col].astype("str") + adata.obs.index = adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", + "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) + adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] + + # load celltype labels and harmonise them + # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: + fig1_anno = pd.read_excel( + os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + index_col="cellnames", + engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables + ) + fig1_anno.index = fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + + # check that the order of cells and cell labels is the same + assert np.all(fig1_anno.index == adata.obs.index) + + # add 
annotations to adata object and rename columns + adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) + adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", + "celltype_global"] + + # add sample-wise annotations to the full adata object + df = pd.DataFrame( + columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", + "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) + for f in os.listdir( + os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417") + ): + df1 = pd.read_csv( + os.path.join( + self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417", f + ), encoding="unicode_escape") + df = pd.concat([df, df1], sort=True) + df = df.set_index("Cell_id") + adata = adata[[i in df.index for i in adata.obs.index]].copy() + a_idx = adata.obs.index.copy() + adata.obs = pd.concat([adata.obs, df[["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"]]], axis=1) + assert np.all(a_idx == adata.obs.index) + + # remove mouse cells from the object # ToDo: add this back in as mouse data sets? 
+ adata = adata[adata.obs["Source"] != "MCA2.0"].copy() + + # tidy up the column names of the obs annotations + adata.obs.columns = ["sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", + "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", + "protocol", "source"] + + # create a tidy organ annotation which is then used in sfaira + adata.obs["organ"] = adata.obs["sub_tissue"] \ + .str.replace("Adult", "") \ + .str.replace("Fetal", "") \ + .str.replace("Neonatal", "") \ + .str.replace("Transverse", "") \ + .str.replace("Sigmoid", "") \ + .str.replace("Ascending", "") \ + .str.replace("Cord", "") \ + .str.replace("Peripheral", "") \ + .str.replace("CD34P", "") \ + .str.replace("Cerebellum", "Brain") \ + .str.replace("TemporalLobe", "Brain") \ + .str.replace("BoneMarrow", "Bone") \ + .str.replace("Spinal", "SpinalCord") \ + .str.replace("Intestine", "Stomach") \ + .str.replace("Eyes", "Eye") \ + .str.lower() + + self.adata = adata[adata.obs["sample"] == sample_id].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py new file mode 100644 index 000000000..1ca7d67ed --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "adipose" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="AdultAdipose_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py new file mode 100644 index 000000000..3bbf998fe --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="NeonatalAdrenalGland_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py new file mode 100644 index 000000000..f103be794 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + 
self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py new file mode 100644 index 000000000..28fa28b71 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py new file mode 100644 index 000000000..9250204e6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py new file mode 100644 index 000000000..6df0c6ba7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py new file mode 100644 index 000000000..6bdad262b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "adrenalgland" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py new file mode 100644 index 000000000..e8fef6576 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "artery" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultArtery_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py new file mode 100644 index 000000000..60f46cc6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultBladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py new file mode 100644 index 000000000..80fce100f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultBladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py new file mode 100644 index 000000000..6a275fd56 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "bladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultGallbladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py new file mode 100644 index 000000000..f42cdfd46 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py new file mode 100644 index 000000000..f12385f4c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="PeripheralBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py new file mode 100644 index 000000000..2b5b470ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBlood_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py new file mode 100644 index 000000000..ad3e7090c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py new file mode 100644 index 000000000..9eb937bef --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBloodCD34P_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py new file mode 100644 index 000000000..015d311ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBloodCD34P_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py new file mode 100644 index 000000000..a3ada5b3f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "blood" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="CordBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py new file mode 100644 index 000000000..9f04b99d6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "bone" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="BoneMarrow_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py new file mode 100644 index 000000000..3bf057903 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "bone" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="BoneMarrow_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py new file mode 100644 index 000000000..373b2c325 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py new file mode 100644 index 000000000..f6b377c91 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py @@ -0,0 +1,54 @@ +from typing 
import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_5") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py new file mode 100644 index 000000000..78487bf6f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": 
"Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py new file mode 100644 index 000000000..bd4c0cfde --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", 
+ "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTemporalLobe_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py new file mode 100644 index 000000000..ea462000c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal 
Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalBrain_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py new file mode 100644 index 000000000..749d7d71f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "brain" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Astrocyte": "Astrocyte", + "B cell": "B cell", + "B cell 
(Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Endothelial cell (APC)": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal endocrine cell": "Fetal endocrine cell", + "Fetal enterocyte ": "Fetal enterocyte ", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal Neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Oligodendrocyte": "Oligodendrocytes", + "Primordial germ cell": "Primordial germ cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "Neuronal stem cells" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultCerebellum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py new file mode 100644 index 000000000..8524c25e4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "calvaria" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalCalvaria_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py new file mode 100644 index 000000000..521bb924e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "cervix" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultCervix_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py new file mode 100644 index 000000000..560e297aa --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, 
+ **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "chorionicvillus" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="ChorionicVillus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py new file mode 100644 index 000000000..01ce9f3ee --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal 
progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultAscendingColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py new file mode 100644 index 000000000..1082dfa60 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell 
(endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py new file mode 100644 index 000000000..099d79147 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal 
epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTransverseColon_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py new file mode 100644 index 000000000..30da95dfe --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py @@ -0,0 +1,50 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Colon" + self.class_maps = { + "0": { + "Enterocyte progenitor": "Enterocyte Progenitors", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Enterocyte": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "T cell": "T cell", + "Stromal cell": "Stromal", + "Macrophage": "Macrophage", + "B cell": "B cell", + "Smooth muscle cell": "Smooth Muscle", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial", + "Dendritic cell": "Dendritic 
cell", + "Mast cell": "Mast cell", + "Endothelial cell": "Endothelial", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Enterocyte Progenitors", + "Fibroblast": "Fibroblast", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", + "Fetal stromal cell": "Stromal", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Monocyte": "Monocyte", + "Erythroid cell": "Erythroid cell", + "Fetal endocrine cell": "Enteroendocrine cells", + "Primordial germ cell": "Primordial germ cell", + "Fetal enterocyte": "Fetal enterocyte", + "M2 Macrophage": "Macrophage", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSigmoidColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py new file mode 100644 index 000000000..122bd7bf8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "duodenum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultDuodenum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py new file mode 100644 index 000000000..c50ae3fac --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "epityphlon" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEpityphlon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py new file mode 100644 index 000000000..2f948e5a0 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py @@ -0,0 +1,47 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Esophagus" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Basal cell": "Basal cell", + "Stratified epithelial cell": "Stratified epithelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + 
"Macrophage": "Macrophage", + "B cell": "B cell", + "T cell": "T cell", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Stromal cell": "Stromal cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "Neutrophil": "Neutrophil", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Fetal stromal cell": "Fetal stromal cell", + "CB CD34+": "CB CD34+", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Loop of Henle": "Loop of Henle", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEsophagus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py new file mode 100644 index 000000000..af43f661a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py @@ -0,0 +1,47 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Esophagus" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Basal cell": "Basal cell", + 
"Stratified epithelial cell": "Stratified epithelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Macrophage": "Macrophage", + "B cell": "B cell", + "T cell": "T cell", + "Dendritic cell": "Dendritic cell", + "Mast cell": "Mast cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Stromal cell": "Stromal cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "Neutrophil": "Neutrophil", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Fetal stromal cell": "Fetal stromal cell", + "CB CD34+": "CB CD34+", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Loop of Henle": "Loop of Henle", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultEsophagus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py new file mode 100644 index 000000000..10dde6d24 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py @@ -0,0 +1,46 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Eye" + 
self.class_maps = { + "0": { + "Fetal neuron": "Fetal neuron", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Erythroid cell": "Erythroid cell", + "Primordial germ cell": "Primordial germ cell", + "Endothelial cell": "Endothelial cell", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fetal fibroblast": "Fibroblast", + "Fetal Neuron": "Fetal neuron", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Dendritic cell": "Dendritic cell", + "Fetal endocrine cell": "Fetal endocrine cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Basal cell": "Basal cell", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "Stratified epithelial cell": "Stratified epithelial cell", + "CB CD34+": "CB CD34_pos", + "hESC": "hESC" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalEyes_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py new file mode 100644 index 000000000..fec492e2e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "fallopiantube" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultFallopiantube_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py new file mode 100644 index 000000000..1fcb34991 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "femalegonad" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py new file mode 100644 index 000000000..257d7750a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "femalegonad" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py new file mode 100644 index 000000000..b3674ca67 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "gallbladder" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultGallbladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py new file mode 100644 index 000000000..4b25db497 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py new file mode 100644 index 000000000..839528da7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py new file mode 100644 index 000000000..a4cc0fdfb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py new file mode 100644 index 000000000..7439b9fec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "heart" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py new file mode 100644 index 000000000..625583aa7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py @@ -0,0 +1,19 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "hesc" + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="HESC_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py new file mode 100644 index 000000000..66d4209fc --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py @@ -0,0 +1,49 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "ileum" + self.class_maps = { + "0": { + "B cell": "B cells", + "B cell (Plasmocyte)": "Plasma Cells", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte": "Enterocytes", + "Enterocyte progenitor": "Enterocytes", + "Epithelial cell": "Epithelial cell", + "Fetal Neuron": "Fetal neuron", + "Fetal enterocyte": "Enterocytes", + "Fetal epithelial progenitor": "Progenitors", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblasts", + "Hepatocyte/Endodermal cell": "Hepatocyte/Endodermal cell", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cells", + "Monocyte": "Monocyte", + "Neutrophil (RPS high)": 
"Neutrophil (RPS high)", + "Proliferating T cell": "T cells", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cells", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultIleum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py new file mode 100644 index 000000000..9db8f62a8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "jejunum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultJejunum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py new file mode 100644 index 000000000..77b2e117e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + 
"Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py new file mode 100644 index 000000000..42b21e827 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid 
progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py new file mode 100644 index 000000000..2811fe77f --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated 
cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py new file mode 100644 index 000000000..07cad1336 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + 
"Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric 
bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py new file mode 100644 index 000000000..751e9470f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial 
progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py new file mode 100644 index 000000000..a1b0a195e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = 
None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell (intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": 
"Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py new file mode 100644 index 000000000..9793a4b2e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py @@ -0,0 +1,72 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "Kidney" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Epithelial cell 
(intermediated)": "Intermediated cell", + "Erythroid cell": "Erythroid", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal chondrocyte": "Chondrocyte", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Stroma progenitor", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stroma progenitor", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Intercalated cell": "Intercalated cell", + "Intermediated cell": "Intermediated cell", + "Kidney intercalated cell": "Intercalated cell", + "Loop of Henle": "Loop of Henle", + "M2 Macrophage": "M2 Macrophage", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalKidney_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py new file mode 100644 index 
000000000..1318b3dba --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="AdultLiver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py new file mode 100644 index 000000000..59b07abf1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas 
exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLiver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py new file mode 100644 index 000000000..1701446f1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine 
cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLiver_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py new file mode 100644 index 000000000..119f16030 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": 
"Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Liver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py new file mode 100644 index 000000000..94fd323db --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "Liver" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "Plasma B cell", + "CB CD34+": "CB 
CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Late Erythroid", + "Erythroid progenitor cell (RP high)": "Early Erythroid", + "Fetal enterocyte ": "Enterocyte ", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Non inflammatory macrophages", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Myeloid cell": "Myeloid cell", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", + "Smooth muscle cell": "Smooth muscle cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Liver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py new file mode 100644 index 000000000..2487b789a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": 
"1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py new file mode 100644 index 000000000..d1d6a73c4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": 
"1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py new file mode 100644 index 000000000..9dc30ed7e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS 
high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py new file mode 100644 index 000000000..6a85c3db0 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + "Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric 
endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py new file mode 100644 index 000000000..0083c8e5b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py @@ -0,0 +1,73 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "lung" + self.class_maps = { + "0": { + "AT2 cell": "AT2", + "Antigen presenting cell (RPS high)": "unknown", + "B cell": "B cell lineage", + "B cell (Plasmocyte)": "B cell lineage", + "Basal cell": "Basal", + "CB CD34+": "Fetal airway progenitors", + 
"Chondrocyte": "1_Stroma", + "Dendritic cell": "Dendritic cells", + "Endothelial cell": "1_Endothelial", + "Endothelial cell (APC)": "1_Endothelial", + "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", + "Enterocyte progenitor": "1_Epithelial", + "Epithelial cell": "1_Epithelial", + "Epithelial cell (intermediated)": "1_Epithelial", + "Erythroid cell": "Erythrocytes", + "Erythroid progenitor cell (RP high)": "Erythrocytes", + "Fasciculata cell": "unknown", + "Fetal Neuron": "unknown", + "Fetal chondrocyte": "1_Stroma", + "Fetal endocrine cell": "unknown", + "Fetal enterocyte ": "1_Epithelial", + "Fetal epithelial progenitor": "1_Epithelial", + "Fetal fibroblast": "Fibroblasts", + "Fetal mesenchymal progenitor": "1_Stroma", + "Fetal neuron": "unknown", + "Fetal skeletal muscle cell": "unknown", + "Fetal stromal cell": "1_Stroma", + "Fibroblast": "Fibroblasts", + "Gastric endocrine cell": "unknown", + "Goblet cell": "Secretory", + "Kidney intercalated cell": "unknown", + "Loop of Henle": "unknown", + "M2 Macrophage": "Macrophages", + "Macrophage": "Macrophages", + "Mast cell": "Mast cells", + "Mesothelial cell": "Mast cells", + "Monocyte": "Monocytes", + "Myeloid cell": "2_Myeloid", + "Neutrophil": "Neutrophilic", + "Neutrophil (RPS high)": "Neutrophilic", + "Primordial germ cell": "unknown", + "Proliferating T cell": "T cell lineage", + "Proximal tubule progenitor": "unknown", + "Sinusoidal endothelial cell": "1_Endothelial", + "Smooth muscle cell": "2_Smooth Muscle", + "Stratified epithelial cell": "1_Epithelial", + "Stromal cell": "1_Stroma", + "T cell": "T cell lineage", + "Ventricle cardiomyocyte": "1_Stroma", + "hESC": "Fetal airway progenitors", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py new file mode 100644 index 000000000..9acf2ac6e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "malegonad" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal acinar cell": "Fetal acinar cell", + "Fetal chondrocyte": "Fetal chondrocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", + "Loop of Henle": "Loop of Henle", + "Macrophage": "Macrophages", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + 
"T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py new file mode 100644 index 000000000..8964fa222 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py @@ -0,0 +1,51 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "malegonad" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cells", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fasciculata cell": "Fasciculata cell", + "Fetal acinar cell": "Fetal acinar cell", + "Fetal chondrocyte": "Fetal chondrocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal neuron": "Fetal neuron", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", + "Loop of Henle": "Loop of Henle", + "Macrophage": "Macrophages", + "Monocyte": 
"Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "Ureteric bud cell": "Ureteric bud cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py new file mode 100644 index 000000000..1ac3d1e92 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "muscle" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py new file mode 100644 index 000000000..9af4d2f15 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + 
self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "muscle" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py new file mode 100644 index 000000000..82e0aa456 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py new file mode 100644 index 000000000..c903a8e78 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, 
+ path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py new file mode 100644 index 000000000..37652c8ca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "omentum" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultOmentum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py new file mode 100644 index 000000000..a5b273cbd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, 
+ path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal 
cell": "Stromal cell", + "T cell": "T cell" + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py new file mode 100644 index 000000000..c8eb43976 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal 
muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py new file mode 100644 index 000000000..aa5ba9d05 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py @@ -0,0 +1,61 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", 
+ "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py new file mode 100644 index 000000000..02662bd9f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py @@ -0,0 +1,61 @@ +from typing import Union +from .base 
import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "Pancreas" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Neuron", + "Fetal acinar cell": "Acinar cell", + "Fetal endocrine cell": "Endocrine cell", + "Fetal enterocyte ": "Enterocyte", + "Fetal epithelial progenitor": "Epithelial progenitor", + "Fetal fibroblast": "Fibroblast", + "Fetal mesenchymal progenitor": "Mesenchymal Cell", + "Fetal neuron": "Neuron", + "Fetal skeletal muscle cell": "Skeletal muscle cell", + "Fetal stromal cell": "Stromal cell", + "Fibroblast": "Fibroblast", + "Gastric endocrine cell": "Gastric endocrine cell", + "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Pancreas exocrine cell": "Pancreas exocrine cell", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "T cell", + "Proximal tubule progenitor": "Proximal 
tubule progenitor", + "Sinusoidal endothelial cell": "Endothelial cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalPancreas_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py new file mode 100644 index 000000000..537345671 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py @@ -0,0 +1,54 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Placenta" + self.class_maps = { + "0": { + "Fibroblast": "Fibroblast", + "Macrophage": "Macrophage", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Fetal stromal cell": "Fetal stromal cell", + "Stromal cell": "Stromal cell", + "Smooth muscle cell": "Smooth muscle cell", + "Endothelial cell": "Endothelial cell", + "T cell": "T cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Intermediated cell": "Intermediated cell", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Stratified epithelial cell": "Stratified epithelial cell", + "Fetal neuron": "Fetal neuron", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "hESC": "hESC", + "Basal 
cell": "Basal cell", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "M2 Macrophage": "M2 Macrophage", + "Myeloid cell": "Myeloid cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="Placenta_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py new file mode 100644 index 000000000..efcd5c949 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "pleura" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultPleura_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py new file mode 100644 index 000000000..1a6bef219 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py @@ -0,0 +1,43 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "prostate" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Basal cell": "Basal cell", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell", + "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", + "Fasciculata cell": "Fasciculata cell", + "Fetal enterocyte": "Fetal enterocyte", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Gastric endocrine cell": "Gastric endocrine cell", + "Goblet cell": "Goblet cell", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "Primordial germ cell": "Primordial germ cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stratified epithelial cell": "Stratified epithelial cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultProstate_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py new file mode 100644 index 000000000..25af7abb2 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py @@ -0,0 +1,38 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "rectum" + self.class_maps = { + "0": { + "B cell": "B cell", + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Dendritic cell": "Dendritic cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Enterocyte": "Enterocyte", + "Enterocyte progenitor": "Enterocyte progenitor", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Fetal stromal cell": "Fetal stromal cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultRectum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py new file mode 100644 index 000000000..1df96a84c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "rib" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalRib_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py new file mode 100644 index 000000000..d37bf1bca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "rib" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalRib_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py new file mode 100644 index 000000000..26d732174 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py @@ -0,0 +1,52 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = 
"human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "skin" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Kidney intercalated cell": "Kidney intercalated cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSkin_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py new file mode 100644 index 000000000..591ed500f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py @@ -0,0 +1,52 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + 
path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "skin" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "Basal cell": "Basal cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Endothelial cell": "Endothelial cell", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "Epithelial cell": "Epithelial cell", + "Erythroid cell": "Erythroid cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Fetal Neuron": "Fetal Neuron", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Fetal fibroblast": "Fetal fibroblast", + "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", + "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", + "Fetal stromal cell": "Fetal stromal cell", + "Fibroblast": "Fibroblast", + "Kidney intercalated cell": "Kidney intercalated cell", + "Macrophage": "Macrophage", + "Mast cell": "Mast cell", + "Monocyte": "Monocyte", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Primordial germ cell": "Primordial germ cell", + "Proliferating T cell": "Proliferating T cell", + "Smooth muscle cell": "Smooth muscle cell", + "Stromal cell": "Stromal cell", + "T cell": "T cell", + "hESC": "hESC", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSkin_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py new file mode 100644 index 000000000..935cad23b --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "spinalcord" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalSpinalCord_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py new file mode 100644 index 000000000..f5dad107a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py @@ -0,0 +1,44 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "Spleen" + self.class_maps = { + "0": { + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Erythroid cell": 
"Erythroid cell", + "Monocyte": "Monocyte", + "Endothelial cell": "Endothelial cell", + "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Proliferating T cell": "Proliferating T cell", + "Fibroblast": "Fibroblast", + "Stromal cell": "Stromal cell", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Mast cell": "Mast cell", + "Smooth muscle cell": "Smooth muscle cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSpleenParenchyma_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py new file mode 100644 index 000000000..fd40ef79a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py @@ -0,0 +1,44 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "Spleen" + self.class_maps = { + "0": { + "B cell (Plasmocyte)": "B cell (Plasmocyte)", + "Neutrophil": "Neutrophil", + "Endothelial cell (APC)": "Endothelial cell (APC)", + "B cell": "B cell", + "Macrophage": "Macrophage", + "T cell": "T cell", + "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", + "Dendritic cell": "Dendritic cell", + "CB CD34+": "CB CD34+", + "Erythroid cell": "Erythroid cell", + "Monocyte": "Monocyte", + "Endothelial cell": "Endothelial cell", + 
"Sinusoidal endothelial cell": "Sinusoidal endothelial cell", + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Proliferating T cell": "Proliferating T cell", + "Fibroblast": "Fibroblast", + "Stromal cell": "Stromal cell", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Mast cell": "Mast cell", + "Smooth muscle cell": "Smooth muscle cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultSpleen_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py new file mode 100644 index 000000000..997a1795d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py new file mode 100644 index 000000000..c32a24ee2 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py new file mode 100644 index 000000000..d93ed24d7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py new file mode 100644 index 000000000..9707559a3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py new file mode 100644 index 000000000..5319b9ce6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py new file mode 100644 index 000000000..76b06f3dd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py new file mode 100644 index 000000000..9de6d6c5a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py new file mode 100644 index 000000000..115dd11ea --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py new file mode 100644 index 000000000..d26759d0d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py new file mode 100644 index 000000000..58741fd2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" + self.organ = "stomach" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalIntestine_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py new file mode 100644 index 000000000..9ec801179 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py @@ -0,0 +1,36 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "thymus" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Erythroid cell": "Ery", + "Erythroid progenitor cell (RP high)": "Ery", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Macrophage": "Mac", + "Monocyte": "Mono", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Proliferating T cell": "Proliferating T cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + 
self._load_generalized(fn=fn, sample_id="FetalThymus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py new file mode 100644 index 000000000..9dd3bf713 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py @@ -0,0 +1,36 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "thymus" + self.class_maps = { + "0": { + "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", + "B cell": "B cell", + "CB CD34+": "CB CD34+", + "Dendritic cell": "Dendritic cell", + "Erythroid cell": "Ery", + "Erythroid progenitor cell (RP high)": "Ery", + "Fetal epithelial progenitor": "Fetal epithelial progenitor", + "Macrophage": "Mac", + "Monocyte": "Mono", + "Neutrophil": "Neutrophil", + "Neutrophil (RPS high)": "Neutrophil (RPS high)", + "Proliferating T cell": "Proliferating T cell", + "T cell": "T cell", + }, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="FetalThymus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py new file mode 100644 index 000000000..74b94bff4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "thyroid" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultThyroid_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py new file mode 100644 index 000000000..2e932292b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "thyroid" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultThyroid_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py new file mode 100644 index 000000000..368c39418 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "trachea" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultTrachea_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py new file mode 100644 index 000000000..0ddad9999 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "ureter" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultUreter_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py new file mode 100644 index 000000000..82efa82fb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py @@ -0,0 +1,22 @@ +from typing import Union +from .base import 
Dataset_d10_1038_s41586_020_2157_4 + + +class Dataset(Dataset_d10_1038_s41586_020_2157_4): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "uterus" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + self._load_generalized(fn=fn, sample_id="AdultUterus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py new file mode 100644 index 000000000..ed648d9db --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -0,0 +1,196 @@ +import anndata +import os +from typing import Union +import scipy.sparse +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a + free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python + using the synapse client, installable via `pip install synapseclient`: + + import synapseclient + import shutil + syn = synapseclient.Synapse() + syn.login("synapse_username","password") + syn21625095 = syn.get(entity="syn21625095") + shutil.move(syn21625095.path, "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" + + self.download = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_meta = None + + self.author = "Krasnow" + self.doi = "10.1038/s41586-020-2922-4" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "Adventitial Fibroblast_P1": "Fibroblasts", + "Adventitial Fibroblast_P2": "Fibroblasts", + "Adventitial Fibroblast_P3": "Fibroblasts", + "Airway Smooth Muscle_P1": "Airway smooth muscle", + "Airway Smooth Muscle_P2": "Airway smooth muscle", + "Airway Smooth Muscle_P3": "Airway smooth muscle", + "Alveolar Epithelial Type 1_P1": "AT1", + "Alveolar Epithelial Type 1_P2": "AT1", + "Alveolar Epithelial Type 1_P3": "AT1", + "Alveolar Epithelial Type 2_P1": "AT2", + "Alveolar Epithelial Type 2_P2": "AT2", + "Alveolar Epithelial Type 2_P3": "AT2", + "Alveolar Fibroblast_P1": "Fibroblasts", + "Alveolar Fibroblast_P2": "Fibroblasts", + "Alveolar Fibroblast_P3": "Fibroblasts", + "Artery_P1": "Arterial", + "Artery_P2": "Arterial", + "Artery_P3": "Arterial", + "B_P1": "B cell lineage", + "B_P2": "B cell lineage", + "B_P3": "B 
cell lineage", + "Basal_P1": "Basal", + "Basal_P2": "Basal", + "Basal_P3": "Basal", + "Basophil/Mast 1_P1": "Mast cells", + "Basophil/Mast 1_P2": "Mast cells", + "Basophil/Mast 1_P3": "Mast cells", + "Basophil/Mast 2_P3": "Mast cells", + "Bronchial Vessel 1_P1": "Bronchial Vessel 1", + "Bronchial Vessel 1_P3": "Bronchial Vessel 1", + "Bronchial Vessel 2_P1": "Bronchial Vessel 2", + "Bronchial Vessel 2_P3": "Bronchial Vessel 2", + "CD4+ Memory/Effector T_P1": "T cell lineage", + "CD4+ Memory/Effector T_P2": "T cell lineage", + "CD4+ Memory/Effector T_P3": "T cell lineage", + "CD4+ Naive T_P1": "T cell lineage", + "CD4+ Naive T_P2": "T cell lineage", + "CD4+ Naive T_P3": "T cell lineage", + "CD8+ Memory/Effector T_P1": "T cell lineage", + "CD8+ Memory/Effector T_P2": "T cell lineage", + "CD8+ Memory/Effector T_P3": "T cell lineage", + "CD8+ Naive T_P1": "T cell lineage", + "CD8+ Naive T_P2": "T cell lineage", + "CD8+ Naive T_P3": "T cell lineage", + "Capillary Aerocyte_P1": "Capillary", + "Capillary Aerocyte_P2": "Capillary", + "Capillary Aerocyte_P3": "Capillary", + "Capillary Intermediate 1_P2": "Capillary Intermediate 1", + "Capillary Intermediate 2_P2": "Capillary Intermediate 2", + "Capillary_P1": "Capillary", + "Capillary_P2": "Capillary", + "Capillary_P3": "Capillary", + "Ciliated_P1": "Multiciliated lineage", + "Ciliated_P2": "Multiciliated lineage", + "Ciliated_P3": "Multiciliated lineage", + "Classical Monocyte_P1": "Monocytes", + "Classical Monocyte_P2": "Monocytes", + "Classical Monocyte_P3": "Monocytes", + "Club_P1": "Secretory", + "Club_P2": "Secretory", + "Club_P3": "Secretory", + "Differentiating Basal_P1": "Basal", + "Differentiating Basal_P3": "Basal", + "EREG+ Dendritic_P1": "Macrophages", + "EREG+ Dendritic_P2": "Macrophages", + "Fibromyocyte_P3": "Fibromyocyte", + "Goblet_P3": "Secretory", + "IGSF21+ Dendritic_P1": "Macrophages", + "IGSF21+ Dendritic_P2": "Macrophages", + "IGSF21+ Dendritic_P3": "Macrophages", + "Intermediate Monocyte_P2": 
"Monocytes", + "Ionocyte_P3": "Rare", + "Lipofibroblast_P1": "Fibroblasts", + "Lymphatic_P1": "Lymphatic EC", + "Lymphatic_P2": "Lymphatic EC", + "Lymphatic_P3": "Lymphatic EC", + "Macrophage_P1": "Macrophages", + "Macrophage_P2": "Macrophages", + "Macrophage_P3": "Macrophages", + "Mesothelial_P1": "Mesothelium", + "Mucous_P2": "Submucosal Secretory", + "Mucous_P3": "Submucosal Secretory", + "Myeloid Dendritic Type 1_P1": "Dendritic cells", + "Myeloid Dendritic Type 1_P2": "Dendritic cells", + "Myeloid Dendritic Type 1_P3": "Dendritic cells", + "Myeloid Dendritic Type 2_P1": "Dendritic cells", + "Myeloid Dendritic Type 2_P2": "Dendritic cells", + "Myeloid Dendritic Type 2_P3": "Dendritic cells", + "Myofibroblast_P1": "Myofibroblasts", + "Myofibroblast_P2": "Myofibroblasts", + "Myofibroblast_P3": "Myofibroblasts", + "Natural Killer T_P2": "T cell lineage", + "Natural Killer T_P3": "T cell lineage", + "Natural Killer_P1": "Innate lymphoid cells", + "Natural Killer_P2": "Innate lymphoid cells", + "Natural Killer_P3": "Innate lymphoid cells", + "Neuroendocrine_P3": "Rare", + "Nonclassical Monocyte_P1": "Monocytes", + "Nonclassical Monocyte_P2": "Monocytes", + "Nonclassical Monocyte_P3": "Monocytes", + "OLR1+ Classical Monocyte_P2": "Monocytes", + "Pericyte_P1": "Fibroblasts", + "Pericyte_P2": "Fibroblasts", + "Pericyte_P3": "Fibroblasts", + "Plasma_P1": "B cell lineage", + "Plasma_P3": "B cell lineage", + "Plasmacytoid Dendritic_P1": "Dendritic cells", + "Plasmacytoid Dendritic_P2": "Dendritic cells", + "Plasmacytoid Dendritic_P3": "Dendritic cells", + "Platelet/Megakaryocyte_P1": "Megakaryocytes", + "Platelet/Megakaryocyte_P3": "Megakaryocytes", + "Proliferating Basal_P1": "Basal", + "Proliferating Basal_P3": "Basal", + "Proliferating Macrophage_P1": "Macrophages", + "Proliferating Macrophage_P2": "Macrophages", + "Proliferating Macrophage_P3": "Macrophages", + "Proliferating NK/T_P2": "Innate lymphoid cells", + "Proliferating NK/T_P3": "Innate lymphoid cells", + 
"Proximal Basal_P3": "Basal", + "Proximal Ciliated_P3": "Multiciliated lineage", + "Serous_P3": "Submucosal Secretory", + "Signaling Alveolar Epithelial Type 2_P3": "AT2", + "TREM2+ Dendritic_P1": "Macrophages", + "TREM2+ Dendritic_P3": "Macrophages", + "Vascular Smooth Muscle_P2": "2_Smooth Muscle", + "Vascular Smooth Muscle_P3": "2_Smooth Muscle", + "Vein_P1": "Venous", + "Vein_P2": "Venous", + "Vein_P3": "Venous", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + self.adata = anndata.read(fn) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None])) \ + .multiply(1 / 10000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py new file mode 100644 index 000000000..1ebf48fb2 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py @@ -0,0 +1,162 @@ +import anndata +import os +from typing import Union +import scipy.sparse +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a + free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python + using the synapse client, installable via `pip install synapseclient`: + + import synapseclient + import shutil + syn = synapseclient.Synapse() + syn.login("synapse_username","password") + syn21625142 = syn.get(entity="syn21625142") + shutil.move(syn21625142.path, "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" + + self.download = "https://www.synapse.org/#!Synapse:syn21041850" + self.download_meta = None + + self.author = "Krasnow" + self.doi = "10.1038/s41586-020-2922-4" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "smartseq2" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.class_maps = { + "0": { + "Adventitial Fibroblast_P1": "Fibroblasts", + "Adventitial Fibroblast_P2": "Fibroblasts", + "Adventitial Fibroblast_P3": "Fibroblasts", + "Airway Smooth Muscle_P1": "Airway smooth muscle", + "Airway Smooth Muscle_P2": "Airway smooth muscle", + "Airway Smooth Muscle_P3": "Airway smooth muscle", + "Alveolar Epithelial Type 1_P1": "AT1", + "Alveolar Epithelial Type 1_P2": "AT1", + "Alveolar Epithelial Type 1_P3": "AT1", + "Alveolar Epithelial Type 2_P1": "AT2", + "Alveolar Epithelial Type 2_P2": "AT2", + "Alveolar Epithelial Type 2_P3": "AT2", + "Alveolar Fibroblast_P1": "Fibroblasts", + "Alveolar Fibroblast_P2": "Fibroblasts", + "Alveolar Fibroblast_P3": "Fibroblasts", + "Artery_P1": "Arterial", + "Artery_P2": "Arterial", + "Artery_P3": "Arterial", + "B_P1": "B cell lineage", + "B_P2": "B cell lineage", + 
"B_P3": "B cell lineage", + "Basal_P1": "Basal", + "Basal_P2": "Basal", + "Basal_P3": "Basal", + "Basophil/Mast 1_P1": "Mast cells", + "Basophil/Mast 1_P2": "Mast cells", + "Basophil/Mast 1_P3": "Mast cells", + "Bronchial Vessel 1_P1": "Bronchial Vessel 1", + "CD4+ Memory/Effector T_P1": "T cell lineage", + "CD4+ Naive T_P1": "T cell lineage", + "CD4+ Naive T_P2": "T cell lineage", + "CD8+ Memory/Effector T_P1": "T cell lineage", + "CD8+ Naive T_P1": "T cell lineage", + "CD8+ Naive T_P2": "T cell lineage", + "Capillary Aerocyte_P1": "Capillary", + "Capillary Aerocyte_P2": "Capillary", + "Capillary Aerocyte_P3": "Capillary", + "Capillary Intermediate 1_P2": "Capillary Intermediate 1", + "Capillary_P1": "Capillary", + "Capillary_P2": "Capillary", + "Capillary_P3": "Capillary", + "Ciliated_P1": "Multiciliated lineage", + "Ciliated_P2": "Multiciliated lineage", + "Ciliated_P3": "Multiciliated lineage", + "Classical Monocyte_P1": "Monocytes", + "Club_P1": "Secretory", + "Club_P2": "Secretory", + "Club_P3": "Secretory", + "Dendritic_P1": "Dendritic cells", + "Differentiating Basal_P3": "Basal", + "Fibromyocyte_P3": "Fibromyocyte", + "Goblet_P1": "Secretory", + "Goblet_P2": "Secretory", + "Goblet_P3": "Secretory", + "IGSF21+ Dendritic_P2": "Macrophages", + "IGSF21+ Dendritic_P3": "Macrophages", + "Intermediate Monocyte_P2": "Monocytes", + "Intermediate Monocyte_P3": "Monocytes", + "Ionocyte_P3": "Rare", + "Lipofibroblast_P1": "Fibroblasts", + "Lymphatic_P1": "Lymphatic EC", + "Lymphatic_P2": "Lymphatic EC", + "Lymphatic_P3": "Lymphatic EC", + "Macrophage_P2": "Macrophages", + "Macrophage_P3": "Macrophages", + "Myeloid Dendritic Type 2_P3": "Dendritic cells", + "Myofibroblast_P2": "Myofibroblasts", + "Myofibroblast_P3": "Myofibroblasts", + "Natural Killer T_P2": "T cell lineage", + "Natural Killer T_P3": "T cell lineage", + "Natural Killer_P1": "Innate lymphoid cells", + "Natural Killer_P2": "Innate lymphoid cells", + "Natural Killer_P3": "Innate lymphoid cells", + 
"Neuroendocrine_P1": "Rare", + "Neuroendocrine_P3": "Rare", + "Neutrophil_P1": "Monocytes", + "Neutrophil_P2": "Monocytes", + "Neutrophil_P3": "Monocytes", + "Nonclassical Monocyte_P1": "Monocytes", + "Nonclassical Monocyte_P2": "Monocytes", + "Pericyte_P1": "Fibroblasts", + "Pericyte_P2": "Fibroblasts", + "Pericyte_P3": "Fibroblasts", + "Plasma_P3": "B cell lineage", + "Plasmacytoid Dendritic_P1": "Dendritic cells", + "Plasmacytoid Dendritic_P2": "Dendritic cells", + "Plasmacytoid Dendritic_P3": "Dendritic cells", + "Proliferating NK/T_P2": "Innate lymphoid cells", + "Proliferating NK/T_P3": "Innate lymphoid cells", + "Signaling Alveolar Epithelial Type 2_P1": "AT2", + "Signaling Alveolar Epithelial Type 2_P3": "AT2", + "Vascular Smooth Muscle_P1": "2_Smooth Muscle", + "Vascular Smooth Muscle_P2": "2_Smooth Muscle", + "Vascular Smooth Muscle_P3": "2_Smooth Muscle", + "Vein_P2": "Venous", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") + self.adata = anndata.read(fn) + self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nReads"].values[:, None])) \ + .multiply(1 / 1000000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py new file mode 100644 index 000000000..681dc6abd --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -0,0 +1,76 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" + + self.download = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41590-020-0602-z" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.obs_key_cellontology_original = "cell_type" + + self.class_maps = { + "0": { + "Activated CD4 T": "Activated CD4 T", + "B cell IgA Plasma": "B cell IgA Plasma", + "B cell IgG Plasma": "B cell IgG Plasma", + "B cell cycling": "B cell cycling", + "B cell memory": "B cell memory", + "CD8 T": "CD8 T", + "Follicular B cell": "Follicular", + "ILC": "ILC", + "LYVE1 Macrophage": "LYVE1 Macrophage", + "Lymphoid DC": "Lymphoid DC", + "Macrophage": "Macrophage", + "Mast": "Mast cell", + "Monocyte": "Monocyte", + "NK": "NK", + "Tcm": "Tcm", + "Tfh": "Tfh", + "Th1": "Th1", + "Th17": "Th17", + "Treg": "Treg", + "cDC1": "DC1", + "cDC2": "DC2", + "cycling DCs": "cycling DCs", + "cycling gd T": "cycling gd T", + "gd T": "gd T", + "pDC": "pDC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py new file mode 100644 index 000000000..a7df368b3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py @@ -0,0 +1,67 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" + + self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.organ = "lung" # ToDo: "alveoli, parenchyma" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + self.normalization = "norm" + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Ciliated 2": "Multiciliated lineage", + "Luminal_Macrophages": "Macrophages", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "Endothelial": "1_Endothelial", + 
"Lymphatic": "Lymphatic EC", + "Ciliated 1": "Multiciliated lineage", + "Smooth muscle": "2_Smooth Muscle", + "Type_1_alveolar": "AT1", + "Neutrophils": "Monocytes", + "Club": "Secretory", + "Basal 2": "Basal", + "B cells": "B cell lineage", + "T and NK": "2_Lymphoid", + "Mesothelium": "Mesothelium", + "Mast cells": "Mast cells", + "Fibroblasts": "2_Fibroblast lineage", + "Type 2 alveolar": "AT2", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py new file mode 100644 index 000000000..54985ca9b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py @@ -0,0 +1,67 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" + + self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Bronchi_anonymised.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.normalization = "norm" + self.organ = "lung" # ToDo "bronchi" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = 
"CellType" + + self.class_maps = { + "0": { + "Ciliated 1": "Multiciliated lineage", + "Club": "Secretory", + "Ciliated 2": "Multiciliated lineage", + "Ionocytes": "Rare", + "Basal 2": "Basal", + "Goblet_1": "Secretory", + "Goblet 2": "Secretory", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "B cells": "B cell lineage", + "Luminal_Macrophages": "Macrophages", + "Neutrophils": "Monocytes", + "Endothelial": "1_Endothelial", + "Smooth muscle": "2_Smooth Muscle", + "T and NK": "2_Lymphoid", + "Fibroblasts": "2_Fibroblast lineage", + "Lymphatic": "Lymphatic EC", + "Mast cells": "Mast cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py new file mode 100644 index 000000000..3d6542451 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py @@ -0,0 +1,65 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" + self.download_meta = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" + + self.author = "Teichmann" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" # ToDo: "parenchymal lung and distal airway specimens" + self.organism = "human" + self.protocol = "dropseq" + self.state_exact = "uninvolved areas of tumour resection material" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "celltype" + + self.class_maps = { + "0": { + "Fibroblast": "Fibroblasts", + "Type 2": "AT2", + "B cell": "B cell lineage", + "Macrophages": "Macrophages", + "NK cell": "Innate lymphoid cells", + "T cell": "T cell lineage", + "Ciliated": "Multiciliated lineage", + "Lymphatic": "Lymphatic EC", + "Type 1": "AT1", + "Transformed epithelium": "1_Epithelial", + "Secretory": "Secretory", + "Endothelium": "1_Endothelial", + "Mast cell": "Mast cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), + os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), + ] + self.adata = anndata.read_csv(fn[0]).T + self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py similarity index 59% rename from sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py rename to 
sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py index 04c86cb19..4ad06f4ec 100644 --- a/sfaira/data/mouse/brain/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -3,29 +3,42 @@ import os import pandas from typing import Union -from .external import DatasetBase +from sfaira.data import DatasetBase class Dataset(DatasetBase): - id: str - def __init__( self, path: Union[str, None] = None, meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, **kwargs ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4" - self.download_website = \ + + self.download = \ "www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" - self.download_website_meta = \ + self.download_meta = \ "www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" + + self.author = "Movahedi" + self.doi = "10.1038/s41593-019-0393-4" + self.healthy = True + self.normalization = "raw" self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True + self.organism = "mouse" + self.protocol = "microwell" + self.state_exact = "healthy" + self.year = 2019 + + self.var_ensembl_col = "ensembl" + self.var_symbol_col = "names" + + self.obs_key_cellontology_class = self._ADATA_IDS_SFAIRA.cell_ontology_class + self.obs_key_cellontology_id = self._ADATA_IDS_SFAIRA.cell_ontology_id + self.obs_key_cellontology_original = self._ADATA_IDS_SFAIRA.cell_ontology_class self.class_maps = { "0": { @@ -64,23 +77,5 @@ def _load(self, fn=None): # Assign attributes self.adata.obs_names = obs_names self.adata.var = var - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") 
self.adata.obs = obs assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Movahedi" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41593-019-0393-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py new file mode 100644 index 000000000..81e9cf13c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -0,0 +1,65 @@ +import anndata +import os +from typing import Union +import pandas as pd +import scipy.io +import gzip +import 
tarfile + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" + + self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" + self.download_meta = None + + self.author = "Mo" + self.healthy = True + self.normalization = "raw" + self.organ = "kidney" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + self.doi = "10.1038/s41597-019-0351-8" + + self.var_symbol_col = "names" + self.var_ensembl_col = "ensembl" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") + adatas = [] + with tarfile.open(fn) as tar: + for member in tar.getmembers(): + if "_matrix.mtx.gz" in member.name: + name = "_".join(member.name.split("_")[:-1]) + with gzip.open(tar.extractfile(member), "rb") as mm: + X = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, + sep="\t", index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(name + "_features.tsv.gz"), compression="gzip", header=None, + sep="\t").iloc[:, :2] + var.columns = ["ensembl", "names"] + var.index = var["ensembl"].values + self.adata = anndata.AnnData(X=X, obs=obs, var=var) + self.adata.obs["sample"] = name + adatas.append(self.adata) + self.adata = adatas[0].concatenate(adatas[1:]) + del self.adata.obs["batch"] diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py new file mode 100644 index 000000000..929d0094b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -0,0 +1,58 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" + + self.download = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" + self.download_meta = None + + self.author = "Mullins" + self.doi = "10.1073/pnas.1914143116" + self.healthy = True + self.normalization = "norm" + self.organ = "eye" # ToDo: "retina" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "B-cell": "B-cell", + "Endothelial": "Endothelial cell", + "Fibroblast": "Fibroblast", + "Macrophage": "Macrophage", + "Mast-cell": "Mast-cell", + "Melanocyte": "Melanocyte", + "Pericyte": "Pericyte", + "RPE": "Retinal pigment epithelium", + "Schwann1": "Schwann1", + "Schwann2": "Schwann2", + "T/NK-cell": "T/NK-cell", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/__init__.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py new file mode 100644 index 000000000..e3fb7a4dd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py @@ -0,0 +1,57 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" + + self.download = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" + self.download_meta = None + + self.author = "Chen" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" + self.organism = "human" + self.doi = "10.1084/jem.20191130" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Progenitor": "Enterocyte Progenitors", + "Enterocyte": "Enterocytes", + "Goblet": "Goblet cells", + "TA": "TA", + "Paneth-like": "Paneth cells", + "Stem Cell": "Stem cells", + "Enteriendocrine": "Enteroendocrine cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") + self.adata = anndata.read(fn) + 
self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py new file mode 100644 index 000000000..8ccaa58fd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py @@ -0,0 +1,57 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" + + self.download = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" + self.download_meta = None + + self.author = "Chen" + self.doi = "10.1084/jem.20191130" + self.healthy = True + self.normalization = "raw" + self.organ = "ileum" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Progenitor": "Progenitors", + "Goblet": "Goblet cells", + "Enterocyte": "Enterocytes", + "Paneth-like": "Paneth cells", + "Stem Cell": "Stem Cell", + "TA": "TA", + "Enteriendocrine": "Enteroendocrine cells", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + 
.multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py new file mode 100644 index 000000000..54126316a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py @@ -0,0 +1,56 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_rectum_2019_10x_wang_001_10.1084/jem.20191130" + + self.download = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad" + + self.author = "Chen" + self.doi = "10.1084/jem.20191130" + self.healthy = True + self.normalization = "raw" + self.organ = "rectum" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Progenitor": "Enterocyte progenitor", + "Goblet": "Goblet", + "Enterocyte": "Enterocyte", + "Paneth-like": "Paneth-like", + "Stem Cell": "Stem Cell", + "TA": "TA", + "Enteriendocrine": "Enteroendocrine", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/__init__.py 
new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py new file mode 100644 index 000000000..01e20cbd3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -0,0 +1,61 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" + + self.download = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" + self.download_meta = None + + self.author = "Eils" + self.doi = "10.1101/2020.03.13.991455" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Ciliated": "Multiciliated lineage", + "Endothelial": "1_Endothelial", + "AT2": "AT2", + "LymphaticEndothelium": "Lymphatic EC", + "Fibroblasts": "2_Fibroblast lineage", + "Club": "Secretory", + "Immuno_TCells": "T cell lineage", + "Immuno_Monocytes": "Monocytes", + "AT1": "AT1" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = 
np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ + .multiply(1 / 10000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py new file mode 100644 index 000000000..ea766e1cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py @@ -0,0 +1,66 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" + + self.download = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" + self.download_meta = None + + self.author = "Eils" + self.doi = "10.1101/2020.03.13.991455" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" # ToDo: "bronchial epithelial cells" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Secretory3": "Secretory", + "Ciliated1": "Multiciliated lineage", + "Goblet": "Secretory", + "Ciliated2": "Multiciliated lineage", + "Club": "Secretory", + "Secretory2": "Secretory", + "FOXN4": "Rare", + "Basal1": "Basal", + "Secretory1": "Secretory", + "Fibroblast": "2_Fibroblast lineage", + "Ionocyte": "Rare", + "Basal3": "Basal", + "Basal_Mitotic": "Basal", + "Basal2": "Basal", + }, + } + + def 
_load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ + .multiply(1 / 10000) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py new file mode 100644 index 000000000..0a07210ec --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py @@ -0,0 +1,56 @@ +import anndata +from typing import Union +from sfaira.data import DatasetBase + + +class Dataset_d10_1101_661728(DatasetBase): + """ + This is a dataloader template for tabula muris data. 
+ """ + + def __init__( + self, + path: Union[str, None], + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.source = source + if self.source == "aws": + self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + elif self.source == "figshare": + self.download = "https://ndownloader.figshare.com/articles/8273102/versions/2" + else: + raise ValueError("source %s not recognized" % self.source) + + self.obs_key_cellontology_original = "free_annotation" + self.obs_key_age = "age" + self.obs_key_dev_stage = "development_stage" # not given in all data sets + self.obs_key_sex = "sex" + # ToDo: further anatomical information for subtissue in "subtissue" + + self.author = "Quake" + self.doi = "10.1101/661728" + self.healthy = True + self.normalization = "norm" + self.organism = "mouse" + self.state_exact = "healthy" + self.year = 2019 + + self.var_ensembl_col = None + self.var_symbol_col = "index" + + def _load_generalized(self, fn): + self.adata = anndata.read_h5ad(fn) + if self.source == "aws": + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} + + def _get_protocol_tms(self, x) -> str: + return "smartseq2" if "smartseq2" in x else "10x" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py new file mode 100644 index 000000000..ba71dfa03 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + 
meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..710a502fd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if 
self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py new file mode 100644 index 000000000..93af8ff7e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py new file mode 100644 
index 000000000..755c34976 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py new file mode 100644 index 000000000..864b0e0af --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + 
self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" + self.organ = "adipose" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py new file mode 100644 index 000000000..c6889776b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bladder_2019_10x_pisco_001_10.1101/661728" + self.organ = "bladder" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bladder", 
"Bladder_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..41da10137 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "bladder" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py new file mode 100644 index 000000000..2a919d354 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + 
+ +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" + self.organ = "bone" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..bc06538cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "bone" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def 
_load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["nan-marrow-needs-subclustering"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..68b1f2d51 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py @@ -0,0 +1,37 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "brain" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + 
self._load_generalized(fn=fn) + + self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py new file mode 100644 index 000000000..d23cb6411 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py @@ -0,0 +1,37 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728" + self.organ = "brain" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + + self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py new file mode 100644 index 000000000..239a8eb7e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py @@ -0,0 +1,34 @@ +import os +from typing import Union +from .base import 
Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" + self.organ = "colon" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..1d3a206e4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "colon" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in 
constructor") + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..d1c15fa22 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "diaphragm" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py new file mode 100644 index 000000000..995b2f06f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_10x_pisco_001_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a59e8ed75 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, +
cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py new file mode 100644 index 000000000..a37a09f12 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_heart_2019_smartseq2_pisco_002_10.1101/661728" + self.organ = "heart" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse",
"heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") + elif self.source == "figshare": + raise ValueError("not defined") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py new file mode 100644 index 000000000..98d48650e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py @@ -0,0 +1,39 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" + self.organ = "kidney" + + self.class_maps = { + "0": { + "kidney capillary endothelial cell": "endothelial cell", + "kidney mesangial cell": "mesangial cell", + "kidney interstitial fibroblast": "interstitial fibroblast", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["kidney cell"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py
b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..3016fedea --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py @@ -0,0 +1,39 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "kidney" + + self.class_maps = { + "0": { + "kidney capillary endothelial cell": "endothelial cell", + "kidney mesangial cell": "mesangial cell", + "kidney interstitial fibroblast": "interstitial fibroblast", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) + self.set_unkown_class_id(ids=["kidney cell"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py new file mode 100644 index 000000000..56954c663 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + 
self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_liver_2019_10x_pisco_001_10.1101/661728" + self.organ = "liver" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a21bd1c7b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "liver" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in 
constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py new file mode 100644 index 000000000..05602ebf8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" + self.organ = "lung" + + self.class_maps = { + "0": { + "ciliated columnar cell of tracheobronchial tree": "ciliated cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py new file mode 100644 
index 000000000..615240b9b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "lung" + + self.class_maps = { + "0": { + "ciliated columnar cell of tracheobronchial tree": "ciliated cell", + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py new file mode 100644 index 000000000..2a65a44be --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, 
**kwargs) + self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" + self.organ = "mammarygland" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..1a3fae56e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "mammarygland" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") 
+ elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py new file mode 100644 index 000000000..4fb82822e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" + self.organ = "muscle" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..6fb28fa22 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py 
@@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "muscle" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py new file mode 100644 index 000000000..31e270b8b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "pancreatic ductal 
cel": "pancreatic ductal cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..aa9765a6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "pancreas" + + self.class_maps = { + "0": { + "pancreatic ductal cel": "pancreatic ductal cell" + }, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + 
self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py new file mode 100644 index 000000000..5085b14b8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py @@ -0,0 +1,33 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" + self.organ = "skin" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..a40b384e9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py @@ -0,0 +1,34 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = 
None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "skin" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py new file mode 100644 index 000000000..bfd7a079f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_spleen_2019_10x_pisco_001_10.1101/661728" + self.organ = "spleen" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "spleen", 
"tabula-muris-senis-droplet-processed-official-annotations-Spleen.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..038eb521e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py @@ -0,0 +1,33 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "spleen" + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py new file mode 100644 index 000000000..80b21b384 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py 
@@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_thymus_2019_10x_pisco_001_10.1101/661728" + self.organ = "thymus" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..2de3dc695 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "thymus" + self.protocol = self._get_protocol_tms(self.id) + 
+ self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py new file mode 100644 index 000000000..877168d52 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_tongue_2019_10x_pisco_001_10.1101/661728" + self.organ = "tongue" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..c76a73108 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py @@ -0,0 +1,35 @@ +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "tongue" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py new file mode 100644 index 000000000..3f5210e77 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py @@ -0,0 +1,36 @@ +import numpy as np +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: 
Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_trachea_2019_10x_pisco_001_10.1101/661728" + self.organ = "trachea" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if self.source == "aws": + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py new file mode 100644 index 000000000..9d3dd4fcd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py @@ -0,0 +1,36 @@ +import numpy as np +import os +from typing import Union +from .base import Dataset_d10_1101_661728 + + +class Dataset(Dataset_d10_1101_661728): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + source: str = "aws", + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) + self.id = "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728" + self.organ = "trachea" + self.protocol = self._get_protocol_tms(self.id) + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + if self.path is None: + raise ValueError("provide either fn in load or path in constructor") + if 
self.source == "aws": + fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") + elif self.source == "figshare": + fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") + else: + raise ValueError("source %s not recognized" % self.source) + self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py new file mode 100644 index 000000000..51d8cc958 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -0,0 +1,94 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" + + self.download = [ + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", + "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fbarcodes%2Etsv%2Egz" + ] + self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" + + self.author = "Kropski" + self.doi = "10.1101/753806" + self.normalization = "raw" + 
self.organ = "lung" # ToDo: "parenchyma" + self.organism = "human" + self.protocol = "10x" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "celltype" + self.obs_key_state_exact = "Diagnosis" + self.obs_key_healthy = "Status" + self.healthy_state_healthy = "Control" + + self.class_maps = { + "0": { + "Proliferating Macrophages": "Macrophages", + "Myofibroblasts": "Myofibroblasts", + "Proliferating Epithelial Cells": "Proliferating Epithelial Cells", + "Mesothelial Cells": "Mesothelium", + "cDCs": "Dendritic cells", + "Mast Cells": "Mast cells", + "Ciliated": "Multiciliated lineage", + "T Cells": "T cell lineage", + "pDCs": "Dendritic cells", + "Smooth Muscle Cells": "2_Smooth Muscle", + "Transitional AT2": "AT2", + "AT2": "AT2", + "B Cells": "B cell lineage", + "NK Cells": "Innate lymphoid cells", + "Monocytes": "Monocytes", + "Basal": "Basal", + "Plasma Cells": "B cell lineage", + "Differentiating Ciliated": "Multiciliated lineage", + "Macrophages": "Macrophages", + "MUC5B+": "Secretory", + "SCGB3A2+": "Secretory", + "Fibroblasts": "Fibroblasts", + "Lymphatic Endothelial Cells": "Lymphatic EC", + "Endothelial Cells": "2_Blood vessels", + "SCGB3A2+ SCGB1A1+": "Secretory", + "PLIN2+ Fibroblasts": "Fibroblasts", + "KRT5-/KRT17+": "KRT5-/KRT17+", + "MUC5AC+ High": "Secretory", + "Proliferating T Cells": "T cell lineage", + "AT1": "AT1", + "HAS1 High Fibroblasts": "Fibroblasts" + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), + os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), + ] + self.adata = anndata.read_mtx(fn[0]).T + self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) + self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, 
names=["barcodes"]) + obs = pd.read_csv(fn[3], index_col=0) + self.adata = self.adata[obs.index.tolist(), :].copy() + self.adata.obs = obs + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py new file mode 100644 index 000000000..0e5dd65f6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -0,0 +1,128 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" + + self.download = [ + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" + ] + self.download_meta = None + + self.author = "Clatworthy" + self.doi = "10.1126/science.aat5031" + self.healthy = True + self.normalization = "norm" + self.organ = "kidney" # ToDo: "renal medulla, renal pelvis, ureter, cortex of kidney" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "ID" + + self.obs_key_cellontology_original = "celltype" + + 
self.class_maps = { + "0": { + "Ascending vasa recta endothelium": "Endothelial Cells - AVR", + "B cell": "B cell", + "CD4 T cell": "CD4 T cell", + "CD8 T cell": "CD8 T cell", + "CNT/PC - proximal UB": "CNT/PC - proximal UB", + "Cap mesenchyme": "Cap mesenchyme", + "Connecting tubule": "Connecting tubule", + "Descending vasa recta endothelium": "Endothelial Cells - AEA & DVR", + "Distal S shaped body": "Distal S shaped body", + "Distal renal vesicle": "Distal renal vesicle", + "Distinct proximal tubule 1": "Distinct proximal tubule 1", + "Distinct proximal tubule 2": "Distinct proximal tubule 2", + "Endothelium": "Endothelial Cells (unassigned)", + "Epithelial progenitor cell": "Epithelial progenitor", + "Erythroid": "Erythroid", + "Fibroblast": "Fibroblast", + "Fibroblast 1": "Fibroblast", + "Fibroblast 2": "Fibroblast", + "Glomerular endothelium": "Endothelial Cells - glomerular capillaries", + "Indistinct intercalated cell": "Indistinct intercalated cell", + "Innate like lymphocyte": "Innate like lymphocyte", + "Loop of Henle": "Loop of Henle", + "MNP-a/classical monocyte derived": "MNP-a/classical monocyte derived", + "MNP-b/non-classical monocyte derived": "MNP-b/non-classical monocyte derived", + "MNP-c/dendritic cell": "MNP-c/dendritic cell", + "MNP-d/Tissue macrophage": "MNP-d/Tissue macrophage", + "Macrophage 1": "Macrophage", + "Macrophage 2": "Macrophage", + "Mast cell": "Mast cell", + "Mast cells": "Mast cell", + "Medial S shaped body": "Medial S shaped body", + "Megakaryocyte": "Megakaryocyte", + "Monocyte": "Monocyte", + "Myofibroblast": "Myofibroblast", + "Myofibroblast 1": "Myofibroblast", + "Myofibroblast 2": "Myofibroblast", + "NK cell": "NK cell", + "NKT cell": "NKT cell", + "Neuron": "Neuron", + "Neutrophil": "Neutrophil", + "Pelvic epithelium": "Pelvic epithelium", + "Pelvic epithelium - distal UB": "Pelvic epithelium - distal UB", + "Peritubular capillary endothelium 1": "Peritubular capillary endothelium 1", + "Peritubular capillary 
endothelium 2": "Peritubular capillary endothelium 2", + "Plasmacytoid dendritic cell": "Plasmacytoid dendritic cell", + "Podocyte": "Podocyte", + "Principal cell": "Principal cell", + "Proliferating B cell": "Proliferating B cell", + "Proliferating NK cell": "Proliferating NK cell", + "Proliferating Proximal Tubule": "Proliferating Proximal Tubule", + "Proliferating cDC2": "Proliferating cDC2", + "Proliferating cap mesenchyme": "Proliferating cap mesenchyme", + "Proliferating distal renal vesicle": "Proliferating distal renal vesicle", + "Proliferating fibroblast": "Proliferating fibroblast", + "Proliferating macrophage": "Proliferating macrophage", + "Proliferating monocyte": "Proliferating monocyte", + "Proliferating myofibroblast": "Proliferating myofibroblast", + "Proliferating stroma progenitor": "Proliferating stroma progenitor", + "Proximal S shaped body": "Proximal S shaped body", + "Proximal UB": "Proximal UB", + "Proximal renal vesicle": "Proximal renal vesicle", + "Proximal tubule": "Proximal tubule", + "Stroma progenitor": "Stroma progenitor", + "Thick ascending limb of Loop of Henle": "Thick ascending limb of Loop of Henle", + "Transitional urothelium": "Transitional urothelium", + "Type A intercalated cell": "Type A intercalated cell", + "Type B intercalated cell": "Collecting Duct - Intercalated Cells Type B", + "cDC1": "cDC1", + "cDC2": "cDC2", + "pDC": "pDC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = [ + os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), + os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") + ] + adult = anndata.read(fn[0]) + fetal = anndata.read(fn[1]) + adult.obs["development"] = "adult" + fetal.obs["development"] = "fetal" + self.adata = adult.concatenate(fetal) + self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py new file mode 100644 index 
000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py new file mode 100644 index 000000000..5f9715507 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -0,0 +1,91 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" + + self.download = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_meta = None + + self.author = "Teichmann" + self.doi = "10.1126/science.aay3224" + self.healthy = True + self.normalization = "norm" + self.organ = "thymus" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Anno_level_fig1" + + self.class_maps = { + "0": { + "B_memory": "B_memory", + "B_naive": "B_naive", + "B_plasma": "B_plasma", + "B_pro/pre": "B_pro/pre", + "CD4+T": "CD4+T", + "CD4+Tmem": "CD4+Tmem", + "CD8+T": "CD8+T", + "CD8+Tmem": "CD8+Tmem", + "CD8αα": "CD8αα", + "DC1": "DC1", + "DC2": "DC2", + "DN": "DN", + "DP": "DP", + "ETP": "ETP", + "Endo": "Endo", + "Epi_GCM2": "Epi_GCM2", + "Ery": "Ery", + "Fb_1": "Fb_1", + "Fb_2": "Fb_2", + "Fb_cycling": "Fb_cycling", + "ILC3": "ILC3", + "Lymph": "Lymph", + "Mac": "Mac", + "Mast": "Mast", + "Mgk": "Mgk", + "Mono": "Mono", + "NK": "NK", + "NKT": "NKT", 
+ "NMP": "NMP", + "T(agonist)": "T(agonist)", + "TEC(myo)": "TEC(myo)", + "TEC(neuro)": "TEC(neuro)", + "Treg": "Treg", + "VSMC": "VSMC", + "aDC": "aDC", + "cTEC": "cTEC", + "mTEC(I)": "mTEC(I)", + "mTEC(II)": "mTEC(II)", + "mTEC(III)": "mTEC(III)", + "mTEC(IV)": "mTEC(IV)", + "mcTEC": "mcTEC", + "pDC": "pDC", + "αβT(entry)": "alpha_beta_T(entry)", + "γδT": "gamma_delta_T", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py new file mode 100644 index 000000000..825c386f4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py @@ -0,0 +1,68 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_esophagus_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" + + self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" + # Associated HCA project: 
https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.download_meta = None + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.organ = "esophagus" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids-HCATisStab7413619" + + self.obs_key_cellontology_original = "Celltypes" + + self.class_maps = { + "0": { + "B_CD27neg": "B_CD27neg", + "B_CD27pos": "B_CD27pos", + "Blood_vessel": "Blood_vessel", + "Dendritic_Cells": "Dendritic cell", + "Epi_basal": "Basal cell", + "Epi_dividing": "Epi_dividing", + "Epi_stratified": "Stratified epithelial cell", + "Epi_suprabasal": "Epi_suprabasal", + "Epi_upper": "Epi_upper", + "Glands_duct": "Glands_duct", + "Glands_mucous": "Glands_mucous", + "Lymph_vessel": "Lymph_vessel", + "Mast_cell": "Mast cell", + "Mono_macro": "Mono_macro", + "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", + "Stroma": "Stromal cell", + "T_CD4": "T_CD4", + "T_CD8": "T_CD8", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") + self.adata = anndata.read(fn) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py new file mode 100644 index 000000000..aa1aded6c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py @@ -0,0 +1,73 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + 
meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" + + self.download = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + self.download_meta = None + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.state_exact = "healthy" + self.organ = "lung" # ToDo: "parenchyma" + self.organism = "human" + self.protocol = "10x" + self.year = 2020 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene.ids.HCATisStab7509734" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "T_CD4": "T cell lineage", + "Mast_cells": "Mast cells", + "Monocyte": "Monocytes", + "Blood_vessel": "2_Blood vessels", + "Ciliated": "Multiciliated lineage", + "Macrophage_MARCOneg": "Macrophages", + "DC_plasmacytoid": "Dendritic cells", + "DC_1": "Dendritic cells", + "Muscle_cells": "2_Smooth Muscle", + "Macrophage_MARCOpos": "Macrophages", + "T_cells_Dividing": "T cell lineage", + "DC_Monocyte_Dividing": "Dendritic cells", + "B_cells": "B cell lineage", + "T_CD8_CytT": "T cell lineage", + "NK_Dividing": "Innate lymphoid cells", + "T_regulatory": "T cell lineage", + "DC_2": "Dendritic cells", + "Alveolar_Type2": "AT2", + "Plasma_cells": "B cell lineage", + "NK": "Innate lymphoid cells", + "Alveolar_Type1": "AT1", + "Fibroblast": "2_Fibroblast lineage", + "DC_activated": "Dendritic cells", + "Macrophage_Dividing": "Macrophages", + "Lymph_vessel": "Lymphatic EC", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") + self.adata = anndata.read(fn) + + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py new file mode 100644 index 000000000..77325a3ca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py @@ -0,0 +1,77 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_spleen_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" + + self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.download_meta = None + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.organ = "spleen" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids-HCATisStab7463846" + + self.obs_key_cellontology_original = "Celltypes" + + self.class_maps = { + "0": { + "B_Hypermutation": "B_Hypermutation", + "B_T_doublet": "B_T_doublet", + "B_follicular": "B_follicular", + "B_mantle": "B_mantle", + "CD34_progenitor": "CD34_progenitor", + "DC_1": "DC_1", + "DC_2": "DC_2", + "DC_activated": "DC_activated", + "DC_plasmacytoid": "DC_plasmacytoid", + "ILC": "ILC", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "NK_CD160pos": "NK_CD160pos", + "NK_FCGR3Apos": "NK_FCGR3Apos", + "NK_dividing": "NK_dividing", + "Plasma_IgG": "Plasma_IgG", + "Plasma_IgM": "Plasma_IgM", + "Plasmablast": "Plasmablast", + "Platelet": "Platelet", + "T_CD4_conv": "T_CD4_conv", + 
"T_CD4_fh": "T_CD4_fh", + "T_CD4_naive": "T_CD4_naive", + "T_CD4_reg": "T_CD4_reg", + "T_CD8_CTL": "T_CD8_CTL", + "T_CD8_MAIT": "T_CD8_MAIT", + "T_CD8_activated": "T_CD8_activated", + "T_CD8_gd": "T_CD8_gd", + "T_cell_dividing": "Proliferating T cell", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "spleen", "spleen.cellxgene.h5ad") + self.adata = anndata.read(fn) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py new file mode 100644 index 000000000..9527f968a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -0,0 +1,65 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" + + self.download = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" + self.download_meta = None + + self.author = "Wong" + self.doi = "10.15252/embj.2018100811" + self.healthy = True + self.normalization = "raw" + self.organ = "eye" # ToDo: "retina" + 
self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.obs_key_cellontology_original = "CellType" + + self.class_maps = { + "0": { + "Muller cell": "Muller cell", + "amacrine cell": "Amacrine cell", + "microglial cell": "Microglia", + "retinal bipolar neuron type A": "Retinal bipolar neuron type A", + "retinal bipolar neuron type B": "Retinal bipolar neuron type B", + "retinal bipolar neuron type C": "Retinal bipolar neuron type C", + "retinal bipolar neuron type D": "Retinal bipolar neuron type D", + "retinal cone cell": "Retinal cone cell", + "retinal ganglion cell": "Retinal ganglion cell", + "retinal rod cell type A": "Retinal rod cell type A", + "retinal rod cell type B": "Retinal rod cell type B", + "retinal rod cell type C": "Retinal rod cell type C", + "unannotated": "Unknown", + "unspecified": "Unknown", + }, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d_nan/__init__.py b/sfaira/data/dataloaders/loaders/d_nan/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py new file mode 100644 index 000000000..d72ed32d6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py @@ -0,0 +1,48 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class 
Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2018_10x_ica_001_unknown" + + self.download = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_meta = None + + self.author = "Regev" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "blood" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + self.var_ensembl_col = "Accession" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + self.adata = anndata.read_loom(fn) + idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "umbilical cord blood").values, + (self.adata.obs["emptydrops_is_cell"] == "t").values) + self.adata = self.adata[idx].copy() diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py new file mode 100644 index 000000000..808914f61 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -0,0 +1,55 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + """ + This data loader requires manual preprocessing of the raw datafile. To download the data, use the link in the + `.download_website` attribute of this class. 
To create the file required by this dataloader, run the following + python code: + + import scanpy + scanpy.read_10x_h5("pbmc_10k_v3_filtered_feature_bc_matrix.h5").write("pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + + :param path: + :param meta_path: + :param kwargs: + """ + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_blood_2019_10x_10xGenomics_001_unknown" + + self.download = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.download_meta = None + + self.author = "10x Genomics" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "blood" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "gene_ids" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py new file mode 100644 index 000000000..807392226 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py @@ -0,0 +1,48 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = "human_bone_2018_10x_ica_unknown" + + self.download = 
"https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_meta = None + + self.author = "Regev" + self.doi = "d_nan" + self.healthy = True + self.normalization = "raw" + self.organ = "bone" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + self.var_ensembl_col = "Accession" + + self.class_maps = { + "0": {}, + } + + def _load(self, fn=None): + if fn is None: + fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + self.adata = anndata.read_loom(fn) + idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "bone marrow").values, + (self.adata.obs["emptydrops_is_cell"] == "t").values) + self.adata = self.adata[idx].copy() diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py new file mode 100644 index 000000000..16f8d1733 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -0,0 +1,45 @@ +import pydoc +import os +from typing import Union +from warnings import warn +from sfaira.data import DatasetSuperGroup, DatasetGroupDirectoryOriented + + +class DatasetSuperGroupLoaders(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Class that sits on top of a directory of data set directories that each contain a data set group. + + :param file_base: + :param dir_prefix: Prefix to sub-select directories by. Set to "" for no constraints. 
+ :param path: + :param meta_path: + :param cache_path: + """ + # Directory choice hyperparamters: + dir_prefix = "d" + dir_exlcude = [] + # Collect all data loaders from files in directory: + dataset_groups = [] + cwd = os.path.dirname(__file__) + for f in os.listdir(cwd): + if os.path.isdir(os.path.join(cwd, f)): # only directories + if f[:len(dir_prefix)] == dir_prefix and f not in dir_exlcude: # Narrow down to data set directories + path_dsg = pydoc.locate( + "sfaira.sfaira.data.dataloaders.loaders." + f + ".FILE_PATH") + if path_dsg is not None: + dataset_groups.append(DatasetGroupDirectoryOriented( + file_base=path_dsg, + path=path, + meta_path=meta_path, + cache_path=cache_path + )) + else: + warn(f"DatasetGroupDirectoryOriented was None for {f}") + super().__init__(dataset_groups=dataset_groups) diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py new file mode 100644 index 000000000..2a549254b --- /dev/null +++ b/sfaira/data/dataloaders/super_group.py @@ -0,0 +1,35 @@ +from typing import Union + +from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders +from sfaira.data.dataloaders.databases import DatasetSuperGroupDatabases +from sfaira.data import DatasetSuperGroup + + +class DatasetSuperGroupSfaira(DatasetSuperGroup): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Nested super group of data loaders, unifying data set wise data loader SuperGroup and the database + interface SuperGroup. 
+ + :param path: + :param meta_path: + :param cache_path: + """ + super().__init__(dataset_groups=[ + DatasetSuperGroupLoaders( + path=path, + meta_path=meta_path, + cache_path=cache_path, + ), + DatasetSuperGroupDatabases( + path=path, + meta_path=meta_path, + cache_path=cache_path, + ) + ]) diff --git a/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb b/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb deleted file mode 100644 index 156861dd0..000000000 --- a/sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Human Cell Landscape Preprocessing\n", - "This jupyter notebook contains the code that is required to prepare the full Human Cell Landscape single cell dataset (Han et al., 2020. doi: 10.1038/s41586-020-2157-4) for use through sfaira dataloaders. The code downloads, annotates and cleans the provided adata pbject, saves it by sample and copies it into the right folders in your local sfaira dataset repository, so you can use it with sfaira dataloaders. The notebook saves all files in its working directory and requires you to provide the path to your local sfaira dataset repository in the last cell of this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/python/lib/python3.7/site-packages/anndata/_core/anndata.py:21: FutureWarning: pandas.core.index is deprecated and will be removed in a future version. 
The public classes are available in the top-level namespace.\n", - " from pandas.core.index import RangeIndex\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import urllib.request\n", - "import numpy as np\n", - "import anndata as ad\n", - "import scipy.sparse\n", - "import os\n", - "import zipfile\n", - "from sfaira.versions.genome_versions.class_interface import SuperGenomeContainer" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('HCL_Fig1_adata.h5ad', )\n", - "('HCL_Fig1_cell_Info.xlsx', )\n", - "('annotation_rmbatch_data_revised417.zip', )\n" - ] - } - ], - "source": [ - "# download required files from human cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/17727365', 'HCL_Fig1_adata.h5ad'))\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/21758835', 'HCL_Fig1_cell_Info.xlsx'))\n", - "print(urllib.request.urlretrieve('https://ndownloader.figshare.com/files/22447898', 'annotation_rmbatch_data_revised417.zip'))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# extract the downloaded zip archive\n", - "with zipfile.ZipFile('annotation_rmbatch_data_revised417.zip', 'r') as zip_ref:\n", - " zip_ref.extractall('./')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# load data file\n", - "adata = ad.read('HCL_Fig1_adata.h5ad')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "adrenalgland 43476\n", - "stomach 41963\n", - "kidney 40691\n", - "blood 35533\n", - "lung 33698\n", - "brain 30493\n", - "liver 28501\n", - "pancreas 28473\n", - "colon 22301\n", - "pleura 19695\n", - "spleen 15806\n", - 
"malegonad 13211\n", - "omentum 12812\n", - "thyroid 12647\n", - "esophagus 11364\n", - "heart 10783\n", - "trachea 9949\n", - "chorionicvillus 9898\n", - "gallbladder 9769\n", - "artery 9652\n", - "placenta 9595\n", - "bladder 9048\n", - "bone 8704\n", - "cervix 8096\n", - "muscle 7775\n", - "uterus 7694\n", - "skin 6991\n", - "femalegonad 6941\n", - "fallopiantube 6556\n", - "rib 5992\n", - "spinalcord 5916\n", - "rectum 5718\n", - "jejunum 5549\n", - "calvaria 5129\n", - "duodenum 4681\n", - "thymus 4516\n", - "epityphlon 4486\n", - "ileum 3367\n", - "prostate 2445\n", - "ureter 2390\n", - "eye 1880\n", - "hesc 1660\n", - "adipose 1372\n", - "Name: organ, dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# convert to sparse matrix\n", - "adata.X = scipy.sparse.csr_matrix(adata.X).copy()\n", - "\n", - "# harmonise annotations\n", - "for col in ['batch', 'tissue']:\n", - " adata.obs[col] = adata.obs[col].astype('str')\n", - "adata.obs.index = adata.obs.index.str.replace('AdultJeJunum', 'AdultJejunum', regex=True).str.replace('AdultGallBladder', 'AdultGallbladder', regex=True).str.replace('FetalFemaleGonald', 'FetalFemaleGonad', regex=True)\n", - "adata.obs.replace({'AdultJeJunum': 'AdultJejunum', 'AdultGallBladder': 'AdultGallbladder', 'FetalFemaleGonald': 'FetalFemaleGonad'}, regex=True, inplace=True)\n", - "adata.obs.index = [\"-\".join(i.split('-')[:-1]) for i in adata.obs.index]\n", - "\n", - "# load celltype labels and harmonise them\n", - "fig1_anno = pd.read_excel('HCL_Fig1_cell_Info.xlsx', index_col='cellnames')\n", - "fig1_anno.index = fig1_anno.index.str.replace('AdultJeJunum', 'AdultJejunum', regex=True).str.replace('AdultGallBladder', 'AdultGallbladder', regex=True).str.replace('FetalFemaleGonald', 'FetalFemaleGonad', regex=True)\n", - "\n", - "# check that the order of cells and cell labels is the same\n", - "assert np.all(fig1_anno.index == adata.obs.index)\n", - "\n", - "# 
add annotations to adata object and rename columns\n", - "adata.obs = pd.concat([adata.obs, fig1_anno[['cluster', 'stage', 'donor', 'celltype']]], axis=1)\n", - "adata.obs.columns = ['sample', 'tissue', 'n_genes', 'n_counts', 'cluster_global', 'stage', 'donor', 'celltype_global']\n", - "\n", - "# add sample-wise annotations to the full adata object\n", - "df = pd.DataFrame(columns=['Cell_barcode', 'Sample', 'Batch', 'Cell_id', 'Cluster_id', 'Ages', 'Development_stage', 'Method', 'Gender', 'Source', 'Biomaterial', 'Name', 'ident', 'Celltype'])\n", - "for f in os.listdir('annotation_rmbatch_data_revised417/'):\n", - " df1 = pd.read_csv('annotation_rmbatch_data_revised417/'+f, encoding='unicode_escape')\n", - " df = pd.concat([df, df1], sort=True)\n", - "df = df.set_index('Cell_id')\n", - "adata = adata[[i in df.index for i in adata.obs.index]].copy()\n", - "a_idx = adata.obs.index.copy()\n", - "adata.obs = pd.concat([adata.obs, df[['Ages', 'Celltype', 'Cluster_id', 'Gender', 'Method', 'Source']]], axis=1)\n", - "assert np.all(a_idx == adata.obs.index)\n", - "\n", - "# remove mouse cells from the object\n", - "adata = adata[adata.obs['Source'] != 'MCA2.0'].copy()\n", - "\n", - "# tidy up the column names of the obs annotations\n", - "adata.obs.columns = ['sample', 'sub_tissue', 'n_genes', 'n_counts', 'cluster_global', 'dev_stage',\n", - " 'donor', 'celltype_global', 'age', 'celltype_specific', 'cluster_specific', 'gender',\n", - " 'protocol', 'source']\n", - "\n", - "# create some annotations that are used in sfaira\n", - "adata.obs[\"healthy\"] = True\n", - "adata.obs[\"state_exact\"] = 'healthy'\n", - "adata.obs[\"cell_ontology_class\"] = adata.obs[\"celltype_global\"]\n", - "adata.obs[\"cell_ontology_id\"] = None\n", - "\n", - "# convert gene ids to ensembl ids and store both\n", - "gc = SuperGenomeContainer(species='human', genome='Homo_sapiens_GRCh38_97')\n", - "id_dict = gc.names_to_id_dict\n", - "adata.var = adata.var.reset_index().rename({'index': 'names'}, 
axis='columns')\n", - "adata.var['ensembl'] = [id_dict[n] if n in id_dict.keys() else 'n/a' for n in adata.var['names']]\n", - "adata.var.index = adata.var['ensembl'].values\n", - "\n", - "# create a tidy organ annotaion which is then used in sfaira\n", - "adata.obs['organ'] = adata.obs['sub_tissue'] \\\n", - " .str.replace(\"Adult\", \"\") \\\n", - " .str.replace(\"Fetal\", \"\") \\\n", - " .str.replace(\"Neonatal\", \"\") \\\n", - " .str.replace(\"Transverse\", \"\") \\\n", - " .str.replace(\"Sigmoid\", \"\") \\\n", - " .str.replace(\"Ascending\", \"\") \\\n", - " .str.replace(\"Cord\", \"\") \\\n", - " .str.replace(\"Peripheral\", \"\") \\\n", - " .str.replace(\"CD34P\", \"\") \\\n", - " .str.replace(\"Cerebellum\", \"Brain\") \\\n", - " .str.replace(\"TemporalLobe\", \"Brain\") \\\n", - " .str.replace(\"BoneMarrow\", \"Bone\") \\\n", - " .str.replace(\"Spinal\", \"SpinalCord\") \\\n", - " .str.replace(\"Intestine\", \"Stomach\") \\\n", - " .str.replace(\"Eyes\", \"Eye\") \\\n", - " .str.lower()\n", - "\n", - "# print the number of cells per organ\n", - "adata.obs['organ'].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "... storing 'sample' as categorical\n", - "... storing 'sub_tissue' as categorical\n", - "... storing 'dev_stage' as categorical\n", - "... storing 'donor' as categorical\n", - "... storing 'celltype_global' as categorical\n", - "... storing 'age' as categorical\n", - "... storing 'celltype_specific' as categorical\n", - "... storing 'cluster_specific' as categorical\n", - "... storing 'gender' as categorical\n", - "... storing 'protocol' as categorical\n", - "... storing 'source' as categorical\n", - "... storing 'state_exact' as categorical\n", - "... storing 'cell_ontology_class' as categorical\n", - "... storing 'cell_ontology_id' as categorical\n", - "... storing 'organ' as categorical\n", - "... 
storing 'ensembl' as categorical\n" - ] - } - ], - "source": [ - "# write full adata object to disk\n", - "adata.write('HCL_processed.h5ad')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", - "Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n" - ] - } - ], - "source": [ - "# write separate files per sample as used in sfaira\n", - "os.mkdir('hcl_organs/')\n", - "for i in adata.obs['sample'].unique():\n", - " a = adata[adata.obs['sample'] == i].copy()\n", - " a.write('hcl_organs/hcl_{}.h5ad'.format(i))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# copy the seperate h5ad files into your sfaira data repository\n", - "your_datarepository = '/path/to/repository' # path to the folder that contains the 'human' and 'mouse' directories\n", - "for samplefile in os.listdir('hcl_organs/'):\n", - " if samplefile.startswith('hcl_'):\n", - " a = ad.read('hcl_organs/'+samplefile)\n", - " organ = a.obs['organ'][0]\n", - " if organ not in os.listdir(f\"{your_datarepository}/human\"):\n", - " os.mkdir(f\"{your_datarepository}/human/{organ}\")\n", - " a.write(f'{your_datarepository}/human/{organ}/{samplefile}')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/sfaira/data/human/__init__.py b/sfaira/data/human/__init__.py deleted file mode 100644 index d986a9781..000000000 --- a/sfaira/data/human/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -from .adipose import DatasetGroupAdipose -from .adrenalgland import DatasetGroupAdrenalgland -from .mixed import DatasetGroupMixed -from .artery import DatasetGroupArtery -from .bladder import DatasetGroupBladder -from .blood import DatasetGroupBlood -from .bone import DatasetGroupBone -from .brain import DatasetGroupBrain -from .calvaria import DatasetGroupCalvaria -from .cervix 
import DatasetGroupCervix -from .chorionicvillus import DatasetGroupChorionicvillus -from .colon import DatasetGroupColon -from .duodenum import DatasetGroupDuodenum -from .epityphlon import DatasetGroupEpityphlon -from .esophagus import DatasetGroupEsophagus -from .eye import DatasetGroupEye -from .fallopiantube import DatasetGroupFallopiantube -from .femalegonad import DatasetGroupFemalegonad -from .gallbladder import DatasetGroupGallbladder -from .heart import DatasetGroupHeart -from .hesc import DatasetGroupHesc -from .ileum import DatasetGroupIleum -from .jejunum import DatasetGroupJejunum -from .kidney import DatasetGroupKidney -from .liver import DatasetGroupLiver -from .lung import DatasetGroupLung -from .malegonad import DatasetGroupMalegonad -from .muscle import DatasetGroupMuscle -from .omentum import DatasetGroupOmentum -from .pancreas import DatasetGroupPancreas -from .placenta import DatasetGroupPlacenta -from .pleura import DatasetGroupPleura -from .prostate import DatasetGroupProstate -from .rectum import DatasetGroupRectum -from .rib import DatasetGroupRib -from .skin import DatasetGroupSkin -from .spinalcord import DatasetGroupSpinalcord -from .spleen import DatasetGroupSpleen -from .stomach import DatasetGroupStomach -from .thymus import DatasetGroupThymus -from .thyroid import DatasetGroupThyroid -from .trachea import DatasetGroupTrachea -from .ureter import DatasetGroupUreter -from .uterus import DatasetGroupUterus diff --git a/sfaira/data/human/adipose/__init__.py b/sfaira/data/human/adipose/__init__.py deleted file mode 100644 index 93e95d11c..000000000 --- a/sfaira/data/human/adipose/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_adipose import DatasetGroupAdipose diff --git a/sfaira/data/human/adipose/external.py b/sfaira/data/human/adipose/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/adipose/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase 
-from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adipose/human_adipose.py b/sfaira/data/human/adipose/human_adipose.py deleted file mode 100644 index ca12c0d40..000000000 --- a/sfaira/data/human/adipose/human_adipose.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_adipose_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupAdipose(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py b/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py deleted file mode 100644 index d72bb0627..000000000 --- a/sfaira/data/human/adipose/human_adipose_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'adipose' - self.sub_tissue = 'AdultAdipose' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adipose", "hcl_AdultAdipose_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/adrenalgland/__init__.py b/sfaira/data/human/adrenalgland/__init__.py deleted file mode 100644 index 4cfcfad3a..000000000 --- a/sfaira/data/human/adrenalgland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_adrenalgland import DatasetGroupAdrenalgland diff --git a/sfaira/data/human/adrenalgland/external.py b/sfaira/data/human/adrenalgland/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/adrenalgland/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland.py b/sfaira/data/human/adrenalgland/human_adrenalgland.py deleted file mode 100644 index af942823d..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_adrenalgland_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_adrenalgland_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_adrenalgland_2020_microwell_han_003 import Dataset as Dataset0003 -from .human_adrenalgland_2020_microwell_han_004 import Dataset as Dataset0004 -from .human_adrenalgland_2020_microwell_han_005 import Dataset as Dataset0005 -from .human_adrenalgland_2020_microwell_han_006 import Dataset as Dataset0006 - - -class DatasetGroupAdrenalgland(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if 
available: - try: - from sfaira_extension.data.human import DatasetGroupAdrenalgland - self.datasets.update(DatasetGroupAdrenalgland(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py deleted file mode 100644 index e9db32eb1..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'NeonatalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_NeonatalAdrenalGland_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py deleted file mode 100644 index ec1f52bdc..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py deleted file mode 100644 index 0deb5571f..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py deleted file mode 100644 index 310c19e76..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'AdultAdrenalGland' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py deleted file mode 100644 index 120868718..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'FetalAdrenalGland' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_FetalAdrenalGland_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py deleted file mode 100644 index 437aab383..000000000 --- a/sfaira/data/human/adrenalgland/human_adrenalgland_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'adrenalgland' - self.sub_tissue = 'AdultAdrenalGland' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "adrenalgland", "hcl_AdultAdrenalGland_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/artery/__init__.py b/sfaira/data/human/artery/__init__.py deleted file mode 100644 index b6d2b14a5..000000000 --- a/sfaira/data/human/artery/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_artery import DatasetGroupArtery diff --git a/sfaira/data/human/artery/external.py b/sfaira/data/human/artery/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/artery/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/artery/human_artery.py b/sfaira/data/human/artery/human_artery.py deleted file mode 100644 index 7e51a6d0c..000000000 --- a/sfaira/data/human/artery/human_artery.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_artery_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupArtery(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupArtery - self.datasets.update(DatasetGroupArtery(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py b/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py deleted file mode 100644 index 3ea6ffc35..000000000 --- a/sfaira/data/human/artery/human_artery_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'artery' - self.sub_tissue = 'AdultArtery' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "artery", "hcl_AdultArtery_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/bladder/__init__.py b/sfaira/data/human/bladder/__init__.py deleted file mode 100644 index e85cb5318..000000000 --- a/sfaira/data/human/bladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_bladder import DatasetGroupBladder diff --git a/sfaira/data/human/bladder/external.py b/sfaira/data/human/bladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/bladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bladder/human_bladder.py b/sfaira/data/human/bladder/human_bladder.py deleted file mode 100644 index 6efc756b8..000000000 --- a/sfaira/data/human/bladder/human_bladder.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_bladder_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_bladder_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_bladder_2020_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupBladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py deleted file mode 100644 index 5bb5d35d8..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_001.py 
+++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py deleted file mode 100644 index 21ba1594d..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultBladder_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py b/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py deleted file mode 100644 index 29312ed94..000000000 --- a/sfaira/data/human/bladder/human_bladder_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'bladder' - self.sub_tissue = 'AdultBladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bladder", "hcl_AdultGallbladder_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/__init__.py b/sfaira/data/human/blood/__init__.py deleted file mode 100644 index 54879d8e2..000000000 --- a/sfaira/data/human/blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_blood import DatasetGroupBlood diff --git a/sfaira/data/human/blood/external.py b/sfaira/data/human/blood/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/blood/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/blood/human_blood.py b/sfaira/data/human/blood/human_blood.py deleted file mode 100644 index 1e439ff31..000000000 --- a/sfaira/data/human/blood/human_blood.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_blood_2018_10x_ica_001 import Dataset as Dataset0001 -from .human_blood_2019_10x_10xGenomics_001 import Dataset as Dataset0002 -from .human_blood_2020_microwell_han_001 import Dataset as Dataset0003 -from .human_blood_2020_microwell_han_002 import Dataset as Dataset0004 -from .human_blood_2020_microwell_han_003 import Dataset as Dataset0005 -from .human_blood_2020_microwell_han_004 import Dataset as Dataset0006 -from .human_blood_2020_microwell_han_005 import Dataset as Dataset0007 -from .human_blood_2020_microwell_han_006 import Dataset as Dataset0008 -from .human_blood_2020_microwell_han_007 import Dataset as Dataset0009 - - -class DatasetGroupBlood(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - 
Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py b/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py deleted file mode 100644 index 4d733a8b5..000000000 --- a/sfaira/data/human/blood/human_blood_2018_10x_ica_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. 
For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import numpy as np - adata = anndata.read_loom('c95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom') - idx = np.logical_and((adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, (adata.obs['emptydrops_is_cell'] == 't').values) - adata = adata[idx].copy() - adata.write('ica_bone.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2018_10x_ica_001_unknown" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_website_meta = None - self.organ = "blood" - self.sub_tissue = "umbilical_cord_blood" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'umbilical cord blood').values, - (self.adata.obs['emptydrops_is_cell'] == 't').values) - self.adata = self.adata[idx].copy() - - else: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "ica_blood.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py deleted file mode 100644 index 72581d7ee..000000000 --- a/sfaira/data/human/blood/human_blood_2019_10x_10xGenomics_001.py +++ /dev/null @@ -1,65 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader requires manual preprocessing of the raw datafile. To download the data, use the link in the - `.download_website` attribute of this class. 
To create the file required by this dataloader, run the following - python code: - - import scanpy - scanpy.read_10x_h5('pbmc_10k_v3_filtered_feature_bc_matrix.h5').write('pbmc_10k_v3_filtered_feature_bc_matrix.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2019_10x_10xGenomics_001_unknown" - self.download_website = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.download_website_meta = None - self.organ = "blood" - self.sub_tissue = "pbmcs" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = '10x Genomics' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py deleted file mode 100644 index 9745f5970..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - 
self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py deleted file mode 100644 index ef055d7ba..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_PeripheralBlood_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py deleted file mode 100644 index 78c553a83..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBlood' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py deleted file mode 100644 index 1e80922d4..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'AdultPeripheralBlood' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_AdultPeripheralBlood_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py deleted file mode 100644 index 0ae2a490e..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBloodCD34P' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py deleted file mode 100644 index b745cb32a..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBloodCD34P' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBloodCD34P_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py b/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py deleted file mode 100644 index a96dc2fb5..000000000 --- a/sfaira/data/human/blood/human_blood_2020_microwell_han_007.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'blood' - self.sub_tissue = 'CordBlood' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "blood", "hcl_CordBlood_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bone/__init__.py b/sfaira/data/human/bone/__init__.py deleted file mode 100644 index bdf361783..000000000 --- a/sfaira/data/human/bone/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_bone import DatasetGroupBone diff --git a/sfaira/data/human/bone/external.py b/sfaira/data/human/bone/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/bone/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/bone/human_bone.py b/sfaira/data/human/bone/human_bone.py deleted file mode 100644 index 3a8842e1f..000000000 --- a/sfaira/data/human/bone/human_bone.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_bone_2018_10x_ica_001 import Dataset as Dataset0001 -from .human_bone_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_bone_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupBone(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py b/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py deleted file mode 100644 index 85b019bbe..000000000 --- a/sfaira/data/human/bone/human_bone_2018_10x_ica_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import numpy as np - adata = anndata.read_loom('c95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom') - idx = np.logical_and((adata.obs['derived_organ_parts_label'] == 'bone marrow').values, (adata.obs['emptydrops_is_cell'] == 't').values) - adata = adata[idx].copy() - adata.write('ica_bone.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2018_10x_ica_unknown" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_website_meta = None - self.organ = "bone" - self.sub_tissue = "bone_marrow" - self.annotated = False - - self.class_maps = { - 
"0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs['derived_organ_parts_label'] == 'bone marrow').values, - (self.adata.obs['emptydrops_is_cell'] == 't').values) - self.adata = self.adata[idx].copy() - - else: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "ica_bone.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Regev' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = None - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='Accession') diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py deleted file mode 100644 index eace2ce68..000000000 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - 
""" - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'bone' - self.sub_tissue = 'AdultBoneMarrow' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - 
self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py b/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py deleted file mode 100644 index 3d7d21c78..000000000 --- a/sfaira/data/human/bone/human_bone_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'bone' - self.sub_tissue = 'AdultBoneMarrow' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "bone", "hcl_BoneMarrow_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/__init__.py b/sfaira/data/human/brain/__init__.py deleted file mode 100644 index c1a0ef08b..000000000 --- a/sfaira/data/human/brain/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_brain import DatasetGroupBrain diff --git a/sfaira/data/human/brain/external.py b/sfaira/data/human/brain/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/brain/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/brain/human_brain.py b/sfaira/data/human/brain/human_brain.py deleted file mode 100644 index 016c8fce5..000000000 --- a/sfaira/data/human/brain/human_brain.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_brain_2017_DroNcSeq_habib_001 import Dataset as Dataset0001 -from .human_brain_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_brain_2020_microwell_han_002 import Dataset as Dataset0003 -from .human_brain_2020_microwell_han_003 import Dataset as Dataset0004 -from .human_brain_2020_microwell_han_004 import Dataset as Dataset0005 -from .human_brain_2020_microwell_han_005 import Dataset as Dataset0006 -from .human_brain_2020_microwell_han_006 import Dataset as Dataset0007 - - -class 
DatasetGroupBrain(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py deleted file mode 100644 index 6610531c7..000000000 --- a/sfaira/data/human/brain/human_brain_2017_DroNcSeq_habib_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" - self.download_website = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" - self.download_website_meta = None - self.organ = "brain" - self.sub_tissue = "hippocampus, prefrontal cortex" - self.annotated = True - - self.class_maps = { - "0": { - 'exPFC1': 'Glutamatergic neurons from the PFC 1', - 'exPFC2': 'Glutamatergic neurons from the PFC 2', - 'exDG': 'Granule neurons from the hip dentate gyrus region', - 'GABA1': 'GABAergic interneurons 1', - 'GABA2': 'GABAergic interneurons 2', - 'exCA3': 'Pyramidal neurons from the hip CA region 1', - 'exCA1': 'Pyramidal neurons from the hip CA region 2', - 'ODC1': 'Oligodendrocytes', - 'ASC1': 'Astrocytes 1', - 'OPC': 'Oligodendrocyte precursors', - 'ASC2': 'Astrocytes 2', - 'Unclassified': 'Unknown', - 'MG': 'Microglia', - 'NSC': 'Neuronal stem cells', - 'END': 'Endothelial cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/nmeth.4407" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'DroNcSeq' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py deleted file mode 100644 index a3da7a14e..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py deleted file mode 100644 index ffa89b995..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_002.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py deleted file mode 100644 index d06a74c5e..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_003.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py deleted file mode 100644 index a5e5d3798..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_004.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'AdultTemporalLobe' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', 
- 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_AdultTemporalLobe_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py deleted file mode 100644 index da1d294fc..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_005.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'FetalBrain' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_FetalBrain_6.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py b/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py deleted file mode 100644 index 0add7c561..000000000 --- a/sfaira/data/human/brain/human_brain_2020_microwell_han_006.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'brain' - self.sub_tissue = 'AdultCerebellum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Astrocyte': 'Astrocyte', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Endothelial cell (APC)': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Fetal enterocyte ': 'Fetal enterocyte ', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal Neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Oligodendrocyte': 'Oligodendrocytes', - 'Primordial germ cell': 'Primordial germ cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 
'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'Neuronal stem cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "brain", "hcl_AdultCerebellum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/calvaria/__init__.py b/sfaira/data/human/calvaria/__init__.py deleted file mode 100644 index dfad96236..000000000 --- a/sfaira/data/human/calvaria/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_calvaria import DatasetGroupCalvaria diff --git a/sfaira/data/human/calvaria/external.py b/sfaira/data/human/calvaria/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/calvaria/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/calvaria/human_calvaria.py b/sfaira/data/human/calvaria/human_calvaria.py deleted 
file mode 100644 index bd37cd357..000000000 --- a/sfaira/data/human/calvaria/human_calvaria.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_calvaria_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupCalvaria(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCalvaria - self.datasets.update(DatasetGroupCalvaria(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py b/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py deleted file mode 100644 index 6c18f11c7..000000000 --- a/sfaira/data/human/calvaria/human_calvaria_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'calvaria' - self.sub_tissue = 'FetalCalvaria' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "calvaria", "hcl_FetalCalvaria_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/cervix/__init__.py b/sfaira/data/human/cervix/__init__.py deleted file mode 100644 index 5b71011e8..000000000 --- a/sfaira/data/human/cervix/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_cervix import DatasetGroupCervix diff --git a/sfaira/data/human/cervix/external.py b/sfaira/data/human/cervix/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/cervix/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/cervix/human_cervix.py b/sfaira/data/human/cervix/human_cervix.py deleted file mode 100644 index 9468a333b..000000000 --- a/sfaira/data/human/cervix/human_cervix.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_cervix_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupCervix(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCervix - self.datasets.update(DatasetGroupCervix(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py b/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py deleted file mode 100644 index 400d4808f..000000000 --- a/sfaira/data/human/cervix/human_cervix_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'cervix' - self.sub_tissue = 'AdultCervix' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "cervix", "hcl_AdultCervix_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/chorionicvillus/__init__.py b/sfaira/data/human/chorionicvillus/__init__.py deleted file mode 100644 index 1265da611..000000000 --- a/sfaira/data/human/chorionicvillus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_chorionicvillus import DatasetGroupChorionicvillus diff --git a/sfaira/data/human/chorionicvillus/external.py b/sfaira/data/human/chorionicvillus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/chorionicvillus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus.py deleted file mode 100644 index 030412ab7..000000000 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_chorionicvillus_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupChorionicvillus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupChorionicvillus - self.datasets.update(DatasetGroupChorionicvillus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py deleted file mode 100644 index 1c9bfb424..000000000 --- a/sfaira/data/human/chorionicvillus/human_chorionicvillus_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import 
os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'chorionicvillus' - self.sub_tissue = 'ChorionicVillus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "chorionicvillus", "hcl_ChorionicVillus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated 
- self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/colon/__init__.py b/sfaira/data/human/colon/__init__.py deleted file mode 100644 index 79ba22cee..000000000 --- a/sfaira/data/human/colon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_colon import DatasetGroupColon diff --git a/sfaira/data/human/colon/external.py b/sfaira/data/human/colon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/colon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/colon/human_colon.py b/sfaira/data/human/colon/human_colon.py deleted file mode 100644 index fa2642d40..000000000 --- a/sfaira/data/human/colon/human_colon.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_colon_2019_10x_kinchen_001 import Dataset as Dataset0001 -from .human_colon_2019_10x_smilie_001 import Dataset as Dataset0002 -from .human_colon_2019_10x_wang_001 import Dataset as Dataset0003 -from .human_colon_2020_10x_james_001 import Dataset as Dataset0004 -from .human_colon_2020_microwell_han_001 import Dataset as Dataset0005 -from .human_colon_2020_microwell_han_002 import Dataset as Dataset0006 -from .human_colon_2020_microwell_han_003 import Dataset as Dataset0007 -from .human_colon_2020_microwell_han_004 import Dataset as Dataset0008 - - -class DatasetGroupColon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - 
Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py b/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py deleted file mode 100644 index 616dc5927..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_kinchen_001.py +++ /dev/null @@ -1,150 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class and obtain cell type annotations ('hc_meta_data_stromal_with_donor.txt' and - 'uc_meta_data_stromal_with_donor.txt') directly from the authors of the paper. For (up - to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the self.load() method. 
For - this, you need to preprocess the raw files as below and place the resulting h5ad file in the data folder of this - organ: - - import anndata - import pandas as pd - - adata = anndata.read_loom('f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom') - ctuc = pd.read_csv('uc_meta_data_stromal_with_donor.txt', sep='\t') - cthealthy = pd.read_csv('hc_meta_data_stromal_with_donor.txt', sep='\t') - - adata = adata[adata.obs['emptydrops_is_cell'] == 't'].copy() - adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() - - uc = adata[adata.obs['donor_organism.diseases.ontology_label'] == "ulcerative colitis (disease)"].copy() - bcuc = [i.split('-')[0] for i in ctuc['Barcode']] - seluc = [] - for i in uc.obs['barcode']: - seluc.append((uc.obs['barcode'].str.count(i).sum() == 1) and i in bcuc) - uc = uc[seluc].copy() - ctuc.index = [i.split('-')[0] for i in ctuc['Barcode']] - uc.obs['celltype'] = [ctuc.loc[i]['Cluster'] for i in uc.obs['barcode']] - uc.var = uc.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - - healthy = adata[adata.obs['donor_organism.diseases.ontology_label'] == "normal"].copy() - bchealthy = [i.split('-')[0] for i in cthealthy['Barcode']] - selhealthy = [] - for i in healthy.obs['barcode']: - selhealthy.append((healthy.obs['barcode'].str.count(i).sum() == 1) and i in bchealthy) - healthy = healthy[selhealthy].copy() - cthealthy.index = [i.split('-')[0] for i in cthealthy['Barcode']] - healthy.obs['celltype'] = [cthealthy.loc[i]['Cluster'] for i in healthy.obs['barcode']] - healthy.var = healthy.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - - adata = healthy.concatenate(uc) - adata.write('kinchenetal.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - 
self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" - self.download_website = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" - self.download_website_meta = 'private' - self.organ = "colon" - self.sub_tissue = "lamina propria of mucosa of colon" - self.annotated = True - - self.class_maps = { - "0": { - "Endothelial 1": "Endothelial", - "Endothelial 2": "Endothelial", - "Glial": "Glial cells", - "Myofibroblasts": "Myofibroblasts", - "Pericyte 1": "Pericytes", - "Pericyte 2": "Pericytes", - "Pericytes": "Pericytes", - "Plasma Cells": "Plasma Cells", - "Smooth Muscle": "Smooth Muscle", - "Stromal 1": "Stromal", - "Stromal 2a": "Stromal", - "Stromal 2b": "Stromal", - "Stromal 3": "Stromal", - "Stromal 4": "Stromal", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.path, "human", "colon", "uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") - ] - adata = anndata.read_loom(fn[0]) - ctuc = pd.read_csv(fn[1], sep='\t') - cthealthy = pd.read_csv(fn[2], sep='\t') - adata = adata[adata.obs['emptydrops_is_cell'] == 't'].copy() - adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() - uc = adata[adata.obs['donor_organism.diseases.ontology_label'] == "ulcerative colitis (disease)"].copy() - bcuc = [i.split('-')[0] for i in ctuc['Barcode']] - seluc = [] - for i in uc.obs['barcode']: - seluc.append((uc.obs['barcode'].str.count(i).sum() == 1) and i in bcuc) - uc = uc[seluc].copy() - ctuc.index = [i.split('-')[0] for i in ctuc['Barcode']] - uc.obs['celltype'] = [ctuc.loc[i]['Cluster'] for i in uc.obs['barcode']] - uc.var = 
uc.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - healthy = adata[adata.obs['donor_organism.diseases.ontology_label'] == "normal"].copy() - bchealthy = [i.split('-')[0] for i in cthealthy['Barcode']] - selhealthy = [] - for i in healthy.obs['barcode']: - selhealthy.append((healthy.obs['barcode'].str.count(i).sum() == 1) and i in bchealthy) - healthy = healthy[selhealthy].copy() - cthealthy.index = [i.split('-')[0] for i in cthealthy['Barcode']] - healthy.obs['celltype'] = [cthealthy.loc[i]['Cluster'] for i in healthy.obs['barcode']] - healthy.var = healthy.var.reset_index().rename(columns={'index': 'names'}).set_index('featurekey') - self.adata = healthy.concatenate(uc) - - else: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "kinchenetal.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Simmons' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.08.067" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [line == 'normal' for line in - self.adata.obs['donor_organism.diseases.ontology_label']] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['donor_organism.diseases.ontology_label'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact]\ - .cat.rename_categories({'normal': 'healthy', 'ulcerative colitis (disease)': 'ulcerative colitis'}) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col='Accession') diff --git a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py b/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py deleted file mode 100644 index 3f5493506..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_smilie_001.py +++ /dev/null @@ -1,106 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. This dataloader only provides the subset of the published sata which has been made available through the - covid-19 Cell Atlas. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - self.download_website = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colonic epithelium" - self.annotated = True - - self.class_maps = { - "0": { - 'Cycling TA': 'Cycling TA', - 'TA 1': 'TA 1', - 'TA 2': 'TA 2', - 'Immature Enterocytes 2': 'Immature Enterocytes 2', - 'Immature Enterocytes 1': 'Immature Enterocytes 1', - 'Enterocyte Progenitors': 'Enterocyte Progenitors', - 'Immature Goblet': 'Immature Goblet', - 'Enterocytes': 'Enterocytes', - 'Secretory TA': 'Secretory TA', - 'Best4+ Enterocytes': 'Best4+ Enterocytes', - 'CD8+ IELs': 'CD8+ IELs', - 'Goblet': 'Goblet cells', - 'Stem': 'Stem cells', 
- 'Tuft': 'Tuft', - 'Follicular': 'Follicular', - 'Enteroendocrine': 'Enteroendocrine cells', - 'Plasma': 'Plasma Cells', - 'CD4+ Memory': 'CD4+ Memory', - 'CD8+ LP': 'CD8+ LP', - 'CD69- Mast': 'CD69- Mast', - 'Macrophages': 'Macrophage', - 'GC': 'Glial cells', - 'Cycling B': 'B cell cycling', - 'CD4+ Activated Fos-hi': 'CD4+ T Activated Fos-hi', - 'CD4+ Activated Fos-lo': 'CD4+ T Activated Fos-lo', - 'NKs': 'NK', - 'Cycling T': 'Cycling T', - 'M cells': 'M cells', - 'CD69+ Mast': 'CD69+ Mast', - 'MT-hi': 'MT-hi', - 'CD8+ IL17+': 'CD8+ IL17+', - 'CD4+ PD1+': 'CD4+ PD1+', - 'DC2': 'DC2', - 'Treg': 'Treg', - 'ILCs': 'ILC', - 'DC1': 'DC1', - 'WNT2B+ Fos-lo 1': 'WNT2B+ Fos-lo 1', - 'WNT5B+ 2': 'WNT5B+ 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Regev" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.06.029" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py b/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py deleted file mode 100644 index 6ab9ba0bd..000000000 --- a/sfaira/data/human/colon/human_colon_2019_10x_wang_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Enterocyte Progenitors', - 'Enterocyte': 'Enterocytes', - 'Goblet': 'Goblet cells', - 'TA': 'TA', - 'Paneth-like': 'Paneth cells', - 'Stem Cell': 'Stem cells', - 'Enteriendocrine': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py b/sfaira/data/human/colon/human_colon_2020_10x_james_001.py deleted file mode 100644 index 0bd47f057..000000000 --- a/sfaira/data/human/colon/human_colon_2020_10x_james_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. This dataloader only provides the subset of the published sata which has been made available through the - covid-19 Cell Atlas. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" - self.download_website = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" - self.download_website_meta = None - self.organ = "colon" - self.sub_tissue = "colonic immune cells" - self.annotated = True - - self.class_maps = { - "0": { - 'Activated CD4 T': 'Activated CD4 T', - 'B cell IgA Plasma': 'B cell IgA Plasma', - 'B cell IgG Plasma': 'B cell IgG Plasma', - 'B cell cycling': 'B cell cycling', - 'B cell memory': 'B cell memory', - 'CD8 T': 'CD8 T', - 'Follicular B cell': 'Follicular', - 'ILC': 'ILC', - 'LYVE1 Macrophage': 'LYVE1 Macrophage', - 'Lymphoid DC': 'Lymphoid DC', - 'Macrophage': 'Macrophage', - 'Mast': 'Mast cell', - 'Monocyte': 'Monocyte', - 'NK': 'NK', - 'Tcm': 'Tcm', - 'Tfh': 'Tfh', - 'Th1': 'Th1', - 'Th17': 'Th17', - 'Treg': 'Treg', - 'cDC1': 'DC1', - 'cDC2': 'DC2', - 'cycling DCs': 'cycling DCs', - 'cycling gd T': 'cycling gd T', - 'gd T': 'gd T', - 'pDC': 'pDC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41590-020-0602-z" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['cell_type'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py deleted file mode 100644 index c83fd925f..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultAscendingColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultAscendingColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py deleted file mode 100644 index 3cd8d5469..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_002.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultTransverseColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py deleted file mode 100644 index 56d1f309a..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_003.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultTransverseColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py b/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py deleted file mode 100644 index acdcef798..000000000 --- a/sfaira/data/human/colon/human_colon_2020_microwell_han_004.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Colon' - self.sub_tissue = 'AdultSigmoidColon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Enterocyte progenitor': 'Enterocyte Progenitors', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Enterocyte': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 'T cell': 'T cell', - 'Stromal cell': 'Stromal', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'Smooth muscle cell': 'Smooth Muscle', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'Endothelial cell': 'Endothelial', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Enterocyte Progenitors', - 'Fibroblast': 'Fibroblast', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial', - 'Fetal stromal cell': 'Stromal', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Monocyte': 'Monocyte', - 'Erythroid cell': 'Erythroid cell', - 'Fetal endocrine cell': 'Enteroendocrine cells', - 'Primordial germ cell': 'Primordial germ cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'M2 Macrophage': 'Macrophage', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "colon", "hcl_AdultColon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/duodenum/__init__.py b/sfaira/data/human/duodenum/__init__.py deleted file mode 100644 index b8a98c3be..000000000 --- a/sfaira/data/human/duodenum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_duodenum import DatasetGroupDuodenum diff --git a/sfaira/data/human/duodenum/external.py b/sfaira/data/human/duodenum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/duodenum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/duodenum/human_duodenum.py b/sfaira/data/human/duodenum/human_duodenum.py deleted file mode 100644 index c4d3b8bba..000000000 --- a/sfaira/data/human/duodenum/human_duodenum.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from 
.human_duodenum_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupDuodenum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupDuodenum - self.datasets.update(DatasetGroupDuodenum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py b/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py deleted file mode 100644 index be0e66b06..000000000 --- a/sfaira/data/human/duodenum/human_duodenum_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'duodenum' - self.sub_tissue = 'AdultDuodenum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "duodenum", "hcl_AdultDuodenum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/epityphlon/__init__.py b/sfaira/data/human/epityphlon/__init__.py deleted file mode 100644 index 1463f978a..000000000 --- a/sfaira/data/human/epityphlon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_epityphlon import DatasetGroupEpityphlon diff --git a/sfaira/data/human/epityphlon/external.py b/sfaira/data/human/epityphlon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/epityphlon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/epityphlon/human_epityphlon.py b/sfaira/data/human/epityphlon/human_epityphlon.py deleted file mode 100644 index 3330e03bf..000000000 --- a/sfaira/data/human/epityphlon/human_epityphlon.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_epityphlon_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupEpityphlon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEpityphlon - self.datasets.update(DatasetGroupEpityphlon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py deleted file mode 100644 index e68044b9e..000000000 --- a/sfaira/data/human/epityphlon/human_epityphlon_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a 
dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'epityphlon' - self.sub_tissue = 'AdultEpityphlon' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "epityphlon", "hcl_AdultEpityphlon_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = 
self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/esophagus/__init__.py b/sfaira/data/human/esophagus/__init__.py deleted file mode 100644 index 8dc074247..000000000 --- a/sfaira/data/human/esophagus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_esophagus import DatasetGroupEsophagus diff --git a/sfaira/data/human/esophagus/external.py b/sfaira/data/human/esophagus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/esophagus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/esophagus/human_esophagus.py b/sfaira/data/human/esophagus/human_esophagus.py deleted file mode 100644 index 2e3df391e..000000000 --- a/sfaira/data/human/esophagus/human_esophagus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_esophagus_2019_10x_madissoon_001 import Dataset as Dataset0001 -from .human_esophagus_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_esophagus_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupEsophagus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEsophagus - self.datasets.update(DatasetGroupEsophagus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py 
b/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py deleted file mode 100644 index 14c097816..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2019_10x_madissoon_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2019_10x_madissoon_001_10.1101/741405" - self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" - # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 - self.download_website_meta = None - self.organ = "esophagus" - self.sub_tissue = "esophagus" - self.annotated = True - - self.class_maps = { - "0": { - "B_CD27neg": "B_CD27neg", - "B_CD27pos": "B_CD27pos", - "Blood_vessel": "Blood_vessel", - "Dendritic_Cells": "Dendritic cell", - "Epi_basal": "Basal cell", - "Epi_dividing": "Epi_dividing", - "Epi_stratified": "Stratified epithelial cell", - "Epi_suprabasal": "Epi_suprabasal", - "Epi_upper": "Epi_upper", - "Glands_duct": "Glands_duct", - "Glands_mucous": "Glands_mucous", - "Lymph_vessel": "Lymph_vessel", - "Mast_cell": "Mast cell", - "Mono_macro": "Mono_macro", - "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", - "Stroma": "Stromal cell", - "T_CD4": "T_CD4", - "T_CD8": "T_CD8", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or 
not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7413619', - new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) \ No newline at end of file diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py deleted file mode 100644 index 860dabe23..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Esophagus' - self.sub_tissue = 'AdultEsophagus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Basal cell': 'Basal cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'T cell': 'T cell', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Stromal cell': 'Stromal cell', - 'Monocyte': 'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Fetal stromal cell': 'Fetal stromal cell', - 'CB CD34+': 'CB CD34+', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Loop of Henle': 'Loop of Henle', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py b/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py deleted file mode 100644 index ab5d04b70..000000000 --- a/sfaira/data/human/esophagus/human_esophagus_2020_microwell_han_002.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Esophagus' - self.sub_tissue = 'AdultEsophagus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Basal cell': 'Basal cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Macrophage': 'Macrophage', - 'B cell': 'B cell', - 'T cell': 'T cell', - 'Dendritic cell': 'Dendritic cell', - 'Mast cell': 'Mast cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Stromal cell': 'Stromal cell', - 'Monocyte': 'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Fetal stromal cell': 'Fetal stromal cell', - 'CB CD34+': 'CB CD34+', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Loop of Henle': 'Loop of Henle', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "hcl_AdultEsophagus_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/eye/__init__.py b/sfaira/data/human/eye/__init__.py deleted file mode 100644 index 345236753..000000000 --- a/sfaira/data/human/eye/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_eye import DatasetGroupEye diff --git a/sfaira/data/human/eye/external.py b/sfaira/data/human/eye/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/eye/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/eye/human_eye.py b/sfaira/data/human/eye/human_eye.py deleted file mode 100644 index 5a3c43f93..000000000 --- a/sfaira/data/human/eye/human_eye.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_eye_2019_10x_lukowski_001 import Dataset as Dataset0001 -from 
.human_eye_2019_10x_menon_001 import Dataset as Dataset0002 -from .human_eye_2019_10x_voigt_001 import Dataset as Dataset0003 -from .human_eye_2020_microwell_han_001 import Dataset as Dataset0004 - - -class DatasetGroupEye(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEye - self.datasets.update(DatasetGroupEye(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py b/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py deleted file mode 100644 index f73d4e388..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_lukowski_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'Muller cell': 'Muller cell', - 'amacrine cell': 'Amacrine cell', - 'microglial cell': 'Microglia', - 'retinal bipolar neuron type A': 'Retinal bipolar neuron type A', - 'retinal bipolar neuron type B': 'Retinal bipolar neuron type B', - 'retinal bipolar neuron type C': 'Retinal bipolar neuron type C', - 'retinal bipolar neuron type D': 'Retinal bipolar neuron type D', - 'retinal cone cell': 'Retinal cone cell', - 'retinal ganglion cell': 'Retinal ganglion cell', - 'retinal rod cell type A': 'Retinal rod cell type A', - 'retinal rod cell type B': 'Retinal rod cell type B', - 'retinal rod cell type C': 'Retinal rod cell type C', - 'unannotated': 'Unknown', - 'unspecified': 'Unknown', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Wong' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.15252/embj.2018100811' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
'10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py b/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py deleted file mode 100644 index 5701ac731..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_menon_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" - self.download_website = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'ACs': 'Amacrine cell', - 'BPs': 'BPs', - 'Cones': 'Retinal cone cell', - 'Endo': 'Endothelial cell', - 'HCs': 'Horizontal cells', - 'Macroglia': 'Macroglia', - 'Microglia': 'Microglia', - 'RGCs': 'Retinal ganglion cell', - 'Rods': 'Rods', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Hafler' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-12780-8' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py b/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py deleted file mode 100644 index 31dfce8a7..000000000 --- a/sfaira/data/human/eye/human_eye_2019_10x_voigt_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" - self.download_website = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" - self.download_website_meta = None - self.organ = "eye" - self.sub_tissue = "retina" - self.annotated = True - - self.class_maps = { - "0": { - 'B-cell': 'B-cell', - 'Endothelial': 'Endothelial cell', - 'Fibroblast': 'Fibroblast', - 'Macrophage': 'Macrophage', - 'Mast-cell': 'Mast-cell', - 'Melanocyte': 'Melanocyte', - 'Pericyte': 'Pericyte', - 'RPE': 'Retinal pigment epithelium', - 'Schwann1': 'Schwann1', - 'Schwann2': 'Schwann2', - 'T/NK-cell': 'T/NK-cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mullins' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1073/pnas.1914143116' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py b/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py deleted file mode 100644 index 88f2468c5..000000000 --- a/sfaira/data/human/eye/human_eye_2020_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Eye' - self.sub_tissue = 'FetalEyes' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fetal neuron': 'Fetal neuron', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Endothelial cell': 'Endothelial cell', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal Neuron': 'Fetal neuron', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Dendritic cell': 'Dendritic cell', - 'Fetal endocrine cell': 'Fetal endocrine cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Basal cell': 'Basal cell', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'CB CD34+': 'CB CD34_pos', - 'hESC': 'hESC' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "eye", "hcl_FetalEyes_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/fallopiantube/__init__.py b/sfaira/data/human/fallopiantube/__init__.py deleted file mode 100644 index 4f16e3956..000000000 --- a/sfaira/data/human/fallopiantube/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_fallopiantube import DatasetGroupFallopiantube diff --git a/sfaira/data/human/fallopiantube/external.py b/sfaira/data/human/fallopiantube/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/fallopiantube/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube.py b/sfaira/data/human/fallopiantube/human_fallopiantube.py deleted file mode 100644 index cff1f8131..000000000 --- a/sfaira/data/human/fallopiantube/human_fallopiantube.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase 
- -from .human_fallopiantube_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupFallopiantube(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFallopiantube - self.datasets.update(DatasetGroupFallopiantube(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py deleted file mode 100644 index cd3d107e2..000000000 --- a/sfaira/data/human/fallopiantube/human_fallopiantube_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'fallopiantube' - self.sub_tissue = 'AdultFallopiantube' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "fallopiantube", "hcl_AdultFallopiantube_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/femalegonad/__init__.py b/sfaira/data/human/femalegonad/__init__.py deleted file mode 100644 index bbb59f91d..000000000 --- a/sfaira/data/human/femalegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_femalegonad import DatasetGroupFemalegonad diff --git a/sfaira/data/human/femalegonad/external.py b/sfaira/data/human/femalegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/femalegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/femalegonad/human_femalegonad.py b/sfaira/data/human/femalegonad/human_femalegonad.py deleted file mode 100644 index 790322e71..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_femalegonad_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_femalegonad_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupFemalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py deleted file mode 100644 index 78b0e1cc5..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_001.py +++ /dev/null @@ 
-1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'femalegonad' - self.sub_tissue = 'FetalFemaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py deleted file mode 100644 index 45c00bf50..000000000 --- a/sfaira/data/human/femalegonad/human_femalegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'femalegonad' - self.sub_tissue = 'FetalFemaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "femalegonad", "hcl_FetalFemaleGonad_1.h5ad") - self.adata = anndata.read(fn) - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/gallbladder/__init__.py b/sfaira/data/human/gallbladder/__init__.py deleted file mode 100644 index de13546c3..000000000 --- a/sfaira/data/human/gallbladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_gallbladder import DatasetGroupGallbladder diff --git a/sfaira/data/human/gallbladder/external.py b/sfaira/data/human/gallbladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/gallbladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/gallbladder/human_gallbladder.py b/sfaira/data/human/gallbladder/human_gallbladder.py deleted file mode 100644 index aa015fe75..000000000 --- a/sfaira/data/human/gallbladder/human_gallbladder.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_gallbladder_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupGallbladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: 
Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupGallbladder - self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py deleted file mode 100644 index 3d685e0f3..000000000 --- a/sfaira/data/human/gallbladder/human_gallbladder_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'gallbladder' - self.sub_tissue = 'AdultGallbladder' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - 
fn = os.path.join(self.path, "human", "gallbladder", "hcl_AdultGallbladder_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/heart/__init__.py b/sfaira/data/human/heart/__init__.py deleted file mode 100644 index b6fe327b0..000000000 --- a/sfaira/data/human/heart/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_heart import DatasetGroupHeart diff --git a/sfaira/data/human/heart/external.py b/sfaira/data/human/heart/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/heart/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/heart/human_heart.py b/sfaira/data/human/heart/human_heart.py deleted file mode 100644 index a1c2195a7..000000000 --- a/sfaira/data/human/heart/human_heart.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_heart_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_heart_2020_microwell_han_002 import Dataset as Dataset0002 -from 
.human_heart_2020_microwell_han_003 import Dataset as Dataset0003 -from .human_heart_2020_microwell_han_004 import Dataset as Dataset0004 - - -class DatasetGroupHeart(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py deleted file mode 100644 index 325d4e08e..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'FetalHeart' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py deleted file mode 100644 index bc8eeb41a..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'AdultHeart' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py deleted file mode 100644 index 01fbee187..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'AdultHeart' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_AdultHeart_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py b/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py deleted file mode 100644 index 24b48e6bf..000000000 --- a/sfaira/data/human/heart/human_heart_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'heart' - self.sub_tissue = 'FetalHeart' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "heart", "hcl_FetalHeart_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = 
self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/hesc/__init__.py b/sfaira/data/human/hesc/__init__.py deleted file mode 100644 index 741b9caa4..000000000 --- a/sfaira/data/human/hesc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_hesc import DatasetGroupHesc diff --git a/sfaira/data/human/hesc/external.py b/sfaira/data/human/hesc/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/hesc/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/hesc/human_hesc.py b/sfaira/data/human/hesc/human_hesc.py deleted file mode 100644 index ffc258d70..000000000 --- a/sfaira/data/human/hesc/human_hesc.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_hesc_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupHesc(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHesc - self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py 
b/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py deleted file mode 100644 index 4553eedae..000000000 --- a/sfaira/data/human/hesc/human_hesc_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'hesc' - self.sub_tissue = 'HESC' - self.dev_stage = 'HESC' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "hesc", "hcl_HESC_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/ileum/__init__.py b/sfaira/data/human/ileum/__init__.py deleted file mode 100644 index cb7ce42d4..000000000 --- a/sfaira/data/human/ileum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_ileum import DatasetGroupIleum diff --git a/sfaira/data/human/ileum/external.py b/sfaira/data/human/ileum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/ileum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ileum/human_ileum.py b/sfaira/data/human/ileum/human_ileum.py deleted file mode 100644 index 53c884c69..000000000 --- a/sfaira/data/human/ileum/human_ileum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_ileum_2019_10x_martin_001 import Dataset as Dataset0001 -from .human_ileum_2019_10x_wang_001 import Dataset as Dataset0002 -from .human_ileum_2020_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupIleum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupIleum - 
self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py deleted file mode 100644 index 1de123a07..000000000 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_martin_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" - self.download_website = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" - self.download_website_meta = None - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'T cells': 'T cells', - 'Plasma cells': 'Plasma Cells', - 'B cells': 'B cells', - 'MNP': 'MNP', - 'ILC': 'ILC', - 'Enterocytes': 'Enterocytes', - 'Fibs': 'Fibroblasts', - 'CD36+ endothelium': 'CD36+ endothelium', - 'Progenitors': 'Progenitors', - 'Goblets': 'Goblet cells', - 'Glial cells': 'Glial cells', - 'Cycling': 'Cycling', - 'ACKR1+ endothelium': 'ACKR1+ endothelium', - 'Pericytes': 'Pericytes', - 'Lymphatics': 'Lymphatics', - 'Mast cells': 'Mast cells', - 'SM': 'Smooth muscle cell', - 'TA': 'TA', - 'Paneth cells': 'Paneth cells', - 'Enteroendocrines': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - self.adata = self.adata[self.adata.obs['CellType'] != 'Doublets'].copy() - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Kenigsberg" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2019.08.008" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids') diff --git a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py b/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py deleted file mode 100644 index 9e9a6a79f..000000000 --- a/sfaira/data/human/ileum/human_ileum_2019_10x_wang_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" - self.download_website_meta = None - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Progenitors', - 'Goblet': 'Goblet cells', - 'Enterocyte': 'Enterocytes', - 'Paneth-like': 'Paneth cells', - 'Stem Cell': 'Stem Cell', - 'TA': 'TA', - 'Enteriendocrine': 'Enteroendocrine cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py b/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py deleted file mode 100644 index db65ce9bb..000000000 --- a/sfaira/data/human/ileum/human_ileum_2020_microwell_han_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'ileum' - self.sub_tissue = 'AdultIleum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'B cells', - 'B cell (Plasmocyte)': 'Plasma Cells', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte': 'Enterocytes', - 'Enterocyte progenitor': 'Enterocytes', - 'Epithelial cell': 'Epithelial cell', - 
'Fetal Neuron': 'Fetal neuron', - 'Fetal enterocyte': 'Enterocytes', - 'Fetal epithelial progenitor': 'Progenitors', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblasts', - 'Hepatocyte/Endodermal cell': 'Hepatocyte/Endodermal cell', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cells', - 'Monocyte': 'Monocyte', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'T cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "hcl_AdultIleum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/jejunum/__init__.py b/sfaira/data/human/jejunum/__init__.py deleted file mode 100644 index dd9a3acd4..000000000 --- 
a/sfaira/data/human/jejunum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_jejunum import DatasetGroupJejunum diff --git a/sfaira/data/human/jejunum/external.py b/sfaira/data/human/jejunum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/jejunum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/jejunum/human_jejunum.py b/sfaira/data/human/jejunum/human_jejunum.py deleted file mode 100644 index 0d5dba57e..000000000 --- a/sfaira/data/human/jejunum/human_jejunum.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_jejunum_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupJejunum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupJejunum - self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py b/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py deleted file mode 100644 index 48507f945..000000000 --- a/sfaira/data/human/jejunum/human_jejunum_2020_microwell_han_001.py +++ /dev/null @@ -1,60 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'jejunum' - self.sub_tissue = 'AdultJejunum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "jejunum", "hcl_AdultJejunum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git 
a/sfaira/data/human/kidney/__init__.py b/sfaira/data/human/kidney/__init__.py deleted file mode 100644 index 4101ed3c2..000000000 --- a/sfaira/data/human/kidney/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_kidney import DatasetGroupKidney diff --git a/sfaira/data/human/kidney/external.py b/sfaira/data/human/kidney/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/kidney/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/kidney/human_kidney.py b/sfaira/data/human/kidney/human_kidney.py deleted file mode 100644 index 9d4b2e4d5..000000000 --- a/sfaira/data/human/kidney/human_kidney.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_kidney_2019_10xSn_lake_001 import Dataset as Dataset0001 -from .human_kidney_2019_10x_stewart_001 import Dataset as Dataset0002 -from .human_kidney_2020_10x_liao_001 import Dataset as Dataset0003 -from .human_kidney_2020_microwell_han_001 import Dataset as Dataset0004 -from .human_kidney_2020_microwell_han_002 import Dataset as Dataset0005 -from .human_kidney_2020_microwell_han_003 import Dataset as Dataset0006 -from .human_kidney_2020_microwell_han_004 import Dataset as Dataset0007 -from .human_kidney_2020_microwell_han_005 import Dataset as Dataset0008 -from .human_kidney_2020_microwell_han_006 import Dataset as Dataset0009 -from .human_kidney_2020_microwell_han_007 import Dataset as Dataset0010 - - -class DatasetGroupKidney(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - 
Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py deleted file mode 100644 index 6cdc98279..000000000 --- a/sfaira/data/human/kidney/human_kidney_2019_10xSn_lake_001.py +++ /dev/null @@ -1,95 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data files which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Collecting Duct - Intercalated Cells Type A (cortex)': 'Collecting Duct - Intercalated Cells Type A (cortex)', - 'Collecting Duct - Intercalated Cells Type A (medulla)': 'Collecting Duct - Intercalated Cells Type A (medulla)', - 'Collecting Duct - Intercalated Cells Type B': 'Collecting Duct - Intercalated Cells Type B', - 'Collecting Duct - PCs - Stressed Dissoc Subset': 'Collecting Duct - PCs - Stressed Dissoc Subset', - 'Collecting Duct - Principal Cells (cortex)': 'Collecting Duct - Principal Cells (cortex)', - 'Collecting Duct - Principal Cells (medulla)': 'Collecting Duct - Principal Cells (medulla)', - 'Connecting Tubule': 'Connecting tubule', - 'Decending Limb': 'Decending Limb', - 'Distal Convoluted Tubule': 'Distal Convoluted Tubule', - 'Endothelial Cells (unassigned)': 'Endothelial Cells (unassigned)', - 'Endothelial Cells - AEA & DVR ': 'Endothelial Cells - AEA & DVR', - 'Endothelial Cells - AVR': 'Endothelial Cells - AVR', - 'Endothelial Cells - glomerular capillaries': 'Endothelial Cells - glomerular capillaries', - 'Epithelial Cells (unassigned)': 'Epithelial Cells (unassigned)', - 'Immune Cells - Macrophages': 'Macrophage', - 'Interstitium': 
'Interstitium', - 'Mesangial Cells': 'Mesangial Cells', - 'Podocytes': 'Podocyte', - 'Proximal Tubule Epithelial Cells (S1)': 'Proximal Tubule Epithelial Cells (S1)', - 'Proximal Tubule Epithelial Cells (S2)': 'Proximal Tubule Epithelial Cells (S2)', - 'Proximal Tubule Epithelial Cells (S3)': 'Proximal Tubule Epithelial Cells (S3)', - 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3 )': 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', - 'Proximal Tubule Epithelial Cells - Stress/Inflam': 'Proximal Tubule Epithelial Cells - Stress/Inflam', - 'Thick Ascending Limb': 'Thick ascending limb of Loop of Henle', - 'Thin ascending limb': 'Thin ascending limb', - 'Unknown - Novel PT CFH+ Subpopulation (S2)': 'Unknown - Novel PT CFH+ Subpopulation (S2)', - 'Vascular Smooth Muscle Cells and pericytes': 'Vascular Smooth Muscle Cells and pericytes', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) - annot = pd.read_csv(fn[1], index_col=0, dtype='category') - self.adata.obs['celltype'] = [annot.loc[i.split('_')[0][1:]]['Annotation'] for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Jain' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-019-10861-2' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10xSn' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py b/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py deleted file mode 100644 index 3c97f7a87..000000000 --- a/sfaira/data/human/kidney/human_kidney_2019_10x_stewart_001.py +++ /dev/null @@ -1,145 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the two raw data files which can be obtained from the `download_website` - attribute of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" - self.download_website = [ - 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad', - 'https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad' - ] - self.download_website_meta = None - self.organ = "kidney" - self.sub_tissue = "renal medulla, renal pelvis, ureter, cortex of kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Ascending vasa recta endothelium': 'Endothelial Cells - AVR', - 'B cell': 'B cell', - 'CD4 T cell': 'CD4 T cell', - 'CD8 T cell': 'CD8 T cell', - 'CNT/PC - proximal UB': 'CNT/PC - proximal UB', - 'Cap mesenchyme': 'Cap mesenchyme', - 'Connecting tubule': 'Connecting tubule', - 'Descending vasa recta endothelium': 'Endothelial Cells - AEA & DVR', - 'Distal S shaped body': 'Distal S shaped body', - 'Distal renal vesicle': 'Distal renal vesicle', - 'Distinct proximal tubule 1': 'Distinct proximal tubule 1', - 'Distinct proximal tubule 2': 'Distinct proximal tubule 2', - 'Endothelium': 'Endothelial Cells (unassigned)', - 'Epithelial progenitor cell': 'Epithelial progenitor', - 'Erythroid': 'Erythroid', - 'Fibroblast': 'Fibroblast', - 'Fibroblast 1': 'Fibroblast', - 'Fibroblast 2': 'Fibroblast', - 'Glomerular endothelium': 'Endothelial Cells - glomerular capillaries', - 'Indistinct intercalated cell': 'Indistinct intercalated cell', - 'Innate like lymphocyte': 'Innate like lymphocyte', - 'Loop of Henle': 'Loop of Henle', - 'MNP-a/classical monocyte derived': 'MNP-a/classical monocyte derived', - 'MNP-b/non-classical monocyte derived': 'MNP-b/non-classical monocyte derived', - 'MNP-c/dendritic cell': 'MNP-c/dendritic cell', - 'MNP-d/Tissue macrophage': 'MNP-d/Tissue 
macrophage', - 'Macrophage 1': 'Macrophage', - 'Macrophage 2': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Mast cells': 'Mast cell', - 'Medial S shaped body': 'Medial S shaped body', - 'Megakaryocyte': 'Megakaryocyte', - 'Monocyte': 'Monocyte', - 'Myofibroblast': 'Myofibroblast', - 'Myofibroblast 1': 'Myofibroblast', - 'Myofibroblast 2': 'Myofibroblast', - 'NK cell': 'NK cell', - 'NKT cell': 'NKT cell', - 'Neuron': 'Neuron', - 'Neutrophil': 'Neutrophil', - 'Pelvic epithelium': 'Pelvic epithelium', - 'Pelvic epithelium - distal UB': 'Pelvic epithelium - distal UB', - 'Peritubular capillary endothelium 1': 'Peritubular capillary endothelium 1', - 'Peritubular capillary endothelium 2': 'Peritubular capillary endothelium 2', - 'Plasmacytoid dendritic cell': 'Plasmacytoid dendritic cell', - 'Podocyte': 'Podocyte', - 'Principal cell': 'Principal cell', - 'Proliferating B cell': 'Proliferating B cell', - 'Proliferating NK cell': 'Proliferating NK cell', - 'Proliferating Proximal Tubule': 'Proliferating Proximal Tubule', - 'Proliferating cDC2': 'Proliferating cDC2', - 'Proliferating cap mesenchyme': 'Proliferating cap mesenchyme', - 'Proliferating distal renal vesicle': 'Proliferating distal renal vesicle', - 'Proliferating fibroblast': 'Proliferating fibroblast', - 'Proliferating macrophage': 'Proliferating macrophage', - 'Proliferating monocyte': 'Proliferating monocyte', - 'Proliferating myofibroblast': 'Proliferating myofibroblast', - 'Proliferating stroma progenitor': 'Proliferating stroma progenitor', - 'Proximal S shaped body': 'Proximal S shaped body', - 'Proximal UB': 'Proximal UB', - 'Proximal renal vesicle': 'Proximal renal vesicle', - 'Proximal tubule': 'Proximal tubule', - 'Stroma progenitor': 'Stroma progenitor', - 'Thick ascending limb of Loop of Henle': 'Thick ascending limb of Loop of Henle', - 'Transitional urothelium': 'Transitional urothelium', - 'Type A intercalated cell': 'Type A intercalated cell', - 'Type B intercalated cell': 'Collecting Duct - 
Intercalated Cells Type B', - 'cDC1': 'cDC1', - 'cDC2': 'cDC2', - 'pDC': 'pDC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), - os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") - ] - adult = anndata.read(fn[0]) - fetal = anndata.read(fn[1]) - adult.obs['development'] = 'adult' - fetal.obs['development'] = 'fetal' - self.adata = adult.concatenate(fetal) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Clatworthy' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1126/science.aat5031' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["celltype"] - self.adata.obs["cell_ontology_id"] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='ID') diff --git a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py b/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py deleted file mode 100644 index 99a147d75..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_10x_liao_001.py +++ /dev/null @@ -1,115 +0,0 @@ -import anndata -import os -from 
typing import Union -from .external import DatasetBase -import pandas as pd -import scipy.io -import gzip -import tarfile - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling the - self.load() method. For this, you need to preprocess the raw files as below and place the resulting h5ad file in the - data folder of this organ: - - import anndata - import pandas as pd - import scipy.io - import gzip - import tarfile - adatas = [] - with tarfile.open("GSE131685_RAW.tar") as tar: - for member in tar.getmembers(): - if '_matrix.mtx.gz' in member.name: - name = '_'.join(member.name.split('_')[:-1]) - with gzip.open(tar.extractfile(member), 'rb') as mm: - X = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name+'_barcodes.tsv.gz'), compression='gzip', header=None, sep='\t', index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name+'_features.tsv.gz'), compression='gzip', header=None, sep='\t').iloc[:,:2] - var.columns = ['ensembl', 'names'] - var.index = var['ensembl'].values - adata = anndata.AnnData(X=X, obs=obs, var=var) - adata.obs['sample'] = name - adatas.append(adata) - adata = adatas[0].concatenate(adatas[1:]) - del adata.obs['batch'] - adata.write('GSE131685.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" - self.download_website = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" - self.download_website_meta = None - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") - adatas = [] - with tarfile.open(fn) as tar: - for member in tar.getmembers(): - if '_matrix.mtx.gz' in member.name: - name = '_'.join(member.name.split('_')[:-1]) - with gzip.open(tar.extractfile(member), 'rb') as mm: - X = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name + '_barcodes.tsv.gz'), compression='gzip', header=None, - sep='\t', index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name + '_features.tsv.gz'), compression='gzip', header=None, - sep='\t').iloc[:, :2] - var.columns = ['ensembl', 'names'] - var.index = var['ensembl'].values - self.adata = anndata.AnnData(X=X, obs=obs, var=var) - self.adata.obs['sample'] = name - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:]) - del self.adata.obs['batch'] - - else: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "GSE131685.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Mo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41597-019-0351-8' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = None - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py deleted file mode 100644 index ffea57f1b..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_001.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 
'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is 
None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py deleted file mode 100644 index 353dae669..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_002.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py deleted file mode 100644 index 94c6bf3b8..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_003.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'AdultKidney' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_AdultKidney_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py deleted file mode 100644 index 264ec8857..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_004.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py deleted file mode 100644 index 68079fd52..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_005.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py deleted file mode 100644 index 10699d7a8..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_006.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', 
- 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py b/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py deleted file mode 100644 index 91667d873..000000000 --- a/sfaira/data/human/kidney/human_kidney_2020_microwell_han_007.py +++ /dev/null @@ -1,110 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'Kidney' - self.sub_tissue = 'FetalKidney' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte 
progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Epithelial cell (intermediated)': 'Intermediated cell', - 'Erythroid cell': 'Erythroid', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal chondrocyte': 'Chondrocyte', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Stroma progenitor', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stroma progenitor', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Intercalated cell': 'Intercalated cell', - 'Intermediated cell': 'Intermediated cell', - 'Kidney intercalated cell': 'Intercalated cell', - 'Loop of Henle': 'Loop of Henle', - 'M2 Macrophage': 'M2 Macrophage', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Smooth muscle cell': 'Vascular Smooth Muscle Cells and pericytes', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = 
os.path.join(self.path, "human", "kidney", "hcl_FetalKidney_6.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/__init__.py b/sfaira/data/human/liver/__init__.py deleted file mode 100644 index 2014f8490..000000000 --- a/sfaira/data/human/liver/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_liver import DatasetGroupLiver diff --git a/sfaira/data/human/liver/external.py b/sfaira/data/human/liver/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/liver/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/liver/human_liver.py b/sfaira/data/human/liver/human_liver.py deleted file mode 100644 index 278a6a4dc..000000000 --- a/sfaira/data/human/liver/human_liver.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_liver_2018_10x_macparland_001 import Dataset as Dataset0001 -from .human_liver_2019_10x_popescu_001 import Dataset as Dataset0002 -from .human_liver_2019_10x_ramachandran_001 
import Dataset as Dataset0003 -from .human_liver_2019_mCELSeq2_aizarani_001 import Dataset as Dataset0004 -from .human_liver_2020_microwell_han_001 import Dataset as Dataset0005 -from .human_liver_2020_microwell_han_002 import Dataset as Dataset0006 -from .human_liver_2020_microwell_han_003 import Dataset as Dataset0007 -from .human_liver_2020_microwell_han_004 import Dataset as Dataset0008 -from .human_liver_2020_microwell_han_005 import Dataset as Dataset0009 - - -class DatasetGroupLiver(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py b/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py deleted file mode 100644 index 5828651a0..000000000 --- a/sfaira/data/human/liver/human_liver_2018_10x_macparland_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - The input files for this dataloader (GSE115469.csv.gz and GSE115469_labels.txt) were kindly provided to us by the - authors of the publication. 
Please contact them directly to obtain the required - files. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE115469" - self.download_website_meta = 'private' - self.organ = "liver" - self.sub_tissue = "caudate lobe" - self.annotated = True - - self.class_maps = { - "0": { - '1':'Hepatocyte 1', - '2':'Alpha beta T cells', - '3':'Hepatocyte 2', - '4':'Inflammatory macrophages', - '5':'Hepatocyte 3', - '6':'Hepatocyte 4', - '7':'Plasma cells', - '8':'NK cell', - '9':'Gamma delta T cells 1', - '10':'Non inflammatory macrophages', - '11':'Periportal LSECs', - '12':'Central venous LSECs', - '13':'Endothelial cell', - '14':'Hepatocyte 5', - '15':'Hepatocyte 6', - '16':'Mature B cells', - '17':'Cholangiocytes', - '18':'Gamma delta T cells 2', - '19':'Erythroid cells', - '20':'Hepatic stellate cells' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"), - os.path.join(self.path, "human", "liver", "GSE115469_labels.txt") - ] - self.adata = anndata.read_csv(fn[0]).T - celltype_df = pd.read_csv(fn[1], sep='\t').set_index('CellName') - self.adata.obs['celltype'] = [str(celltype_df.loc[i]['Cluster#']) for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'McGilvray' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41467-018-06318-7' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
'10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py b/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py deleted file mode 100644 index d335ed46d..000000000 --- a/sfaira/data/human/liver/human_liver_2019_10x_popescu_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - The input file for this dataloader (fetal_liver_alladata_.h5ad) was kindly provided to us by the - authors of the publication. Please contact them directly to obtain the required file. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" - self.download_website = "https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-7407/" - self.download_website_meta = 'private' - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'Mature B cells', - 'DC1': 'Dendritic cell 1', - 'DC2': 'Dendritic cell 2', - 'DC precursor': 'Dendritic cell precursor', - 'Early Erythroid': 'Early Erythroid', - 'Early lymphoid_T lymphocyte': 'Early lymphoid T lymphocyte', - 'Endothelial cell': 'Endothelial cell', - 'Fibroblast': 'Fibroblast', - 'HSC_MPP': 'HSC MPP', - 'Hepatocyte': 'Hepatocyte', - 'ILC precursor': 'ILC precursor', - 'Kupffer Cell': 'Kupffer Cell', - 'Late Erythroid': 'Late Erythroid', - 'MEMP': 'MEMP', - 'Mast cell': 'Mast cell', - 'Megakaryocyte': 'Megakaryocyte', - 'Mid Erythroid': 'Mid Erythroid', - 'Mono-Mac': 'Mono Macrophage', - 'Monocyte': 'Monocyte', - 'Monocyte precursor': 'Monocyte precursor', - 'NK': 'NK cell', - 'Neutrophil-myeloid progenitor': 'Neutrophil myeloid progenitor', - 'Pre pro B cell': 'Pre pro B cell', - 'VCAM1+ EI macrophage': 'VCAM1pos EI macrophage', - 'pDC precursor': 'pDendritic cell precursor', - 'pre-B cell': 'pre B cell', - 'pro-B cell': 'pro B cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Haniffa' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 
- self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1652-y' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["cell.labels"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py deleted file mode 100644 index 0e9623afa..000000000 --- a/sfaira/data/human/liver/human_liver_2019_10x_ramachandran_001.py +++ /dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This dataloader requires manual preprocessing of the Rdata file that can be obtained from the link in the - `download_website` attribute of this class. 
The preprocessing code below uses the rpy2 and anndata2ri python - packages to convert the R object to anndata (pip install anndata2ri), run it in a jupyter notebook: - - ## Notebook Cell 1 - import anndata2ri - anndata2ri.activate() - %load_ext rpy2.ipython - - ## Notebook Cell 2 - %%R -o sce - library(Seurat) - load('tissue.rdata') - new_obj = CreateSeuratObject(counts = tissue@raw.data) - new_obj@meta.data = tissue@meta.data - sce <- as.SingleCellExperiment(new_obj) - - ## Notebook cell 3 - sce.write('ramachandran.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" - self.download_website = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" - self.download_website_meta = None - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'MPs': 'MP', - 'Tcells': 'Tcells', - 'ILCs': 'ILC', - 'Endothelia': 'Endothelia', - 'Bcells': 'Bcells', - 'pDCs': 'pDCs', - 'Plasma Bcells': 'Plasma B cell', - 'Mast cells': 'Mast cell', - 'Mesenchyme': 'Mesenchyme', - 'Cholangiocytes': 'Cholangiocytes', - 'Hepatocytes': 'Hepatocytes', - 'Mesothelia': 'Mesothelia', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Henderson' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1631-3' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] 
= '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["annotation_lineage"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Uninjured' for i in self.adata.obs["condition"]] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = ['healthy' if i == 'Uninjured' else i for i in self.adata.obs["condition"]] - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py deleted file mode 100644 index 19e9e2783..000000000 --- a/sfaira/data/human/liver/human_liver_2019_mCELSeq2_aizarani_001.py +++ /dev/null @@ -1,108 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - '1': 'NK, NKT and T cells', - '2': 'Kupffer Cell', - '3': 'NK, NKT and T cells', - '4': 'Cholangiocytes', - '5': 'NK, NKT and T cells', - '6': 'Kupffer Cell', - '7': 'Cholangiocytes', - '8': 'B Cell', - '9': 'Liver sinusoidal endothelial cells', - '10': 'Macrovascular endothelial cells', - '11': 'Hepatocyte', - '12': 'NK, NKT and T cells', - '13': 'Liver sinusoidal endothelial cells', - '14': 'Hepatocyte', - '15': 'Other endothelial cells', - '16': 'Unknown', - '17': 'Hepatocyte', - '18': 'NK, NKT and T cells', - '19': 'Unknown', - '20': 'Liver sinusoidal endothelial cells', - '21': 'Macrovascular endothelial cells', - '22': 'B Cell', - '23': 'Kupffer Cell', - '24': 'Cholangiocytes', - '25': 'Kupffer Cell', - '26': 'Other endothelial cells', - '27': 'Unknown', - '28': 'NK, NKT and T cells', - '29': 'Macrovascular endothelial cells', - '30': 'Hepatocyte', - '31': 'Kupffer Cell', - '32': 'Liver sinusoidal endothelial cells', - '33': 'Hepatic stellate cells', - '34': 'B Cell', - '35': 'Other endothelial cells', - '36': 'Unknown', - '37': 'Unknown', - '38': 'B Cell', - '39': 'Cholangiocytes' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t').T) - celltype_df = pd.read_csv(fn[1], sep=' ') - self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() - self.adata.obs['CellType'] = [str(celltype_df.loc[i]['sct@cpart']) for i in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Gruen' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-019-1373-2' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'mCEL-Seq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs['cell_ontology_class'] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py deleted file mode 100644 index ed46c5da4..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_001.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape 
dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 
'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py deleted file mode 100644 index ba74db0ba..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_002.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py deleted file mode 100644 index 9ecdc5456..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_003.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'AdultLiver' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_AdultLiver_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py deleted file mode 100644 index 6f8003ab9..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_004.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'FetalLiver' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_Liver_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py b/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py deleted file mode 100644 index a4909b27f..000000000 --- a/sfaira/data/human/liver/human_liver_2020_microwell_han_005.py +++ /dev/null @@ -1,89 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'Liver' - self.sub_tissue = 'FetalLiver' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'Plasma B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Late Erythroid', - 'Erythroid progenitor cell (RP high)': 'Early Erythroid', - 'Fetal enterocyte ': 'Enterocyte ', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Non inflammatory macrophages', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Myeloid cell': 'Myeloid cell', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Sinusoidal endothelial cell': 'Liver 
sinusoidal endothelial cells', - 'Smooth muscle cell': 'Smooth muscle cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "liver", "hcl_Liver_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/lung/__init__.py b/sfaira/data/human/lung/__init__.py deleted file mode 100644 index fafe9671b..000000000 --- a/sfaira/data/human/lung/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_lung import DatasetGroupLung diff --git a/sfaira/data/human/lung/external.py b/sfaira/data/human/lung/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/lung/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/lung/human_lung.py b/sfaira/data/human/lung/human_lung.py deleted file mode 100644 index fc29e35a3..000000000 --- a/sfaira/data/human/lung/human_lung.py +++ 
/dev/null @@ -1,53 +0,0 @@ -from typing import Union - -from .external import DatasetGroupBase - -from .human_lung_2019_10x_braga_001 import Dataset as Dataset0001 -from .human_lung_2019_10x_braga_002 import Dataset as Dataset0002 -from .human_lung_2019_10x_madissoon_001 import Dataset as Dataset0003 -from .human_lung_2019_dropseq_braga_003 import Dataset as Dataset0004 -from .human_lung_2020_10x_habermann_001 import Dataset as Dataset0005 -from .human_lung_2020_10x_lukassen_001 import Dataset as Dataset0006 -from .human_lung_2020_10x_lukassen_002 import Dataset as Dataset0007 -from .human_lung_2020_10x_miller_001 import Dataset as Dataset0008 -from .human_lung_2020_10x_travaglini_001 import Dataset as Dataset0009 -from .human_lung_2020_microwell_han_001 import Dataset as Dataset0010 -from .human_lung_2020_microwell_han_002 import Dataset as Dataset0011 -from .human_lung_2020_microwell_han_003 import Dataset as Dataset0012 -from .human_lung_2020_microwell_han_004 import Dataset as Dataset0013 -from .human_lung_2020_microwell_han_005 import Dataset as Dataset0014 -from .human_lung_2020_smartseq2_travaglini_002 import Dataset as Dataset0015 - - -class DatasetGroupLung(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path), - Dataset0011(path=path, meta_path=meta_path), - Dataset0012(path=path, meta_path=meta_path), - Dataset0013(path=path, meta_path=meta_path), - Dataset0014(path=path, meta_path=meta_path), - 
Dataset0015(path=path, meta_path=meta_path), - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py deleted file mode 100644 index a9119aaa8..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_001.py +++ /dev/null @@ -1,84 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" - self.download_website = "https://covid19.cog.sanger.ac.uk/" \ - "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "alveoli, parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated 2': 'Multiciliated lineage', - 'Luminal_Macrophages': 'Macrophages', - 'Basal 1': 'Basal', - 'Dendritic cells': 'Dendritic cells', - 'Endothelial': '1_Endothelial', - 'Lymphatic': 'Lymphatic EC', - 'Ciliated 1': 'Multiciliated lineage', - 'Smooth muscle': '2_Smooth Muscle', - 'Type_1_alveolar': 'AT1', - 'Neutrophils': 'Monocytes', - 'Club': 'Secretory', - 'Basal 2': 'Basal', - 'B cells': 'B cell lineage', - 'T and NK': '2_Lymphoid', - 'Mesothelium': 
'Mesothelium', - 'Mast cells': 'Mast cells', - 'Fibroblasts': '2_Fibroblast lineage', - 'Type 2 alveolar': 'AT2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py b/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py deleted file mode 100644 index 2ce4619c1..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_braga_002.py +++ /dev/null @@ -1,84 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly 
processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" - self.download_website = "https://covid19.cog.sanger.ac.uk/" \ - "vieira19_Bronchi_anonymised.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "bronchi" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated 1': 'Multiciliated lineage', - 'Club': 'Secretory', - 'Ciliated 2': 'Multiciliated lineage', - 'Ionocytes': 'Rare', - 'Basal 2': 'Basal', - 'Goblet_1': 'Secretory', - 'Goblet 2': 'Secretory', - 'Basal 1': 'Basal', - 'Dendritic cells': 'Dendritic cells', - 'B cells': 'B cell lineage', - 'Luminal_Macrophages': 'Macrophages', - 'Neutrophils': 'Monocytes', - 'Endothelial': '1_Endothelial', - 'Smooth muscle': '2_Smooth Muscle', - 'T and NK': '2_Lymphoid', - 'Fibroblasts': '2_Fibroblast lineage', - 'Lymphatic': 'Lymphatic EC', - 'Mast cells': 'Mast cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py b/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py deleted file mode 100644 index 4327033ee..000000000 --- a/sfaira/data/human/lung/human_lung_2019_10x_madissoon_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" - self.download_website = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'T_CD4': 'T cell lineage', - 'Mast_cells': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Blood_vessel': '2_Blood vessels', - 'Ciliated': 'Multiciliated lineage', - 'Macrophage_MARCOneg': 'Macrophages', - 'DC_plasmacytoid': 'Dendritic cells', - 'DC_1': 'Dendritic cells', - 'Muscle_cells': '2_Smooth Muscle', - 'Macrophage_MARCOpos': 'Macrophages', - 'T_cells_Dividing': 'T cell lineage', - 'DC_Monocyte_Dividing': 'Dendritic cells', - 'B_cells': 'B cell lineage', - 'T_CD8_CytT': 'T cell lineage', - 'NK_Dividing': 'Innate lymphoid cells', - 'T_regulatory': 'T cell lineage', - 'DC_2': 'Dendritic cells', - 'Alveolar_Type2': 'AT2', - 'Plasma_cells': 'B cell lineage', - 'NK': 'Innate lymphoid cells', - 'Alveolar_Type1': 'AT1', - 'Fibroblast': '2_Fibroblast lineage', - 'DC_activated': 'Dendritic cells', - 'Macrophage_Dividing': 'Macrophages', - 'Lymph_vessel': 'Lymphatic EC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Meyer' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1186/s13059-019-1906-x" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene.ids.HCATisStab7509734') diff --git a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py b/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py deleted file mode 100644 index 6fadd1a90..000000000 --- a/sfaira/data/human/lung/human_lung_2019_dropseq_braga_003.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the two raw data files which can be obtained from the `download_website` - and `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" - self.organ = "lung" - self.sub_tissue = "parenchymal lung and distal airway specimens" - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblasts', - 'Type 2': 'AT2', - 'B cell': 'B cell lineage', - 'Macrophages': 'Macrophages', - 'NK cell': 'Innate lymphoid cells', - 'T cell': 'T cell lineage', - 'Ciliated': 'Multiciliated lineage', - 'Lymphatic': 'Lymphatic EC', - 'Type 1': 'AT1', - 'Transformed epithelium': '1_Epithelial', - 'Secretory': 'Secretory', - 'Endothelium': '1_Endothelial', - 'Mast cell': 'Mast cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), - os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), - ] - self.adata = anndata.read_csv(fn[0]).T - self.adata.obs = pd.read_csv(fn[1], sep='\t', index_col=0) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41591-019-0468-5" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'dropseq' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = [self.download_website, self.download_website_meta] - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'uninvolved areas of tumour resection material' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py b/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py deleted file mode 100644 index 5f5872577..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_habermann_001.py +++ /dev/null @@ -1,126 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data files if `load_raw=True` is passed to self.load() - To download the datafiles required by this dataloader, use the links provided as the `download_website` and - `download_website_meta` attribute of this class. For (up to 100-fold faster) repeated data loading, please pass - `load_raw=False` when calling the self.load() method. 
For this, you need to preprocess the raw files as below and - place the resulting h5ad file in the data folder of this organ: - - import anndata - import pandas as pd - adata = anndata.read_mtx('GSE135893_matrix.mtx.gz').T - adata.var = pd.read_csv('GSE135893_genes.tsv.gz', index_col=0, header=None, names=['ids']) - adata.obs = pd.read_csv('GSE135893_barcodes.tsv.gz', index_col=0, header=None, names=['barcodes']) - obs = pd.read_csv('GSE135893_IPF_metadata.csv.gz', index_col=0) - adata = adata[obs.index.tolist(),:].copy() - adata.obs = obs - adata.write('habermann_processed.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" - self.download_website = [ - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", - "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fbarcodes%2Etsv%2Egz" - ] - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" - self.organ = "lung" - self.sub_tissue = "parenchyma" - self.annotated = True - - self.class_maps = { - "0": { - 'Proliferating Macrophages': 'Macrophages', - 'Myofibroblasts': 'Myofibroblasts', - 'Proliferating Epithelial Cells': 'Proliferating Epithelial Cells', - 'Mesothelial Cells': 'Mesothelium', - 'cDCs': 'Dendritic cells', - 'Mast Cells': 'Mast cells', - 'Ciliated': 'Multiciliated lineage', - 'T Cells': 'T cell lineage', - 'pDCs': 'Dendritic cells', - 'Smooth Muscle Cells': '2_Smooth Muscle', - 'Transitional AT2': 'AT2', - 'AT2': 'AT2', - 'B Cells': 'B cell lineage', - 'NK Cells': 
'Innate lymphoid cells', - 'Monocytes': 'Monocytes', - 'Basal': 'Basal', - 'Plasma Cells': 'B cell lineage', - 'Differentiating Ciliated': 'Multiciliated lineage', - 'Macrophages': 'Macrophages', - 'MUC5B+': 'Secretory', - 'SCGB3A2+': 'Secretory', - 'Fibroblasts': 'Fibroblasts', - 'Lymphatic Endothelial Cells': 'Lymphatic EC', - 'Endothelial Cells': '2_Blood vessels', - 'SCGB3A2+ SCGB1A1+': 'Secretory', - 'PLIN2+ Fibroblasts': 'Fibroblasts', - 'KRT5-/KRT17+': 'KRT5-/KRT17+', - 'MUC5AC+ High': 'Secretory', - 'Proliferating T Cells': 'T cell lineage', - 'AT1': 'AT1', - 'HAS1 High Fibroblasts': 'Fibroblasts' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), - ] - self.adata = anndata.read_mtx(fn[0]).T - self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=['ids']) - self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=['barcodes']) - obs = pd.read_csv(fn[3], index_col=0) - self.adata = self.adata[obs.index.tolist(), :].copy() - self.adata.obs = obs - else: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "habermann_processed.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Kropski' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/753806" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [i == 'Control' for i in self.adata.obs['Status']] - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Diagnosis'].astype('category') - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py deleted file mode 100644 index 8ad57e976..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_001.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'Ciliated': 'Multiciliated lineage', - 'Endothelial': '1_Endothelial', - 'AT2': 'AT2', - 'LymphaticEndothelium': 'Lymphatic EC', - 'Fibroblasts': '2_Fibroblast lineage', - 'Club': 'Secretory', - 'Immuno_TCells': 'T cell lineage', - 'Immuno_Monocytes': 'Monocytes', - 'AT1': 'AT1' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated 
- self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py b/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py deleted file mode 100644 index 7ecfbaf49..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_lukassen_002.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" - self.download_website = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "bronchial epithelial cells" - self.annotated = True - - self.class_maps = { - "0": { - 'Secretory3': 'Secretory', - 'Ciliated1': 'Multiciliated lineage', - 'Goblet': 'Secretory', - 'Ciliated2': 'Multiciliated lineage', - 'Club': 'Secretory', - 'Secretory2': 'Secretory', - 'FOXN4': 'Rare', - 'Basal1': 'Basal', - 'Secretory1': 'Secretory', - 'Fibroblast': '2_Fibroblast lineage', - 'Ionocyte': 'Rare', - 'Basal3': 'Basal', - 'Basal_Mitotic': 'Basal', - 'Basal2': 'Basal', - }, - } 
- - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nCount_RNA'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Eils' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/2020.03.13.991455" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py b/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py deleted file mode 100644 index 955e85b9e..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_miller_001.py +++ /dev/null @@ -1,93 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader 
directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" - self.download_website = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "fetal lung" - self.annotated = True - - self.class_maps = { - "0": { - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Basal cell': 'Basal', - 'Bud tip adjacent': 'Fetal airway progenitors', - 'Bud tip progenitor': 'Fetal airway progenitors', - 'Cartilage': 'Cartilage', - 'Club-like secretory': 'Secretory', - 'Endothelial': '1_Endothelial', - 'Epithelial': '1_Epithelial', - 'Goblet-like secretory': 'Secretory', - 'Hematopoietic, B Cells': 'B cell lineage', - 'Hematopoietic, Macrophage': 'Macrophages', - 'Hematopoietic, Natural Killer Cell': 'Innate lymphoid cells', - 'Hematopoietic, T Cells': 'T cell lineage', - 'Immune': '1_Immune', - 'Intermediate ciliated': 'Multiciliated lineage', - 'Mesenchyme RSPO2+': '1_Stroma', - 'Mesenchyme SERPINF1-high': '1_Stroma', - 'Multiciliated cell': 'Multiciliated lineage', - 'Multiciliated precursor': 'Multiciliated lineage', - 'Neuroendocrine': 'Rare', - 'Pericyte': 'Fibroblasts', - 'RBC': 'Erythrocytes', - 'Secretory progenitor': 'Secretory', - 'Submucosal gland': 'Submucosal Secretory', - 'Submucosal gland basal': 'Submucosal Secretory', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", 
"miller20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Spence' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.devcel.2020.01.033" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Cell_type'] - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py b/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py deleted file mode 100644 index 630f99090..000000000 --- a/sfaira/data/human/lung/human_lung_2020_10x_travaglini_001.py +++ /dev/null @@ -1,135 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the data file provided by the authors. To obtain the file, you need to create a - free account at https://www.synapse.org. 
You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login('synapse_username','password') - syn21625095 = syn.get(entity='syn21625095') - shutil.move(syn21625095.path, 'droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" - self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "proximal, medial, distal, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'Intermediate Monocyte': 'Monocytes', - 'Adventitial Fibroblast': 'Fibroblasts', - 'Myeloid Dendritic Type 1': 'Dendritic cells', - 'Myofibroblast': 'Myofibroblasts', - 'Bronchial Vessel 2': 'Bronchial Vessel 2', - 'Fibromyocyte': 'Fibromyocyte', - 'Basal': 'Basal', - 'IGSF21+ Dendritic': 'Macrophages', - 'CD8+ Memory/Effector T': 'T cell lineage', - 'CD4+ Naive T': 'T cell lineage', - 'Myeloid Dendritic Type 2': 'Dendritic cells', - 'Neuroendocrine': 'Rare', - 'Ciliated': 'Multiciliated lineage', - 'Proximal Ciliated': 'Multiciliated lineage', - 'Proliferating Basal': 'Basal', - 'Proximal Basal': 'Basal', - 'Nonclassical Monocyte': 'Monocytes', - 'Proliferating Macrophage': 'Macrophages', - 'Plasmacytoid Dendritic': 'Dendritic cells', - 'Vein': 'Venous', - 'Basophil/Mast 1': 'Mast cells', - 'Serous': 'Submucosal Secretory', - 'Natural Killer T': 'T cell lineage', - 'Mesothelial': 'Mesothelium', - 'Ionocyte': 'Rare', - 'Bronchial Vessel 1': 'Bronchial Vessel 1', - 'Natural Killer': 'Innate 
lymphoid cells', - 'Capillary Aerocyte': 'Capillary', - 'Vascular Smooth Muscle': '2_Smooth Muscle', - 'Macrophage': 'Macrophages', - 'Basophil/Mast 2': 'Mast cells', - 'Platelet/Megakaryocyte': 'Megakaryocytes', - 'Pericyte': 'Fibroblasts', - 'Capillary Intermediate 2': 'Capillary Intermediate 2', - 'CD4+ Memory/Effector T': 'T cell lineage', - 'B': 'B cell lineage', - 'Lymphatic': 'Lymphatic EC', - 'Mucous': 'Submucosal Secretory', - 'Signaling Alveolar Epithelial Type 2': 'AT2', - 'Alveolar Epithelial Type 1': 'AT1', - 'OLR1+ Classical Monocyte': 'Monocytes', - 'Plasma': 'B cell lineage', - 'Lipofibroblast': 'Fibroblasts', - 'Capillary Intermediate 1': 'Capillary Intermediate 1', - 'EREG+ Dendritic': 'Macrophages', - 'Capillary': 'Capillary', - 'TREM2+ Dendritic': 'Macrophages', - 'Alveolar Fibroblast': 'Fibroblasts', - 'Classical Monocyte': 'Monocytes', - 'Goblet': 'Secretory', - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Club': 'Secretory', - 'Proliferating NK/T': 'Innate lymphoid cells', - 'Alveolar Epithelial Type 2': 'AT2', - 'Differentiating Basal': 'Basal', - 'CD8+ Naive T': 'T cell lineage', - 'Artery': 'Arterial' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nUMI'].values[:, None])) \ - .multiply(1 / 10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py deleted file mode 100644 index 99e467b2e..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_001.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'FetalLung' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated 
cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py deleted file mode 100644 index 1da535072..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_002.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': 
'1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py deleted file mode 100644 index 9e1566a2f..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_003.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 
'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py b/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py deleted file mode 100644 index af7875309..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_004.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'AdultLung' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': '1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated 
cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_AdultLung_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py 
b/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py deleted file mode 100644 index 8233627f0..000000000 --- a/sfaira/data/human/lung/human_lung_2020_microwell_han_005.py +++ /dev/null @@ -1,112 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'lung' - self.sub_tissue = 'FetalLung' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'AT2 cell': 'AT2', - 'Antigen presenting cell (RPS high)': 'unknown', - 'B cell': 'B cell lineage', - 'B cell (Plasmocyte)': 'B cell lineage', - 'Basal cell': 'Basal', - 'CB CD34+': 'Fetal airway progenitors', - 'Chondrocyte': '1_Stroma', - 'Dendritic cell': 'Dendritic cells', - 'Endothelial cell': '1_Endothelial', - 'Endothelial cell (APC)': '1_Endothelial', - 'Endothelial cell (endothelial to mesenchymal transition)': '1_Endothelial', - 'Enterocyte progenitor': '1_Epithelial', - 'Epithelial cell': '1_Epithelial', - 'Epithelial cell (intermediated)': '1_Epithelial', - 'Erythroid cell': 'Erythrocytes', - 'Erythroid progenitor cell (RP high)': 'Erythrocytes', - 'Fasciculata cell': 'unknown', - 'Fetal Neuron': 'unknown', - 'Fetal chondrocyte': 
'1_Stroma', - 'Fetal endocrine cell': 'unknown', - 'Fetal enterocyte ': '1_Epithelial', - 'Fetal epithelial progenitor': '1_Epithelial', - 'Fetal fibroblast': 'Fibroblasts', - 'Fetal mesenchymal progenitor': '1_Stroma', - 'Fetal neuron': 'unknown', - 'Fetal skeletal muscle cell': 'unknown', - 'Fetal stromal cell': '1_Stroma', - 'Fibroblast': 'Fibroblasts', - 'Gastric endocrine cell': 'unknown', - 'Goblet cell': 'Secretory', - 'Kidney intercalated cell': 'unknown', - 'Loop of Henle': 'unknown', - 'M2 Macrophage': 'Macrophages', - 'Macrophage': 'Macrophages', - 'Mast cell': 'Mast cells', - 'Mesothelial cell': 'Mast cells', - 'Monocyte': 'Monocytes', - 'Myeloid cell': '2_Myeloid', - 'Neutrophil': 'Neutrophilic', - 'Neutrophil (RPS high)': 'Neutrophilic', - 'Primordial germ cell': 'unknown', - 'Proliferating T cell': 'T cell lineage', - 'Proximal tubule progenitor': 'unknown', - 'Sinusoidal endothelial cell': '1_Endothelial', - 'Smooth muscle cell': '2_Smooth Muscle', - 'Stratified epithelial cell': '1_Epithelial', - 'Stromal cell': '1_Stroma', - 'T cell': 'T cell lineage', - 'Ventricle cardiomyocyte': '1_Stroma', - 'hESC': 'Fetal airway progenitors', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "hcl_FetalLung_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py b/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py deleted file mode 100644 index d347e5766..000000000 --- a/sfaira/data/human/lung/human_lung_2020_smartseq2_travaglini_002.py +++ /dev/null @@ -1,122 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the data file provided by the authors. To obtain the file, you need to create a - free account at https://www.synapse.org. You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login('synapse_username','password') - syn21625142 = syn.get(entity='syn21625142') - shutil.move(syn21625142.path, 'facs_normal_lung_blood_scanpy.20200205.RC4.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" - self.download_website = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_website_meta = None - self.organ = "lung" - self.sub_tissue = "proximal, medial, distal, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'Intermediate 
Monocyte': 'Monocytes', - 'Adventitial Fibroblast': 'Fibroblasts', - 'Myofibroblast': 'Myofibroblasts', - 'Fibromyocyte': 'Fibromyocyte', - 'Basal': 'Basal', - 'IGSF21+ Dendritic': 'Macrophages', - 'CD8+ Memory/Effector T': 'T cell lineage', - 'CD4+ Naive T': 'T cell lineage', - 'Myeloid Dendritic Type 2': 'Dendritic cells', - 'Neuroendocrine': 'Rare', - 'Ciliated': 'Multiciliated lineage', - 'Nonclassical Monocyte': 'Monocytes', - 'Plasmacytoid Dendritic': 'Dendritic cells', - 'Vein': 'Venous', - 'Basophil/Mast 1': 'Mast cells', - 'Natural Killer T': 'T cell lineage', - 'Ionocyte': 'Rare', - 'Bronchial Vessel 1': 'Bronchial Vessel 1', - 'Natural Killer': 'Innate lymphoid cells', - 'Capillary Aerocyte': 'Capillary', - 'Vascular Smooth Muscle': '2_Smooth Muscle', - 'Macrophage': 'Macrophages', - 'Pericyte': 'Fibroblasts', - 'CD4+ Memory/Effector T': 'T cell lineage', - 'B': 'B cell lineage', - 'Lymphatic': 'Lymphatic EC', - 'Signaling Alveolar Epithelial Type 2': 'AT2', - 'Alveolar Epithelial Type 1': 'AT1', - 'Plasma': 'B cell lineage', - 'Lipofibroblast': 'Fibroblasts', - 'Capillary Intermediate 1': 'Capillary Intermediate 1', - 'Capillary': 'Capillary', - 'Alveolar Fibroblast': 'Fibroblasts', - 'Classical Monocyte': 'Monocytes', - 'Goblet': 'Secretory', - 'Airway Smooth Muscle': 'Airway smooth muscle', - 'Club': 'Secretory', - 'Proliferating NK/T': 'Innate lymphoid cells', - 'Alveolar Epithelial Type 2': 'AT2', - 'Differentiating Basal': 'Basal', - 'CD8+ Naive T': 'T cell lineage', - 'Artery': 'Arterial', - 'Neutrophil': 'Monocytes', - 'Dendritic': 'Dendritic cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - 
self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['nReads'].values[:, None])) \ - .multiply(1 / 1000000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Krasnow' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41586-020-2922-4" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = ["_".join(i.split('_')[:-1]) for i in self.adata.obs['free_annotation']] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].astype('category') - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.uns[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/__init__.py b/sfaira/data/human/malegonad/__init__.py deleted file mode 100644 index bf7a87036..000000000 --- a/sfaira/data/human/malegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_malegonad import DatasetGroupMalegonad diff --git a/sfaira/data/human/malegonad/external.py b/sfaira/data/human/malegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/malegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import 
ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/malegonad/human_malegonad.py b/sfaira/data/human/malegonad/human_malegonad.py deleted file mode 100644 index 681f1f334..000000000 --- a/sfaira/data/human/malegonad/human_malegonad.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_malegonad_2018_10x_guo_001 import Dataset as Dataset0001 -from .human_malegonad_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_malegonad_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupMalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py b/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py deleted file mode 100644 index 0b4b7ab2f..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2018_10x_guo_001.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2" - self.download_website = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" - self.download_website_meta = None - self.organ = "malegonad" - self.sub_tissue = "testis" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongated Spermatids': 'Elongated Spermatids', - 'Leydig cells': 'Leydig cells', - 'Early Primary Spermatocytes': 'Early Primary Spermatocytes', - 'Round Spermatids': 'Round Spermatids', - 'Endothelial cells': 'Endothelial cells', - 'Macrophages': 'Macrophages', - 'Myoid cells': 'Myoid cells', - 'Differentiating Spermatogonia': 'Differentiating Spermatogonia', - 'Late primary Spermatocytes': 'Late primary Spermatocytes', - 'Spermatogonial Stem cell': 'Spermatogonial Stem cell', - 'Sertoli cells': 'Sertoli cells', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Cairns" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41422-018-0099-2" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py deleted file mode 100644 index 045e0ba9c..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_001.py +++ /dev/null @@ -1,90 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'malegonad' - self.sub_tissue = 'FetalMaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal acinar cell': 'Fetal acinar cell', - 'Fetal chondrocyte': 'Fetal chondrocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Sertoli cells', - 'Loop of Henle': 'Loop of Henle', - 'Macrophage': 'Macrophages', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal 
cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py b/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py deleted file mode 100644 index 3df75da7f..000000000 --- a/sfaira/data/human/malegonad/human_malegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,90 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'malegonad' - self.sub_tissue = 'FetalMaleGonad' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cells', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal acinar cell': 'Fetal acinar cell', - 'Fetal chondrocyte': 'Fetal chondrocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal neuron': 'Fetal neuron', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Sertoli cells', - 'Loop of Henle': 'Loop of Henle', - 'Macrophage': 'Macrophages', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal 
cell', - 'T cell': 'T cell', - 'Ureteric bud cell': 'Ureteric bud cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "hcl_FetalMaleGonad_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/mixed/__init__.py b/sfaira/data/human/mixed/__init__.py deleted file mode 100644 index 5c885d57c..000000000 --- a/sfaira/data/human/mixed/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_mixed import DatasetGroupMixed diff --git a/sfaira/data/human/mixed/external.py b/sfaira/data/human/mixed/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/mixed/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/mixed/human_mixed.py b/sfaira/data/human/mixed/human_mixed.py deleted file mode 100644 index 9041ae787..000000000 --- a/sfaira/data/human/mixed/human_mixed.py 
+++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_mixed_2019_10x_szabo_001 import Dataset as Dataset0001 - - -class DatasetGroupMixed(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMixed - self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py b/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py deleted file mode 100644 index dde7c0605..000000000 --- a/sfaira/data/human/mixed/human_mixed_2019_10x_szabo_001.py +++ /dev/null @@ -1,189 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import tarfile -import pandas as pd -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data files if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` attribute of - this class. The required celltype annotations for the data were kindly provided to us by the authors of the paper. - Please contact them directly to pbtain the required annotation files (donor1.annotation.txt and - donor2.annotation.txt). For (up to 100-fold faster) repeated data loading, please pass `load_raw=False` when calling - the self.load() method. 
For this, you need to preprocess the raw files as below and place the resulting h5ad file in - the data folder of this organ: - - import anndata - import tarfile - import pandas as pd - import scipy.sparse - adatas = [] - with tarfile.open('GSE126030_RAW.tar') as tar: - for member in tar.getmembers(): - df = pd.read_csv(tar.extractfile(member.name), compression='gzip', sep='\t') - df.index = [i.split('.')[0] for i in df['Accession']] - var = pd.concat([df.pop(x) for x in ['Gene', 'Accession']], 1) - if df.columns[-1].startswith('Un'): - df.drop(df.columns[len(df.columns)-1], axis=1, inplace=True) - adata = anndata.AnnData(df.T) - adata.var = var - if "PP001" in member.name or "PP002" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Lung' - elif "PP003" in member.name or "PP004" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Bone Marrow' - elif "PP005" in member.name or "PP006" in member.name: - adata.obs['donor'] = 'Donor1' - adata.obs['organ'] = 'Lymph Node' - elif "PP009" in member.name or "PP010" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Lung' - elif "PP011" in member.name or "PP012" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Bone Marrow' - elif "PP013" in member.name or "PP014" in member.name: - adata.obs['donor'] = 'Donor2' - adata.obs['organ'] = 'Lymph Node' - else: - continue - adata.obs.index = member.name.split('_')[1].split('s')[0]+'nskept.'+adata.obs.index - adatas.append(adata) - adata = adatas[0].concatenate(adatas[1:], index_unique=None) - adata.obs.drop('batch', axis=1, inplace=True) - adata = adata[:,adata.X.sum(axis=0) > 0].copy() - adata.obs['cell_ontology_class'] = 'Unknown' - df1 = pd.read_csv('donor1.annotation.txt', sep='\t', index_col=0, header=None) - df2 = pd.read_csv('donor2.annotation.txt', sep='\t', index_col=0, header=None) - for i in df1.index: - adata.obs['cell_ontology_class'].loc[i] = df1.loc[i][1] - for i in df2.index: - 
adata.obs['cell_ontology_class'].loc[i] = df2.loc[i][1] - adata.X = scipy.sparse.csc_matrix(adata.X) - adata.write('GSE126030.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" - self.download_website_meta = 'private' - self.organ = "mixed" - self.sub_tissue = "Bone Marrow, Lung, Lymph Node" - self.annotated = True - self.loaded = False - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), - os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), - os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"), - ] - adatas = [] - with tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - df = pd.read_csv(tar.extractfile(member.name), compression='gzip', sep='\t') - df.index = [i.split('.')[0] for i in df['Accession']] - var = pd.concat([df.pop(x) for x in ['Gene', 'Accession']], 1) - if df.columns[-1].startswith('Un'): - df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - self.adata = anndata.AnnData(df.T) - self.adata.var = var - if "PP001" in member.name or "PP002" in member.name: - self.adata.obs['donor'] = 'Donor1' - self.adata.obs['organ'] = 'Lung' - elif "PP003" in member.name or "PP004" in member.name: - self.adata.obs['donor'] = 'Donor1' - self.adata.obs['organ'] = 'Bone Marrow' - elif "PP005" in member.name or "PP006" in member.name: - self.adata.obs['donor'] = 'Donor1' - 
self.adata.obs['organ'] = 'Lymph Node' - elif "PP009" in member.name or "PP010" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Lung' - elif "PP011" in member.name or "PP012" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Bone Marrow' - elif "PP013" in member.name or "PP014" in member.name: - self.adata.obs['donor'] = 'Donor2' - self.adata.obs['organ'] = 'Lymph Node' - else: - continue - self.adata.obs.index = member.name.split('_')[1].split('s')[0] + 'nskept.' + self.adata.obs.index - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:], index_unique=None) - self.adata.obs.drop('batch', axis=1, inplace=True) - self.adata = self.adata[:, self.adata.X.sum(axis=0) > 0].copy() - self.adata.obs['cell_ontology_class'] = 'Unknown' - df1 = pd.read_csv(fn[1], sep='\t', index_col=0, header=None) - df2 = pd.read_csv(fn[2], sep='\t', index_col=0, header=None) - for i in df1.index: - self.adata.obs['cell_ontology_class'].loc[i] = df1.loc[i][1] - for i in df2.index: - self.adata.obs['cell_ontology_class'].loc[i] = df2.loc[i][1] - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - else: - if fn is None: - fn = os.path.join(self.path, "human", "mixed", "GSE126030.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sims" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1038/s41467-019-12464-3" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - 
self.adata.obs["subtissue"] = self.adata.obs["organ"] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='Gene', ensembl_col='Accession') - - # If the subset_organs() method has been run before, subset to specified organs - if "organsubset" in self.__dict__: - self.adata = self.adata[self.adata.obs['organ'].isin(self.organsubset)] - # If adata object is empty, set it to None - if not len(self.adata): - self.adata = None - self.loaded = True - - @property - def ncells(self): - if "organsubset" in self.__dict__: - if not self.loaded: - self._load() - if self.adata is None: - return 0 - else: - return self.adata.n_obs - else: - return super().ncells diff --git a/sfaira/data/human/muscle/__init__.py b/sfaira/data/human/muscle/__init__.py deleted file mode 100644 index f6c2f1d41..000000000 --- a/sfaira/data/human/muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_muscle import DatasetGroupMuscle diff --git a/sfaira/data/human/muscle/external.py b/sfaira/data/human/muscle/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/muscle/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/muscle/human_muscle.py b/sfaira/data/human/muscle/human_muscle.py deleted file mode 100644 index 30cdb789b..000000000 --- a/sfaira/data/human/muscle/human_muscle.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_muscle_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_muscle_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupMuscle(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, 
meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py deleted file mode 100644 index c8da3462a..000000000 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'muscle' - self.sub_tissue = 'FetalMuscle' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "muscle", "hcl_FetalMuscle_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py b/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py deleted file mode 100644 index 032d37ce1..000000000 --- a/sfaira/data/human/muscle/human_muscle_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'muscle' - self.sub_tissue = 'AdultMuscle' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "muscle", "hcl_AdultMuscle_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/omentum/__init__.py b/sfaira/data/human/omentum/__init__.py deleted file mode 100644 index 330530786..000000000 --- a/sfaira/data/human/omentum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_omentum import DatasetGroupOmentum diff --git a/sfaira/data/human/omentum/external.py b/sfaira/data/human/omentum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/omentum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/omentum/human_omentum.py b/sfaira/data/human/omentum/human_omentum.py deleted file mode 100644 index f16e94458..000000000 --- a/sfaira/data/human/omentum/human_omentum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_omentum_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_omentum_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_omentum_2020_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupOmentum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupOmentum - self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py deleted file mode 100644 index fa911836d..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_001.py +++ 
/dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py deleted file mode 100644 index 3fb5c9d11..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py b/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py deleted file mode 100644 index 7cda691f9..000000000 --- a/sfaira/data/human/omentum/human_omentum_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'omentum' - self.sub_tissue = 'AdultOmentum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "omentum", "hcl_AdultOmentum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/pancreas/__init__.py b/sfaira/data/human/pancreas/__init__.py deleted file mode 100644 index 34f7a3229..000000000 --- a/sfaira/data/human/pancreas/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_pancreas import DatasetGroupPancreas diff --git a/sfaira/data/human/pancreas/external.py b/sfaira/data/human/pancreas/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/pancreas/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pancreas/human_pancreas.py b/sfaira/data/human/pancreas/human_pancreas.py deleted file mode 100644 index e609c44fe..000000000 --- a/sfaira/data/human/pancreas/human_pancreas.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_pancreas_2016_indrop_baron_001 import Dataset as Dataset0001 -from .human_pancreas_2016_smartseq2_segerstolpe_001 import Dataset as Dataset0002 -from .human_pancreas_2017_smartseq2_enge_001 import Dataset as Dataset0003 -from .human_pancreas_2020_microwell_han_001 import Dataset as Dataset0004 -from .human_pancreas_2020_microwell_han_002 import Dataset as Dataset0005 -from .human_pancreas_2020_microwell_han_003 import Dataset as Dataset0006 -from .human_pancreas_2020_microwell_han_004 import Dataset as Dataset0007 - - -class DatasetGroupPancreas(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, 
datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py deleted file mode 100644 index 7afe0094b..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2016_indrop_baron_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" - self.download_website = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" - self.download_website_meta = None - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 't_cell': 'T cell', - 'quiescent_stellate': 'Quiescent Stellate cell', - 'mast': 'Mast cell', - 'delta': 'Delta cell', - 'beta': 'Beta cell', - 'endothelial': 'Endothelial cell', - 'macrophage': 'Macrophage', - 'epsilon': 'Epsilon cell', - 'activated_stellate': 'Activated Stellate cell', - 'acinar': 'Acinar cell', - 'alpha': 'Alpha cell', - 'ductal': 'Ductal cell', - 'schwann': 'Schwann cell', - 'gamma': 'Gamma cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in 
constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Yanai" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cels.2016.08.011" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'inDrop' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py deleted file mode 100644 index 44d2d183e..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import pandas as pd - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data files which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" - self.download_website = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" - self.download_website_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'alpha cell': 'Alpha cell', - 'ductal cell': 'Ductal cell', - 'beta cell': 'Beta cell', - 'gamma cell': 'Gamma cell', - 'acinar cell': 'Acinar cell', - 'delta cell': 'Delta cell', - 'PSC cell': 'PSC cell', - 'unclassified endocrine cell': 'Unclassified endocrine cell', - 'co-expression cell': 'Co-expression cell', - 'endothelial cell': 'Endothelial cell', - 'epsilon cell': 'Epsilon cell', - 'mast cell': 'Mast cell', - 'MHC class II cell': 'MHC class II cell', - 'unclassified cell': 'Unknown', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") - ] - df = pd.read_csv(fn[0], sep='\t') - df.index = df.index.get_level_values(0) - df = df.drop('#samples', axis=1) - df = df.T.iloc[:, :26178] - self.adata = anndata.AnnData(df) - self.adata.obs = pd.read_csv(fn[1], sep='\t').set_index('Source Name').loc[self.adata.obs.index] - # filter observations which are not cells (empty wells, low quality cells etc.) 
- self.adata = self.adata[self.adata.obs['Characteristics[cell type]'] != 'not applicable'].copy() - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Sandberg" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2016 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2016.08.020" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = [True if line == 'normal' else False for line in self.adata.obs['Characteristics[disease]']] - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs['Characteristics[disease]'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact].cat.rename_categories({'normal':'healthy', 'type II diabetes mellitus':'type II diabetes mellitus'}) - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Characteristics[cell type]'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py deleted file mode 100644 index 6bdd8965e..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2017_smartseq2_enge_001.py +++ /dev/null @@ -1,147 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import tarfile -import gzip -from io import StringIO -import anndata as ad -import pandas as pd -import scipy.sparse - - -class 
Dataset(DatasetBase): - """ - This data loader supports reading of the downloaded raw data file if `load_raw=True` is passed to self.load() - To download the datafile required by this dataloader, use the link provided as the `download_website` and - `download_website_meta` attributes of this class. For (up to 100-fold faster) repeated data loading, please pass - `load_raw=False` when calling the self.load() method. For this, you need to preprocess the raw files as below and - place the resulting h5ad file in the data folder of this organ: - - import tarfile - import os - import gzip - from io import StringIO - import anndata as ad - import pandas as pd - import scipy.sparse - dfs = [] - with tarfile.open("GSE81547_RAW.tar") as tar: - for member in tar.getmembers(): - d = pd.read_csv(tar.extractfile(member), compression='gzip', header=None, sep='\t', index_col=0, names=[member.name.split("_")[0]]) - dfs.append(d) - adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - adata.X = scipy.sparse.csc_matrix(adata.X) - with gzip.open('GSE81547_series_matrix.txt.gz') as f: - file_content = [i.decode("utf-8") for i in f.readlines()] - inputstring = '' - for line in file_content: - if '"ID_REF"' in line: - inputstring += line - if '!Sample_title' in line: - inputstring += line[1:] - if '!Sample_characteristics_ch1\t"inferred_cell_type: alpha' in line: - inputstring += line[1:] - data = StringIO(inputstring) - d = pd.read_csv(data, sep='\t').T - d.columns=d.iloc[0] - d.drop('Sample_title', inplace=True) - d = d.reset_index().set_index('ID_REF') - d.columns.name = None - d.index.name = None - adata.obs['celltype'] = [d.loc[i]['Sample_characteristics_ch1'].split(": ")[1] for i in adata.obs.index] - adata.obs['patient'] = ["_".join(d.loc[i]['index'].split('_')[:2]) for i in adata.obs.index] - adata.write('GSE81547.h5ad') - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, 
- **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" - self.download_website = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" - self.download_website_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" - self.organ = "pancreas" - self.sub_tissue = "islet of Langerhans" - self.annotated = True - - self.class_maps = { - "0": { - 'alpha': 'Alpha cell', - 'acinar': 'Acinar cell', - 'ductal': 'Ductal cell', - 'beta': 'Beta cell', - 'unsure': 'Unknown', - 'delta': 'Delta cell', - 'mesenchymal': 'Mesenchymal Cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), - os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") - ] - dfs = [] - with tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - d = pd.read_csv(tar.extractfile(member), compression='gzip', header=None, sep='\t', index_col=0, - names=[member.name.split("_")[0]]) - dfs.append(d) - self.adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - with gzip.open(fn[1]) as f: - file_content = [i.decode("utf-8") for i in f.readlines()] - inputstring = '' - for line in file_content: - if '"ID_REF"' in line: - inputstring += line - if '!Sample_title' in line: - inputstring += line[1:] - if '!Sample_characteristics_ch1\t"inferred_cell_type: alpha' in line: - inputstring += line[1:] - data = StringIO(inputstring) - d = pd.read_csv(data, sep='\t').T - d.columns = d.iloc[0] - d.drop('Sample_title', inplace=True) - d = d.reset_index().set_index('ID_REF') - d.columns.name = None - d.index.name = 
None - self.adata.obs['celltype'] = [d.loc[i]['Sample_characteristics_ch1'].split(": ")[1] for i in self.adata.obs.index] - self.adata.obs['patient'] = ["_".join(d.loc[i]['index'].split('_')[:2]) for i in self.adata.obs.index] - - else: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "GSE81547.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2017 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2017.09.004" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 'Smartseq2' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['celltype'] - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py deleted file mode 100644 index bcc01b053..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_001.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'AdultPancreas' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 
'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_AdultPancreas_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py deleted file mode 100644 index 22ff8326d..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_002.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata 
-import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 
'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py 
b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py deleted file mode 100644 index 7bd2e3004..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_003.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 
'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 
'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py b/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py deleted file mode 100644 index c218c072c..000000000 --- a/sfaira/data/human/pancreas/human_pancreas_2020_microwell_han_004.py +++ /dev/null @@ -1,99 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'Pancreas' - self.sub_tissue = 'FetalPancreas' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Erythroid cell': 
'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Neuron', - 'Fetal acinar cell': 'Acinar cell', - 'Fetal endocrine cell': 'Endocrine cell', - 'Fetal enterocyte ': 'Enterocyte', - 'Fetal epithelial progenitor': 'Epithelial progenitor', - 'Fetal fibroblast': 'Fibroblast', - 'Fetal mesenchymal progenitor': 'Mesenchymal Cell', - 'Fetal neuron': 'Neuron', - 'Fetal skeletal muscle cell': 'Skeletal muscle cell', - 'Fetal stromal cell': 'Stromal cell', - 'Fibroblast': 'Fibroblast', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Immature sertoli cell (Pre-Sertoli cell)': 'Immature sertoli cell (Pre-Sertoli cell)', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Pancreas exocrine cell': 'Pancreas exocrine cell', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'T cell', - 'Proximal tubule progenitor': 'Proximal tubule progenitor', - 'Sinusoidal endothelial cell': 'Endothelial cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "hcl_FetalPancreas_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/placenta/__init__.py b/sfaira/data/human/placenta/__init__.py deleted file mode 100644 index 5ea45fa0a..000000000 --- a/sfaira/data/human/placenta/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_placenta import DatasetGroupPlacenta diff --git a/sfaira/data/human/placenta/external.py b/sfaira/data/human/placenta/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/placenta/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/placenta/human_placenta.py b/sfaira/data/human/placenta/human_placenta.py deleted file mode 100644 index 4700e8083..000000000 --- a/sfaira/data/human/placenta/human_placenta.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_placenta_2018_smartseq2_ventotormo_001 import Dataset as Dataset0001 -from .human_placenta_2018_10x_ventotormo_001 import Dataset as Dataset0002 -from .human_placenta_2020_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupPlacenta(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py deleted file mode 100644 index e5c833dac..000000000 --- a/sfaira/data/human/placenta/human_placenta_2018_10x_ventotormo_001.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Union -from .external import DatasetBase -import pandas as pd -import anndata - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2018_10x_ventotormo_10.1038/s41586-018-0698-6" - self.download_website = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.1.zip' - self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip' - self.organ = "placenta" - self.sub_tissue = "placenta, decidua, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'DC1': 'Dendritic Cells 1', - 'DC2': 'Dendritic Cells 2', - 'EVT': 'Extravillous Trophoblasts', - 'Endo (f)': 'Endothelial Cells f', - 'Endo (m)': 'Endothelial Cells m', - 'Endo L': 'Endothelial Cells L', - 'Epi1': 'Epithelial Glandular Cells 1', - 'Epi2': 'Epithelial Glandular Cells 2', - 'Granulocytes': 'Granulocytes', - 'HB': 'Hofbauer Cells', - 'ILC3': 'ILC3', - 'MO': 'Monocyte', - 'NK CD16+': 'NK Cells CD16+', - 'NK CD16-': 'NK Cells CD16-', - 'Plasma': 'B cell (Plasmocyte)', - 'SCT': 
'Syncytiotrophoblasts', - 'Tcells': 'T cell', - 'VCT': 'Villous Cytotrophoblasts', - 'dM1': 'Decidual Macrophages 1', - 'dM2': 'Decidual Macrophages 2', - 'dM3': 'Decidual Macrophages 3', - 'dNK p': 'Decidual NK Cells p', - 'dNK1': 'Decidual NK Cells 1', - 'dNK2': 'Decidual NK Cells 2', - 'dNK3': 'Decidual NK Cells 3', - 'dP1': 'Perivascular Cells 1', - 'dP2': 'Perivascular Cells 2', - 'dS1': 'Decidual Stromal Cells 1', - 'dS2': 'Decidual Stromal Cells 2', - 'dS3': 'Decidual Stromal Cells 3', - 'fFB1': 'Fibroblasts 1', - 'fFB2': 'Fibroblasts 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"), - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) - df = pd.read_csv(fn[1], sep='\t') - for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs['annotation'] - self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() - self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - - self.adata = self.adata[:, ~self.adata.var.index.isin( - ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff --git a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py deleted file mode 100644 index 8de479d29..000000000 --- a/sfaira/data/human/placenta/human_placenta_2018_smartseq2_ventotormo_001.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Union -from .external import DatasetBase -import pandas as pd -import anndata - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` and - `download_website_meta` attributes of this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2018_smartseq2_ventotormo_10.1038/s41586-018-0698-6" - self.download_website = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.1.zip' - self.download_website_meta = 'https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.2.zip' - self.organ = "placenta" - self.sub_tissue = "placenta, decidua, blood" - self.annotated = True - - self.class_maps = { - "0": { - 'DC1': 'Dendritic Cells 1', - 'DC2': 'Dendritic Cells 2', - 'EVT': 'Extravillous Trophoblasts', - 'Endo (f)': 'Endothelial Cells f', - 'Endo (m)': 'Endothelial Cells m', - 'Endo L': 'Endothelial Cells L', - 'Epi1': 'Epithelial Glandular Cells 1', - 'Epi2': 'Epithelial Glandular Cells 2', - 'Granulocytes': 'Granulocytes', - 'HB': 'Hofbauer Cells', - 'ILC3': 'ILC3', - 'MO': 'Monocyte', - 'NK CD16+': 'NK Cells CD16+', - 'NK CD16-': 'NK Cells CD16-', - 'Plasma': 'B cell (Plasmocyte)', - 'SCT': 'Syncytiotrophoblasts', - 'Tcells': 'T cell', - 'VCT': 'Villous Cytotrophoblasts', - 'dM1': 'Decidual Macrophages 1', - 'dM2': 'Decidual Macrophages 2', - 'dM3': 'Decidual Macrophages 3', - 'dNK p': 'Decidual NK Cells p', - 'dNK1': 'Decidual NK Cells 1', - 'dNK2': 'Decidual NK Cells 2', - 'dNK3': 'Decidual NK Cells 3', - 'dP1': 'Perivascular Cells 1', - 'dP2': 'Perivascular Cells 2', - 'dS1': 'Decidual Stromal Cells 1', - 'dS2': 'Decidual Stromal Cells 2', - 'dS3': 'Decidual Stromal Cells 3', - 'fFB1': 'Fibroblasts 1', - 'fFB2': 'Fibroblasts 2', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = [ - 
os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"), - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep='\t', index_col='Gene').T) - df = pd.read_csv(fn[1], sep='\t') - for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Teichmann' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-018-0698-6' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "Smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs = self.adata.obs.rename({'location': 'organ'}, axis='columns') - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['annotation'] - self.adata.obs["subtissue"] = self.adata.obs["organ"].copy() - self.adata.obs["final_cluster"] = self.adata.obs['final_cluster'].astype('category') - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self.adata.var['ensembl'] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var['names'] = [i.split("_")[0] for i in self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop('index', axis=1) - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - - self.adata = self.adata[:, ~self.adata.var.index.isin( - ['', '-1', '-10', '-11', '-2', '-3', '-4', '-5', '-6', '-7', '-8', '-9', 'A.2', 'A.3'])].copy() diff 
--git a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py b/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py deleted file mode 100644 index a87c4a0a8..000000000 --- a/sfaira/data/human/placenta/human_placenta_2020_microwell_han_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Placenta' - self.sub_tissue = 'Placenta' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Fibroblast': 'Fibroblast', - 'Macrophage': 'Macrophage', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Stromal cell': 'Stromal cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Endothelial cell': 'Endothelial cell', - 'T cell': 'T cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Intermediated cell': 'Intermediated cell', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Fetal neuron': 'Fetal neuron', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Endothelial 
cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'hESC': 'hESC', - 'Basal cell': 'Basal cell', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell (endothelial to mesenchymal transition)', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'M2 Macrophage': 'M2 Macrophage', - 'Myeloid cell': 'Myeloid cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "placenta", "hcl_Placenta_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/pleura/__init__.py 
b/sfaira/data/human/pleura/__init__.py deleted file mode 100644 index fbfcb922f..000000000 --- a/sfaira/data/human/pleura/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_pleura import DatasetGroupPleura diff --git a/sfaira/data/human/pleura/external.py b/sfaira/data/human/pleura/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/pleura/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/pleura/human_pleura.py b/sfaira/data/human/pleura/human_pleura.py deleted file mode 100644 index da4bba12c..000000000 --- a/sfaira/data/human/pleura/human_pleura.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_pleura_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupPleura(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupPleura - self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py b/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py deleted file mode 100644 index 80e79ecd1..000000000 --- a/sfaira/data/human/pleura/human_pleura_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'pleura' - self.sub_tissue = 'AdultPleura' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "pleura", "hcl_AdultPleura_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/prostate/__init__.py b/sfaira/data/human/prostate/__init__.py deleted file mode 100644 index 34ef08dc6..000000000 --- a/sfaira/data/human/prostate/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_prostate import DatasetGroupProstate diff --git a/sfaira/data/human/prostate/external.py b/sfaira/data/human/prostate/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/prostate/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/prostate/human_prostate.py b/sfaira/data/human/prostate/human_prostate.py deleted file mode 100644 index 71f2a1991..000000000 --- a/sfaira/data/human/prostate/human_prostate.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_prostate_2018_10x_henry_001 import Dataset as Dataset0001 -from .human_prostate_2020_microwell_han_001 import Dataset as Dataset0002 - - -class DatasetGroupProstate(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py b/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py deleted file mode 100644 index a0d2ed360..000000000 --- a/sfaira/data/human/prostate/human_prostate_2018_10x_henry_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase 
-import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" - self.download_website = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" - self.download_website_meta = None - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True - - self.class_maps = { - "0": { - 'Basal': 'Basal cell', - 'Hillock': 'Hillock', - 'Luminal': 'Luminal', - 'Endothelia': 'Endothelial cell', - 'Club': 'Club', - 'Fibroblast': 'Fibroblast', - 'Smooth muscle': 'Smooth muscle cell', - 'Leukocytes': 'Leukocytes', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Strand" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2018 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.celrep.2018.11.086" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py b/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py deleted file mode 100644 index ef194dee4..000000000 --- a/sfaira/data/human/prostate/human_prostate_2020_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'prostate' - self.sub_tissue = 'AdultProstate' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Basal cell': 'Basal cell', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell', - 'Endothelial cell (endothelial to mesenchymal transition)': 'Endothelial cell', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell (intermediated)': 'Epithelial cell (intermediated)', - 'Fasciculata cell': 'Fasciculata cell', - 'Fetal enterocyte': 'Fetal enterocyte', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Gastric endocrine cell': 'Gastric endocrine cell', - 'Goblet cell': 'Goblet cell', - 'Macrophage': 'Macrophage', - 'Monocyte': 'Monocyte', - 'Primordial germ cell': 'Primordial germ cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stratified epithelial cell': 'Stratified epithelial cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, 
"human", "prostate", "hcl_AdultProstate_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/rectum/__init__.py b/sfaira/data/human/rectum/__init__.py deleted file mode 100644 index a341faa2e..000000000 --- a/sfaira/data/human/rectum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_rectum import DatasetGroupRectum diff --git a/sfaira/data/human/rectum/external.py b/sfaira/data/human/rectum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/rectum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rectum/human_rectum.py b/sfaira/data/human/rectum/human_rectum.py deleted file mode 100644 index bc246d917..000000000 --- a/sfaira/data/human/rectum/human_rectum.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_rectum_2019_10x_wang_001 import Dataset as Dataset0001 -from .human_rectum_2020_microwell_han_001 import Dataset as Dataset0002 - - -class DatasetGroupRectum(DatasetGroupBase): - - 
def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRectum - self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py b/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py deleted file mode 100644 index 8ef1d79b6..000000000 --- a/sfaira/data/human/rectum/human_rectum_2019_10x_wang_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. 
- - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rectum_2019_10x_wang_001_10.1084/jem.20191130" - self.download_website = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad" - self.organ = "rectum" - self.sub_tissue = "rectum" - self.annotated = True - - self.class_maps = { - "0": { - 'Progenitor': 'Enterocyte progenitor', - 'Goblet': 'Goblet', - 'Enterocyte': 'Enterocyte', - 'Paneth-like': 'Paneth-like', - 'Stem Cell': 'Stem Cell', - 'TA': 'TA', - 'Enteriendocrine': 'Enteroendocrine', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Chen" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1084/jem.20191130" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = 
self.adata.obs['CellType'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py b/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py deleted file mode 100644 index 702e630a7..000000000 --- a/sfaira/data/human/rectum/human_rectum_2020_microwell_han_001.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'rectum' - self.sub_tissue = 'AdultRectum' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell': 'B cell', - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Enterocyte': 'Enterocyte', - 'Enterocyte progenitor': 'Enterocyte progenitor', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 
'Monocyte', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rectum", "hcl_AdultRectum_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/rib/__init__.py b/sfaira/data/human/rib/__init__.py deleted file mode 100644 index e648dcc40..000000000 --- a/sfaira/data/human/rib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_rib import DatasetGroupRib diff --git a/sfaira/data/human/rib/external.py b/sfaira/data/human/rib/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/rib/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/rib/human_rib.py b/sfaira/data/human/rib/human_rib.py deleted file mode 100644 index 1a5481a0b..000000000 --- 
a/sfaira/data/human/rib/human_rib.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_rib_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_rib_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupRib(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py deleted file mode 100644 index c2d3a7c5b..000000000 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'rib' - self.sub_tissue = 'FetalRib' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py b/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py deleted file mode 100644 index a909d5890..000000000 --- a/sfaira/data/human/rib/human_rib_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'rib' - self.sub_tissue = 'FetalRib' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "rib", "hcl_FetalRib_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/skin/__init__.py b/sfaira/data/human/skin/__init__.py deleted file mode 100644 index 78ccff527..000000000 --- a/sfaira/data/human/skin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_skin import DatasetGroupSkin diff --git a/sfaira/data/human/skin/external.py b/sfaira/data/human/skin/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/skin/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/skin/human_skin.py b/sfaira/data/human/skin/human_skin.py deleted file mode 100644 index db470536f..000000000 --- a/sfaira/data/human/skin/human_skin.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_skin_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_skin_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupSkin(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) - 
except ImportError: - pass diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py deleted file mode 100644 index 1e17922c3..000000000 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_001.py +++ /dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'skin' - self.sub_tissue = 'FetalSkin' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 
'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Kidney intercalated cell': 'Kidney intercalated cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py b/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py deleted file mode 100644 index f05d0e8e8..000000000 --- a/sfaira/data/human/skin/human_skin_2020_microwell_han_002.py +++ 
/dev/null @@ -1,91 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'skin' - self.sub_tissue = 'FetalSkin' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'Basal cell': 'Basal cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Endothelial cell': 'Endothelial cell', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'Epithelial cell': 'Epithelial cell', - 'Erythroid cell': 'Erythroid cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Fetal Neuron': 'Fetal Neuron', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Fetal fibroblast': 'Fetal fibroblast', - 'Fetal mesenchymal progenitor': 'Fetal mesenchymal progenitor', - 'Fetal skeletal muscle cell': 'Fetal skeletal muscle cell', - 'Fetal stromal cell': 'Fetal stromal cell', - 'Fibroblast': 'Fibroblast', - 'Kidney intercalated cell': 'Kidney intercalated cell', - 'Macrophage': 'Macrophage', - 'Mast cell': 'Mast cell', - 'Monocyte': 'Monocyte', - 
'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Primordial germ cell': 'Primordial germ cell', - 'Proliferating T cell': 'Proliferating T cell', - 'Smooth muscle cell': 'Smooth muscle cell', - 'Stromal cell': 'Stromal cell', - 'T cell': 'T cell', - 'hESC': 'hESC', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "skin", "hcl_FetalSkin_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/spinalcord/__init__.py b/sfaira/data/human/spinalcord/__init__.py deleted file mode 100644 index 449651e79..000000000 --- a/sfaira/data/human/spinalcord/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_spinalcord import DatasetGroupSpinalcord diff --git a/sfaira/data/human/spinalcord/external.py b/sfaira/data/human/spinalcord/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/spinalcord/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import 
DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spinalcord/human_spinalcord.py b/sfaira/data/human/spinalcord/human_spinalcord.py deleted file mode 100644 index 386ee4d15..000000000 --- a/sfaira/data/human/spinalcord/human_spinalcord.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_spinalcord_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupSpinalcord(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpinalcord - self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py deleted file mode 100644 index afa0de19e..000000000 --- a/sfaira/data/human/spinalcord/human_spinalcord_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'spinalcord' - self.sub_tissue = 'FetalSpinalCord' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spinalcord", "hcl_FetalSpinalCord_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/spleen/__init__.py b/sfaira/data/human/spleen/__init__.py deleted file mode 100644 index 88890eff3..000000000 --- a/sfaira/data/human/spleen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_spleen import DatasetGroupSpleen diff --git a/sfaira/data/human/spleen/external.py b/sfaira/data/human/spleen/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/spleen/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/spleen/human_spleen.py b/sfaira/data/human/spleen/human_spleen.py deleted file mode 100644 index 5a6f95804..000000000 --- a/sfaira/data/human/spleen/human_spleen.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_spleen_2019_10x_madissoon_001 import Dataset as Dataset0001 -from .human_spleen_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_spleen_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupSpleen(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py deleted file mode 100644 index ab6dc0b09..000000000 --- a/sfaira/data/human/spleen/human_spleen_2019_10x_madissoon_001.py +++ /dev/null @@ -1,95 +0,0 @@ 
-import anndata -import os -from typing import Union -from .external import DatasetBase -import scipy.sparse - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2019_10x_madissoon_001_10.1101/741405" - self.download_website = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" - self.download_website_meta = None - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": { - "B_Hypermutation": "B_Hypermutation", - "B_T_doublet": "B_T_doublet", - "B_follicular": "B_follicular", - "B_mantle": "B_mantle", - "CD34_progenitor": "CD34_progenitor", - "DC_1": "DC_1", - "DC_2": "DC_2", - "DC_activated": "DC_activated", - "DC_plasmacytoid": "DC_plasmacytoid", - "ILC": "ILC", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "NK_CD160pos": "NK_CD160pos", - "NK_FCGR3Apos": "NK_FCGR3Apos", - "NK_dividing": "NK_dividing", - "Plasma_IgG": "Plasma_IgG", - "Plasma_IgM": "Plasma_IgM", - "Plasmablast": "Plasmablast", - "Platelet": "Platelet", - "T_CD4_conv": "T_CD4_conv", - "T_CD4_fh": "T_CD4_fh", - "T_CD4_naive": "T_CD4_naive", - "T_CD4_reg": "T_CD4_reg", - "T_CD8_CTL": "T_CD8_CTL", - "T_CD8_MAIT": "T_CD8_MAIT", - "T_CD8_activated": "T_CD8_activated", - "T_CD8_gd": "T_CD8_gd", - "T_cell_dividing": "Proliferating T cell", - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", 
"spleen.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs['n_counts'].values[:, None]))\ - .multiply(1/10000) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Meyer" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2019 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/741405" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Celltypes'] - self.set_unkown_class_id(ids=["Unknown"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col='gene_ids-HCATisStab7463846', - new_index=ADATA_IDS_SFAIRA.gene_id_ensembl) diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py deleted file mode 100644 index dbcc0eb87..000000000 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'Spleen' - self.sub_tissue = 'AdultSpleen' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Erythroid cell': 'Erythroid cell', - 'Monocyte': 'Monocyte', - 'Endothelial cell': 'Endothelial cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Proliferating T cell': 'Proliferating T cell', - 'Fibroblast': 'Fibroblast', - 'Stromal cell': 'Stromal cell', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Mast cell': 'Mast cell', - 'Smooth muscle cell': 'Smooth muscle cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleenParenchyma_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py b/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py deleted file mode 100644 index 4c44116aa..000000000 --- a/sfaira/data/human/spleen/human_spleen_2020_microwell_han_002.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'Spleen' - self.sub_tissue = 'AdultSpleen' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'B cell (Plasmocyte)': 'B cell (Plasmocyte)', - 'Neutrophil': 'Neutrophil', - 'Endothelial cell (APC)': 'Endothelial cell (APC)', - 'B cell': 'B cell', - 'Macrophage': 'Macrophage', - 'T cell': 'T cell', - 'Erythroid progenitor cell (RP high)': 'Erythroid progenitor cell (RP high)', - 'Dendritic cell': 'Dendritic cell', - 'CB CD34+': 'CB CD34+', - 'Erythroid cell': 'Erythroid cell', - 'Monocyte': 'Monocyte', - 'Endothelial cell': 'Endothelial cell', - 'Sinusoidal endothelial cell': 'Sinusoidal endothelial cell', - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Proliferating T cell': 'Proliferating T cell', - 'Fibroblast': 'Fibroblast', - 'Stromal cell': 'Stromal cell', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Mast cell': 'Mast cell', - 'Smooth muscle cell': 'Smooth muscle cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "spleen", "hcl_AdultSpleen_1.h5ad") - 
self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") diff --git a/sfaira/data/human/stomach/__init__.py b/sfaira/data/human/stomach/__init__.py deleted file mode 100644 index 0ba1dc728..000000000 --- a/sfaira/data/human/stomach/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_stomach import DatasetGroupStomach diff --git a/sfaira/data/human/stomach/external.py b/sfaira/data/human/stomach/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/stomach/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/stomach/human_stomach.py b/sfaira/data/human/stomach/human_stomach.py deleted file mode 100644 index 537ea659a..000000000 --- a/sfaira/data/human/stomach/human_stomach.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_stomach_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_stomach_2020_microwell_han_002 import Dataset as Dataset0002 -from .human_stomach_2020_microwell_han_003 import Dataset as Dataset0003 -from 
.human_stomach_2020_microwell_han_004 import Dataset as Dataset0004 -from .human_stomach_2020_microwell_han_005 import Dataset as Dataset0005 -from .human_stomach_2020_microwell_han_006 import Dataset as Dataset0006 -from .human_stomach_2020_microwell_han_007 import Dataset as Dataset0007 -from .human_stomach_2020_microwell_han_008 import Dataset as Dataset0008 -from .human_stomach_2020_microwell_han_009 import Dataset as Dataset0009 -from .human_stomach_2020_microwell_han_010 import Dataset as Dataset0010 - - -class DatasetGroupStomach(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py deleted file mode 100644 index d9100c776..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py deleted file mode 100644 index b5e9fe7e0..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalStomach' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py deleted file mode 100644 index c1a2d80e0..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py deleted file mode 100644 index 5dc7e5944..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntetsine_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py deleted file mode 100644 index 9c779b667..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_005.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalStomach' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalStomach_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py deleted file mode 100644 index 90f12d3c8..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_006.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py deleted file mode 100644 index c583b2bf7..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_007.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_5.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py deleted file mode 100644 index 041004ec7..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_008.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_3.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py deleted file mode 100644 index 935271988..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_009.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'AdultStomach' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_AdultStomach_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py b/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py deleted file mode 100644 index 5cc789fa6..000000000 --- a/sfaira/data/human/stomach/human_stomach_2020_microwell_han_010.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" - self.organ = 'stomach' - self.sub_tissue = 'FetalIntestine' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "stomach", "hcl_FetalIntestine_4.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thymus/__init__.py b/sfaira/data/human/thymus/__init__.py deleted file mode 100644 index 1d0720e38..000000000 --- a/sfaira/data/human/thymus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_thymus import DatasetGroupThymus diff --git a/sfaira/data/human/thymus/external.py b/sfaira/data/human/thymus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/thymus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thymus/human_thymus.py b/sfaira/data/human/thymus/human_thymus.py deleted file mode 100644 index c8d5da0ad..000000000 --- a/sfaira/data/human/thymus/human_thymus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_thymus_2020_10x_park_001 import Dataset as Dataset0001 -from .human_thymus_2020_microwell_han_001 import Dataset as Dataset0002 -from .human_thymus_2020_microwell_han_002 import Dataset as Dataset0003 - - -class DatasetGroupThymus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = 
dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py b/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py deleted file mode 100644 index 295656f14..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_10x_park_001.py +++ /dev/null @@ -1,108 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase -import numpy as np - - -class Dataset(DatasetBase): - """ - This data loader directly processes the raw data file which can be obtained from the `download_website` attribute of - this class. - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" - self.download_website = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" - self.download_website_meta = None - self.organ = "thymus" - self.sub_tissue = "fetal thymus" - self.annotated = True - - self.class_maps = { - "0": { - 'B_memory': 'B_memory', - 'B_naive': 'B_naive', - 'B_plasma': 'B_plasma', - 'B_pro/pre': 'B_pro/pre', - 'CD4+T': 'CD4+T', - 'CD4+Tmem': 'CD4+Tmem', - 'CD8+T': 'CD8+T', - 'CD8+Tmem': 'CD8+Tmem', - 'CD8αα': 'CD8αα', - 'DC1': 'DC1', - 'DC2': 'DC2', - 'DN': 'DN', - 'DP': 'DP', - 'ETP': 'ETP', - 'Endo': 'Endo', - 'Epi_GCM2': 'Epi_GCM2', - 'Ery': 'Ery', - 'Fb_1': 'Fb_1', - 'Fb_2': 'Fb_2', - 'Fb_cycling': 'Fb_cycling', - 'ILC3': 'ILC3', - 'Lymph': 'Lymph', - 'Mac': 'Mac', - 'Mast': 'Mast', - 'Mgk': 'Mgk', - 'Mono': 'Mono', - 'NK': 'NK', - 'NKT': 'NKT', - 'NMP': 'NMP', - 'T(agonist)': 'T(agonist)', - 
'TEC(myo)': 'TEC(myo)', - 'TEC(neuro)': 'TEC(neuro)', - 'Treg': 'Treg', - 'VSMC': 'VSMC', - 'aDC': 'aDC', - 'cTEC': 'cTEC', - 'mTEC(I)': 'mTEC(I)', - 'mTEC(II)': 'mTEC(II)', - 'mTEC(III)': 'mTEC(III)', - 'mTEC(IV)': 'mTEC(IV)', - 'mcTEC': 'mcTEC', - 'pDC': 'pDC', - 'αβT(entry)': 'alpha_beta_T(entry)', - 'γδT': 'gamma_delta_T', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Teichmann" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1126/science.aay3224" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = '10x' - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs['Anno_level_fig1'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = 'healthy' - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py deleted file mode 100644 index e50d8bf2d..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os 
-from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'thymus' - self.sub_tissue = 'FetalThymus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Erythroid cell': 'Ery', - 'Erythroid progenitor cell (RP high)': 'Ery', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Macrophage': 'Mac', - 'Monocyte': 'Mono', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'Proliferating T cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
'10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py b/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py deleted file mode 100644 index b0a97e9de..000000000 --- a/sfaira/data/human/thymus/human_thymus_2020_microwell_han_002.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'thymus' - self.sub_tissue = 'FetalThymus' - self.dev_stage = 'Fetus' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": { - 'Antigen presenting cell (RPS high)': 'Antigen presenting cell (RPS high)', - 'B cell': 'B cell', - 'CB CD34+': 'CB CD34+', - 'Dendritic cell': 'Dendritic cell', - 'Erythroid cell': 'Ery', - 'Erythroid progenitor cell (RP high)': 'Ery', - 'Fetal epithelial progenitor': 'Fetal epithelial progenitor', - 'Macrophage': 'Mac', - 'Monocyte': 'Mono', - 'Neutrophil': 'Neutrophil', - 'Neutrophil (RPS high)': 'Neutrophil (RPS high)', - 'Proliferating T cell': 'Proliferating T cell', - 'T cell': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "hcl_FetalThymus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue 
- self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thyroid/__init__.py b/sfaira/data/human/thyroid/__init__.py deleted file mode 100644 index 5a20c5cab..000000000 --- a/sfaira/data/human/thyroid/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_thyroid import DatasetGroupThyroid diff --git a/sfaira/data/human/thyroid/external.py b/sfaira/data/human/thyroid/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/thyroid/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/thyroid/human_thyroid.py b/sfaira/data/human/thyroid/human_thyroid.py deleted file mode 100644 index a30ad5c92..000000000 --- a/sfaira/data/human/thyroid/human_thyroid.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_thyroid_2020_microwell_han_001 import Dataset as Dataset0001 -from .human_thyroid_2020_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupThyroid(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThyroid - 
self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py deleted file mode 100644 index 65472f513..000000000 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'thyroid' - self.sub_tissue = 'AdultThyroid' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py b/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py deleted file mode 100644 index 69cc04769..000000000 --- a/sfaira/data/human/thyroid/human_thyroid_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. doi: 10.1038/s41586-020-2157-4). 
- In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = 'thyroid' - self.sub_tissue = 'AdultThyroid' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "thyroid", "hcl_AdultThyroid_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff --git 
a/sfaira/data/human/trachea/__init__.py b/sfaira/data/human/trachea/__init__.py deleted file mode 100644 index 4778fdb5e..000000000 --- a/sfaira/data/human/trachea/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_trachea import DatasetGroupTrachea diff --git a/sfaira/data/human/trachea/external.py b/sfaira/data/human/trachea/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/trachea/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/trachea/human_trachea.py b/sfaira/data/human/trachea/human_trachea.py deleted file mode 100644 index f6b9578c2..000000000 --- a/sfaira/data/human/trachea/human_trachea.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_trachea_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupTrachea(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py b/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py deleted file mode 100644 index 25af6f305..000000000 --- a/sfaira/data/human/trachea/human_trachea_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'trachea' - self.sub_tissue = 'AdultTrachea' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "trachea", "hcl_AdultTrachea_2.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - 
diff --git a/sfaira/data/human/ureter/__init__.py b/sfaira/data/human/ureter/__init__.py deleted file mode 100644 index 0d19170e9..000000000 --- a/sfaira/data/human/ureter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_ureter import DatasetGroupUreter diff --git a/sfaira/data/human/ureter/external.py b/sfaira/data/human/ureter/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/ureter/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/ureter/human_ureter.py b/sfaira/data/human/ureter/human_ureter.py deleted file mode 100644 index 7d3615eff..000000000 --- a/sfaira/data/human/ureter/human_ureter.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_ureter_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupUreter(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUreter - self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py b/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py deleted file mode 100644 index 4a9af9e70..000000000 --- a/sfaira/data/human/ureter/human_ureter_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'ureter' - self.sub_tissue = 'AdultUreter' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "ureter", "hcl_AdultUreter_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/human/uterus/__init__.py b/sfaira/data/human/uterus/__init__.py deleted file mode 100644 index 3c82a964f..000000000 --- a/sfaira/data/human/uterus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .human_uterus import DatasetGroupUterus diff --git a/sfaira/data/human/uterus/external.py b/sfaira/data/human/uterus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/human/uterus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/human/uterus/human_uterus.py b/sfaira/data/human/uterus/human_uterus.py deleted file mode 100644 index 9feee712d..000000000 --- a/sfaira/data/human/uterus/human_uterus.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .human_uterus_2020_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupUterus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py b/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py deleted file mode 100644 index 35a9432e0..000000000 --- a/sfaira/data/human/uterus/human_uterus_2020_microwell_han_001.py +++ /dev/null @@ -1,62 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - - -class Dataset(DatasetBase): - """ - This is a dataloader for a the Human Cell Landscape dataset (Han et al. 2020. 
doi: 10.1038/s41586-020-2157-4). - In order to obtain the required preprocessed datafiles, please use the notebook provided in this repository under: - sfaira/data/download_scripts/get_and_preprocess_HumanCellLandscape.ipynb - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "human" - self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = 'uterus' - self.sub_tissue = 'AdultUterus' - self.dev_stage = 'Adult' - self.download_website = 'https://figshare.com/articles/HCL_DGE_Data/7235471' - self.download_website_meta = None - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - - if self._load_raw or not self._load_raw: - if fn is None: - fn = os.path.join(self.path, "human", "uterus", "hcl_AdultUterus_1.h5ad") - self.adata = anndata.read(fn) - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = 'Guo' - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = 2020 - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = '10.1038/s41586-020-2157-4' - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "human" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] = self.dev_stage - - self._convert_and_set_var_names(symbol_col="names", ensembl_col="ensembl") - diff 
--git a/sfaira/data/interactive/external.py b/sfaira/data/interactive/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/interactive/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 45e6b6d8a..77c1294ac 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -1,6 +1,7 @@ import anndata from typing import Union -from .external import DatasetBase + +from sfaira.data import DatasetBase class DatasetInteractive(DatasetBase): @@ -8,37 +9,71 @@ class DatasetInteractive(DatasetBase): def __init__( self, data: anndata.AnnData, - species: str, + organism: str, organ: str, gene_symbol_col: Union[str, None] = 'index', gene_ens_col: Union[str, None] = None, + obs_key_celltypes: Union[str, None] = None, class_maps: dict = {}, - dataset_id: str = "interactive", - **kwargs + dataset_id: str = "interactive_dataset", + path: Union[str, None] = ".", + meta_path: Union[str, None] = ".", + cache_path: Union[str, None] = ".", ): """ + Load data set into sfaira data format. - :param data: - :param species: - :param organ: - :param class_maps: - :param id: - :param kwargs: + :param data: Data set. + :param organism: Organism of data set. + :param organ: Organ of data set. + :param gene_symbol_col: Column name in .var which contains gene symbols. Set to "index" to use the index. + :param gene_ens_col: Column name in .var which contains ENSG symbols. Set to "index" to use the index. + :param obs_key_celltypes: .obs column name which contains cell type labels. + :param class_maps: Cell type class maps. + :param dataset_id: Identifer of data set. 
+ :param path: + :param meta_path: + :param cache_path: """ - DatasetBase.__init__(self=self, path=None, meta_path=None, **kwargs) - self.adata = data - self.species = species + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path) self.id = dataset_id + + self.author = "interactive_dataset" + self.doi = "interactive_dataset" + + self.download = "." + self.download_meta = "." + + # self.age # not currently supported + # self.dev_stage # not currently supported + # self.ethnicity # not currently supported + # self.healthy # not currently supported + # self.normalisation # not currently supported self.organ = organ + self.organism = organism + # self.protocol # not currently supported + # self.sex # not currently supported + # self.state_exact # not currently supported + # self.year # not currently supported - self.gene_symbol_col = gene_symbol_col - self.gene_ensg_col = gene_ens_col + self.obs_key_cellontology_original = obs_key_celltypes + + # self.obs_key_age # not currently supported + # self.obs_key_dev_stage # not currently supported + # self.obs_key_ethnicity # not currently supported + # self.obs_key_healthy # not currently supported + # self.obs_key_organ # not currently supported + # self.obs_key_organism # not currently supported + # self.obs_key_protocol # not currently supported + # self.obs_key_sex # not currently supported + # self.obs_key_state_exact # not currently supported + + self.var_symbol_col = gene_symbol_col + self.var_ensembl_col = gene_ens_col self.class_maps = class_maps + self.adata = data + def _load(self, fn=None): - self._convert_and_set_var_names( - symbol_col=self.gene_symbol_col, - ensembl_col=self.gene_ensg_col, - new_index='ensembl' - ) + pass diff --git a/sfaira/data/mouse/__init__.py b/sfaira/data/mouse/__init__.py deleted file mode 100644 index f1063a851..000000000 --- a/sfaira/data/mouse/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from .bladder import DatasetGroupBladder -from .brain import DatasetGroupBrain 
-from .diaphragm import DatasetGroupDiaphragm -from .adipose import DatasetGroupAdipose -from .heart import DatasetGroupHeart -from .kidney import DatasetGroupKidney -from .colon import DatasetGroupColon -from .muscle import DatasetGroupMuscle -from .liver import DatasetGroupLiver -from .lung import DatasetGroupLung -from .mammarygland import DatasetGroupMammaryGland -from .bone import DatasetGroupBone -from .femalegonad import DatasetGroupFemalegonad -from .pancreas import DatasetGroupPancreas -from .placenta import DatasetGroupPlacenta -from .blood import DatasetGroupBlood -from .prostate import DatasetGroupProstate -from .rib import DatasetGroupRib -from .ileum import DatasetGroupIleum -from .skin import DatasetGroupSkin -from .spleen import DatasetGroupSpleen -from .stomach import DatasetGroupStomach -from .malegonad import DatasetGroupMalegonad -from .thymus import DatasetGroupThymus -from .tongue import DatasetGroupTongue -from .trachea import DatasetGroupTrachea -from .uterus import DatasetGroupUterus diff --git a/sfaira/data/mouse/adipose/__init__.py b/sfaira/data/mouse/adipose/__init__.py deleted file mode 100644 index c23acef29..000000000 --- a/sfaira/data/mouse/adipose/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_adipose import DatasetGroupAdipose \ No newline at end of file diff --git a/sfaira/data/mouse/adipose/external.py b/sfaira/data/mouse/adipose/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/adipose/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/adipose/mouse_adipose.py b/sfaira/data/mouse/adipose/mouse_adipose.py deleted file mode 100644 index 3493d3d21..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_adipose_2019_10x_pisco_001 import Dataset 
as Dataset0001 -from .mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 -from .mouse_adipose_2019_smartseq2_pisco_003 import Dataset as Dataset0004 -from .mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 - - -class DatasetGroupAdipose(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py deleted file mode 100644 index 7c72f9c4e..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = 
"https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - self.adata.raw = None - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py deleted file mode 100644 index faad974eb..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - 
self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py deleted file mode 100644 index 1e386e00c..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif 
self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py deleted file mode 100644 index 782caa53a..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_003.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm 
= {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py b/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py deleted file mode 100644 index 656d4e504..000000000 --- a/sfaira/data/mouse/adipose/mouse_adipose_2019_smartseq2_pisco_004.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = 
"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "adipose" - self.sub_tissue = "adipose" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/__init__.py b/sfaira/data/mouse/bladder/__init__.py deleted file mode 100644 index c868320bb..000000000 --- a/sfaira/data/mouse/bladder/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_bladder import DatasetGroupBladder \ No newline at end of file diff --git a/sfaira/data/mouse/bladder/external.py b/sfaira/data/mouse/bladder/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/bladder/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/bladder/mouse_bladder.py b/sfaira/data/mouse/bladder/mouse_bladder.py deleted file mode 100644 index 93d08570a..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_bladder_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_bladder_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_bladder_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupBladder(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git 
a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py deleted file mode 100644 index 011fa3759..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2018_microwell_han_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": { - "Endothelial cell_Ly6c1 high(Bladder)": 'endothelial cell', - "Vascular endothelial cell(Bladder)": 'endothelial cell', - 'Urothelium(Bladder)': 'bladder urothelial cell', - 'Dendritic cell_Cd74 high(Bladder)': 'dendritic cell', - 'Dendritic cell_Lyz2 high(Bladder)': 'dendritic cell', - 'Macrophage_Pf4 high(Bladder)': 'macrophage', - 'NK cell(Bladder)': 'NK cell', - 'Basal epithelial cell(Bladder)': 'basal epithelial cell', - 'Epithelial cell_Upk3a high(Bladder)': 'epithelial cell', - 'Epithelial cell_Gm23935 high(Bladder)': 'epithelial cell', - 'Mesenchymal stromal cell(Bladder)': 'mesenchymal stromal cell', - 'Stromal cell_Dpt high(Bladder)': 'stromal cell', - 'Stromal cell_Car3 high(Bladder)': 'stromal cell', - 'Smooth muscle cell(Bladder)': 'smooth muscle cell', - 'Vascular smooth muscle progenitor cell(Bladder)': 'smooth muscle cell', - 'Umbrella cell(Bladder)': 'umbrella cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in 
load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py deleted file mode 100644 index c1483afba..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_10x_pisco_001.py +++ 
/dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", "Bladder_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py deleted file mode 100644 index a1e1f6500..000000000 --- a/sfaira/data/mouse/bladder/mouse_bladder_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,68 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bladder" - self.sub_tissue = "bladder" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - 
raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/__init__.py b/sfaira/data/mouse/blood/__init__.py deleted file mode 100644 index 6b0e27f4f..000000000 --- a/sfaira/data/mouse/blood/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_blood import DatasetGroupBlood \ No newline at end of file diff --git a/sfaira/data/mouse/blood/external.py b/sfaira/data/mouse/blood/external.py deleted file mode 100644 index 
cc51e6fda..000000000 --- a/sfaira/data/mouse/blood/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/blood/mouse_blood.py b/sfaira/data/mouse/blood/mouse_blood.py deleted file mode 100644 index e1dfaf511..000000000 --- a/sfaira/data/mouse/blood/mouse_blood.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 -from .mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 -from .mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 - - -class DatasetGroupBlood (DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py deleted file mode 100644 index aa373428b..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_001.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class 
Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") - 
fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py deleted file mode 100644 index 9088bd83a..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_002.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from 
.external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", 
"PeripheralBlood2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py deleted file mode 100644 index fcd971a98..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_003.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as np -import os -import pandas 
-from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", 
"temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py deleted file mode 100644 index 204613af0..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_004.py +++ /dev/null @@ -1,85 +0,0 @@ -import anndata -import numpy as 
np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = 
os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py b/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py deleted file mode 100644 index 33efff1d5..000000000 --- a/sfaira/data/mouse/blood/mouse_blood_2018_microwell_han_005.py +++ /dev/null @@ -1,85 +0,0 @@ 
-import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "blood" - self.sub_tissue = "blood" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igha high(Peripheral_Blood)': 'B cell', - 'B cell_Ly6d high(Peripheral_Blood)': 'B cell', - 'B cell_Rps27rt high(Peripheral_Blood)': 'B cell', - 'B cell_Vpreb3 high(Peripheral_Blood)': 'B cell', - 'Basophil_Prss34 high(Peripheral_Blood)': 'basophil', - 'Dendritic cell_Siglech high(Peripheral_Blood)': 'dendritic cell', - 'Erythroblast_Car2 high(Peripheral_Blood)': 'erythroblast', - 'Erythroblast_Hba-a2 high(Peripheral_Blood)': 'erythroblast', - 'Macrophage_Ace high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Flt-ps1 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_Pf4 high(Peripheral_Blood)': 'macrophage', - 'Macrophage_S100a4 high(Peripheral_Blood)': 'macrophage', - 'Monocyte_Elane high(Peripheral_Blood)': 'monocyte', - 'Monocyte_F13a1 high(Peripheral_Blood)': 'monocyte', - 'NK cell_Gzma high(Peripheral_Blood)': 'NK cell', - 'Neutrophil_Camp high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Il1b high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Ltf high(Peripheral_Blood)': 'neutrophil', - 'Neutrophil_Retnlg high(Peripheral_Blood)': 'neutrophil', - 'T cell_Gm14303 high(Peripheral_Blood)': 'T cell', - 'T cell_Trbc2 high(Peripheral_Blood)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or 
path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/__init__.py b/sfaira/data/mouse/bone/__init__.py deleted file mode 100644 index 9b6ccd006..000000000 --- a/sfaira/data/mouse/bone/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_bone import DatasetGroupBone \ No newline 
at end of file diff --git a/sfaira/data/mouse/bone/external.py b/sfaira/data/mouse/bone/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/bone/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/bone/mouse_bone.py b/sfaira/data/mouse/bone/mouse_bone.py deleted file mode 100644 index ff7f50125..000000000 --- a/sfaira/data/mouse/bone/mouse_bone.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_bone_2018_microwell_001 import Dataset as Dataset0003 - - -class DatasetGroupBone(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py b/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py deleted file mode 100644 index f6e14e5cf..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2018_microwell_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - 
): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Igkc high(Bone-Marrow)': 'naive B cell', - 'Dendritic cell_H2-Eb1 high(Bone-Marrow)': 'dendritic cell', - 'Dendritic cell_Siglech high(Bone-Marrow)': 'dendritic cell', - 'Macrophage_Ms4a6c high(Bone-Marrow)': 'macrophage', - 'Macrophage_S100a4 high(Bone-Marrow)': 'macrophage', - 'Erythroblast(Bone-Marrow)': 'erythroid progenitor', - 'Mast cell(Bone-Marrow)': 'mast cell', - 'Monocyte_Mif high(Bone-Marrow)': 'monocyte', - 'Monocyte_Prtn3 high(Bone-Marrow)': 'monocyte', - 'Neutrophil progenitor(Bone-Marrow)': 'neutrophil progenitor', - 'Neutrophil_Cebpe high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Fcnb high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Mmp8 high(Bone-Marrow)': 'neutrophil', - 'Neutrophil_Ngp high(Bone-Marrow)': 'neutrophil', - 'Hematopoietic stem progenitor cell(Bone-Marrow)': 'hematopoietic precursor cell', - 'Pre-pro B cell(Bone-Marrow)': 'early pro-B cell', - 'T cell_Ms4a4b high(Bone-Marrow)': 'CD4-positive, alpha-beta T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs['Annotation'] - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py deleted file mode 100644 index 0eaae87b9..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - 
self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6e72e85db..000000000 --- a/sfaira/data/mouse/bone/mouse_bone_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "bone" - self.sub_tissue = "marrow" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = 
self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["unknown", "nan-marrow-needs-subclustering"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/__init__.py b/sfaira/data/mouse/brain/__init__.py deleted file mode 100644 index 499b40bb0..000000000 --- a/sfaira/data/mouse/brain/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_brain import DatasetGroupBrain \ No newline at end of file diff --git a/sfaira/data/mouse/brain/external.py b/sfaira/data/mouse/brain/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/brain/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/brain/mouse_brain.py 
b/sfaira/data/mouse/brain/mouse_brain.py deleted file mode 100644 index e62393153..000000000 --- a/sfaira/data/mouse/brain/mouse_brain.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_brain_2019_smartseq2_pisco_001 import Dataset as Dataset0001 -from .mouse_brain_2019_smartseq2_pisco_002 import Dataset as Dataset0002 -from .mouse_brain_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_brain_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupBrain(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py deleted file mode 100644 index eda8e1088..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_001.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": { - 'Astroglial cell(Bergman glia)(Brain)': 'Bergmann glial cell', - 'Astrocyte_Atp1b2 high(Brain)': 'astrocyte', - 'Astrocyte_Mfe8 high(Brain)': 'astrocyte', - 'Astrocyte_Pla2g7 high(Brain)': 'astrocyte', - 'Granulocyte_Ngp high(Brain)': 'granulocyte', - 'Hypothalamic ependymal cell(Brain)': 'ependymal cell', - 'Macrophage_Klf2 high(Brain)': 'macrophage', - 'Macrophage_Lyz2 high(Brain)': 'macrophage', - 'Microglia(Brain)': 'microglial cell', - 'Myelinating oligodendrocyte(Brain)': 'oligodendrocyte', - 'Oligodendrocyte precursor cell(Brain)': 'oligodendrocyte precursor cell', - 'Neuron(Brain)': 'neuron', - 'Pan-GABAergic(Brain)': 'GABAergic cell', - 'Schwann cell(Brain)': 'schwann cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - 
self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py b/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py deleted file mode 100644 index 31f7dafdc..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2018_microwell_han_002.py +++ /dev/null @@ -1,78 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": { - 'Astroglial cell(Bergman glia)(Brain)': 'Bergmann glial cell', - 'Astrocyte_Atp1b2 high(Brain)': 'astrocyte', - 'Astrocyte_Mfe8 high(Brain)': 'astrocyte', - 'Astrocyte_Pla2g7 high(Brain)': 'astrocyte', - 'Granulocyte_Ngp high(Brain)': 'granulocyte', 
- 'Hypothalamic ependymal cell(Brain)': 'ependymal cell', - 'Macrophage_Klf2 high(Brain)': 'macrophage', - 'Macrophage_Lyz2 high(Brain)': 'macrophage', - 'Microglia(Brain)': 'microglial cell', - 'Myelinating oligodendrocyte(Brain)': 'oligodendrocyte', - 'Oligodendrocyte precursor cell(Brain)': 'oligodendrocyte precursor cell', - 'Neuron(Brain)': 'neuron', - 'Pan-GABAergic(Brain)': 'GABAergic cell', - 'Schwann cell(Brain)': 'schwann cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py deleted file mode 100644 index 543ff2c65..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - 
self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py deleted file mode 100644 index 16bed27d5..000000000 --- a/sfaira/data/mouse/brain/mouse_brain_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - 
DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "brain" - self.sub_tissue = "brain" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan", "Il6 expressing cells"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/__init__.py b/sfaira/data/mouse/colon/__init__.py deleted file mode 100644 index 8e57ba03e..000000000 --- a/sfaira/data/mouse/colon/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_colon import DatasetGroupColon \ No newline at end of file diff --git a/sfaira/data/mouse/colon/external.py b/sfaira/data/mouse/colon/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/colon/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/colon/mouse_colon.py b/sfaira/data/mouse/colon/mouse_colon.py deleted file mode 100644 index 3a64a819a..000000000 --- a/sfaira/data/mouse/colon/mouse_colon.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupColon(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse 
import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py deleted file mode 100644 index e7f4077d6..000000000 --- a/sfaira/data/mouse/colon/mouse_colon_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - 
self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py deleted file mode 100644 index f1a8d3d71..000000000 --- a/sfaira/data/mouse/colon/mouse_colon_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == 
"aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "colon" - self.sub_tissue = "colon" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[ADATA_IDS_SFAIRA.cell_ontology_class] is already set - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ - self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/diaphragm/__init__.py b/sfaira/data/mouse/diaphragm/__init__.py deleted file mode 100644 index a68701d73..000000000 --- a/sfaira/data/mouse/diaphragm/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_diaphragm import DatasetGroupDiaphragm \ No newline at end of file diff --git a/sfaira/data/mouse/diaphragm/external.py b/sfaira/data/mouse/diaphragm/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/diaphragm/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py b/sfaira/data/mouse/diaphragm/mouse_diaphragm.py deleted file mode 100644 index 0a78a5bfc..000000000 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_diaphragm_2019_smartseq2_pisco_001 import Dataset as Dataset0001 - - -class DatasetGroupDiaphragm(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupDiaphragm - self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py deleted file mode 100644 index 061e65439..000000000 --- a/sfaira/data/mouse/diaphragm/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "diaphragm" - self.sub_tissue = "diaphragm" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = 
"10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/__init__.py b/sfaira/data/mouse/femalegonad/__init__.py deleted file mode 100644 index 6cca0c4d4..000000000 --- a/sfaira/data/mouse/femalegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_femalegonad import DatasetGroupFemalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/femalegonad/external.py b/sfaira/data/mouse/femalegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/femalegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad.py deleted file mode 100644 index e8b6fecf6..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupFemalegonad(DatasetGroupBase): - - def 
__init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py deleted file mode 100644 index e69c4b146..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" - self.sub_tissue = "femalegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Cumulus cell_Car14 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Nupr1 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Ube2c high(Ovary)': 'cumulus cell', - 'Granulosa cell_Inhba high(Ovary)': 'granulosa cell', - 'Granulosa cell_Kctd14 high(Ovary)': 'granulosa cell', - 'Large luteal cell(Ovary)': 'large luteal cell', - 'Macrophage_Lyz2 high(Ovary)': 'macrophage', - 'Marcrophage_Cd74 high(Ovary)': 'macrophage', - 'Ovarian surface epithelium cell(Ovary)': 
'epithelial cell of ovarian surface', - 'Ovarian vascular surface endothelium cell(Ovary)': 'endothelial cell of ovarian surface', - 'Small luteal cell(Ovary)': 'small luteal cell', - 'Stroma cell (Ovary)': 'stromal cell', - 'Thecal cell(Ovary)': 'thecal cell', - 'luteal cells(Ovary)': 'luteal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py deleted file mode 100644 index 776162f34..000000000 --- a/sfaira/data/mouse/femalegonad/mouse_femalegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" - self.sub_tissue = "femalegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Cumulus cell_Car14 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Nupr1 high(Ovary)': 'cumulus cell', - 'Cumulus cell_Ube2c high(Ovary)': 'cumulus cell', - 'Granulosa cell_Inhba high(Ovary)': 'granulosa cell', - 'Granulosa cell_Kctd14 high(Ovary)': 'granulosa cell', - 'Large luteal cell(Ovary)': 'large luteal cell', - 'Macrophage_Lyz2 high(Ovary)': 'macrophage', - 'Marcrophage_Cd74 high(Ovary)': 'macrophage', - 'Ovarian surface epithelium cell(Ovary)': 'epithelial cell of ovarian surface', - 'Ovarian vascular surface endothelium cell(Ovary)': 'endothelial cell of ovarian surface', - 'Small luteal cell(Ovary)': 'small luteal cell', - 'Stroma cell (Ovary)': 'stromal cell', - 'Thecal cell(Ovary)': 'thecal cell', - 'luteal cells(Ovary)': 'luteal cell', - }, - } - - def _load(self, 
fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/__init__.py b/sfaira/data/mouse/heart/__init__.py deleted file mode 100644 index be8480688..000000000 --- 
a/sfaira/data/mouse/heart/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_heart import DatasetGroupHeart \ No newline at end of file diff --git a/sfaira/data/mouse/heart/external.py b/sfaira/data/mouse/heart/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/heart/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/heart/mouse_heart.py b/sfaira/data/mouse/heart/mouse_heart.py deleted file mode 100644 index 11b15636e..000000000 --- a/sfaira/data/mouse/heart/mouse_heart.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - - -from .mouse_heart_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_heart_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_heart_2019_smartseq2_pisco_002 import Dataset as Dataset0003 - - -class DatasetGroupHeart(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py deleted file mode 100644 index e10649e05..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def 
__init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_heart_2019_10x_pisco_001_10.1101/661728" - - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website 
- self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py deleted file mode 100644 index 721e03cee..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", 
"tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py deleted file mode 100644 index 53900692f..000000000 --- a/sfaira/data/mouse/heart/mouse_heart_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - 
def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "heart" - self.sub_tissue = "heart" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") - elif self.source == "figshare": - raise ValueError("not defined") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/__init__.py b/sfaira/data/mouse/ileum/__init__.py deleted file mode 100644 index 89c13450a..000000000 --- a/sfaira/data/mouse/ileum/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_ileum import DatasetGroupIleum \ No newline at end of file diff --git a/sfaira/data/mouse/ileum/external.py b/sfaira/data/mouse/ileum/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/ileum/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/ileum/mouse_ileum.py b/sfaira/data/mouse/ileum/mouse_ileum.py deleted file mode 100644 index 0d01e26df..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupIleum(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - 
self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py deleted file mode 100644 index 5363e094c..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_001.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal 
villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py deleted file mode 100644 index c96944ea9..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_002.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 
high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = 
"microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py deleted file mode 100644 index b3e7390aa..000000000 --- a/sfaira/data/mouse/ileum/mouse_ileum_2018_microwell_han_003.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "ileum" - self.sub_tissue = "ileum" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Ighd high(Small-Intestine)': 'B cell', - 'B cell_Igkv12-46 high(Small-Intestine)': 'B 
cell', - 'B cell_Jchain high(Small-Intestine)': 'B cell', - 'B cell_Ms4a1 high(Small-Intestine)': 'B cell', - 'Columnar epithelium(Small-Intestine)': 'epithelial cell', - 'Dendritic cell_Siglech high(Small-Intestine)': 'dendritic cell', - 'Dendrtic cell_Cst3 high(Small-Intestine)': 'dendritic cell', - 'Epithelial cell_Kcne3 high(Small-Intestine)': 'epithelial cell', - 'Epithelial cell_Sh2d6 high(Small-Intestine)': 'epithelial cell', - 'Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)': 'epithelial cell villi', - 'Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)': 'epithelial cell villi', - 'Macrophage_Apoe high(Small-Intestine)': 'macrophage', - 'Macrophage_Cxcl2 high(Small-Intestine)': 'macrophage', - 'Paneth cell(Small-Intestine)': 'paneth cell', - 'S cell_Chgb high(Small-Intestine)': 'enteroendocrine cell', - 'S cell_Gip high(Small-Intestine)': 'enteroendocrine cell', - 'Stromal cell_Adamdec1 high(Small-Intestine)': 'stromal cell', - 'Stromal cell_Dcn high(Small-Intestine)': 'stromal cell', - 'T cell_Ccl5 high(Small-Intestine)': 'T cell', - 'T cell_Icos high(Small-Intestine)': 'T cell', - 'T cell_Cd7 high(Small-Intestine)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/kidney/__init__.py b/sfaira/data/mouse/kidney/__init__.py deleted file mode 100644 index 057f45e34..000000000 --- a/sfaira/data/mouse/kidney/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_kidney import DatasetGroupKidney \ No newline at end of file diff --git a/sfaira/data/mouse/kidney/external.py b/sfaira/data/mouse/kidney/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/kidney/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/kidney/mouse_kidney.py b/sfaira/data/mouse/kidney/mouse_kidney.py deleted file mode 100644 index 
9f20d3dbb..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_kidney_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_kidney_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_kidney_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_kidney_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupKidney(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py deleted file mode 100644 index 7088e667f..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_001.py +++ /dev/null @@ -1,64 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - 
self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Cell in cell cycle(Fetal_Kidney)': 'fetal proliferative cell', - 'Metanephric mesenchyme(Fetal_Kidney)': 'fetal mesenchymal cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py deleted file mode 100644 index 86ace4b56..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2018_microwell_han_002.py +++ /dev/null @@ -1,95 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - 'Adipocyte(Fetal_Kidney)': 'fetal adipocyte', - 'B cell(Kidney)': 'B cell', - 'Dendritic cell_Ccr7 high(Kidney)': 'dendritic cell', - 'Dendritic cell_Cst3 high(Kidney)': 'dendritic cell', - 'Distal collecting duct principal cell_Cldn4 high(Kidney)': 'kidney collecting duct principal cell', - 'Distal collecting duct principal cell_Hsd11b2 high(Kidney)': 'kidney collecting duct principal cell', - 'Distal convoluted tubule_Pvalb high(Kidney)': 'kidney distal convoluted tubule epithelial cell', - 'Distal convoluted tubule_S100g high(Kidney)': 'kidney distal convoluted tubule epithelial cell', - 'Endothelial cell(Kidney)': 'fenestrated cell', - 'Epithelial cell_Cryab high(Kidney)': "epithelial cell", - 'Fenestrated endothelial cell_Plvap high(Kidney)': 'fenestrated cell', - 'Fenestrated endothelial cell_Tm4sf1 high(Kidney)': 'fenestrated cell', - 'Glomerular epithelial cell_Aldh1a2 
high(Fetal_Kidney)': 'glomerular epithelial cell', - 'Intercalated cells of collecting duct_Aqp6 high(Kidney)': 'kidney collecting duct epithelial cell', - 'Intercalated cells of collecting duct_Slc26a4 high(Kidney)': 'kidney collecting duct epithelial cell', - 'Macrophage_Ccl4 high (Kidney)': 'macrophage', - 'Macrophage_Lyz2 high(Kidney)': 'macrophage', - 'Metanephric mesenchyme(Fetal_Kidney)': 'fetal mesenchymal cell', - 'Neutrophil progenitor_S100a8 high(Kidney)': 'neutrophil progenitor', - 'Proximal tubule brush border cell(Kidney)': 'brush cell', - 'Proximal tubule cell_Cyp4a14 high(Kidney)': 'epithelial cell of proximal tubule', - 'Proximal tubule cell_Osgin1 high(Kidney)': 'epithelial cell of proximal tubule', - 'S1 proximal tubule cells(Kidney)': 'epithelial cell of proximal tubule', - 'S3 proximal tubule cells(Kidney)': 'epithelial cell of proximal tubule', - 'Stromal cell_Ankrd1 high(Kidney)': 'fibroblast', - 'Stromal cell_Cxcl10 high(Kidney)': 'fibroblast', - 'Stromal cell_Dcn high(Kidney)': 'fibroblast', - 'Stromal cell_Mgp high(Fetal_Kidney)': 'fibroblast', - 'Stromal cell_Mgp high(Kidney)': 'fibroblast', - 'Stromal cell_Ptgds high(Kidney)': 'fibroblast', - 'T cell(Kidney)': 'T cell', - 'Thick ascending limb of the loop of Henle(Kidney)': 'kidney loop of Henle ascending limb epithelial cell', - 'Ureteric epithelium(Kidney)': 'ureteric epithelial cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py deleted file mode 100644 index 67cd214f8..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_10x_pisco_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" - self.source = 
source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py deleted file mode 100644 index 2e05e00b5..000000000 --- a/sfaira/data/mouse/kidney/mouse_kidney_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "kidney" - self.sub_tissue = "kidney" - self.annotated = True - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = 
os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan", "kidney cell"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/__init__.py b/sfaira/data/mouse/liver/__init__.py deleted file mode 100644 index 4ddaa4d26..000000000 --- a/sfaira/data/mouse/liver/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_liver import DatasetGroupLiver \ No newline at end of file diff --git a/sfaira/data/mouse/liver/external.py 
b/sfaira/data/mouse/liver/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/liver/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/liver/mouse_liver.py b/sfaira/data/mouse/liver/mouse_liver.py deleted file mode 100644 index 0d166852e..000000000 --- a/sfaira/data/mouse/liver/mouse_liver.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_liver_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_liver_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_liver_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_liver_2018_microwell_han_002 import Dataset as Dataset0004 - - -class DatasetGroupLiver(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py deleted file mode 100644 index 5a856484d..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, 
None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Fcmr high(Liver)': 'B cell', - 'B cell_Jchain high(Liver)': 'B cell', - 'Dendritic cell_Cst3 high(Liver)': 'dendritic cell', - 'Dendritic cell_Siglech high(Liver)': 'dendritic cell', - 'Endothelial cell(Liver)': 'endothelial cell of hepatic sinusoid', - 'Epithelial cell(Liver)': "duct epithelial cell", - 'Epithelia cell_Spp1 high(Liver)': "duct epithelial cell", - 'Erythroblast_Hbb-bs high(Liver)': 'erythroblast', - 'Erythroblast_Hbb-bt high(Liver)': 'erythroblast', - 'Granulocyte(Liver)': 'granulocyte', - 'Hepatocyte_Fabp1 high(Liver)': 'hepatocyte', - 'Hepatocyte_mt-Nd4 high(Liver)': 'hepatocyte', - 'Pericentral (PC) hepatocytes(Liver)': 'hepatocyte', - 'Periportal (PP) hepatocyte(Liver)': 'hepatocyte', - 'Kuppfer cell(Liver)': 'Kupffer cell', - 'Macrophage_Chil3 high(Liver)': 'macrophage', - 'Neutrophil_Ngp high(Liver)': 'neutrophil', - 'Stromal cell(Liver)': 'stromal cell', - 'T cell_Gzma high(Liver)': 'T cell', - 'T cell_Trbc2 high(Liver)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = 
self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py b/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py deleted file mode 100644 index c1f4f3b2c..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2018_microwell_han_002.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Jchain high(Liver)': 'B cell', - 'Dendritic cell_Cst3 high(Liver)': 'dendritic cell', - 'Dendritic cell_Siglech high(Liver)': 'dendritic cell', - 'Epithelial cell(Liver)': "duct epithelial cell", - 'Epithelia cell_Spp1 high(Liver)': "duct epithelial cell", - 'Erythroblast_Hbb-bs high(Liver)': 'erythroblast', - 'Hepatocyte_Fabp1 high(Liver)': 'hepatocyte', - 'Pericentral (PC) hepatocytes(Liver)': 'hepatocyte', - 'Periportal (PP) hepatocyte(Liver)': 'hepatocyte', - 'Kuppfer cell(Liver)': 'Kupffer cell', - 'Macrophage_Chil3 high(Liver)': 'macrophage', - 'Stromal cell(Liver)': 'stromal cell', - 'T cell_Gzma high(Liver)': 'T cell', - 'T cell_Trbc2 high(Liver)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py deleted file mode 100644 index e8366603e..000000000 --- a/sfaira/data/mouse/liver/mouse_liver_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { 
- "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py deleted file mode 100644 index 73067787a..000000000 --- 
a/sfaira/data/mouse/liver/mouse_liver_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "liver" - self.sub_tissue = "liver" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/__init__.py b/sfaira/data/mouse/lung/__init__.py deleted file mode 100644 index dafc0bfc6..000000000 --- a/sfaira/data/mouse/lung/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_lung import DatasetGroupLung \ No newline at end of file diff --git a/sfaira/data/mouse/lung/external.py b/sfaira/data/mouse/lung/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/lung/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/lung/mouse_lung.py b/sfaira/data/mouse/lung/mouse_lung.py deleted file mode 100644 index 9aea13353..000000000 --- a/sfaira/data/mouse/lung/mouse_lung.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_lung_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_lung_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_lung_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_lung_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_lung_2018_microwell_han_003 
import Dataset as Dataset0005 - - -class DatasetGroupLung(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py deleted file mode 100644 index 62bebd82c..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_001.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar 
macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in 
self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py deleted file mode 100644 index bff170a68..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_002.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - 
if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py b/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py deleted file mode 100644 index 
a45337414..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2018_microwell_han_003.py +++ /dev/null @@ -1,97 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - 'AT1 Cell(Lung)': 'alveolar epithelial cell type I', - 'AT2 Cell(Lung)': 'alveolar epithelial cell type II', - 'Alveolar bipotent progenitor(Lung)': 'alveolar bipotent progenitor', - 'Alveolar macrophage_Ear2 high(Lung)': 'alveolar macrophage', - 'Alveolar macrophage_Pclaf high(Lung)': 'alveolar macrophage', - 'B Cell(Lung)': 'B cell', - 'Basophil(Lung)': 'basophil', - 'Ciliated cell(Lung)': 'ciliated cell', - 'Clara Cell(Lung)': 'clara cell', - 'Conventional dendritic cell_Gngt2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_H2-M2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Mgl2 high(Lung)': "dendritic cell", - 'Conventional dendritic cell_Tubb5 high(Lung)': "dendritic cell", - 'Dendritic cell_Naaa high(Lung)': "dendritic cell", - 'Dividing T cells(Lung)': "T cell", - 'Dividing cells(Lung)': 'unknown', - 'Dividing dendritic cells(Lung)': "dendritic cell", - 'Endothelial cell_Kdr high(Lung)': "endothelial cell", - 'Endothelial cell_Tmem100 high(Lung)': "endothelial cell", - 'Endothelial cells_Vwf high(Lung)': "endothelial cell", - 'Eosinophil granulocyte(Lung)': 'eosinophil', - 'Ig−producing B cell(Lung)': 'B cell', - 'Interstitial 
macrophage(Lung)': 'lung macrophage', - 'Monocyte progenitor cell(Lung)': 'monocyte progenitor', - 'NK Cell(Lung)': 'NK cell', - 'Neutrophil granulocyte(Lung)': 'neutrophil', - 'Nuocyte(Lung)': 'nuocyte', - 'Plasmacytoid dendritic cell(Lung)': "plasmacytoid dendritic cell", - 'Stromal cell_Acta2 high(Lung)': 'stromal cell', - 'Stromal cell_Dcn high(Lung)': 'stromal cell', - 'Stromal cell_Inmt high(Lung)': 'stromal cell', - 'T Cell_Cd8b1 high(Lung)': "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py deleted file mode 100644 index eabf196af..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") - else: - raise ValueError("source %s not 
recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py deleted file mode 100644 index 77b2fa934..000000000 --- a/sfaira/data/mouse/lung/mouse_lung_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, 
meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "lung" - self.sub_tissue = "lung" - self.annotated = True - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/__init__.py b/sfaira/data/mouse/malegonad/__init__.py deleted file mode 100644 index a56dbc2f4..000000000 --- a/sfaira/data/mouse/malegonad/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_malegonad import DatasetGroupMalegonad \ No newline at end of file diff --git a/sfaira/data/mouse/malegonad/external.py b/sfaira/data/mouse/malegonad/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/malegonad/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad.py b/sfaira/data/mouse/malegonad/mouse_malegonad.py deleted file mode 100644 index e1818bc07..000000000 --- a/sfaira/data/mouse/malegonad/mouse_malegonad.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_malegonad_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupMalegonad(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if 
available: - try: - from sfaira_extension.data.mouse import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py deleted file mode 100644 index acc94d1ed..000000000 --- a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" - self.sub_tissue = "malegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongating spermatid(Testis)': 'elongating spermatid', - 'Erythroblast_Hbb-bs high(Testis)': 'erythroblast', - 'Leydig cell(Testis)': 'leydig cell', - 'Macrophage_Lyz2 high(Testis)': 'macrophage', - 'Pre-Sertoli cell_Cst9 high(Testis)': 'pre-sertoli cell', - 'Pre-Sertoli cell_Ctsl high(Testis)': 'pre-sertoli cell', - 'Preleptotene spermatogonia(Testis)': 'preleptotene spermatogonia', - 'Sertoli cell(Testis)': 'sertoli cell', - 'Spermatids_1700016P04Rik high(Testis)': 'spermatid', - 'Spermatids_Cst13 high(Testis)': 'spermatid', - 'Spermatids_Hmgb4 high(Testis)': 'spermatid', - 'Spermatids_Tnp1 high(Testis)': 'spermatid', - 'Spermatocyte_1700001F09Rik high(Testis)': 'spermatocyte', - 'Spermatocyte_Cabs1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Calm2 high(Testis)': 
'spermatocyte', - 'Spermatocyte_Mesp1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Slc2a3 high(Testis)': 'spermatocyte', - 'Spermatogonia_1700001P01Rik high(Testis)': 'spermatogonia', - 'Spermatogonia_Tbc1d23 high(Testis)': 'spermatogonia' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - 
self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py deleted file mode 100644 index 7e5d1feea..000000000 --- a/sfaira/data/mouse/malegonad/mouse_malegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" - self.sub_tissue = "malegonad" - self.annotated = True - - self.class_maps = { - "0": { - 'Elongating spermatid(Testis)': 'elongating spermatid', - 'Erythroblast_Hbb-bs high(Testis)': 'erythroblast', - 'Leydig cell(Testis)': 'leydig cell', - 'Macrophage_Lyz2 high(Testis)': 'macrophage', - 'Pre-Sertoli cell_Cst9 high(Testis)': 'pre-sertoli cell', - 'Pre-Sertoli cell_Ctsl high(Testis)': 'pre-sertoli cell', - 'Preleptotene spermatogonia(Testis)': 'preleptotene spermatogonia', - 'Sertoli cell(Testis)': 'sertoli cell', - 'Spermatids_1700016P04Rik high(Testis)': 'spermatid', - 'Spermatids_Cst13 high(Testis)': 'spermatid', - 'Spermatids_Hmgb4 high(Testis)': 'spermatid', - 'Spermatids_Tnp1 high(Testis)': 'spermatid', - 'Spermatocyte_1700001F09Rik high(Testis)': 'spermatocyte', - 'Spermatocyte_Cabs1 high(Testis)': 'spermatocyte', - 'Spermatocyte_Calm2 high(Testis)': 'spermatocyte', - 'Spermatocyte_Mesp1 high(Testis)': 'spermatocyte', - 
'Spermatocyte_Slc2a3 high(Testis)': 'spermatocyte', - 'Spermatogonia_1700001P01Rik high(Testis)': 'spermatogonia', - 'Spermatogonia_Tbc1d23 high(Testis)': 'spermatogonia' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - 
self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/__init__.py b/sfaira/data/mouse/mammarygland/__init__.py deleted file mode 100644 index 6a42b03d9..000000000 --- a/sfaira/data/mouse/mammarygland/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_mammarygland import DatasetGroupMammaryGland \ No newline at end of file diff --git a/sfaira/data/mouse/mammarygland/external.py b/sfaira/data/mouse/mammarygland/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/mammarygland/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland.py deleted file mode 100644 index 71fca2ff3..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 -from .mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 -from .mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 - - -class DatasetGroupMammaryGland(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path) - ] - keys = 
[x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMammaryGland - self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py deleted file mode 100644 index c9c33b8b7..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_001.py +++ /dev/null @@ -1,80 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 
'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py deleted file mode 100644 index 63d95e9d1..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_002.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal 
epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = 
self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py deleted file mode 100644 index 59e75d075..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_003.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 
'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py deleted file mode 100644 index 85f714899..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2018_microwell_han_004.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)': 'B cell', - 'B cell_Jchain high(Mammary-Gland-Virgin)': 'B cell', - 'Dendritic cell_Cst3 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dendritic cell_Siglech 
high(Mammary-Gland-Virgin)': 'dendritic cell', - 'Dividing cell(Mammary-Gland-Virgin)': 'proliferative cell', - 'Luminal cell_Krt19 high (Mammary-Gland-Virgin)': 'luminal epithelial cell of mammary gland', - 'Luminal progenitor(Mammary-Gland-Virgin)': 'luminal progenitor cell', - 'Macrophage_C1qc high(Mammary-Gland-Virgin)': 'macrophage', - 'Macrophage_Lyz1 high(Mammary-Gland-Virgin)': 'macrophage', - 'NK cell(Mammary-Gland-Virgin)': 'NK cell', - 'Stem and progenitor cell(Mammary-Gland-Virgin)': 'stem and progenitor cell', - 'Stromal cell_Col3a1 high(Mammary-Gland-Virgin)': 'stromal cell', - 'Stromal cell_Pi16 high(Mammary-Gland-Virgin)': 'stromal cell', - 'T cell_Cd8b1 high(Mammary-Gland-Virgin)': 'T cell', - 'T cell_Ly6c2 high(Mammary-Gland-Virgin)': 'T cell', - 'T-cells_Ctla4 high(Mammary-Gland-Virgin)': 'T cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py deleted file mode 100644 index ba044e9f2..000000000 --- a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_10x_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is 
None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py deleted file mode 100644 index acddb5514..000000000 --- 
a/sfaira/data/mouse/mammarygland/mouse_mammarygland_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "mammarygland" - self.sub_tissue = "mammarygland" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[ - self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/__init__.py b/sfaira/data/mouse/muscle/__init__.py deleted file mode 100644 index fa8cb5cfd..000000000 --- a/sfaira/data/mouse/muscle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_muscle import DatasetGroupMuscle \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/external.py b/sfaira/data/mouse/muscle/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/muscle/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/muscle/mouse_muscle.py b/sfaira/data/mouse/muscle/mouse_muscle.py deleted file mode 100644 index 1bf0eddcc..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 - - -class 
DatasetGroupMuscle(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py deleted file mode 100644 index 53ad4269d..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2018_microwell_han_001.py +++ /dev/null @@ -1,82 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell_Jchain high(Muscle)': 'B cell', - 'B cell_Vpreb3 high(Muscle)': 'B cell', - 'Dendritic cell(Muscle)': 'dendritic cell', - 'Endothelial cell(Muscle)': 'endothelial cell', - 'Erythroblast_Car1 high(Muscle)': 'erythroblast', - 'Erythroblast_Car2 high(Muscle)': 'erythroblast', - 'Granulocyte monocyte progenitor cell(Muscle)': 'monocyte progenitor', - 'Macrophage_Ms4a6c 
high(Muscle)': 'macrophage', - 'Macrophage_Retnla high(Muscle)': 'macrophage', - 'Muscle cell_Tnnc1 high(Muscle)': 'muscle cell', - 'Muscle cell_Tnnc2 high(Muscle)': 'muscle cell', - 'Muscle progenitor cell(Muscle)': 'skeletal muscle satellite cell', - 'Neutrophil_Camp high(Muscle)': 'neutrophil', - 'Neutrophil_Prg2 high(Muscle)': 'neutrophil', - 'Neutrophil_Retnlg high(Muscle)': 'neutrophil', - 'Stromal cell(Muscle)': 'stromal cell', - 'T cell(Muscle)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - 
self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) - diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py deleted file mode 100644 index e3a9b589b..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_10x_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - 
self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py deleted file mode 100644 index e16e3911c..000000000 --- a/sfaira/data/mouse/muscle/mouse_muscle_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,69 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "muscle" - self.sub_tissue = "muscle" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - 
self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/__init__.py b/sfaira/data/mouse/pancreas/__init__.py deleted file mode 100644 index 27d79134c..000000000 --- a/sfaira/data/mouse/pancreas/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_pancreas import DatasetGroupPancreas \ No newline at end of file diff --git a/sfaira/data/mouse/pancreas/external.py b/sfaira/data/mouse/pancreas/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/pancreas/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas.py b/sfaira/data/mouse/pancreas/mouse_pancreas.py deleted file mode 100644 index ea87d9d50..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_pancreas_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_pancreas_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_pancreas_2018_microwell_han_001 import Dataset as Dataset0003 -from .mouse_pancreas_2019_10x_thompson_001 import Dataset as Dataset0004 -from .mouse_pancreas_2019_10x_thompson_002 import Dataset as Dataset0005 -from .mouse_pancreas_2019_10x_thompson_003 import Dataset as Dataset0006 -from .mouse_pancreas_2019_10x_thompson_004 import Dataset as Dataset0007 -from .mouse_pancreas_2019_10x_thompson_005 import Dataset as Dataset0008 -from .mouse_pancreas_2019_10x_thompson_006 import Dataset as Dataset0009 -from .mouse_pancreas_2019_10x_thompson_007 import Dataset as 
Dataset0010 -from .mouse_pancreas_2019_10x_thompson_008 import Dataset as Dataset0011 - - -class DatasetGroupPancreas(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path), - Dataset0004(path=path, meta_path=meta_path), - Dataset0005(path=path, meta_path=meta_path), - Dataset0006(path=path, meta_path=meta_path), - Dataset0007(path=path, meta_path=meta_path), - Dataset0008(path=path, meta_path=meta_path), - Dataset0009(path=path, meta_path=meta_path), - Dataset0010(path=path, meta_path=meta_path), - Dataset0011(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py deleted file mode 100644 index 71901248d..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2018_microwell_han_001.py +++ /dev/null @@ -1,86 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "pancreas" - 
self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'Acinar cell(Pancreas)': 'pancreatic acinar cell', - 'Dendrtic cell(Pancreas)': 'dendritic cell', - 'Ductal cell(Pancreas)': 'pancreatic ductal cell', - 'Endocrine cell(Pancreas)': "endocrine cell", - 'Dividing cell(Pancreas)': "endocrine cell", - 'Endothelial cell_Fabp4 high(Pancreas)': 'endothelial cell', - 'Endothelial cell_Lrg1 high(Pancreas)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Pancreas)': 'endothelial cell', - 'Erythroblast_Hbb-bt high(Pancreas)': 'erythroblast', - 'Erythroblast_Igkc high(Pancreas)': 'erythroblast', - 'Granulocyte(Pancreas)': 'granulocyte', - 'Macrophage_Ly6c2 high(Pancreas)': 'macrophage', - 'Macrophage(Pancreas)': 'macrophage', - 'Glial cell(Pancreas)': 'glial cell', - 'Smooth muscle cell_Acta2 high(Pancreas)': 'smooth muscle cell', - 'Smooth muscle cell_Rgs5 high(Pancreas)': 'smooth muscle cell', - 'Stromal cell_Fn1 high(Pancreas)': 'stromal cell', - 'Stromal cell_Mfap4 high(Pancreas)': 'stromal cell', - 'Stromal cell_Smoc2 high(Pancreas)': 'stromal cell', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'β-cell(Pancreas)': "pancreatic B cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Pancreas_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py deleted file mode 100644 index fd4e43323..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == 
"figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = 
True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py deleted file mode 100644 index 433ff3de1..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_001.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_001_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + 
'_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py deleted file mode 100644 index 378006b63..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_002.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, 
meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_002_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py deleted file mode 100644 index f7773faf1..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_003.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_003_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': 
"endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py deleted file mode 100644 index f3117cba6..000000000 --- 
a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_004.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_004_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] 
= "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py deleted file mode 100644 index a30f30d8b..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_005.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_005_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 
'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - 
self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py deleted file mode 100644 index 3c1873c96..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_006.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_006_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - 
self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py deleted file mode 100644 index d619bf262..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_007.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - 
DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_007_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = 
self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py deleted file mode 100644 index 5747594ff..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_10x_thompson_008.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_10x_thompson_008_10.1016/j.cmet.2019.01.021" - self.download_website = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - 'acinar': 'pancreatic acinar cell', - 'ductal': 'pancreatic ductal cell', - 'leukocyte': 'leukocyte', - 'T cell(Pancreas)': 't cell', - 'B cell(Pancreas)': 'b cell', - 'beta': "pancreatic B cell", - 'alpha': "pancreatic A cell", - 'delta': "pancreatic D cell", - 'pp': "pancreatic PP cell", - 'smooth_muscle': "smooth muscle cell", - 'stellate cell': "pancreatic stellate cell", - 'fibroblast': "stromal cell", - 
'endothelial': "endothelial cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=0) - - self.adata = anndata.read_mtx(fn + '_matrix.mtx.gz').transpose() - self.adata.var_names = np.genfromtxt(fn + '_genes.tsv.gz', dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + '_barcodes.tsv.gz', dtype=str) - self.adata.var_names_make_unique() - self.adata = self.adata[celltypes.index] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Bhushan" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cmet.2019.01.021" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = celltypes - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = False - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "diabetic" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py deleted file mode 100644 index 
8f17750bf..000000000 --- a/sfaira/data/mouse/pancreas/mouse_pancreas_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "pancreas" - self.sub_tissue = "pancreas" - self.annotated = True - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py b/sfaira/data/mouse/peripheral_blood/mouse_peripheral_blood_2018_microwell_han_005.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/data/mouse/placenta/__init__.py b/sfaira/data/mouse/placenta/__init__.py deleted file mode 100644 index f8363fb61..000000000 --- a/sfaira/data/mouse/placenta/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_placenta import DatasetGroupPlacenta \ No newline at end of file diff --git a/sfaira/data/mouse/placenta/external.py b/sfaira/data/mouse/placenta/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/placenta/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/placenta/mouse_placenta.py b/sfaira/data/mouse/placenta/mouse_placenta.py deleted file mode 100644 index 3c885a94f..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta.py +++ /dev/null @@ 
-1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_placenta_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_placenta_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupPlacenta(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py deleted file mode 100644 index a1b4c6a35..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "placenta" - self.sub_tissue = "placenta" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Placenta)': 'B cell', - 'Basophil(Placenta)': 'basophil', - 'Decidual stromal cell(Placenta)': 'decidual stromal cell', - 'Dendritic cell(Placenta)': 'dendritic cell', - 
'Endodermal cell_Afp high(Placenta)': 'endodermal cell', - 'Endothelial cell_Maged2 high(Placenta)': 'endothelial cell', - 'Erythroblast_Hbb-y high(Placenta)': 'erythroblast', - 'Granulocyte monocyte progenitors(Placenta)': 'monocyte progenitor', - 'Granulocyte_Neat1 high(Placenta)': 'granulocyte', - 'Granulocyte_S100a9 high(Placenta)': 'granulocyte', - 'HSPC_Lmo2 high(Placenta)': 'HSPC', - 'Invasive spongiotrophoblast(Placenta)': 'invasive spongiotrophoblast', - 'Labyrinthine trophoblast(Placenta)': 'labyrinthine trophoblast', - 'Macrophage_Apoe high(Placenta)': 'macrophage', - 'Macrophage_Spp1 high(Placenta)': 'macrophage', - 'Megakaryocyte progenitor cell(Placenta)': 'megakaryocte', - 'Monocyte(Placenta)': 'monocyte', - 'NK cell(Placenta)': 'NK cell', - 'NKT cell(Placenta)': 'NKT cell', - 'PE lineage cell_Gkn2 high(Placenta)': 'PE lineage cell', - 'PE lineage cell_S100g high(Placenta)': 'PE lineage cell', - 'Progenitor trophoblast_Gjb3 high(Placenta)': 'trophoblast progenitor', - 'Spiral artery trophoblast giant cells(Placenta)': 'spiral artery trophoblast giant cells', - 'Spongiotrophoblast_Hsd11b2 high(Placenta)': 'spongiotrophoblast', - 'Spongiotrophoblast_Phlda2 high(Placenta)': 'spongiotrophoblast', - 'Stromal cell(Placenta)': 'stromal cell', - 'Stromal cell_Acta2 high(Placenta)': 'stromal cell', - 'Trophoblast progenitor_Taf7l high(Placenta)': 'trophoblast progenitor', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index 
for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py deleted file mode 100644 index 871354a1d..000000000 --- a/sfaira/data/mouse/placenta/mouse_placenta_2018_microwell_han_002.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = 
"mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "placenta" - self.sub_tissue = "placenta" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Placenta)': 'B cell', - 'Basophil(Placenta)': 'basophil', - 'Decidual stromal cell(Placenta)': 'decidual stromal cell', - 'Dendritic cell(Placenta)': 'dendritic cell', - 'Endodermal cell_Afp high(Placenta)': 'endodermal cell', - 'Endothelial cell_Maged2 high(Placenta)': 'endothelial cell', - 'Erythroblast_Hbb-y high(Placenta)': 'erythroblast', - 'Granulocyte monocyte progenitors(Placenta)': 'monocyte progenitor', - 'Granulocyte_Neat1 high(Placenta)': 'granulocyte', - 'Granulocyte_S100a9 high(Placenta)': 'granulocyte', - 'HSPC_Lmo2 high(Placenta)': 'HSPC', - 'Invasive spongiotrophoblast(Placenta)': 'invasive spongiotrophoblast', - 'Labyrinthine trophoblast(Placenta)': 'labyrinthine trophoblast', - 'Macrophage_Apoe high(Placenta)': 'macrophage', - 'Macrophage_Spp1 high(Placenta)': 'macrophage', - 'Megakaryocyte progenitor cell(Placenta)': 'megakaryocte', - 'Monocyte(Placenta)': 'monocyte', - 'NK cell(Placenta)': 'NK cell', - 'NKT cell(Placenta)': 'NKT cell', - 'PE lineage cell_Gkn2 high(Placenta)': 'PE lineage cell', - 'PE lineage cell_S100g high(Placenta)': 'PE lineage cell', - 'Progenitor trophoblast_Gjb3 high(Placenta)': 'trophoblast progenitor', - 'Spiral artery trophoblast giant cells(Placenta)': 'spiral artery trophoblast giant cells', - 'Spongiotrophoblast_Hsd11b2 high(Placenta)': 'spongiotrophoblast', - 'Spongiotrophoblast_Phlda2 high(Placenta)': 'spongiotrophoblast', - 'Stromal cell(Placenta)': 'stromal cell', - 'Stromal cell_Acta2 high(Placenta)': 'stromal cell', - 'Trophoblast progenitor_Taf7l high(Placenta)': 'trophoblast progenitor', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in 
load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/__init__.py b/sfaira/data/mouse/prostate/__init__.py deleted file mode 100644 index 2f35afd48..000000000 --- a/sfaira/data/mouse/prostate/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_prostate import 
DatasetGroupProstate \ No newline at end of file diff --git a/sfaira/data/mouse/prostate/external.py b/sfaira/data/mouse/prostate/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/prostate/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/prostate/mouse_prostate.py b/sfaira/data/mouse/prostate/mouse_prostate.py deleted file mode 100644 index bd16b0e46..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_prostate_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_prostate_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupProstate(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py deleted file mode 100644 index 486ba9b5c..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - 
): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True - - self.class_maps = { - "0": { - 'Dendritic cell(Prostate)': 'dendritic cell', - 'Epithelial cell(Prostate)': 'epithelial cell', - 'Glandular epithelium(Prostate)': 'glandular epithelial cell', - 'Prostate gland cell(Prostate)': 'glandular cell', - 'Stromal cell(Prostate)': 'stromal cell', - 'T cell(Prostate)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py deleted file mode 100644 index 9c5a357d7..000000000 --- a/sfaira/data/mouse/prostate/mouse_prostate_2018_microwell_han_002.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "prostate" - self.sub_tissue = "prostate" - self.annotated = True - - self.class_maps = { - "0": { - 'Dendritic cell(Prostate)': 'dendritic cell', - 'Epithelial cell(Prostate)': 'epithelial cell', - 'Glandular epithelium(Prostate)': 'glandular epithelial cell', - 'Prostate gland cell(Prostate)': 'glandular cell', - 'Stromal cell(Prostate)': 'stromal cell', - 'T cell(Prostate)': 'T cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in 
load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/__init__.py b/sfaira/data/mouse/rib/__init__.py deleted file mode 100644 index d8f73f181..000000000 --- a/sfaira/data/mouse/rib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_rib import DatasetGroupRib \ No newline at 
end of file diff --git a/sfaira/data/mouse/rib/external.py b/sfaira/data/mouse/rib/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/rib/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/rib/mouse_rib.py b/sfaira/data/mouse/rib/mouse_rib.py deleted file mode 100644 index 1320a5e8c..000000000 --- a/sfaira/data/mouse/rib/mouse_rib.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_rib_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_rib_2018_microwell_han_002 import Dataset as Dataset0002 -from .mouse_rib_2018_microwell_han_003 import Dataset as Dataset0003 - - -class DatasetGroupRib(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py deleted file mode 100644 index 358d4b053..000000000 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_001.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - 
DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") - fn_meta = 
os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py deleted file mode 100644 index 1099a3f6c..000000000 --- a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_002.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import 
DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise 
ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py b/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py deleted file mode 100644 index e1a4f8264..000000000 --- 
a/sfaira/data/mouse/rib/mouse_rib_2018_microwell_han_003.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "rib" - self.sub_tissue = "rib" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Neonatal-Rib)': 'B cell', - 'Cartilage cell_Clu high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Col2a1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Cxcl14 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Ppa1 high(Neonatal-Rib)': 'cartilage cell', - 'Cartilage cell_Prg4 high(Neonatal-Rib)': 'cartilage cell', - 'Dividing cell(Neonatal-Rib)': 'proliferative cell', - 'Endothelial cell(Neonatal-Rib)': 'endothelial cell', - 'Erythroblast_Hba-a1 high(Neonatal-Rib)': 'erythroblast', - 'Erythroblast_Ttr high(Neonatal-Rib)': 'erythroblast', - 'Granulocyte(Neonatal-Rib)': 'granulocyte', - 'Macrophage_C1qc high(Neonatal-Rib)': 'macrophage', - 'Macrophage_Ctss high(Neonatal-Rib)': 'macrophage', - 'Muscle cell(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Acta2 high(Neonatal-Rib)': 'muscle cell', - 'Muscle cell_Actc1 high(Neonatal-Rib)': 'muscle cell', - 'Neuron_Mpz high(Neonatal-Rib)': 'neuron', - 'Neuron_Stmn2 high(Neonatal-Rib)': 'neuron', - 'Neutrophil(Neonatal-Rib)': 'neutrophil', - 'Neutrophil_Elane high(Neonatal-Rib)': 'neutrophil', - 'Oligodendrocyte(Neonatal-Rib)': 'oligodendrocyte', - 'Osteoblast(Neonatal-Rib)': 'osteoblast', - 'Osteoclast(Neonatal-Rib)': 'osteoclast', - 'Stromal 
cell_Acta1 high(Neonatal-Rib)': 'stromal cell', - 'Stromal cell_Tnmd high(Neonatal-Rib)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git 
a/sfaira/data/mouse/skin/__init__.py b/sfaira/data/mouse/skin/__init__.py deleted file mode 100644 index 232177187..000000000 --- a/sfaira/data/mouse/skin/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_skin import DatasetGroupSkin \ No newline at end of file diff --git a/sfaira/data/mouse/skin/external.py b/sfaira/data/mouse/skin/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/skin/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/skin/mouse_skin.py b/sfaira/data/mouse/skin/mouse_skin.py deleted file mode 100644 index dba84de3a..000000000 --- a/sfaira/data/mouse/skin/mouse_skin.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_skin_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_skin_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupSkin(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path).datasets) - except ImportError: - pass \ No newline at end of file diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py deleted file mode 100644 index 6d1889244..000000000 --- a/sfaira/data/mouse/skin/mouse_skin_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: 
str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "skin" - self.sub_tissue = "skin" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - 
self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6b8f05078..000000000 --- a/sfaira/data/mouse/skin/mouse_skin_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "skin" - self.sub_tissue = "skin" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", 
"skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/__init__.py b/sfaira/data/mouse/spleen/__init__.py deleted file mode 100644 index 90981c60b..000000000 --- a/sfaira/data/mouse/spleen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_spleen import DatasetGroupSpleen \ No newline at end of file diff --git a/sfaira/data/mouse/spleen/external.py b/sfaira/data/mouse/spleen/external.py deleted file mode 100644 index cc51e6fda..000000000 
--- a/sfaira/data/mouse/spleen/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/spleen/mouse_spleen.py b/sfaira/data/mouse/spleen/mouse_spleen.py deleted file mode 100644 index 05ff9439c..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_spleen_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_spleen_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_spleen_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupSpleen(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py deleted file mode 100644 index 3eac25aa7..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2018_microwell_han_001.py +++ /dev/null @@ -1,75 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id 
= "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": { - 'Erythroblast(Spleen)': 'proerythroblast', - 'Dendritic cell_S100a4 high(Spleen)': 'dendritic cell', - 'Dendritic cell_Siglech high(Spleen)': 'dendritic cell', - 'Granulocyte(Spleen)': 'granulocyte', - 'Macrophage(Spleen)': 'macrophage', - 'Monocyte(Spleen)': 'monocyte', - 'NK cell(Spleen)': 'NK cell', - 'Neutrophil(Spleen)': 'neutrophil', - 'Plasma cell(Spleen)': 'plasma cell', - 'T cell(Spleen)': 'T cell', - 'Marginal zone B cell(Spleen)': 'B cell' - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py deleted file mode 100644 index 3042be5fc..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_spleen_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", 
"spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py deleted file mode 100644 index d386c4609..000000000 --- a/sfaira/data/mouse/spleen/mouse_spleen_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): 
- - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.sub_tissue = "spleen" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = 
self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/stomach/__init__.py b/sfaira/data/mouse/stomach/__init__.py deleted file mode 100644 index f9f2d2ead..000000000 --- a/sfaira/data/mouse/stomach/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_stomach import DatasetGroupStomach \ No newline at end of file diff --git a/sfaira/data/mouse/stomach/external.py b/sfaira/data/mouse/stomach/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/stomach/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/stomach/mouse_stomach.py b/sfaira/data/mouse/stomach/mouse_stomach.py deleted file mode 100644 index 3fc8ba1ba..000000000 --- a/sfaira/data/mouse/stomach/mouse_stomach.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_stomach_2018_microwell_han_001 import Dataset as Dataset0001 - - -class DatasetGroupStomach(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupStomach - 
self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py deleted file mode 100644 index 87cce385b..000000000 --- a/sfaira/data/mouse/stomach/mouse_stomach_2018_microwell_han_001.py +++ /dev/null @@ -1,81 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "stomach" - self.sub_tissue = "stomach" - self.annotated = True - - self.class_maps = { - "0": { - 'Antral mucous cell (Stomach)': 'antral mucous cell', - 'Dendritic cell(Stomach)': 'dendritic cell', - 'Dividing cell(Stomach)': 'proliferative cell', - 'Epithelial cell_Gkn3 high(Stomach)': 'epithelial cell', - 'Epithelial cell_Krt20 high(Stomach)': 'epithelial cell', - 'Epithelial cell_Pla2g1b high(Stomach)': 'epithelial cell', - 'G cell(Stomach)': 'G cell', - 'Gastric mucosal cell(Stomach)': 'gastric mucosal cell', - 'Macrophage(Stomach)': 'macrophage', - 'Muscle cell(Stomach)': 'muscle cell', - 'Parietal cell (Stomach)': 'parietal cell', - 'Pit cell_Gm26917 high(Stomach)': 'pit cell', - 'Pit cell_Ifrd1 high(Stomach)': 'pit cell', - 'Stomach cell_Gkn2 high(Stomach)': 'stomach cell', - 'Stomach cell_Mt2 high(Stomach)': 'stomach cell', - 'Stomach cell_Muc5ac high(Stomach)': 'stomach cell', - 'Tuft cell(Stomach)': 'tuft cell' - }, - } - - def _load(self, fn=None): - if fn is None: - 
if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", "Stomach_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/__init__.py b/sfaira/data/mouse/thymus/__init__.py deleted file mode 100644 index b01b604c8..000000000 --- a/sfaira/data/mouse/thymus/__init__.py +++ 
/dev/null @@ -1 +0,0 @@ -from .mouse_thymus import DatasetGroupThymus \ No newline at end of file diff --git a/sfaira/data/mouse/thymus/external.py b/sfaira/data/mouse/thymus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/thymus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/thymus/mouse_thymus.py b/sfaira/data/mouse/thymus/mouse_thymus.py deleted file mode 100644 index b74e325c1..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_thymus_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_thymus_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from .mouse_thymus_2018_microwell_han_001 import Dataset as Dataset0003 - - -class DatasetGroupThymus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path), - Dataset0003(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py deleted file mode 100644 index 602777280..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2018_microwell_han_001.py +++ /dev/null @@ -1,70 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class 
Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - - self.class_maps = { - "0": { - 'abT cell(Thymus)': 'abT cell', - 'B cell(Thymus)': "B cell", - 'DPT cell(Thymus)': "double positive T cell", - 'gdT cell (Thymus)': 'gdT cell', - 'Pre T cell(Thymus)': 'immature T cell', - 'Proliferating thymocyte(Thymus)': "immature T cell", - 'T cell_Id2 high(Thymus)': 'abT cell', # TODO check, not sure about this gene - 'T cell_Ms4a4b high(Thymus)': 'abT cell' # TODO check, not sure about this gene - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - 
self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py deleted file mode 100644 index 12bfbbe3e..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_10x_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_thymus_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - 
- self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py 
b/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py deleted file mode 100644 index aed1fac7e..000000000 --- a/sfaira/data/mouse/thymus/mouse_thymus_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,71 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "thymus" - self.sub_tissue = "thymus" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - 
self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=["nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/__init__.py b/sfaira/data/mouse/tongue/__init__.py deleted file mode 100644 index 63266537b..000000000 --- a/sfaira/data/mouse/tongue/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_tongue import DatasetGroupTongue \ No newline at end of file diff --git a/sfaira/data/mouse/tongue/external.py b/sfaira/data/mouse/tongue/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/tongue/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/tongue/mouse_tongue.py b/sfaira/data/mouse/tongue/mouse_tongue.py deleted file mode 100644 index a88388371..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_tongue_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_tongue_2019_smartseq2_pisco_001 import Dataset as 
Dataset0002 - - -class DatasetGroupTongue(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTongue - self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py deleted file mode 100644 index 0721142b2..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_10x_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_tongue_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "tongue" - self.sub_tissue = "tongue" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", 
"tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py deleted file mode 100644 index b076e08cf..000000000 --- a/sfaira/data/mouse/tongue/mouse_tongue_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,72 +0,0 @@ -import anndata -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - 
id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "spleen" - self.organ = "tongue" - self.sub_tissue = "tongue" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", "Tongue_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - 
self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/__init__.py b/sfaira/data/mouse/trachea/__init__.py deleted file mode 100644 index 3286064dd..000000000 --- a/sfaira/data/mouse/trachea/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_trachea import DatasetGroupTrachea \ No newline at end of file diff --git a/sfaira/data/mouse/trachea/external.py b/sfaira/data/mouse/trachea/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/trachea/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/trachea/mouse_trachea.py b/sfaira/data/mouse/trachea/mouse_trachea.py deleted file mode 100644 index b83539438..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_trachea_2019_10x_pisco_001 import Dataset as Dataset0001 -from .mouse_trachea_2019_smartseq2_pisco_001 import Dataset as Dataset0002 - - -class DatasetGroupTrachea(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = 
dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py deleted file mode 100644 index fda917f76..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_10x_pisco_001.py +++ /dev/null @@ -1,74 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_trachea_2019_10x_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "trachea" - self.sub_tissue = "trachea" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % 
self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "10x" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py deleted file mode 100644 index e18bb8b6a..000000000 --- a/sfaira/data/mouse/trachea/mouse_trachea_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import numpy as np -import os -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - 
DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728" - self.source = source - if self.source == "aws": - self.download_website = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download_website = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - self.organ = "trachea" - self.sub_tissue = "trachea" - self.annotated = True - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Quake" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2019" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1101/661728" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "smartseq2" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] 
= 'norm' - # self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] is already set - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/__init__.py b/sfaira/data/mouse/uterus/__init__.py deleted file mode 100644 index e84f043a1..000000000 --- a/sfaira/data/mouse/uterus/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mouse_uterus import DatasetGroupUterus \ No newline at end of file diff --git a/sfaira/data/mouse/uterus/external.py b/sfaira/data/mouse/uterus/external.py deleted file mode 100644 index cc51e6fda..000000000 --- a/sfaira/data/mouse/uterus/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroupBase -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/mouse/uterus/mouse_uterus.py b/sfaira/data/mouse/uterus/mouse_uterus.py deleted file mode 100644 index aca5ad69d..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Union - -from .external import DatasetGroupBase - -from .mouse_uterus_2018_microwell_han_001 import Dataset as Dataset0001 -from .mouse_uterus_2018_microwell_han_002 import Dataset as Dataset0002 - - -class DatasetGroupUterus(DatasetGroupBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path), - Dataset0002(path=path, meta_path=meta_path) - ] - keys = [x.id for x in datasets] - self.datasets = dict(zip(keys, datasets)) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupUterus - 
self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py deleted file mode 100644 index 58985d2a0..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_001.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "uterus" - self.sub_tissue = "uterus" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Uterus)': 'B cell', - 'Dendritic cell(Uterus)': 'dendritic cell', - 'Endothelial cell_Cldn5 high(Uterus)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Uterus)': 'endothelial cell', - 'Glandular epithelium_Ltf high(Uterus)': 'glandular epithelial cell', - 'Glandular epithelium_Sprr2f high(Uterus)': 'glandular epithelial cell', - 'Granulocyte(Uterus)': 'granulocyte', - 'Keratinocyte(Uterus)': 'keratinocyte', - 'Macrophage(Uterus)': 'macrophage', - 'Monocyte(Uterus)': 'monocyte', - 'Muscle cell_Mgp high(Uterus)': 'muscle cell', - 'Muscle cell_Pcp4 high(Uterus)': 'muscle cell', - 'Smooth muscle cell_Rgs5 high(Uterus)': 'smooth muscle cell', - 'NK cell(Uterus)': 'NK cell', - 'Stromal cell_Ccl11 high(Uterus)': 'stromal cell', - 'Stromal cell_Cxcl14 high(Uterus)': 'stromal cell', - 'Stromal cell_Gm23935 high(Uterus)': 'stromal cell', - 'Stromal cell_Has1 high(Uterus)': 'stromal 
cell', - 'Stromal cell_Hsd11b2 high(Uterus)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py 
b/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py deleted file mode 100644 index dad5f7194..000000000 --- a/sfaira/data/mouse/uterus/mouse_uterus_2018_microwell_han_002.py +++ /dev/null @@ -1,83 +0,0 @@ -import anndata -import numpy as np -import os -import pandas -from typing import Union -from .external import DatasetBase - - -class Dataset(DatasetBase): - - id: str - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - **kwargs - ): - DatasetBase.__init__(self=self, path=path, meta_path=meta_path, **kwargs) - self.species = "mouse" - self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download_website = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "uterus" - self.sub_tissue = "uterus" - self.annotated = True - - self.class_maps = { - "0": { - 'B cell(Uterus)': 'B cell', - 'Dendritic cell(Uterus)': 'dendritic cell', - 'Endothelial cell_Cldn5 high(Uterus)': 'endothelial cell', - 'Endothelial cell_Tm4sf1 high(Uterus)': 'endothelial cell', - 'Glandular epithelium_Ltf high(Uterus)': 'glandular epithelial cell', - 'Glandular epithelium_Sprr2f high(Uterus)': 'glandular epithelial cell', - 'Granulocyte(Uterus)': 'granulocyte', - 'Keratinocyte(Uterus)': 'keratinocyte', - 'Macrophage(Uterus)': 'macrophage', - 'Monocyte(Uterus)': 'monocyte', - 'Muscle cell_Mgp high(Uterus)': 'muscle cell', - 'Muscle cell_Pcp4 high(Uterus)': 'muscle cell', - 'Smooth muscle cell_Rgs5 high(Uterus)': 'smooth muscle cell', - 'NK cell(Uterus)': 'NK cell', - 'Stromal cell_Ccl11 high(Uterus)': 'stromal cell', - 'Stromal cell_Cxcl14 high(Uterus)': 'stromal cell', - 'Stromal cell_Gm23935 high(Uterus)': 'stromal cell', - 'Stromal cell_Has1 high(Uterus)': 'stromal cell', - 'Stromal cell_Hsd11b2 high(Uterus)': 'stromal cell', - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or 
path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(['Unnamed: 0'], axis=1) - - data = pandas.read_csv(fn, sep=' ', header=0) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] - - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = "Guo" - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = "2018" - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = "10.1016/j.cell.2018.02.001" - self.adata.uns[self._ADATA_IDS_SFAIRA.protocol] = "microwell-seq" - self.adata.uns[self._ADATA_IDS_SFAIRA.organ] = self.organ - self.adata.uns[self._ADATA_IDS_SFAIRA.subtissue] = self.sub_tissue # TODO - self.adata.uns[self._ADATA_IDS_SFAIRA.species] = "mouse" - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download_website - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = 'raw' - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.adata.obs["Annotation"].values.tolist() - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = self.adata.obs["Annotation"].values.tolist() - self.set_unkown_class_id(ids=[np.nan, "nan"]) - self.adata.obs[self._ADATA_IDS_SFAIRA.healthy] = True - self.adata.obs[self._ADATA_IDS_SFAIRA.state_exact] = "healthy" - - self._convert_and_set_var_names(symbol_col='index', ensembl_col=None) diff --git a/sfaira/data/utils/create_meta.py b/sfaira/data/utils/create_meta.py new file mode 100644 index 000000000..bb707ce16 --- /dev/null +++ b/sfaira/data/utils/create_meta.py @@ -0,0 +1,34 @@ +import sfaira +import sys +import tensorflow as tf + 
+print(tf.__version__) + + +def write_meta(args0, args1): + args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) + return None + + +# Set global variables. +print("sys.argv", sys.argv) + +path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +processes = int(str(sys.argv[3])) + +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_meta +) +dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. +dsg.load( + celltype_version=None, + annotated_only=False, + match_to_reference=None, + remove_gene_version=True, + load_raw=True, + allow_caching=False, + processes=processes, + func=write_meta, + kwargs_func={"args1": path_meta}, +) diff --git a/sfaira/data/utils/create_meta_human.py b/sfaira/data/utils/create_meta_human.py deleted file mode 100644 index 0ba7e49bb..000000000 --- a/sfaira/data/utils/create_meta_human.py +++ /dev/null @@ -1,62 +0,0 @@ -import sys -import tensorflow as tf -from sfaira.data import human - - -print(tf.__version__) - -# Set global variables. 
-print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) - -ds_dict = { - "adipose": human.DatasetGroupAdipose(path=path, meta_path=path_meta), - "adrenalgland": human.DatasetGroupAdrenalgland(path=path, meta_path=path_meta), - "mixed": human.DatasetGroupMixed(path=path, meta_path=path_meta), - "artery": human.DatasetGroupArtery(path=path, meta_path=path_meta), - "bladder": human.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": human.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": human.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": human.DatasetGroupBrain(path=path, meta_path=path_meta), - "calvaria": human.DatasetGroupCalvaria(path=path, meta_path=path_meta), - "cervix": human.DatasetGroupCervix(path=path, meta_path=path_meta), - "chorionicvillus": human.DatasetGroupChorionicvillus(path=path, meta_path=path_meta), - "colon": human.DatasetGroupColon(path=path, meta_path=path_meta), - "duodenum": human.DatasetGroupDuodenum(path=path, meta_path=path_meta), - "epityphlon": human.DatasetGroupEpityphlon(path=path, meta_path=path_meta), - "esophagus": human.DatasetGroupEsophagus(path=path, meta_path=path_meta), - "eye": human.DatasetGroupEye(path=path, meta_path=path_meta), - "fallopiantube": human.DatasetGroupFallopiantube(path=path, meta_path=path_meta), - "femalegonad": human.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "gallbladder": human.DatasetGroupGallbladder(path=path, meta_path=path_meta), - "heart": human.DatasetGroupHeart(path=path, meta_path=path_meta), - "hesc": human.DatasetGroupHesc(path=path, meta_path=path_meta), - "ileum": human.DatasetGroupIleum(path=path, meta_path=path_meta), - "jejunum": human.DatasetGroupJejunum(path=path, meta_path=path_meta), - "kidney": human.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": human.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": human.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": 
human.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "muscle": human.DatasetGroupMuscle(path=path, meta_path=path_meta), - "omentum": human.DatasetGroupOmentum(path=path, meta_path=path_meta), - "pancreas": human.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": human.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "pleura": human.DatasetGroupPleura(path=path, meta_path=path_meta), - "prostate": human.DatasetGroupProstate(path=path, meta_path=path_meta), - "rectum": human.DatasetGroupRectum(path=path, meta_path=path_meta), - "rib": human.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": human.DatasetGroupSkin(path=path, meta_path=path_meta), - "spinalcord": human.DatasetGroupSpinalcord(path=path, meta_path=path_meta), - "spleen": human.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": human.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": human.DatasetGroupThymus(path=path, meta_path=path_meta), - "thyroid": human.DatasetGroupThyroid(path=path, meta_path=path_meta), - "trachea": human.DatasetGroupTrachea(path=path, meta_path=path_meta), - "ureter": human.DatasetGroupUreter(path=path, meta_path=path_meta), - "uterus": human.DatasetGroupUterus(path=path, meta_path=path_meta), -} -for k in list(ds_dict.keys()): - for kk in ds_dict[k].ids: - ds_dict[k].datasets[kk].write_meta(dir_out=path_meta) diff --git a/sfaira/data/utils/create_meta_mouse.py b/sfaira/data/utils/create_meta_mouse.py deleted file mode 100644 index c2ab7c4ca..000000000 --- a/sfaira/data/utils/create_meta_mouse.py +++ /dev/null @@ -1,45 +0,0 @@ -import sys -import tensorflow as tf -from sfaira.data import mouse - - -print(tf.__version__) - -# Set global variables. 
-print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) - -ds_dict = { - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), - "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=path_meta), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), - "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), - "uterus": mouse.DatasetGroupUterus(path=path, 
meta_path=path_meta) -} -for k in list(ds_dict.keys()): - for kk in ds_dict[k].ids: - ds_dict[k].datasets[kk].write_meta(dir_out=path_meta) diff --git a/sfaira/data/utils/write_backed_human.py b/sfaira/data/utils/write_backed_human.py index 1788f5e36..ec7b98766 100644 --- a/sfaira/data/utils/write_backed_human.py +++ b/sfaira/data/utils/write_backed_human.py @@ -1,10 +1,7 @@ +import os +import sfaira import sys import tensorflow as tf -import sfaira -import os - -from sfaira.data import human - print(tf.__version__) @@ -16,56 +13,10 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") - -ds_dict = { - "adipose": human.DatasetGroupAdipose(path=path, meta_path=path_meta), - "adrenalgland": human.DatasetGroupAdrenalgland(path=path, meta_path=path_meta), - "mixed": human.DatasetGroupMixed(path=path, meta_path=path_meta), - "artery": human.DatasetGroupArtery(path=path, meta_path=path_meta), - "bladder": human.DatasetGroupBladder(path=path, meta_path=path_meta), - "blood": human.DatasetGroupBlood(path=path, meta_path=path_meta), - "bone": human.DatasetGroupBone(path=path, meta_path=path_meta), - "brain": human.DatasetGroupBrain(path=path, meta_path=path_meta), - "calvaria": human.DatasetGroupCalvaria(path=path, meta_path=path_meta), - "cervix": human.DatasetGroupCervix(path=path, meta_path=path_meta), - "chorionicvillus": human.DatasetGroupChorionicvillus(path=path, meta_path=path_meta), - "colon": human.DatasetGroupColon(path=path, meta_path=path_meta), - "duodenum": human.DatasetGroupDuodenum(path=path, meta_path=path_meta), - "epityphlon": human.DatasetGroupEpityphlon(path=path, meta_path=path_meta), - "esophagus": human.DatasetGroupEsophagus(path=path, meta_path=path_meta), - "eye": human.DatasetGroupEye(path=path, meta_path=path_meta), - "fallopiantube": human.DatasetGroupFallopiantube(path=path, meta_path=path_meta), - "femalegonad": human.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "gallbladder": 
human.DatasetGroupGallbladder(path=path, meta_path=path_meta), - "heart": human.DatasetGroupHeart(path=path, meta_path=path_meta), - "hesc": human.DatasetGroupHesc(path=path, meta_path=path_meta), - "ileum": human.DatasetGroupIleum(path=path, meta_path=path_meta), - "jejunum": human.DatasetGroupJejunum(path=path, meta_path=path_meta), - "kidney": human.DatasetGroupKidney(path=path, meta_path=path_meta), - "liver": human.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": human.DatasetGroupLung(path=path, meta_path=path_meta), - "malegonad": human.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "muscle": human.DatasetGroupMuscle(path=path, meta_path=path_meta), - "omentum": human.DatasetGroupOmentum(path=path, meta_path=path_meta), - "pancreas": human.DatasetGroupPancreas(path=path, meta_path=path_meta), - "placenta": human.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "pleura": human.DatasetGroupPleura(path=path, meta_path=path_meta), - "prostate": human.DatasetGroupProstate(path=path, meta_path=path_meta), - "rectum": human.DatasetGroupRectum(path=path, meta_path=path_meta), - "rib": human.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": human.DatasetGroupSkin(path=path, meta_path=path_meta), - "spinalcord": human.DatasetGroupSpinalcord(path=path, meta_path=path_meta), - "spleen": human.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": human.DatasetGroupStomach(path=path, meta_path=path_meta), - "thymus": human.DatasetGroupThymus(path=path, meta_path=path_meta), - "thyroid": human.DatasetGroupThyroid(path=path, meta_path=path_meta), - "trachea": human.DatasetGroupTrachea(path=path, meta_path=path_meta), - "ureter": human.DatasetGroupUreter(path=path, meta_path=path_meta), - "uterus": human.DatasetGroupUterus(path=path, meta_path=path_meta), -} -ds = sfaira.data.DatasetSuperGroup( - dataset_groups=[ds_dict[k] for k in list(ds_dict.keys())] +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, 
meta_path=path_meta, cache_path=path_meta ) +ds.subset(key="organism", values=["human"]) ds.load_all_tobacked( fn_backed=fn, genome=genome, diff --git a/sfaira/data/utils/write_backed_mouse.py b/sfaira/data/utils/write_backed_mouse.py index a408380e9..e8397186b 100644 --- a/sfaira/data/utils/write_backed_mouse.py +++ b/sfaira/data/utils/write_backed_mouse.py @@ -1,10 +1,7 @@ +import os +import sfaira import sys import tensorflow as tf -import sfaira -import os - -from sfaira.data import mouse - print(tf.__version__) @@ -16,39 +13,10 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") - -ds_dict = { - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=path_meta), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=path_meta), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=path_meta), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=path_meta), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=path_meta), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=path_meta), - "colon": mouse.DatasetGroupColon(path=path, meta_path=path_meta), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=path_meta), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=path_meta), - "lung": mouse.DatasetGroupLung(path=path, meta_path=path_meta), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=path_meta), - "bone": mouse.DatasetGroupBone(path=path, meta_path=path_meta), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=path_meta), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=path_meta), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=path_meta), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=path_meta), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=path_meta), - "rib": mouse.DatasetGroupRib(path=path, meta_path=path_meta), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=path_meta), - "ileum": 
mouse.DatasetGroupIleum(path=path, meta_path=path_meta), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=path_meta), - "stomach": mouse.DatasetGroupStomach(path=path, meta_path=path_meta), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=path_meta), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=path_meta), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=path_meta), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=path_meta), - "uterus": mouse.DatasetGroupUterus(path=path, meta_path=path_meta), -} -ds = sfaira.data.DatasetSuperGroup( - dataset_groups=[ds_dict[k] for k in list(ds_dict.keys())] +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_meta ) +ds.subset(key="organism", values=["mouse"]) ds.load_all_tobacked( fn_backed=fn, genome=genome, diff --git a/sfaira/estimators/__init__.py b/sfaira/estimators/__init__.py index 7e6f3ff03..cbed4e77b 100644 --- a/sfaira/estimators/__init__.py +++ b/sfaira/estimators/__init__.py @@ -1,5 +1,6 @@ from sfaira.estimators.keras import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype + try: - from sfaira_extension.estimators import * + from sfaira_extension.estimators import * # noqa: F403 except ImportError: pass diff --git a/sfaira/estimators/callbacks.py b/sfaira/estimators/callbacks.py index 5121b07a2..690c3d29f 100644 --- a/sfaira/estimators/callbacks.py +++ b/sfaira/estimators/callbacks.py @@ -46,15 +46,15 @@ def on_epoch_end(self, epoch, logs=None): # (epoch + 1, pseudo_inputs.max(), pseudo_inputs.mean(), pseudo_inputs.min())) if epoch == 199: lr = tf.keras.backend.get_value(self.model.optimizer.lr) - tf.keras.backend.set_value(self.model.optimizer.lr, lr/10) + tf.keras.backend.set_value(self.model.optimizer.lr, lr / 10) if self.verbose > 0: - print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr/10)) + print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr / 10)) if epoch == 
249: lr = tf.keras.backend.get_value(self.model.optimizer.lr) - tf.keras.backend.set_value(self.model.optimizer.lr, lr/10) + tf.keras.backend.set_value(self.model.optimizer.lr, lr / 10) if self.verbose > 0: - print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr/10)) + print('\nReduce lr training at epoch %03d to %s' % (epoch + 1, lr / 10)) if epoch == 299: self.model.stop_training = True diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 3e27959ef..19b6fb032 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ -from sfaira.versions.celltype_versions import SPECIES_DICT, CelltypeVersionsBase +from sfaira.versions.celltype_versions import ORGANISM_DICT, CelltypeVersionsBase from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 759eb3d85..a9a04d323 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -40,7 +40,7 @@ def __init__( model_dir: Union[str, None], model_id: Union[str, None], model_class: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], model_topology: Union[str, None], @@ -55,12 +55,12 @@ def __init__( self.model_dir = model_dir self.model_id = model_id self.model_class = model_class.lower() - self.species = species.lower() + self.organism = organism.lower() self.organ = organ.lower() self.model_type = model_type.lower() self.model_topology = model_topology self.topology_container = Topologies( - species=species, + organism=organism, model_class=model_class, model_type=model_type, topology_id=model_topology @@ -104,7 +104,7 @@ def load_pretrained_weights(self): ) fn = os.path.join(self.cache_path, f"{self.model_id}_weights.data-00000-of-00001") except HTTPError: - raise 
FileNotFoundError(f'cannot find remote weightsfile') + raise FileNotFoundError('cannot find remote weightsfile') else: # Local repo if not self.model_dir: @@ -474,7 +474,7 @@ def __init__( data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], model_topology: Union[str, None], @@ -482,16 +482,16 @@ def __init__( cache_path: str = os.path.join('cache', '') ): super(EstimatorKerasEmbedding, self).__init__( - data=data, - model_dir=model_dir, - model_id=model_id, - model_class="embedding", - species=species, - organ=organ, - model_type=model_type, - model_topology=model_topology, - weights_md5=weights_md5, - cache_path=cache_path + data=data, + model_dir=model_dir, + model_id=model_id, + model_class="embedding", + organism=organism, + organ=organ, + model_type=model_type, + model_topology=model_topology, + weights_md5=weights_md5, + cache_path=cache_path ) def init_model( @@ -793,7 +793,7 @@ def compute_gradients_input( idx = self.idx_test if self.idx_test is None: num_samples = 10000 - idx = np.random.randint(0,self.data.X.shape[0],num_samples) + idx = np.random.randint(0, self.data.X.shape[0], num_samples) n_obs = len(idx) else: idx = None @@ -820,14 +820,14 @@ def compute_gradients_input( self.model.training_model.input, self.model.encoder_model.output[0] ) - latent_dim = self.model.encoder_model.output[0].shape[1] + latent_dim = self.model.encoder_model.output[0].shape[1] input_dim = self.model.training_model.input[0].shape[1] else: model = tf.keras.Model( self.model.training_model.input, self.model.encoder_model.output ) - latent_dim = self.model.encoder_model.output[0].shape[0] + latent_dim = self.model.encoder_model.output[0].shape[0] input_dim = self.model.training_model.input[0].shape[1] @tf.function @@ -837,14 +837,16 @@ def get_gradients(x_batch): tape.watch(x) model_out = model((x, sf)) if 
abs_gradients: - f = lambda x: abs(x) + def f(x): + return abs(x) else: - f = lambda x: x + def f(x): + return x # marginalize on batch level and then accumulate batches # batch_jacobian gives output of size: (batch_size, latent_dim, input_dim) batch_gradients = f(tape.batch_jacobian(model_out, x)) return batch_gradients - + for step, (x_batch, y_batch) in tqdm(enumerate(ds), total=np.ceil(n_obs / batch_size)): batch_gradients = get_gradients(x_batch).numpy() _, y = y_batch @@ -857,11 +859,11 @@ def get_gradients(x_batch): if per_celltype: for cell in cell_names: print(f'{cell} with {counts[cell]} observations') - grads_x[cell] = grads_x[cell]/counts[cell] if counts[cell] > 0 else np.zeros((latent_dim, input_dim)) - + grads_x[cell] = grads_x[cell] / counts[cell] if counts[cell] > 0 else np.zeros((latent_dim, input_dim)) + return {'gradients': grads_x, 'counts': counts} else: - return grads_x/n_obs + return grads_x / n_obs class EstimatorKerasCelltype(EstimatorKeras): @@ -876,7 +878,7 @@ def __init__( data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - species: Union[str, None], + organism: Union[str, None], organ: Union[str, None], model_type: Union[str, None], model_topology: Union[str, None], @@ -885,16 +887,16 @@ def __init__( max_class_weight: float = 1e3 ): super(EstimatorKerasCelltype, self).__init__( - data=data, - model_dir=model_dir, - model_id=model_id, - model_class="celltype", - species=species, - organ=organ, - model_type=model_type, - model_topology=model_topology, - weights_md5=weights_md5, - cache_path=cache_path + data=data, + model_dir=model_dir, + model_id=model_id, + model_class="celltype", + organism=organism, + organ=organ, + model_type=model_type, + model_topology=model_topology, + weights_md5=weights_md5, + cache_path=cache_path ) self.max_class_weight = max_class_weight @@ -916,7 +918,7 @@ def init_model( raise ValueError('unknown topology %s for EstimatorKerasCelltype' % self.model_type) 
self.model = Model( - species=self.species, + organism=self.organism, organ=self.organ, topology_container=self.topology_container, override_hyperpar=override_hyperpar @@ -1196,15 +1198,17 @@ def compute_gradients_input( ) for step, (x_batch, _, _) in enumerate(ds): - print("compute gradients wrt. input: batch %i / %i." % (step+1, np.ceil(n_obs / 64))) + print("compute gradients wrt. input: batch %i / %i." % (step + 1, np.ceil(n_obs / 64))) x = x_batch with tf.GradientTape(persistent=True) as tape: tape.watch(x) model_out = model(x) if abs_gradients: - f = lambda x: abs(x) + def f(x): + return abs(x) else: - f = lambda x: x + def f(x): + return x # marginalize on batch level and then accumulate batches # batch_jacobian gives output of size: (batch_size, latent_dim, input_dim) batch_gradients = f(tape.batch_jacobian(model_out, x).numpy()) diff --git a/sfaira/estimators/losses.py b/sfaira/estimators/losses.py index ee2fc03d0..ad46d3ef9 100644 --- a/sfaira/estimators/losses.py +++ b/sfaira/estimators/losses.py @@ -52,7 +52,7 @@ def call( """Implements the gaussian log likelihood loss as VAE reconstruction loss""" loc, scale = tf.split(y_pred, num_or_size_splits=2, axis=1) - ll = -tf.math.log(scale*tf.math.sqrt(2.*np.pi)) - 0.5*tf.math.square((y_true - loc) / scale) + ll = -tf.math.log(scale * tf.math.sqrt(2. 
* np.pi)) - 0.5 * tf.math.square((y_true - loc) / scale) ll = tf.clip_by_value(ll, -300, 300, "log_probs") neg_ll = -ll if self.average: diff --git a/sfaira/estimators/metrics.py b/sfaira/estimators/metrics.py index 74ee58b45..864eb0134 100644 --- a/sfaira/estimators/metrics.py +++ b/sfaira/estimators/metrics.py @@ -62,8 +62,8 @@ def __init__(self, name='acc_agg', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): phat_pos_agg = tf.reduce_sum(y_true * y_pred, axis=1, keepdims=True) acc_agg = tf.cast( - phat_pos_agg > tf.reduce_max((tf.ones_like(y_true) - y_true) * y_pred, axis=1), - dtype=y_true.dtype + phat_pos_agg > tf.reduce_max((tf.ones_like(y_true) - y_true) * y_pred, axis=1), + dtype=y_true.dtype ) # Do not use weighting for accuracy. self.acc_agg.assign_add(tf.reduce_mean(acc_agg)) @@ -86,8 +86,8 @@ def __init__(self, k: int, name='tpr', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): tp_by_class = tf.reduce_sum(tf.cast( - y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), - dtype=y_true.dtype + y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), + dtype=y_true.dtype ) * y_true, axis=0) fn_by_class = tf.reduce_sum(tf.cast( y_pred < tf.reduce_max(y_pred, axis=1, keepdims=True), @@ -147,8 +147,8 @@ def __init__(self, k: int, name='f1', **kwargs): def update_state(self, y_true, y_pred, sample_weight=None): tp_by_class = tf.reduce_sum(tf.cast( - y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), - dtype=y_true.dtype + y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), + dtype=y_true.dtype ) * y_true, axis=0) fp_by_class = tf.reduce_sum(tf.cast( y_pred == tf.reduce_max(y_pred, axis=1, keepdims=True), diff --git a/sfaira/genomes/generate_feature_list.py b/sfaira/genomes/generate_feature_list.py index 3cb73d555..838f53e87 100644 --- a/sfaira/genomes/generate_feature_list.py +++ b/sfaira/genomes/generate_feature_list.py @@ -5,11 +5,11 @@ class ExtractFeatureList: gene_table: Union[None, pandas.DataFrame] 
- species: Union[None, str] + organism: Union[None, str] release: Union[None, str] def __init__(self): - self.species = None + self.organism = None self.release = None self.gene_table = None @@ -39,7 +39,7 @@ def from_ensemble_gtf( :return: """ gtf_name = fn.split("/")[-1] - self.species = gtf_name.split(".")[0] + self.organism = gtf_name.split(".")[0] self.release = "_".join(gtf_name.split(".")[1:-1]) tab = pandas.read_table( @@ -63,4 +63,4 @@ def reduce_types_protein_coding(self): self.reduce_types(types=["protein_coding"]) def write_gene_table_to_csv(self, path): - self.gene_table.to_csv(path_or_buf=path + self.species + "_" + self.release + ".csv") + self.gene_table.to_csv(path_or_buf=path + self.organism + "_" + self.release + ".csv") diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py index 5e70f72b4..51dee4b72 100644 --- a/sfaira/interface/__init__.py +++ b/sfaira/interface/__init__.py @@ -1 +1,2 @@ +from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype from sfaira.interface.user_interface import UserInterface diff --git a/sfaira/interface/external.py b/sfaira/interface/external.py deleted file mode 100644 index fdb52e721..000000000 --- a/sfaira/interface/external.py +++ /dev/null @@ -1,5 +0,0 @@ -from sfaira.estimators import EstimatorKeras, EstimatorKerasEmbedding, EstimatorKerasCelltype -import sfaira.versions.celltype_versions as celltype_versions -from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.topology_versions import Topologies -from sfaira.data.interactive import DatasetInteractive diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 94612b2d1..2aab74632 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -7,7 +7,8 @@ import pandas as pd from typing import List, Union -from .external import celltype_versions, Topologies +from sfaira.versions.celltype_versions import ORGANISM_DICT +from 
sfaira.versions.topology_versions import Topologies class ModelZoo(abc.ABC): @@ -18,7 +19,7 @@ class ModelZoo(abc.ABC): ontology: dict model_id: Union[str, None] model_class: Union[str, None] - species: Union[str, None] + organism: Union[str, None] organ: Union[str, None] model_class: Union[str, None] model_type: Union[str, None] @@ -37,7 +38,7 @@ def __init__( self.ontology = self.load_ontology_from_model_ids(model_lookuptable['model_id'].values) self.model_id = None self.model_class = None - self.species = None + self.organism = None self.organ = None self.model_type = None self.organisation = None @@ -80,7 +81,7 @@ def set_model_id( self.model_id = model_id ixs = self.model_id.split('_') self.model_class = ixs[0] - self.species = ixs[1] + self.organism = ixs[1] self.organ = ixs[2] self.model_type = ixs[3] self.organisation = ixs[4] @@ -88,7 +89,7 @@ def set_model_id( self.model_version = ixs[6] self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class=self.model_class, model_type=self.model_type, topology_id=self.model_topology @@ -129,114 +130,115 @@ def call_kipoi(self): with_dataloader=True ) # TODO make sure that this is in line with kipoi_experimental model names # alternatively: - #return kipoi_experimental.get_model("https://github.com/kipoi/models/tree/7d3ea7800184de414aac16811deba6c8eefef2b6/pwm_HOCOMOCO/human/CTCF", source='github-permalink') + # return kipoi_experimental.get_model("https://github.com/kipoi/models/tree/7d3ea7800184de414aac16811deba6c8eefef2b6/pwm_HOCOMOCO/human/CTCF", + # source='github-permalink') - def species(self) -> List[str]: + def organism(self) -> List[str]: """ - Return list of available species. + Return list of available organism. - :return: List of species available. + :return: List of organism available. """ return self.ontology.keys() def organs( self, - species: str + organism: str ) -> List[str]: """ - Return list of available organs for a given species. 
+ Return list of available organs for a given organism. - :param species: Identifier of species to show organs for. + :param organism: Identifier of organism to show organs for. :return: List of organs available. """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - return self.ontology[species].keys() + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + return self.ontology[organism].keys() def models( self, - species: str, + organism: str, organ: str ) -> List[str]: """ - Return list of available models for a given species, organ. + Return list of available models for a given organism, organ. - :param species: Identifier of species to show organs for. + :param organism: Identifier of organism to show organs for. :param organ: Identifier of organ to show versions for. :return: List of models available. """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - return self.ontology[species][organ].keys() + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + return self.ontology[organism][organ].keys() def organisation( self, - species: str, + organism: str, organ: str, model_type: str ) -> List[str]: """ - Return list of available organisation that trained a given model for a given species and organ + Return list of available organisation that trained a given model for a given organism and organ - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - return self.ontology[species][organ][model_type] + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + return self.ontology[organism][organ][model_type] def topology( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str ) -> List[str]: """ Return list of available model topologies that trained by a given organisation, - a given model for a given species and organ + a given model for a given organism and organ - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - return self.ontology[species][organ][model_type][organisation] + return self.ontology[organism][organ][model_type][organisation] def versions( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, model_topology: str ) -> List[str]: """ - Return list of available model versions of a given organisation for a given species and organ and model. + Return list of available model versions of a given organisation for a given organism and organ and model. - :param species: Identifier of species to show versions for. + :param organism: Identifier of organism to show versions for. :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :param model_topology: Identifier of model_topology to show versions for. :return: List of versions available. 
""" - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" - return self.ontology[species][organ][model_type][organisation][model_topology] + return self.ontology[organism][organ][model_type][organisation][model_topology] @property def genome(self): @@ -260,7 +262,7 @@ class ModelZooEmbedding(ModelZoo): """ The supported model ontology is: - species -> organ -> model -> organisation -> topology -> version -> ID + organism -> organ -> model -> organisation -> topology -> version -> ID Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. 
""" @@ -279,12 +281,12 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'embedding'] id_df = pd.DataFrame( [i.split('_')[1:7] for i in ids], - columns=['species', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - species = np.unique(id_df['species']) - ontology = dict.fromkeys(species) - for g in species: - id_df_g = id_df[id_df.species == g] + organism = np.unique(id_df['organism']) + ontology = dict.fromkeys(organism) + for g in organism: + id_df_g = id_df[id_df.organism == g] organ = np.unique(id_df_g['organ']) ontology[g] = dict.fromkeys(organ) for o in organ: @@ -307,7 +309,7 @@ def load_ontology_from_model_ids( def set_latest( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, @@ -316,38 +318,38 @@ def set_latest( """ Set model ID to latest model in given ontology group. - :param species: Identifier of species to select. + :param organism: Identifier of organism to select. :param organ: Identifier of organ to select. :param model_type: Identifier of model_type to select. :param organisation: Identifier of organisation to select. 
:param model_topology: Identifier of model_topology to select :return: """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - species=species, + organism=organism, organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.species = species + self.organism = organism self.organ = organ self.model_type = model_type self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be species/organ specific later + self.model_topology = model_topology # set to model for now, could be organism/organ specific later self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'embedding', - self.species, + self.organism, self.organ, self.model_type, self.organisation, @@ -355,7 +357,7 @@ def set_latest( self.model_version ]) self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class="embedding", model_type=self.model_type, 
topology_id=self.model_topology @@ -366,7 +368,7 @@ class ModelZooCelltype(ModelZoo): """ The supported model ontology is: - species -> organ -> model -> organisation -> topology -> version -> ID + organism -> organ -> model -> organisation -> topology -> version -> ID Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. @@ -388,12 +390,12 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'celltype'] id_df = pd.DataFrame( [i.split('_')[1:7] for i in ids], - columns=['species', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - species = np.unique(id_df['species']) - ontology = dict.fromkeys(species) - for g in species: - id_df_g = id_df[id_df.species == g] + organism = np.unique(id_df['organism']) + ontology = dict.fromkeys(organism) + for g in organism: + id_df_g = id_df[id_df.organism == g] organ = np.unique(id_df_g['organ']) ontology[g] = dict.fromkeys(organ) for o in organ: @@ -416,7 +418,7 @@ def load_ontology_from_model_ids( def set_latest( self, - species: str, + organism: str, organ: str, model_type: str, organisation: str, @@ -425,39 +427,39 @@ def set_latest( """ Set model ID to latest model in given ontology group. - :param species: Identifier of species to select. + :param organism: Identifier of organism to select. :param organ: Identifier of organ to select. :param model_type: Identifier of model_type to select. :param organisation: Identifier of organisation to select. 
:param model_topology: Identifier of model_topology to select :return: """ - assert species in self.ontology.keys(), "species requested was not found in ontology" - assert organ in self.ontology[species].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[species][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[species][organ][model_type].keys(), \ + assert organism in self.ontology.keys(), "organism requested was not found in ontology" + assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" + assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[organism][organ][model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[species][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - species=species, + organism=organism, organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.species = species + self.organism = organism self.organ = organ self.model_type = model_type self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be species/organ specific later + self.model_topology = model_topology # set to model for now, could be organism/organ specific later self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'celltype', - self.species, + self.organism, self.organ, self.model_type, self.organisation, @@ -465,9 +467,9 @@ def set_latest( self.model_version ]) self.topology_container = Topologies( - species=self.species, + organism=self.organism, model_class="celltype", model_type=self.model_type, 
topology_id=self.model_topology ) - self.celltypes = celltype_versions.SPECIES_DICT[self.species][self.organ].celltype_universe[self.model_version.split(".")[0]] + self.celltypes = ORGANISM_DICT[self.organism][self.organ].celltype_universe[self.model_version.split(".")[0]] diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index ef0e1ad70..a7b70a7de 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -9,8 +9,9 @@ from typing import List, Union import warnings -from .external import EstimatorKerasEmbedding, EstimatorKerasCelltype, DatasetInteractive -from .model_zoo import ModelZooEmbedding, ModelZooCelltype +from sfaira.data import DatasetInteractive +from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype +from sfaira.interface.model_zoo import ModelZooEmbedding, ModelZooCelltype class UserInterface: @@ -25,8 +26,8 @@ class UserInterface: # initialise your sfaira instance with a model lookuptable. 
# instead of setting `custom_repo` when initialising the UI you can also use `sfaira_repo=True` to use public weights ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) - ui.zoo_embedding.set_latest(species, organ, model_type, organisation, model_topology) - ui.zoo_celltype.set_latest(species, organ, model_type, organisation, model_topology) + ui.zoo_embedding.set_latest(organism, organ, model_type, organisation, model_topology) + ui.zoo_celltype.set_latest(organism, organ, model_type, organisation, model_topology) ui.load_data(anndata.read("/path/to/file.h5ad")) # load your dataset into sfaira ui.load_model_embedding() ui.load_model_celltype() @@ -142,9 +143,9 @@ def write_lookuptable( if ids: pd.DataFrame( - list(zip(ids_cleaned, model_paths, file_paths, md5)), - columns=['model_id', 'model_path', 'model_file_path', 'md5'] - )\ + list(zip(ids_cleaned, model_paths, file_paths, md5)), + columns=['model_id', 'model_path', 'model_file_path', 'md5'] + )\ .sort_values('model_id')\ .reset_index(drop=True)\ .to_csv(os.path.join(repo_path, 'model_lookuptable.csv')) @@ -171,11 +172,17 @@ def deposit_zenodo( :param zenodo_access_token: Your personal Zenodo API access token. Create one here: https://zenodo.org/account/settings/applications/tokens/new/ :param title: Title of the Zenodo deposition - :param authors: List of dicts, where each dict defines one author (dict keys: name: Name of creator in the format "Family name, Given names", affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), gnd: GND identifier of creator (optional) + :param authors: List of dicts, where each dict defines one author (dict keys: + name: Name of creator in the format "Family name, Given names", + affiliation: Affiliation of creator (optional), orcid: ORCID identifier of creator (optional), + gnd: GND identifier of creator (optional) :param description: Description of the Zenodo deposition. 
- :param metadata: Dictionary with further metadata attributes of the deposit. See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation - :param publish: Set this to True to directly publish the weights on Zenodo. When set to False a draft will be created, which can be edited in the browser before publishing. - :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. + :param metadata: Dictionary with further metadata attributes of the deposit. + See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation + :param publish: Set this to True to directly publish the weights on Zenodo. + When set to False a draft will be created, which can be edited in the browser before publishing. + :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. + We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. """ import requests @@ -233,7 +240,7 @@ def deposit_zenodo( 'license': 'cc-by-4.0', 'upload_type': 'dataset', 'access_right': 'open' - } + } meta = {**meta_core, **metadata} r = requests.put(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}', params=params, @@ -269,22 +276,30 @@ def load_data( self, data: anndata.AnnData, gene_symbol_col: Union[str, None] = None, - gene_ens_col: Union[str, None] = None + gene_ens_col: Union[str, None] = None, + remove_gene_version: bool = True, + match_to_reference: Union[str, None] = None, ): """ Loads the provided AnnData object into sfaira. - If genes in the provided AnnData object are annotated as gene symbols, please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. 
- If genes in the provided AnnData object are annotated as ensembl ids, please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. + + If genes in the provided AnnData object are annotated as gene symbols, + please provide the name of the corresponding var column (or 'index') through the gene_symbol_col argument. + If genes in the provided AnnData object are annotated as ensembl ids, + please provide the name of the corresponding var column (or 'index') through the gene_ens_col argument. You need to provide at least one of the two. :param data: AnnData object to load :param gene_symbol_col: Var column name (or 'index') which contains gene symbols :param gene_ens_col: ar column name (or 'index') which contains ensembl ids + :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different + data sets are superimposed. + :param match_to_reference: Reference genomes name. """ - if self.zoo_embedding.species is not None: - species = self.zoo_embedding.species + if self.zoo_embedding.organism is not None: + organism = self.zoo_embedding.organism organ = self.zoo_embedding.organ - elif self.zoo_celltype.species is not None: - species = self.zoo_celltype.species + elif self.zoo_celltype.organism is not None: + organism = self.zoo_celltype.organism organ = self.zoo_celltype.organ else: raise ValueError("Please first set which model_id to use via the model zoo before loading the data") @@ -293,13 +308,20 @@ def load_data( raise ValueError("Please provide either the gene_ens_col or the gene_symbol_col argument.") dataset = DatasetInteractive( - data=data, - species=species, - organ=organ, - gene_symbol_col=gene_symbol_col, - gene_ens_col=gene_ens_col - ) - dataset.load() + data=data, + organism=organism, + organ=organ, + gene_symbol_col=gene_symbol_col, + gene_ens_col=gene_ens_col + ) + dataset.load( + celltype_version=None, + fn=None, + remove_gene_version=remove_gene_version, + 
match_to_reference=match_to_reference, + load_raw=False, + allow_caching=False, + ) self.data = dataset.adata def filter_cells(self): @@ -326,7 +348,7 @@ def load_model_embedding(self): data=self.data, model_dir=model_dir, model_id=self.zoo_embedding.model_id, - species=self.zoo_embedding.species, + organism=self.zoo_embedding.organism, organ=self.zoo_embedding.organ, model_type=self.zoo_embedding.model_type, model_topology=self.zoo_embedding.model_topology, @@ -351,7 +373,7 @@ def load_model_celltype(self): data=self.data, model_dir=model_dir, model_id=self.zoo_celltype.model_id, - species=self.zoo_celltype.species, + organism=self.zoo_celltype.organism, organ=self.zoo_celltype.organ, model_type=self.zoo_celltype.model_type, model_topology=self.zoo_celltype.model_topology, diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index c5c2ca03d..ac8d4da39 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -11,6 +11,7 @@ class LearnedThresholdLayer(tf.keras.layers.Layer): """ A layer that thresholds the input with a learned threshold. """ + def __init__( self, out_dim, @@ -97,7 +98,7 @@ class CellTypeMarkerVersioned(CellTypeMarker): def __init__( self, - species: str, + organism: str, organ: str, topology_container: Topologies, override_hyperpar: Union[dict, None] = None @@ -110,8 +111,8 @@ def __init__( :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ - # Get cell type version instance based on topology ID, species and organ. - self.celltypes_version = celltype_versions.SPECIES_DICT[species.lower()][organ.lower()] + # Get cell type version instance based on topology ID, organism and organ. 
+ self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] self.celltypes_version.set_version(version=topology_container.topology_id) unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) @@ -119,8 +120,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - CellTypeMarker.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, **hyperpar @@ -131,7 +131,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index b94fc4226..9b296862c 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -73,7 +73,7 @@ class CellTypeMlpVersioned(CellTypeMlp): def __init__( self, - species: str, + organism: str, organ: str, topology_container: Topologies, override_hyperpar: Union[dict, None] = None @@ -86,8 +86,8 @@ def __init__( :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ - # Get cell type version instance based on topology ID, species and organ. - self.celltypes_version = celltype_versions.SPECIES_DICT[species.lower()][organ.lower()] + # Get cell type version instance based on topology ID, organism and organ. 
+ self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] self.celltypes_version.set_version(version=topology_container.topology_id) unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) @@ -95,8 +95,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - CellTypeMlp.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, **hyperpar @@ -107,7 +106,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index 27b4d069c..58fa5dc47 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -58,8 +58,8 @@ def __init__( self.layer_list.append(tf.keras.layers.Dropout(hid_drop, name='enc_%s_drop' % i)) def call(self, x, **kwargs): - for l in self.layer_list: - x = l(x) + for layer in self.layer_list: + x = layer(x) return x @@ -105,8 +105,8 @@ def __init__( self.layer_list.append(tf.keras.layers.Dropout(hid_drop, name='dec_%s_drop' % i)) def call(self, x, **kwargs): - for l in self.layer_list: - x = l(x) + for layer in self.layer_list: + x = layer(x) return x @@ -214,8 +214,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelAe.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -225,7 +224,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + 
list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 93fd33ee6..72ac6f8e2 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -109,8 +109,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelLinear.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -120,7 +119,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/output_layers.py b/sfaira/models/embedding/output_layers.py index d4192c7c6..65ac4a56a 100644 --- a/sfaira/models/embedding/output_layers.py +++ b/sfaira/models/embedding/output_layers.py @@ -3,6 +3,7 @@ class NegBinOutput(tf.keras.layers.Layer): """Negative binomial output layer""" + def __init__( self, original_dim=None, @@ -203,4 +204,4 @@ def call(self, inputs, **kwargs): invlinker_mean = mean_clip + sf invlinker_var = tf.exp(var_clip) - return [invlinker_mean, invlinker_var] \ No newline at end of file + return [invlinker_mean, invlinker_var] diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index b24b0d75e..f122d670b 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -161,7 +161,6 @@ def __init__( else: raise ValueError("len(latent_dim)=%i should be uneven to provide a defined bottleneck" % len(latent_dim)) - inputs_encoder = tf.keras.Input(shape=(in_dim,), name='counts') inputs_sf = tf.keras.Input(shape=(1,), name='size_factors') inputs_encoder_pp = PreprocInput()(inputs_encoder) @@ -237,8 +236,7 @@ def __init__( if 
override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVae.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -248,7 +246,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 3cc518fdc..80436e1a3 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -96,17 +96,17 @@ def call(self, inputs, **kwargs): class IAF(tf.keras.layers.Layer): def __init__( - self, - bottleneck: int, - n_iaf: int, - l1_coef: float, - l2_coef: float, - masking_dim=320, - n_made=2, - activation="relu", - name='iaf', - **kwargs - ): + self, + bottleneck: int, + n_iaf: int, + l1_coef: float, + l2_coef: float, + masking_dim=320, + n_made=2, + activation="relu", + name='iaf', + **kwargs + ): """ Transforms latent space with simple distribution to one with a more flexible one. 
@@ -241,7 +241,6 @@ def __init__( else: raise ValueError("len(latent_dim)=%i should be uneven to provide a defined bottleneck" % len(latent_dim)) - inputs_encoder = tf.keras.Input(shape=(in_dim,), name='counts') inputs_sf = tf.keras.Input(shape=(1,), name='size_factors') inputs_encoder_pp = PreprocInput()(inputs_encoder) @@ -255,7 +254,7 @@ def __init__( kernel_initializer=init ) iaf = IAF( - bottleneck=latent_dim[n_layers_enc-1], + bottleneck=latent_dim[n_layers_enc - 1], n_iaf=n_iaf, l1_coef=l1_coef, l2_coef=l2_coef @@ -284,8 +283,8 @@ def __init__( z, s_t_sigmas = iaf([z, h]) z_t_square_mc += tf.square(z) z_t_mean += z - z_t_square_mc = z_t_square_mc/mc_samples - z_t_mean = z_t_mean/mc_samples + z_t_square_mc = z_t_square_mc / mc_samples + z_t_mean = z_t_mean / mc_samples cum_s_t_log_var = 0 for s_t_sigma in s_t_sigmas: @@ -355,8 +354,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVaeIAF.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -366,7 +364,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index 55ce3bd47..db9d75c81 100644 --- a/sfaira/models/embedding/vaevamp.py +++ b/sfaira/models/embedding/vaevamp.py @@ -298,8 +298,7 @@ def __init__( if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] - ModelVaeVamp.__init__( - self=self, + super().__init__( in_dim=topology_container.ngenes, **hyperpar ) @@ -310,7 +309,7 @@ def __init__( self.model_class = topology_container.model_class self.model_type = topology_container.model_type self.hyperparam = dict( - 
list(hyperpar.items()) + + list(hyperpar.items()) + # noqa: W504 [ ("topology_id", self._topology_id), ("genome_size", self.genome_size), diff --git a/sfaira/models/made.py b/sfaira/models/made.py index fb08e2b67..eec724905 100644 --- a/sfaira/models/made.py +++ b/sfaira/models/made.py @@ -59,6 +59,7 @@ def __init__(self, units, out_units, self.kernel_constraint = constraints.get(kernel_constraint) self.bias_constraint = constraints.get(bias_constraint) self.batchnorm = batchnorm + def dropout_wrapper(self, inputs, training): if 0. < self.rate < 1.: def dropped_inputs(): diff --git a/sfaira/train/external.py b/sfaira/train/external.py deleted file mode 100644 index 158904c07..000000000 --- a/sfaira/train/external.py +++ /dev/null @@ -1,5 +0,0 @@ -from sfaira.versions.celltype_versions import SPECIES_DICT -from sfaira.data import DatasetGroupBase, DatasetSuperGroup -from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype -from sfaira.data import mouse, human diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 86119aa17..74cdc6cb9 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -6,10 +6,10 @@ import warnings from typing import Union, List import os -from .train_model import TargetZoos -from .external import SPECIES_DICT -from .external import EstimatorKerasEmbedding +from sfaira.train.train_model import TargetZoos +from sfaira.versions.celltype_versions import ORGANISM_DICT +from sfaira.estimators import EstimatorKerasEmbedding def _tp(yhat, ytrue): @@ -666,15 +666,15 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 
2] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], "model_type": [ "linear" if (id_i.split("_")[3] == "mlp" and id_i.split("_")[5].split(".")[1] == "0") else id_i.split("_")[3] @@ -682,10 +682,10 @@ def create_summary_tab(self): ], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids - }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + - list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + + }.items()) + # noqa: W504 + list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 + list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 + 
list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 list(dict([("all_" + m, [self.evals[x]["all"][m] for x in self.run_ids]) for m in metrics]).items()) )) if self.summary_tab.shape[0] == 0: @@ -808,7 +808,7 @@ def plot_best( fig, axs = plt.subplots(1, 1, figsize=(height_fig, width_fig)) with sns.axes_style("dark"): axs = sns.heatmap( - sns_data_heatmap, #mask=mask, + sns_data_heatmap, # mask=mask, annot=True, fmt=".2f", ax=axs, vmin=0, vmax=1, xticklabels=True, yticklabels=True, @@ -835,7 +835,7 @@ def plot_best_classwise_heatmap( Plot evaluation metric heatmap for specified organ by cell classes and model types. :param organ: Organ to plot in heatmap. - :param organism: Species that the gridsearch was run on + :param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository :param celltype_version: Version in sfaira celltype universe :param partition_select: Based on which partition to select the best model @@ -883,11 +883,11 @@ def plot_best_classwise_heatmap( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all() + raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = SPECIES_DICT.copy() + celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] @@ -900,7 +900,7 @@ def plot_best_classwise_heatmap( for leaf in ontology[k]: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1/len(ontology[k]) + cell_counts[leaf] += 1 / len(ontology[k]) del cell_counts[k] # Compute class-wise metrics @@ -998,7 +998,7 @@ def plot_best_classwise_scatter( Plot evaluation metric scatterplot for specified organ by cell classes and model types. :param organ: Organ to plot in heatmap. - :param organism: Species that the gridsearch was run on + :param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository :param celltype_version: Version in sfaira celltype universe :param partition_select: Based on which partition to select the best model @@ -1048,11 +1048,11 @@ def plot_best_classwise_scatter( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all() + raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = SPECIES_DICT.copy() + celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] @@ -1065,7 +1065,7 @@ def plot_best_classwise_scatter( for leaf in ontology[k]: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1/len(ontology[k]) + cell_counts[leaf] += 1 / len(ontology[k]) del cell_counts[k] # Compute class-wise metrics @@ -1175,23 +1175,31 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in 
self.run_ids], + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], + "organ": [id_i.split("_")[2] for id_i in self.run_ids], + "model_type": [id_i.split("_")[3] for id_i in self.run_ids], "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], "run": self.run_ids, - }.items()) + - list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() else self.evals[x]["train"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() else self.evals[x]["test"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() else self.evals[x]["val"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("all_" + m, [self.evals[x]["all"][m] if m in self.evals[x]["all"].keys() else self.evals[x]["all"]['neg_ll_'+m] for x in self.run_ids]) for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + }.items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() + else self.evals[x]["train"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("test_" + m, 
[self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() + else self.evals[x]["test"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() + else self.evals[x]["val"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + list(dict([("all_" + m, [self.evals[x]["all"][m] if m in self.evals[x]["all"].keys() + else self.evals[x]["all"]['neg_ll_' + m] for x in self.run_ids]) + for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models )) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models @@ -1292,7 +1300,7 @@ def plot_best( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), f"{partition_show}_{metric_show}" + ), f"{partition_show}_{metric_show}" ] if data_temp.shape[0] > 0: if self.cv: @@ -1319,7 +1327,7 @@ def plot_best( fig, axs = plt.subplots(1, 1, figsize=(height_fig, width_fig)) with sns.axes_style("dark"): axs = sns.heatmap( - sns_data_heatmap, #mask=mask, + sns_data_heatmap, # mask=mask, annot=True, fmt=".2f", ax=axs, xticklabels=True, yticklabels=True, @@ -1381,18 +1389,17 @@ def get_gradients_by_celltype( elif organism == "mouse": dataset = tz.data_mouse[organ] else: - raise (ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'human')")) - dataset.load_all(annotated_only=True) + raise (ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset.load(annotated_only=True) print('Compute gradients (2/3): load embedding') # load embedding adata = dataset.adata - topology = model_id embedding = EstimatorKerasEmbedding( data=adata, model_dir="", model_id="", - species=organism, + organism=organism, organ=organ, model_type=model_type, model_topology=model_id.split('_')[5] @@ -1471,10 +1478,10 @@ def plot_gradient_distr( if normalize: avg_grads[modelt] = np.abs(avg_grads[modelt]) avg_grads[modelt] = (avg_grads[modelt] - np.min(avg_grads[modelt], axis=1, keepdims=True)) / \ - np.maximum( - np.max(avg_grads[modelt], axis=1, keepdims=True) - np.min(avg_grads[modelt], - axis=1, - keepdims=True), 1e-8) + np.maximum( + np.max(avg_grads[modelt], axis=1, keepdims=True) - np.min(avg_grads[modelt], + axis=1, + keepdims=True), 1e-8) fig, axs = plt.subplots(1, 1, figsize=(width_fig, height_fig)) @@ -1613,10 +1620,10 @@ def plot_npc( """ import matplotlib.pyplot as plt if self.summary_tab is None: - self.create_summary_tab() + self.create_summary_tab() models = np.unique(self.summary_tab["model_type"]).tolist() self.summary_tab["topology"] = [x.split("_")[5] for x in self.summary_tab["model_gs_id"].values] - + with plt.style.context("seaborn-whitegrid"): plt.figure(figsize=(12, 6)) for model in models: @@ -1635,7 +1642,7 @@ def plot_npc( eig_sum = sum(eig_vals) var_exp = [(i / eig_sum) for i in sorted(eig_vals, reverse=True)] cum_var_exp = np.cumsum([0] + var_exp) - plt.step(range(0, eig_vals.shape[0]+1), cum_var_exp, where="post", linewidth=3, + plt.step(range(0, eig_vals.shape[0] + 1), cum_var_exp, where="post", linewidth=3, label="%s cumulative explained variance (95%%: %s / 99%%: %s)" % (model, np.sum(cum_var_exp < .95), np.sum(cum_var_exp < .99))) plt.yticks([0.0, .25, .50, .75, .95, .99]) plt.ylabel("Explained variance ratio", fontsize=16) @@ -1645,8 +1652,8 @@ def plot_npc( plt.show() def plot_active_latent_units( - self, - organ, + self, + organ, topology_version, 
cvs=None ): @@ -1664,7 +1671,7 @@ def active_latent_units_mask(z): min_var_x = 0.01 active_units_mask = var_x > min_var_x return active_units_mask - + import matplotlib.pyplot as plt if self.summary_tab is None: self.create_summary_tab() @@ -1676,11 +1683,11 @@ def active_latent_units_mask(z): plt.axhline(np.log(0.01), color="k", linestyle='dashed', linewidth=2, label="active unit threshold") for i, model in enumerate(models): model_id, embedding, covar = self.best_model_embedding( - subset={"model_type": model, "organ": organ, "topology": topology_version}, - partition="val", - metric="loss", - cvs=cvs, - ) + subset={"model_type": model, "organ": organ, "topology": topology_version}, + partition="val", + metric="loss", + cvs=cvs, + ) if len(embedding[0].shape) == 3: z = embedding[0][0] # in case of three-dimensional VAE embedding (z, z_mean, z_var), use z else: @@ -1690,7 +1697,7 @@ def active_latent_units_mask(z): log_var = np.log(var) active_units = np.log(var[active_latent_units_mask(z)]) - plt.plot(range(1,log_var.shape[0]+1), log_var, color=colors[i], alpha=1.0, linewidth=3, + plt.plot(range(1, log_var.shape[0] + 1), log_var, color=colors[i], alpha=1.0, linewidth=3, label="%s active units: %i" % (model, len(active_units))) # to plot vertical lines log_var_cut = var.copy() @@ -1698,13 +1705,13 @@ def active_latent_units_mask(z): log_var_cut = np.log(log_var_cut) num_active = np.argmax(log_var_cut) if num_active > 0: - plt.vlines(num_active, ymin = -.15, ymax = 0.15, color=colors[i], linestyle='solid', linewidth=3) + plt.vlines(num_active, ymin=-.15, ymax=0.15, color=colors[i], linestyle='solid', linewidth=3) if model == "vaevamp": - z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))),2) - plt.plot(range(1, int(latent_dim/2)+1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, - label=r"%s $z_2$ active units: %i" % (model, len(z2[z2>np.log(0.01)])), linestyle='dashed', + z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))), 2) + plt.plot(range(1, int(latent_dim / 2) 
+ 1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, + label=r"%s $z_2$ active units: %i" % (model, len(z2[z2 > np.log(0.01)])), linestyle='dashed', linewidth=3) - plt.plot(range(1, int(latent_dim/2)+1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, + plt.plot(range(1, int(latent_dim / 2) + 1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, label=r"%s $z_1$ active units: %i" % (model, len(z1[z1 > np.log(0.01)])), linestyle='dotted', linewidth=3) plt.xlabel(r'Latent unit $i$', fontsize=16) diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index c6125d400..45abfb1bb 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,103 +5,103 @@ import pickle from typing import Union -from .external import DatasetGroupBase, DatasetSuperGroup -from .external import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from .external import ModelZoo, ModelZooEmbedding, ModelZooCelltype -from .external import mouse, human -from .external import SPECIES_DICT +from sfaira.data import DatasetGroup, DatasetSuperGroup +from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding +from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.versions.celltype_versions import ORGANISM_DICT class TargetZoos: """ - Class that provides access to all available dataset groups in sfaira. + Class that provides access to all available dataset human in sfaira. 
Parameters ---------- path : str - The name of the animal + Path to the files for this dataset on disk meta_path : str - The sound the animal makes + Path to the meta files for this dataset on disk """ - def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None): + def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None): if path is not None: + from sfaira.data.dataloaders.anatomical_groups import mouse, human self.data_mouse = { - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path), - "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path), - "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path), - "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path), - "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path), - "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path), - 
"stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path), - "uterus": mouse.DatasetGroupUterus(path=path) + "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), + "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), + "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path), + "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), + "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), + "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), + "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), + "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path), + "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), + "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), + "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path, cache_path=cache_path), + "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path), + "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), + "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), + "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), + "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path, 
cache_path=cache_path), + "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), + "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), + "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), + "spleen": mouse.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), + "stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), + "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), + "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path), + "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), + "uterus": mouse.DatasetGroupUterus(path=path, cache_path=cache_path), } self.data_human = { - 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path), - 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path), - 'mixed': human.DatasetGroupMixed(path=path, meta_path=meta_path), - 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path), - 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path), - 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path), - 'bone': human.DatasetGroupBone(path=path, meta_path=meta_path), - 'brain': human.DatasetGroupBrain(path=path, meta_path=meta_path), - 'calvaria': human.DatasetGroupCalvaria(path=path, meta_path=meta_path), - 'cervix': human.DatasetGroupCervix(path=path, meta_path=meta_path), - 'chorionicvillus': human.DatasetGroupChorionicvillus(path=path, meta_path=meta_path), - 'colon': human.DatasetGroupColon(path=path, meta_path=meta_path), - 'duodenum': human.DatasetGroupDuodenum(path=path, meta_path=meta_path), - 'epityphlon': human.DatasetGroupEpityphlon(path=path, meta_path=meta_path), - 'esophagus': 
human.DatasetGroupEsophagus(path=path, meta_path=meta_path), - 'eye': human.DatasetGroupEye(path=path, meta_path=meta_path), - 'fallopiantube': human.DatasetGroupFallopiantube(path=path, meta_path=meta_path), - 'femalegonad': human.DatasetGroupFemalegonad(path=path, meta_path=meta_path), - 'gallbladder': human.DatasetGroupGallbladder(path=path, meta_path=meta_path), - 'heart': human.DatasetGroupHeart(path=path, meta_path=meta_path), - 'hesc': human.DatasetGroupHesc(path=path, meta_path=meta_path), - 'ileum': human.DatasetGroupIleum(path=path, meta_path=meta_path), - 'jejunum': human.DatasetGroupJejunum(path=path, meta_path=meta_path), - 'kidney': human.DatasetGroupKidney(path=path, meta_path=meta_path), - 'liver': human.DatasetGroupLiver(path=path, meta_path=meta_path), - 'lung': human.DatasetGroupLung(path=path, meta_path=meta_path), - 'malegonad': human.DatasetGroupMalegonad(path=path, meta_path=meta_path), - 'muscle': human.DatasetGroupMuscle(path=path, meta_path=meta_path), - 'omentum': human.DatasetGroupOmentum(path=path, meta_path=meta_path), - 'pancreas': human.DatasetGroupPancreas(path=path, meta_path=meta_path), - 'placenta': human.DatasetGroupPlacenta(path=path, meta_path=meta_path), - 'pleura': human.DatasetGroupPleura(path=path, meta_path=meta_path), - 'prostate': human.DatasetGroupProstate(path=path, meta_path=meta_path), - 'rectum': human.DatasetGroupRectum(path=path, meta_path=meta_path), - 'rib': human.DatasetGroupRib(path=path, meta_path=meta_path), - 'skin': human.DatasetGroupSkin(path=path, meta_path=meta_path), - 'spinalcord': human.DatasetGroupSpinalcord(path=path, meta_path=meta_path), - 'spleen': human.DatasetGroupSpleen(path=path, meta_path=meta_path), - 'stomach': human.DatasetGroupStomach(path=path, meta_path=meta_path), - 'thymus': human.DatasetGroupThymus(path=path, meta_path=meta_path), - 'thyroid': human.DatasetGroupThyroid(path=path, meta_path=meta_path), - 'trachea': human.DatasetGroupTrachea(path=path, meta_path=meta_path), - 
'ureter': human.DatasetGroupUreter(path=path, meta_path=meta_path), - 'uterus': human.DatasetGroupUterus(path=path, meta_path=meta_path), + 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), + 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path), + 'mixed': human.DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path), + 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path), + 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), + 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), + 'bone': human.DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path), + 'brain': human.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), + 'calvaria': human.DatasetGroupCalvaria(path=path, meta_path=meta_path, cache_path=cache_path), + 'cervix': human.DatasetGroupCervix(path=path, meta_path=meta_path, cache_path=cache_path), + 'chorionicvillus': human.DatasetGroupChorionicvillus(path=path, meta_path=meta_path, cache_path=cache_path), + 'colon': human.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), + 'duodenum': human.DatasetGroupDuodenum(path=path, meta_path=meta_path, cache_path=cache_path), + 'epityphlon': human.DatasetGroupEpityphlon(path=path, meta_path=meta_path, cache_path=cache_path), + 'esophagus': human.DatasetGroupEsophagus(path=path, meta_path=meta_path, cache_path=cache_path), + 'eye': human.DatasetGroupEye(path=path, meta_path=meta_path, cache_path=cache_path), + 'fallopiantube': human.DatasetGroupFallopiantube(path=path, meta_path=meta_path, cache_path=cache_path), + 'femalegonad': human.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + 'gallbladder': human.DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path), + 'heart': 
human.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), + 'hesc': human.DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path), + 'ileum': human.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), + 'jejunum': human.DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path), + 'kidney': human.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), + 'liver': human.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), + 'lung': human.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), + 'malegonad': human.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), + 'muscle': human.DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path), + 'omentum': human.DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path), + 'pancreas': human.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), + 'placenta': human.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), + 'pleura': human.DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path), + 'prostate': human.DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path), + 'rectum': human.DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path), + 'rib': human.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), + 'skin': human.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), + 'spinalcord': human.DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path), + 'spleen': human.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), + 'stomach': human.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), + 'thymus': human.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), + 'thyroid': human.DatasetGroupThyroid(path=path, 
meta_path=meta_path, cache_path=cache_path), + 'trachea': human.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), + 'ureter': human.DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path), + 'uterus': human.DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path), } - + else: self.data_human = None self.data_mouse = None @@ -116,8 +116,8 @@ def write_celltypes_tocsv_human(self, fn: str): ds = self.data_human[x] self._write_celltypes_tocsv(fn, x, ds) - def _write_celltypes_tocsv(self, fn: str, x: str, ds: DatasetGroupBase): - ds.load_all(annotated_only=True, remove_gene_version=False, match_to_reference=None) + def _write_celltypes_tocsv(self, fn: str, x: str, ds: DatasetGroup): + ds.load(annotated_only=True, remove_gene_version=False, match_to_reference=None) if len(ds.adata_ls) > 0: obs = ds.obs_concat(keys=["cell_ontology_class", "cell_ontology_id"]) obs.index = range(0, obs.shape[0]) @@ -159,7 +159,7 @@ class TrainModel(TargetZoos): estimator: Union[None, EstimatorKeras] zoo: Union[None, ModelZoo] model_dir: str - data: Union[DatasetGroupBase, DatasetSuperGroup, anndata.AnnData, str, None] + data: Union[DatasetGroup, DatasetSuperGroup, anndata.AnnData, str, None] def __init__(self, data_path: str, meta_path: str): # Check if handling backed anndata or base path to directory of raw files: @@ -187,7 +187,7 @@ def adata(self): raise ValueError("self.data not set yet") elif isinstance(self.data, anndata.AnnData): return self.data - elif isinstance(self.data, DatasetGroupBase) or isinstance(self.data, DatasetSuperGroup): + elif isinstance(self.data, DatasetGroup) or isinstance(self.data, DatasetSuperGroup): return self.data.adata else: raise ValueError("self.data type not recognized: %s " % type(self.data)) @@ -200,7 +200,7 @@ def human_target(self, organ: str): def set_data( self, - data_group: Union[DatasetGroupBase, DatasetSuperGroup] + data_group: Union[DatasetGroup, DatasetSuperGroup] ): """ Set input 
data group. @@ -260,7 +260,7 @@ def init_estim( data=self.adata, model_dir=self.model_dir, model_id=self.zoo.model_id, - species=self.zoo.species, + organism=self.zoo.organism, organ=self.zoo.organ, model_type=self.zoo.model_type, model_topology=self.zoo.model_topology @@ -327,7 +327,7 @@ def init_estim( data=self.adata, model_dir=self.model_dir, model_id=self.zoo.model_id, - species=self.zoo.species, + organism=self.zoo.organism, organ=self.zoo.organ, model_type=self.zoo.model_type, model_topology=self.zoo.model_topology @@ -379,10 +379,10 @@ def _save_specific( cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() - celltype_versions = SPECIES_DICT.copy() - celltype_versions[self.zoo.species][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) - leafnodes = celltype_versions[self.zoo.species][self.zoo.organ].ids - ontology = celltype_versions[self.zoo.species][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] + celltype_versions = ORGANISM_DICT.copy() + celltype_versions[self.zoo.organism][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) + leafnodes = celltype_versions[self.zoo.organism][self.zoo.organ].ids + ontology = celltype_versions[self.zoo.organism][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] for k in cell_counts.keys(): if k not in leafnodes: if k not in ontology.keys(): @@ -390,7 +390,7 @@ def _save_specific( for leaf in ontology[k]: if leaf not in cell_counts_leaf.keys(): cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1/len(ontology[k]) + cell_counts_leaf[leaf] += 1 / len(ontology[k]) del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py new file mode 100644 index 
000000000..5993e7e88 --- /dev/null +++ b/sfaira/unit_tests/test_data_template.py @@ -0,0 +1,49 @@ +import unittest + +from sfaira.data import DatasetGroupDirectoryOriented + + +class TestDatasetTemplate(unittest.TestCase): + dir_data: str = "./test_data" + dir_meta: str = "./test_data/meta" + + def test_load(self): + """ + Address ToDos before running test to customize to your data set. + :return: + """ + celltype_version = None + remove_gene_version = True + match_to_reference = None + # ToDo: add correct module here as "YOUR_STUDY": + from sfaira.data.dataloaders.loaders.YOUR_STUDY import FILE_PATH + ds = DatasetGroupDirectoryOriented( + file_base=FILE_PATH, + path=self.dir_data, + meta_path=self.dir_meta, + cache_path=self.dir_data + ) + # Test raw loading and caching: + ds.load( + celltype_version=celltype_version, + fn=None, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=True, # tests raw loading + allow_caching=True # tests caching + ) + # Test loading from cache: + ds.load( + celltype_version=celltype_version, + fn=None, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=False, + allow_caching=False + ) + # Test concatenation: + _ = ds.adata + + +if __name__ == '__main__': + unittest.main() diff --git a/sfaira/unit_tests/test_dataset.py b/sfaira/unit_tests/test_dataset.py index 9f745322e..763dc89f0 100644 --- a/sfaira/unit_tests/test_dataset.py +++ b/sfaira/unit_tests/test_dataset.py @@ -3,7 +3,8 @@ import scipy.sparse import unittest -from sfaira.data import mouse, DatasetSuperGroup +from sfaira.data import DatasetSuperGroup +from sfaira.data import DatasetSuperGroupSfaira class TestDatasetGroups(unittest.TestCase): @@ -11,11 +12,15 @@ class TestDatasetGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) + ds = DatasetSuperGroupSfaira(path=self.dir_data, 
meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) ds.load_all() def test_adata(self): - ds = mouse.DatasetGroupBladder(path=self.dir_data, meta_path=self.dir_meta) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) _ = ds.adata @@ -24,27 +29,24 @@ class TestDatasetSuperGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all() def test_adata(self): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) _ = ds.adata def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all_tobacked( fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), genome=genome, @@ -55,11 +57,10 @@ def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): assert isinstance(ds.adata.X[:], 
np.ndarray), "%s" % type(ds.adata.X) def test_load_backed_sparse(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroup( - dataset_groups=[ - mouse.DatasetGroupLung(path=self.dir_data, meta_path=self.dir_meta) - ] - ) + ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all_tobacked( fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), genome=genome, diff --git a/sfaira/unit_tests/test_estimator.py b/sfaira/unit_tests/test_estimator.py index bb9c0a15e..ddb711dad 100644 --- a/sfaira/unit_tests/test_estimator.py +++ b/sfaira/unit_tests/test_estimator.py @@ -21,8 +21,8 @@ class _TestEstimator: """ Contains functions _test* to test individual functions and attributes of estimator class. - - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_call() for an example. 
""" @@ -73,7 +73,7 @@ class TestEstimatorKerasEmbedding(unittest.TestCase, _TestEstimator): def set_topology(self, model_type): self.topology_container = Topologies( - species="mouse", + organism="mouse", model_class="embedding", model_type=model_type, topology_id="0.1" @@ -84,7 +84,7 @@ def init_estimator(self): data=self.data, model_dir=None, model_id=None, - species="mouse", + organism="mouse", organ="lung", model_type=self.topology_container.model_type, model_topology=self.topology_container.topology_id @@ -134,7 +134,7 @@ class TestEstimatorKerasCelltype(unittest.TestCase, _TestEstimator): def set_topology(self, model_type): self.topology_container = Topologies( - species="mouse", + organism="mouse", model_class="celltype", model_type=model_type, topology_id="0.0.1" @@ -145,7 +145,7 @@ def init_estimator(self): data=self.data, model_dir=None, model_id=None, - species="mouse", + organism="mouse", organ="lung", model_type=self.topology_container.model_type, model_topology=self.topology_container.topology_id diff --git a/sfaira/unit_tests/test_userinterface.py b/sfaira/unit_tests/test_userinterface.py index f7b8dbc90..aa99a8ee7 100644 --- a/sfaira/unit_tests/test_userinterface.py +++ b/sfaira/unit_tests/test_userinterface.py @@ -12,8 +12,8 @@ class TestUi(unittest.TestCase): """ Contains functions _test* to test individual functions and attributes of the user interface class. - - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_call() for an example. """ diff --git a/sfaira/unit_tests/test_zoo.py b/sfaira/unit_tests/test_zoo.py index 2fa0b0763..f1f7db52c 100644 --- a/sfaira/unit_tests/test_zoo.py +++ b/sfaira/unit_tests/test_zoo.py @@ -14,8 +14,8 @@ class _TestZoo: """ Contains functions _test* to test individual functions and attributes of estimator class. 
- - TODO for everybody working on this, add one _test* function in here and add it into + + TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_kipoi_call() for an example. """ @@ -56,7 +56,7 @@ def _test_basic(self, id: str): np.random.seed(1) self.simulate() self.init_zoo() - #self._test_kipoi_call() + # self._test_kipoi_call() self.zoo_manual.set_model_id(id) diff --git a/sfaira/versions/__init__.py b/sfaira/versions/__init__.py index e69de29bb..7840c39b1 100644 --- a/sfaira/versions/__init__.py +++ b/sfaira/versions/__init__.py @@ -0,0 +1,3 @@ +from . import celltype_versions +from . import genome_versions +from . import topology_versions diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py index 3e4990909..68f91677f 100644 --- a/sfaira/versions/celltype_versions/__init__.py +++ b/sfaira/versions/celltype_versions/__init__.py @@ -8,31 +8,31 @@ # Load versions from extension if available: try: - from sfaira_extension.versions.celltype_versions import SPECIES_DICT as SPECIES_DICT_EXTENSION + from sfaira_extension.versions.celltype_versions import ORGANISM_DICT as ORGANISM_DICT_EXTENSION for organ in mouse.keys(): - if organ in SPECIES_DICT_EXTENSION["mouse"].keys(): - for v in SPECIES_DICT_EXTENSION["mouse"][organ].versions: + if organ in ORGANISM_DICT_EXTENSION["mouse"].keys(): + for v in ORGANISM_DICT_EXTENSION["mouse"][organ].versions: if v in mouse[organ].celltype_universe.keys(): raise ValueError(f'Celltype version {v} already defined for mouse organ {organ} in base sfaira. 
' f'Please define a new version in sfaira_extension.') else: - mouse[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["mouse"][organ].celltype_universe[v] - mouse[organ].ontology[v] = SPECIES_DICT_EXTENSION["mouse"][organ].ontology[v] + mouse[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].celltype_universe[v] + mouse[organ].ontology[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].ontology[v] for organ in human.keys(): - if organ in SPECIES_DICT_EXTENSION["human"].keys(): - for v in SPECIES_DICT_EXTENSION["human"][organ].versions: + if organ in ORGANISM_DICT_EXTENSION["human"].keys(): + for v in ORGANISM_DICT_EXTENSION["human"][organ].versions: if v in human[organ].celltype_universe.keys(): - raise ValueError(f'Celltype version {v} already defined for human organ {organ} in base sfaira. ' + raise ValueError(f'Celltype version {v} already defined for loaders organ {organ} in base sfaira. ' f'Please define a new version in sfaira_extension.') else: - human[organ].celltype_universe[v] = SPECIES_DICT_EXTENSION["human"][organ].celltype_universe[v] - human[organ].ontology[v] = SPECIES_DICT_EXTENSION["human"][organ].ontology[v] + human[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["human"][organ].celltype_universe[v] + human[organ].ontology[v] = ORGANISM_DICT_EXTENSION["human"][organ].ontology[v] except ImportError: pass -SPECIES_DICT = { +ORGANISM_DICT = { "mouse": mouse, "human": human -} \ No newline at end of file +} diff --git a/sfaira/versions/celltype_versions/base.py b/sfaira/versions/celltype_versions/base.py index 3beeef80c..40b6c4c66 100644 --- a/sfaira/versions/celltype_versions/base.py +++ b/sfaira/versions/celltype_versions/base.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): ) # Check that ontology terms are unique also between ontologies if np.sum([len(x) for x in self.ontology.values()]) != \ - len(np.unique(np.array([list(x) for x in self.ontology.values()]))): + len(np.unique(np.array([list(x) for x in 
self.ontology.values()]))): raise ValueError( "duplicated ontology terms found between ontologies in %s" % type(self) @@ -66,11 +66,10 @@ def set_version( else: raise ValueError("version supplied should be either in format `a.b.c` or `a`") - @property def ids(self): """ - List of all human understandable cell type names of this instance. + List of all loaders understandable cell type names of this instance. :return: """ diff --git a/sfaira/versions/celltype_versions/human/brain.py b/sfaira/versions/celltype_versions/human/brain.py index 4e5bc6144..0bea539e2 100644 --- a/sfaira/versions/celltype_versions/human/brain.py +++ b/sfaira/versions/celltype_versions/human/brain.py @@ -44,7 +44,9 @@ ONTOLOGIES_HUMAN_BRAIN_V0 = { "names": { 'Astrocyte': ['Astrocytes 1', 'Astrocytes 2'], - 'Fetal Neuron': ['Glutamatergic neurons from the PFC 1', 'Glutamatergic neurons from the PFC 2', 'Granule neurons from the hip dentate gyrus region', 'GABAergic interneurons 1', 'GABAergic interneurons 2', 'Pyramidal neurons from the hip CA region 1', 'Pyramidal neurons from the hip CA region 2'] + 'Fetal Neuron': ['Glutamatergic neurons from the PFC 1', 'Glutamatergic neurons from the PFC 2', + 'Granule neurons from the hip dentate gyrus region', 'GABAergic interneurons 1', + 'GABAergic interneurons 2', 'Pyramidal neurons from the hip CA region 1', 'Pyramidal neurons from the hip CA region 2'] }, "ontology_ids": {}, } diff --git a/sfaira/versions/celltype_versions/human/eye.py b/sfaira/versions/celltype_versions/human/eye.py index 26a41e393..66afcbdbd 100644 --- a/sfaira/versions/celltype_versions/human/eye.py +++ b/sfaira/versions/celltype_versions/human/eye.py @@ -50,7 +50,7 @@ ONTOLOGIES_HUMAN_EYE_V0 = { "names": { 'BPs': ['Retinal bipolar neuron type A', 'Retinal bipolar neuron type B', 'Retinal bipolar neuron type C', 'Retinal bipolar neuron type D'], - 'Rods': ['Retinal rod cell type A', 'Retinal rod cell type B', 'Retinal rod cell type C',] + 'Rods': ['Retinal rod cell type A', 
'Retinal rod cell type B', 'Retinal rod cell type C', ] }, "ontology_ids": {}, } @@ -65,4 +65,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_EYE_V0 } - super(CelltypeVersionsHumanEye, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanEye, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/kidney.py b/sfaira/versions/celltype_versions/human/kidney.py index f05b0c3fd..c1ba3b0ad 100644 --- a/sfaira/versions/celltype_versions/human/kidney.py +++ b/sfaira/versions/celltype_versions/human/kidney.py @@ -111,7 +111,7 @@ ONTOLOGIES_HUMAN_KIDNEY_V0 = { "names": { 'Type A intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', - 'Collecting Duct - Intercalated Cells Type A (medulla)'], + 'Collecting Duct - Intercalated Cells Type A (medulla)'], 'Principal cell': ['Collecting Duct - PCs - Stressed Dissoc Subset', 'Collecting Duct - Principal Cells (cortex)', 'Collecting Duct - Principal Cells (medulla)'], @@ -121,9 +121,13 @@ 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], 'Dendritic cell': ['MNP-c/dendritic cell', 'Plasmacytoid dendritic cell'], - 'Endothelial cell': ['Endothelial Cells (unassigned)', 'Endothelial Cells - AEA & DVR', 'Endothelial Cells - AVR', 'Endothelial Cells - glomerular capillaries', 'Peritubular capillary endothelium 1', 'Peritubular capillary endothelium 2'], - 'Epithelial cell': ['Pelvic epithelium', 'Pelvic epithelium - distal UB', 'Proximal Tubule Epithelial Cells (S1)', 'Proximal Tubule Epithelial Cells (S2)', 'Proximal Tubule Epithelial Cells (S3)', 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], - 'Intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', 'Collecting Duct - Intercalated Cells Type A (medulla)', 'Collecting Duct - Intercalated Cells Type B', 'Indistinct intercalated cell'], + 'Endothelial 
cell': ['Endothelial Cells (unassigned)', 'Endothelial Cells - AEA & DVR', 'Endothelial Cells - AVR', + 'Endothelial Cells - glomerular capillaries', 'Peritubular capillary endothelium 1', 'Peritubular capillary endothelium 2'], + 'Epithelial cell': ['Pelvic epithelium', 'Pelvic epithelium - distal UB', 'Proximal Tubule Epithelial Cells (S1)', + 'Proximal Tubule Epithelial Cells (S2)', 'Proximal Tubule Epithelial Cells (S3)', + 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], + 'Intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', 'Collecting Duct - Intercalated Cells Type A (medulla)', + 'Collecting Duct - Intercalated Cells Type B', 'Indistinct intercalated cell'], 'T cell': ['CD4 T cell', 'CD8 T cell'], 'Ureteric bud cell': ['CNT/PC - proximal UB', 'Proximal UB', 'Pelvic epithelium - distal UB'] }, diff --git a/sfaira/versions/celltype_versions/human/liver.py b/sfaira/versions/celltype_versions/human/liver.py index 6120a740f..d294a31cd 100644 --- a/sfaira/versions/celltype_versions/human/liver.py +++ b/sfaira/versions/celltype_versions/human/liver.py @@ -72,8 +72,8 @@ "names": { 'Erythroid cells': ['Early Erythroid', 'Mid Erythroid', 'Late Erythroid'], 'Endothelial cell': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], - 'Hepatocyte': ['Hepatocyte 1','Hepatocyte 2','Hepatocyte 3','Hepatocyte 4','Hepatocyte 5','Hepatocyte 6'], - 'Hepatocytes': ['Hepatocyte 1','Hepatocyte 2','Hepatocyte 3','Hepatocyte 4','Hepatocyte 5','Hepatocyte 6'], + 'Hepatocyte': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], + 'Hepatocytes': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], 'Endothelia': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], 'Bcells': ['pro B cell', 'Pre pro B cell', 'Mature B 
cells', 'pre B cell', 'Plasma B cell'], 'Tcells': ['Gamma delta T cells 2', 'Gamma delta T cells 1', 'Alpha beta T cells'], diff --git a/sfaira/versions/celltype_versions/human/mixed.py b/sfaira/versions/celltype_versions/human/mixed.py index bd2d91e3a..02922bae0 100644 --- a/sfaira/versions/celltype_versions/human/mixed.py +++ b/sfaira/versions/celltype_versions/human/mixed.py @@ -29,6 +29,7 @@ "ontology_ids": {}, } + class CelltypeVersionsHumanMixed(CelltypeVersionsBase): def __init__(self, **kwargs): diff --git a/sfaira/versions/celltype_versions/human/rectum.py b/sfaira/versions/celltype_versions/human/rectum.py index 6741afe2f..80d30a0a8 100644 --- a/sfaira/versions/celltype_versions/human/rectum.py +++ b/sfaira/versions/celltype_versions/human/rectum.py @@ -38,4 +38,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_RECTUM_V0 } - super(CelltypeVersionsHumanRectum, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanRectum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/skin.py b/sfaira/versions/celltype_versions/human/skin.py index 6e391ec50..160003138 100644 --- a/sfaira/versions/celltype_versions/human/skin.py +++ b/sfaira/versions/celltype_versions/human/skin.py @@ -41,7 +41,8 @@ ] ONTOLOGIES_HUMAN_SKIN_V0 = { "names": { - 'immune': ['B cell', 'T cell', 'Dendritic cell', 'Erythroid cell', 'Erythroid progenitor cell (RP high)', 'Macrophage', 'Mast cell', 'Monocyte', 'Neutrophil', 'Neutrophil (RPS high)', 'Proliferating T cell'], + 'immune': ['B cell', 'T cell', 'Dendritic cell', 'Erythroid cell', 'Erythroid progenitor cell (RP high)', 'Macrophage', + 'Mast cell', 'Monocyte', 'Neutrophil', 'Neutrophil (RPS high)', 'Proliferating T cell'], 'Basal cell': ['Basal cell 1', 'Basal cell 2'] }, "ontology_ids": {}, @@ -57,4 +58,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_SKIN_V0 } - super(CelltypeVersionsHumanSkin, self).__init__(**kwargs) \ No newline 
at end of file + super(CelltypeVersionsHumanSkin, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/spleen.py b/sfaira/versions/celltype_versions/human/spleen.py index 2c1d0bfa3..27cca5500 100644 --- a/sfaira/versions/celltype_versions/human/spleen.py +++ b/sfaira/versions/celltype_versions/human/spleen.py @@ -66,4 +66,4 @@ def __init__(self, **kwargs): self.ontology = { "0": ONTOLOGIES_HUMAN_SPLEEN_V0 } - super(CelltypeVersionsHumanSpleen, self).__init__(**kwargs) \ No newline at end of file + super(CelltypeVersionsHumanSpleen, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/adipose.py b/sfaira/versions/celltype_versions/mouse/adipose.py index 5b390523d..1b82c908c 100644 --- a/sfaira/versions/celltype_versions/mouse/adipose.py +++ b/sfaira/versions/celltype_versions/mouse/adipose.py @@ -17,7 +17,7 @@ ONTOLOGIES_MOUSE_ADIPOSE_V0 = { "names": { "lymphocyte": [ - "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", + "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "myeloid cell", "NK cell" ], "T cell": ["CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell"] diff --git a/sfaira/versions/celltype_versions/mouse/bladder.py b/sfaira/versions/celltype_versions/mouse/bladder.py index 3c2215ec7..995dc6e3d 100644 --- a/sfaira/versions/celltype_versions/mouse/bladder.py +++ b/sfaira/versions/celltype_versions/mouse/bladder.py @@ -18,7 +18,7 @@ ONTOLOGIES_MOUSE_BLADDER_V0 = { "names": { "bladder cell": ["basal epithelial cell", "epithelial cell", "mesenchymal stromal cell", "smooth muscle cell", - "stromal cell", "umbrella cell"], + "stromal cell", "umbrella cell"], "leukocyte": ["dendritic cell", "macrophage", "NK cell"] }, "ontology_ids": {}, diff --git a/sfaira/versions/celltype_versions/mouse/kidney.py b/sfaira/versions/celltype_versions/mouse/kidney.py index 5866bb19f..9fe0c66c2 100644 --- a/sfaira/versions/celltype_versions/mouse/kidney.py 
+++ b/sfaira/versions/celltype_versions/mouse/kidney.py @@ -47,7 +47,7 @@ ], "lymphocyte": ["B cell", "dendritic cell", "macrophage", "NK cell", "T cell"], "leukocyte": [ - "B cell", "dendritic cell", "macrophage", "neutrophil progenitor", + "B cell", "dendritic cell", "macrophage", "neutrophil progenitor", "NK cell", "plasma cell", "T cell" ], }, diff --git a/sfaira/versions/celltype_versions/mouse/pancreas.py b/sfaira/versions/celltype_versions/mouse/pancreas.py index a89f3ce78..b367cfacd 100644 --- a/sfaira/versions/celltype_versions/mouse/pancreas.py +++ b/sfaira/versions/celltype_versions/mouse/pancreas.py @@ -31,7 +31,7 @@ "macrophage", "t cell" ], - "endocrine cell": [ + "endocrine cell": [ "pancreatic A cell", "pancreatic D cell", "pancreatic PP cell" diff --git a/sfaira/versions/celltype_versions/mouse/spleen.py b/sfaira/versions/celltype_versions/mouse/spleen.py index f5204f45a..67a2e21df 100644 --- a/sfaira/versions/celltype_versions/mouse/spleen.py +++ b/sfaira/versions/celltype_versions/mouse/spleen.py @@ -23,7 +23,7 @@ ONTOLOGIES_MOUSE_SPLEEN_V0 = { "names": { "T cell": [ - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", + "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "immature NKT cell", "mature NK T cell" ] }, diff --git a/sfaira/versions/genome_versions/class_interface.py b/sfaira/versions/genome_versions/class_interface.py index 9b28a5994..ffc44de77 100644 --- a/sfaira/versions/genome_versions/class_interface.py +++ b/sfaira/versions/genome_versions/class_interface.py @@ -5,15 +5,15 @@ class SuperGenomeContainer: _cache_tab: pandas.DataFrame genome: str - species: str + organism: str def __init__( self, - species: str, + organism: str, genome: str ): - self.species = species - if self.species == "human": + self.organism = organism + if self.organism == "human": try: from sfaira_extension.versions.genome_versions.human import GenomeContainer if genome not in GenomeContainer.available_genomes: @@ -24,7 
+24,7 @@ def __init__( from .human import GenomeContainer if genome not in GenomeContainer.available_genomes: raise ValueError(f"Genome {genome} not recognised.") - elif self.species == "mouse": + elif self.organism == "mouse": try: from sfaira_extension.versions.genome_versions.mouse import GenomeContainer if genome not in GenomeContainer.available_genomes: @@ -36,7 +36,7 @@ def __init__( if genome not in GenomeContainer.available_genomes: raise ValueError(f"Genome {genome} not recognised.") else: - raise ValueError(f"Species {species} not recognised.") + raise ValueError(f"Organism {organism} not recognised.") self.gc = GenomeContainer() self.set_genome(genome=genome) diff --git a/sfaira/versions/genome_versions/human/genome_sizes.py b/sfaira/versions/genome_versions/human/genome_sizes.py index db7d7fda7..5d898aeaa 100644 --- a/sfaira/versions/genome_versions/human/genome_sizes.py +++ b/sfaira/versions/genome_versions/human/genome_sizes.py @@ -1,3 +1,3 @@ GENOME_SIZE_DICT = { "Homo_sapiens_GRCh38_97": (19986, ) -} \ No newline at end of file +} diff --git a/sfaira/versions/genome_versions/mouse/genome_container.py b/sfaira/versions/genome_versions/mouse/genome_container.py index ce9d047ab..13c341234 100644 --- a/sfaira/versions/genome_versions/mouse/genome_container.py +++ b/sfaira/versions/genome_versions/mouse/genome_container.py @@ -16,4 +16,4 @@ def __init__(self): } def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) \ No newline at end of file + return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/mouse/genome_sizes.py b/sfaira/versions/genome_versions/mouse/genome_sizes.py index 8d1c9c3e7..63cf95ff0 100644 --- a/sfaira/versions/genome_versions/mouse/genome_sizes.py +++ b/sfaira/versions/genome_versions/mouse/genome_sizes.py @@ -1,3 +1,3 @@ GENOME_SIZE_DICT = { "Mus_musculus_GRCm38_97": (21900, ) -} \ 
No newline at end of file +} diff --git a/sfaira/versions/topology_versions/class_interface.py b/sfaira/versions/topology_versions/class_interface.py index e27b80847..0aa186c53 100644 --- a/sfaira/versions/topology_versions/class_interface.py +++ b/sfaira/versions/topology_versions/class_interface.py @@ -8,7 +8,7 @@ class Topologies: def __init__( self, - species: str, + organism: str, model_class: str, model_type: str, topology_id: str @@ -43,27 +43,27 @@ def __init__( } } } - self.species = species + self.organism = organism self.model_class = model_class self.model_type = model_type self.topology_id = topology_id - assert species in list(self.topologies.keys()), \ - "species %s not found in %s" % \ - (species, list(self.topologies.keys())) - assert model_class in list(self.topologies[species].keys()), \ + assert organism in list(self.topologies.keys()), \ + "organism %s not found in %s" % \ + (organism, list(self.topologies.keys())) + assert model_class in list(self.topologies[organism].keys()), \ "model_class %s not found in %s" % \ - (model_type, list(self.topologies[species].keys())) - assert model_type in list(self.topologies[species][model_class].keys()), \ + (model_type, list(self.topologies[organism].keys())) + assert model_type in list(self.topologies[organism][model_class].keys()), \ "model_type %s not found in %s" % \ - (model_type, list(self.topologies[species][model_class].keys())) - assert topology_id in list(self.topologies[species][model_class][model_type].keys()), \ + (model_type, list(self.topologies[organism][model_class].keys())) + assert topology_id in list(self.topologies[organism][model_class][model_type].keys()), \ "topology_id %s not found in %s" % \ - (topology_id, list(self.topologies[species][model_class][model_type].keys())) - self.genome_container = SuperGenomeContainer(species=species, genome=self.topology["genome"]) + (topology_id, list(self.topologies[organism][model_class][model_type].keys())) + self.genome_container = 
SuperGenomeContainer(organism=organism, genome=self.topology["genome"]) @property def topology(self): - return self.topologies[self.species][self.model_class][self.model_type][self.topology_id] + return self.topologies[self.organism][self.model_class][self.model_type][self.topology_id] @property def ngenes(self): diff --git a/sfaira/versions/topology_versions/external.py b/sfaira/versions/topology_versions/external.py index 93bcbab8e..86fafa27f 100644 --- a/sfaira/versions/topology_versions/external.py +++ b/sfaira/versions/topology_versions/external.py @@ -1 +1 @@ -from sfaira.versions.genome_versions import SuperGenomeContainer \ No newline at end of file +from sfaira.versions.genome_versions import SuperGenomeContainer # noqa: W292 diff --git a/sfaira/versions/topology_versions/human/embedding/ae.py b/sfaira/versions/topology_versions/human/embedding/ae.py index 225100769..fd449a8c1 100644 --- a/sfaira/versions/topology_versions/human/embedding/ae.py +++ b/sfaira/versions/topology_versions/human/embedding/ae.py @@ -2,60 +2,60 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + 
"activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/linear.py b/sfaira/versions/topology_versions/human/embedding/linear.py index 80f9edeca..ef1bc2c53 100644 --- a/sfaira/versions/topology_versions/human/embedding/linear.py +++ b/sfaira/versions/topology_versions/human/embedding/linear.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 
128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/nmf.py b/sfaira/versions/topology_versions/human/embedding/nmf.py index d006be9cb..7ab548d78 100644 --- a/sfaira/versions/topology_versions/human/embedding/nmf.py +++ b/sfaira/versions/topology_versions/human/embedding/nmf.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/vae.py b/sfaira/versions/topology_versions/human/embedding/vae.py index 535a907c8..8ba9d4199 100644 --- a/sfaira/versions/topology_versions/human/embedding/vae.py +++ b/sfaira/versions/topology_versions/human/embedding/vae.py @@ -2,56 +2,56 @@ "0.1": { "genome": 
"Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py 
b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py index 0602ac457..5ad4cf9ea 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/human/embedding/vaeiaf.py @@ -2,29 +2,29 @@ "0.1": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Homo_sapiens_GRCh38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/ae.py b/sfaira/versions/topology_versions/mouse/embedding/ae.py index 4c628642a..12b092138 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/ae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/ae.py @@ -2,60 +2,60 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + 
"activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "input_dropout": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "input_dropout": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/linear.py b/sfaira/versions/topology_versions/mouse/embedding/linear.py index cd07f0366..f073b42a2 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/linear.py +++ b/sfaira/versions/topology_versions/mouse/embedding/linear.py @@ -2,33 
+2,33 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": False, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": False, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/nmf.py b/sfaira/versions/topology_versions/mouse/embedding/nmf.py index 65b2b44a3..9283ae40f 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/nmf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/nmf.py @@ -2,33 +2,33 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 64, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 64, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - "l2_coef": 0., - "positive_components": True, - "output_layer": "nb_shared_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": 128, - "l1_coef": 0., - 
"l2_coef": 0., - "positive_components": True, - "output_layer": "nb_const_disp" + "latent_dim": 128, + "l1_coef": 0., + "l2_coef": 0., + "positive_components": True, + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/vae.py b/sfaira/versions/topology_versions/mouse/embedding/vae.py index 49b45b01f..aaeab8e76 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vae.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vae.py @@ -2,56 +2,56 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 64, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 64, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.3": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_shared_disp" } }, "0.4": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": 
{ - "latent_dim": (512, 256, 128, 64, 128, 256, 512), - "l2_coef": 0., - "l1_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "selu", - "init": "lecun_normal", - "output_layer": "nb_const_disp" + "latent_dim": (512, 256, 128, 64, 128, 256, 512), + "l2_coef": 0., + "l1_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "selu", + "init": "lecun_normal", + "output_layer": "nb_const_disp" } } } diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py index d6dd458b2..28989d580 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py @@ -2,29 +2,29 @@ "0.1": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (256, 128, 64, 128, 256), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (256, 128, 64, 128, 256), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } }, "0.2": { "genome": "Mus_musculus_GRCm38_97", "hyper_parameters": { - "latent_dim": (512, 256, 128, 256, 512), - "n_iaf": 2, - "l1_coef": 0., - "l2_coef": 0., - "dropout_rate": 0., - "batchnorm": True, - "activation": "tanh", - "init": "glorot_uniform", - "output_layer": "nb_shared_disp" + "latent_dim": (512, 256, 128, 256, 512), + "n_iaf": 2, + "l1_coef": 0., + "l2_coef": 0., + "dropout_rate": 0., + "batchnorm": True, + "activation": "tanh", + "init": "glorot_uniform", + "output_layer": "nb_shared_disp" } } } @@ -37,4 +37,4 @@ **VAEIAF_TOPOLOGIES_EXTENSION } except ImportError: - pass \ No newline at end of file + pass diff --git a/versioneer.py b/versioneer.py index 8c2ece54e..d52552de5 100644 --- a/versioneer.py +++ 
b/versioneer.py @@ -561,15 +561,15 @@ def git_get_keywords(versionfile_abs): f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) + mo = re.search(r'=\s*"(.*)"', line) # noqa: W605 if mo: keywords["date"] = mo.group(1) f.close() From 700cd3702fc92c83d3bd2e5c12ca4d7ec4d612d7 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 19 Jan 2021 12:23:54 +0100 Subject: [PATCH 025/161] extended data loader documentation (#77) * FAQ with contributions from @xlancelottx and @lauradmartens * more detailed step-by-step explanation of loader contribution --- docs/data.rst | 173 +++++++++++++++++- sfaira/consts/adata_fields.py | 2 +- sfaira/data/base.py | 12 +- .../d10_1038_s41586_020_2157_4/base.py | 18 +- 4 files changed, 180 insertions(+), 25 deletions(-) diff --git a/docs/data.rst b/docs/data.rst index 1cf1f118a..13fda40a0 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -32,8 +32,8 @@ Add data sets ~~~~~~~~~~~~~ 1. Write a data loader as outlined below. - 2. Identify the raw files as indicated in the data loader classes and copy them into your directory structure as required by your data laoder. - 3. You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the dataloader contributed to sfaira would use this download link. + 2. Identify the raw files as indicated in the data loader classes and copy them into your directory structure as required by your data loader. + 3. 
You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. Use data loaders on existing data repository -------------------------------------------- @@ -63,6 +63,21 @@ This directory contains an `__init__.py` file which makes these data loaders vis Next, each data set is represented by one data loader python file in this directory. See below for more complex set ups with repetitive data loader code. +Check that the data loader was not already implemented +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We will open issues for all planned data loaders, so you can search both the code_ base and our GitHub issues_ for +matching data loaders before you start writing one. +The core data loader identifier is the directory compatible doi, +which is the doi with all special characters replaced by "_" and prefixed with "d": +"10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029". +Searching for this string should yield a match if it is already implemented, take care to look for both +preprint and publication DOIs if both are available. +We will also mention publication names in issues, you will however not find these in the code. + +.. _code: https://github.com/theislab/sfaira/tree/dev +.. _issues: https://github.com/theislab/sfaira/issues + + The data loader python file ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -80,9 +95,10 @@ before it is loaded into memory: self, path: Union[str, None] = None, meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, **kwargs) + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) # Data set meta data: You do not have to include all of these and can simply skip lines corresponding # to attritbutes that you do not have access to.
These are meta data on a sample level. # The meta data attributes labeled with (*) may als be supplied per cell, see below, @@ -93,8 +109,8 @@ before it is loaded into memory: self.author = x # author (list) who sampled / created the data set self.doi = x # doi of data set accompanying manuscript - self.download = x # download website(s) of data files - self.download_meta = x # download website(s) of meta data files + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files self.age = x # (*, optional) age of sample self.dev_stage = x # (*, optional) developmental stage of organism @@ -140,10 +156,18 @@ before it is loaded into memory: 2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. +Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e. the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. .. code-block:: python def _load(self, fn=None): + # assuming that i uploaded an h5ad somewhere (in self.download) + if fn is None: + fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad # Some times, you need to load multiple files (e.g. counts and annotation), all of this code would be here.
@@ -157,12 +181,13 @@ In summary, a simply example data loader for a mouse lung data set could look li self, path: Union[str, None] = None, meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, **kwargs) + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.author = "me" self.doi = "my preprint" - self.download = "my GEO upload" + self.download_url_data = "my GEO upload" self.normalisation = "raw" # because I uploaded raw counts, which is good practice! self.organ = "lung" self.organism = "mouse" @@ -174,7 +199,7 @@ In summary, a simply example data loader for a mouse lung data set could look li def _load(self, fn=None): # assuming that i uploaded an h5ad somewhere (in self.download) if fn is None: - fn = os.path.join(self.path, "mouse", "lung", "my.h5ad") + fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") self.adata = anndata.read(fn) @@ -244,7 +269,28 @@ Metadata management We constrain meta data by ontologies where possible. The current restrictions are: - - .organism must either mouse or human. 
+ - .age: unconstrained string, try using units of years for human and units of months for mice + - .dev_stage: unconstrained string, this will be constrained to an ontology in the future, + try choosing from HSAPDV (http://www.obofoundry.org/ontology/hsapdv.html) for human + or from MMUSDEV (http://www.obofoundry.org/ontology/mmusdv.html) for mouse + - .ethnicity: unconstrained string, this will be constrained to an ontology in the future, + try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) + - .healthy: bool + - .normalisation: unconstrained string, this will be constrained to an ontology in the future, + try using {"raw", "scaled"} + - .organ: unconstrained string, this will be constrained to an ontology in the future, try to choose + term from Uberon (http://www.obofoundry.org/ontology/uberon.html) + or from EHDAA2 (http://www.obofoundry.org/ontology/ehdaa2.html) for human + or from EMAPA (http://www.obofoundry.org/ontology/emapa.html) for mouse + - .organism: constrained string, {"mouse", "human"}. In the future, we will use NCBITAXON + (http://www.obofoundry.org/ontology/ncbitaxon.html). + - .protocol: unconstrained string, this will be constrained to an ontology in the future, + try choosing a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false + - .sex: constrained string, {"female", "male"} + - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. + If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity + and use units: `[1g]` + - .year: must be an integer year, e.g. 2020 Follow this issue_ for details on upcoming ontology integrations. @@ -253,6 +299,9 @@ Follow this issue_ for details on upcoming ontology integrations.
Genome management ----------------- +You do not have to worry about this unless you are interested, +this section is not required reading for writing data loaders. + We streamline feature spaces used by models by defining standardized gene sets that are used as model input. Per default, sfaira works with the protein coding genes of a genome assembly right now. A model topology version includes the genome it was trained for, which also defines the feature of this model as genes. @@ -260,3 +309,109 @@ As genome assemblies are updated, model topology version can be updated and mode Note that because protein coding genes do not change drastically between genome assemblies, sample can be carried over to assemblies they were not aligned against by matching gene identifiers. Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. + +FAQ +--- + +How is the dataset’s ID structured? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi + +How do I assemble the data set ID if some of its element meta data are not unique? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The data set ID is designed to be a unique identifier of a data set. +Therefore, it is not an issue if it does not capture the full complexity of the data. +Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. + +What are cell-wise and sample-wise meta data? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Metadata can be set on a per sample level or, in some cases, per cell. +Sample-wise meta data can be directly set in the constructor (e.g self.organism = “human”). +Cell-wise metadata can be provided in `.obs` of the loaded data, here, +a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels +(e.g. self.obs_key_organism). 
+Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. + +Which meta data objects are optional? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): + + - .id: Dataset ID. This is used to identify the data set uniquely. + Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" + - .download_url_data: Link to data download website. + Example: self.download_url_data = "some URL" + - .download_url_meta: Download link to metadata. Assumes that meta data is defined in .download_url_data if not + specified. + Example: self.download_url_meta = "some URL" + - .var_symbol_col, .var_ensembl_col: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var + (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. + Example: self.var_symbol_col = 'index', self.var_ensembl_col = “GeneID” + - .author: First author of publication (or list of all authors). + self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] + - .doi: Doi of publication + Example: self.doi = "10.1016/j.cell.2019.06.029" + - .organism (or .obs_key_organism): Organism sampled. + Example: self.organism = “human” + +Highly recommended: + + - .normalisation: Normalisation of count data: + Example: self.normalisation = “raw” + - .organ (or .obs_key_organ): Organ sampled. + Example: self.organ = “liver” + - .protocol (or .obs_key_protocol): Protocol with which data was collected. + Example: self.protocol = “10x” + +Optional (if available): + + - .age (or .obs_key_age): Age of individual sampled. + Example: self.age = 80 # (80 years old for human) + - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. + Example: self.dev_stage = “mature” + - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human).
+ Example: self.ethnicity = “free text” + - .healthy (or .obs_key_healthy): Is the sample from a healthy individual? (bool) + Example: self.healthy = True + - .sex (or .obs_key_sex): Sex of individual sampled. + Example: self.sex = “male” + - .state_exact (or .obs_key_state_exact): Exact disease state + self.state_exact = free text + - .obs_key_cellontology_original: Column in .obs in which free text cell type names are stored. + Example: self.obs_key_cellontology_original = 'CellType' + - .year: Year of publication: + Example: self.year = 2019 + +How do I cache data sets? +~~~~~~~~~~~~~~~~~~~~~~~~~ +When loading a dataset with `Dataset.load()`, you can specify if the adata object +should be cached or not (allow_caching=True). +If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. + +How do I add cell type annotation? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are simplifying this right now, new instructions will be available second half of January. + +Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Initiation and data set loading are handled separately to allow lazy loading. +All steps that are required to load the count data and +additional metadata should be defined solely in the `_load` section. +Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. + +How do I tell sfaira where the gene names are? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By setting the attributes `.var_symbol_col` or `.var_ensembl_col` in the constructor. +If the gene names are in the index of this data frame, you can set “index” as the value of these attributes. + +I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +No, that is not a problem. They will automatically be converted to Ensembl IDs. +You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` +to which the names should be mapped to. + +I have CITE-seq data, where can I put the protein quantification? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We will soon provide a structured interface for loading and accessing CITE-seq data, +for now you can add it into `self.adata.obsm[“CITE”]`. diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 1eee5e775..0d022b955 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -199,7 +199,7 @@ def __init__(self): self.organ_allowed_entries = None self.organism_allowed_entries = ["mouse", "human"] self.protocol_allowed_entries = None - self.sex_allowed_entries = None + self.sex_allowed_entries = ["female", "male"] self.subtissue_allowed_entries = None self.year_allowed_entries = list(range(2000, 3000)) # Free fields that are not constrained: diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 73e071aaf..33075193e 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -152,10 +152,6 @@ def _load(self, fn): def _download(self, fn): pass - @property - def _directory_formatted_doi(self) -> str: - return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) - @property def _directory_formatted_id(self) -> str: return "_".join("_".join(self.id.split("/")).split(".")) @@ -200,11 +196,11 @@ def _load_cached( raise ValueError("provide either fn in load or path in constructor") assert self.cache_path is not None, "set self.cache_path first" - assert self._directory_formatted_doi is not None, "set self.doi first" + assert self.directory_formatted_doi is not None, "set self.doi first" assert 
self._directory_formatted_id is not None, "set self.id first" fn_cache = os.path.join( self.cache_path, - self._directory_formatted_doi, + self.directory_formatted_doi, self._directory_formatted_id + ".h5ad" ) # Check if raw loader has to be called: @@ -872,6 +868,10 @@ def doi(self, x: str): self.__erasing_protection(attr="doi", val_old=self._doi, val_new=x) self._doi = x + @property + def directory_formatted_doi(self) -> str: + return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) + @property def download(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: """ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py index 75b926aa6..a54472e61 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -50,23 +50,23 @@ def _download(self): # download required files from loaders cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471 print(urllib.request.urlretrieve( "https://ndownloader.figshare.com/files/17727365", - os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad") + os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad") )) print(urllib.request.urlretrieve( "https://ndownloader.figshare.com/files/21758835", - os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx") + os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx") )) print(urllib.request.urlretrieve( "https://ndownloader.figshare.com/files/22447898", - os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") + os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") )) # extract the downloaded zip archive with zipfile.ZipFile( - 
os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417.zip"), + os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip"), "r" ) as zip_ref: - zip_ref.extractall(os.path.join(self.path, self._directory_formatted_doi)) + zip_ref.extractall(os.path.join(self.path, self.directory_formatted_doi)) def _load_generalized(self, fn, sample_id: str): """ @@ -75,7 +75,7 @@ def _load_generalized(self, fn, sample_id: str): :param fn: :return: """ - adata = anndata.read(os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_adata.h5ad")) + adata = anndata.read(os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix adata.X = scipy.sparse.csr_matrix(adata.X).copy() @@ -92,7 +92,7 @@ def _load_generalized(self, fn, sample_id: str): # load celltype labels and harmonise them # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: fig1_anno = pd.read_excel( - os.path.join(self.path, "human", self._directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), index_col="cellnames", engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables ) @@ -113,11 +113,11 @@ def _load_generalized(self, fn, sample_id: str): columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) for f in os.listdir( - os.path.join(self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417") + os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417") ): df1 = pd.read_csv( os.path.join( - self.path, "human", self._directory_formatted_doi, "annotation_rmbatch_data_revised417", f + self.path, "human", 
self.directory_formatted_doi, "annotation_rmbatch_data_revised417", f ), encoding="unicode_escape") df = pd.concat([df, df1], sort=True) df = df.set_index("Cell_id") From f95ea89c311f574926f232527c1b1fc9df7ae14c Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 20 Jan 2021 11:32:44 +0100 Subject: [PATCH 026/161] added development FAQ section (#79) --- docs/development.rst | 45 ++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 46 insertions(+) create mode 100644 docs/development.rst diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000..1d8488c31 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,45 @@ +Development +=========== + +Data zoo FAQ +------------ + +How are the meta data entries that I define in the constructor constrained or protected? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The items that are not free text are documented in the readthedocs data section, often, +these would require entries to be terms in an ontology. +If you make a mistake in defining these fields in a data loader that you contribute, +the template test data loader and any loading operation will throw an error +pointing at this meta data element. + +How is _load() used in data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`_load()` contains all processing steps that load raw data files into a ready to use adata object. +`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. +This adata object can be cached as an h5ad file named after the dataset ID for faster reloading +(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available +(if use_cached=False). + +How is the feature space (gene names) manipulated during data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sfaira provides both gene names and ENSEMBL IDs. 
Missing IDs will automatically be inferred from the gene names and +vice versa. +Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); +in this case, counts are aggregated across these features. +Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. + +Datasets, DatasetGroups, DatasetSuperGroups - what are they? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Dataset: Custom class that loads a specific dataset. +DatasetGroup: A dataset group manages a collection of data loaders (multiple instances of Dataset). +This is useful for grouping, for example, all data loaders corresponding to a certain study or a certain tissue. +DatasetSuperGroups: A group of DatasetGroups that allows easy addition of multiple instances of DatasetGroup. + +Basics of sfaira lazy loading via split into constructor and _load function. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The constructor of a dataset defines all metadata associated with this data set. +The loading of the actual data happens in the `load()` function and not in the constructor. +This is useful as it allows initialising the datasets and accessing dataset metadata +without loading the actual count data. +DatasetGroups can contain initialised Datasets and can be subsetted based on metadata +before loading is triggered across the entire group. 
diff --git a/docs/index.rst b/docs/index.rst index 497452180..265bc7965 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -43,4 +43,5 @@ Latest additions models ecosystem roadmap + development changelog From 03d590f5b8e09b4b514d8a6794f16b2cec622f88 Mon Sep 17 00:00:00 2001 From: le-ander <20015434+le-ander@users.noreply.github.com> Date: Mon, 25 Jan 2021 09:46:17 +0100 Subject: [PATCH 027/161] update cached reading to only warn and not throw errors if cached reading / writing fails --- sfaira/data/base.py | 74 +++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 33075193e..2889d42f7 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -192,31 +192,61 @@ def _load_cached( :param allow_caching: Whether to allow method to cache adata object for faster re-loading. :return: """ + def _get_cache_fn(): + if None in [ + self.cache_path, + self.directory_formatted_doi, + self._directory_formatted_id + ]: + if self.cache_path is None: + w = "cache path" + elif self.directory_formatted_doi is None: + w = "self.doi" + else: # self._directory_formatted_id is None + w = "self.id" + warnings.warn(f"Caching enabled, but cannot find caching directory. Set {w} first. " + f"Disabling caching for now") + return None + + cache = os.path.join( + self.cache_path, + self.directory_formatted_doi, + self._directory_formatted_id + ".h5ad" + ) + return cache + + def _cached_reading(fn, fn_cache): + if fn_cache is not None: + if os.path.exists(fn_cache): + self.adata = anndata.read_h5ad(fn_cache) + else: + warnings.warn(f"Cached loading enabled, but cache file {fn_cache} not found. 
" + f"Loading from raw files.") + self._load(fn=fn) + + def _cached_writing(fn_cache): + if fn_cache is not None: + dir_cache = os.path.dirname(fn_cache) + if not os.path.exists(dir_cache): + os.makedirs(dir_cache) + self.adata.write_h5ad(fn_cache) + if fn is None and self.path is None: raise ValueError("provide either fn in load or path in constructor") - assert self.cache_path is not None, "set self.cache_path first" - assert self.directory_formatted_doi is not None, "set self.doi first" - assert self._directory_formatted_id is not None, "set self.id first" - fn_cache = os.path.join( - self.cache_path, - self.directory_formatted_doi, - self._directory_formatted_id + ".h5ad" - ) - # Check if raw loader has to be called: - if load_raw or not os.path.exists(fn_cache): + if load_raw and allow_caching: self._load(fn=fn) - else: - assert self.cache_path is not None, "set cache_path to use caching" - assert os.path.exists(fn_cache), f"did not find cache file {fn_cache}, consider caching first" - self.adata = anndata.read_h5ad(fn_cache) - # Check if file needs to be cached: - if allow_caching and not os.path.exists(fn_cache): - assert self.cache_path is not None, "set cache_path to use caching" - dir_cache = os.path.dirname(fn_cache) - if not os.path.exists(dir_cache): - os.makedirs(dir_cache) - self.adata.write_h5ad(fn_cache) + fn_cache = _get_cache_fn() + _cached_writing(fn_cache) + elif load_raw and not allow_caching: + self._load(fn=fn) + elif not load_raw and allow_caching: + fn_cache = _get_cache_fn() + _cached_reading(fn, fn_cache) + _cached_writing(fn_cache) + else: # not load_raw and not allow_caching + fn_cache = _get_cache_fn() + _cached_reading(fn, fn_cache) def load( self, @@ -240,7 +270,7 @@ def load( """ if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" - "while not removing gene versions. 
this can lead to very poor matching performance") + "while not removing gene versions. this can lead to very poor matching results") # Set default genomes per organism if none provided: if match_to_reference: From 9d0dbe2071de530d960c0fd3a2df70cc59d43766 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 25 Jan 2021 18:56:14 +0100 Subject: [PATCH 028/161] fixed bug in d10_1016_j_cmet_2019_01_021 data loader and added exception handling for Dataset.annotated (#82) fixes #81 - fixed bug in d10_1016_j_cmet_2019_01_021 data loader - added exception handling for Dataset.annotated - added Dataset.loaded property --- sfaira/data/base.py | 14 +++++++++++++- .../loaders/d10_1016_j_cmet_2019_01_021/base.py | 4 +++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 2889d42f7..0d52e2f29 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -837,7 +837,7 @@ def age(self, x: str): self._age = x @property - def annotated(self) -> bool: + def annotated(self) -> Union[bool, None]: if self.obs_key_cellontology_id is not None or self.obs_key_cellontology_original is not None: return True else: @@ -845,7 +845,12 @@ def annotated(self) -> bool: self.load_meta(fn=None) if self.meta is not None and self._ADATA_IDS_SFAIRA.annotated in self.meta.columns: return self.meta[self._ADATA_IDS_SFAIRA.annotated] + elif self.loaded: + # If data set was loaded and there is still no annotation indicated, it is declared unannotated. + return False else: + # If data set was not yet loaded, it is unclear if annotation would be loaded in ._load(), + # if also no meta data is available, we do not know the status of the data set. return None @property @@ -1023,6 +1028,13 @@ def id(self, x: str): self.__erasing_protection(attr="id", val_old=self._id, val_new=x) self._id = x + @property + def loaded(self) -> bool: + """ + :return: Whether DataSet was loaded into memory. 
+ """ + return self.adata is not None + @property def meta(self) -> Union[None, pd.DataFrame]: return self._meta diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py index 2c89b9310..381dc45e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py @@ -30,6 +30,8 @@ def __init__( self.var_symbol_col = "index" + self.obs_key_cellontology_original = "celltypes" + self.class_maps = { "0": { "acinar": "pancreatic acinar cell", @@ -56,4 +58,4 @@ def _load_generalized(self, fn, fn_meta): self.adata.obs_names = np.genfromtxt(fn + "_barcodes.tsv.gz", dtype=str) self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = celltypes + self.adata.obs["celltypes"] = celltypes From e532a4e485092caf4b999f1e58613b84c1ca4e5a Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Tue, 26 Jan 2021 17:03:56 +0100 Subject: [PATCH 029/161] Fix tutorials (#83) * fix bug in cached loading * fixed bug in d10_1016_j_cmet_2019_01_021 data loader and added exception handling for Dataset.annotated (#82) fixes #81 - fixed bug in d10_1016_j_cmet_2019_01_021 data loader - added exception handling for Dataset.annotated - added Dataset.loaded property * remove mixed dataset specific subsetting * remove empty dataset groups after subsetting * fixup! remove empty dataset groups after subsetting * make sure int entries in metadata table are loaded correctly and improve error message * improve metadata loading error message * improve handling of dtypes and Nones in meta data reading * revert formatting changes to meta_data_files.py * add missing protocol specification in TMS * fix ncells property of datasetsupergroup Co-authored-by: David S. 
Fischer --- sfaira/data/base.py | 34 ++++++++----------- .../human_mixed_2019_10x_szabo_001.py | 12 +++---- .../mouse_colon_2019_10x_pisco_001.py | 1 + .../mouse_colon_2019_smartseq2_pisco_001.py | 1 + .../mouse_kidney_2019_10x_pisco_001.py | 1 + .../mouse_kidney_2019_smartseq2_pisco_001.py | 1 + .../mouse_lung_2019_10x_pisco_001.py | 1 + .../mouse_lung_2019_smartseq2_pisco_001.py | 1 + .../mouse_pancreas_2019_10x_pisco_001.py | 1 + ...mouse_pancreas_2019_smartseq2_pisco_001.py | 1 + .../mouse_skin_2019_10x_pisco_001.py | 2 ++ .../mouse_skin_2019_smartseq2_pisco_001.py | 2 ++ .../mouse_spleen_2019_smartseq2_pisco_001.py | 2 ++ .../mouse_trachea_2019_10x_pisco_001.py | 1 - .../mouse_trachea_2019_smartseq2_pisco_001.py | 1 - .../data/dataloaders/loaders/super_group.py | 2 +- 16 files changed, 35 insertions(+), 29 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 0d52e2f29..b6e1acdac 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -205,7 +205,7 @@ def _get_cache_fn(): else: # self._directory_formatted_id is None w = "self.id" warnings.warn(f"Caching enabled, but cannot find caching directory. Set {w} first. " - f"Disabling caching for now") + f"Disabling caching for now.") return None cache = os.path.join( @@ -223,6 +223,8 @@ def _cached_reading(fn, fn_cache): warnings.warn(f"Cached loading enabled, but cache file {fn_cache} not found. 
" f"Loading from raw files.") self._load(fn=fn) + else: + self._load(fn=fn) def _cached_writing(fn_cache): if fn_cache is not None: @@ -277,10 +279,10 @@ def load( genome = match_to_reference elif self.organism == "human": genome = "Homo_sapiens_GRCh38_97" - warnings.warn(f"using default genomes {genome}") + warnings.warn(f"using default genome {genome}") elif self.organism == "mouse": genome = "Mus_musculus_GRCm38_97" - warnings.warn(f"using default genomes {genome}") + warnings.warn(f"using default genome {genome}") else: raise ValueError(f"genome was not supplied and organism {self.organism} " f"was not matched to a default choice") @@ -726,21 +728,11 @@ def load_meta(self, fn: Union[PathLike, str, None]): # Only load meta data if file exists: if os.path.isfile(fn): meta = pandas.read_csv( - fn, usecols=list(self._META_DATA_FIELDS.keys()), dtype=str, + fn, + usecols=list(self._META_DATA_FIELDS.keys()), + dtype=self._META_DATA_FIELDS, ) - # Formatting: All are read as string to allow dealing wth None entries: - # Make sure bool entries are bool: - for k, v in self._META_DATA_FIELDS.items(): - if v == bool: - meta[k] = [ - True if x == "True" else - False if x == "False" else None - for x in meta[k].values.tolist() - ] - else: - # Make sure None entries are formatted as None and not as string "None": - meta[k] = [None if x == "None" else x for x in meta[k].values.tolist()] - self.meta = meta + self.meta = meta.fillna("None").replace({"None": None}) def write_meta( self, @@ -947,7 +939,7 @@ def download_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: :return: """ x = self._download_meta - # if self._download_meta is not None: # TODO add this back in once download_meta is routineyl set in datasets + # if self._download_meta is not None: # TODO add this back in once download_meta is routinely set in datasets # x = self._download_meta # else: # if self.meta is None: @@ -1050,7 +1042,7 @@ def meta(self, x: Union[None, pd.DataFrame]): else: if x[k] is not 
None: # None is always allowed. if not isinstance(v[0], self._META_DATA_FIELDS[k]): - raise ValueError(f"key {k} of signature {str(v[0])} " + raise ValueError(f"key '{k}' of value `{v[0]}` and signature `{type(v[0])}` " f"in meta data table did not match signature " f"{str(self._META_DATA_FIELDS[k])}") self._meta = x @@ -1874,7 +1866,7 @@ def ncells_bydataset_flat(self, annotated_only: bool = False): return [xx for x in self.ncells_bydataset(annotated_only=annotated_only) for xx in x] def ncells(self, annotated_only: bool = False): - return np.sum(self.ncells_bydataset(annotated_only=annotated_only)) + return np.sum(self.ncells_bydataset_flat(annotated_only=annotated_only)) def flatten(self) -> DatasetGroup: """ @@ -2067,6 +2059,8 @@ def subset(self, key, values): for x in self.dataset_groups: x.subset(key=key, values=values) + self.dataset_groups = [x for x in self.dataset_groups if x.datasets] # Delete empty DatasetGroups + def subset_organs(self, subset: Union[None, List]): for x in self.dataset_groups: if x.datasets[0].organ == "mixed": diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 902beb985..5939cc247 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -38,7 +38,7 @@ def __init__( self.obs_key_cellontology_original = "cell_ontology_class" self.obs_key_organ = "organ" - self.loaded = False # TODO do this differently? + # self.loaded = False # TODO do this differently? 
self.class_maps = { "0": {}, @@ -97,12 +97,12 @@ def _load(self, fn=None): # TODO we should move this code into the base class # If the subset_organs() method has been run before, subset to specified organs - if "organsubset" in self.__dict__: - self.adata = self.adata[self.adata.obs["organ"].isin(self.organsubset)] + # if "organsubset" in self.__dict__: + # self.adata = self.adata[self.adata.obs["organ"].isin(self.organsubset)] # If adata object is empty, set it to None - if not len(self.adata): - self.adata = None - self.loaded = True + # if not len(self.adata): + # self.adata = None + # self.loaded = True @property def ncells(self): diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py index 239a8eb7e..6bec3f395 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" self.organ = "colon" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": {}, diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py index 1d3a206e4..c8535269c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "colon" + self.protocol = self._get_protocol_tms(self.id) 
self.class_maps = { "0": {}, diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py index 98d48650e..3f1ae52b2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" self.organ = "kidney" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py index 3016fedea..19f3f365e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "kidney" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py index 05602ebf8..3d836d4a8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" 
self.organ = "lung" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py index 615240b9b..807de2179 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "lung" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py index 31e270b8b..dd3351fb2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" self.organ = "pancreas" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py index aa9765a6c..acf953870 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py @@ -16,6 +16,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, 
cache_path=cache_path, source=source, **kwargs) self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "pancreas" + self.protocol = self._get_protocol_tms(self.id) self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py index 5085b14b8..13c227435 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py @@ -16,6 +16,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" self.organ = "skin" + self.protocol = self._get_protocol_tms(self.id) + self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py index a40b384e9..676b9f000 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py @@ -16,6 +16,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "skin" + self.protocol = self._get_protocol_tms(self.id) + self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py index 038eb521e..29d802954 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py @@ -16,6 +16,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) self.id = "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" self.organ = "spleen" + self.protocol = self._get_protocol_tms(self.id) + self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py index 3f5210e77..725ed16e6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py @@ -1,4 +1,3 @@ -import numpy as np import os from typing import Union from .base import Dataset_d10_1101_661728 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py index 9d3dd4fcd..6622c8916 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py @@ -1,4 +1,3 @@ -import numpy as np import os from typing import Union from .base import Dataset_d10_1101_661728 diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py index 16f8d1733..c327816b2 100644 --- a/sfaira/data/dataloaders/loaders/super_group.py +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -32,7 +32,7 @@ def __init__( if os.path.isdir(os.path.join(cwd, f)): # only directories if f[:len(dir_prefix)] == dir_prefix and f not in dir_exlcude: # Narrow down to data set directories path_dsg = pydoc.locate( - "sfaira.sfaira.data.dataloaders.loaders." 
+ f + ".FILE_PATH") + f"sfaira.sfaira.data.dataloaders.loaders.{f}.FILE_PATH") if path_dsg is not None: dataset_groups.append(DatasetGroupDirectoryOriented( file_base=path_dsg, From 69807fab1eb037381bc0fe4f2ef69f088d4f3743 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 27 Jan 2021 13:21:50 +0100 Subject: [PATCH 030/161] Streamline data loader for data set collection (#85) * added streamlined solutions for multiple data sets in one study * adopted new format for all relevant data loaders other than hcl, mca and tms * started anatomical group refactor based on sfaira universe subsetting using IDs * annotated new dataset classes in docs * added meta and cache script for data base building and testing * fixed potential pydoc bug --- docs/data.rst | 38 ++- sfaira/data/__init__.py | 2 +- sfaira/data/base.py | 217 +++++++++++++++--- .../anatomical_groups/human/__init__.py | 1 - .../anatomical_groups/human/external.py | 2 - .../anatomical_groups/human/human_adipose.py | 20 +- .../human/human_adrenalgland.py | 34 +-- .../anatomical_groups/human/human_artery.py | 19 +- .../anatomical_groups/human/human_bladder.py | 25 +- .../anatomical_groups/human/human_blood.py | 43 ++-- .../anatomical_groups/human/human_bone.py | 25 +- .../anatomical_groups/human/human_brain.py | 37 ++- .../anatomical_groups/human/human_calvaria.py | 19 +- .../anatomical_groups/human/human_cervix.py | 19 +- .../human/human_chorionicvillus.py | 19 +- .../anatomical_groups/human/human_colon.py | 40 ++-- .../anatomical_groups/human/human_duodenum.py | 19 +- .../human/human_epityphlon.py | 19 +- .../human/human_esophagus.py | 25 +- .../anatomical_groups/human/human_eye.py | 28 +-- .../human/human_fallopiantube.py | 19 +- .../human/human_femalegonad.py | 22 +- .../human/human_gallbladder.py | 19 +- .../anatomical_groups/human/human_heart.py | 28 +-- .../anatomical_groups/human/human_hesc.py | 19 +- .../anatomical_groups/human/human_ileum.py | 25 +- .../anatomical_groups/human/human_jejunum.py | 
19 +- .../anatomical_groups/human/human_kidney.py | 46 ++-- .../anatomical_groups/human/human_liver.py | 43 ++-- .../anatomical_groups/human/human_lung.py | 61 ++--- .../human/human_malegonad.py | 25 +- .../anatomical_groups/human/human_mixed.py | 26 --- .../anatomical_groups/human/human_muscle.py | 22 +- .../anatomical_groups/human/human_omentum.py | 25 +- .../anatomical_groups/human/human_pancreas.py | 37 ++- .../anatomical_groups/human/human_placenta.py | 25 +- .../anatomical_groups/human/human_pleura.py | 19 +- .../anatomical_groups/human/human_prostate.py | 22 +- .../anatomical_groups/human/human_rectum.py | 22 +- .../anatomical_groups/human/human_rib.py | 22 +- .../anatomical_groups/human/human_skin.py | 22 +- .../human/human_spinalcord.py | 19 +- .../anatomical_groups/human/human_spleen.py | 25 +- .../anatomical_groups/human/human_stomach.py | 46 ++-- .../anatomical_groups/human/human_thymus.py | 25 +- .../anatomical_groups/human/human_thyroid.py | 22 +- .../anatomical_groups/human/human_trachea.py | 19 +- .../anatomical_groups/human/human_ureter.py | 19 +- .../anatomical_groups/human/human_uterus.py | 19 +- .../anatomical_groups/mouse/external.py | 2 - .../anatomical_groups/mouse/mouse_adipose.py | 32 +-- .../anatomical_groups/mouse/mouse_bladder.py | 26 +-- .../anatomical_groups/mouse/mouse_blood.py | 32 +-- .../anatomical_groups/mouse/mouse_bone.py | 26 +-- .../anatomical_groups/mouse/mouse_brain.py | 29 +-- .../anatomical_groups/mouse/mouse_colon.py | 23 +- .../mouse/mouse_diaphragm.py | 20 +- .../mouse/mouse_femalegonad.py | 23 +- .../anatomical_groups/mouse/mouse_heart.py | 27 +-- .../anatomical_groups/mouse/mouse_ileum.py | 26 +-- .../anatomical_groups/mouse/mouse_kidney.py | 29 +-- .../anatomical_groups/mouse/mouse_liver.py | 29 +-- .../anatomical_groups/mouse/mouse_lung.py | 32 +-- .../mouse/mouse_malegonad.py | 23 +- .../mouse/mouse_mammarygland.py | 35 +-- .../anatomical_groups/mouse/mouse_muscle.py | 26 +-- 
.../anatomical_groups/mouse/mouse_pancreas.py | 50 ++-- .../anatomical_groups/mouse/mouse_placenta.py | 23 +- .../anatomical_groups/mouse/mouse_prostate.py | 23 +- .../anatomical_groups/mouse/mouse_rib.py | 26 +-- .../anatomical_groups/mouse/mouse_skin.py | 23 +- .../anatomical_groups/mouse/mouse_spleen.py | 26 +-- .../anatomical_groups/mouse/mouse_stomach.py | 20 +- .../anatomical_groups/mouse/mouse_thymus.py | 26 +-- .../anatomical_groups/mouse/mouse_tongue.py | 23 +- .../anatomical_groups/mouse/mouse_trachea.py | 23 +- .../anatomical_groups/mouse/mouse_uterus.py | 23 +- .../mouse_pancreas_2019_10x_thompson_001.py | 26 --- .../mouse_pancreas_2019_10x_thompson_002.py | 26 --- .../mouse_pancreas_2019_10x_thompson_003.py | 26 --- .../mouse_pancreas_2019_10x_thompson_004.py | 26 --- .../mouse_pancreas_2019_10x_thompson_005.py | 26 --- .../mouse_pancreas_2019_10x_thompson_006.py | 26 --- .../mouse_pancreas_2019_10x_thompson_007.py | 26 --- .../mouse_pancreas_2019_10x_thompson_008.py | 26 --- ... 
=> mouse_pancreas_2019_10x_thompson_x.py} | 32 ++- .../human_mixed_2019_10x_szabo_001.py | 128 +++++------ .../human_placenta_2018_10x_ventotormo_001.py | 32 ++- ..._placenta_2018_smartseq2_ventotormo_001.py | 92 -------- .../human_lung_2020_10x_travaglini_001.py | 33 ++- ...uman_lung_2020_smartseq2_travaglini_002.py | 162 ------------- .../human_lung_2019_10x_braga_001.py | 94 +++++--- .../human_lung_2019_10x_braga_002.py | 67 ------ ...y => human_lung_2019_dropseq_braga_001.py} | 10 +- .../human_colon_2019_10x_wang_001.py | 57 ----- .../human_ileum_2019_10x_wang_001.py | 57 ----- .../human_rectum_2019_10x_wang_001.py | 56 ----- .../human_x_2019_10x_wang_001.py | 89 +++++++ .../human_lung_2020_10x_lukassen_001.py | 68 ++++-- .../human_lung_2020_10x_lukassen_002.py | 66 ------ .../human_esophagus_2019_10x_madissoon_001.py | 68 ------ .../human_lung_2019_10x_madissoon_001.py | 73 ------ .../human_spleen_2019_10x_madissoon_001.py | 77 ------- .../human_x_2019_10x_madissoon_001.py | 151 ++++++++++++ .../d_nan/human_blood_2018_10x_ica_001.py | 2 +- .../human_blood_2019_10x_10xGenomics_001.py | 2 +- .../d_nan/human_bone_2018_10x_ica_001.py | 2 +- .../data/dataloaders/loaders/super_group.py | 3 +- sfaira/data/external.py | 2 - sfaira/data/templates/__init__.py | 0 sfaira/data/templates/dataloaders/__init__.py | 0 .../__init__.py | 1 + .../your_dataset_file_1.py | 20 ++ .../your_dataset_file_2.py | 20 ++ .../__init__.py | 1 + .../your_dataset_file.py | 28 +++ .../many_samples_one_file/__init__.py | 1 + .../your_dataset_file.py | 31 +++ .../one_samples_one_files/__init__.py | 1 + .../your_dataset_file_1.py | 20 ++ sfaira/data/utils/create_meta_and_cache.py | 45 ++++ 121 files changed, 1524 insertions(+), 2414 deletions(-) delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/external.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/external.py delete mode 
100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py rename sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/{base.py => mouse_pancreas_2019_10x_thompson_x.py} (57%) delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py rename sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/{human_lung_2019_dropseq_braga_003.py => human_lung_2019_dropseq_braga_001.py} (89%) delete mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py 
delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py delete mode 100644 sfaira/data/external.py create mode 100644 sfaira/data/templates/__init__.py create mode 100644 sfaira/data/templates/dataloaders/__init__.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py create mode 100644 sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py create mode 100644 sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py create mode 100644 sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py create mode 100644 sfaira/data/utils/create_meta_and_cache.py diff --git a/docs/data.rst b/docs/data.rst index 13fda40a0..96a369935 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -209,6 +209,12 @@ Alternatively, we also provide the optional dependency sfaira_extensions (https: in which local data and cell type annotation can be managed 
separately but still be loaded as usual through sfaira. The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. +To get going, consider copying over code from our collection of template_ study-centric data loader directories. +In these templates, it is clearly annotated which code fragment can remain constant +and which have to be addressed by you. + +.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders + Map cell type labels to ontology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -230,14 +236,30 @@ Repetitive data loader code There are instances in which you find yourself copying code between data loader files corresponding to one study. In most of these cases, you can avoid the copy operations and share the code more efficiently. -If you have multiple data files which each correspond to a data set and are structured similarly, you can define a super -class which contains the shared constructor and `_load()` code, from which each data set specific loader inherits. -ToDo: Example. - -If you have a single file which contains the data from multiple data sets which belong to a data loader each, -because of different meta data or batches for example, -you can set up a `group.py` file which defines a DatasetGroup for this study, which controls the generation of Datasets. -ToDo: Example. +If you have multiple data sets in a study which are all saved in separate files which come in similar formats: +You can subclass `DatasetBaseGroupLoadingManyFiles` instead of `DatasetBase` and proceed as usual, +only with adding `SAMPLE_FNS` in the data loader file name space, +which is a list of all file names addressed with this file. +You can then refer to an additional property of the Dataset class, `self.sample_fn` during loading +or when dynamically defining meta data in the constructor. +Consider also this template_ and this example_. 
+Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. + +.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined +.. _example: https://github.com/theislab/sfaira/tree/dev/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130 + +If you have multiple data sets in a study which are all saved in one file: +You can subclass `DatasetBaseGroupLoadingOneFile` instead of `DatasetBase` and proceed as usual, +only with adding `SAMPLE_IDS` in the data loader file name space, +which is a list of all sample IDs addressed with this file. +You can then refer to an additional property of the Dataset class, `self.sample_id` during loading +or when dynamically defining meta data in the constructor. +Note that `self.sample_id` refers to a `self.adata.obs` column in the loaded data set, +this column has to be defined in `self.obs_key_sample`, which needs to be defined in the constructor. +Consider also this template_. +Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. + +.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders/many_samples_one_file Cell type ontology management ----------------------------- diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index ac96ab48f..f5ff50212 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,4 @@ -from .base import DatasetBase, DatasetBaseGroupLoading, \ +from .base import DatasetBase, DatasetBaseGroupLoadingOneFile, DatasetBaseGroupLoadingManyFiles, \ DatasetGroup, DatasetGroupDirectoryOriented, \ DatasetSuperGroup from . 
import dataloaders diff --git a/sfaira/data/base.py b/sfaira/data/base.py index b6e1acdac..516eb6fa5 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -14,8 +14,8 @@ from typing import Dict, List, Tuple, Union import warnings -from .external import SuperGenomeContainer -from .external import ADATA_IDS_SFAIRA, META_DATA_FIELDS +from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS UNS_STRING_META_IN_OBS = "__obs__" @@ -81,6 +81,7 @@ class DatasetBase(abc.ABC): _obs_key_organ: Union[None, str] _obs_key_organism: Union[None, str] _obs_key_protocol: Union[None, str] + _obs_key_sample: Union[None, str] _obs_key_sex: Union[None, str] _obs_key_state_exact: Union[None, str] @@ -134,6 +135,7 @@ def __init__( self._obs_key_organ = None self._obs_key_organism = None self._obs_key_protocol = None + self._obs_key_sample = None self._obs_key_sex = None self._obs_key_state_exact = None @@ -535,15 +537,6 @@ def _set_metadata_in_adata(self, celltype_version): celltype_version=celltype_version ) - def subset_organs(self, subset: Union[None, List]): - if self.organ == "mixed": - self.organsubset = subset - else: - raise ValueError("Only data that contain multiple organs can be subset.") - if self.adata is not None: - warnings.warn("You are trying to subset organs after loading the dataset." 
- "This will have no effect unless the dataset is loaded again.") - def load_tobacked( self, adata_backed: anndata.AnnData, @@ -1160,6 +1153,15 @@ def obs_key_protocol(self, x: str): self.__erasing_protection(attr="obs_key_protocol", val_old=self._obs_key_protocol, val_new=x) self._obs_key_protocol = x + @property + def obs_key_sample(self) -> str: + return self._obs_key_sample + + @obs_key_sample.setter + def obs_key_sample(self, x: str): + self.__erasing_protection(attr="obs_key_sample", val_old=self._obs_key_sample, val_new=x) + self._obs_key_sample = x + @property def obs_key_sex(self) -> str: return self._obs_key_sex @@ -1344,15 +1346,65 @@ def __value_protection(self, attr, allowed, attempted): if x not in allowed: raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + def subset_cells(self, key, values): + """ + Subset list of adata objects based on cell-wise properties. + + These keys are properties that are not available in lazy model and require loading first because the + subsetting works on the cell-level: .adata are maintained but reduced to matches. + + :param key: Property to subset by. Options: + + - "age" points to self.obs_key_age + - "cell_ontology_class" points to self.obs_key_cellontology_original + - "dev_stage" points to self.obs_key_dev_stage + - "ethnicity" points to self.obs_key_ethnicity + - "healthy" points to self.obs_key_healthy + - "organ" points to self.obs_key_organ + - "organism" points to self.obs_key_organism + - "protocol" points to self.obs_key_protocol + - "sex" points to self.obs_key_sex + - "state_exact" points to self.obs_key_state_exact + :param values: Classes to overlap to. 
+ :return: + """ + if not isinstance(values, list): + values = [values] + + def get_subset_idx(samplewise_key, cellwise_key): + obs_key = getattr(self, cellwise_key) + sample_attr = getattr(self, samplewise_key) + if sample_attr is not None and obs_key is None: + if not isinstance(sample_attr, list): + sample_attr = [sample_attr] + if np.any([x in values for x in sample_attr]): + idx = np.arange(1, self.ncells) + else: + idx = np.array([]) + elif sample_attr is None and obs_key is not None: + assert self.adata is not None, "adata was not yet loaded" + values_found = self.adata.obs[obs_key].values + idx = np.where([x in values for x in values_found]) + elif sample_attr is not None and obs_key is not None: + assert False, f"both cell-wise and sample-wise attribute {samplewise_key} given" + else: + assert False, "no subset chosen" + return idx + + idx_keep = get_subset_idx(samplewise_key="obs_key_" + key, cellwise_key=key) + self.adata = self.adata[idx_keep, :].copy() -class DatasetBaseGroupLoading(DatasetBase): + +class DatasetBaseGroupLoadingOneFile(DatasetBase): """ - Container class specific to datasets which come in groups and require specialised loading. + Container class specific to datasets which come in groups and in which data sets are saved in a single file. """ _unprocessed_full_group_object: bool + _sample_id: str def __init__( self, + sample_id: str, path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, @@ -1360,9 +1412,14 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self._unprocessed_full_group_object = False + self._sample_id = sample_id + + @property + def sample_id(self): + return self._sample_id @abc.abstractmethod - def _load_full_group_object(self, fn=None) -> Union[None, anndata.AnnData]: + def _load_full(self, fn=None) -> anndata.AnnData: """ Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. 
@@ -1375,7 +1432,7 @@ def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.An if self.adata is None and adata_group is not None: self.adata = adata_group elif self.adata is None and adata_group is not None: - self.adata = self._load_full_group_object(fn=fn) + self.adata = self._load_full(fn=fn) elif self.adata is not None and self._unprocessed_full_group_object: pass else: @@ -1390,7 +1447,8 @@ def _load_from_group(self): Override this method in the Dataset if this is relevant. """ - pass + assert self.obs_key_sample is not None, "self.obs_key_sample needs to be set" + self._subset_from_group(subset_items={self.obs_key_sample: self.sample_id}) def _subset_from_group( self, @@ -1414,6 +1472,29 @@ def _load(self, fn): self._unprocessed_full_group_object = False +class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): + """ + Container class specific to datasets which come in groups and in which data sets are saved in separate but + streamlined files. + """ + _sample_fn: str + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self._sample_fn = sample_fn + + @property + def sample_fn(self): + return self._sample_fn + + class DatasetGroup: """ Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through @@ -1716,7 +1797,7 @@ def format_type_version(self, version): def subset(self, key, values): """ - Subset list of adata objects based on match to values in key property. + Subset list of adata objects based on sample-wise properties. These keys are properties that are available in lazy model. Subsetting happens on .datasets. 
@@ -1740,12 +1821,32 @@ def subset(self, key, values): for x in ids_del: del self.datasets[x] - def subset_organs(self, subset: Union[None, List]): - for i in self.ids: - if self.datasets[i].organ == "mixed": - self.datasets[i].subset_organs(subset) - else: - raise ValueError("Only data that contain multiple organs can be subset.") + def subset_cells(self, key, values: Union[str, List[str]]): + """ + Subset list of adata objects based on cell-wise properties. + + These keys are properties that are not available in lazy model and require loading first because the + subsetting works on the cell-level: .adata are maintained but reduced to matches. + + :param key: Property to subset by. Options: + + - "age" points to self.obs_key_age + - "cell_ontology_class" points to self.obs_key_cellontology_original + - "dev_stage" points to self.obs_key_dev_stage + - "ethnicity" points to self.obs_key_ethnicity + - "healthy" points to self.obs_key_healthy + - "organ" points to self.obs_key_organ + - "organism" points to self.obs_key_organism + - "protocol" points to self.obs_key_protocol + - "sex" points to self.obs_key_sex + - "state_exact" points to self.obs_key_state_exact + :param values: Classes to overlap to. + :return: + """ + for x in self.ids: + self.datasets[x].subset_cells(key=key, values=values) + if self.datasets[x].ncells == 0: # none left + del self.datasets[x] class DatasetGroupDirectoryOriented(DatasetGroup): @@ -1774,7 +1875,7 @@ def __init__( dataset_module = str(cwd.split("/")[-1]) if "group.py" in os.listdir(cwd): DatasetGroupFound = pydoc.locate( - "sfaira.sfaira.data.dataloaders.loaders." + dataset_module + ".group.DatasetGroup") + "sfaira.data.dataloaders.loaders." 
+ dataset_module + ".group.DatasetGroup") dsg = DatasetGroupFound(path=path, meta_path=meta_path, cache_path=cache_path) datasets.extend(list(dsg.datasets.values)) else: @@ -1784,8 +1885,43 @@ def __init__( if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: file_module = ".".join(f.split(".")[:-1]) DatasetFound = pydoc.locate( - "sfaira.sfaira.data.dataloaders.loaders." + dataset_module + "." + file_module + ".Dataset") - datasets.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + "sfaira.data.dataloaders.loaders." + dataset_module + "." + + file_module + ".Dataset") + # Check if global objects are available: + # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles + # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + sample_fns = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dataset_module + "." + + file_module + ".SAMPLE_FNS") + sample_ids = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dataset_module + "." + + file_module + ".SAMPLE_IDS") + if sample_fns is not None and sample_ids is None: + # DatasetBaseGroupLoadingManyFiles: + datasets.extend([ + DatasetFound( + sample_fn=x, + path=path, + meta_path=meta_path, + cache_path=cache_path, + ) + for x in sample_fns + ]) + elif sample_fns is None and sample_ids is not None: + # DatasetBaseGroupLoadingManyFiles: + datasets.extend([ + DatasetFound( + sample_id=x, + path=path, + meta_path=meta_path, + cache_path=cache_path, + ) + for x in sample_ids + ]) + elif sample_fns is not None and sample_ids is not None: + raise ValueError(f"sample_fns and sample_ids both found for {f}") + else: + datasets.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) @@ -2058,10 +2194,29 @@ def subset(self, key, values): """ for x in self.dataset_groups: x.subset(key=key, values=values) - self.dataset_groups = [x for x in self.dataset_groups if x.datasets] # 
Delete empty DatasetGroups - def subset_organs(self, subset: Union[None, List]): - for x in self.dataset_groups: - if x.datasets[0].organ == "mixed": - x.subset_organs(subset) + def subset_cells(self, key, values: Union[str, List[str]]): + """ + Subset list of adata objects based on cell-wise properties. + + These keys are properties that are not available in lazy model and require loading first because the + subsetting works on the cell-level: .adata are maintained but reduced to matches. + + :param key: Property to subset by. Options: + + - "age" points to self.obs_key_age + - "cell_ontology_class" points to self.obs_key_cellontology_original + - "dev_stage" points to self.obs_key_dev_stage + - "ethnicity" points to self.obs_key_ethnicity + - "healthy" points to self.obs_key_healthy + - "organ" points to self.obs_key_organ + - "organism" points to self.obs_key_organism + - "protocol" points to self.obs_key_protocol + - "sex" points to self.obs_key_sex + - "state_exact" points to self.obs_key_state_exact + :param values: Classes to overlap to. 
+ :return: + """ + for x in self.dataset_groups.ids: + self.dataset_groups[x].subset_cells(key=key, values=values) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py index c4dfd5b7c..3565250ac 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py @@ -1,6 +1,5 @@ from .human_adipose import DatasetGroupAdipose from .human_adrenalgland import DatasetGroupAdrenalgland -from .human_mixed import DatasetGroupMixed from .human_artery import DatasetGroupArtery from .human_bladder import DatasetGroupBladder from .human_blood import DatasetGroupBlood diff --git a/sfaira/data/dataloaders/anatomical_groups/human/external.py b/sfaira/data/dataloaders/anatomical_groups/human/external.py deleted file mode 100644 index 413092483..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroup -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py index 4a531d920..201370e91 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adipose_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupAdipose(DatasetGroup): @@ -13,14 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = 
DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py index 0c6ab1bfa..0ffb36613 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -1,13 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_003 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_004 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_005 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_adrenalgland_2020_microwell_han_006 import Dataset as Dataset0006 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupAdrenalgland(DatasetGroup): 
@@ -18,19 +12,15 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4", + "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupAdrenalgland - self.datasets.update(DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py index 0aa3abedf..b18b55e36 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_artery_2020_microwell_han_001 import 
Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupArtery(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupArtery - self.datasets.update(DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py index f39d8a55a..a61f32a3f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bladder_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBladder(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None 
): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py index a63658118..72d033a38 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -1,16 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d_nan.human_blood_2018_10x_ica_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d_nan.human_blood_2019_10x_10xGenomics_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_002 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_003 import Dataset as Dataset0005 -from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_004 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_005 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_006 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_blood_2020_microwell_han_007 import Dataset as Dataset0009 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBlood(DatasetGroup): @@ -21,22 +12,18 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_blood_2018_10x_ica_001", + "human_blood_2019_10x_10xGenomics_001", + "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4", + 
"human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4", + "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py index 0decbe187..8297cfeb7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d_nan.human_bone_2018_10x_ica_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bone_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_bone_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBone(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_bone_2018_10x_ica_001", + "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4", + 
"human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py index 1f81ae71c..86c507a2e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -1,14 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_nmeth_4407.human_brain_2017_DroNcSeq_habib_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_002 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_003 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_004 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_005 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_brain_2020_microwell_han_006 import Dataset as Dataset0007 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBrain(DatasetGroup): @@ -19,20 +12,16 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - 
datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_brain_2017_DroNcSeq_habib_001", + "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4", + "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py index 24a8c4c6a..44310af2c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_calvaria_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base 
import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupCalvaria(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCalvaria - self.datasets.update(DatasetGroupCalvaria(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py index 07b677bd2..207b9ca9a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_cervix_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupCervix(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = 
dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupCervix - self.datasets.update(DatasetGroupCervix(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py index 6098b30a2..0118fc2f5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_chorionicvillus_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupChorionicvillus(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupChorionicvillus - self.datasets.update(DatasetGroupChorionicvillus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py index ab40519be..7c59bf553 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -1,15 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_08_067.human_colon_2019_10x_kinchen_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2019_06_029.human_colon_2019_10x_smilie_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_colon_2019_10x_wang_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41590_020_0602_z.human_colon_2020_10x_james_001 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_001 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_002 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_003 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_colon_2020_microwell_han_004 import Dataset as Dataset0008 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupColon(DatasetGroup): @@ -20,21 +12,17 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, 
meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_colon_2019_10x_kinchen_001", + "human_colon_2019_10x_smilie_001", + "human_colon_2019_10x_wang_001", + "human_colon_2020_10x_james_001", + "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py index f7ce00833..32daabf11 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_duodenum_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupDuodenum(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = 
DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupDuodenum - self.datasets.update(DatasetGroupDuodenum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py index 21f9cae8f..f5d60e28d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_epityphlon_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupEpityphlon(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEpityphlon - self.datasets.update(DatasetGroupEpityphlon(path=path, meta_path=meta_path, 
cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py index c3300c274..3bd2cfc08 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_esophagus_2019_10x_madissoon_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_esophagus_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_esophagus_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupEsophagus(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_esophagus_2019_10x_madissoon_001", + "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupEsophagus - self.datasets.update(DatasetGroupEsophagus(path=path, meta_path=meta_path, 
cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py index 68ee322cb..83a6fbbb6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -1,11 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_15252_embj_2018100811.human_eye_2019_10x_lukowski_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_12780_8.human_eye_2019_10x_menon_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1073_pnas_1914143116.human_eye_2019_10x_voigt_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_eye_2020_microwell_han_001 import Dataset as Dataset0004 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupEye(DatasetGroup): @@ -16,17 +12,13 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_eye_2019_10x_lukowski_001", + "human_eye_2019_10x_menon_001", + "human_eye_2019_10x_voigt_001", + "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupEye - self.datasets.update(DatasetGroupEye(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py index 3ed7986a2..855db417a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_fallopiantube_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupFallopiantube(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFallopiantube - self.datasets.update(DatasetGroupFallopiantube(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py index 237ad73e0..ffee659e9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_femalegonad_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_femalegonad_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupFemalegonad(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py index 0e0a033f4..0fc9ae9e7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_gallbladder_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupGallbladder(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupGallbladder - self.datasets.update(DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py index 2425637cb..7c79ea374 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -1,11 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_003 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_heart_2020_microwell_han_004 import Dataset as Dataset0004 +from 
sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupHeart(DatasetGroup): @@ -16,17 +12,13 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py index c32bd0730..ab3e7f567 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_hesc_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupHesc(DatasetGroup): @@ -13,14 
+12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupHesc - self.datasets.update(DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py index 93d9ad6b0..0dae51294 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2019_08_008.human_ileum_2019_10x_martin_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_ileum_2019_10x_wang_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ileum_2020_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupIleum(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, 
cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_ileum_2019_10x_martin_001", + "human_ileum_2019_10x_wang_001", + "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py index d9a7fb8e6..111cc268a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_jejunum_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupJejunum(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupJejunum - 
self.datasets.update(DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py index 2a6f1bd94..dd5fba448 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -1,17 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_10861_2.human_kidney_2019_10xSn_lake_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1126_science_aat5031.human_kidney_2019_10x_stewart_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41597_019_0351_8.human_kidney_2020_10x_liao_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_001 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_002 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_003 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_004 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_005 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_006 import Dataset as Dataset0009 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_kidney_2020_microwell_han_007 import Dataset as Dataset0010 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupKidney(DatasetGroup): @@ -22,23 
+12,19 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_kidney_2019_10xSn_lake_001", + "human_kidney_2019_10x_stewart_001", + "human_kidney_2020_10x_liao_001", + "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4", + "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py index 4674aedc0..d6b5bebba 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -1,16 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41467_018_06318_7.human_liver_2018_10x_macparland_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1652_y.human_liver_2019_10x_popescu_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1631_3.human_liver_2019_10x_ramachandran_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_019_1373_2.human_liver_2019_mCELSeq2_aizarani_001 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_001 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_002 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_003 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_004 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_liver_2020_microwell_han_005 import Dataset as Dataset0009 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupLiver(DatasetGroup): @@ -21,22 +12,18 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - 
Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_liver_2018_10x_macparland_001", + "human_liver_2019_10x_popescu_001", + "human_liver_2019_10x_ramachandran_001", + "human_liver_2019_mCELSeq2_aizarani_001", + "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py index 2d312fd3e..1cc3504f2 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -1,22 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_001 import Dataset as Dataset0001 -from 
sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_10x_braga_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_lung_2019_10x_madissoon_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41591_019_0468_5.human_lung_2019_dropseq_braga_003 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1101_753806.human_lung_2020_10x_habermann_001 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_001 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1101_2020_03_13_991455.human_lung_2020_10x_lukassen_002 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1016_j_devcel_2020_01_033.human_lung_2020_10x_miller_001 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_10x_travaglini_001 import Dataset as Dataset0009 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_001 import Dataset as Dataset0010 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_002 import Dataset as Dataset0011 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_003 import Dataset as Dataset0012 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_004 import Dataset as Dataset0013 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_lung_2020_microwell_han_005 import Dataset as Dataset0014 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2922_4.human_lung_2020_smartseq2_travaglini_002 import Dataset as Dataset0015 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupLung(DatasetGroup): @@ -27,28 +12,24 @@ def __init__( meta_path: 
Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0012(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0013(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0014(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0015(path=path, meta_path=meta_path, cache_path=cache_path), - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_lung_2019_10x_braga_001", + "human_lung_2019_10x_braga_002", + "human_lung_2019_dropseq_braga_003", + "human_lung_2019_10x_madissoon_001", + "human_lung_2020_10x_habermann_001", + "human_lung_2020_10x_lukassen_001", + "human_lung_2020_10x_lukassen_002", + "human_lung_2020_10x_miller_001", + "human_lung_2020_10x_travaglini_001", + "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4", + "human_lung_2020_smartseq2_travaglini_002", + ]) + datasets = 
dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py index 0b607e309..95bebf457 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41422_018_0099_2.human_malegonad_2018_10x_guo_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_malegonad_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupMalegonad(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_malegonad_2018_10x_guo_001", + "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys 
= [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py b/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py deleted file mode 100644 index 018f0c413..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_mixed.py +++ /dev/null @@ -1,26 +0,0 @@ -from typing import Union - -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41467_019_12464_3.human_mixed_2019_10x_szabo_001 import Dataset as Dataset0001 - - -class DatasetGroupMixed(DatasetGroup): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMixed - self.datasets.update(DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py index 6ca10dad9..d019e27dc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_001 import Dataset as Dataset0001 -from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_muscle_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupMuscle(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py index 1af19a624..201ebbdc4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_omentum_2020_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.base
import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupOmentum(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupOmentum - self.datasets.update(DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py index 08b067dd7..e20280602 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -1,14 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cels_2016_08_011.human_pancreas_2016_indrop_baron_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2016_08_020.human_pancreas_2016_smartseq2_segerstolpe_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2017_09_004.human_pancreas_2017_smartseq2_enge_001 import Dataset as Dataset0003 -from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_001 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_002 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_003 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pancreas_2020_microwell_han_004 import Dataset as Dataset0007 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupPancreas(DatasetGroup): @@ -19,20 +12,16 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_pancreas_2016_indrop_baron_001", + "human_pancreas_2016_smartseq2_segerstolpe_001", + "human_pancreas_2017_smartseq2_enge_001", + "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if 
available: - try: - from sfaira_extension.data.human import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py index 106b9cd20..8476af59f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_smartseq2_ventotormo_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_018_0698_6.human_placenta_2018_10x_ventotormo_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_placenta_2020_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupPlacenta(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_placenta_2018_smartseq2_ventotormo_001", + "human_placenta_2018_10x_ventotormo_002", + "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from 
sfaira_extension.data.human import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py index 61a2f6be7..52867a5fb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_pleura_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupPleura(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupPleura - self.datasets.update(DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py index 3ed1f9a44..52a9288a8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -1,9 +1,7 @@ from typing import 
Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_celrep_2018_11_086.human_prostate_2018_10x_henry_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_prostate_2020_microwell_han_001 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupProstate(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_prostate_2018_10x_henry_001", + "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py index 67ee06c82..d497cd46a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1084_jem_20191130.human_rectum_2019_10x_wang_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rectum_2020_microwell_han_001 import Dataset as Dataset0002 +from 
sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupRectum(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_rectum_2019_10x_wang_001", + "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRectum - self.datasets.update(DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py index a39e0646a..ceecd92ad 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_rib_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupRib(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, 
cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py index 30985fe65..dc38e5dd0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_skin_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupSkin(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4", + 
"human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py index 4434146c8..a90effd8e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spinalcord_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupSpinalcord(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpinalcord - self.datasets.update(DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py index fa36e2bf7..b19f84abf 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1186_s13059_019_1906_x.human_spleen_2019_10x_madissoon_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_spleen_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupSpleen(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_spleen_2019_10x_madissoon_001", + "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py index b6030d318..0a7180157 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -1,17 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_003 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_004 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_005 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_006 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_007 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_008 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_009 import Dataset as Dataset0009 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_stomach_2020_microwell_han_010 import Dataset as Dataset0010 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupStomach(DatasetGroup): @@ -22,23 +12,19 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - 
Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4", + "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py index 9ece40261..0e5045b37 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -1,10 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1126_science_aay3224.human_thymus_2020_10x_park_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thymus_2020_microwell_han_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupThymus(DatasetGroup): @@ -15,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_thymus_2020_10x_park_001", + "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py index e521b4f37..7097ae580 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -1,9 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_thyroid_2020_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupThyroid(DatasetGroup): @@ -14,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4", + "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupThyroid - self.datasets.update(DatasetGroupThyroid(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py index 1fb26ad18..236ece19d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from 
sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_trachea_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupTrachea(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py index 143f88545..5a9562560 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_ureter_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupUreter(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, 
cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUreter - self.datasets.update(DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py index 303e1ed50..dc5d21528 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -1,8 +1,7 @@ from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1038_s41586_020_2157_4.human_uterus_2020_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupUterus(DatasetGroup): @@ -13,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.human import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/external.py b/sfaira/data/dataloaders/anatomical_groups/mouse/external.py deleted file mode 100644 index 413092483..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroup -from sfaira.consts import ADATA_IDS_SFAIRA diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py index f7532d03c..f0e4823cd 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -1,13 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_002 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_003 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_adipose_2019_smartseq2_pisco_004 import Dataset as Dataset0005 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupAdipose(DatasetGroup): @@ -18,18 +12,14 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, 
cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_adipose_2019_10x_pisco_001_10.1101/661728", + "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728", + "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728", + "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupAdipose - self.datasets.update(DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py index 6576b2987..9fa4f8391 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bladder_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bladder_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBladder(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, 
cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_bladder_2019_10x_pisco_001_10.1101/661728", + "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBladder - self.datasets.update(DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py index 35638cefa..f846cdfd7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -1,13 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_003 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_004 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_blood_2018_microwell_han_005 import Dataset as Dataset0005 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBlood (DatasetGroup): @@ 
-18,18 +12,14 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", + "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", + "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBlood - self.datasets.update(DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py index f2135c456..ee5ccc3b4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_bone_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_bone_2018_microwell_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBone(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_bone_2019_10x_pisco_001_10.1101/661728", + "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_bone_2018_microwell_001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBone - self.datasets.update(DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py index 5b932749e..8fdba3957 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -1,12 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_brain_2019_smartseq2_pisco_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_001 import 
Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_brain_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupBrain(DatasetGroup): @@ -17,17 +12,13 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728", + "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupBrain - self.datasets.update(DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py index 6507cd3fd..188d142dd 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_10x_pisco_001 import Dataset as Dataset0001 -from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_colon_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupColon(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_colon_2019_10x_pisco_001_10.1101/661728", + "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupColon - self.datasets.update(DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py index 7c69001d0..4127baa4c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -1,9 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_diaphragm_2019_smartseq2_pisco_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupDiaphragm(DatasetGroup): @@ -14,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, 
meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupDiaphragm - self.datasets.update(DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py index b2d687412..fdc177d65 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_femalegonad_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupFemalegonad(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + 
"mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupFemalegonad - self.datasets.update(DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py index 50458cd02..f48849341 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -1,12 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_heart_2019_smartseq2_pisco_002 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupHeart(DatasetGroup): @@ -17,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_heart_2019_10x_pisco_001_10.1101/661728", + "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_heart_2019_smartseq2_pisco_002_10.1101/661728" 
+ ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupHeart - self.datasets.update(DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py index bcd9fd9ca..efe1768f5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_ileum_2018_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupIleum(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", + 
]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupIleum - self.datasets.update(DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py index ce6788cd6..4341b6e04 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -1,12 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_kidney_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_kidney_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupKidney(DatasetGroup): @@ -17,17 +12,13 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + 
"mouse_kidney_2019_10x_pisco_001_10.1101/661728", + "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupKidney - self.datasets.update(DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py index e9915b36b..a8b5ddc7f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -1,12 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_liver_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_liver_2018_microwell_han_002 import Dataset as Dataset0004 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupLiver(DatasetGroup): @@ -17,17 +12,13 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, 
cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_liver_2019_10x_pisco_001_10.1101/661728", + "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLiver - self.datasets.update(DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py index b6e6c9e5c..f1133c2e3 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -1,13 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_lung_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_002 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_lung_2018_microwell_han_003 import Dataset as Dataset0005 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class 
DatasetGroupLung(DatasetGroup): @@ -18,18 +12,14 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_lung_2019_10x_pisco_001_10.1101/661728", + "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupLung - self.datasets.update(DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py index 8e62116b5..2f7889c98 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_malegonad_2018_microwell_han_002 import Dataset as 
Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupMalegonad(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMalegonad - self.datasets.update(DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py index e5bd9eb2e..923ca32c2 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -1,14 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_mammarygland_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_001 import Dataset as Dataset0003 -from 
sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_002 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_003 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_mammarygland_2018_microwell_han_004 import Dataset as Dataset0006 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupMammaryGland(DatasetGroup): @@ -19,19 +12,15 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728", + "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", + "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMammaryGland - self.datasets.update(DatasetGroupMammaryGland(path=path, meta_path=meta_path, 
cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py index 6e1deee58..b50c32a16 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_muscle_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_muscle_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupMuscle(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_muscle_2019_10x_pisco_001_10.1101/661728", + "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupMuscle - self.datasets.update(DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except 
ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py index a70918270..029894169 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -1,19 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_pancreas_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_pancreas_2018_microwell_han_001 import Dataset as Dataset0003 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_001 import Dataset as Dataset0004 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_002 import Dataset as Dataset0005 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_003 import Dataset as Dataset0006 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_004 import Dataset as Dataset0007 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_005 import Dataset as Dataset0008 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_006 import Dataset as Dataset0009 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_007 import Dataset as Dataset0010 -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021.mouse_pancreas_2019_10x_thompson_008 import Dataset as Dataset0011 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class 
DatasetGroupPancreas(DatasetGroup): @@ -24,24 +12,20 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0004(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0005(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0006(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0007(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0008(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0009(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0010(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0011(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_pancreas_2019_10x_pisco_001_10.1101/661728", + "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_pancreas_2019_10x_thompson_001", + "mouse_pancreas_2019_10x_thompson_002", + "mouse_pancreas_2019_10x_thompson_003", + "mouse_pancreas_2019_10x_thompson_004", + "mouse_pancreas_2019_10x_thompson_005", + "mouse_pancreas_2019_10x_thompson_006", + "mouse_pancreas_2019_10x_thompson_007", + "mouse_pancreas_2019_10x_thompson_008", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPancreas - self.datasets.update(DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py index 04a87566d..e13c2ba99 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_placenta_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupPlacenta(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupPlacenta - self.datasets.update(DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py index a816076de..de8347a3a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py 
+++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_prostate_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupProstate(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupProstate - self.datasets.update(DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py index 069b179df..3f62aa3ea 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_001 
import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_002 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_rib_2018_microwell_han_003 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupRib(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", + "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupRib - self.datasets.update(DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py index d0a012add..4430d228d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_10x_pisco_001 import Dataset as Dataset0001 -from 
sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_skin_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupSkin(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_skin_2019_10x_pisco_001_10.1101/661728", + "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSkin - self.datasets.update(DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py index 8bd62249a..364373594 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_spleen_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_spleen_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group 
import DatasetSuperGroupSfaira class DatasetGroupSpleen(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_spleen_2019_10x_pisco_001_10.1101/661728", + "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupSpleen - self.datasets.update(DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py index 15b2fcd63..0ce1d3737 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -1,9 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_stomach_2018_microwell_han_001 import Dataset as Dataset0001 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupStomach(DatasetGroup): @@ -14,14 +12,10 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = 
DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupStomach - self.datasets.update(DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py index a2f30d3df..d17112840 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -1,11 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_thymus_2019_smartseq2_pisco_001 import Dataset as Dataset0002 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_thymus_2018_microwell_han_001 import Dataset as Dataset0003 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupThymus(DatasetGroup): @@ -16,16 +12,12 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0003(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + 
"mouse_thymus_2019_10x_pisco_001_10.1101/661728", + "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728", + "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupThymus - self.datasets.update(DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py index 695f666de..e294d80f8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_tongue_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupTongue(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_tongue_2019_10x_pisco_001_10.1101/661728", + "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions 
from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTongue - self.datasets.update(DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py index eaff5c910..6b8a1bd31 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_10x_pisco_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1101_661728.mouse_trachea_2019_smartseq2_pisco_001 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupTrachea(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_trachea_2019_10x_pisco_001_10.1101/661728", + "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728", + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupTrachea - self.datasets.update(DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py index 7513bf339..a58af9178 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -1,10 +1,7 @@ -import os from typing import Union -from .external import DatasetGroup - -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_001 import Dataset as Dataset0001 -from sfaira.data.dataloaders.loaders.d10_1016_j_cell_2018_02_001.mouse_uterus_2018_microwell_han_002 import Dataset as Dataset0002 +from sfaira.data.base import DatasetGroup +from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira class DatasetGroupUterus(DatasetGroup): @@ -15,15 +12,11 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - datasets = [ - Dataset0001(path=path, meta_path=meta_path, cache_path=cache_path), - Dataset0002(path=path, meta_path=meta_path, cache_path=cache_path) - ] + dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg.subset(key="id", values=[ + "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", + "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + ]) + datasets = dsg.flatten().datasets keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) - # Load versions from extension if available: - try: - from sfaira_extension.data.mouse import DatasetGroupUterus - self.datasets.update(DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path).datasets) - except ImportError: - pass diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py deleted file mode 100644 index 5128278fd..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_001.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_001_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308545_NOD_08w_A_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py deleted file mode 100644 index 142e0f759..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_002.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_002_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load 
or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308547_NOD_08w_C_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py deleted file mode 100644 index e8ba6f466..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_003.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_003_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308548_NOD_14w_A_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py deleted file mode 100644 index 50d2cf114..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_004.py +++ /dev/null @@ -1,26 +0,0 @@ -import 
os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_004_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308549_NOD_14w_B_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py deleted file mode 100644 index 06b60eaaa..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_005.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_005_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308550_NOD_14w_C") - fn_meta = os.path.join(self.path, 
"mouse", "pancreas", "GSM3308550_NOD_14w_C_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py deleted file mode 100644 index e9c96a3e5..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_006.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_006_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308551_NOD_16w_A_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py deleted file mode 100644 index efc3678ed..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_007.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class 
Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_007_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308552_NOD_16w_B_annotation.csv") - else: - fn_meta = os.path.join(fn, "_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py deleted file mode 100644 index b64c76432..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_008.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cmet_2019_01_021 - - -class Dataset(Dataset_d10_1016_j_cmet_2019_01_021): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_pancreas_2019_10x_thompson_008_10.1016/j.cmet.2019.01.021" - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C") - fn_meta = os.path.join(self.path, "mouse", "pancreas", "GSM3308553_NOD_16w_C_annotation.csv") - else: - fn_meta = os.path.join(fn, 
"_annotation.csv") - self._load_generalized(fn=fn, fn_meta=fn_meta) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py similarity index 57% rename from sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 381dc45e7..00f85685d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -3,19 +3,34 @@ import os import pandas from typing import Union -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "GSM3308545_NOD_08w_A", + "GSM3308547_NOD_08w_A", + "GSM3308548_NOD_14w_A", + "GSM3308549_NOD_14w_B", + "GSM3308550_NOD_14w_C", + "GSM3308551_NOD_16w_A", + "GSM3308552_NOD_16w_B", + "GSM3308553_NOD_16w_C" +] -class Dataset_d10_1016_j_cmet_2019_01_021(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_" \ + f"10.1016/j.cmet.2019.01.021" + self.download = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" self.author = "Bhushan" @@ -50,12 +65,13 @@ def __init__( }, } - def _load_generalized(self, fn, fn_meta): - celltypes = pandas.read_csv(fn_meta, index_col=0) + def _load(self, fn=None): + path_base = os.path.join(self.path, "mouse", "pancreas") + celltypes = 
pandas.read_csv(os.path.join(path_base, self.sample_fn + "_annotation.csv"), index_col=0) - self.adata = anndata.read_mtx(fn + "_matrix.mtx.gz").transpose() - self.adata.var_names = np.genfromtxt(fn + "_genes.tsv.gz", dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(fn + "_barcodes.tsv.gz", dtype=str) + self.adata = anndata.read_mtx(os.path.join(path_base, self.sample_fn + "_matrix.mtx.gz")).transpose() + self.adata.var_names = np.genfromtxt(os.path.join(path_base, self.sample_fn + "_genes.tsv.gz"), dtype=str)[:, 1] + self.adata.obs_names = np.genfromtxt(os.path.join(path_base, self.sample_fn + "_barcodes.tsv.gz"), dtype=str) self.adata.var_names_make_unique() self.adata = self.adata[celltypes.index] self.adata.obs["celltypes"] = celltypes diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 5939cc247..f9d31966d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -5,20 +5,40 @@ import pandas as pd import scipy.sparse -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "GSM3589406_PP001swap.filtered.matrix.txt.gz", + "GSM3589407_PP002swap.filtered.matrix.txt.gz", + "GSM3589408_PP003swap.filtered.matrix.txt.gz", + "GSM3589409_PP004swap.filtered.matrix.txt.gz", + "GSM3589410_PP005swap.filtered.matrix.txt.gz", + "GSM3589411_PP006swap.filtered.matrix.txt.gz", + "GSM3589412_PP009swap.filtered.matrix.txt.gz", + "GSM3589413_PP010swap.filtered.matrix.txt.gz", + "GSM3589414_PP011swap.filtered.matrix.txt.gz", + "GSM3589415_PP012swap.filtered.matrix.txt.gz", + "GSM3589416_PP013swap.filtered.matrix.txt.gz", + "GSM3589417_PP014swap.filtered.matrix.txt.gz", + "GSM3589418_PP017swap.filtered.matrix.txt.gz", 
+ "GSM3589419_PP018swap.filtered.matrix.txt.gz", + "GSM3589420_PP019swap.filtered.matrix.txt.gz", + "GSM3589421_PP020swap.filtered.matrix.txt.gz", +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_mixed_2019_10x_szabo_001_10.1038/s41467-019-12464-3" + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_10.1038/s41467-019-12464-3" self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" self.download_meta = "private" @@ -38,80 +58,48 @@ def __init__( self.obs_key_cellontology_original = "cell_ontology_class" self.obs_key_organ = "organ" - # self.loaded = False # TODO do this differently? 
- self.class_maps = { "0": {}, } def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), - os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), - os.path.join(self.path, "human", "mixed", "donor2.annotation.txt"), - ] - adatas = [] - with tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - df = pd.read_csv(tar.extractfile(member.name), compression="gzip", sep="\t") - df.index = [i.split(".")[0] for i in df["Accession"]] - var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) - if df.columns[-1].startswith("Un"): - df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - self.adata = anndata.AnnData(df.T) - self.adata.var = var - if "PP001" in member.name or "PP002" in member.name: - self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Lung" - elif "PP003" in member.name or "PP004" in member.name: - self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Bone Marrow" - elif "PP005" in member.name or "PP006" in member.name: - self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Lymph Node" - elif "PP009" in member.name or "PP010" in member.name: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Lung" - elif "PP011" in member.name or "PP012" in member.name: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Bone Marrow" - elif "PP013" in member.name or "PP014" in member.name: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Lymph Node" - else: - continue - self.adata.obs.index = member.name.split("_")[1].split("s")[0] + "nskept." 
+ self.adata.obs.index - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:], index_unique=None) - self.adata.obs.drop("batch", axis=1, inplace=True) - self.adata = self.adata[:, self.adata.X.sum(axis=0) > 0].copy() + fn_tar = os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), + fn_annot = [ + os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), + os.path.join(self.path, "human", "mixed", "donor2.annotation.txt") + ] + with tarfile.open(fn_tar) as tar: + df = pd.read_csv(tar.extractfile(self.sample_fn), compression="gzip", sep="\t") + df.index = [i.split(".")[0] for i in df["Accession"]] + var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) + if df.columns[-1].startswith("Un"): + df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) + self.adata = anndata.AnnData(df.T) + self.adata.var = var + if "PP001" in fn or "PP002" in fn: + self.adata.obs["donor"] = "Donor1" + self.adata.obs["organ"] = "Lung" + elif "PP003" in fn or "PP004" in fn: + self.adata.obs["donor"] = "Donor1" + self.adata.obs["organ"] = "Bone Marrow" + elif "PP005" in fn or "PP006" in fn: + self.adata.obs["donor"] = "Donor1" + self.adata.obs["organ"] = "Lymph Node" + elif "PP009" in fn or "PP010" in fn: + self.adata.obs["donor"] = "Donor2" + self.adata.obs["organ"] = "Lung" + elif "PP011" in fn or "PP012" in fn: + self.adata.obs["donor"] = "Donor2" + self.adata.obs["organ"] = "Bone Marrow" + elif "PP013" in fn or "PP014" in fn: + self.adata.obs["donor"] = "Donor2" + self.adata.obs["organ"] = "Lymph Node" + self.adata.obs.index = fn.split("_")[1].split("s")[0] + "nskept." 
+ self.adata.obs.index self.adata.obs["cell_ontology_class"] = "Unknown" - df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) - df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) + df1 = pd.read_csv(fn_annot[0], sep="\t", index_col=0, header=None) + df2 = pd.read_csv(fn_annot[1], sep="\t", index_col=0, header=None) for i in df1.index: self.adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] for i in df2.index: self.adata.obs["cell_ontology_class"].loc[i] = df2.loc[i][1] self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - - # TODO we should move this code into the base class - # If the subset_organs() method has been run before, subset to specified organs - # if "organsubset" in self.__dict__: - # self.adata = self.adata[self.adata.obs["organ"].isin(self.organsubset)] - # If adata object is empty, set it to None - # if not len(self.adata): - # self.adata = None - # self.loaded = True - - @property - def ncells(self): - if "organsubset" in self.__dict__: - if not self.loaded: - self._load() - if self.adata is None: - return 0 - else: - return self.adata.n_obs - else: - return super().ncells diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py index cb2e14589..0d23e723d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py @@ -3,23 +3,31 @@ import pandas as pd import anndata -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "E-MTAB-6678.processed", + "E-MTAB-6701.processed", +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = 
None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_placenta_2018_10x_ventotormo_10.1038/s41586-018-0698-6" + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + protocol = "10x" if self.sample_fn == "E-MTAB-6678.processed" else "smartseq2" + self.id = f"human_placenta_2018_{protocol}_ventotormo_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1038/s41586-018-0698-6" - self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.1.zip" - self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/E-MTAB-6701.processed.2.zip" + self.download = f"https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/{self.sample_fn}.1.zip" + self.download_meta = f"https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/{self.sample_fn}.2.zip" self.author = "Teichmann" self.healthy = True @@ -27,7 +35,7 @@ def __init__( self.organ = "placenta,decidua,blood" # ToDo: move this into .obs_key_organ? 
self.organism = "human" self.doi = "10.1038/s41586-018-0698-6" - self.protocol = "10x" + self.protocol = protocol self.state_exact = "healthy" self.year = 2018 @@ -75,11 +83,11 @@ def __init__( } def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6701.processed.2.zip"), - ] + base_path = os.path.join(self.path, "human", "placenta") + fn = [ + os.path.join(base_path, f"{self.sample_fn}.1.zip"), + os.path.join(base_path, f"{self.sample_fn}.2.zip"), + ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) df = pd.read_csv(fn[1], sep="\t") for i in df.columns: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py deleted file mode 100644 index 666d41719..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_smartseq2_ventotormo_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import os -from typing import Union -import pandas as pd -import anndata - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_placenta_2018_smartseq2_ventotormo_10.1038/s41586-018-0698-6" - - self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.1.zip" - self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6678/E-MTAB-6678.processed.2.zip" - - self.author = "Teichmann" - self.healthy = True - self.normalization = "raw" - self.organ = "placenta,decidua,blood" # ToDo: move this into .obs_key_organ? 
- self.organism = "human" - self.doi = "10.1038/s41586-018-0698-6" - self.protocol = "Smartseq2" - self.state_exact = "healthy" - self.year = 2018 - - self.var_symbol_col = "names" - self.var_ensembl_col = "ensembl" - - self.obs_key_cellontology_original = "annotation" - # ToDo: further anatomical information for subtissue in "location" - - self.class_maps = { - "0": { - "DC1": "Dendritic Cells 1", - "DC2": "Dendritic Cells 2", - "EVT": "Extravillous Trophoblasts", - "Endo (f)": "Endothelial Cells f", - "Endo (m)": "Endothelial Cells m", - "Endo L": "Endothelial Cells L", - "Epi1": "Epithelial Glandular Cells 1", - "Epi2": "Epithelial Glandular Cells 2", - "Granulocytes": "Granulocytes", - "HB": "Hofbauer Cells", - "ILC3": "ILC3", - "MO": "Monocyte", - "NK CD16+": "NK Cells CD16+", - "NK CD16-": "NK Cells CD16-", - "Plasma": "B cell (Plasmocyte)", - "SCT": "Syncytiotrophoblasts", - "Tcells": "T cell", - "VCT": "Villous Cytotrophoblasts", - "dM1": "Decidual Macrophages 1", - "dM2": "Decidual Macrophages 2", - "dM3": "Decidual Macrophages 3", - "dNK p": "Decidual NK Cells p", - "dNK1": "Decidual NK Cells 1", - "dNK2": "Decidual NK Cells 2", - "dNK3": "Decidual NK Cells 3", - "dP1": "Perivascular Cells 1", - "dP2": "Perivascular Cells 2", - "dS1": "Decidual Stromal Cells 1", - "dS2": "Decidual Stromal Cells 2", - "dS3": "Decidual Stromal Cells 3", - "fFB1": "Fibroblasts 1", - "fFB2": "Fibroblasts 2", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.1.zip"), - os.path.join(self.path, "human", "placenta", "E-MTAB-6678.processed.2.zip"), - ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) - df = pd.read_csv(fn[1], sep="\t") - for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] - - self.adata.var["ensembl"] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var["names"] = [i.split("_")[0] for i in 
self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop("index", axis=1) - self.adata = self.adata[:, ~self.adata.var.index.isin( - ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index ed648d9db..9064f6b1c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -4,14 +4,20 @@ import scipy.sparse import numpy as np -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad", + "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): """ - This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a - free account at https://www.synapse.org. You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: + This data loader directly processes the data file provided under the download link. + To obtain the file, you need to create a free account at https://www.synapse.org. 
+ You can then use those login credentials to download the file with python using the synapse client, + installable via `pip install synapseclient`: import synapseclient import shutil @@ -27,13 +33,16 @@ class Dataset(DatasetBase): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_10x_travaglini_001_10.1038/s41586-020-2922-4" + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + protocol = "10x" if self.sample_fn.split("_")[0] == "droplet" else "smartseq2" + self.id = f"human_lung_2020_{protocol}_travaglini_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1038/s41586-020-2922-4" self.download = "https://www.synapse.org/#!Synapse:syn21041850" self.download_meta = None @@ -44,7 +53,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "10x" + self.protocol = protocol self.state_exact = "healthy" self.year = 2020 @@ -186,11 +195,15 @@ def __init__( def _load(self, fn=None): if fn is None: - fn = os.path.join(self.path, "human", "lung", "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") + fn = os.path.join(self.path, "human", "lung", self.sample_fn) + if self.sample_fn.split("_")[0] == "droplet": + norm_const = 1000000 + else: + norm_const = 10000 self.adata = anndata.read(fn) self.adata.X = scipy.sparse.csc_matrix(self.adata.X) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None])) \ - .multiply(1 / 10000) + .multiply(1 / norm_const) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py deleted file mode 100644 index 1ebf48fb2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_smartseq2_travaglini_002.py +++ /dev/null @@ -1,162 +0,0 @@ -import anndata -import os -from typing import Union -import scipy.sparse -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - """ - This data loader directly processes the data file provided under the download link. To obtain the file, you need to create a - free account at https://www.synapse.org. You can then use those login credentials to download the file with python - using the synapse client, installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login("synapse_username","password") - syn21625142 = syn.get(entity="syn21625142") - shutil.move(syn21625142.path, "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") - - :param path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_smartseq2_travaglini_002_10.1038/s41586-020-2922-4" - - self.download = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_meta = None - - self.author = "Krasnow" - self.doi = "10.1038/s41586-020-2922-4" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" - self.organism = "human" - self.protocol = "smartseq2" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.class_maps = { - "0": { - "Adventitial Fibroblast_P1": "Fibroblasts", - "Adventitial Fibroblast_P2": "Fibroblasts", - "Adventitial Fibroblast_P3": "Fibroblasts", - "Airway Smooth Muscle_P1": "Airway 
smooth muscle", - "Airway Smooth Muscle_P2": "Airway smooth muscle", - "Airway Smooth Muscle_P3": "Airway smooth muscle", - "Alveolar Epithelial Type 1_P1": "AT1", - "Alveolar Epithelial Type 1_P2": "AT1", - "Alveolar Epithelial Type 1_P3": "AT1", - "Alveolar Epithelial Type 2_P1": "AT2", - "Alveolar Epithelial Type 2_P2": "AT2", - "Alveolar Epithelial Type 2_P3": "AT2", - "Alveolar Fibroblast_P1": "Fibroblasts", - "Alveolar Fibroblast_P2": "Fibroblasts", - "Alveolar Fibroblast_P3": "Fibroblasts", - "Artery_P1": "Arterial", - "Artery_P2": "Arterial", - "Artery_P3": "Arterial", - "B_P1": "B cell lineage", - "B_P2": "B cell lineage", - "B_P3": "B cell lineage", - "Basal_P1": "Basal", - "Basal_P2": "Basal", - "Basal_P3": "Basal", - "Basophil/Mast 1_P1": "Mast cells", - "Basophil/Mast 1_P2": "Mast cells", - "Basophil/Mast 1_P3": "Mast cells", - "Bronchial Vessel 1_P1": "Bronchial Vessel 1", - "CD4+ Memory/Effector T_P1": "T cell lineage", - "CD4+ Naive T_P1": "T cell lineage", - "CD4+ Naive T_P2": "T cell lineage", - "CD8+ Memory/Effector T_P1": "T cell lineage", - "CD8+ Naive T_P1": "T cell lineage", - "CD8+ Naive T_P2": "T cell lineage", - "Capillary Aerocyte_P1": "Capillary", - "Capillary Aerocyte_P2": "Capillary", - "Capillary Aerocyte_P3": "Capillary", - "Capillary Intermediate 1_P2": "Capillary Intermediate 1", - "Capillary_P1": "Capillary", - "Capillary_P2": "Capillary", - "Capillary_P3": "Capillary", - "Ciliated_P1": "Multiciliated lineage", - "Ciliated_P2": "Multiciliated lineage", - "Ciliated_P3": "Multiciliated lineage", - "Classical Monocyte_P1": "Monocytes", - "Club_P1": "Secretory", - "Club_P2": "Secretory", - "Club_P3": "Secretory", - "Dendritic_P1": "Dendritic cells", - "Differentiating Basal_P3": "Basal", - "Fibromyocyte_P3": "Fibromyocyte", - "Goblet_P1": "Secretory", - "Goblet_P2": "Secretory", - "Goblet_P3": "Secretory", - "IGSF21+ Dendritic_P2": "Macrophages", - "IGSF21+ Dendritic_P3": "Macrophages", - "Intermediate Monocyte_P2": "Monocytes", - 
"Intermediate Monocyte_P3": "Monocytes", - "Ionocyte_P3": "Rare", - "Lipofibroblast_P1": "Fibroblasts", - "Lymphatic_P1": "Lymphatic EC", - "Lymphatic_P2": "Lymphatic EC", - "Lymphatic_P3": "Lymphatic EC", - "Macrophage_P2": "Macrophages", - "Macrophage_P3": "Macrophages", - "Myeloid Dendritic Type 2_P3": "Dendritic cells", - "Myofibroblast_P2": "Myofibroblasts", - "Myofibroblast_P3": "Myofibroblasts", - "Natural Killer T_P2": "T cell lineage", - "Natural Killer T_P3": "T cell lineage", - "Natural Killer_P1": "Innate lymphoid cells", - "Natural Killer_P2": "Innate lymphoid cells", - "Natural Killer_P3": "Innate lymphoid cells", - "Neuroendocrine_P1": "Rare", - "Neuroendocrine_P3": "Rare", - "Neutrophil_P1": "Monocytes", - "Neutrophil_P2": "Monocytes", - "Neutrophil_P3": "Monocytes", - "Nonclassical Monocyte_P1": "Monocytes", - "Nonclassical Monocyte_P2": "Monocytes", - "Pericyte_P1": "Fibroblasts", - "Pericyte_P2": "Fibroblasts", - "Pericyte_P3": "Fibroblasts", - "Plasma_P3": "B cell lineage", - "Plasmacytoid Dendritic_P1": "Dendritic cells", - "Plasmacytoid Dendritic_P2": "Dendritic cells", - "Plasmacytoid Dendritic_P3": "Dendritic cells", - "Proliferating NK/T_P2": "Innate lymphoid cells", - "Proliferating NK/T_P3": "Innate lymphoid cells", - "Signaling Alveolar Epithelial Type 2_P1": "AT2", - "Signaling Alveolar Epithelial Type 2_P3": "AT2", - "Vascular Smooth Muscle_P1": "2_Smooth Muscle", - "Vascular Smooth Muscle_P2": "2_Smooth Muscle", - "Vascular Smooth Muscle_P3": "2_Smooth Muscle", - "Vein_P2": "Venous", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad") - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nReads"].values[:, None])) \ - .multiply(1 / 1000000) - - 
self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py index a7df368b3..2bbb8b5f3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py @@ -3,28 +3,36 @@ from typing import Union import numpy as np -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad", + "vieira19_Bronchi_anonymised.processed.h5ad", +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2019_10x_braga_001_10.1038/s41591-019-0468-5" + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"human_lung_2019_10x_braga_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1038/s41591-019-0468-5" - self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad" - self.download_meta = None + self.download = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.author = "Teichmann" self.doi = "10.1038/s41591-019-0468-5" self.healthy = True - self.organ = "lung" # ToDo: "alveoli, parenchyma" + self.organ = "lung" + # ToDo: 1->"alveoli, parenchyma" + # ToDo: 2->"bronchi" self.organism = "human" self.protocol = "10x" self.state_exact = "healthy" @@ -35,32 +43,56 @@ def __init__( self.obs_key_cellontology_original = "CellType" - self.class_maps = { 
- "0": { - "Ciliated 2": "Multiciliated lineage", - "Luminal_Macrophages": "Macrophages", - "Basal 1": "Basal", - "Dendritic cells": "Dendritic cells", - "Endothelial": "1_Endothelial", - "Lymphatic": "Lymphatic EC", - "Ciliated 1": "Multiciliated lineage", - "Smooth muscle": "2_Smooth Muscle", - "Type_1_alveolar": "AT1", - "Neutrophils": "Monocytes", - "Club": "Secretory", - "Basal 2": "Basal", - "B cells": "B cell lineage", - "T and NK": "2_Lymphoid", - "Mesothelium": "Mesothelium", - "Mast cells": "Mast cells", - "Fibroblasts": "2_Fibroblast lineage", - "Type 2 alveolar": "AT2", - }, - } + if self.sample_fn == "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad": + self.class_maps = { + "0": { + "Ciliated 2": "Multiciliated lineage", + "Luminal_Macrophages": "Macrophages", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "Endothelial": "1_Endothelial", + "Lymphatic": "Lymphatic EC", + "Ciliated 1": "Multiciliated lineage", + "Smooth muscle": "2_Smooth Muscle", + "Type_1_alveolar": "AT1", + "Neutrophils": "Monocytes", + "Club": "Secretory", + "Basal 2": "Basal", + "B cells": "B cell lineage", + "T and NK": "2_Lymphoid", + "Mesothelium": "Mesothelium", + "Mast cells": "Mast cells", + "Fibroblasts": "2_Fibroblast lineage", + "Type 2 alveolar": "AT2", + }, + } + else: + self.class_maps = { + "0": { + "Ciliated 1": "Multiciliated lineage", + "Club": "Secretory", + "Ciliated 2": "Multiciliated lineage", + "Ionocytes": "Rare", + "Basal 2": "Basal", + "Goblet_1": "Secretory", + "Goblet 2": "Secretory", + "Basal 1": "Basal", + "Dendritic cells": "Dendritic cells", + "B cells": "B cell lineage", + "Luminal_Macrophages": "Macrophages", + "Neutrophils": "Monocytes", + "Endothelial": "1_Endothelial", + "Smooth muscle": "2_Smooth Muscle", + "T and NK": "2_Lymphoid", + "Fibroblasts": "2_Fibroblast lineage", + "Lymphatic": "Lymphatic EC", + "Mast cells": "Mast cells", + }, + } def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, 
"human", "lung", "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad") + base_path = os.path.join(self.path, "human", "placenta") + fn = os.path.join(base_path, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py deleted file mode 100644 index 54985ca9b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_002.py +++ /dev/null @@ -1,67 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2019_10x_braga_002_10.1038/s41591-019-0468-5" - - self.download = "https://covid19.cog.sanger.ac.uk/vieira19_Bronchi_anonymised.processed.h5ad" - self.download_meta = None - - self.author = "Teichmann" - self.doi = "10.1038/s41591-019-0468-5" - self.healthy = True - self.normalization = "norm" - self.organ = "lung" # ToDo "bronchi" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Ciliated 1": "Multiciliated lineage", - "Club": "Secretory", - "Ciliated 2": "Multiciliated lineage", - "Ionocytes": "Rare", - "Basal 2": "Basal", - "Goblet_1": "Secretory", - "Goblet 2": "Secretory", - "Basal 1": "Basal", - "Dendritic cells": "Dendritic cells", - "B cells": "B cell lineage", - "Luminal_Macrophages": "Macrophages", - "Neutrophils": "Monocytes", - "Endothelial": "1_Endothelial", - 
"Smooth muscle": "2_Smooth Muscle", - "T and NK": "2_Lymphoid", - "Fibroblasts": "2_Fibroblast lineage", - "Lymphatic": "Lymphatic EC", - "Mast cells": "Mast cells", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "vieira19_Bronchi_anonymised.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py similarity index 89% rename from sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 3d6542451..a83c74dec 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -54,11 +54,11 @@ def __init__( } def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE130148_raw_counts.csv.gz"), - os.path.join(self.path, "human", "lung", "GSE130148_barcodes_cell_types.txt.gz"), - ] + base_path = os.path.join(self.path, "human", "placenta") + fn = [ + os.path.join(base_path, "GSE130148_raw_counts.csv.gz"), + os.path.join(base_path, "GSE130148_barcodes_cell_types.txt.gz"), + ] self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py deleted file mode 100644 index e3fb7a4dd..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_colon_2019_10x_wang_001.py +++ /dev/null @@ -1,57 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2019_10x_wang_001_10.1084/jem.20191130" - - self.download = "https://covid19.cog.sanger.ac.uk/wang20_colon.processed.h5ad" - self.download_meta = None - - self.author = "Chen" - self.healthy = True - self.normalization = "raw" - self.organ = "colon" - self.organism = "human" - self.doi = "10.1084/jem.20191130" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Progenitor": "Enterocyte Progenitors", - "Enterocyte": "Enterocytes", - "Goblet": "Goblet cells", - "TA": "TA", - "Paneth-like": "Paneth cells", - "Stem Cell": "Stem cells", - "Enteriendocrine": "Enteroendocrine cells", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "colon", "wang20_colon.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py deleted file mode 100644 index 8ccaa58fd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_ileum_2019_10x_wang_001.py +++ /dev/null @@ -1,57 +0,0 @@ -import anndata 
-import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_ileum_2019_10x_wang_001_10.1084/jem.20191130" - - self.download = "https://covid19.cog.sanger.ac.uk/wang20_ileum.processed.h5ad" - self.download_meta = None - - self.author = "Chen" - self.doi = "10.1084/jem.20191130" - self.healthy = True - self.normalization = "raw" - self.organ = "ileum" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Progenitor": "Progenitors", - "Goblet": "Goblet cells", - "Enterocyte": "Enterocytes", - "Paneth-like": "Paneth cells", - "Stem Cell": "Stem Cell", - "TA": "TA", - "Enteriendocrine": "Enteroendocrine cells", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "wang20_ileum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py deleted file mode 100644 index 54126316a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_rectum_2019_10x_wang_001.py +++ /dev/null @@ -1,56 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class 
Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_rectum_2019_10x_wang_001_10.1084/jem.20191130" - - self.download = "https://covid19.cog.sanger.ac.uk/wang20_rectum.processed.h5ad" - - self.author = "Chen" - self.doi = "10.1084/jem.20191130" - self.healthy = True - self.normalization = "raw" - self.organ = "rectum" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Progenitor": "Enterocyte progenitor", - "Goblet": "Goblet", - "Enterocyte": "Enterocyte", - "Paneth-like": "Paneth-like", - "Stem Cell": "Stem Cell", - "TA": "TA", - "Enteriendocrine": "Enteroendocrine", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "rectum", "wang20_rectum.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py new file mode 100644 index 000000000..ad7e68dc9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py @@ -0,0 +1,89 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "wang20_colon.processed.h5ad", + "wang20_ileum.processed.h5ad", + "wang20_rectum.processed.h5ad" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def 
__init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + organ = self.sample_fn.split("_")[1].split(".")[0] + self.id = f"human_{organ}_2019_10x_wang_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1084/jem.20191130" + + self.download = f"https://covid19.cog.sanger.ac.uk/wang20_{organ}.processed.h5ad" + + self.author = "Chen" + self.doi = "10.1084/jem.20191130" + self.healthy = True + self.normalization = "raw" + self.organ = "colon" if organ == "colon" else "ileum" if organ == "ileum" else "rectum" + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "CellType" + + if organ == "colon": + self.class_maps = { + "0": { + "Progenitor": "Enterocyte Progenitors", + "Enterocyte": "Enterocytes", + "Goblet": "Goblet cells", + "TA": "TA", + "Paneth-like": "Paneth cells", + "Stem Cell": "Stem cells", + "Enteriendocrine": "Enteroendocrine cells", + }, + } + elif organ == "ileum": + self.class_maps = { + "0": { + "Progenitor": "Progenitors", + "Goblet": "Goblet cells", + "Enterocyte": "Enterocytes", + "Paneth-like": "Paneth cells", + "Stem Cell": "Stem Cell", + "TA": "TA", + "Enteriendocrine": "Enteroendocrine cells", + }, + } + else: + self.class_maps = { + "0": { + "Progenitor": "Enterocyte progenitor", + "Goblet": "Goblet", + "Enterocyte": "Enterocyte", + "Paneth-like": "Paneth-like", + "Stem Cell": "Stem Cell", + "TA": "TA", + "Enteriendocrine": "Enteroendocrine", + }, + } + + def _load(self, fn=None): + base_path = os.path.join(self.path, "human", self.organ) + fn = os.path.join(base_path, self.sample_fn) + self.adata = anndata.read(fn) + self.adata.X = np.expm1(self.adata.X) + self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py index 01e20cbd3..d31cbb9b6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -4,23 +4,29 @@ import numpy as np import scipy.sparse -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "lukassen20_lung_orig.processed.h5ad", + "lukassen20_airway_orig.processed.h5ad" +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_10x_lukassen_001_10.1101/2020.03.13.991455" + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"human_lung_2020_10x_lukassen_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1101/2020.03.13.991455" - self.download = "https://covid19.cog.sanger.ac.uk/lukassen20_lung_orig.processed.h5ad" - self.download_meta = None + self.download = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.author = "Eils" self.doi = "10.1101/2020.03.13.991455" @@ -36,23 +42,43 @@ def __init__( self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "Ciliated": "Multiciliated lineage", - "Endothelial": "1_Endothelial", - "AT2": "AT2", - "LymphaticEndothelium": "Lymphatic EC", - "Fibroblasts": "2_Fibroblast lineage", - "Club": "Secretory", - 
"Immuno_TCells": "T cell lineage", - "Immuno_Monocytes": "Monocytes", - "AT1": "AT1" - }, - } + if self.sample_fn == "lukassen20_lung_orig.processed.h5ad": + self.class_maps = { + "0": { + "AT1": "AT1", + "AT2": "AT2", + "Ciliated": "Multiciliated lineage", + "Club": "Secretory", + "Endothelial": "1_Endothelial", + "Fibroblasts": "2_Fibroblast lineage", + "Immuno_TCells": "T cell lineage", + "Immuno_Monocytes": "Monocytes", + "LymphaticEndothelium": "Lymphatic EC", + } + } + else: + self.class_maps = { + "0": { + "Basal_Mitotic": "Basal", + "Basal1": "Basal", + "Basal2": "Basal", + "Basal3": "Basal", + "Ciliated1": "Multiciliated lineage", + "Ciliated2": "Multiciliated lineage", + "Club": "Secretory", + "Fibroblast": "2_Fibroblast lineage", + "FOXN4": "Rare", + "Ionocyte": "Rare", + "Goblet": "Secretory", + "Secretory3": "Secretory", + "Secretory2": "Secretory", + "Secretory1": "Secretory", + }, + } def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_lung_orig.processed.h5ad") + base_path = os.path.join(self.path, "human", "lung") + fn = os.path.join(base_path, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py deleted file mode 100644 index ea766e1cd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_002.py +++ /dev/null @@ -1,66 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, 
None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_10x_lukassen_002_10.1101/2020.03.13.991455" - - self.download = "https://covid19.cog.sanger.ac.uk/lukassen20_airway_orig.processed.h5ad" - self.download_meta = None - - self.author = "Eils" - self.doi = "10.1101/2020.03.13.991455" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" # ToDo: "bronchial epithelial cells" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Secretory3": "Secretory", - "Ciliated1": "Multiciliated lineage", - "Goblet": "Secretory", - "Ciliated2": "Multiciliated lineage", - "Club": "Secretory", - "Secretory2": "Secretory", - "FOXN4": "Rare", - "Basal1": "Basal", - "Secretory1": "Secretory", - "Fibroblast": "2_Fibroblast lineage", - "Ionocyte": "Rare", - "Basal3": "Basal", - "Basal_Mitotic": "Basal", - "Basal2": "Basal", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "lukassen20_airway_orig.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ - .multiply(1 / 10000) - - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py deleted file mode 100644 index 825c386f4..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_esophagus_2019_10x_madissoon_001.py +++ /dev/null @@ -1,68 +0,0 @@ -import anndata -import os -from typing import Union -import scipy.sparse - -from 
sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_esophagus_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" - - self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" - # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 - self.download_meta = None - - self.author = "Meyer" - self.doi = "10.1186/s13059-019-1906-x" - self.healthy = True - self.normalization = "raw" - self.organ = "esophagus" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids-HCATisStab7413619" - - self.obs_key_cellontology_original = "Celltypes" - - self.class_maps = { - "0": { - "B_CD27neg": "B_CD27neg", - "B_CD27pos": "B_CD27pos", - "Blood_vessel": "Blood_vessel", - "Dendritic_Cells": "Dendritic cell", - "Epi_basal": "Basal cell", - "Epi_dividing": "Epi_dividing", - "Epi_stratified": "Stratified epithelial cell", - "Epi_suprabasal": "Epi_suprabasal", - "Epi_upper": "Epi_upper", - "Glands_duct": "Glands_duct", - "Glands_mucous": "Glands_mucous", - "Lymph_vessel": "Lymph_vessel", - "Mast_cell": "Mast cell", - "Mono_macro": "Mono_macro", - "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", - "Stroma": "Stromal cell", - "T_CD4": "T_CD4", - "T_CD8": "T_CD8", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "esophagus", "oesophagus.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py deleted file mode 100644 index aa1aded6c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_lung_2019_10x_madissoon_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2019_10x_madissoon_001._10.1186/s13059-019-1906-x" - - self.download = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" - self.download_meta = None - - self.author = "Meyer" - self.doi = "10.1186/s13059-019-1906-x" - self.healthy = True - self.normalization = "raw" - self.state_exact = "healthy" - self.organ = "lung" # ToDo: "parenchyma" - self.organism = "human" - self.protocol = "10x" - self.year = 2020 - - self.var_symbol_col = "index" - self.var_ensembl_col = "gene.ids.HCATisStab7509734" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "T_CD4": "T cell lineage", - "Mast_cells": "Mast cells", - "Monocyte": "Monocytes", - "Blood_vessel": "2_Blood vessels", - "Ciliated": "Multiciliated lineage", - "Macrophage_MARCOneg": "Macrophages", - "DC_plasmacytoid": "Dendritic cells", - "DC_1": "Dendritic cells", - "Muscle_cells": "2_Smooth Muscle", - "Macrophage_MARCOpos": "Macrophages", - "T_cells_Dividing": "T cell lineage", - "DC_Monocyte_Dividing": "Dendritic cells", - "B_cells": "B cell lineage", - "T_CD8_CytT": "T cell lineage", - "NK_Dividing": "Innate lymphoid cells", - "T_regulatory": "T cell lineage", - "DC_2": "Dendritic cells", - "Alveolar_Type2": "AT2", - 
"Plasma_cells": "B cell lineage", - "NK": "Innate lymphoid cells", - "Alveolar_Type1": "AT1", - "Fibroblast": "2_Fibroblast lineage", - "DC_activated": "Dendritic cells", - "Macrophage_Dividing": "Macrophages", - "Lymph_vessel": "Lymphatic EC", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "madissoon19_lung.processed.h5ad") - self.adata = anndata.read(fn) - - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py deleted file mode 100644 index 77325a3ca..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_spleen_2019_10x_madissoon_001.py +++ /dev/null @@ -1,77 +0,0 @@ -import anndata -import os -from typing import Union -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_spleen_2019_10x_madissoon_001_10.1186/s13059-019-1906-x" - - self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" - self.download_meta = None - - self.author = "Meyer" - self.doi = "10.1186/s13059-019-1906-x" - self.healthy = True - self.normalization = "raw" - self.organ = "spleen" - self.organism = "human" - self.protocol = "10x" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids-HCATisStab7463846" - - self.obs_key_cellontology_original = "Celltypes" - - self.class_maps = { - "0": { - "B_Hypermutation": "B_Hypermutation", - "B_T_doublet": "B_T_doublet", - "B_follicular": "B_follicular", - 
"B_mantle": "B_mantle", - "CD34_progenitor": "CD34_progenitor", - "DC_1": "DC_1", - "DC_2": "DC_2", - "DC_activated": "DC_activated", - "DC_plasmacytoid": "DC_plasmacytoid", - "ILC": "ILC", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "NK_CD160pos": "NK_CD160pos", - "NK_FCGR3Apos": "NK_FCGR3Apos", - "NK_dividing": "NK_dividing", - "Plasma_IgG": "Plasma_IgG", - "Plasma_IgM": "Plasma_IgM", - "Plasmablast": "Plasmablast", - "Platelet": "Platelet", - "T_CD4_conv": "T_CD4_conv", - "T_CD4_fh": "T_CD4_fh", - "T_CD4_naive": "T_CD4_naive", - "T_CD4_reg": "T_CD4_reg", - "T_CD8_CTL": "T_CD8_CTL", - "T_CD8_MAIT": "T_CD8_MAIT", - "T_CD8_activated": "T_CD8_activated", - "T_CD8_gd": "T_CD8_gd", - "T_cell_dividing": "Proliferating T cell", - }, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "spleen", "spleen.cellxgene.h5ad") - self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py new file mode 100644 index 000000000..25ae0c22f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -0,0 +1,151 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "madissoon19_lung.processed.h5ad", + "oesophagus.cellxgene.h5ad", + "spleen.cellxgene.h5ad", +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) + organ = "lung" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ + "oesophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" + # ToDo: lung to "parenchyma"? + self.id = f"human_{organ}_2019_10x_madissoon_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1186/s13059-019-1906-x" + + if self.sample_fn == "madissoon19_lung.processed.h5ad": + "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + self.var_ensembl_col = "gene.ids.HCATisStab7509734" + elif self.sample_fn == "oesophagus.cellxgene.h5ad": + self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" + # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.var_ensembl_col = "gene_ids-HCATisStab7413619" + else: + "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.var_ensembl_col = "gene_ids-HCATisStab7463846" + + self.author = "Meyer" + self.doi = "10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" + self.organ = organ + self.organism = "human" + self.protocol = "10x" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Celltypes" + + if self.sample_fn == "madissoon19_lung.processed.h5ad": + self.class_maps = { + "0": { + "T_CD4": "T cell lineage", + "Mast_cells": "Mast cells", + "Monocyte": "Monocytes", + "Blood_vessel": "2_Blood vessels", + "Ciliated": "Multiciliated lineage", + "Macrophage_MARCOneg": "Macrophages", + "DC_plasmacytoid": "Dendritic cells", + "DC_1": "Dendritic cells", + "Muscle_cells": "2_Smooth Muscle", + "Macrophage_MARCOpos": "Macrophages", + "T_cells_Dividing": "T cell lineage", + "DC_Monocyte_Dividing": "Dendritic cells", + "B_cells": "B cell lineage", + "T_CD8_CytT": "T cell lineage", + "NK_Dividing": "Innate lymphoid cells", + "T_regulatory": "T cell lineage", + 
"DC_2": "Dendritic cells", + "Alveolar_Type2": "AT2", + "Plasma_cells": "B cell lineage", + "NK": "Innate lymphoid cells", + "Alveolar_Type1": "AT1", + "Fibroblast": "2_Fibroblast lineage", + "DC_activated": "Dendritic cells", + "Macrophage_Dividing": "Macrophages", + "Lymph_vessel": "Lymphatic EC", + }, + } + elif self.sample_fn == "oesophagus.cellxgene.h5ad": + self.class_maps = { + "0": { + "B_CD27neg": "B_CD27neg", + "B_CD27pos": "B_CD27pos", + "Blood_vessel": "Blood_vessel", + "Dendritic_Cells": "Dendritic cell", + "Epi_basal": "Basal cell", + "Epi_dividing": "Epi_dividing", + "Epi_stratified": "Stratified epithelial cell", + "Epi_suprabasal": "Epi_suprabasal", + "Epi_upper": "Epi_upper", + "Glands_duct": "Glands_duct", + "Glands_mucous": "Glands_mucous", + "Lymph_vessel": "Lymph_vessel", + "Mast_cell": "Mast cell", + "Mono_macro": "Mono_macro", + "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", + "Stroma": "Stromal cell", + "T_CD4": "T_CD4", + "T_CD8": "T_CD8", + }, + } + else: + self.class_maps = { + "0": { + "B_Hypermutation": "B_Hypermutation", + "B_T_doublet": "B_T_doublet", + "B_follicular": "B_follicular", + "B_mantle": "B_mantle", + "CD34_progenitor": "CD34_progenitor", + "DC_1": "DC_1", + "DC_2": "DC_2", + "DC_activated": "DC_activated", + "DC_plasmacytoid": "DC_plasmacytoid", + "ILC": "ILC", + "Macrophage": "Macrophage", + "Monocyte": "Monocyte", + "NK_CD160pos": "NK_CD160pos", + "NK_FCGR3Apos": "NK_FCGR3Apos", + "NK_dividing": "NK_dividing", + "Plasma_IgG": "Plasma_IgG", + "Plasma_IgM": "Plasma_IgM", + "Plasmablast": "Plasmablast", + "Platelet": "Platelet", + "T_CD4_conv": "T_CD4_conv", + "T_CD4_fh": "T_CD4_fh", + "T_CD4_naive": "T_CD4_naive", + "T_CD4_reg": "T_CD4_reg", + "T_CD8_CTL": "T_CD8_CTL", + "T_CD8_MAIT": "T_CD8_MAIT", + "T_CD8_activated": "T_CD8_activated", + "T_CD8_gd": "T_CD8_gd", + "T_cell_dividing": "Proliferating T cell", + }, + } + + def _load(self, fn=None): + base_path = os.path.join(self.path, "human", self.organ) + fn = 
os.path.join(base_path, self.sample_fn) + self.adata = anndata.read(fn) + if self.sample_fn == "oesophagus.cellxgene.h5ad" or self.sample_fn == "spleen.cellxgene.h5ad": + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py index d72ed32d6..67f12d467 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py @@ -23,7 +23,7 @@ def __init__( self.download_meta = None self.author = "Regev" - self.doi = "d_nan" + self.doi = "no_doi" self.healthy = True self.normalization = "raw" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py index 808914f61..c624bbd5b 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -33,7 +33,7 @@ def __init__( self.download_meta = None self.author = "10x Genomics" - self.doi = "d_nan" + self.doi = "no_doi" self.healthy = True self.normalization = "raw" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py index 807392226..d799cacb2 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py @@ -23,7 +23,7 @@ def __init__( self.download_meta = None self.author = "Regev" - self.doi = "d_nan" + self.doi = "no_doi" self.healthy = True self.normalization = "raw" self.organ = "bone" diff 
--git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py index c327816b2..dece9b82c 100644 --- a/sfaira/data/dataloaders/loaders/super_group.py +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -31,8 +31,7 @@ def __init__( for f in os.listdir(cwd): if os.path.isdir(os.path.join(cwd, f)): # only directories if f[:len(dir_prefix)] == dir_prefix and f not in dir_exlcude: # Narrow down to data set directories - path_dsg = pydoc.locate( - f"sfaira.sfaira.data.dataloaders.loaders.{f}.FILE_PATH") + path_dsg = pydoc.locate(f"sfaira.data.dataloaders.loaders.{f}.FILE_PATH") if path_dsg is not None: dataset_groups.append(DatasetGroupDirectoryOriented( file_base=path_dsg, diff --git a/sfaira/data/external.py b/sfaira/data/external.py deleted file mode 100644 index 3ef466858..000000000 --- a/sfaira/data/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS diff --git a/sfaira/data/templates/__init__.py b/sfaira/data/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/data/templates/dataloaders/__init__.py b/sfaira/data/templates/dataloaders/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py new file mode 100644 index 000000000..896deaa18 --- /dev/null +++ 
b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py @@ -0,0 +1,20 @@ +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # ToDo Add you meta data here. + + def _load(self, fn): + pass # ToDo: load file fn into self.adata. diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py new file mode 100644 index 000000000..896deaa18 --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py @@ -0,0 +1,20 @@ +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # ToDo Add you meta data here. + + def _load(self, fn): + pass # ToDo: load file fn into self.adata. 
diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py new file mode 100644 index 000000000..26eba3e66 --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py @@ -0,0 +1,28 @@ +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ # ToDo Add correct sample file names here. + "your_sample_fn_1", + "your_sample_fn_2" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__( + sample_fn=sample_fn, + path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"sth_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_doi" # ToDo: Index the Dataset ID by the file. + # ToDo Add you meta data here. + + def _load_any_object(self, fn=None): + pass # ToDo: load file fn into self.adata, using self.sample_fn, ie the current sample file. 
diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py b/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py new file mode 100644 index 000000000..91052ce5d --- /dev/null +++ b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py @@ -0,0 +1,31 @@ +import anndata +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_IDS = [ # ToDo Add correct sample IDs here. + "your_sample_id_1", + "your_sample_id_2" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__( + sample_fn=sample_fn, + path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # ToDo Add you meta data here. + self.obs_key_sample = 'Sample' # ToDo: Make sure to include this attribute which indicates the column in + # self.adata in which you saved the sample IDs based on which the full adata object is subsetted. + + def _load_full_group_object(self, fn=None) -> anndata.AnnData: + pass # ToDo: load full data object and return (no subsetting!) 
diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py b/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py new file mode 100644 index 000000000..896deaa18 --- /dev/null +++ b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py @@ -0,0 +1,20 @@ +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # ToDo Add you meta data here. + + def _load(self, fn): + pass # ToDo: load file fn into self.adata. diff --git a/sfaira/data/utils/create_meta_and_cache.py b/sfaira/data/utils/create_meta_and_cache.py new file mode 100644 index 000000000..9c40da2a4 --- /dev/null +++ b/sfaira/data/utils/create_meta_and_cache.py @@ -0,0 +1,45 @@ +import sfaira +import sys +import tensorflow as tf + +print(tf.__version__) + + +def write_meta(args0, args1): + # Write meta data, cache. + args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) + # Test load from cache. + args0.load( + celltype_version=None, + match_to_reference=True, + remove_gene_version=True, + load_raw=False, + allow_caching=False, + ) + return None + + +# Set global variables. 
+print("sys.argv", sys.argv) + +path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +processes = int(str(sys.argv[4])) + +ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + path=path, meta_path=path_meta, cache_path=path_cache +) +dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. +# Write meta data, cache and test load from cache: +dsg.load( + celltype_version=None, + annotated_only=False, + match_to_reference=None, + remove_gene_version=True, + load_raw=True, + allow_caching=True, + processes=processes, + func=write_meta, + kwargs_func={"args1": path_meta}, +) From 7e11415d50407a61052e1dbe36bc8d718316ec47 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 27 Jan 2021 16:12:36 +0100 Subject: [PATCH 031/161] fixed bugs in create_meta_and_cache.py (#87) --- sfaira/data/utils/create_meta_and_cache.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sfaira/data/utils/create_meta_and_cache.py b/sfaira/data/utils/create_meta_and_cache.py index 9c40da2a4..d694e6fae 100644 --- a/sfaira/data/utils/create_meta_and_cache.py +++ b/sfaira/data/utils/create_meta_and_cache.py @@ -11,7 +11,6 @@ def write_meta(args0, args1): # Test load from cache. 
args0.load( celltype_version=None, - match_to_reference=True, remove_gene_version=True, load_raw=False, allow_caching=False, From d0a4b1ff6ef1b18c4afe9257b710c27c9c57443b Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 28 Jan 2021 10:38:26 +0100 Subject: [PATCH 032/161] fix dataloaders (#88) * fix mouse pancreas dataloader * fix anatomical dataloaders * remove superflous line in human adipose dataloader * remove reference to human mixed organ from dictionaries --- .../dataloaders/anatomical_groups/human/human_adipose.py | 5 +---- .../anatomical_groups/human/human_adrenalgland.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_artery.py | 4 +--- .../dataloaders/anatomical_groups/human/human_bladder.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_blood.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_bone.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_brain.py | 4 +--- .../dataloaders/anatomical_groups/human/human_calvaria.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_cervix.py | 4 +--- .../anatomical_groups/human/human_chorionicvillus.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_colon.py | 4 +--- .../dataloaders/anatomical_groups/human/human_duodenum.py | 4 +--- .../dataloaders/anatomical_groups/human/human_epityphlon.py | 4 +--- .../dataloaders/anatomical_groups/human/human_esophagus.py | 4 +--- sfaira/data/dataloaders/anatomical_groups/human/human_eye.py | 4 +--- .../anatomical_groups/human/human_fallopiantube.py | 4 +--- .../dataloaders/anatomical_groups/human/human_femalegonad.py | 4 +--- .../dataloaders/anatomical_groups/human/human_gallbladder.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_heart.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_hesc.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_ileum.py | 4 +--- .../dataloaders/anatomical_groups/human/human_jejunum.py | 4 +--- 
.../data/dataloaders/anatomical_groups/human/human_kidney.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_liver.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_lung.py | 4 +--- .../dataloaders/anatomical_groups/human/human_malegonad.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_muscle.py | 4 +--- .../dataloaders/anatomical_groups/human/human_omentum.py | 4 +--- .../dataloaders/anatomical_groups/human/human_pancreas.py | 4 +--- .../dataloaders/anatomical_groups/human/human_placenta.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_pleura.py | 4 +--- .../dataloaders/anatomical_groups/human/human_prostate.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_rectum.py | 4 +--- sfaira/data/dataloaders/anatomical_groups/human/human_rib.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_skin.py | 4 +--- .../dataloaders/anatomical_groups/human/human_spinalcord.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_spleen.py | 4 +--- .../dataloaders/anatomical_groups/human/human_stomach.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_thymus.py | 4 +--- .../dataloaders/anatomical_groups/human/human_thyroid.py | 4 +--- .../dataloaders/anatomical_groups/human/human_trachea.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_ureter.py | 4 +--- .../data/dataloaders/anatomical_groups/human/human_uterus.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_adipose.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_bladder.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_blood.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_bone.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_brain.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_colon.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_diaphragm.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_femalegonad.py | 4 +--- 
.../data/dataloaders/anatomical_groups/mouse/mouse_heart.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_ileum.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_kidney.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_liver.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_lung.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_malegonad.py | 4 +--- .../anatomical_groups/mouse/mouse_mammarygland.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_muscle.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_pancreas.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_placenta.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_prostate.py | 4 +--- sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_skin.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_spleen.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_stomach.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_thymus.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_tongue.py | 4 +--- .../dataloaders/anatomical_groups/mouse/mouse_trachea.py | 4 +--- .../data/dataloaders/anatomical_groups/mouse/mouse_uterus.py | 4 +--- .../mouse_pancreas_2019_10x_thompson_x.py | 2 +- sfaira/train/train_model.py | 1 - sfaira/versions/celltype_versions/human/__init__.py | 1 - 73 files changed, 71 insertions(+), 214 deletions(-) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py index 201370e91..e0b4f2aaa 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -16,7 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - datasets = 
dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py index 0ffb36613..be0505ea0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -21,6 +21,4 @@ def __init__( "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4", "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py index b18b55e36..8fa2ed83f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py index a61f32a3f..00b4ef431 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -18,6 +18,4 @@ def __init__( "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4", "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" ]) - datasets = 
dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py index 72d033a38..66c887cd4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -24,6 +24,4 @@ def __init__( "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4", "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py index 8297cfeb7..c172fe8f5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -18,6 +18,4 @@ def __init__( "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py index 86c507a2e..8e5b5d38e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -22,6 +22,4 @@ def __init__( "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4", "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in 
datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py index 44310af2c..057987d06 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py index 207b9ca9a..6bc3ca986 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py index 0118fc2f5..b5b0221a0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + 
super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py index 7c59bf553..2de1cc5fa 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -23,6 +23,4 @@ def __init__( "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4", "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py index 32daabf11..8e232a69c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py index f5d60e28d..56f86c06f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py index 3bd2cfc08..9d6679f27 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -18,6 +18,4 @@ def __init__( "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py index 83a6fbbb6..a86666ada 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -19,6 +19,4 @@ def __init__( "human_eye_2019_10x_voigt_001", "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py index 855db417a..1f819c846 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py index ffee659e9..f10e5d03c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -17,6 +17,4 @@ def __init__( "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py index 0fc9ae9e7..d8a90fe34 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py index 7c79ea374..2f15fba64 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -19,6 +19,4 @@ def __init__( "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4", "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py index ab3e7f567..16546f8c5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py index 0dae51294..8ecdbddbb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -18,6 +18,4 @@ def __init__( "human_ileum_2019_10x_wang_001", "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py index 111cc268a..f238e23b5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py 
index dd5fba448..72a9b5d86 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -25,6 +25,4 @@ def __init__( "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4", "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py index d6b5bebba..1a525c7db 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -24,6 +24,4 @@ def __init__( "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4", "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py index 1cc3504f2..cea664c0d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -30,6 +30,4 @@ def __init__( "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4", "human_lung_2020_smartseq2_travaglini_002", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py index 95bebf457..ba33bb371 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -18,6 +18,4 @@ def __init__( "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py index d019e27dc..697538b2b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -17,6 +17,4 @@ def __init__( "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py index 201ebbdc4..ece41406d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -18,6 +18,4 @@ def __init__( "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4", "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py index e20280602..f654095f2 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -22,6 +22,4 @@ def __init__( "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4", "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py index 8476af59f..1ea950c50 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -18,6 +18,4 @@ def __init__( "human_placenta_2018_10x_ventotormo_002", "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py index 52867a5fb..534a531d5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py index 52a9288a8..ec73a1c9e 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -17,6 +17,4 @@ def __init__( "human_prostate_2018_10x_henry_001", "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py index d497cd46a..a4d769c9d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -17,6 +17,4 @@ def __init__( "human_rectum_2019_10x_wang_001", "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py index ceecd92ad..9af9c34ac 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -17,6 +17,4 @@ def __init__( "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py index dc38e5dd0..8d8ba3171 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -17,6 +17,4 @@ def __init__( "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py index a90effd8e..c13172e37 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py index b19f84abf..68d4b4da5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -18,6 +18,4 @@ def __init__( "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py index 0a7180157..f6f2f3a90 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -25,6 +25,4 @@ def __init__( "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4", "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py index 0e5045b37..661c08da4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -18,6 +18,4 @@ def __init__( "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py index 7097ae580..245f28f99 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -17,6 +17,4 @@ def __init__( "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py index 236ece19d..88a93df17 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py index 5a9562560..665fbc401 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py index dc5d21528..cbc85303c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py index f0e4823cd..59407309b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -20,6 +20,4 @@ def __init__( 
"mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728", "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py index 9fa4f8391..9b0a85f76 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -18,6 +18,4 @@ def __init__( "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728", "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py index f846cdfd7..0a2901342 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -20,6 +20,4 @@ def __init__( "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py index ee5ccc3b4..b9f1ac781 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -18,6 +18,4 @@ def __init__( 
"mouse_bone_2019_smartseq2_pisco_001_10.1101/661728", "mouse_bone_2018_microwell_001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py index 8fdba3957..7309eb1d9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -19,6 +19,4 @@ def __init__( "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py index 188d142dd..1b579aea7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -17,6 +17,4 @@ def __init__( "mouse_colon_2019_10x_pisco_001_10.1101/661728", "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py index 4127baa4c..4ec085dbf 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ 
"mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py index fdc177d65..dbe2e7bb8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -17,6 +17,4 @@ def __init__( "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py index f48849341..c8fdc754f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -18,6 +18,4 @@ def __init__( "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728", "mouse_heart_2019_smartseq2_pisco_002_10.1101/661728" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py index efe1768f5..91bfa7a5e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -18,6 +18,4 @@ def __init__( "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", 
"mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py index 4341b6e04..4c2008342 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -19,6 +19,4 @@ def __init__( "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py index a8b5ddc7f..c9ea7d277 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -19,6 +19,4 @@ def __init__( "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py index f1133c2e3..41f9a46ad 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -20,6 +20,4 @@ def __init__( "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", 
"mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py index 2f7889c98..1da24a2b7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -17,6 +17,4 @@ def __init__( "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py index 923ca32c2..403b42c2f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -21,6 +21,4 @@ def __init__( "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py index b50c32a16..27121dcf2 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -18,6 +18,4 @@ def __init__( 
"mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728", "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py index 029894169..ae25a6ccd 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -26,6 +26,4 @@ def __init__( "mouse_pancreas_2019_10x_thompson_007", "mouse_pancreas_2019_10x_thompson_008", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py index e13c2ba99..ad4c12122 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -17,6 +17,4 @@ def __init__( "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py index de8347a3a..0b1b91004 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -17,6 +17,4 @@ def __init__( 
"mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py index 3f62aa3ea..285c33aca 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -18,6 +18,4 @@ def __init__( "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py index 4430d228d..37ee74c8d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -17,6 +17,4 @@ def __init__( "mouse_skin_2019_10x_pisco_001_10.1101/661728", "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py index 364373594..5b265e5e8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -18,6 +18,4 @@ def __init__( 
"mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728", "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py index 0ce1d3737..183ed1c68 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -16,6 +16,4 @@ def __init__( dsg.subset(key="id", values=[ "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py index d17112840..98044c2c2 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -18,6 +18,4 @@ def __init__( "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728", "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py index e294d80f8..fc54163f4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -17,6 +17,4 @@ def __init__( "mouse_tongue_2019_10x_pisco_001_10.1101/661728", 
"mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py index 6b8a1bd31..cbd9c4e8e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -17,6 +17,4 @@ def __init__( "mouse_trachea_2019_10x_pisco_001_10.1101/661728", "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728", ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py index a58af9178..d170301d5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -17,6 +17,4 @@ def __init__( "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" ]) - datasets = dsg.flatten().datasets - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) + super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 00f85685d..fbdcdada9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ 
-7,7 +7,7 @@ SAMPLE_FNS = [ "GSM3308545_NOD_08w_A", - "GSM3308547_NOD_08w_A", + "GSM3308547_NOD_08w_C", "GSM3308548_NOD_14w_A", "GSM3308549_NOD_14w_B", "GSM3308550_NOD_14w_C", diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index 45abfb1bb..e9ae7eacf 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -58,7 +58,6 @@ def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None, c self.data_human = { 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path), - 'mixed': human.DatasetGroupMixed(path=path, meta_path=meta_path, cache_path=cache_path), 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path), 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), diff --git a/sfaira/versions/celltype_versions/human/__init__.py b/sfaira/versions/celltype_versions/human/__init__.py index 85ca723ed..bf89eb2b9 100644 --- a/sfaira/versions/celltype_versions/human/__init__.py +++ b/sfaira/versions/celltype_versions/human/__init__.py @@ -47,7 +47,6 @@ ORGAN_DICT = { 'adipose': CelltypeVersionsHumanAdipose(), 'adrenalgland': CelltypeVersionsHumanAdrenalgland(), - 'mixed': CelltypeVersionsHumanMixed(), 'artery': CelltypeVersionsHumanArtery(), 'bladder': CelltypeVersionsHumanBladder(), 'blood': CelltypeVersionsHumanBlood(), From 036f341061282d4ff843c417cfb22c8e0f6f0859 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Thu, 28 Jan 2021 21:05:31 +0100 Subject: [PATCH 033/161] fixed meta data loading bug --- sfaira/consts/meta_data_files.py | 4 ++-- sfaira/data/base.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 
7adfd59d8..cdff4c828 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -9,9 +9,9 @@ "doi": str, "download": str, "download_meta": str, - "healthy": bool, + "healthy": str, "id": str, - "ncells": str, + "ncells": int, "normalization": str, "organ": str, "protocol": str, diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 516eb6fa5..d886477fb 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -723,8 +723,12 @@ def load_meta(self, fn: Union[PathLike, str, None]): meta = pandas.read_csv( fn, usecols=list(self._META_DATA_FIELDS.keys()), - dtype=self._META_DATA_FIELDS, ) + # using dtype in read_csv through errors some times. + for k, v in self._META_DATA_FIELDS.items(): + if k in meta.columns: + if meta[k].values[0] is not None: + meta[k] = v(meta[k]) self.meta = meta.fillna("None").replace({"None": None}) def write_meta( @@ -1042,6 +1046,7 @@ def meta(self, x: Union[None, pd.DataFrame]): @property def ncells(self) -> int: + # ToDo cache this if it was loaded from meta? if self.adata is not None: x = self.adata.n_obs elif self._ncells is not None: From 47e87135615a9f98a64120aad1fd5489822447ed Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 29 Jan 2021 12:44:53 +0100 Subject: [PATCH 034/161] cell ontology data base (#12) * added basic obo handling class for cell type ontologies * refactored cell type universes into separate csvs * wrote new celltype_versions submodule * built infrastructure for writing and loading cell type class map csv with fuzzy match suggestion drafts * added functioning example of cell ontology and .csv cell type map to loader unit test --- .gitignore | 1 + docs/data.rst | 34 +- requirements.txt | 6 +- setup.py | 4 + sfaira/api/celltypes.py | 1 + sfaira/data/base.py | 196 +++++----- .../loaders/d10_1101_661728/base.py | 56 --- .../mouse_adipose_2019_10x_pisco_001.py | 35 -- .../mouse_adipose_2019_smartseq2_pisco_001.py | 35 -- .../mouse_adipose_2019_smartseq2_pisco_002.py | 35 -- .../mouse_adipose_2019_smartseq2_pisco_003.py | 35 -- .../mouse_adipose_2019_smartseq2_pisco_004.py | 35 -- .../mouse_bladder_2019_10x_pisco_001.py | 35 -- .../mouse_bladder_2019_smartseq2_pisco_001.py | 35 -- .../mouse_bone_2019_10x_pisco_001.py | 36 -- .../mouse_bone_2019_smartseq2_pisco_001.py | 36 -- .../mouse_brain_2019_smartseq2_pisco_001.py | 37 -- .../mouse_brain_2019_smartseq2_pisco_002.py | 37 -- .../mouse_colon_2019_10x_pisco_001.py | 35 -- .../mouse_colon_2019_smartseq2_pisco_001.py | 36 -- ...ouse_diaphragm_2019_smartseq2_pisco_001.py | 35 -- .../mouse_heart_2019_10x_pisco_001.py | 35 -- .../mouse_heart_2019_smartseq2_pisco_001.py | 35 -- .../mouse_heart_2019_smartseq2_pisco_002.py | 35 -- .../mouse_kidney_2019_10x_pisco_001.py | 40 --- .../mouse_kidney_2019_smartseq2_pisco_001.py | 40 --- .../mouse_liver_2019_10x_pisco_001.py | 35 -- .../mouse_liver_2019_smartseq2_pisco_001.py | 35 -- .../mouse_lung_2019_10x_pisco_001.py | 37 -- .../mouse_lung_2019_smartseq2_pisco_001.py | 37 -- .../mouse_mammarygland_2019_10x_pisco_001.py | 35 -- ...e_mammarygland_2019_smartseq2_pisco_001.py | 35 -- .../mouse_muscle_2019_10x_pisco_001.py | 35 -- 
.../mouse_muscle_2019_smartseq2_pisco_001.py | 35 -- .../mouse_pancreas_2019_10x_pisco_001.py | 37 -- ...mouse_pancreas_2019_smartseq2_pisco_001.py | 37 -- .../mouse_skin_2019_10x_pisco_001.py | 35 -- .../mouse_skin_2019_smartseq2_pisco_001.py | 36 -- .../mouse_spleen_2019_10x_pisco_001.py | 35 -- .../mouse_spleen_2019_smartseq2_pisco_001.py | 35 -- .../mouse_thymus_2019_10x_pisco_001.py | 35 -- .../mouse_thymus_2019_smartseq2_pisco_001.py | 35 -- .../mouse_tongue_2019_10x_pisco_001.py | 35 -- .../mouse_tongue_2019_smartseq2_pisco_001.py | 35 -- .../mouse_trachea_2019_10x_pisco_001.py | 35 -- .../mouse_trachea_2019_smartseq2_pisco_001.py | 35 -- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 116 ++++++ sfaira/data/utils/create_meta.py | 1 - sfaira/data/utils/create_meta_and_cache.py | 3 +- sfaira/estimators/external.py | 2 +- sfaira/estimators/keras.py | 29 +- sfaira/interface/model_zoo.py | 6 +- sfaira/models/celltype/marker.py | 5 +- sfaira/models/celltype/mlp.py | 5 +- sfaira/train/summaries.py | 2 +- sfaira/train/train_model.py | 3 +- sfaira/unit_tests/test_celltype_universe.py | 70 ++++ sfaira/unit_tests/test_data_template.py | 33 +- sfaira/versions/celltype_versions/__init__.py | 39 +- sfaira/versions/celltype_versions/base.py | 337 ++++++++++++++---- .../celltype_versions/extensions/__init__.py | 2 + .../extensions/obo_extension_human.py | 1 + .../extensions/obo_extension_mouse.py | 1 + .../celltype_versions/human/__init__.py | 91 ----- .../celltype_versions/human/adipose.py | 34 -- .../celltype_versions/human/adrenalgland.py | 66 ---- .../celltype_versions/human/artery.py | 39 -- .../celltype_versions/human/bladder.py | 44 --- .../versions/celltype_versions/human/blood.py | 40 --- .../versions/celltype_versions/human/bone.py | 34 -- .../versions/celltype_versions/human/brain.py | 64 ---- .../celltype_versions/human/calvaria.py | 42 --- .../celltype_versions/human/cervix.py | 34 -- .../human/chorionicvillus.py | 43 --- 
.../versions/celltype_versions/human/colon.py | 100 ------ .../celltype_versions/human/duodenum.py | 39 -- .../celltype_versions/human/epityphlon.py | 34 -- .../celltype_versions/human/esophagus.py | 58 --- .../celltype_versions/human/external.py | 1 - .../versions/celltype_versions/human/eye.py | 68 ---- .../celltype_versions/human/fallopiantube.py | 38 -- .../celltype_versions/human/femalegonad.py | 48 --- .../celltype_versions/human/gallbladder.py | 44 --- .../versions/celltype_versions/human/heart.py | 54 --- .../versions/celltype_versions/human/hesc.py | 25 -- .../versions/celltype_versions/human/ileum.py | 55 --- .../celltype_versions/human/jejunum.py | 37 -- .../celltype_versions/human/kidney.py | 147 -------- .../versions/celltype_versions/human/liver.py | 100 ------ .../versions/celltype_versions/human/lung.py | 74 ---- .../celltype_versions/human/malegonad.py | 57 --- .../versions/celltype_versions/human/mixed.py | 42 --- .../celltype_versions/human/muscle.py | 47 --- .../celltype_versions/human/omentum.py | 40 --- .../celltype_versions/human/pancreas.py | 66 ---- .../celltype_versions/human/placenta.py | 81 ----- .../celltype_versions/human/pleura.py | 46 --- .../celltype_versions/human/prostate.py | 43 --- .../celltype_versions/human/rectum.py | 41 --- .../versions/celltype_versions/human/rib.py | 44 --- .../versions/celltype_versions/human/skin.py | 61 ---- .../celltype_versions/human/spinalcord.py | 55 --- .../celltype_versions/human/spleen.py | 69 ---- .../celltype_versions/human/stomach.py | 68 ---- .../celltype_versions/human/thymus.py | 74 ---- .../celltype_versions/human/thyroid.py | 48 --- .../celltype_versions/human/trachea.py | 45 --- .../celltype_versions/human/ureter.py | 33 -- .../celltype_versions/human/uterus.py | 43 --- .../celltype_versions/mouse/__init__.py | 57 --- .../celltype_versions/mouse/adipose.py | 38 -- .../celltype_versions/mouse/bladder.py | 37 -- .../versions/celltype_versions/mouse/blood.py | 31 -- 
.../versions/celltype_versions/mouse/bone.py | 52 --- .../versions/celltype_versions/mouse/brain.py | 54 --- .../versions/celltype_versions/mouse/colon.py | 30 -- .../celltype_versions/mouse/diaphragm.py | 28 -- .../celltype_versions/mouse/external.py | 1 - .../celltype_versions/mouse/femalegonad.py | 33 -- .../versions/celltype_versions/mouse/heart.py | 42 --- .../versions/celltype_versions/mouse/ileum.py | 33 -- .../celltype_versions/mouse/kidney.py | 67 ---- .../versions/celltype_versions/mouse/liver.py | 45 --- .../versions/celltype_versions/mouse/lung.py | 87 ----- .../celltype_versions/mouse/malegonad.py | 33 -- .../celltype_versions/mouse/mammarygland.py | 34 -- .../celltype_versions/mouse/muscle.py | 38 -- .../celltype_versions/mouse/pancreas.py | 53 --- .../celltype_versions/mouse/placenta.py | 44 --- .../celltype_versions/mouse/prostate.py | 28 -- .../versions/celltype_versions/mouse/rib.py | 36 -- .../versions/celltype_versions/mouse/skin.py | 30 -- .../celltype_versions/mouse/spleen.py | 43 --- .../celltype_versions/mouse/stomach.py | 34 -- .../celltype_versions/mouse/thymus.py | 46 --- .../celltype_versions/mouse/tongue.py | 25 -- .../celltype_versions/mouse/trachea.py | 39 -- .../celltype_versions/mouse/uterus.py | 34 -- .../target_universes/__init__.py | 0 .../target_universes/human/__init__.py | 1 + .../target_universes/human/adipose.csv | 15 + .../target_universes/human/adrenalgland.csv | 47 +++ .../target_universes/human/artery.csv | 20 ++ .../target_universes/human/bladder.csv | 25 ++ .../target_universes/human/blood.csv | 21 ++ .../target_universes/human/bone.csv | 15 + .../target_universes/human/brain.csv | 40 +++ .../target_universes/human/calvaria.csv | 23 ++ .../target_universes/human/cervix.csv | 15 + .../human/chorionicvillus.csv | 24 ++ .../target_universes/human/colon.csv | 68 ++++ .../target_universes/human/duodenum.csv | 20 ++ .../target_universes/human/epityphlon.csv | 15 + .../target_universes/human/esophagus.csv | 35 ++ 
.../target_universes/human/eye.csv | 46 +++ .../target_universes/human/fallopiantube.csv | 19 + .../target_universes/human/femalegonad.csv | 29 ++ .../target_universes/human/gallbladder.csv | 25 ++ .../target_universes/human/heart.csv | 35 ++ .../target_universes/human/hesc.csv | 6 + .../target_universes/human/ileum.csv | 33 ++ .../target_universes/human/jejunum.csv | 18 + .../target_universes/human/kidney.csv | 107 ++++++ .../target_universes/human/liver.csv | 67 ++++ .../target_universes/human/lung.csv | 39 ++ .../target_universes/human/malegonad.csv | 38 ++ .../target_universes/human/muscle.csv | 28 ++ .../target_universes/human/omentum.csv | 21 ++ .../target_universes/human/pancreas.csv | 45 +++ .../target_universes/human/placenta.csv | 55 +++ .../target_universes/human/pleura.csv | 27 ++ .../target_universes/human/prostate.csv | 24 ++ .../target_universes/human/rectum.csv | 20 ++ .../target_universes/human/rib.csv | 25 ++ .../target_universes/human/skin.csv | 38 ++ .../target_universes/human/spinalcord.csv | 36 ++ .../target_universes/human/spleen.csv | 44 +++ .../target_universes/human/stomach.csv | 49 +++ .../target_universes/human/thymus.csv | 51 +++ .../target_universes/human/thyroid.csv | 28 ++ .../target_universes/human/trachea.csv | 25 ++ .../target_universes/human/ureter.csv | 14 + .../target_universes/human/uterus.csv | 24 ++ .../target_universes/mouse/__init__.py | 1 + .../target_universes/mouse/adipose.csv | 12 + .../target_universes/mouse/bladder.csv | 13 + .../target_universes/mouse/blood.csv | 11 + .../target_universes/mouse/bone.csv | 29 ++ .../target_universes/mouse/brain.csv | 32 ++ .../target_universes/mouse/colon.csv | 10 + .../target_universes/mouse/diaphragm.csv | 8 + .../target_universes/mouse/heart.csv | 22 ++ .../target_universes/mouse/ileum.csv | 13 + .../target_universes/mouse/kidney.csv | 29 ++ .../target_universes/mouse/liver.csv | 18 + .../target_universes/mouse/lung.csv | 40 +++ .../target_universes/mouse/malegonad.csv | 12 + 
.../target_universes/mouse/mammarygland.csv | 14 + .../target_universes/mouse/muscle.csv | 17 + .../target_universes/mouse/ovary.csv | 11 + .../target_universes/mouse/pancreas.csv | 20 ++ .../target_universes/mouse/placenta.csv | 24 ++ .../target_universes/mouse/prostate.csv | 8 + .../target_universes/mouse/rib.csv | 16 + .../target_universes/mouse/skin.csv | 10 + .../target_universes/mouse/spleen.csv | 18 + .../target_universes/mouse/stomach.csv | 14 + .../target_universes/mouse/thymus.csv | 16 + .../target_universes/mouse/tongue.csv | 5 + .../target_universes/mouse/trachea.csv | 17 + .../target_universes/mouse/uterus.csv | 14 + 211 files changed, 2489 insertions(+), 5317 deletions(-) create mode 100644 sfaira/api/celltypes.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/base.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py delete mode 
100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py delete 
mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py create mode 100644 sfaira/unit_tests/test_celltype_universe.py create mode 100644 sfaira/versions/celltype_versions/extensions/__init__.py create mode 100644 sfaira/versions/celltype_versions/extensions/obo_extension_human.py create mode 100644 sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py delete mode 100644 sfaira/versions/celltype_versions/human/adipose.py delete mode 100644 sfaira/versions/celltype_versions/human/adrenalgland.py delete mode 100644 sfaira/versions/celltype_versions/human/artery.py delete mode 100644 sfaira/versions/celltype_versions/human/bladder.py delete mode 100644 sfaira/versions/celltype_versions/human/blood.py delete mode 100644 sfaira/versions/celltype_versions/human/bone.py delete mode 100644 sfaira/versions/celltype_versions/human/brain.py delete mode 100644 sfaira/versions/celltype_versions/human/calvaria.py delete mode 100644 sfaira/versions/celltype_versions/human/cervix.py delete mode 100644 
sfaira/versions/celltype_versions/human/chorionicvillus.py delete mode 100644 sfaira/versions/celltype_versions/human/colon.py delete mode 100644 sfaira/versions/celltype_versions/human/duodenum.py delete mode 100644 sfaira/versions/celltype_versions/human/epityphlon.py delete mode 100644 sfaira/versions/celltype_versions/human/esophagus.py delete mode 100644 sfaira/versions/celltype_versions/human/external.py delete mode 100644 sfaira/versions/celltype_versions/human/eye.py delete mode 100644 sfaira/versions/celltype_versions/human/fallopiantube.py delete mode 100644 sfaira/versions/celltype_versions/human/femalegonad.py delete mode 100644 sfaira/versions/celltype_versions/human/gallbladder.py delete mode 100644 sfaira/versions/celltype_versions/human/heart.py delete mode 100644 sfaira/versions/celltype_versions/human/hesc.py delete mode 100644 sfaira/versions/celltype_versions/human/ileum.py delete mode 100644 sfaira/versions/celltype_versions/human/jejunum.py delete mode 100644 sfaira/versions/celltype_versions/human/kidney.py delete mode 100644 sfaira/versions/celltype_versions/human/liver.py delete mode 100644 sfaira/versions/celltype_versions/human/lung.py delete mode 100644 sfaira/versions/celltype_versions/human/malegonad.py delete mode 100644 sfaira/versions/celltype_versions/human/mixed.py delete mode 100644 sfaira/versions/celltype_versions/human/muscle.py delete mode 100644 sfaira/versions/celltype_versions/human/omentum.py delete mode 100644 sfaira/versions/celltype_versions/human/pancreas.py delete mode 100644 sfaira/versions/celltype_versions/human/placenta.py delete mode 100644 sfaira/versions/celltype_versions/human/pleura.py delete mode 100644 sfaira/versions/celltype_versions/human/prostate.py delete mode 100644 sfaira/versions/celltype_versions/human/rectum.py delete mode 100644 sfaira/versions/celltype_versions/human/rib.py delete mode 100644 sfaira/versions/celltype_versions/human/skin.py delete mode 100644 
sfaira/versions/celltype_versions/human/spinalcord.py delete mode 100644 sfaira/versions/celltype_versions/human/spleen.py delete mode 100644 sfaira/versions/celltype_versions/human/stomach.py delete mode 100644 sfaira/versions/celltype_versions/human/thymus.py delete mode 100644 sfaira/versions/celltype_versions/human/thyroid.py delete mode 100644 sfaira/versions/celltype_versions/human/trachea.py delete mode 100644 sfaira/versions/celltype_versions/human/ureter.py delete mode 100644 sfaira/versions/celltype_versions/human/uterus.py delete mode 100644 sfaira/versions/celltype_versions/mouse/__init__.py delete mode 100644 sfaira/versions/celltype_versions/mouse/adipose.py delete mode 100644 sfaira/versions/celltype_versions/mouse/bladder.py delete mode 100644 sfaira/versions/celltype_versions/mouse/blood.py delete mode 100644 sfaira/versions/celltype_versions/mouse/bone.py delete mode 100644 sfaira/versions/celltype_versions/mouse/brain.py delete mode 100644 sfaira/versions/celltype_versions/mouse/colon.py delete mode 100644 sfaira/versions/celltype_versions/mouse/diaphragm.py delete mode 100644 sfaira/versions/celltype_versions/mouse/external.py delete mode 100644 sfaira/versions/celltype_versions/mouse/femalegonad.py delete mode 100644 sfaira/versions/celltype_versions/mouse/heart.py delete mode 100644 sfaira/versions/celltype_versions/mouse/ileum.py delete mode 100644 sfaira/versions/celltype_versions/mouse/kidney.py delete mode 100644 sfaira/versions/celltype_versions/mouse/liver.py delete mode 100644 sfaira/versions/celltype_versions/mouse/lung.py delete mode 100644 sfaira/versions/celltype_versions/mouse/malegonad.py delete mode 100644 sfaira/versions/celltype_versions/mouse/mammarygland.py delete mode 100644 sfaira/versions/celltype_versions/mouse/muscle.py delete mode 100644 sfaira/versions/celltype_versions/mouse/pancreas.py delete mode 100644 sfaira/versions/celltype_versions/mouse/placenta.py delete mode 100644 
sfaira/versions/celltype_versions/mouse/prostate.py delete mode 100644 sfaira/versions/celltype_versions/mouse/rib.py delete mode 100644 sfaira/versions/celltype_versions/mouse/skin.py delete mode 100644 sfaira/versions/celltype_versions/mouse/spleen.py delete mode 100644 sfaira/versions/celltype_versions/mouse/stomach.py delete mode 100644 sfaira/versions/celltype_versions/mouse/thymus.py delete mode 100644 sfaira/versions/celltype_versions/mouse/tongue.py delete mode 100644 sfaira/versions/celltype_versions/mouse/trachea.py delete mode 100644 sfaira/versions/celltype_versions/mouse/uterus.py create mode 100644 sfaira/versions/celltype_versions/target_universes/__init__.py create mode 100644 sfaira/versions/celltype_versions/target_universes/human/__init__.py create mode 100644 sfaira/versions/celltype_versions/target_universes/human/adipose.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/artery.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/bladder.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/blood.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/bone.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/brain.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/calvaria.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/cervix.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/colon.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/duodenum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv create mode 100644 
sfaira/versions/celltype_versions/target_universes/human/esophagus.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/eye.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/heart.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/hesc.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/ileum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/jejunum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/kidney.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/liver.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/lung.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/malegonad.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/muscle.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/omentum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/pancreas.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/placenta.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/pleura.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/prostate.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/rectum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/rib.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/skin.csv create mode 100644 
sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/spleen.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/stomach.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/thymus.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/thyroid.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/trachea.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/ureter.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/human/uterus.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/__init__.py create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/blood.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/bone.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/brain.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/colon.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/heart.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/liver.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/lung.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv 
create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/rib.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/skin.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv create mode 100644 sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv diff --git a/.gitignore b/.gitignore index 362e74721..f9ffe2c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ sfaira/unit_tests/test_data_loaders/* sfaira/unit_tests/test_data/* +sfaira/unit_tests/template_data/* sfaira/unit_tests/test_data_template.py git abuild cache diff --git a/docs/data.rst b/docs/data.rst index 96a369935..df067ca83 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -144,16 +144,6 @@ before it is loaded into memory: # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and # directory as the python file of this data loader (see below). - # A dictionary of dictionaries with: - # One item for each annotation label that is not contained in the ontology. - # This item maps a custom ID to an ontology supported ID. 
- # Note that you have to load your custom IDs, to which this refers to, in load(). - self.class_maps = { - "0": { # one entry for each cell type version for this species and organ - 'my weird name for T cells': 'T cell', # one map from a custom ID to an ontology supported ID - }, - } - 2. A function called to load the data set into memory: It is important to set an automated path indicating the location of the raw files here. @@ -221,10 +211,12 @@ Map cell type labels to ontology The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with the same name and directory as the python file in which the data loader is located. -This .csv contains two columns with one row for each unique cell type label and their free text identifiers in the first -column, and the corresponding ontology term in the second column. -You could write this file entirely from scratch. -Sfaira also allows you to generate a first guess of this file using fuzzy string matching via ToDo. +This .csv contains two columns with one row for each unique cell type label. +The free text identifiers in the first column "source", +and the corresponding ontology term in the second column "target". +You can write this file entirely from scratch. +Sfaira also allows you to generate a first guess of this file using fuzzy string matching +which is automatically executed when you run the template data loader unit test for the first time with you new loader. Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which ontology term in the case of conflicts. Still, this first guess usually drastically speeds up this annotation harmonization. @@ -261,6 +253,20 @@ Note that you can always add additional data loaders for further, less streamlin .. 
_template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined + +Test your data loader +~~~~~~~~~~~~~~~~~~~~~ + +Sfaira has a local data loader unit test_ with which you can debug your data loader and which aids with meta data +assignments, such as cell types. +You can use this test with pytest in an IDE. +You can simply place the raw data into `sfaira/unit_tests/template_data/` with the correct sub path, +as indicated in the `._load()`, +for the test to access this data. + +.. _test: https://github.com/theislab/sfaira/tree/dev/sfaira/unit_tests/test_data_template.py + + Cell type ontology management ----------------------------- diff --git a/requirements.txt b/requirements.txt index cee7a6606..d0722796f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,15 @@ anndata>=0.7 docutils +fuzzywuzzy jinja2 loompy -numpy>=1.14.0 matplotlib +networkx +numpy>=1.14.0 +obonet openpyxl pandas +python-Levenshtein scanpy scipy seaborn diff --git a/setup.py b/setup.py index 71270b465..91f191226 100644 --- a/setup.py +++ b/setup.py @@ -27,9 +27,13 @@ packages=find_packages(), install_requires=[ 'anndata>=0.7', + 'fuzzywuzzy', 'h5py', + 'networkx', 'numpy>=1.16.4', + 'obonet', 'pandas', + 'python-Levenshtein', 'scipy>=1.2.1', 'tqdm', 'tensorflow>=2.0.0' # TODO Remove and add to tensorflow profile diff --git a/sfaira/api/celltypes.py b/sfaira/api/celltypes.py new file mode 100644 index 000000000..0f68392e7 --- /dev/null +++ b/sfaira/api/celltypes.py @@ -0,0 +1 @@ +from sfaira.versions.celltype_versions import CelltypeUniverse, OntologyObo diff --git a/sfaira/data/base.py b/sfaira/data/base.py index d886477fb..e0a1fcb00 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -15,17 +15,23 @@ import warnings from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.versions.celltype_versions import CelltypeUniverse from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS 
UNS_STRING_META_IN_OBS = "__obs__" def map_fn(inputs): - ds, formatted_version, remove_gene_version, match_to_reference, load_raw, allow_caching, func, \ + """ + Functional to load data set with predefined additional actions. + + :param inputs: + :return: None if function ran, error report otherwise + """ + ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, \ kwargs_func = inputs try: ds.load( - celltype_version=formatted_version, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, load_raw=load_raw, @@ -90,6 +96,9 @@ class DatasetBase(abc.ABC): _var_symbol_col: Union[None, str] _var_ensembl_col: Union[None, str] + _ontology_celltypes: Union[None, CelltypeUniverse] + _ontology_class_map: Union[None, dict] + def __init__( self, path: Union[str, None] = None, @@ -147,6 +156,9 @@ def __init__( self.class_maps = {"0": {}} self._unknown_celltype_identifiers = self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers + self._ontology_celltypes = None + self._ontology_class_map = None + @abc.abstractmethod def _load(self, fn): pass @@ -213,6 +225,7 @@ def _get_cache_fn(): cache = os.path.join( self.cache_path, self.directory_formatted_doi, + "cache", self._directory_formatted_id + ".h5ad" ) return cache @@ -254,7 +267,6 @@ def _cached_writing(fn_cache): def load( self, - celltype_version: Union[str, None] = None, fn: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, @@ -263,7 +275,6 @@ def load( ): """ - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param fn: Optional target file name, otherwise infers from defined directory structure. :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. 
@@ -293,7 +304,7 @@ def load( # Run data set-specific loading script: self._load_cached(fn=fn, load_raw=load_raw, allow_caching=allow_caching) # Set data-specific meta data in .adata: - self._set_metadata_in_adata(celltype_version=celltype_version) + self._set_metadata_in_adata() # Set loading hyper-parameter-specific meta data: self.adata.uns[self._ADATA_IDS_SFAIRA.load_raw] = load_raw self.adata.uns[self._ADATA_IDS_SFAIRA.mapped_features] = match_to_reference @@ -471,11 +482,10 @@ def _match_features_to_reference(self, match_to_reference): uns=self.adata.uns ) - def _set_metadata_in_adata(self, celltype_version): + def _set_metadata_in_adata(self): """ Copy meta data from dataset class in .anndata. - :param celltype_version: :return: """ # Set data set-wide attributes (.uns): @@ -528,14 +538,9 @@ def _set_metadata_in_adata(self, celltype_version): # Set cell-wise attributes (.obs): # None so far other than celltypes. # Set cell types: - if self._ADATA_IDS_SFAIRA.cell_ontology_id not in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_id] = None # Map cell type names from raw IDs to ontology maintained ones:: - if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.columns: - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = self.map_ontology_class( - raw_ids=self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values, - celltype_version=celltype_version - ) + if self.obs_key_cellontology_original is not None: + self.project_celltypes_to_ontology() def load_tobacked( self, @@ -543,7 +548,6 @@ def load_tobacked( genome: str, idx: np.ndarray, fn: Union[None, str] = None, - celltype_version: Union[str, None] = None, load_raw: bool = False, allow_caching: bool = True ): @@ -559,14 +563,12 @@ def load_tobacked( shuffled object. :param keys: :param fn: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param load_raw: See .load(). :param allow_caching: See .load(). 
:return: New row index for next element to be written into backed anndata. """ self.load( fn=fn, - celltype_version=celltype_version, remove_gene_version=True, match_to_reference=genome, load_raw=load_raw, @@ -650,46 +652,59 @@ def doi_cleaned_id(self): return "_".join(self.id.split("_")[:-1]) @property - def available_type_versions(self): - return np.array(list(self.class_maps.keys())) + def fn_ontology_class_map_csv(self): + """Standardised file name under which cell type conversion tables are saved.""" + return self.doi_cleaned_id + ".csv" - def set_default_type_version(self): + def write_ontology_class_map(self, fn, protected_writing: bool = True): """ - Choose most recent version. + Load class maps of free text cell types to ontology classes. - :return: Version key corresponding to most recent version. + :param fn: File name of csv to load class maps from. + :param protected_writing: Only write if file was not already found. + :return: """ - return self.available_type_versions[np.argmax([int(x) for x in self.available_type_versions])] + labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) + tab = self.ontology_celltypes.onto.fuzzy_match_nodes( + source=labels_original, + match_only=False, + include_old=False, + include_synonyms=False, + remove=self._unknown_celltype_identifiers, + ) + if not os.path.exists(fn) or not protected_writing: + tab.to_csv(fn, index=None) - def assert_celltype_version_key( - self, - celltype_version - ): - if celltype_version not in self.available_type_versions: - raise ValueError( - "required celltype version %s not found. available are: %s" % - (celltype_version, str(self.available_type_versions)) - ) + def load_ontology_class_map(self, fn): + """ + Load class maps of free text cell types to ontology classes. - def map_ontology_class( - self, - raw_ids, - celltype_version - ): + :param fn: File name of csv to load class maps from. 
+ :return: """ + if os.path.exists(fn): + self.ontology_class_map = pd.read_csv(fn, header=0, index_col=None) + else: + warnings.warn(f"file {fn} does not exist") + + def project_celltypes_to_ontology(self): + """ + Project free text cell type names to ontology based on mapping table. + + ToDo: add ontology ID setting here. - :param raw_ids: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :return: """ - if celltype_version is None: - celltype_version = self.set_default_type_version() - self.assert_celltype_version_key(celltype_version=celltype_version) - return [ - self.class_maps[celltype_version][x] if x in self.class_maps[celltype_version].keys() - else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x - for x in raw_ids - ] + labels_original = self.adata.obs[self.obs_key_cellontology_original].values + if self.ontology_class_map is not None: # only if this was defined + labels_mapped = [ + self.ontology_class_map[x] if x in self.ontology_class_map.keys() + else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers + else x for x in labels_original + ] + del self.adata.obs[self.obs_key_cellontology_original] + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = labels_mapped + self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = labels_original @property def citation(self): @@ -1319,6 +1334,29 @@ def year(self, x: int): self.__value_protection(attr="year", allowed=self._ADATA_IDS_SFAIRA.year_allowed_entries, attempted=x) self._year = x + @property + def ontology_celltypes(self): + if self._ontology_celltypes is None: + assert self.organism is not None, "set organism before using ontology_celltypes" + self._ontology_celltypes = CelltypeUniverse(organism=self.organism) + return self._ontology_celltypes + + @property + def ontology_class_map(self) -> dict: + return self._ontology_class_map + + 
@ontology_class_map.setter + def ontology_class_map(self, x: pd.DataFrame): + self.__erasing_protection(attr="ontology_class_map", val_old=self._ontology_class_map, val_new=x) + assert x.shape[1] == 2 + assert x.columns[0] == "source" + assert x.columns[1] == "target" + # Transform data frame into a mapping dictionary: + self._ontology_class_map = dict(list(zip( + x["source"].values.tolist(), + x["target"].values.tolist() + ))) + # Private methods: def __erasing_protection(self, attr, val_old, val_new): @@ -1531,7 +1569,6 @@ def _load_group(self, load_raw: bool): def load( self, annotated_only: bool = False, - celltype_version: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, load_raw: bool = False, @@ -1549,7 +1586,6 @@ def load( In this setting, datasets are removed from memory after the function has been executed. :param annotated_only: - :param celltype_version: See .load(). :param remove_gene_version: See .load(). :param match_to_reference: See .load(). :param load_raw: See .load(). @@ -1564,9 +1600,7 @@ def func(dataset, **kwargs_func): :param kwargs_func: Kwargs of func. 
:return: """ - formatted_version = self.format_type_version(celltype_version) args = [ - formatted_version, remove_gene_version, match_to_reference, load_raw, @@ -1590,6 +1624,7 @@ def func(dataset, **kwargs_func): del self.datasets[x[0]] else: # for loop adata_group = None + datasets_to_remove = [] for k, v in self.datasets.items(): print(f"loading {k}") group_loading = v.set_raw_full_group_object(fn=None, adata_group=adata_group) @@ -1598,8 +1633,10 @@ def func(dataset, **kwargs_func): x = map_fn(tuple([v] + args)) # Clear data sets that were not successfully loaded because of missing data: if x is not None: - print(x[1]) - del self.datasets[x[0]] + warnings.warn(f"data set {k} not loaded") + datasets_to_remove.append(k) + for k in datasets_to_remove: + del self.datasets[k] del adata_group def load_tobacked( @@ -1608,7 +1645,6 @@ def load_tobacked( genome: str, idx: List[np.ndarray], annotated_only: bool = False, - celltype_version: Union[str, None] = None, load_raw: bool = False, allow_caching: bool = True, ): @@ -1623,7 +1659,6 @@ def load_tobacked( :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a shuffled object. This has to be a list of the length of self.data, one index array for each dataset. :param annotated_only: - :param celltype_version: See .load(). :param load_raw: See .load(). :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. @@ -1637,7 +1672,6 @@ def load_tobacked( adata_backed=adata_backed, genome=genome, idx=idx[i], - celltype_version=self.format_type_version(celltype_version), load_raw=load_raw, allow_caching=allow_caching ) @@ -1768,38 +1802,6 @@ def ncells(self, annotated_only: bool = False): cells = self.ncells_bydataset(annotated_only=annotated_only) return np.sum(cells) - def assert_celltype_version_key( - self, - celltype_version - ): - """ - Assert that version key exists in each data set. 
- :param celltype_version: - :return: - """ - for x in self.ids: - if celltype_version not in self.datasets[x].available_type_versions: - raise ValueError( - "required celltype version %s not found in data set %s. available are: %s" % - (celltype_version, x, str(self.datasets[x].available_type_versions)) - ) - - def format_type_version(self, version): - """ - Choose most recent version available in each dataset if None, otherwise return input version after checking. - - :return: Version key corresponding to default version. - """ - if version is None: - versions = set(self.datasets[self.ids[0]].available_type_versions) - for x in self.ids[1:]: - versions = versions.intersection(set(self.datasets[x].available_type_versions)) - versions = np.array(list(versions)) - return versions[np.argmax([int(x) for x in versions])] - else: - self.assert_celltype_version_key(celltype_version=version) - return version - def subset(self, key, values): """ Subset list of adata objects based on sample-wise properties. @@ -1888,6 +1890,7 @@ def __init__( if os.path.isfile(os.path.join(cwd, f)): # only files # Narrow down to data set files: if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + datasets_f = [] file_module = ".".join(f.split(".")[:-1]) DatasetFound = pydoc.locate( "sfaira.data.dataloaders.loaders." + dataset_module + "." 
+ @@ -1903,7 +1906,7 @@ def __init__( file_module + ".SAMPLE_IDS") if sample_fns is not None and sample_ids is None: # DatasetBaseGroupLoadingManyFiles: - datasets.extend([ + datasets_f.extend([ DatasetFound( sample_fn=x, path=path, @@ -1914,7 +1917,7 @@ def __init__( ]) elif sample_fns is None and sample_ids is not None: # DatasetBaseGroupLoadingManyFiles: - datasets.extend([ + datasets_f.extend([ DatasetFound( sample_id=x, path=path, @@ -1926,7 +1929,11 @@ def __init__( elif sample_fns is not None and sample_ids is not None: raise ValueError(f"sample_fns and sample_ids both found for {f}") else: - datasets.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + datasets_f.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + # Load cell type maps: + for x in datasets_f: + x.load_ontology_class_map(fn=os.path.join(cwd, x.fn_ontology_class_map_csv)) + datasets.extend(datasets_f) keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) @@ -2024,7 +2031,6 @@ def flatten(self) -> DatasetGroup: def load_all( self, - celltype_version: Union[str, None] = None, annotated_only: bool = False, match_to_reference: Union[str, None] = None, remove_gene_version: bool = True, @@ -2035,8 +2041,6 @@ def load_all( """ Loads data set human into anndata object. - :param celltype_version: Version of cell type ontology to use. - Uses most recent within each DatasetGroup if None. :param annotated_only: :param match_to_reference: See .load(). :param remove_gene_version: See .load(). 
@@ -2051,7 +2055,6 @@ def load_all( annotated_only=annotated_only, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, - celltype_version=celltype_version, load_raw=load_raw, allow_caching=allow_caching, processes=processes, @@ -2073,7 +2076,6 @@ def load_all_tobacked( shuffled: bool = False, as_dense: bool = False, annotated_only: bool = False, - celltype_version: Union[str, None] = None, load_raw: bool = False, allow_caching: bool = True, ): @@ -2096,7 +2098,6 @@ def load_all_tobacked( :param shuffled: Whether to shuffle data when writing to backed. :param as_dense: Whether to load into dense count matrix. :param annotated_only: - :param celltype_version: Version of cell type ontology to use. Uses most recent if None. :param load_raw: See .load(). :param allow_caching: See .load(). """ @@ -2165,7 +2166,6 @@ def load_all_tobacked( genome=genome, idx=idx_ls[i], annotated_only=annotated_only, - celltype_version=celltype_version, load_raw=load_raw, allow_caching=allow_caching, ) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py deleted file mode 100644 index 0a07210ec..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/base.py +++ /dev/null @@ -1,56 +0,0 @@ -import anndata -from typing import Union -from sfaira.data import DatasetBase - - -class Dataset_d10_1101_661728(DatasetBase): - """ - This is a dataloader template for tabula muris data. 
- """ - - def __init__( - self, - path: Union[str, None], - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.source = source - if self.source == "aws": - self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" - elif self.source == "figshare": - self.download = "https://ndownloader.figshare.com/articles/8273102/versions/2" - else: - raise ValueError("source %s not recognized" % self.source) - - self.obs_key_cellontology_original = "free_annotation" - self.obs_key_age = "age" - self.obs_key_dev_stage = "development_stage" # not given in all data sets - self.obs_key_sex = "sex" - # ToDo: further anatomical information for subtissue in "subtissue" - - self.author = "Quake" - self.doi = "10.1101/661728" - self.healthy = True - self.normalization = "norm" - self.organism = "mouse" - self.state_exact = "healthy" - self.year = 2019 - - self.var_ensembl_col = None - self.var_symbol_col = "index" - - def _load_generalized(self, fn): - self.adata = anndata.read_h5ad(fn) - if self.source == "aws": - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} - - def _get_protocol_tms(self, x) -> str: - return "smartseq2" if "smartseq2" in x else "10x" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py deleted file mode 100644 index ba71dfa03..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, 
- meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.organ = "adipose" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "Fat_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py deleted file mode 100644 index 710a502fd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "adipose" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if 
self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "bat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py deleted file mode 100644 index 93af8ff7e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728" - self.organ = "adipose" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "gat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py deleted file 
mode 100644 index 755c34976..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_003.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728" - self.organ = "adipose" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "mat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py deleted file mode 100644 index 864b0e0af..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_adipose_2019_smartseq2_pisco_004.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, 
**kwargs) - self.id = "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728" - self.organ = "adipose" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "adipose", "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "adipose", "scat_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py deleted file mode 100644 index c6889776b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_bladder_2019_10x_pisco_001_10.1101/661728" - self.organ = "bladder" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", 
"Bladder_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py deleted file mode 100644 index 41da10137..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bladder_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "bladder" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bladder", "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bladder", "Bladder_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py deleted file mode 100644 index 2a919d354..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_10x_pisco_001.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union -from .base import 
Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_bone_2019_10x_pisco_001_10.1101/661728" - self.organ = "bone" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) - self.set_unkown_class_id(ids=["unknown cell-1", "unknown cell-2"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py deleted file mode 100644 index bc06538cd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_bone_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "bone" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps 
= { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "bone", "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "bone", "Marrow_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) - self.set_unkown_class_id(ids=["nan-marrow-needs-subclustering"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py deleted file mode 100644 index 68b1f2d51..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "brain" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Non-Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % 
self.source) - self._load_generalized(fn=fn) - - self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py deleted file mode 100644 index d23cb6411..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_brain_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728" - self.organ = "brain" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "brain", "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "brain", "Brain_Myeloid_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) - - self.set_unkown_class_id(ids=["Il6 expressing cells"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py deleted file mode 100644 index 6bec3f395..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import 
Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_colon_2019_10x_pisco_001_10.1101/661728" - self.organ = "colon" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py deleted file mode 100644 index c8535269c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_colon_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "colon" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - 
} - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "colon", "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "colon", "Large_Intestine_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py deleted file mode 100644 index d1c15fa22..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_diaphragm_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "diaphragm" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "diaphragm", "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "diaphragm", "Diaphragm_facs.h5ad") - else: - raise ValueError("source %s 
not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py deleted file mode 100644 index 995b2f06f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_heart_2019_10x_pisco_001_10.1101/661728" - self.organ = "heart" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py deleted file mode 100644 index a59e8ed75..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def 
__init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "heart" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "heart", "Heart_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py deleted file mode 100644 index a37a09f12..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_heart_2019_smartseq2_pisco_002.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "heart" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or 
path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "heart", "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad") - elif self.source == "figshare": - raise ValueError("not defined") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py deleted file mode 100644 index 3f1ae52b2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_10x_pisco_001.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_kidney_2019_10x_pisco_001_10.1101/661728" - self.organ = "kidney" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) - self.set_unkown_class_id(ids=["kidney cell"]) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py deleted file mode 100644 index 19f3f365e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_kidney_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "kidney" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "kidney capillary endothelial cell": "endothelial cell", - "kidney mesangial cell": "mesangial cell", - "kidney interstitial fibroblast": "interstitial fibroblast", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "kidney", "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "kidney", "Kidney_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) - self.set_unkown_class_id(ids=["kidney cell"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py deleted file mode 100644 index 56954c663..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 
@@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_liver_2019_10x_pisco_001_10.1101/661728" - self.organ = "liver" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py deleted file mode 100644 index a21bd1c7b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_liver_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "liver" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { 
- "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "liver", "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "liver", "Liver_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py deleted file mode 100644 index 3d836d4a8..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_10x_pisco_001.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_lung_2019_10x_pisco_001_10.1101/661728" - self.organ = "lung" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py deleted file mode 100644 index 807de2179..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_lung_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "lung" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "ciliated columnar cell of tracheobronchial tree": "ciliated cell", - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "lung", "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "lung", "Lung_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py deleted file mode 100644 index 2a65a44be..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def 
__init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728" - self.organ = "mammarygland" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py deleted file mode 100644 index 1a3fae56e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_mammarygland_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "mammarygland" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is 
None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "mammarygland", "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "mammarygland", "Mammary_Gland_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py deleted file mode 100644 index 4fb82822e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_muscle_2019_10x_pisco_001_10.1101/661728" - self.organ = "muscle" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6fb28fa22..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_muscle_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "muscle" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "muscle", "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "muscle", "Limb_Muscle_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py deleted file mode 100644 index dd3351fb2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_10x_pisco_001.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: 
Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_pancreas_2019_10x_pisco_001_10.1101/661728" - self.organ = "pancreas" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py deleted file mode 100644 index acf953870..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_pancreas_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "pancreas" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": { - "pancreatic ductal cel": "pancreatic ductal cell" - }, - } - - def _load(self, fn=None): - if fn is 
None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "pancreas", "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "pancreas", "Pancreas_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py deleted file mode 100644 index 13c227435..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_skin_2019_10x_pisco_001_10.1101/661728" - self.organ = "skin" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py 
b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py deleted file mode 100644 index 676b9f000..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_skin_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "skin" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "skin", "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "skin", "Skin_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py deleted file mode 100644 index bfd7a079f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = 
None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_spleen_2019_10x_pisco_001_10.1101/661728" - self.organ = "spleen" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "spleen", "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py deleted file mode 100644 index 29d802954..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_spleen_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "spleen" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "spleen", 
"tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "spleen", "Spleen_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py deleted file mode 100644 index 80b21b384..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_thymus_2019_10x_pisco_001_10.1101/661728" - self.organ = "thymus" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py deleted file mode 100644 index 2de3dc695..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_thymus_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "thymus" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "thymus", "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "thymus", "Thymus_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py deleted file mode 100644 index 877168d52..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = 
"mouse_tongue_2019_10x_pisco_001_10.1101/661728" - self.organ = "tongue" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", "Tongue_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py deleted file mode 100644 index c76a73108..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_tongue_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "tongue" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "tongue", "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "tongue", 
"Tongue_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py deleted file mode 100644 index 725ed16e6..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_10x_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_trachea_2019_10x_pisco_001_10.1101/661728" - self.organ = "trachea" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_droplet.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py deleted file mode 100644 index 6622c8916..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_trachea_2019_smartseq2_pisco_001.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from typing import Union -from .base import 
Dataset_d10_1101_661728 - - -class Dataset(Dataset_d10_1101_661728): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - source: str = "aws", - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, source=source, **kwargs) - self.id = "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728" - self.organ = "trachea" - self.protocol = self._get_protocol_tms(self.id) - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if self.source == "aws": - fn = os.path.join(self.path, "mouse", "trachea", "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad") - elif self.source == "figshare": - fn = os.path.join(self.path, "mouse", "trachea", "Trachea_facs.h5ad") - else: - raise ValueError("source %s not recognized" % self.source) - self._load_generalized(fn=fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py new file mode 100644 index 000000000..7668f9782 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -0,0 +1,116 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad", + 
"tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad", + 
"tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad", + "tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad", + "tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad", +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + protocol = "10x" if sample_fn.split("-")[3] == "droplet" else "smartseq2" + organ = sample_fn.split("-")[-1].split(".")[0].lower() + organ = "adipose" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ + "bladder" if organ in ["bladder"] else \ + "marrow" if organ in ["marrow"] else \ + "brain" if organ in ["brain_non-myeloid", "brain_myeloid"] else \ + "colon" if organ in ["large_intestine"] else \ + "diaphragm" if organ in ["diaphragm"] else \ + "heart" if organ in ["heart_and_aorta", "heart", "aorta"] else \ + "kidney" if organ in ["kidney"] else \ + "liver" if organ in ["liver"] else \ + "lung" if organ in ["lung"] else \ + "mammary_gland" if organ in ["mammary_gland"] else \ + "muscle" if organ in ["limb_muscle"] else \ + "pancreas" if organ in ["pancreas"] else \ + "skin" if organ in ["skin"] else \ + "spleen" if organ in ["spleen"] else \ + "thymus" if organ in ["thymus"] else \ + "tongue" if organ in ["tongue"] else \ + "trachea" if organ in ["trachea"] else "error" + + self.id = f"mouse_{organ}_2019_{protocol}_pisco_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + f"10.1101/661728" + + self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + + 
self.obs_key_cellontology_original = "free_annotation" + self.obs_key_age = "age" + self.obs_key_dev_stage = "development_stage" # not given in all data sets + self.obs_key_sex = "sex" + # ToDo: further anatomical information for subtissue in "subtissue" + + self.author = "Quake" + self.doi = "10.1101/661728" + self.healthy = True + self.normalization = "norm" + self.organism = "mouse" + self.organ = organ + self.protocol = protocol + self.state_exact = "healthy" + self.year = 2019 + + self.var_ensembl_col = None + self.var_symbol_col = "index" + + def _load(self, fn): + base_path = os.path.join(self.path, "mouse", self.organ) + fn = os.path.join(base_path, self.sample_fn) + + self.adata = anndata.read_h5ad(fn) + self.adata.X = self.adata.raw.X + self.adata.var = self.adata.raw.var + del self.adata.raw + self.adata.obsm = {} + self.adata.varm = {} + self.adata.uns = {} diff --git a/sfaira/data/utils/create_meta.py b/sfaira/data/utils/create_meta.py index bb707ce16..159d25dc9 100644 --- a/sfaira/data/utils/create_meta.py +++ b/sfaira/data/utils/create_meta.py @@ -22,7 +22,6 @@ def write_meta(args0, args1): ) dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. dsg.load( - celltype_version=None, annotated_only=False, match_to_reference=None, remove_gene_version=True, diff --git a/sfaira/data/utils/create_meta_and_cache.py b/sfaira/data/utils/create_meta_and_cache.py index d694e6fae..50aad5181 100644 --- a/sfaira/data/utils/create_meta_and_cache.py +++ b/sfaira/data/utils/create_meta_and_cache.py @@ -10,11 +10,11 @@ def write_meta(args0, args1): args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) # Test load from cache. 
args0.load( - celltype_version=None, remove_gene_version=True, load_raw=False, allow_caching=False, ) + args0.write_ontology_class_map(fn=args0.fn_ontology_class_map_csv) return None @@ -32,7 +32,6 @@ def write_meta(args0, args1): dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. # Write meta data, cache and test load from cache: dsg.load( - celltype_version=None, annotated_only=False, match_to_reference=None, remove_gene_version=True, diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 19b6fb032..14c612215 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ -from sfaira.versions.celltype_versions import ORGANISM_DICT, CelltypeVersionsBase +from sfaira.versions.celltype_versions import CelltypeUniverse from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index a9a04d323..aa21191b0 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -9,7 +9,7 @@ import os import warnings from tqdm import tqdm -from .external import CelltypeVersionsBase, Topologies, BasicModel +from .external import CelltypeUniverse, Topologies, BasicModel from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ CustomAccAgg, CustomF1Classwise, CustomFprClasswise, CustomTprClasswise, custom_cce_agg @@ -871,7 +871,7 @@ class EstimatorKerasCelltype(EstimatorKeras): Estimator class for the cell type model. 
""" - celltypes_version: CelltypeVersionsBase + celltypes_version: CelltypeUniverse def __init__( self, @@ -943,12 +943,12 @@ def ontology(self): def _get_celltype_out( self, idx: Union[np.ndarray, None], - lookup_ontology=["names"] + lookup_ontology="names" ): """ Build one hot encoded cell type output tensor and observation-wise weight matrix. - :param lookup_ontology: list of ontology names to conisder. + :param lookup_ontology: list of ontology names to consider. :return: """ if idx is None: @@ -959,18 +959,15 @@ def _get_celltype_out( else: type_classes = self.ntypes + 1 y = np.zeros((len(idx), type_classes), dtype="float32") - for i, x in enumerate(idx): - label = self.data.obs["cell_ontology_class"].values[x] - if label not in self.ids: - if not np.any([label in self.ontology[ont].keys() for ont in lookup_ontology]): - raise ValueError("%s not found in cell type universe and ontology sets" % label) - # Distribute probability mass uniformly across classes if multiple classes match. - for ont in lookup_ontology: - if label in self.ontology[ont].keys(): - leave_nodes = self.ontology[ont][label] - y[i, np.where([jj in leave_nodes for jj in self.ids])[0]] = 1. - else: - y[i, self.ids.index(label)] = 1. + celltype_idx = self.model.celltypes_version.map_to_target_leaves( + nodes=self.data.obs["cell_ontology_class"].values[idx].tolist(), + ontology="custom", + ontology_id=lookup_ontology, + return_type="idx" + ) + for i, x in enumerate(celltype_idx): + # Distribute probability mass uniformly across classes if multiple classes match: + y[i, x] = 1. / len(x) # Distribute aggregated class weight for computation of weights: freq = np.mean(y / np.sum(y, axis=1, keepdims=True), axis=0, keepdims=True) weights = 1. 
/ np.matmul(y, freq.T) # observation wise weight matrix diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 2aab74632..5eaed4dc2 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -7,7 +7,7 @@ import pandas as pd from typing import List, Union -from sfaira.versions.celltype_versions import ORGANISM_DICT +from sfaira.versions.celltype_versions import CelltypeUniverse from sfaira.versions.topology_versions import Topologies @@ -25,7 +25,7 @@ class ModelZoo(abc.ABC): model_type: Union[str, None] model_topology: Union[str, None] model_version: Union[str, None] - celltypes: Union[List, None] + celltypes: Union[CelltypeUniverse, None] def __init__( self, @@ -472,4 +472,4 @@ def set_latest( model_type=self.model_type, topology_id=self.model_topology ) - self.celltypes = ORGANISM_DICT[self.organism][self.organ].celltype_universe[self.model_version.split(".")[0]] + self.celltypes = CelltypeUniverse(organism=self.organism).load_target_universe(organ=self.organ) diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index ac8d4da39..f06661d2c 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -94,7 +94,7 @@ def __init__( class CellTypeMarkerVersioned(CellTypeMarker): - cell_type_version: celltype_versions.CelltypeVersionsBase + cell_type_version: celltype_versions.CelltypeUniverse def __init__( self, @@ -112,9 +112,6 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. 
- self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] - self.celltypes_version.set_version(version=topology_container.topology_id) - unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index 9b296862c..fadd3ee99 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -69,7 +69,7 @@ def __init__( class CellTypeMlpVersioned(CellTypeMlp): - cell_type_version: celltype_versions.CelltypeVersionsBase + cell_type_version: celltype_versions.CelltypeUniverse def __init__( self, @@ -87,9 +87,6 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. - self.celltypes_version = celltype_versions.ORGANISM_DICT[organism.lower()][organ.lower()] - self.celltypes_version.set_version(version=topology_container.topology_id) - unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 74cdc6cb9..398110f02 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -8,7 +8,6 @@ import os from sfaira.train.train_model import TargetZoos -from sfaira.versions.celltype_versions import ORGANISM_DICT from sfaira.estimators import EstimatorKerasEmbedding @@ -887,6 +886,7 @@ def plot_best_classwise_heatmap( dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() + assert False, "depreceat celltype_versions code here" celltype_versions = ORGANISM_DICT.copy() 
celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index e9ae7eacf..e3e334443 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -8,7 +8,6 @@ from sfaira.data import DatasetGroup, DatasetSuperGroup from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype -from sfaira.versions.celltype_versions import ORGANISM_DICT class TargetZoos: @@ -378,7 +377,7 @@ def _save_specific( cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() - celltype_versions = ORGANISM_DICT.copy() + assert False, "fix celltyp versions code here, depreceate" celltype_versions[self.zoo.organism][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) leafnodes = celltype_versions[self.zoo.organism][self.zoo.organ].ids ontology = celltype_versions[self.zoo.organism][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] diff --git a/sfaira/unit_tests/test_celltype_universe.py b/sfaira/unit_tests/test_celltype_universe.py new file mode 100644 index 000000000..e5f5030dd --- /dev/null +++ b/sfaira/unit_tests/test_celltype_universe.py @@ -0,0 +1,70 @@ +import numpy as np +import pandas as pd +import unittest + +from sfaira.versions.celltype_versions import OntologyObo, ORGANISM_DICT + + +class TestCellTypeUniverse(unittest.TestCase): + dir_debugging = "~/Desktop/temp/" + dir_debugging2 = "~/Desktop/temp2/" + dir_debugging3 = "~/Desktop/temp3/" + + def test_debugging(self, reduced=False): + import csv + onto = OntologyObo() + for k, v in ORGANISM_DICT.items(): + for kk, vv in v.items(): + universe = vv.celltype_universe["0"] + tab = onto.fuzzy_match_nodes(universe, match_only=True) + if not 
np.all(tab["matched"].values): + tab2 = onto.fuzzy_match_nodes(universe, match_only=False, include_old=True, remove=["unkown"]) + if not reduced: + tab2.to_csv( + self.dir_debugging + k + "_" + kk + "_universe.csv", + index=False, quoting=csv.QUOTE_NONE, sep=";" + ) + else: + tab2.loc[tab["matched"].values == False].to_csv( + self.dir_debugging + k + "_" + kk + "_universe.csv", + index=False, quoting=csv.QUOTE_NONE + ) + + def test_debugging2(self): + import csv + onto = OntologyObo() + for k, v in ORGANISM_DICT.items(): + for kk, vv in v.items(): + names = list(vv.ontology["0"]["names"].keys()) + tab = onto.fuzzy_match_nodes(names, match_only=True) + if not np.all(tab["matched"].values): + tab = onto.fuzzy_match_nodes(names, match_only=False, include_old=True, remove=["unkown"]) + tab.to_csv( + self.dir_debugging2 + k + "_" + kk + "_universe.csv", + index=False, quoting=csv.QUOTE_NONE, sep=";" + ) + + def test_debugging3(self): + import csv + onto = OntologyObo() + tab = pd.DataFrame({"name,id": [",".join([x, y]) for x, y in zip( + [v["name"] for k, v in onto.graph.nodes.items()], + list(onto.graph.nodes.keys()) + )]}) + tab.to_csv( + self.dir_debugging3 + "onto_full.csv", + index=False, quoting=csv.QUOTE_NONE, sep=";" + ) + + def test_only(self): + onto = OntologyObo() + for k, v in ORGANISM_DICT.items(): + for kk, vv in v.items(): + universe = vv.celltype_universe["0"] + tab = onto.fuzzy_match_nodes(universe, match_only=True) + print(tab.loc[tab["matched"].values == False]) + assert np.all(tab["matched"].values), f"{k} {kk}" + + +if __name__ == '__main__': + unittest.main() diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index 5993e7e88..dfe76b321 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -1,41 +1,50 @@ +import os import unittest from sfaira.data import DatasetGroupDirectoryOriented class TestDatasetTemplate(unittest.TestCase): - dir_data: str = 
"./test_data" - dir_meta: str = "./test_data/meta" + dir_template: str = "./template_data" def test_load(self): """ Address ToDos before running test to customize to your data set. :return: """ - celltype_version = None remove_gene_version = True match_to_reference = None # ToDo: add correct module here as "YOUR_STUDY": - from sfaira.data.dataloaders.loaders.YOUR_STUDY import FILE_PATH + # Addition coming soon: This path can either be in sfaira or in sfaira_extensions. + # So far, this still has to be in sfaira. + from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021 import FILE_PATH ds = DatasetGroupDirectoryOriented( file_base=FILE_PATH, - path=self.dir_data, - meta_path=self.dir_meta, - cache_path=self.dir_data + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template ) # Test raw loading and caching: + # You can set load_raw to True while debugging when caching works already to speed the test up, + # but be sure to set load_raw to True for final tests. ds.load( - celltype_version=celltype_version, - fn=None, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, - load_raw=True, # tests raw loading + load_raw=False, # tests raw loading allow_caching=True # tests caching ) + # Create cell type conversion table: + for k, v in ds.datasets.items(): + v.load() + # Write this directly into sfaira installation so that it can be committed via git. + v.write_ontology_class_map( + fn=os.path.join("/".join(FILE_PATH.split("/")[:-1]), v.fn_ontology_class_map_csv), + protected_writing=False + ) + # ToDo: conflicts are not automatically resolved, please go back to https://www.ebi.ac.uk/ols/ontologies/cl + # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. 
# Test loading from cache: ds.load( - celltype_version=celltype_version, - fn=None, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, load_raw=False, diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py index 68f91677f..3b6fd15ce 100644 --- a/sfaira/versions/celltype_versions/__init__.py +++ b/sfaira/versions/celltype_versions/__init__.py @@ -1,38 +1 @@ -from .base import CelltypeVersionsBase -from . import mouse -from . import human - - -mouse = mouse.ORGAN_DICT -human = human.ORGAN_DICT - -# Load versions from extension if available: -try: - from sfaira_extension.versions.celltype_versions import ORGANISM_DICT as ORGANISM_DICT_EXTENSION - - for organ in mouse.keys(): - if organ in ORGANISM_DICT_EXTENSION["mouse"].keys(): - for v in ORGANISM_DICT_EXTENSION["mouse"][organ].versions: - if v in mouse[organ].celltype_universe.keys(): - raise ValueError(f'Celltype version {v} already defined for mouse organ {organ} in base sfaira. ' - f'Please define a new version in sfaira_extension.') - else: - mouse[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].celltype_universe[v] - mouse[organ].ontology[v] = ORGANISM_DICT_EXTENSION["mouse"][organ].ontology[v] - - for organ in human.keys(): - if organ in ORGANISM_DICT_EXTENSION["human"].keys(): - for v in ORGANISM_DICT_EXTENSION["human"][organ].versions: - if v in human[organ].celltype_universe.keys(): - raise ValueError(f'Celltype version {v} already defined for loaders organ {organ} in base sfaira. 
' - f'Please define a new version in sfaira_extension.') - else: - human[organ].celltype_universe[v] = ORGANISM_DICT_EXTENSION["human"][organ].celltype_universe[v] - human[organ].ontology[v] = ORGANISM_DICT_EXTENSION["human"][organ].ontology[v] -except ImportError: - pass - -ORGANISM_DICT = { - "mouse": mouse, - "human": human -} +from .base import CelltypeUniverse, OntologyObo diff --git a/sfaira/versions/celltype_versions/base.py b/sfaira/versions/celltype_versions/base.py index 40b6c4c66..7fef5c622 100644 --- a/sfaira/versions/celltype_versions/base.py +++ b/sfaira/versions/celltype_versions/base.py @@ -1,109 +1,316 @@ +import abc +import networkx import numpy as np +import obonet +import pandas as pd +from typing import Dict, List, Tuple, Union +import warnings +from sfaira.versions.celltype_versions.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE -class CelltypeVersionsBase: - """ - Versioned cell type universe (list) and ontology (hierarchy) container class. - This class is subclassed once for each anatomical structure (organ). - Cell type versions take the form x.y: - - x is a major version and is incremented if the cell type identities or number changes. - - y is a minor version and is incremented if cell type annotation such as names are altered without changing - the described cell type or adding cell types. +class OntologyBase: + leaves: list - Basic checks on the organ specific instance are performed in the constructor. - """ + @abc.abstractmethod + def set_leaves(self, nodes: list = None): + pass + + @abc.abstractmethod + def get_ancestors(self, node: str) -> List[str]: + pass - celltype_universe: dict - ontology: dict - version: str + def map_to_leaves(self, node: str, return_type: str = "elements", include_self: bool = True): + """ + Map a given list of nodes to leave nodes. - def __init__(self, **kwargs): - # Check that versions are consistent. 
- if not list(self.celltype_universe.keys()) == list(self.ontology.keys()): - raise ValueError( - "error in matching versions of cell type universe and ontology in %s" % - type(self) - ) - # Check that ontology terms are unique also between ontologies - if np.sum([len(x) for x in self.ontology.values()]) != \ - len(np.unique(np.array([list(x) for x in self.ontology.values()]))): - raise ValueError( - "duplicated ontology terms found between ontologies in %s" % - type(self) - ) + :param node: + :param return_type: - @property - def versions(self): + "elements": names of mapped leave nodes + "idx": indicies in leave note list of of mapped leave nodes + :param include_self: whether to include node itself + :return: """ - Available cell type universe versions loaded in this instance. + assert self.leaves is not None + ancestors = self.get_ancestors(node) + if include_self: + ancestors = ancestors + [node] + if return_type == "elements": + return [x for x in self.leaves if x in ancestors] + if return_type == "idx": + return np.array([i for i, (x, y) in enumerate(self.leaves) if x in ancestors]) + + +class OntologyDict(OntologyBase): + + def __init__(self, onto: dict): + self.onto = onto + + def set_leaves(self, nodes: list = None): + self.leaves = nodes + + def get_ancestors(self, node: str) -> List[str]: + return self.onto[node] if node in self.onto.keys() else [node] + + +class OntologyObo(OntologyBase): + + graph: networkx.MultiDiGraph + + def __init__(self, obo: str = "http://purl.obolibrary.org/obo/cl.obo", **kwargs): + self.graph = obonet.read_obo(obo) + self._check_graph() + def _check_graph(self): + # ToDo OBO from obolibrary is not DAG? + if not networkx.is_directed_acyclic_graph(self.graph): + warnings.warn("DAG was broken") + + @property + def nodes(self): + return self.graph.nodes() + + def set_leaves(self, nodes: list = None): + # ToDo check that these are not include parents of each other! 
+ if nodes is not None: + for x in nodes: + assert x in self.graph.nodes, f"{x} not found" + self.leaves = nodes + else: + self.leaves = self.get_all_roots() + + def get_all_roots(self) -> List[str]: + return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] + + def get_ancestors(self, node: str) -> List[str]: + return list(networkx.ancestors(self.graph, node)) + + def map_class_to_id(self, x): + """ + Map ontology class to ID. + :param x: :return: """ - return self.celltype_universe.keys() + assert False # ToDo - def _check_version(self, version: str): - if version not in self.celltype_universe.keys(): - raise ValueError("Version %s not found. Check self.version for available versions." % version) + def map_id_to_class(self, x): + """ + Map ontology ID to class. + :param x: + :return: + """ + assert False # ToDo - def set_version( + def fuzzy_match_nodes( self, - version: str - ): + source, + match_only: bool = False, + include_old: bool = False, + include_synonyms: bool = True, + remove: list = [] + ) -> pd.DataFrame: """ - Set a cell type universe version for this instance. + Map free text node names to ontology node names. - :param version: Full version string "a.b.c" or celltype version "a". + :param source: Free text node labels which are to be matched to ontology nodes. + :param match_only: Whether to include strict matches only in output. + :param include_old: Whether to include previous (free text) node label in output. + :param include_synonyms: Whether to include synonym nodes. + :param remove: Free text node labels to omit in map. + :return: Table with source and target node names. 
Columns: "source", "target" + """ + from fuzzywuzzy import fuzz + matches = [] + nodes = [(k, v) for k, v in self.graph.nodes.items()] + include = [] + if isinstance(source, pd.DataFrame): + source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) + for x in source: + if not isinstance(x, list) and not isinstance(x, tuple): + x = [x, "nan"] + scores = np.array([ + np.max([ + fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + ] + [ + fuzz.ratio(x[0].lower().strip("'").strip("\"").strip("]").strip("["), yy.lower()) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + np.max([ + fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + ]) + if "name" in y[1].keys() else 0 # ToDo: these are empty nodes, where are they coming from? + for y in nodes + ]) + include.append(x[0].lower().strip("'").strip("\"") not in remove) + if match_only: + matches.append(np.any(scores == 100)) # perfect match + else: + if np.any(scores == 100): + matches.append([(nodes[i][1]["name"], nodes[i][0]) for i in np.where(scores == 100)[0]]) + else: + matchesi = [( + nodes[i][1]["name"] + "[" + ";".join([ + yy.strip("'").strip("\"").strip("]").strip("[") + for yy in nodes[i][1]["synonym"] + ]) + "}" + if "synonym" in nodes[i][1].keys() and include_synonyms else nodes[i][1]["name"], + nodes[i][0] + ) for i in np.argsort(scores)[-10:]] + if include_old: + matchesi = matchesi + [(x[0].upper(), x[1])] + matches.append(matchesi) + if match_only: + tab = pd.DataFrame({"source": source, "target": matches}) + else: + tab = pd.DataFrame({ + "source": source, + "target": [" ".join([",".join(zz) for zz in z]) for z in matches] + }) + return tab.loc[include] + + +class OntologyExtendedObo(OntologyObo): + """ + Basic .obo ontology extended by additional nodes and edges without breaking DAG. 
+ """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.add_extension(dict_ontology=ONTOLOGIY_EXTENSION_HUMAN) # ToDo distinguish here + + def add_extension(self, dict_ontology: Dict[str, List[str]]): + """ + Extend ontology by additional edges and nodes defined in a dictionary. + + Checks that DAG is not broken after graph assembly. + + :param dict_ontology: Dictionary of nodes and edges to add to ontology. Parsing: + + - keys: parent nodes (which must be in ontology) + - values: children nodes (which can be in ontology), must be given as list of stringd. + If these are in the ontology, an edge is added, otherwise, an edge and the node are added. :return: """ - if len(version.split(".")) == 3: - version = version.split(".")[0] - self._check_version(version=version) - self.version = version - elif len(version.split(".")) == 1: - self._check_version(version=version) - self.version = version + for k, v in dict_ontology.items(): + assert isinstance(v, list), "dictionary values should be list of strings" + # Check that parent node is present: + if k not in self.nodes: + raise ValueError(f"key {k} was not in reference ontology") + # Check if edge is added only, or edge and node. + for child_node in v: + if child_node not in self.nodes: # Add node. + self.graph.add_node(child_node) + # Add edge. + self.graph.add_edge(k, child_node) + # Check that DAG was not broken: + self._check_graph() + + +class CelltypeUniverse: + """ + Cell type universe (list) and ontology (hierarchy) container class. + + + Basic checks on the organ specific instance are performed in the constructor. + """ + ontology: OntologyBase + _target_universe: Union[List[str], None] + + def __init__(self, organism: str, **kwargs): + """ + + :param organism: Organism, defines ontology extension used. 
+ :param kwargs: + """ + self.onto = OntologyExtendedObo(**kwargs) + self._target_universe = None + self._set_extension(organism=organism) + + def _set_extension(self, organism): + """ + + :param organism: Organism, defines ontology extension used. + """ + if organism == "human": + self.onto.add_extension(ONTOLOGIY_EXTENSION_HUMAN) + elif organism == "mouse": + self.onto.add_extension(ONTOLOGIY_EXTENSION_MOUSE) else: - raise ValueError("version supplied should be either in format `a.b.c` or `a`") + raise ValueError(f"organism {organism} not found") @property - def ids(self): + def target_universe(self): """ - List of all loaders understandable cell type names of this instance. + Ontology classes of target universe (understandable cell type names). :return: """ - return self._ids(self.version) + return self._target_universe - def _ids(self, version: str): - return [x[0] for x in self.celltype_universe[version]] + @target_universe.setter + def target_universe(self, x: List[str]): + # Check that all nodes are valid: + for xx in x: + if xx not in self.onto.nodes: + raise ValueError(f"cell type {xx} was not in ontology") + # Default universe is the full set of leave nodes of ontology: + self.target_universe = self.onto.leaves + self.onto.set_leaves(self.target_universe) @property - def ontology_ids(self): + def target_universe_ids(self): """ - List of all cell type IDs (based on an ontology ID scheme) of this instance. + Ontology IDs of target universe (codified cell type names). :return: """ - return self._ontology_ids(self.version) - - def _ontology_ids(self, version: str): - return [x[1] for x in self.celltype_universe[version]] + return [self.onto.map_class_to_id(x) for x in self._target_universe] @property def ntypes(self): """ - Number of different cell types in this instance. + Number of different cell types in target universe. 
+ """ + return len(self.target_universe) + + def __validate_target_universe_table(self, tab: pd.DataFrame): + assert len(tab.columns) == 2 + assert tab.columns[0] == "name" and tab.columns[1] == "id" + + def load_target_universe(self, organ): + """ + :param organ: Anatomic structure to load target universe for. :return: """ - return self._ntypes(self.version) + # ToDo: Use pydoc based query of universes stored in ./target_universes/.. + tab = None + self.__validate_target_universe_table(tab=tab) + self.target_universe = None # ToDo - def _ntypes(self, version: str): - return len(self.celltype_universe[version]) + def read_target_universe_csv(self, fn): + """ - def to_csv( + :param fn: File containing target universe. + :return: + """ + tab = pd.read_csv(fn) + self.__validate_target_universe_table(tab=tab) + self.target_universe = tab["name"].values + + def map_to_target_leaves( self, - fn: str + nodes: List[str], + return_type: str = "elements" ): - pass + """ + Map a given list of nodes to leave nodes defined for this ontology. 
+ :param nodes: + :param return_type: + + "elements": names of mapped leave nodes + "idx": indices in leave note list of of mapped leave nodes + :return: + """ + return [self.onto.map_to_leaves(x, return_type=return_type) for x in nodes] diff --git a/sfaira/versions/celltype_versions/extensions/__init__.py b/sfaira/versions/celltype_versions/extensions/__init__.py new file mode 100644 index 000000000..ff1f8ff55 --- /dev/null +++ b/sfaira/versions/celltype_versions/extensions/__init__.py @@ -0,0 +1,2 @@ +from .obo_extension_human import ONTOLOGIY_EXTENSION_HUMAN +from .obo_extension_mouse import ONTOLOGIY_EXTENSION_MOUSE diff --git a/sfaira/versions/celltype_versions/extensions/obo_extension_human.py b/sfaira/versions/celltype_versions/extensions/obo_extension_human.py new file mode 100644 index 000000000..8a4b683e7 --- /dev/null +++ b/sfaira/versions/celltype_versions/extensions/obo_extension_human.py @@ -0,0 +1 @@ +ONTOLOGIY_EXTENSION_HUMAN = {} diff --git a/sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py b/sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py new file mode 100644 index 000000000..af93a79db --- /dev/null +++ b/sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py @@ -0,0 +1 @@ +ONTOLOGIY_EXTENSION_MOUSE = {} diff --git a/sfaira/versions/celltype_versions/human/__init__.py b/sfaira/versions/celltype_versions/human/__init__.py index bf89eb2b9..e69de29bb 100644 --- a/sfaira/versions/celltype_versions/human/__init__.py +++ b/sfaira/versions/celltype_versions/human/__init__.py @@ -1,91 +0,0 @@ -from .adipose import CelltypeVersionsHumanAdipose -from .adrenalgland import CelltypeVersionsHumanAdrenalgland -from .mixed import CelltypeVersionsHumanMixed -from .artery import CelltypeVersionsHumanArtery -from .bladder import CelltypeVersionsHumanBladder -from .blood import CelltypeVersionsHumanBlood -from .bone import CelltypeVersionsHumanBone -from .brain import CelltypeVersionsHumanBrain -from .calvaria 
import CelltypeVersionsHumanCalvaria -from .cervix import CelltypeVersionsHumanCervix -from .chorionicvillus import CelltypeVersionsHumanChorionicvillus -from .colon import CelltypeVersionsHumanColon -from .duodenum import CelltypeVersionsHumanDuodenum -from .epityphlon import CelltypeVersionsHumanEpityphlon -from .esophagus import CelltypeVersionsHumanEsophagus -from .eye import CelltypeVersionsHumanEye -from .fallopiantube import CelltypeVersionsHumanFallopiantube -from .femalegonad import CelltypeVersionsHumanFemalegonad -from .gallbladder import CelltypeVersionsHumanGallbladder -from .heart import CelltypeVersionsHumanHeart -from .hesc import CelltypeVersionsHumanHesc -from .ileum import CelltypeVersionsHumanIleum -from .jejunum import CelltypeVersionsHumanJejunum -from .kidney import CelltypeVersionsHumanKidney -from .liver import CelltypeVersionsHumanLiver -from .lung import CelltypeVersionsHumanLung -from .malegonad import CelltypeVersionsHumanMalegonad -from .muscle import CelltypeVersionsHumanMuscle -from .omentum import CelltypeVersionsHumanOmentum -from .pancreas import CelltypeVersionsHumanPancreas -from .placenta import CelltypeVersionsHumanPlacenta -from .pleura import CelltypeVersionsHumanPleura -from .prostate import CelltypeVersionsHumanProstate -from .rectum import CelltypeVersionsHumanRectum -from .rib import CelltypeVersionsHumanRib -from .skin import CelltypeVersionsHumanSkin -from .spinalcord import CelltypeVersionsHumanSpinalcord -from .spleen import CelltypeVersionsHumanSpleen -from .stomach import CelltypeVersionsHumanStomach -from .thymus import CelltypeVersionsHumanThymus -from .thyroid import CelltypeVersionsHumanThyroid -from .trachea import CelltypeVersionsHumanTrachea -from .ureter import CelltypeVersionsHumanUreter -from .uterus import CelltypeVersionsHumanUterus - - -ORGAN_DICT = { - 'adipose': CelltypeVersionsHumanAdipose(), - 'adrenalgland': CelltypeVersionsHumanAdrenalgland(), - 'artery': CelltypeVersionsHumanArtery(), - 
'bladder': CelltypeVersionsHumanBladder(), - 'blood': CelltypeVersionsHumanBlood(), - 'bone': CelltypeVersionsHumanBone(), - 'brain': CelltypeVersionsHumanBrain(), - 'calvaria': CelltypeVersionsHumanCalvaria(), - 'cervix': CelltypeVersionsHumanCervix(), - 'chorionicvillus': CelltypeVersionsHumanChorionicvillus(), - 'colon': CelltypeVersionsHumanColon(), - 'duodenum': CelltypeVersionsHumanDuodenum(), - 'epityphlon': CelltypeVersionsHumanEpityphlon(), - 'esophagus': CelltypeVersionsHumanEsophagus(), - 'eye': CelltypeVersionsHumanEye(), - 'fallopiantube': CelltypeVersionsHumanFallopiantube(), - 'femalegonad': CelltypeVersionsHumanFemalegonad(), - 'gallbladder': CelltypeVersionsHumanGallbladder(), - 'heart': CelltypeVersionsHumanHeart(), - 'hesc': CelltypeVersionsHumanHesc(), - 'ileum': CelltypeVersionsHumanIleum(), - 'jejunum': CelltypeVersionsHumanJejunum(), - 'kidney': CelltypeVersionsHumanKidney(), - 'liver': CelltypeVersionsHumanLiver(), - 'lung': CelltypeVersionsHumanLung(), - 'malegonad': CelltypeVersionsHumanMalegonad(), - 'muscle': CelltypeVersionsHumanMuscle(), - 'omentum': CelltypeVersionsHumanOmentum(), - 'pancreas': CelltypeVersionsHumanPancreas(), - 'placenta': CelltypeVersionsHumanPlacenta(), - 'pleura': CelltypeVersionsHumanPleura(), - 'prostate': CelltypeVersionsHumanProstate(), - 'rectum': CelltypeVersionsHumanRectum(), - 'rib': CelltypeVersionsHumanRib(), - 'skin': CelltypeVersionsHumanSkin(), - 'spinalcord': CelltypeVersionsHumanSpinalcord(), - 'spleen': CelltypeVersionsHumanSpleen(), - 'stomach': CelltypeVersionsHumanStomach(), - 'thymus': CelltypeVersionsHumanThymus(), - 'thyroid': CelltypeVersionsHumanThyroid(), - 'trachea': CelltypeVersionsHumanTrachea(), - 'ureter': CelltypeVersionsHumanUreter(), - 'uterus': CelltypeVersionsHumanUterus() -} diff --git a/sfaira/versions/celltype_versions/human/adipose.py b/sfaira/versions/celltype_versions/human/adipose.py deleted file mode 100644 index 6b0447e8a..000000000 --- 
a/sfaira/versions/celltype_versions/human/adipose.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_ADIPOSE_V0 = [ - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Fibroblast', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_ADIPOSE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanAdipose(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_ADIPOSE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_ADIPOSE_V0 - } - super(CelltypeVersionsHumanAdipose, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/adrenalgland.py b/sfaira/versions/celltype_versions/human/adrenalgland.py deleted file mode 100644 index 4603ecf3d..000000000 --- a/sfaira/versions/celltype_versions/human/adrenalgland.py +++ /dev/null @@ -1,66 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_ADRENALGLAND_V0 = [ - ['Adrenal gland inflammatory cell', "nan"], - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal acinar cell', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial 
progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Goblet cell', "nan"], - ['Hepatocyte/Endodermal cell', "nan"], - ['Immature sertoli cell (Pre-Sertoli cell)', "nan"], - ['Kidney intercalated cell', "nan"], - ['Loop of Henle', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proximal tubule progenitor', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Ureteric bud cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_ADRENALGLAND_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanAdrenalgland(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_ADRENALGLAND_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_ADRENALGLAND_V0 - } - super(CelltypeVersionsHumanAdrenalgland, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/artery.py b/sfaira/versions/celltype_versions/human/artery.py deleted file mode 100644 index f549b2fca..000000000 --- a/sfaira/versions/celltype_versions/human/artery.py +++ /dev/null @@ -1,39 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_ARTERY_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Fibroblast', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mesothelial cell', "nan"], - ['Monocyte', "nan"], 
- ['Myeloid cell', "nan"], - ['Neutrophil', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_ARTERY_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanArtery(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_ARTERY_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_ARTERY_V0 - } - super(CelltypeVersionsHumanArtery, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/bladder.py b/sfaira/versions/celltype_versions/human/bladder.py deleted file mode 100644 index 21ad053db..000000000 --- a/sfaira/versions/celltype_versions/human/bladder.py +++ /dev/null @@ -1,44 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_BLADDER_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal fibroblast', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Intermediated cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_BLADDER_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanBladder(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_BLADDER_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_BLADDER_V0 - } - super(CelltypeVersionsHumanBladder, self).__init__(**kwargs) diff --git 
a/sfaira/versions/celltype_versions/human/blood.py b/sfaira/versions/celltype_versions/human/blood.py deleted file mode 100644 index ada92871b..000000000 --- a/sfaira/versions/celltype_versions/human/blood.py +++ /dev/null @@ -1,40 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_BLOOD_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_BLOOD_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanBlood(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_BLOOD_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_BLOOD_V0 - } - super(CelltypeVersionsHumanBlood, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/bone.py b/sfaira/versions/celltype_versions/human/bone.py deleted file mode 100644 index 11cd9dada..000000000 --- a/sfaira/versions/celltype_versions/human/bone.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_BONE_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', 
"nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_BONE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanBone(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_BONE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_BONE_V0 - } - super(CelltypeVersionsHumanBone, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/brain.py b/sfaira/versions/celltype_versions/human/brain.py deleted file mode 100644 index 0bea539e2..000000000 --- a/sfaira/versions/celltype_versions/human/brain.py +++ /dev/null @@ -1,64 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_BRAIN_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['Astrocytes 1', "nan"], - ['Astrocytes 2', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cells', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['GABAergic interneurons 1', "nan"], - ['GABAergic interneurons 2', "nan"], - ['Gastric endocrine cell', "nan"], - ['Glutamatergic neurons from the PFC 1', "nan"], - ['Glutamatergic neurons from the PFC 2', "nan"], - ['Goblet cell', "nan"], - ['Granule neurons from the hip dentate gyrus region', "nan"], - ['Macrophage', "nan"], - ['Microglia', "nan"], - ['Monocyte', "nan"], - ['Neuronal stem cells', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Oligodendrocyte precursors', "nan"], - ['Oligodendrocytes', "nan"], - ['Primordial germ cell', "nan"], - ['Pyramidal neurons from the hip CA region 1', "nan"], - ['Pyramidal neurons from the hip CA region 2', "nan"], - ['Sinusoidal endothelial cell', "nan"], - 
['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Unknown', "nan"] -] -ONTOLOGIES_HUMAN_BRAIN_V0 = { - "names": { - 'Astrocyte': ['Astrocytes 1', 'Astrocytes 2'], - 'Fetal Neuron': ['Glutamatergic neurons from the PFC 1', 'Glutamatergic neurons from the PFC 2', - 'Granule neurons from the hip dentate gyrus region', 'GABAergic interneurons 1', - 'GABAergic interneurons 2', 'Pyramidal neurons from the hip CA region 1', 'Pyramidal neurons from the hip CA region 2'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanBrain(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_BRAIN_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_BRAIN_V0 - } - super(CelltypeVersionsHumanBrain, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/calvaria.py b/sfaira/versions/celltype_versions/human/calvaria.py deleted file mode 100644 index f3e528a08..000000000 --- a/sfaira/versions/celltype_versions/human/calvaria.py +++ /dev/null @@ -1,42 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_CALVARIA_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Kidney intercalated cell', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_CALVARIA_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class 
CelltypeVersionsHumanCalvaria(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_CALVARIA_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_CALVARIA_V0 - } - super(CelltypeVersionsHumanCalvaria, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/cervix.py b/sfaira/versions/celltype_versions/human/cervix.py deleted file mode 100644 index db7ebeb0f..000000000 --- a/sfaira/versions/celltype_versions/human/cervix.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_CERVIX_V0 = [ - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fibroblast', "nan"], - ['Loop of Henle', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_CERVIX_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanCervix(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_CERVIX_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_CERVIX_V0 - } - super(CelltypeVersionsHumanCervix, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/chorionicvillus.py b/sfaira/versions/celltype_versions/human/chorionicvillus.py deleted file mode 100644 index aa5575608..000000000 --- a/sfaira/versions/celltype_versions/human/chorionicvillus.py +++ /dev/null @@ -1,43 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_CHORIONICVILLUS_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], 
- ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Loop of Henle', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_CHORIONICVILLUS_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanChorionicvillus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_CHORIONICVILLUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_CHORIONICVILLUS_V0 - } - super(CelltypeVersionsHumanChorionicvillus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/colon.py b/sfaira/versions/celltype_versions/human/colon.py deleted file mode 100644 index 61e7e7c2f..000000000 --- a/sfaira/versions/celltype_versions/human/colon.py +++ /dev/null @@ -1,100 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_COLON_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell IgA Plasma', "nan"], - ['B cell IgG Plasma', "nan"], - ['B cell cycling', "nan"], - ['B cell memory', "nan"], - ['Best4+ Enterocytes', "nan"], - ['CD4+ Memory', "nan"], - ['CD4+ PD1+', "nan"], - ['CD4+ T Activated Fos-hi', "nan"], - ['CD4+ T Activated Fos-lo', "nan"], - ['CD69+ Mast', "nan"], - ['CD69- Mast', "nan"], - ['CD8 T', "nan"], - ['CD8+ IELs', "nan"], - ['CD8+ IL17+', "nan"], - ['CD8+ LP', "nan"], - ['Cycling T', "nan"], - ['Cycling TA', "nan"], - ['DC1', "nan"], - ['DC2', "nan"], - ['Endothelial', "nan"], - ['Enterocyte Progenitors', "nan"], - ['Enterocytes', "nan"], - ['Enteroendocrine cells', "nan"], - ['Erythroid cell', "nan"], - ['Fetal Neuron', "nan"], - 
['Fetal enterocyte ', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fibroblast', "nan"], - ['Follicular', "nan"], - ['Glial cells', "nan"], - ['Goblet cells', "nan"], - ['ILC', "nan"], - ['Immature Enterocytes 1', "nan"], - ['Immature Enterocytes 2', "nan"], - ['Immature Goblet', "nan"], - ['LYVE1 Macrophage', "nan"], - ['Lymphoid DC', "nan"], - ['M cells', "nan"], - ['MT-hi', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Myofibroblasts', "nan"], - ['NK', "nan"], - ['Neutrophil', "nan"], - ['Paneth cells', "nan"], - ['Pericytes', "nan"], - ['Primordial germ cell', "nan"], - ['Secretory TA', "nan"], - ['Smooth Muscle', "nan"], - ['Stem cells', "nan"], - ['Stromal', "nan"], - ['TA 1', "nan"], - ['TA 2', "nan"], - ['Tcm', "nan"], - ['Tfh', "nan"], - ['Th1', "nan"], - ['Th17', "nan"], - ['Treg', "nan"], - ['Tregs', "nan"], - ['Tuft', "nan"], - ['WNT2B+ Fos-lo 1', "nan"], - ['WNT5B+ 2', "nan"], - ['cycling DCs', "nan"], - ['cycling gd T', "nan"], - ['gd T', "nan"], - ['pDC', "nan"] -] -ONTOLOGIES_HUMAN_COLON_V0 = { - "names": { - 'Plasma Cells': ['B cell IgA Plasma', 'B cell IgG Plasma'], - 'Macrophage': ['LYVE1 Macrophage', 'Macrophage'], - 'Enterocytes': ['Enterocytes', 'Best4+ Enterocytes'], - 'TA': ['Cycling TA', 'TA 1', 'TA 2', 'Secretory TA'], - 'Activated CD4 T': ['CD4+ T Activated Fos-hi', 'CD4+ T Activated Fos-lo'], - 'Fetal enterocyte': ['Immature Enterocytes 1', 'Immature Enterocytes 2'], - 'B cell (Plasmocyte)': ['B cell IgA Plasma', 'B cell IgG Plasma'], - 'Mast cell': ['CD69+ Mast', 'CD69- Mast'], - 'Dendritic cell': ['DC1', 'DC2'], - 'B cell': ['B cell cycling', 'B cell memory', 'Follicular'], - 'T cell': ['Treg', 'Cycling T', 'CD4+ T Activated Fos-hi', 'CD4+ T Activated Fos-lo', 'Tcm', 'Tfh', 'Th1', 'Th17', 'cycling gd T', 'gd T'], - 'Epithelial cell': ['Enterocytes', 'Goblet cells', 'Enteroendocrine cells', 'Tuft'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanColon(CelltypeVersionsBase): - - def __init__(self, 
**kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_COLON_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_COLON_V0 - } - super(CelltypeVersionsHumanColon, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/duodenum.py b/sfaira/versions/celltype_versions/human/duodenum.py deleted file mode 100644 index 2faeb5062..000000000 --- a/sfaira/versions/celltype_versions/human/duodenum.py +++ /dev/null @@ -1,39 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_DUODENUM_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fibroblast', "nan"], - ['Goblet cell', "nan"], - ['Hepatocyte/Endodermal cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_DUODENUM_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanDuodenum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_DUODENUM_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_DUODENUM_V0 - } - super(CelltypeVersionsHumanDuodenum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/epityphlon.py b/sfaira/versions/celltype_versions/human/epityphlon.py deleted file mode 100644 index 3341f735e..000000000 --- a/sfaira/versions/celltype_versions/human/epityphlon.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_EPITYPHLON_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell 
(APC)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_EPITYPHLON_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanEpityphlon(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_EPITYPHLON_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_EPITYPHLON_V0 - } - super(CelltypeVersionsHumanEpityphlon, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/esophagus.py b/sfaira/versions/celltype_versions/human/esophagus.py deleted file mode 100644 index 3ad9baf9e..000000000 --- a/sfaira/versions/celltype_versions/human/esophagus.py +++ /dev/null @@ -1,58 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_ESOPHAGUS_V0 = [ - ['B cell (Plasmocyte)', "nan"], - ['B_CD27neg', "nan"], - ['B_CD27pos', "nan"], - ['Basal cell', "nan"], - ['Blood_vessel', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epi_dividing', "nan"], - ['Epi_suprabasal', "nan"], - ['Epi_upper', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Glands_duct', "nan"], - ['Glands_mucous', "nan"], - ['Loop of Henle', "nan"], - ['Lymph_vessel', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['NK_T_CD8_Cytotoxic', "nan"], - ['Neutrophil', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - 
['T_CD4', "nan"], - ['T_CD8', "nan"] -] -ONTOLOGIES_HUMAN_ESOPHAGUS_V0 = { - "names": { - "Mono_macro": ["Monocyte", "Macrophage"], - "B cell": ['B_CD27neg', 'B_CD27pos'], - "T cell": ["T_CD4", "T_CD8", "NK_T_CD8_Cytotoxic"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanEsophagus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_ESOPHAGUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_ESOPHAGUS_V0 - } - super(CelltypeVersionsHumanEsophagus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/external.py b/sfaira/versions/celltype_versions/human/external.py deleted file mode 100644 index cf6bc6d79..000000000 --- a/sfaira/versions/celltype_versions/human/external.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.versions.celltype_versions import CelltypeVersionsBase diff --git a/sfaira/versions/celltype_versions/human/eye.py b/sfaira/versions/celltype_versions/human/eye.py deleted file mode 100644 index 66afcbdbd..000000000 --- a/sfaira/versions/celltype_versions/human/eye.py +++ /dev/null @@ -1,68 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_EYE_V0 = [ - ['Amacrine cell', "nan"], - ['Antigen presenting cell (RPS high)', "nan"], - ['B-cell', "nan"], - ['Basal cell', "nan"], - ['CB CD34_pos', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Horizontal cells', "nan"], - ['Macroglia', "nan"], - ['Macrophage', "nan"], - ['Mast-cell', "nan"], - ['Melanocyte', "nan"], - ['Microglia', "nan"], - ['Muller 
cell', "nan"], - ['Pericyte', "nan"], - ['Primordial germ cell', "nan"], - ['Retinal bipolar neuron type A', "nan"], - ['Retinal bipolar neuron type B', "nan"], - ['Retinal bipolar neuron type C', "nan"], - ['Retinal bipolar neuron type D', "nan"], - ['Retinal cone cell', "nan"], - ['Retinal ganglion cell', "nan"], - ['Retinal pigment epithelium', "nan"], - ['Retinal rod cell type A', "nan"], - ['Retinal rod cell type B', "nan"], - ['Retinal rod cell type C', "nan"], - ['Schwann1', "nan"], - ['Schwann2', "nan"], - ['Stratified epithelial cell', "nan"], - ['T cell', "nan"], - ['T/NK-cell', "nan"], - ['Unknown', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_EYE_V0 = { - "names": { - 'BPs': ['Retinal bipolar neuron type A', 'Retinal bipolar neuron type B', 'Retinal bipolar neuron type C', 'Retinal bipolar neuron type D'], - 'Rods': ['Retinal rod cell type A', 'Retinal rod cell type B', 'Retinal rod cell type C', ] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanEye(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_EYE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_EYE_V0 - } - super(CelltypeVersionsHumanEye, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/fallopiantube.py b/sfaira/versions/celltype_versions/human/fallopiantube.py deleted file mode 100644 index 149a341c6..000000000 --- a/sfaira/versions/celltype_versions/human/fallopiantube.py +++ /dev/null @@ -1,38 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_FALLOPIANTUBE_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fibroblast', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - 
['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_FALLOPIANTUBE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanFallopiantube(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_FALLOPIANTUBE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_FALLOPIANTUBE_V0 - } - super(CelltypeVersionsHumanFallopiantube, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/femalegonad.py b/sfaira/versions/celltype_versions/human/femalegonad.py deleted file mode 100644 index b0ccc1c3b..000000000 --- a/sfaira/versions/celltype_versions/human/femalegonad.py +++ /dev/null @@ -1,48 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_FEMALEGONAD_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Immature sertoli cell (Pre-Sertoli cell)', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_FEMALEGONAD_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanFemalegonad(CelltypeVersionsBase): - - def __init__(self, **kwargs): 
- self.celltype_universe = { - "0": CELLTYPES_HUMAN_FEMALEGONAD_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_FEMALEGONAD_V0 - } - super(CelltypeVersionsHumanFemalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/gallbladder.py b/sfaira/versions/celltype_versions/human/gallbladder.py deleted file mode 100644 index e8786816c..000000000 --- a/sfaira/versions/celltype_versions/human/gallbladder.py +++ /dev/null @@ -1,44 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_GALLBLADDER_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Goblet cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Neutrophil', "nan"], - ['Primordial germ cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_GALLBLADDER_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanGallbladder(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_GALLBLADDER_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_GALLBLADDER_V0 - } - super(CelltypeVersionsHumanGallbladder, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/heart.py b/sfaira/versions/celltype_versions/human/heart.py deleted file mode 100644 index 1b5d4a81c..000000000 --- a/sfaira/versions/celltype_versions/human/heart.py +++ /dev/null @@ -1,54 +0,0 @@ -from .external import CelltypeVersionsBase - 
-CELLTYPES_HUMAN_HEART_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Ventricle cardiomyocyte', "nan"] -] -ONTOLOGIES_HUMAN_HEART_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanHeart(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_HEART_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_HEART_V0 - } - super(CelltypeVersionsHumanHeart, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/hesc.py b/sfaira/versions/celltype_versions/human/hesc.py deleted file mode 100644 index 56a9aa838..000000000 --- a/sfaira/versions/celltype_versions/human/hesc.py +++ /dev/null @@ -1,25 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_HESC_V0 = [ - ['Fetal epithelial progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['hESC', "nan"] -] 
-ONTOLOGIES_HUMAN_HESC_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanHesc(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_HESC_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_HESC_V0 - } - super(CelltypeVersionsHumanHesc, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/ileum.py b/sfaira/versions/celltype_versions/human/ileum.py deleted file mode 100644 index fb51c3a12..000000000 --- a/sfaira/versions/celltype_versions/human/ileum.py +++ /dev/null @@ -1,55 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_ILEUM_V0 = [ - ['ACKR1+ endothelium', "nan"], - ['B cells', "nan"], - ['CD36+ endothelium', "nan"], - ['Cycling', "nan"], - ['Dendritic cell', "nan"], - ['Enterocytes', "nan"], - ['Enteroendocrine cells', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblasts', "nan"], - ['Glial cells', "nan"], - ['Goblet cells', "nan"], - ['Hepatocyte/Endodermal cell', "nan"], - ['ILC', "nan"], - ['Lymphatics', "nan"], - ['M2 Macrophage', "nan"], - ['MNP', "nan"], - ['Macrophage', "nan"], - ['Mast cells', "nan"], - ['Monocyte', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Paneth cells', "nan"], - ['Pericytes', "nan"], - ['Plasma Cells', "nan"], - ['Progenitors', "nan"], - ['Smooth muscle cell', "nan"], - ['Stem Cell', "nan"], - ['Stromal cell', "nan"], - ['T cells', "nan"], - ['TA', "nan"] -] -ONTOLOGIES_HUMAN_ILEUM_V0 = { - "names": { - 'Endothelial cell': ['ACKR1+ endothelium', 'CD36+ endothelium'], - 'Epithelial cell': ['Goblet cells', 'Enterocytes', 'Paneth cells', 'Enteroendocrine cells'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanIleum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_ILEUM_V0 - } - self.ontology = { - "0": 
ONTOLOGIES_HUMAN_ILEUM_V0 - } - super(CelltypeVersionsHumanIleum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/jejunum.py b/sfaira/versions/celltype_versions/human/jejunum.py deleted file mode 100644 index 2aef7f5d3..000000000 --- a/sfaira/versions/celltype_versions/human/jejunum.py +++ /dev/null @@ -1,37 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_JEJUNUM_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fibroblast', "nan"], - ['Hepatocyte/Endodermal cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_JEJUNUM_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanJejunum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_JEJUNUM_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_JEJUNUM_V0 - } - super(CelltypeVersionsHumanJejunum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/kidney.py b/sfaira/versions/celltype_versions/human/kidney.py deleted file mode 100644 index c1ba3b0ad..000000000 --- a/sfaira/versions/celltype_versions/human/kidney.py +++ /dev/null @@ -1,147 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_KIDNEY_V0 = [ - ['Acinar cell', "nan"], - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['CD4 T cell', "nan"], - ['CD8 T cell', "nan"], - ['CNT/PC - proximal UB', "nan"], - ['Cap mesenchyme', "nan"], - ['Chondrocyte', "nan"], - ['Collecting Duct - Intercalated Cells Type A 
(cortex)', "nan"], - ['Collecting Duct - Intercalated Cells Type A (medulla)', "nan"], - ['Collecting Duct - Intercalated Cells Type B', "nan"], - ['Collecting Duct - PCs - Stressed Dissoc Subset', "nan"], - ['Collecting Duct - Principal Cells (cortex)', "nan"], - ['Collecting Duct - Principal Cells (medulla)', "nan"], - ['Connecting tubule', "nan"], - ['Decending Limb', "nan"], - ['Distal Convoluted Tubule', "nan"], - ['Distal S shaped body', "nan"], - ['Distal renal vesicle', "nan"], - ['Distinct proximal tubule 1', "nan"], - ['Distinct proximal tubule 2', "nan"], - ['Endocrine cell', "nan"], - ['Endothelial Cells (unassigned)', "nan"], - ['Endothelial Cells - AEA & DVR', "nan"], - ['Endothelial Cells - AVR', "nan"], - ['Endothelial Cells - glomerular capillaries', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte ', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial Cells (unassigned)', "nan"], - ['Epithelial progenitor', "nan"], - ['Erythroid', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Indistinct intercalated cell', "nan"], - ['Innate like lymphocyte', "nan"], - ['Intermediated cell', "nan"], - ['Interstitium', "nan"], - ['Loop of Henle', "nan"], - ['M2 Macrophage', "nan"], - ['MNP-a/classical monocyte derived', "nan"], - ['MNP-b/non-classical monocyte derived', "nan"], - ['MNP-c/dendritic cell', "nan"], - ['MNP-d/Tissue macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mast cells', "nan"], - ['Medial S shaped body', "nan"], - ['Megakaryocyte', "nan"], - ['Mesangial Cells', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Myofibroblast', "nan"], - ['NK cell', "nan"], - ['NKT cell', "nan"], - ['Neuron', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Pelvic epithelium', "nan"], 
- ['Pelvic epithelium - distal UB', "nan"], - ['Peritubular capillary endothelium 1', "nan"], - ['Peritubular capillary endothelium 2', "nan"], - ['Plasmacytoid dendritic cell', "nan"], - ['Podocyte', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating B cell', "nan"], - ['Proliferating NK cell', "nan"], - ['Proliferating Proximal Tubule', "nan"], - ['Proliferating T cell', "nan"], - ['Proliferating cDC2', "nan"], - ['Proliferating cap mesenchyme', "nan"], - ['Proliferating distal renal vesicle', "nan"], - ['Proliferating fibroblast', "nan"], - ['Proliferating macrophage', "nan"], - ['Proliferating monocyte', "nan"], - ['Proliferating myofibroblast', "nan"], - ['Proliferating stroma progenitor', "nan"], - ['Proximal S shaped body', "nan"], - ['Proximal Tubule Epithelial Cells (S1)', "nan"], - ['Proximal Tubule Epithelial Cells (S2)', "nan"], - ['Proximal Tubule Epithelial Cells (S3)', "nan"], - ['Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', "nan"], - ['Proximal Tubule Epithelial Cells - Stress/Inflam', "nan"], - ['Proximal UB', "nan"], - ['Proximal renal vesicle', "nan"], - ['Proximal tubule progenitor', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Skeletal muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stroma progenitor', "nan"], - ['Stromal cell', "nan"], - ['Thick ascending limb of Loop of Henle', "nan"], - ['Thin ascending limb', "nan"], - ['Transitional urothelium', "nan"], - ['Unknown - Novel PT CFH+ Subpopulation (S2)', "nan"], - ['Vascular Smooth Muscle Cells and pericytes', "nan"], - ['cDC1', "nan"], - ['cDC2', "nan"], - ['hESC', "nan"], - ['pDC', "nan"] -] -ONTOLOGIES_HUMAN_KIDNEY_V0 = { - "names": { - 'Type A intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', - 'Collecting Duct - Intercalated Cells Type A (medulla)'], - 'Principal cell': ['Collecting Duct - PCs - Stressed Dissoc Subset', - 'Collecting Duct - Principal Cells (cortex)', - 'Collecting Duct - Principal Cells 
(medulla)'], - 'Proximal tubule': ['Proximal Tubule Epithelial Cells (S1)', - 'Proximal Tubule Epithelial Cells (S2)', - 'Proximal Tubule Epithelial Cells (S3)', - 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', - 'Proximal Tubule Epithelial Cells - Stress/Inflam'], - 'Dendritic cell': ['MNP-c/dendritic cell', 'Plasmacytoid dendritic cell'], - 'Endothelial cell': ['Endothelial Cells (unassigned)', 'Endothelial Cells - AEA & DVR', 'Endothelial Cells - AVR', - 'Endothelial Cells - glomerular capillaries', 'Peritubular capillary endothelium 1', 'Peritubular capillary endothelium 2'], - 'Epithelial cell': ['Pelvic epithelium', 'Pelvic epithelium - distal UB', 'Proximal Tubule Epithelial Cells (S1)', - 'Proximal Tubule Epithelial Cells (S2)', 'Proximal Tubule Epithelial Cells (S3)', - 'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', 'Proximal Tubule Epithelial Cells - Stress/Inflam'], - 'Intercalated cell': ['Collecting Duct - Intercalated Cells Type A (cortex)', 'Collecting Duct - Intercalated Cells Type A (medulla)', - 'Collecting Duct - Intercalated Cells Type B', 'Indistinct intercalated cell'], - 'T cell': ['CD4 T cell', 'CD8 T cell'], - 'Ureteric bud cell': ['CNT/PC - proximal UB', 'Proximal UB', 'Pelvic epithelium - distal UB'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanKidney(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_KIDNEY_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_KIDNEY_V0 - } - super(CelltypeVersionsHumanKidney, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/liver.py b/sfaira/versions/celltype_versions/human/liver.py deleted file mode 100644 index d294a31cd..000000000 --- a/sfaira/versions/celltype_versions/human/liver.py +++ /dev/null @@ -1,100 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_LIVER_V0 = [ - ['Alpha beta T cells', "nan"], - ['Antigen presenting cell (RPS high)', "nan"], - ['CB CD34+', 
"nan"], - ['Central venous LSECs', "nan"], - ['Cholangiocytes', "nan"], - ['Dendritic cell 1', "nan"], - ['Dendritic cell 2', "nan"], - ['Dendritic cell precursor', "nan"], - ['Early Erythroid', "nan"], - ['Early lymphoid T lymphocyte', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte ', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial progenitor', "nan"], - ['Fibroblast', "nan"], - ['Gamma delta T cells 1', "nan"], - ['Gamma delta T cells 2', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['HSC MPP', "nan"], - ['Hepatic stellate cells', "nan"], - ['Hepatocyte 1', "nan"], - ['Hepatocyte 2', "nan"], - ['Hepatocyte 3', "nan"], - ['Hepatocyte 4', "nan"], - ['Hepatocyte 5', "nan"], - ['Hepatocyte 6', "nan"], - ['ILC', "nan"], - ['ILC precursor', "nan"], - ['Inflammatory macrophages', "nan"], - ['Kupffer Cell', "nan"], - ['Late Erythroid', "nan"], - ['Liver sinusoidal endothelial cells', "nan"], - ['MEMP', "nan"], - ['MP', "nan"], - ['Macrovascular endothelial cells', "nan"], - ['Mast cell', "nan"], - ['Mature B cells', "nan"], - ['Megakaryocyte', "nan"], - ['Mesenchyme', "nan"], - ['Mesothelia', "nan"], - ['Mid Erythroid', "nan"], - ['Mono Macrophage', "nan"], - ['Monocyte', "nan"], - ['Monocyte precursor', "nan"], - ['Myeloid cell', "nan"], - ['NK cell', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Neutrophil myeloid progenitor', "nan"], - ['Non inflammatory macrophages', "nan"], - ['Other endothelial cells', "nan"], - ['Pancreas exocrine cell', "nan"], - ['Periportal LSECs', "nan"], - ['Plasma B cell', "nan"], - ['Plasma cells', "nan"], - ['Pre pro B cell', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Unknown', "nan"], - ['VCAM1pos EI macrophage', "nan"], - ['pDendritic cell precursor', "nan"], - ['pre B cell', "nan"], - ['pro B cell', "nan"] -] 
-ONTOLOGIES_HUMAN_LIVER_V0 = { - "names": { - 'Erythroid cells': ['Early Erythroid', 'Mid Erythroid', 'Late Erythroid'], - 'Endothelial cell': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], - 'Hepatocyte': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], - 'Hepatocytes': ['Hepatocyte 1', 'Hepatocyte 2', 'Hepatocyte 3', 'Hepatocyte 4', 'Hepatocyte 5', 'Hepatocyte 6'], - 'Endothelia': ['Liver sinusoidal endothelial cells', 'Macrovascular endothelial cells', 'Other endothelial cells'], - 'Bcells': ['pro B cell', 'Pre pro B cell', 'Mature B cells', 'pre B cell', 'Plasma B cell'], - 'Tcells': ['Gamma delta T cells 2', 'Gamma delta T cells 1', 'Alpha beta T cells'], - 'pDCs': ['Dendritic cell 1', 'Dendritic cell 2'], - 'NK, NKT and T cells': ['NK cell', 'Alpha beta T cells', 'Gamma delta T cells 1', 'Gamma delta T cells 2'], - 'B Cell': ['pro B cell', 'Pre pro B cell', 'Mature B cells', 'pre B cell', 'Plasma B cell'], - 'T cell': ['Alpha beta T cells', 'Gamma delta T cells 1', 'Gamma delta T cells 2'], - 'Dendritic cell': ['Dendritic cell 1', 'Dendritic cell 2'], - 'B cell': ['pro B cell', 'Pre pro B cell', 'Mature B cells', 'pre B cell'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanLiver(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_LIVER_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_LIVER_V0 - } - super(CelltypeVersionsHumanLiver, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/lung.py b/sfaira/versions/celltype_versions/human/lung.py deleted file mode 100644 index ddcfd0777..000000000 --- a/sfaira/versions/celltype_versions/human/lung.py +++ /dev/null @@ -1,74 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_LUNG_V0 = [ - ['Cycling cells', "nan"], - ['Cartilage', "nan"], - ['Fetal airway progenitors', "nan"], - ['Mesothelium', 
"nan"], - ['AT1', "nan"], - ['AT2', "nan"], - ['Acinar', "nan"], - ['Airway smooth muscle', "nan"], - ['Arterial', "nan"], - ['B cell lineage', "nan"], - ['Basal', "nan"], - ['Bronchial Vessel 1', "nan"], - ['Bronchial Vessel 2', "nan"], - ['Capillary', "nan"], - ['Capillary Intermediate 1', "nan"], - ['Capillary Intermediate 2', "nan"], - ['Dendritic cells', "nan"], - ['Erythrocytes', "nan"], - ['Fibroblasts', "nan"], - ['Fibromyocyte', "nan"], - ['Innate lymphoid cells', "nan"], - ['KRT5-/KRT17+', "nan"], - ['Lymphatic EC', "nan"], - ['Macrophages', "nan"], - ['Mast cells', "nan"], - ['Megakaryocytes', "nan"], - ['Monocytes', "nan"], - ['Multiciliated lineage', "nan"], - ['Myofibroblasts', "nan"], - ['Neutrophilic', "nan"], - ['Proliferating Epithelial Cells', "nan"], - ['Rare', "nan"], - ['Secretory', "nan"], - ['Submucosal Secretory', "nan"], - ['T cell lineage', "nan"], - ['Venous', "nan"], - ['Venous smooth muscle', "nan"], - ['unknown', "nan"] -] -ONTOLOGIES_HUMAN_LUNG_V0 = { - "names": { - "1_Endothelial": ['Arterial', 'Capillary', 'Venous', 'Bronchial Vessel 1', 'Bronchial Vessel 2', - 'Capillary Intermediate 1', 'Capillary Intermediate 2', 'Lymphatic EC'], - "1_Epithelial": ['Basal', 'Multiciliated lineage', 'Secretory', 'Rare', 'Submucosal Secretory', 'Acinar', - 'AT1', 'AT2', 'KRT5-/KRT17+', 'Proliferating Epithelial Cells', - 'Fetal airway progenitors'], - "1_Immune": ["B cell lineage", "T cell lineage", "Innate lymphoid cells", "Dendritic cells", "Macrophages", - "Monocytes", "Mast cells", "Megakaryocytes", "Erythrocytes"], - "1_Stroma": ['Mesothelium', 'Fibroblasts', 'Myofibroblasts', 'Fibromyocyte', 'Airway smooth muscle', - 'Venous smooth muscle', 'Cartilage'], - "2_Blood vessels": ['Arterial', 'Capillary', 'Venous', 'Bronchial Vessel 1', 'Bronchial Vessel 2', - 'Capillary Intermediate 1', 'Capillary Intermediate 2'], - "2_Fibroblast lineage": ['Fibroblasts', 'Myofibroblasts'], - "2_Lymphoid": ['B cell lineage', 'T cell lineage', 'Innate lymphoid 
cells'], - "2_Smooth Muscle": ['Fibromyocyte', 'Airway smooth muscle', 'Venous smooth muscle'], - "2_Myeloid": ["Dendritic cells", "Macrophages", "Monocytes", "Mast cells"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanLung(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_LUNG_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_LUNG_V0 - } - super(CelltypeVersionsHumanLung, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/malegonad.py b/sfaira/versions/celltype_versions/human/malegonad.py deleted file mode 100644 index ff581554f..000000000 --- a/sfaira/versions/celltype_versions/human/malegonad.py +++ /dev/null @@ -1,57 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_MALEGONAD_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Differentiating Spermatogonia', "nan"], - ['Early Primary Spermatocytes', "nan"], - ['Elongated Spermatids', "nan"], - ['Endothelial cells', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal acinar cell', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Late primary Spermatocytes', "nan"], - ['Leydig cells', "nan"], - ['Loop of Henle', "nan"], - ['Macrophages', "nan"], - ['Monocyte', "nan"], - ['Myoid cells', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proximal tubule progenitor', "nan"], - ['Round Spermatids', "nan"], - ['Sertoli cells', "nan"], - ['Smooth muscle cell', "nan"], - ['Sperm', "nan"], - ['Spermatogonial Stem cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - 
['Ureteric bud cell', "nan"] -] -ONTOLOGIES_HUMAN_MALEGONAD_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanMalegonad(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_MALEGONAD_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_MALEGONAD_V0 - } - super(CelltypeVersionsHumanMalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/mixed.py b/sfaira/versions/celltype_versions/human/mixed.py deleted file mode 100644 index 02922bae0..000000000 --- a/sfaira/versions/celltype_versions/human/mixed.py +++ /dev/null @@ -1,42 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_MIXED_V0 = [ - ['1.CD4rest', "nan"], - ['10.CD8EM/TRMact', "nan"], - ['10.CD8TEMRAact', "nan"], - ['11.CD8TEMRA', "nan"], - ['2.CD4act1', "nan"], - ['2.CD4rest2', "nan"], - ['3.CD4act1', "nan"], - ['3.CD4act2', "nan"], - ['4.CD4act2', "nan"], - ['4.CD4act3', "nan"], - ['5.CD4TRMrest', "nan"], - ['5.CD4act3', "nan"], - ['6.CD4TRMact', "nan"], - ['6.CD4Treg', "nan"], - ['7.CD4Treg', "nan"], - ['7.CD8EM/TRMrest', "nan"], - ['8.CD8EM/TRMact', "nan"], - ['8.CD8EM/TRMrest', "nan"], - ['9.CD8TEMRArest', "nan"], - ['9.CD8TRMrest', "nan"], - ['Unknown', "nan"] -] - -ONTOLOGIES_HUMAN_MIXED_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanMixed(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_MIXED_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_MIXED_V0 - } - super(CelltypeVersionsHumanMixed, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/muscle.py b/sfaira/versions/celltype_versions/human/muscle.py deleted file mode 100644 index 3ecdc9d07..000000000 --- a/sfaira/versions/celltype_versions/human/muscle.py +++ /dev/null @@ -1,47 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_MUSCLE_V0 = [ - ['Antigen presenting cell (RPS high)', 
"nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Ventricle cardiomyocyte', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_MUSCLE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanMuscle(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_MUSCLE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_MUSCLE_V0 - } - super(CelltypeVersionsHumanMuscle, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/omentum.py b/sfaira/versions/celltype_versions/human/omentum.py deleted file mode 100644 index 8730a0528..000000000 --- a/sfaira/versions/celltype_versions/human/omentum.py +++ /dev/null @@ -1,40 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_OMENTUM_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fibroblast', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mesothelial cell', "nan"], - ['Monocyte', 
"nan"], - ['Neutrophil', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_OMENTUM_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanOmentum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_OMENTUM_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_OMENTUM_V0 - } - super(CelltypeVersionsHumanOmentum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/pancreas.py b/sfaira/versions/celltype_versions/human/pancreas.py deleted file mode 100644 index c5c161500..000000000 --- a/sfaira/versions/celltype_versions/human/pancreas.py +++ /dev/null @@ -1,66 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_PANCREAS_V0 = [ - ['Acinar cell', "nan"], - ['Activated Stellate cell', "nan"], - ['Alpha cell', "nan"], - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['Beta cell', "nan"], - ['CB CD34+', "nan"], - ['Co-expression cell', "nan"], - ['Delta cell', "nan"], - ['Dendritic cell', "nan"], - ['Ductal cell', "nan"], - ['Endothelial cell', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial progenitor', "nan"], - ['Epsilon cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fibroblast', "nan"], - ['Gamma cell', "nan"], - ['Gastric endocrine cell', "nan"], - ['Immature sertoli cell (Pre-Sertoli cell)', "nan"], - ['MHC class II cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mesenchymal Cell', "nan"], - ['Monocyte', "nan"], - ['Neuron', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['PSC cell', "nan"], - ['Pancreas exocrine cell', "nan"], - ['Primordial germ cell', "nan"], - ['Proximal tubule progenitor', "nan"], - ['Quiescent Stellate cell', 
"nan"], - ['Schwann cell', "nan"], - ['Skeletal muscle cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Unclassified endocrine cell', "nan"], - ['Unknown', "nan"] -] -ONTOLOGIES_HUMAN_PANCREAS_V0 = { - "names": { - 'Endocrine cell': ['Alpha cell', 'Beta cell', 'Gamma cell', 'Delta cell', 'Epsilon cell', 'Unclassified endocrine cell'], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanPancreas(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_PANCREAS_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_PANCREAS_V0 - } - super(CelltypeVersionsHumanPancreas, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/placenta.py b/sfaira/versions/celltype_versions/human/placenta.py deleted file mode 100644 index 401ba11fe..000000000 --- a/sfaira/versions/celltype_versions/human/placenta.py +++ /dev/null @@ -1,81 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_PLACENTA_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['CB CD34+', "nan"], - ['Decidual Macrophages 1', "nan"], - ['Decidual Macrophages 2', "nan"], - ['Decidual Macrophages 3', "nan"], - ['Decidual NK Cells 1', "nan"], - ['Decidual NK Cells 2', "nan"], - ['Decidual NK Cells 3', "nan"], - ['Decidual NK Cells p', "nan"], - ['Decidual Stromal Cells 1', "nan"], - ['Decidual Stromal Cells 2', "nan"], - ['Decidual Stromal Cells 3', "nan"], - ['Dendritic Cells 1', "nan"], - ['Dendritic Cells 2', "nan"], - ['Endothelial Cells L', "nan"], - ['Endothelial Cells f', "nan"], - ['Endothelial Cells m', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial Glandular Cells 1', "nan"], - ['Epithelial Glandular Cells 2', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Erythroid cell', "nan"], - 
['Erythroid progenitor cell (RP high)', "nan"], - ['Extravillous Trophoblasts', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fibroblasts 1', "nan"], - ['Fibroblasts 2', "nan"], - ['Granulocytes', "nan"], - ['Hofbauer Cells', "nan"], - ['ILC3', "nan"], - ['Intermediated cell', "nan"], - ['M2 Macrophage', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['NK Cells CD16+', "nan"], - ['NK Cells CD16-', "nan"], - ['Neutrophil', "nan"], - ['Perivascular Cells 1', "nan"], - ['Perivascular Cells 2', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['Syncytiotrophoblasts', "nan"], - ['T cell', "nan"], - ['Villous Cytotrophoblasts', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_PLACENTA_V0 = { - "names": { - 'Fibroblast': ['Fibroblasts 1', 'Fibroblasts 2'], - 'Macrophage': ['Decidual Macrophages 1', 'Decidual Macrophages 2', 'Decidual Macrophages 3'], - 'Epithelial cell': ['Epithelial Glandular Cells 1', 'Epithelial Glandular Cells 2'], - 'Fetal stromal cell': ['Decidual Stromal Cells 1', 'Decidual Stromal Cells 2', 'Decidual Stromal Cells 3'], - 'Endothelial cell': ['Endothelial Cells f', 'Endothelial Cells m', 'Endothelial Cells L'], - 'Dendritic cell': ['Dendritic Cells 1', 'Dendritic Cells 2'], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanPlacenta(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_PLACENTA_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_PLACENTA_V0 - } - super(CelltypeVersionsHumanPlacenta, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/pleura.py b/sfaira/versions/celltype_versions/human/pleura.py deleted file mode 100644 index c9c8bf906..000000000 --- a/sfaira/versions/celltype_versions/human/pleura.py +++ /dev/null @@ -1,46 
+0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_PLEURA_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mesothelial cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_PLEURA_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanPleura(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_PLEURA_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_PLEURA_V0 - } - super(CelltypeVersionsHumanPleura, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/prostate.py b/sfaira/versions/celltype_versions/human/prostate.py deleted file mode 100644 index da8930589..000000000 --- a/sfaira/versions/celltype_versions/human/prostate.py +++ /dev/null @@ -1,43 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_PROSTATE_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['Basal cell', "nan"], - ['Club', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fibroblast', "nan"], 
- ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Hillock', "nan"], - ['Leukocytes', "nan"], - ['Luminal', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_PROSTATE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanProstate(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_PROSTATE_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_PROSTATE_V0 - } - super(CelltypeVersionsHumanProstate, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/rectum.py b/sfaira/versions/celltype_versions/human/rectum.py deleted file mode 100644 index 80d30a0a8..000000000 --- a/sfaira/versions/celltype_versions/human/rectum.py +++ /dev/null @@ -1,41 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_RECTUM_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Enteroendocrine', "nan"], - ['Erythroid cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Goblet', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Paneth-like', "nan"], - ['Smooth muscle cell', "nan"], - ['Stem Cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['TA', "nan"] -] -ONTOLOGIES_HUMAN_RECTUM_V0 = { - "names": { - 'Epithelial cell': ['Paneth-like', 'Enteroendocrine', 'Goblet'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanRectum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_RECTUM_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_RECTUM_V0 - } - super(CelltypeVersionsHumanRectum, self).__init__(**kwargs) diff --git 
a/sfaira/versions/celltype_versions/human/rib.py b/sfaira/versions/celltype_versions/human/rib.py deleted file mode 100644 index 11661027f..000000000 --- a/sfaira/versions/celltype_versions/human/rib.py +++ /dev/null @@ -1,44 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_RIB_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Kidney intercalated cell', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['T cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_RIB_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanRib(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_RIB_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_RIB_V0 - } - super(CelltypeVersionsHumanRib, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/skin.py b/sfaira/versions/celltype_versions/human/skin.py deleted file mode 100644 index 160003138..000000000 --- a/sfaira/versions/celltype_versions/human/skin.py +++ /dev/null @@ -1,61 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_SKIN_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['Basal cell 1', "nan"], - ['Basal cell 2', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Epithelial 
cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Kidney intercalated cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['WNT1', "nan"], - ['channel', "nan"], - ['folicular', "nan"], - ['granular', "nan"], - ['hESC', "nan"], - ['melanocyte', "nan"], - ['mitotic', "nan"], - ['spinous', "nan"] -] -ONTOLOGIES_HUMAN_SKIN_V0 = { - "names": { - 'immune': ['B cell', 'T cell', 'Dendritic cell', 'Erythroid cell', 'Erythroid progenitor cell (RP high)', 'Macrophage', - 'Mast cell', 'Monocyte', 'Neutrophil', 'Neutrophil (RPS high)', 'Proliferating T cell'], - 'Basal cell': ['Basal cell 1', 'Basal cell 2'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanSkin(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_SKIN_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_SKIN_V0 - } - super(CelltypeVersionsHumanSkin, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/spinalcord.py b/sfaira/versions/celltype_versions/human/spinalcord.py deleted file mode 100644 index b7d60886e..000000000 --- a/sfaira/versions/celltype_versions/human/spinalcord.py +++ /dev/null @@ -1,55 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_SPINALCORD_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['Astrocyte', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', 
"nan"], - ['Epithelial cell', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Kidney intercalated cell', "nan"], - ['Loop of Henle', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_SPINALCORD_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanSpinalcord(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_SPINALCORD_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_SPINALCORD_V0 - } - super(CelltypeVersionsHumanSpinalcord, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/spleen.py b/sfaira/versions/celltype_versions/human/spleen.py deleted file mode 100644 index 27cca5500..000000000 --- a/sfaira/versions/celltype_versions/human/spleen.py +++ /dev/null @@ -1,69 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_SPLEEN_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B_Hypermutation', "nan"], - ['B_T_doublet', "nan"], - ['B_follicular', "nan"], - ['B_mantle', "nan"], - ['CB CD34+', "nan"], - ['CD34_progenitor', "nan"], - ['DC_1', "nan"], - ['DC_2', "nan"], - ['DC_activated', "nan"], - ['DC_plasmacytoid', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - 
['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fibroblast', "nan"], - ['ILC', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['NK_CD160pos', "nan"], - ['NK_FCGR3Apos', "nan"], - ['NK_dividing', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Plasma_IgG', "nan"], - ['Plasma_IgM', "nan"], - ['Plasmablast', "nan"], - ['Platelet', "nan"], - ['Proliferating T cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T_CD4_conv', "nan"], - ['T_CD4_fh', "nan"], - ['T_CD4_naive', "nan"], - ['T_CD4_reg', "nan"], - ['T_CD8_CTL', "nan"], - ['T_CD8_MAIT', "nan"], - ['T_CD8_activated', "nan"], - ['T_CD8_gd', "nan"], - ['unknown', "nan"] -] -ONTOLOGIES_HUMAN_SPLEEN_V0 = { - "names": { - 'B cell (Plasmocyte)': ['Plasma_IgG', 'Plasma_IgM', 'Plasmablast'], - 'B cell': ['B_Hypermutation', 'B_follicular', 'B_mantle', 'B_T_doublet'], - 'Dendritic cell': ['DC_1', 'DC_2', 'DC_activated', 'DC_plasmacytoid'], - 'T cell': ["T_CD4_conv", "T_CD4_fh", "T_CD4_naive", "T_CD4_reg", "T_CD8_CTL", "T_CD8_MAIT", "T_CD8_activated", - "T_CD8_gd", ] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanSpleen(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_SPLEEN_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_SPLEEN_V0 - } - super(CelltypeVersionsHumanSpleen, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/stomach.py b/sfaira/versions/celltype_versions/human/stomach.py deleted file mode 100644 index 074d0c97a..000000000 --- a/sfaira/versions/celltype_versions/human/stomach.py +++ /dev/null @@ -1,68 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_STOMACH_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', 
"nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte', "nan"], - ['Enterocyte progenitor', "nan"], - ['Epithelial cell', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Erythroid cell', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal Neuron', "nan"], - ['Fetal acinar cell', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal endocrine cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal neuron', "nan"], - ['Fetal skeletal muscle cell', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric chief cell', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Hepatocyte/Endodermal cell', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Mesothelial cell', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Proximal tubule progenitor', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['hESC', "nan"] -] -ONTOLOGIES_HUMAN_STOMACH_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanStomach(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_STOMACH_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_STOMACH_V0 - } - super(CelltypeVersionsHumanStomach, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/thymus.py b/sfaira/versions/celltype_versions/human/thymus.py deleted file mode 100644 index af315a27c..000000000 --- 
a/sfaira/versions/celltype_versions/human/thymus.py +++ /dev/null @@ -1,74 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_THYMUS_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B_memory', "nan"], - ['B_naive', "nan"], - ['B_plasma', "nan"], - ['B_pro/pre', "nan"], - ['CB CD34+', "nan"], - ['CD4+T', "nan"], - ['CD4+Tmem', "nan"], - ['CD8+T', "nan"], - ['CD8+Tmem', "nan"], - ['CD8αα', "nan"], - ['DC1', "nan"], - ['DC2', "nan"], - ['DN', "nan"], - ['DP', "nan"], - ['ETP', "nan"], - ['Endo', "nan"], - ['Epi_GCM2', "nan"], - ['Ery', "nan"], - ['Fb_1', "nan"], - ['Fb_2', "nan"], - ['Fb_cycling', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['ILC3', "nan"], - ['Lymph', "nan"], - ['Mac', "nan"], - ['Mast', "nan"], - ['Mgk', "nan"], - ['Mono', "nan"], - ['NK', "nan"], - ['NKT', "nan"], - ['NMP', "nan"], - ['Neutrophil', "nan"], - ['Neutrophil (RPS high)', "nan"], - ['Proliferating T cell', "nan"], - ['T(agonist)', "nan"], - ['TEC(myo)', "nan"], - ['TEC(neuro)', "nan"], - ['Treg', "nan"], - ['VSMC', "nan"], - ['aDC', "nan"], - ['alpha_beta_T(entry)', "nan"], - ['cTEC', "nan"], - ['gamma_delta_T', "nan"], - ['mTEC(I)', "nan"], - ['mTEC(II)', "nan"], - ['mTEC(III)', "nan"], - ['mTEC(IV)', "nan"], - ['mcTEC', "nan"], - ['pDC', "nan"] -] -ONTOLOGIES_HUMAN_THYMUS_V0 = { - "names": { - 'B cell': ['B_memory', 'B_naive', 'B_pro/pre'], - 'Dendritic cell': ['DC1', 'DC2'], - 'T cell': ['alpha_beta_T(entry)', 'gamma_delta_T', 'Treg', 'CD4+T', 'CD4+Tmem', 'CD8+T', 'CD8+Tmem'] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanThymus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_THYMUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_THYMUS_V0 - } - super(CelltypeVersionsHumanThymus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/thyroid.py b/sfaira/versions/celltype_versions/human/thyroid.py deleted file mode 100644 index 
394b67df0..000000000 --- a/sfaira/versions/celltype_versions/human/thyroid.py +++ /dev/null @@ -1,48 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_THYROID_V0 = [ - ['Antigen presenting cell (RPS high)', "nan"], - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['CB CD34+', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Enterocyte progenitor', "nan"], - ['Erythroid progenitor cell (RP high)', "nan"], - ['Fasciculata cell', "nan"], - ['Fetal enterocyte ', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Loop of Henle', "nan"], - ['M2 Macrophage', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Neutrophil', "nan"], - ['Primordial germ cell', "nan"], - ['Proliferating T cell', "nan"], - ['Sinusoidal endothelial cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Thyroid follicular cell', "nan"] -] - -ONTOLOGIES_HUMAN_THYROID_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanThyroid(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_THYROID_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_THYROID_V0 - } - super(CelltypeVersionsHumanThyroid, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/trachea.py b/sfaira/versions/celltype_versions/human/trachea.py deleted file mode 100644 index fe00fce25..000000000 --- a/sfaira/versions/celltype_versions/human/trachea.py +++ /dev/null @@ -1,45 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_TRACHEA_V0 = [ - ['B cell', "nan"], - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['Chondrocyte', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal 
transition)', "nan"], - ['Enterocyte progenitor', "nan"], - ['Fetal chondrocyte', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Goblet cell', "nan"], - ['Loop of Henle', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"], - ['Thyroid follicular cell', "nan"] -] - -ONTOLOGIES_HUMAN_TRACHEA_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanTrachea(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_TRACHEA_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_TRACHEA_V0 - } - super(CelltypeVersionsHumanTrachea, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/human/ureter.py b/sfaira/versions/celltype_versions/human/ureter.py deleted file mode 100644 index b410d9db5..000000000 --- a/sfaira/versions/celltype_versions/human/ureter.py +++ /dev/null @@ -1,33 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_URETER_V0 = [ - ['B cell (Plasmocyte)', "nan"], - ['Basal cell', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Epithelial cell (intermediated)', "nan"], - ['Fibroblast', "nan"], - ['Intermediated cell', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_URETER_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanUreter(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_URETER_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_URETER_V0 - } - super(CelltypeVersionsHumanUreter, self).__init__(**kwargs) diff --git 
a/sfaira/versions/celltype_versions/human/uterus.py b/sfaira/versions/celltype_versions/human/uterus.py deleted file mode 100644 index 5e4c79e79..000000000 --- a/sfaira/versions/celltype_versions/human/uterus.py +++ /dev/null @@ -1,43 +0,0 @@ -from .external import CelltypeVersionsBase - -CELLTYPES_HUMAN_UTERUS_V0 = [ - ['AT2 cell', "nan"], - ['B cell', "nan"], - ['Dendritic cell', "nan"], - ['Endothelial cell', "nan"], - ['Endothelial cell (APC)', "nan"], - ['Endothelial cell (endothelial to mesenchymal transition)', "nan"], - ['Enterocyte progenitor', "nan"], - ['Fetal epithelial progenitor', "nan"], - ['Fetal fibroblast', "nan"], - ['Fetal mesenchymal progenitor', "nan"], - ['Fetal stromal cell', "nan"], - ['Fibroblast', "nan"], - ['Gastric endocrine cell', "nan"], - ['Loop of Henle', "nan"], - ['Macrophage', "nan"], - ['Mast cell', "nan"], - ['Monocyte', "nan"], - ['Myeloid cell', "nan"], - ['Primordial germ cell', "nan"], - ['Smooth muscle cell', "nan"], - ['Stratified epithelial cell', "nan"], - ['Stromal cell', "nan"], - ['T cell', "nan"] -] -ONTOLOGIES_HUMAN_UTERUS_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsHumanUterus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_HUMAN_UTERUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_HUMAN_UTERUS_V0 - } - super(CelltypeVersionsHumanUterus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/__init__.py b/sfaira/versions/celltype_versions/mouse/__init__.py deleted file mode 100644 index e19c15aea..000000000 --- a/sfaira/versions/celltype_versions/mouse/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -from .bladder import CelltypeVersionsMouseBladder -from .brain import CelltypeVersionsMouseBrain -from .diaphragm import CelltypeVersionsMouseDiaphragm -from .adipose import CelltypeVersionsMouseAdipose -from .heart import CelltypeVersionsMouseHeart -from .kidney import CelltypeVersionsMouseKidney -from .colon import 
CelltypeVersionsMouseColon -from .muscle import CelltypeVersionsMouseMuscle -from .liver import CelltypeVersionsMouseLiver -from .lung import CelltypeVersionsMouseLung -from .mammarygland import CelltypeVersionsMouseMammarygland -from .bone import CelltypeVersionsMouseBone -from .femalegonad import CelltypeVersionsMouseFemalegonad -from .blood import CelltypeVersionsMouseBlood -from .placenta import CelltypeVersionsMousePlacenta -from .pancreas import CelltypeVersionsMousePancreas -from .prostate import CelltypeVersionsMouseProstate -from .rib import CelltypeVersionsMouseRib -from .skin import CelltypeVersionsMouseSkin -from .ileum import CelltypeVersionsMouseIleum -from .spleen import CelltypeVersionsMouseSpleen -from .stomach import CelltypeVersionsMouseStomach -from .malegonad import CelltypeVersionsMouseMalegonad -from .thymus import CelltypeVersionsMouseThymus -from .tongue import CelltypeVersionsMouseTongue -from .trachea import CelltypeVersionsMouseTrachea -from .uterus import CelltypeVersionsMouseUterus - -ORGAN_DICT = { - "bladder": CelltypeVersionsMouseBladder(), - "brain": CelltypeVersionsMouseBrain(), - "diaphragm": CelltypeVersionsMouseDiaphragm(), - "adipose": CelltypeVersionsMouseAdipose(), - "heart": CelltypeVersionsMouseHeart(), - "kidney": CelltypeVersionsMouseKidney(), - "colon": CelltypeVersionsMouseColon(), - "muscle": CelltypeVersionsMouseMuscle(), - "liver": CelltypeVersionsMouseLiver(), - "lung": CelltypeVersionsMouseLung(), - "mammarygland": CelltypeVersionsMouseMammarygland(), - "bone": CelltypeVersionsMouseBone(), - "femalegonad": CelltypeVersionsMouseFemalegonad(), - "blood": CelltypeVersionsMouseBlood(), - "placenta": CelltypeVersionsMousePlacenta(), - "pancreas": CelltypeVersionsMousePancreas(), - "prostate": CelltypeVersionsMouseProstate(), - "rib": CelltypeVersionsMouseRib(), - "skin": CelltypeVersionsMouseSkin(), - "ileum": CelltypeVersionsMouseIleum(), - "spleen": CelltypeVersionsMouseSpleen(), - "stomach": 
CelltypeVersionsMouseStomach(), - "malegonad": CelltypeVersionsMouseMalegonad(), - "thymus": CelltypeVersionsMouseThymus(), - "tongue": CelltypeVersionsMouseTongue(), - "trachea": CelltypeVersionsMouseTrachea(), - "uterus": CelltypeVersionsMouseUterus() -} diff --git a/sfaira/versions/celltype_versions/mouse/adipose.py b/sfaira/versions/celltype_versions/mouse/adipose.py deleted file mode 100644 index 1b82c908c..000000000 --- a/sfaira/versions/celltype_versions/mouse/adipose.py +++ /dev/null @@ -1,38 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_ADIPOSE_V0 = [ - ["B cell", "CL:0000236"], - ["CD4-positive, alpha-beta T cell", "nan"], - ["CD8-positive, alpha-beta T cell", "nan"], - ["endothelial cell", "CL:0000115"], - ["epithelial cell", "CL:0000066"], - ["erythroblast", "nan"], - ["macrophage", "nan"], - ["mesenchymal stem cell of adipose", "CL:0002570"], - ["myeloid cell", "CL:0000763"], - ["NK cell", "CL:0000623"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_ADIPOSE_V0 = { - "names": { - "lymphocyte": [ - "B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", - "myeloid cell", "NK cell" - ], - "T cell": ["CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseAdipose(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_ADIPOSE_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_ADIPOSE_V0 - } - super(CelltypeVersionsMouseAdipose, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/bladder.py b/sfaira/versions/celltype_versions/mouse/bladder.py deleted file mode 100644 index 995dc6e3d..000000000 --- a/sfaira/versions/celltype_versions/mouse/bladder.py +++ /dev/null @@ -1,37 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_BLADDER_V0 = [ - ["basal epithelial cell", "nan"], - ["bladder urothelial cell", "CL:1001428"], 
- ["dendritic cell", "nan"], - ["endothelial cell", "CL:0000115"], - ["epithelial cell", "nan"], - ["macrophage", "nan"], - ["mesenchymal stromal cell", "nan"], - ["NK cell", "nan"], - ["smooth muscle cell", "nan"], - ["stromal cell", "nan"], - ["umbrella cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_BLADDER_V0 = { - "names": { - "bladder cell": ["basal epithelial cell", "epithelial cell", "mesenchymal stromal cell", "smooth muscle cell", - "stromal cell", "umbrella cell"], - "leukocyte": ["dendritic cell", "macrophage", "NK cell"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseBladder(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_BLADDER_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_BLADDER_V0 - } - super(CelltypeVersionsMouseBladder, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/blood.py b/sfaira/versions/celltype_versions/mouse/blood.py deleted file mode 100644 index cc4613157..000000000 --- a/sfaira/versions/celltype_versions/mouse/blood.py +++ /dev/null @@ -1,31 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_BLOOD_V0 = [ - ["B cell", "CL:0000236"], - ["macrophage", "CL:0000235"], - ["T cell", "CL:0000084"], - ["NK cell", "nan"], - ["neutrophil", "nan"], - ["monocyte", "nan"], - ["erythroblast", "nan"], - ["dendritic cell", "nan"], - ["basophil", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_BLOOD_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseBlood(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_BLOOD_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_BLOOD_V0 - } - super(CelltypeVersionsMouseBlood, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/bone.py b/sfaira/versions/celltype_versions/mouse/bone.py deleted file mode 100644 index 8cadbb0a2..000000000 --- 
a/sfaira/versions/celltype_versions/mouse/bone.py +++ /dev/null @@ -1,52 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_BONE_V0 = [ - ["basophil", "CL:0000767"], - ["CD4-positive, alpha-beta T cell", "nan"], - ["dendritic cell", "nan"], - ["early pro-B cell", "CL:0002046"], - ["erythroblast", "CL:0000765"], - ["erythrocyte", "CL:0000232"], - ["erythroid progenitor", "CL:0000038"], - ["granulocyte monocyte progenitor cell", "nan"], - ["granulocytopoietic cell", "CL:0002191"], - ["hematopoietic precursor cell", "CL:0008001"], - ["hematopoietic stem cell", "nan"], - ["immature B cell", "CL:0000816"], - ["late pro-B cell", "CL:0002048"], - ["lymphoid progenitor cell", "nan"], - ["macrophage", "nan"], - ["mast cell", "nan"], - ["monocyte", "CL:0000576"], - ["megakaryocyte-erythroid progenitor cell", "CL:0000050"], - ["naive B cell", "CL:0000788"], - ["naive T cell", "CL:0000898"], - ["neutrophil", "nan"], - ["neutrophil progenitor", "nan"], - ["NK cell", "CL:0000623"], - ["plasma cell", "CL:0000786"], - ["precursor B cell", "CL:0000817"], - ["proerythroblast", "CL:0000547"], - ["promonocyte", "CL:0000559"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_BONE_V0 = { - "names": { - "granulocyte": ["basophil", "neutrophil", "mast cell"], - "mature alpha-beta T cell": ["CD4-positive, alpha-beta T cell"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseBone(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_BONE_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_BONE_V0 - } - super(CelltypeVersionsMouseBone, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/brain.py b/sfaira/versions/celltype_versions/mouse/brain.py deleted file mode 100644 index e8e1b9271..000000000 --- a/sfaira/versions/celltype_versions/mouse/brain.py +++ /dev/null @@ -1,54 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_BRAIN_V0 = [ - 
["astrocyte", "CL:0000127"], - ["BAM", "nan"], - ["B cells", "nan"], - ["Bergmann glial cell", "CL:0000644"], - ["brain pericyte", "CL:2000043"], - ["CD8-positive, alpha-beta T cell", "CL:0000625"], - ["cDC1", "nan"], - ["cDC2", "nan"], - ["endothelial cell", "CL:0000115"], - ["ependymal cell", "CL:0000065"], - ["GABAergic cell", "nan"], - ["granulocyte", "nan"], - ["ILC", "nan"], - ["interneuron", "CL:0000099"], - ["macrophage", "CL:0000235"], - ["mature NK T cell", "nan"], - ["medium spiny neuron", "CL:1001474"], - ["microglial cell", "CL:0000129"], - ["migDC", "nan"], - ["monocyte", "nan"], - ["neuroepithelial cell", "nan"], - ["neuron", "CL:0000540"], - ["neuronal stem cell", "CL:0000047"], - ["neutorphils", "nan"], - ["NK cells", "nan"], - ["oligodendrocyte", "CL:0000128"], - ["oligodendrocyte precursor cell", "CL:0002453"], - ["pDC", "nan"], - ["schwann cell", "nan"], - ["yd T cells", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_BRAIN_V0 = { - "names": { - "T cell": ["CD8-positive, alpha-beta T cell", "yd T cells", "mature NK T cell"], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseBrain(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_BRAIN_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_BRAIN_V0 - } - super(CelltypeVersionsMouseBrain, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/colon.py b/sfaira/versions/celltype_versions/mouse/colon.py deleted file mode 100644 index c901104ce..000000000 --- a/sfaira/versions/celltype_versions/mouse/colon.py +++ /dev/null @@ -1,30 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_COLON_V0 = [ - ["Brush cell of epithelium proper of large intestine", "CL:0002203"], - ["enterocyte of epithelium of large intestine", "CL:0002071"], - ["enteroendocrine cell", "CL:0000164"], - ["epithelial cell of large intestine", "CL:0002253"], - ["hematopoietic stem cell", "CL:0000037"], - 
["intestinal crypt stem cell", "CL:0002250"], - ["large intestine goblet cell", "CL:1000320"], - ["secretory cell", "CL:0000151"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_COLON_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseColon(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_COLON_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_COLON_V0 - } - super(CelltypeVersionsMouseColon, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/diaphragm.py b/sfaira/versions/celltype_versions/mouse/diaphragm.py deleted file mode 100644 index 7558ac084..000000000 --- a/sfaira/versions/celltype_versions/mouse/diaphragm.py +++ /dev/null @@ -1,28 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_DIAPHRAGM_V0 = [ - ["B cell", "CL:0000236"], - ["endothelial cell", "CL:0000115"], - ["macrophage", "CL:0000235"], - ["mesenchymal stem cell", "CL:0000134"], - ["skeletal muscle satellite cell", "CL:0000594"], - ["T cell", "CL:0000084"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_DIAPHRAGM_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseDiaphragm(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_DIAPHRAGM_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_DIAPHRAGM_V0 - } - super(CelltypeVersionsMouseDiaphragm, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/external.py b/sfaira/versions/celltype_versions/mouse/external.py deleted file mode 100644 index cf6bc6d79..000000000 --- a/sfaira/versions/celltype_versions/mouse/external.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.versions.celltype_versions import CelltypeVersionsBase diff --git a/sfaira/versions/celltype_versions/mouse/femalegonad.py b/sfaira/versions/celltype_versions/mouse/femalegonad.py deleted file mode 100644 index 19278ed31..000000000 --- 
a/sfaira/versions/celltype_versions/mouse/femalegonad.py +++ /dev/null @@ -1,33 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_FEMALEGONAD_V0 = [ - ["cumulus cell", "nan"], - ["granulosa cell", "nan"], - ["large luteal cell", "nan"], - ["macrophage", "nan"], - ["small luteal cell", "nan"], - ["epithelial cell of ovarian surface", "nan"], - ["endothelial cell of ovarian surface", "nan"], - ["stromal cell", "nan"], - ["thecal cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_FEMALEGONAD_V0 = { - "names": { - 'luteal cell': ['small luteal cell', 'large luteal cell'], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseFemalegonad(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_FEMALEGONAD_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_FEMALEGONAD_V0 - } - super(CelltypeVersionsMouseFemalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/heart.py b/sfaira/versions/celltype_versions/mouse/heart.py deleted file mode 100644 index 76e77c90c..000000000 --- a/sfaira/versions/celltype_versions/mouse/heart.py +++ /dev/null @@ -1,42 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_HEART_V0 = [ - ["aortic endothelial cell", "CL:0002544"], - ["atrial myocyte", "CL:0002129"], - ["B cell", "CL:CL:0000115"], - ["cardiac neuron", "CL:0000057"], - ["cardiomyocyte", "CL:0000746"], - ["endocardial cell", "CL:0002350"], - ["endothelial cell of coronary artery", "CL:2000018"], - ["epithelial cell", "CL:"], - ["erythrocyte", "CL:"], - ["fibroblast of cardiac tissue", "CL:0002548"], - ["fibrocyte", "CL:CL:0000145"], - ["leukocyte", "CL:0000738"], - ["mast cell", "nan"], - ["monocyte", "nan"], - ["macrophage", "nan"], - ["professional antigen presenting cell", "nan"], - ["smooth muscle cell", "CL:0000192"], - ["T cell", "nan"], - ["valve cell", "CL:0000663"], - ["ventricular myocyte", "CL:0002131"], - 
["unknown", "nan"] -] -ONTOLOGIES_MOUSE_HEART_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseHeart(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_HEART_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_HEART_V0 - } - super(CelltypeVersionsMouseHeart, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/ileum.py b/sfaira/versions/celltype_versions/mouse/ileum.py deleted file mode 100644 index 1f190bd5c..000000000 --- a/sfaira/versions/celltype_versions/mouse/ileum.py +++ /dev/null @@ -1,33 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_ILEUM_V0 = [ - ["B cell", "CL:0000236"], - ["macrophage", "CL:0000235"], - ["T cell", "CL:0000084"], - ["dendritic cell", "nan"], - ["mast cell", "nan"], - ["paneth cell", "nan"], - ["stromal cell", "nan"], - ["epithelial cell", "nan"], - ["epithelial cell villi", "nan"], - ["enteroendocrine cell", "nan"], - ["erythroblast", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_ILEUM_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseIleum(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_ILEUM_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_ILEUM_V0 - } - super(CelltypeVersionsMouseIleum, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/kidney.py b/sfaira/versions/celltype_versions/mouse/kidney.py deleted file mode 100644 index 9fe0c66c2..000000000 --- a/sfaira/versions/celltype_versions/mouse/kidney.py +++ /dev/null @@ -1,67 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_KIDNEY_V0 = [ - ["B cell", "CL:0000236"], - ["brush cell", "nan"], - ["dendritic cell", "nan"], - ["endothelial cell", "nan"], - ["fenestrated cell", "CL:0000666"], - ["fetal adipocyte", "nan"], - ["fetal mesenchymal cell", "nan"], - ["fetal proliferative cell", 
"nan"], - ["fibroblast", "CL:0000057"], - ["interstitial fibroblast", "nan"], - ["glomerular epithelial cell", "nan"], - ["kidney collecting duct epithelial cell", "CL:1000454"], - ["kidney collecting duct principal cell", "CL:1001431"], - ["kidney cortex artery cell", "CL:1001045"], - ["kidney distal convoluted tubule epithelial cell", "CL:1000849"], - ["kidney loop of Henle ascending limb epithelial cell", "CL:1001016"], - ["kidney loop of Henle thick ascending limb epithelial cell", "CL:1001106"], - ["kidney proximal convoluted tubule epithelial cell", "CL:1000838"], - ["kidney proximal straight tubule epithelial cell", "nan"], - ["macrophage", "CL:0000235"], - ["mesangial cell", "CL:0000650"], - ["neutrophil progenitor", "nan"], - ["NK cell", "nan"], - ["podocyte", "CL:0000653"], - ["plasma cell", "CL:0000786"], - ["T cell", "CL:0000084"], - ["ureteric epithelial cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_KIDNEY_V0 = { - "names": { - "epithelial cell": [ - "kidney collecting duct epithelial cell", - "kidney distal convoluted tubule epithelial cell", - "kidney loop of Henle ascending limb epithelial cell", - "kidney loop of Henle thick ascending limb epithelial cell", - "kidney proximal convoluted tubule epithelial cell", - "kidney proximal straight tubule epithelial cell", - ], - "epithelial cell of proximal tubule": [ - "kidney proximal convoluted tubule epithelial cell", - "kidney proximal straight tubule epithelial cell", - ], - "lymphocyte": ["B cell", "dendritic cell", "macrophage", "NK cell", "T cell"], - "leukocyte": [ - "B cell", "dendritic cell", "macrophage", "neutrophil progenitor", - "NK cell", "plasma cell", "T cell" - ], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseKidney(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_KIDNEY_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_KIDNEY_V0 - } - super(CelltypeVersionsMouseKidney, self).__init__(**kwargs) diff 
--git a/sfaira/versions/celltype_versions/mouse/liver.py b/sfaira/versions/celltype_versions/mouse/liver.py deleted file mode 100644 index 8c944f933..000000000 --- a/sfaira/versions/celltype_versions/mouse/liver.py +++ /dev/null @@ -1,45 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_LIVER_V0 = [ - ["B cell", "CL:0000236"], - ["dendritic cell", "nan"], - ["CD4-positive, alpha-beta T cell", "nan"], - ["CD8-positive, alpha-beta T cell", "nan"], - ["duct epithelial cell", "nan"], - ["erythroblast", "nan"], - ["endothelial cell of hepatic sinusoid", "CL:1000398"], - ["granulocyte", "nan"], - ["hepatic stellate cell", "CL:0000632"], - ["hepatocyte", "CL:0000182"], - ["Kupffer cell", "CL:0000091"], - ["macrophage", "nan"], - ["neutrophil", "CL:0000775"], - ["NK cell", "CL:0000623"], - ["plasmacytoid dendritic cell", "CL:0000784"], - ["stromal cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_LIVER_V0 = { - "names": { - "myeloid leukocyte": [ - "macrophage", "dendritic cell", "plasmacytoid dendritic cell", "erythroblast", - "granulocyte", "neutrophil" - ], - "T cell": ["CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell"], - "mature NK T cell": ["CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "NK cell"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseLiver(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_LIVER_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_LIVER_V0 - } - super(CelltypeVersionsMouseLiver, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/lung.py b/sfaira/versions/celltype_versions/mouse/lung.py deleted file mode 100644 index ae525824e..000000000 --- a/sfaira/versions/celltype_versions/mouse/lung.py +++ /dev/null @@ -1,87 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_LUNG_V0 = [ - ["adventitial cell", "nan"], - ["alveolar bipotent 
progenitor", "nan"], - ["alveolar epithelial cell type I", "nan"], - ["alveolar epithelial cell type II", "nan"], - ["alveolar macrophage", "nan"], - ["B cell", "CL_0000236"], - ["basophil", "nan"], - ["bronchial smooth muscle cell", "nan"], - ["CD4-positive, alpha-beta T cell", "nan"], - ["CD8-positive, alpha-beta T cell", "nan"], - ["ciliated cell", "nan"], - ["clara cell", "nan"], - ["classical monocyte", "nan"], - ["club cell of bronchiole", "nan"], - ["endothelial cell of lymphatic vessel", "nan"], - ["eosinophil", "nan"], - ["fibroblast of lung", "nan"], - ["glial cell", "CL_0000125"], - ["intermediate monocyte", "nan"], - ["lung macrophage", "nan"], - ["lung neuroendocrine cell", "nan"], - ["monocyte progenitor", "nan"], - ["myeloid dendritic cell", "nan"], - ["neutrophil", "nan"], - ["NK cell", "CL_0000623"], - ["non-classical monocyte", "nan"], - ["nuocyte", "nan"], - ["pericyte cell", "nan"], - ["plasma cell", "nan"], - ["plasmacytoid dendritic cell", "nan"], - ["proliferative cell", "nan"], - ["pulmonary interstitial fibroblast", "nan"], - ["regulatory T cell", "nan"], - ["respiratory basal cell", "nan"], - ["smooth muscle cell of the pulmonary artery", "nan"], - ["type I pneumocyte", "nan"], - ["type II pneumocyte", "nan"], - ["vein endothelial cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_LUNG_V0 = { - "names": { - "dendritic cell": [ - "myeloid dendritic cell", "plasmacytoid dendritic cell" - ], - "endothelial cell": [ - "endothelial cell of lymphatic vessel", "vein endothelial cell" - ], - "leukocyte": [ - "alveolar macrophage", "B cell", "basophil", "bronchial smooth muscle cell", - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "classical monocyte", - "eosinophil", "glial cell", "intermediate monocyte", "lung macrophage", "monocyte progenitor", - "myeloid dendritic cell", "neutrophil", "NK cell", "non-classical monocyte", "plasma cell", - "plasmacytoid dendritic cell", "regulatory T cell" - ], - "lymphocyte": [ - 
"B cell", "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", - "plasmacytoid dendritic cell", "NK cell", "regulatory T cell" - ], - "mature NK T cell": [ - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "NK cell", "regulatory T cell" - ], - "T cell": [ - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", "regulatory T cell" - ], - "stromal cell": [ - "fibroblast of lung", "pericyte cell" - ] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseLung(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_LUNG_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_LUNG_V0 - } - super(CelltypeVersionsMouseLung, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/malegonad.py b/sfaira/versions/celltype_versions/mouse/malegonad.py deleted file mode 100644 index f311320a7..000000000 --- a/sfaira/versions/celltype_versions/mouse/malegonad.py +++ /dev/null @@ -1,33 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_MALEGONAD_V0 = [ - ["macrophage", "CL:0000235"], - ["leydig cell", "nan"], - ["elongating spermatid", "nan"], - ["erythroblast", "nan"], - ["pre-sertoli cell", "nan"], - ["sertoli cell", "nan"], - ["preleptotene spermatogonia", "nan"], - ["spermatogonia", "nan"], - ["spermatocyte", "nan"], - ["spermatid", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_MALEGONAD_V0 = { - "names": { - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseMalegonad(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_MALEGONAD_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_MALEGONAD_V0 - } - super(CelltypeVersionsMouseMalegonad, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/mammarygland.py b/sfaira/versions/celltype_versions/mouse/mammarygland.py deleted file mode 100644 index cbd1ad90c..000000000 --- 
a/sfaira/versions/celltype_versions/mouse/mammarygland.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_MAMMARYGLAND_V0 = [ - ["B cell", "CL:0000236"], - ["basal cell", "CL:0000646"], - ["endothelial cell", "CL:0000115"], - ["luminal epithelial cell of mammary gland", "CL:0002326"], - ["luminal progenitor cell", "CL:0002326"], - ["macrophage", "CL:0000235"], - ["stromal cell", "CL:0000499"], - ["T cell", "CL:0000084"], - ["dendritic cell", "nan"], - ["proliferative cell", "nan"], - ["NK cell", "CL:0000623"], - ["stem and progenitor cell", "CL:0000623"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_MAMMARYGLAND_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseMammarygland(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_MAMMARYGLAND_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_MAMMARYGLAND_V0 - } - super(CelltypeVersionsMouseMammarygland, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/muscle.py b/sfaira/versions/celltype_versions/mouse/muscle.py deleted file mode 100644 index b2ffdc66d..000000000 --- a/sfaira/versions/celltype_versions/mouse/muscle.py +++ /dev/null @@ -1,38 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_MUSCLE_V0 = [ - ["B cell", "CL:0000236"], - ["dendritic cell", "nan"], - ["endothelial cell", "CL:0000115"], - ["erythroblast", "nan"], - ["macrophage", "CL:0000235"], - ["mesenchymal stem cell", "CL:0000134"], - ["monocyte progenitor", "nan"], - ["muscle cell", "nan"], - ["neutrophil", "nan"], - ["Schwann cell", "CL:0002573"], - ["smooth muscle cell", "CL:0000192"], - ["stromal cell", "nan"], - ["skeletal muscle cell", "CL:0000192"], - ["skeletal muscle satellite cell", "CL:0000594"], - ["T cell", "CL:0000084"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_MUSCLE_V0 = { - "names": { - }, - "ontology_ids": {}, -} - - -class 
CelltypeVersionsMouseMuscle(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_MUSCLE_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_MUSCLE_V0 - } - super(CelltypeVersionsMouseMuscle, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/pancreas.py b/sfaira/versions/celltype_versions/mouse/pancreas.py deleted file mode 100644 index b367cfacd..000000000 --- a/sfaira/versions/celltype_versions/mouse/pancreas.py +++ /dev/null @@ -1,53 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_PANCREAS_V0 = [ - ["b cell", "nan"], - ["dendritic cell", "nan"], - ["endothelial cell", "CL:0000115"], - ["erythroblast", "nan"], - ["glial cell", "nan"], - ["granulocyte", "nan"], - ["macrophage", "nan"], - ["pancreatic A cell", "CL:0000171"], - ["pancreatic acinar cell", "CL:0002064"], - ["pancreatic B cell", "CL:0000169"], - ["pancreatic D cell", "CL:0000173"], - ["pancreatic ductal cell", "CL:0002079"], - ["pancreatic PP cell", "CL:0002275"], - ["pancreatic stellate cell", "CL:0002410"], - ["smooth muscle cell", "nan"], - ["stromal cell", "nan"], - ["t cell", "nan"], - ["lymphatic endothelial cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_PANCREAS_V0 = { - "names": { - "leukocyte": [ - "b cell", - "dendritic cell", - "granulocyte", - "macrophage", - "t cell" - ], - "endocrine cell": [ - "pancreatic A cell", - "pancreatic D cell", - "pancreatic PP cell" - ], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMousePancreas(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_PANCREAS_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_PANCREAS_V0 - } - super(CelltypeVersionsMousePancreas, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/placenta.py b/sfaira/versions/celltype_versions/mouse/placenta.py deleted file mode 100644 index 104aafa28..000000000 --- 
a/sfaira/versions/celltype_versions/mouse/placenta.py +++ /dev/null @@ -1,44 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_PLACENTA_V0 = [ - ["B cell", "CL:0000236"], - ["endothelial cell", "CL:0000115"], - ["macrophage", "CL:0000235"], - ["stromal cell", "CL:0000499"], - ["erythroblast", "nan"], - ["granulocyte", "nan"], - ["basophil", "nan"], - ["decidual stromal cell", "nan"], - ["dendritic cell", "nan"], - ["endodermal cell", "nan"], - ["monocyte progenitor", "nan"], - ["HSPC", "nan"], - ["megakaryocte", "nan"], - ["monocyte", "nan"], - ["NK cell", "nan"], - ["NKT cell", "nan"], - ["PE lineage cell", "nan"], - ["trophoblast progenitor", "nan"], - ["labyrinthine trophoblast", "nan"], - ["spiral artery trophoblast giant cells", "nan"], - ["invasive spongiotrophoblast", "nan"], - ["spongiotrophoblast", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_PLACENTA_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMousePlacenta(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_PLACENTA_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_PLACENTA_V0 - } - super(CelltypeVersionsMousePlacenta, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/prostate.py b/sfaira/versions/celltype_versions/mouse/prostate.py deleted file mode 100644 index c3d476d86..000000000 --- a/sfaira/versions/celltype_versions/mouse/prostate.py +++ /dev/null @@ -1,28 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_PROSTATE_V0 = [ - ["epithelial cell", "nan"], - ["glandular epithelial cell", "nan"], - ["T cell", "CL:0000084"], - ["glandular cell", "nan"], - ["stromal cell", "nan"], - ["dendritic cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_PROSTATE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseProstate(CelltypeVersionsBase): - - def __init__(self, **kwargs): - 
self.celltype_universe = { - "0": CELLTYPES_MOUSE_PROSTATE_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_PROSTATE_V0 - } - super(CelltypeVersionsMouseProstate, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/rib.py b/sfaira/versions/celltype_versions/mouse/rib.py deleted file mode 100644 index fe0437529..000000000 --- a/sfaira/versions/celltype_versions/mouse/rib.py +++ /dev/null @@ -1,36 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_RIB_V0 = [ - ["B cell", "CL:0000236"], - ["endothelial cell", "CL:0000115"], - ["macrophage", "CL:0000235"], - ["stromal cell", "CL:0000499"], - ["proliferative cell", "nan"], - ["cartilage cell", "nan"], - ["erythroblast", "nan"], - ["granulocyte", "nan"], - ["muscle cell", "nan"], - ["neuron", "nan"], - ["neutrophil", "nan"], - ["osteoblast", "nan"], - ["osteoclast", "nan"], - ["oligodendrocyte", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_RIB_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseRib(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_RIB_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_RIB_V0 - } - super(CelltypeVersionsMouseRib, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/skin.py b/sfaira/versions/celltype_versions/mouse/skin.py deleted file mode 100644 index fbfe183a4..000000000 --- a/sfaira/versions/celltype_versions/mouse/skin.py +++ /dev/null @@ -1,30 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_SKIN_V0 = [ - ["basal cell of epidermis", "CL:0002187"], - ["bulge keratinocyte", "nan"], - ["epidermal cell", "CL:0000362"], - ["fibroblast", "nan"], - ["keratinocyte stem cell", "CL:0002337"], - ["macrophage", "nan"], - ["stem cell of epidermis", "CL:1000428"], - ["T cell", "CL:0000084"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_SKIN_V0 = { - "names": {}, - "ontology_ids": {}, -} - - 
-class CelltypeVersionsMouseSkin(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_SKIN_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_SKIN_V0 - } - super(CelltypeVersionsMouseSkin, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/spleen.py b/sfaira/versions/celltype_versions/mouse/spleen.py deleted file mode 100644 index 67a2e21df..000000000 --- a/sfaira/versions/celltype_versions/mouse/spleen.py +++ /dev/null @@ -1,43 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_SPLEEN_V0 = [ - ["B cell", "CL:0000236"], - ["CD4-positive, alpha-beta T cell", "nan"], - ["CD8-positive, alpha-beta T cell", "nan"], - ["dendritic cell", "nan"], - ["erythroblast", "CL:0000765"], - ["granulocyte", "CL:0000094"], - ["immature NKT cell", "nan"], - ["macrophage", "nan"], - ["macrophage dendritic cell progenitor", "CL:0002009"], - ["mature NK T cell", "nan"], - ["megakaryocyte-erythroid progenitor cell", "nan"], - ["monocyte", "nan"], - ["neutrophil", "nan"], - ["NK cell", "CL:0000623"], - ["plasma cell", "nan"], - ["proerythroblast", "CL:0000547"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_SPLEEN_V0 = { - "names": { - "T cell": [ - "CD4-positive, alpha-beta T cell", "CD8-positive, alpha-beta T cell", - "immature NKT cell", "mature NK T cell" - ] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseSpleen(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_SPLEEN_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_SPLEEN_V0 - } - super(CelltypeVersionsMouseSpleen, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/stomach.py b/sfaira/versions/celltype_versions/mouse/stomach.py deleted file mode 100644 index 1feb65a94..000000000 --- a/sfaira/versions/celltype_versions/mouse/stomach.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 
-CELLTYPES_MOUSE_STOMACH_V0 = [ - ["antral mucous cell", "nan"], - ["dendritic cell", "nan"], - ["G cell", "nan"], - ["gastric mucosal cell", "nan"], - ["epithelial cell", "nan"], - ["muscle cell", "nan"], - ["macrophage", "CL:0000235"], - ["parietal cell", "nan"], - ["pit cell", "nan"], - ["proliferative cell", "nan"], - ["stomach cell", "nan"], - ["tuft cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_STOMACH_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseStomach(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_STOMACH_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_STOMACH_V0 - } - super(CelltypeVersionsMouseStomach, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/thymus.py b/sfaira/versions/celltype_versions/mouse/thymus.py deleted file mode 100644 index c9e6c7d36..000000000 --- a/sfaira/versions/celltype_versions/mouse/thymus.py +++ /dev/null @@ -1,46 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_THYMUS_V0 = [ - ["abT cell", "nan"], - ["B cell", "nan"], - ["dendritic cell", "nan"], - ["DN1 thymocyte", "nan"], - ["DN2 thymocyte", "nan"], - ["DN3 thymocyte", "nan"], - ["DN4 thymocyte", "nan"], - ["double positive T cell", "nan"], - ["endothelial cell", "CL:0000115"], - ["epithelial cell of thymus", "CL:0002293"], - ["fibroblast", "nan"], - ["gdT cell", "nan"], - ["macrophage", "nan"], - ["professional antigen presenting cell", "CL:0000145"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_THYMUS_V0 = { - "names": { - 'double negative T cell': ["DN1 thymocyte", "DN2 thymocyte", "DN3 thymocyte", "DN4 thymocyte"], - "immature T cell": [ - "DN1 thymocyte", "DN2 thymocyte", "DN3 thymocyte", "DN4 thymocyte", "double positive T cell" - ], - "mature T cell": ["abT cell", "gdT cell"], - 'thymocyte': [ - "DN1 thymocyte", "DN2 thymocyte", "DN3 thymocyte", "DN4 thymocyte", "double positive T cell", - "gdT cell", 
"abT cell" - ], - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseThymus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_THYMUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_THYMUS_V0 - } - super(CelltypeVersionsMouseThymus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/tongue.py b/sfaira/versions/celltype_versions/mouse/tongue.py deleted file mode 100644 index 080a8dc7e..000000000 --- a/sfaira/versions/celltype_versions/mouse/tongue.py +++ /dev/null @@ -1,25 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_TONGUE_V0 = [ - ["basal cell of epidermis", "CL:0002187"], - ["keratinocyte", "CL:0000312"], - ["Langerhans cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_TONGUE_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseTongue(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_TONGUE_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_TONGUE_V0 - } - super(CelltypeVersionsMouseTongue, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/trachea.py b/sfaira/versions/celltype_versions/mouse/trachea.py deleted file mode 100644 index 57ad18f33..000000000 --- a/sfaira/versions/celltype_versions/mouse/trachea.py +++ /dev/null @@ -1,39 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_TRACHEA_V0 = [ - ["basal epithelial cell of tracheobronchial tree", "CL:0002329"], - ["chondrocyte", "CL:0000138"], - ["ciliated columnar cell of tracheobronchial tree", "CL:0002145"], - ["endothelial cell", "CL:000115"], - ["epithelial cell", "CL:000115"], - ["fibroblast", "CL:0000057"], - ["granulocyte", "CL:0000094"], - ["keratinocyte", "nan"], - ["macrophage", "CL:0000235"], - ["mesenchymal cell", "nan"], - ["mesenchymal progenitor cell", "nan"], - ["mucus secreting cell", "CL:0000319"], - 
["neuroendocrine cell", "nan"], - ["smooth muscle cell of trachea", "CL:0002600"], - ["T cell", "CL:0000084"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_TRACHEA_V0 = { - "names": { - 'blood cell': ["granulocyte", "macrophage", "T cell"] - }, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseTrachea(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_TRACHEA_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_TRACHEA_V0 - } - super(CelltypeVersionsMouseTrachea, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/mouse/uterus.py b/sfaira/versions/celltype_versions/mouse/uterus.py deleted file mode 100644 index 0d207613a..000000000 --- a/sfaira/versions/celltype_versions/mouse/uterus.py +++ /dev/null @@ -1,34 +0,0 @@ -from .external import CelltypeVersionsBase - -# Version 0 -CELLTYPES_MOUSE_UTERUS_V0 = [ - ["B cell", "CL:0000236"], - ["dendritic cell", "nan"], - ["granulocyte", "nan"], - ["macrophage", "CL:0000235"], - ["NK cell", "CL:0000623"], - ["stromal cell", "nan"], - ["endothelial cell", "nan"], - ["glandular epithelial cell", "nan"], - ["keratinocyte", "nan"], - ["monocyte", "nan"], - ["muscle cell", "nan"], - ["smooth muscle cell", "nan"], - ["unknown", "nan"] -] -ONTOLOGIES_MOUSE_UTERUS_V0 = { - "names": {}, - "ontology_ids": {}, -} - - -class CelltypeVersionsMouseUterus(CelltypeVersionsBase): - - def __init__(self, **kwargs): - self.celltype_universe = { - "0": CELLTYPES_MOUSE_UTERUS_V0 - } - self.ontology = { - "0": ONTOLOGIES_MOUSE_UTERUS_V0 - } - super(CelltypeVersionsMouseUterus, self).__init__(**kwargs) diff --git a/sfaira/versions/celltype_versions/target_universes/__init__.py b/sfaira/versions/celltype_versions/target_universes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/versions/celltype_versions/target_universes/human/__init__.py b/sfaira/versions/celltype_versions/target_universes/human/__init__.py new file mode 100644 index 
000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/versions/celltype_versions/target_universes/human/adipose.csv b/sfaira/versions/celltype_versions/target_universes/human/adipose.csv new file mode 100644 index 000000000..7ca47fcb6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/adipose.csv @@ -0,0 +1,15 @@ +"name","id" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Fibroblast', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv b/sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv new file mode 100644 index 000000000..02c5b83a2 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv @@ -0,0 +1,47 @@ +"name","id" +'Adrenal gland inflammatory cell', "nan" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fetal Neuron', "nan" +'Fetal acinar cell', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Goblet cell', 
"nan" +'Hepatocyte/Endodermal cell', "nan" +'Immature sertoli cell (Pre-Sertoli cell)', "nan" +'Kidney intercalated cell', "nan" +'Loop of Henle', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proximal tubule progenitor', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Ureteric bud cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/artery.csv b/sfaira/versions/celltype_versions/target_universes/human/artery.csv new file mode 100644 index 000000000..21712b7a5 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/artery.csv @@ -0,0 +1,20 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Fibroblast', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mesothelial cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Neutrophil', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/bladder.csv b/sfaira/versions/celltype_versions/target_universes/human/bladder.csv new file mode 100644 index 000000000..d9222e44d --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/bladder.csv @@ -0,0 +1,25 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Epithelial cell (intermediated)', "nan" +'Erythroid progenitor cell (RP 
high)', "nan" +'Fetal fibroblast', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Intermediated cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/blood.csv b/sfaira/versions/celltype_versions/target_universes/human/blood.csv new file mode 100644 index 000000000..f0a8cec41 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/blood.csv @@ -0,0 +1,21 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal chondrocyte', "nan" +'Fetal epithelial progenitor', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Sinusoidal endothelial cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/bone.csv b/sfaira/versions/celltype_versions/target_universes/human/bone.csv new file mode 100644 index 000000000..2ff5712b6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/bone.csv @@ -0,0 +1,15 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/brain.csv 
b/sfaira/versions/celltype_versions/target_universes/human/brain.csv new file mode 100644 index 000000000..ee05bf7f1 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/brain.csv @@ -0,0 +1,40 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'Astrocytes 1', "nan" +'Astrocytes 2', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cells', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'GABAergic interneurons 1', "nan" +'GABAergic interneurons 2', "nan" +'Gastric endocrine cell', "nan" +'Glutamatergic neurons from the PFC 1', "nan" +'Glutamatergic neurons from the PFC 2', "nan" +'Goblet cell', "nan" +'Granule neurons from the hip dentate gyrus region', "nan" +'Macrophage', "nan" +'Microglia', "nan" +'Monocyte', "nan" +'Neuronal stem cells', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Oligodendrocyte precursors', "nan" +'Oligodendrocytes', "nan" +'Primordial germ cell', "nan" +'Pyramidal neurons from the hip CA region 1', "nan" +'Pyramidal neurons from the hip CA region 2', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Unknown', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/calvaria.csv b/sfaira/versions/celltype_versions/target_universes/human/calvaria.csv new file mode 100644 index 000000000..f8010b36b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/calvaria.csv @@ -0,0 +1,23 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal chondrocyte', "nan" +'Fetal 
epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Kidney intercalated cell', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/cervix.csv b/sfaira/versions/celltype_versions/target_universes/human/cervix.csv new file mode 100644 index 000000000..4a54d94b6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/cervix.csv @@ -0,0 +1,15 @@ +"name","id" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Fetal epithelial progenitor', "nan" +'Fibroblast', "nan" +'Loop of Henle', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv b/sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv new file mode 100644 index 000000000..4282e43e4 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv @@ -0,0 +1,24 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Loop of Henle', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" 
+'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/colon.csv b/sfaira/versions/celltype_versions/target_universes/human/colon.csv new file mode 100644 index 000000000..ecca3f454 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/colon.csv @@ -0,0 +1,68 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell IgA Plasma', "nan" +'B cell IgG Plasma', "nan" +'B cell cycling', "nan" +'B cell memory', "nan" +'Best4+ Enterocytes', "nan" +'CD4+ Memory', "nan" +'CD4+ PD1+', "nan" +'CD4+ T Activated Fos-hi', "nan" +'CD4+ T Activated Fos-lo', "nan" +'CD69+ Mast', "nan" +'CD69- Mast', "nan" +'CD8 T', "nan" +'CD8+ IELs', "nan" +'CD8+ IL17+', "nan" +'CD8+ LP', "nan" +'Cycling T', "nan" +'Cycling TA', "nan" +'DC1', "nan" +'DC2', "nan" +'Endothelial', "nan" +'Enterocyte Progenitors', "nan" +'Enterocytes', "nan" +'Enteroendocrine cells', "nan" +'Erythroid cell', "nan" +'Fetal Neuron', "nan" +'Fetal enterocyte ', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fibroblast', "nan" +'Follicular', "nan" +'Glial cells', "nan" +'Goblet cells', "nan" +'ILC', "nan" +'Immature Enterocytes 1', "nan" +'Immature Enterocytes 2', "nan" +'Immature Goblet', "nan" +'LYVE1 Macrophage', "nan" +'Lymphoid DC', "nan" +'M cells', "nan" +'MT-hi', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Myofibroblasts', "nan" +'NK', "nan" +'Neutrophil', "nan" +'Paneth cells', "nan" +'Pericytes', "nan" +'Primordial germ cell', "nan" +'Secretory TA', "nan" +'Smooth Muscle', "nan" +'Stem cells', "nan" +'Stromal', "nan" +'TA 1', "nan" +'TA 2', "nan" +'Tcm', "nan" +'Tfh', "nan" +'Th1', "nan" +'Th17', "nan" +'Treg', "nan" +'Tregs', "nan" +'Tuft', "nan" +'WNT2B+ Fos-lo 1', "nan" +'WNT5B+ 2', "nan" +'cycling DCs', "nan" +'cycling gd T', "nan" +'gd T', "nan" +'pDC', "nan" diff --git 
a/sfaira/versions/celltype_versions/target_universes/human/duodenum.csv b/sfaira/versions/celltype_versions/target_universes/human/duodenum.csv new file mode 100644 index 000000000..e9d1722b1 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/duodenum.csv @@ -0,0 +1,20 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fibroblast', "nan" +'Goblet cell', "nan" +'Hepatocyte/Endodermal cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv b/sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv new file mode 100644 index 000000000..d605f47e6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv @@ -0,0 +1,15 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell (APC)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Epithelial cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/esophagus.csv b/sfaira/versions/celltype_versions/target_universes/human/esophagus.csv new file mode 100644 index 000000000..0b6a75914 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/esophagus.csv @@ -0,0 +1,35 @@ +"name","id" +'B cell (Plasmocyte)', "nan" +'B_CD27neg', "nan" +'B_CD27pos', "nan" +'Basal cell', "nan" +'Blood_vessel', "nan" +'CB CD34+', "nan" +'Dendritic cell', 
"nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epi_dividing', "nan" +'Epi_suprabasal', "nan" +'Epi_upper', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Glands_duct', "nan" +'Glands_mucous', "nan" +'Loop of Henle', "nan" +'Lymph_vessel', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'NK_T_CD8_Cytotoxic', "nan" +'Neutrophil', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T_CD4', "nan" +'T_CD8', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/eye.csv b/sfaira/versions/celltype_versions/target_universes/human/eye.csv new file mode 100644 index 000000000..0f6b31741 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/eye.csv @@ -0,0 +1,46 @@ +"name","id" +'Amacrine cell', "nan" +'Antigen presenting cell (RPS high)', "nan" +'B-cell', "nan" +'Basal cell', "nan" +'CB CD34_pos', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Epithelial cell (intermediated)', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal endocrine cell', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Horizontal cells', "nan" +'Macroglia', "nan" +'Macrophage', "nan" +'Mast-cell', "nan" +'Melanocyte', "nan" +'Microglia', "nan" +'Muller cell', "nan" +'Pericyte', "nan" +'Primordial germ cell', "nan" +'Retinal bipolar neuron type A', "nan" +'Retinal bipolar neuron type B', "nan" +'Retinal bipolar neuron type C', "nan" +'Retinal bipolar neuron type D', 
"nan" +'Retinal cone cell', "nan" +'Retinal ganglion cell', "nan" +'Retinal pigment epithelium', "nan" +'Retinal rod cell type A', "nan" +'Retinal rod cell type B', "nan" +'Retinal rod cell type C', "nan" +'Schwann1', "nan" +'Schwann2', "nan" +'Stratified epithelial cell', "nan" +'T cell', "nan" +'T/NK-cell', "nan" +'Unknown', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv b/sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv new file mode 100644 index 000000000..2cd434786 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv @@ -0,0 +1,19 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fibroblast', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv b/sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv new file mode 100644 index 000000000..705d508fa --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv @@ -0,0 +1,29 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fetal Neuron', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" 
+'Fetal stromal cell', "nan" +'Immature sertoli cell (Pre-Sertoli cell)', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv b/sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv new file mode 100644 index 000000000..938457fa1 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv @@ -0,0 +1,25 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Goblet cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Neutrophil', "nan" +'Primordial germ cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/heart.csv b/sfaira/versions/celltype_versions/target_universes/human/heart.csv new file mode 100644 index 000000000..8a34926bd --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/heart.csv @@ -0,0 +1,35 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" 
+'Fasciculata cell', "nan" +'Fetal Neuron', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Ventricle cardiomyocyte', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/hesc.csv b/sfaira/versions/celltype_versions/target_universes/human/hesc.csv new file mode 100644 index 000000000..fe1319815 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/hesc.csv @@ -0,0 +1,6 @@ +"name","id" +'Fetal epithelial progenitor', "nan" +'Fetal neuron', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/ileum.csv b/sfaira/versions/celltype_versions/target_universes/human/ileum.csv new file mode 100644 index 000000000..36407748a --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/ileum.csv @@ -0,0 +1,33 @@ +"name","id" +'ACKR1+ endothelium', "nan" +'B cells', "nan" +'CD36+ endothelium', "nan" +'Cycling', "nan" +'Dendritic cell', "nan" +'Enterocytes', "nan" +'Enteroendocrine cells', "nan" +'Fetal enterocyte ', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal stromal cell', "nan" +'Fibroblasts', "nan" +'Glial cells', "nan" +'Goblet cells', "nan" +'Hepatocyte/Endodermal cell', "nan" +'ILC', "nan" +'Lymphatics', "nan" +'M2 Macrophage', "nan" +'MNP', "nan" +'Macrophage', "nan" +'Mast cells', "nan" +'Monocyte', "nan" +'Neutrophil (RPS high)', "nan" +'Paneth cells', "nan" +'Pericytes', "nan" 
+'Plasma Cells', "nan" +'Progenitors', "nan" +'Smooth muscle cell', "nan" +'Stem Cell', "nan" +'Stromal cell', "nan" +'T cells', "nan" +'TA', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/jejunum.csv b/sfaira/versions/celltype_versions/target_universes/human/jejunum.csv new file mode 100644 index 000000000..c68fdc8bd --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/jejunum.csv @@ -0,0 +1,18 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fibroblast', "nan" +'Hepatocyte/Endodermal cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/kidney.csv b/sfaira/versions/celltype_versions/target_universes/human/kidney.csv new file mode 100644 index 000000000..4c0025670 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/kidney.csv @@ -0,0 +1,107 @@ +"name","id" +'Acinar cell', "nan" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'CD4 T cell', "nan" +'CD8 T cell', "nan" +'CNT/PC - proximal UB', "nan" +'Cap mesenchyme', "nan" +'Chondrocyte', "nan" +'Collecting Duct - Intercalated Cells Type A (cortex)', "nan" +'Collecting Duct - Intercalated Cells Type A (medulla)', "nan" +'Collecting Duct - Intercalated Cells Type B', "nan" +'Collecting Duct - PCs - Stressed Dissoc Subset', "nan" +'Collecting Duct - Principal Cells (cortex)', "nan" +'Collecting Duct - Principal Cells (medulla)', "nan" +'Connecting tubule', "nan" +'Decending Limb', "nan" +'Distal Convoluted Tubule', "nan" +'Distal S shaped body', "nan" +'Distal renal vesicle', 
"nan" +'Distinct proximal tubule 1', "nan" +'Distinct proximal tubule 2', "nan" +'Endocrine cell', "nan" +'Endothelial Cells (unassigned)', "nan" +'Endothelial Cells - AEA & DVR', "nan" +'Endothelial Cells - AVR', "nan" +'Endothelial Cells - glomerular capillaries', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte ', "nan" +'Enterocyte progenitor', "nan" +'Epithelial Cells (unassigned)', "nan" +'Epithelial progenitor', "nan" +'Erythroid', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Indistinct intercalated cell', "nan" +'Innate like lymphocyte', "nan" +'Intermediated cell', "nan" +'Interstitium', "nan" +'Loop of Henle', "nan" +'M2 Macrophage', "nan" +'MNP-a/classical monocyte derived', "nan" +'MNP-b/non-classical monocyte derived', "nan" +'MNP-c/dendritic cell', "nan" +'MNP-d/Tissue macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mast cells', "nan" +'Medial S shaped body', "nan" +'Megakaryocyte', "nan" +'Mesangial Cells', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Myofibroblast', "nan" +'NK cell', "nan" +'NKT cell', "nan" +'Neuron', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Pelvic epithelium', "nan" +'Pelvic epithelium - distal UB', "nan" +'Peritubular capillary endothelium 1', "nan" +'Peritubular capillary endothelium 2', "nan" +'Plasmacytoid dendritic cell', "nan" +'Podocyte', "nan" +'Primordial germ cell', "nan" +'Proliferating B cell', "nan" +'Proliferating NK cell', "nan" +'Proliferating Proximal Tubule', "nan" +'Proliferating T cell', "nan" +'Proliferating cDC2', "nan" +'Proliferating cap mesenchyme', "nan" +'Proliferating distal renal vesicle', "nan" +'Proliferating fibroblast', "nan" +'Proliferating macrophage', "nan" +'Proliferating monocyte', "nan" +'Proliferating myofibroblast', "nan" +'Proliferating stroma progenitor', "nan" +'Proximal 
S shaped body', "nan" +'Proximal Tubule Epithelial Cells (S1)', "nan" +'Proximal Tubule Epithelial Cells (S2)', "nan" +'Proximal Tubule Epithelial Cells (S3)', "nan" +'Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)', "nan" +'Proximal Tubule Epithelial Cells - Stress/Inflam', "nan" +'Proximal UB', "nan" +'Proximal renal vesicle', "nan" +'Proximal tubule progenitor', "nan" +'Sinusoidal endothelial cell', "nan" +'Skeletal muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stroma progenitor', "nan" +'Stromal cell', "nan" +'Thick ascending limb of Loop of Henle', "nan" +'Thin ascending limb', "nan" +'Transitional urothelium', "nan" +'Unknown - Novel PT CFH+ Subpopulation (S2)', "nan" +'Vascular Smooth Muscle Cells and pericytes', "nan" +'cDC1', "nan" +'cDC2', "nan" +'hESC', "nan" +'pDC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/liver.csv b/sfaira/versions/celltype_versions/target_universes/human/liver.csv new file mode 100644 index 000000000..2fdf95f7d --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/liver.csv @@ -0,0 +1,67 @@ +"name","id" +'Alpha beta T cells', "nan" +'Antigen presenting cell (RPS high)', "nan" +'CB CD34+', "nan" +'Central venous LSECs', "nan" +'Cholangiocytes', "nan" +'Dendritic cell 1', "nan" +'Dendritic cell 2', "nan" +'Dendritic cell precursor', "nan" +'Early Erythroid', "nan" +'Early lymphoid T lymphocyte', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte ', "nan" +'Enterocyte progenitor', "nan" +'Epithelial progenitor', "nan" +'Fibroblast', "nan" +'Gamma delta T cells 1', "nan" +'Gamma delta T cells 2', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'HSC MPP', "nan" +'Hepatic stellate cells', "nan" +'Hepatocyte 1', "nan" +'Hepatocyte 2', "nan" +'Hepatocyte 3', "nan" +'Hepatocyte 4', "nan" +'Hepatocyte 5', "nan" +'Hepatocyte 6', "nan" +'ILC', "nan" +'ILC precursor', "nan" +'Inflammatory 
macrophages', "nan" +'Kupffer Cell', "nan" +'Late Erythroid', "nan" +'Liver sinusoidal endothelial cells', "nan" +'MEMP', "nan" +'MP', "nan" +'Macrovascular endothelial cells', "nan" +'Mast cell', "nan" +'Mature B cells', "nan" +'Megakaryocyte', "nan" +'Mesenchyme', "nan" +'Mesothelia', "nan" +'Mid Erythroid', "nan" +'Mono Macrophage', "nan" +'Monocyte', "nan" +'Monocyte precursor', "nan" +'Myeloid cell', "nan" +'NK cell', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Neutrophil myeloid progenitor', "nan" +'Non inflammatory macrophages', "nan" +'Other endothelial cells', "nan" +'Pancreas exocrine cell', "nan" +'Periportal LSECs', "nan" +'Plasma B cell', "nan" +'Plasma cells', "nan" +'Pre pro B cell', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Smooth muscle cell', "nan" +'Unknown', "nan" +'VCAM1pos EI macrophage', "nan" +'pDendritic cell precursor', "nan" +'pre B cell', "nan" +'pro B cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/lung.csv b/sfaira/versions/celltype_versions/target_universes/human/lung.csv new file mode 100644 index 000000000..79840aeaf --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/lung.csv @@ -0,0 +1,39 @@ +"name","id" +'Cycling cells', "nan" +'Cartilage', "nan" +'Fetal airway progenitors', "nan" +'Mesothelium', "nan" +'AT1', "nan" +'AT2', "nan" +'Acinar', "nan" +'Airway smooth muscle', "nan" +'Arterial', "nan" +'B cell lineage', "nan" +'Basal', "nan" +'Bronchial Vessel 1', "nan" +'Bronchial Vessel 2', "nan" +'Capillary', "nan" +'Capillary Intermediate 1', "nan" +'Capillary Intermediate 2', "nan" +'Dendritic cells', "nan" +'Erythrocytes', "nan" +'Fibroblasts', "nan" +'Fibromyocyte', "nan" +'Innate lymphoid cells', "nan" +'KRT5-/KRT17+', "nan" +'Lymphatic EC', "nan" +'Macrophages', "nan" +'Mast cells', "nan" +'Megakaryocytes', "nan" +'Monocytes', "nan" +'Multiciliated lineage', "nan" +'Myofibroblasts', "nan" +'Neutrophilic', "nan" 
+'Proliferating Epithelial Cells', "nan" +'Rare', "nan" +'Secretory', "nan" +'Submucosal Secretory', "nan" +'T cell lineage', "nan" +'Venous', "nan" +'Venous smooth muscle', "nan" +'unknown', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/malegonad.csv b/sfaira/versions/celltype_versions/target_universes/human/malegonad.csv new file mode 100644 index 000000000..90983cb5b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/malegonad.csv @@ -0,0 +1,38 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Differentiating Spermatogonia', "nan" +'Early Primary Spermatocytes', "nan" +'Elongated Spermatids', "nan" +'Endothelial cells', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fetal acinar cell', "nan" +'Fetal chondrocyte', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Late primary Spermatocytes', "nan" +'Leydig cells', "nan" +'Loop of Henle', "nan" +'Macrophages', "nan" +'Monocyte', "nan" +'Myoid cells', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proximal tubule progenitor', "nan" +'Round Spermatids', "nan" +'Sertoli cells', "nan" +'Smooth muscle cell', "nan" +'Sperm', "nan" +'Spermatogonial Stem cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Ureteric bud cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/muscle.csv b/sfaira/versions/celltype_versions/target_universes/human/muscle.csv new file mode 100644 index 000000000..edf9169fd --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/muscle.csv @@ -0,0 +1,28 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB 
CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal Neuron', "nan" +'Fetal chondrocyte', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Ventricle cardiomyocyte', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/omentum.csv b/sfaira/versions/celltype_versions/target_universes/human/omentum.csv new file mode 100644 index 000000000..bdf05d5b6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/omentum.csv @@ -0,0 +1,21 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fibroblast', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mesothelial cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/pancreas.csv b/sfaira/versions/celltype_versions/target_universes/human/pancreas.csv new file mode 100644 index 000000000..38f08403b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/pancreas.csv @@ -0,0 +1,45 @@ +"name","id" +'Acinar cell', "nan" +'Activated Stellate cell', "nan" +'Alpha cell', "nan" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B 
cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'Beta cell', "nan" +'CB CD34+', "nan" +'Co-expression cell', "nan" +'Delta cell', "nan" +'Dendritic cell', "nan" +'Ductal cell', "nan" +'Endothelial cell', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Epithelial progenitor', "nan" +'Epsilon cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fibroblast', "nan" +'Gamma cell', "nan" +'Gastric endocrine cell', "nan" +'Immature sertoli cell (Pre-Sertoli cell)', "nan" +'MHC class II cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mesenchymal Cell', "nan" +'Monocyte', "nan" +'Neuron', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'PSC cell', "nan" +'Pancreas exocrine cell', "nan" +'Primordial germ cell', "nan" +'Proximal tubule progenitor', "nan" +'Quiescent Stellate cell', "nan" +'Schwann cell', "nan" +'Skeletal muscle cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Unclassified endocrine cell', "nan" +'Unknown', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/placenta.csv b/sfaira/versions/celltype_versions/target_universes/human/placenta.csv new file mode 100644 index 000000000..df7b729ea --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/placenta.csv @@ -0,0 +1,55 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'CB CD34+', "nan" +'Decidual Macrophages 1', "nan" +'Decidual Macrophages 2', "nan" +'Decidual Macrophages 3', "nan" +'Decidual NK Cells 1', "nan" +'Decidual NK Cells 2', "nan" +'Decidual NK Cells 3', "nan" +'Decidual NK Cells p', "nan" +'Decidual Stromal Cells 1', "nan" +'Decidual Stromal Cells 2', "nan" +'Decidual Stromal Cells 3', "nan" +'Dendritic Cells 1', "nan" +'Dendritic Cells 2', "nan" +'Endothelial Cells L', "nan" +'Endothelial Cells f', "nan" +'Endothelial Cells m', "nan" +'Endothelial cell (APC)', "nan" 
+'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial Glandular Cells 1', "nan" +'Epithelial Glandular Cells 2', "nan" +'Epithelial cell (intermediated)', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Extravillous Trophoblasts', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fibroblasts 1', "nan" +'Fibroblasts 2', "nan" +'Granulocytes', "nan" +'Hofbauer Cells', "nan" +'ILC3', "nan" +'Intermediated cell', "nan" +'M2 Macrophage', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'NK Cells CD16+', "nan" +'NK Cells CD16-', "nan" +'Neutrophil', "nan" +'Perivascular Cells 1', "nan" +'Perivascular Cells 2', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'Syncytiotrophoblasts', "nan" +'T cell', "nan" +'Villous Cytotrophoblasts', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/pleura.csv b/sfaira/versions/celltype_versions/target_universes/human/pleura.csv new file mode 100644 index 000000000..f6e172e90 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/pleura.csv @@ -0,0 +1,27 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mesothelial cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle 
cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/prostate.csv b/sfaira/versions/celltype_versions/target_universes/human/prostate.csv new file mode 100644 index 000000000..931fbb479 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/prostate.csv @@ -0,0 +1,24 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'Basal cell', "nan" +'Club', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Enterocyte progenitor', "nan" +'Epithelial cell (intermediated)', "nan" +'Fasciculata cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Hillock', "nan" +'Leukocytes', "nan" +'Luminal', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/rectum.csv b/sfaira/versions/celltype_versions/target_universes/human/rectum.csv new file mode 100644 index 000000000..b81da10b8 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/rectum.csv @@ -0,0 +1,20 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Dendritic cell', "nan" +'Endothelial cell (APC)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Enteroendocrine', "nan" +'Erythroid cell', "nan" +'Fetal stromal cell', "nan" +'Goblet', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Paneth-like', "nan" +'Smooth muscle cell', "nan" +'Stem Cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'TA', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/rib.csv b/sfaira/versions/celltype_versions/target_universes/human/rib.csv new file mode 100644 index 000000000..13f2ae832 --- /dev/null +++ 
b/sfaira/versions/celltype_versions/target_universes/human/rib.csv @@ -0,0 +1,25 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal Neuron', "nan" +'Fetal chondrocyte', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Kidney intercalated cell', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/skin.csv b/sfaira/versions/celltype_versions/target_universes/human/skin.csv new file mode 100644 index 000000000..7900982b4 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/skin.csv @@ -0,0 +1,38 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'Basal cell 1', "nan" +'Basal cell 2', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal Neuron', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Kidney intercalated cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'WNT1', "nan" +'channel', "nan" +'folicular', "nan" +'granular', "nan" +'hESC', "nan" +'melanocyte', "nan" 
+'mitotic', "nan" +'spinous', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv b/sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv new file mode 100644 index 000000000..9d3d26f90 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv @@ -0,0 +1,36 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'Astrocyte', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Epithelial cell', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fetal Neuron', "nan" +'Fetal chondrocyte', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Kidney intercalated cell', "nan" +'Loop of Henle', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/spleen.csv b/sfaira/versions/celltype_versions/target_universes/human/spleen.csv new file mode 100644 index 000000000..b1dcff92e --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/spleen.csv @@ -0,0 +1,44 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B_Hypermutation', "nan" +'B_T_doublet', "nan" +'B_follicular', "nan" +'B_mantle', "nan" +'CB CD34+', "nan" +'CD34_progenitor', "nan" +'DC_1', "nan" +'DC_2', "nan" +'DC_activated', "nan" +'DC_plasmacytoid', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Erythroid cell', "nan" 
+'Erythroid progenitor cell (RP high)', "nan" +'Fetal epithelial progenitor', "nan" +'Fibroblast', "nan" +'ILC', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'NK_CD160pos', "nan" +'NK_FCGR3Apos', "nan" +'NK_dividing', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Plasma_IgG', "nan" +'Plasma_IgM', "nan" +'Plasmablast', "nan" +'Platelet', "nan" +'Proliferating T cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T_CD4_conv', "nan" +'T_CD4_fh', "nan" +'T_CD4_naive', "nan" +'T_CD4_reg', "nan" +'T_CD8_CTL', "nan" +'T_CD8_MAIT', "nan" +'T_CD8_activated', "nan" +'T_CD8_gd', "nan" +'unknown', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/stomach.csv b/sfaira/versions/celltype_versions/target_universes/human/stomach.csv new file mode 100644 index 000000000..b14902524 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/stomach.csv @@ -0,0 +1,49 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte', "nan" +'Enterocyte progenitor', "nan" +'Epithelial cell', "nan" +'Epithelial cell (intermediated)', "nan" +'Erythroid cell', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fetal Neuron', "nan" +'Fetal acinar cell', "nan" +'Fetal chondrocyte', "nan" +'Fetal endocrine cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal neuron', "nan" +'Fetal skeletal muscle cell', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Gastric chief cell', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Hepatocyte/Endodermal cell', "nan" +'M2 
Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Mesothelial cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Proximal tubule progenitor', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'hESC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/thymus.csv b/sfaira/versions/celltype_versions/target_universes/human/thymus.csv new file mode 100644 index 000000000..5db380277 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/thymus.csv @@ -0,0 +1,51 @@ +"name","id" +'Antigen presenting cell (RPS high)', "nan" +'B_memory', "nan" +'B_naive', "nan" +'B_plasma', "nan" +'B_pro/pre', "nan" +'CB CD34+', "nan" +'CD4+T', "nan" +'CD4+Tmem', "nan" +'CD8+T', "nan" +'CD8+Tmem', "nan" +'CD8αα', "nan" +'DC1', "nan" +'DC2', "nan" +'DN', "nan" +'DP', "nan" +'ETP', "nan" +'Endo', "nan" +'Epi_GCM2', "nan" +'Ery', "nan" +'Fb_1', "nan" +'Fb_2', "nan" +'Fb_cycling', "nan" +'Fetal epithelial progenitor', "nan" +'ILC3', "nan" +'Lymph', "nan" +'Mac', "nan" +'Mast', "nan" +'Mgk', "nan" +'Mono', "nan" +'NK', "nan" +'NKT', "nan" +'NMP', "nan" +'Neutrophil', "nan" +'Neutrophil (RPS high)', "nan" +'Proliferating T cell', "nan" +'T(agonist)', "nan" +'TEC(myo)', "nan" +'TEC(neuro)', "nan" +'Treg', "nan" +'VSMC', "nan" +'aDC', "nan" +'alpha_beta_T(entry)', "nan" +'cTEC', "nan" +'gamma_delta_T', "nan" +'mTEC(I)', "nan" +'mTEC(II)', "nan" +'mTEC(III)', "nan" +'mTEC(IV)', "nan" +'mcTEC', "nan" +'pDC', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/thyroid.csv b/sfaira/versions/celltype_versions/target_universes/human/thyroid.csv new file mode 100644 index 000000000..f1484f64e --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/thyroid.csv @@ -0,0 +1,28 @@ +"name","id" +'Antigen presenting cell (RPS 
high)', "nan" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'CB CD34+', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Enterocyte progenitor', "nan" +'Erythroid progenitor cell (RP high)', "nan" +'Fasciculata cell', "nan" +'Fetal enterocyte ', "nan" +'Fetal epithelial progenitor', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Loop of Henle', "nan" +'M2 Macrophage', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Neutrophil', "nan" +'Primordial germ cell', "nan" +'Proliferating T cell', "nan" +'Sinusoidal endothelial cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Thyroid follicular cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/trachea.csv b/sfaira/versions/celltype_versions/target_universes/human/trachea.csv new file mode 100644 index 000000000..43c4e12d4 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/trachea.csv @@ -0,0 +1,25 @@ +"name","id" +'B cell', "nan" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'Chondrocyte', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte progenitor', "nan" +'Fetal chondrocyte', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Goblet cell', "nan" +'Loop of Henle', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" +'Thyroid follicular cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/ureter.csv b/sfaira/versions/celltype_versions/target_universes/human/ureter.csv new file mode 100644 index 000000000..91ad5d6f6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/ureter.csv 
@@ -0,0 +1,14 @@ +"name","id" +'B cell (Plasmocyte)', "nan" +'Basal cell', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Epithelial cell (intermediated)', "nan" +'Fibroblast', "nan" +'Intermediated cell', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Smooth muscle cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/human/uterus.csv b/sfaira/versions/celltype_versions/target_universes/human/uterus.csv new file mode 100644 index 000000000..769d83afe --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/human/uterus.csv @@ -0,0 +1,24 @@ +"name","id" +'AT2 cell', "nan" +'B cell', "nan" +'Dendritic cell', "nan" +'Endothelial cell', "nan" +'Endothelial cell (APC)', "nan" +'Endothelial cell (endothelial to mesenchymal transition)', "nan" +'Enterocyte progenitor', "nan" +'Fetal epithelial progenitor', "nan" +'Fetal fibroblast', "nan" +'Fetal mesenchymal progenitor', "nan" +'Fetal stromal cell', "nan" +'Fibroblast', "nan" +'Gastric endocrine cell', "nan" +'Loop of Henle', "nan" +'Macrophage', "nan" +'Mast cell', "nan" +'Monocyte', "nan" +'Myeloid cell', "nan" +'Primordial germ cell', "nan" +'Smooth muscle cell', "nan" +'Stratified epithelial cell', "nan" +'Stromal cell', "nan" +'T cell', "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/__init__.py b/sfaira/versions/celltype_versions/target_universes/mouse/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv b/sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv new file mode 100644 index 000000000..07e41cccd --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv @@ -0,0 +1,12 @@ +"name","id" +"B cell", 
"CL:0000236" +"CD4-positive, alpha-beta T cell", "nan" +"CD8-positive, alpha-beta T cell", "nan" +"endothelial cell", "CL:0000115" +"epithelial cell", "CL:0000066" +"erythroblast", "nan" +"macrophage", "nan" +"mesenchymal stem cell of adipose", "CL:0002570" +"myeloid cell", "CL:0000763" +"NK cell", "CL:0000623" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv b/sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv new file mode 100644 index 000000000..c891c4e88 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv @@ -0,0 +1,13 @@ +"name","id" +"basal epithelial cell", "nan" +"bladder urothelial cell", "CL:1001428" +"dendritic cell", "nan" +"endothelial cell", "CL:0000115" +"epithelial cell", "nan" +"macrophage", "nan" +"mesenchymal stromal cell", "nan" +"NK cell", "nan" +"smooth muscle cell", "nan" +"stromal cell", "nan" +"umbrella cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/blood.csv b/sfaira/versions/celltype_versions/target_universes/mouse/blood.csv new file mode 100644 index 000000000..36f732834 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/blood.csv @@ -0,0 +1,11 @@ +"name","id" +"B cell", "CL:0000236" +"macrophage", "CL:0000235" +"T cell", "CL:0000084" +"NK cell", "nan" +"neutrophil", "nan" +"monocyte", "nan" +"erythroblast", "nan" +"dendritic cell", "nan" +"basophil", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/bone.csv b/sfaira/versions/celltype_versions/target_universes/mouse/bone.csv new file mode 100644 index 000000000..e6bbf18ca --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/bone.csv @@ -0,0 +1,29 @@ +"name","id" +"basophil", "CL:0000767" +"CD4-positive, alpha-beta T cell", "nan" +"dendritic cell", "nan" +"early pro-B cell", "CL:0002046" +"erythroblast", "CL:0000765" +"erythrocyte", 
"CL:0000232" +"erythroid progenitor", "CL:0000038" +"granulocyte monocyte progenitor cell", "nan" +"granulocytopoietic cell", "CL:0002191" +"hematopoietic precursor cell", "CL:0008001" +"hematopoietic stem cell", "nan" +"immature B cell", "CL:0000816" +"late pro-B cell", "CL:0002048" +"lymphoid progenitor cell", "nan" +"macrophage", "nan" +"mast cell", "nan" +"monocyte", "CL:0000576" +"megakaryocyte-erythroid progenitor cell", "CL:0000050" +"naive B cell", "CL:0000788" +"naive T cell", "CL:0000898" +"neutrophil", "nan" +"neutrophil progenitor", "nan" +"NK cell", "CL:0000623" +"plasma cell", "CL:0000786" +"precursor B cell", "CL:0000817" +"proerythroblast", "CL:0000547" +"promonocyte", "CL:0000559" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/brain.csv b/sfaira/versions/celltype_versions/target_universes/mouse/brain.csv new file mode 100644 index 000000000..2bde86f6a --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/brain.csv @@ -0,0 +1,32 @@ +"name","id" +"astrocyte", "CL:0000127" +"BAM", "nan" +"B cells", "nan" +"Bergmann glial cell", "CL:0000644" +"brain pericyte", "CL:2000043" +"CD8-positive, alpha-beta T cell", "CL:0000625" +"cDC1", "nan" +"cDC2", "nan" +"endothelial cell", "CL:0000115" +"ependymal cell", "CL:0000065" +"GABAergic cell", "nan" +"granulocyte", "nan" +"ILC", "nan" +"interneuron", "CL:0000099" +"macrophage", "CL:0000235" +"mature NK T cell", "nan" +"medium spiny neuron", "CL:1001474" +"microglial cell", "CL:0000129" +"migDC", "nan" +"monocyte", "nan" +"neuroepithelial cell", "nan" +"neuron", "CL:0000540" +"neuronal stem cell", "CL:0000047" +"neutorphils", "nan" +"NK cells", "nan" +"oligodendrocyte", "CL:0000128" +"oligodendrocyte precursor cell", "CL:0002453" +"pDC", "nan" +"schwann cell", "nan" +"yd T cells", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/colon.csv b/sfaira/versions/celltype_versions/target_universes/mouse/colon.csv 
new file mode 100644 index 000000000..456eb6d39 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/colon.csv @@ -0,0 +1,10 @@ +"name","id" +"Brush cell of epithelium proper of large intestine", "CL:0002203" +"enterocyte of epithelium of large intestine", "CL:0002071" +"enteroendocrine cell", "CL:0000164" +"epithelial cell of large intestine", "CL:0002253" +"hematopoietic stem cell", "CL:0000037" +"intestinal crypt stem cell", "CL:0002250" +"large intestine goblet cell", "CL:1000320" +"secretory cell", "CL:0000151" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv b/sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv new file mode 100644 index 000000000..3e7a14b1d --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv @@ -0,0 +1,8 @@ +"name","id" +"B cell", "CL:0000236" +"endothelial cell", "CL:0000115" +"macrophage", "CL:0000235" +"mesenchymal stem cell", "CL:0000134" +"skeletal muscle satellite cell", "CL:0000594" +"T cell", "CL:0000084" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/heart.csv b/sfaira/versions/celltype_versions/target_universes/mouse/heart.csv new file mode 100644 index 000000000..7bd722c7a --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/heart.csv @@ -0,0 +1,22 @@ +"name","id" +"aortic endothelial cell", "CL:0002544" +"atrial myocyte", "CL:0002129" +"B cell", "CL:CL:0000115" +"cardiac neuron", "CL:0000057" +"cardiomyocyte", "CL:0000746" +"endocardial cell", "CL:0002350" +"endothelial cell of coronary artery", "CL:2000018" +"epithelial cell", "CL:" +"erythrocyte", "CL:" +"fibroblast of cardiac tissue", "CL:0002548" +"fibrocyte", "CL:CL:0000145" +"leukocyte", "CL:0000738" +"mast cell", "nan" +"monocyte", "nan" +"macrophage", "nan" +"professional antigen presenting cell", "nan" +"smooth muscle cell", "CL:0000192" +"T cell", "nan" +"valve cell", 
"CL:0000663" +"ventricular myocyte", "CL:0002131" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv b/sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv new file mode 100644 index 000000000..b91c94332 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv @@ -0,0 +1,13 @@ +"name","id" +"B cell", "CL:0000236" +"macrophage", "CL:0000235" +"T cell", "CL:0000084" +"dendritic cell", "nan" +"mast cell", "nan" +"paneth cell", "nan" +"stromal cell", "nan" +"epithelial cell", "nan" +"epithelial cell villi", "nan" +"enteroendocrine cell", "nan" +"erythroblast", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv b/sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv new file mode 100644 index 000000000..154828fe0 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv @@ -0,0 +1,29 @@ +"name","id" +"B cell", "CL:0000236" +"brush cell", "nan" +"dendritic cell", "nan" +"endothelial cell", "nan" +"fenestrated cell", "CL:0000666" +"fetal adipocyte", "nan" +"fetal mesenchymal cell", "nan" +"fetal proliferative cell", "nan" +"fibroblast", "CL:0000057" +"interstitial fibroblast", "nan" +"glomerular epithelial cell", "nan" +"kidney collecting duct epithelial cell", "CL:1000454" +"kidney collecting duct principal cell", "CL:1001431" +"kidney cortex artery cell", "CL:1001045" +"kidney distal convoluted tubule epithelial cell", "CL:1000849" +"kidney loop of Henle ascending limb epithelial cell", "CL:1001016" +"kidney loop of Henle thick ascending limb epithelial cell", "CL:1001106" +"kidney proximal convoluted tubule epithelial cell", "CL:1000838" +"kidney proximal straight tubule epithelial cell", "nan" +"macrophage", "CL:0000235" +"mesangial cell", "CL:0000650" +"neutrophil progenitor", "nan" +"NK cell", "nan" +"podocyte", "CL:0000653" +"plasma cell", "CL:0000786" +"T cell", "CL:0000084" 
+"ureteric epithelial cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/liver.csv b/sfaira/versions/celltype_versions/target_universes/mouse/liver.csv new file mode 100644 index 000000000..eabf79942 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/liver.csv @@ -0,0 +1,18 @@ +"name","id" +"B cell", "CL:0000236" +"dendritic cell", "nan" +"CD4-positive, alpha-beta T cell", "nan" +"CD8-positive, alpha-beta T cell", "nan" +"duct epithelial cell", "nan" +"erythroblast", "nan" +"endothelial cell of hepatic sinusoid", "CL:1000398" +"granulocyte", "nan" +"hepatic stellate cell", "CL:0000632" +"hepatocyte", "CL:0000182" +"Kupffer cell", "CL:0000091" +"macrophage", "nan" +"neutrophil", "CL:0000775" +"NK cell", "CL:0000623" +"plasmacytoid dendritic cell", "CL:0000784" +"stromal cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/lung.csv b/sfaira/versions/celltype_versions/target_universes/mouse/lung.csv new file mode 100644 index 000000000..b65598dcd --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/lung.csv @@ -0,0 +1,40 @@ +"name","id" +"adventitial cell", "nan" +"alveolar bipotent progenitor", "nan" +"alveolar epithelial cell type I", "nan" +"alveolar epithelial cell type II", "nan" +"alveolar macrophage", "nan" +"B cell", "CL_0000236" +"basophil", "nan" +"bronchial smooth muscle cell", "nan" +"CD4-positive, alpha-beta T cell", "nan" +"CD8-positive, alpha-beta T cell", "nan" +"ciliated cell", "nan" +"clara cell", "nan" +"classical monocyte", "nan" +"club cell of bronchiole", "nan" +"endothelial cell of lymphatic vessel", "nan" +"eosinophil", "nan" +"fibroblast of lung", "nan" +"glial cell", "CL_0000125" +"intermediate monocyte", "nan" +"lung macrophage", "nan" +"lung neuroendocrine cell", "nan" +"monocyte progenitor", "nan" +"myeloid dendritic cell", "nan" +"neutrophil", "nan" +"NK cell", "CL_0000623" +"non-classical 
monocyte", "nan" +"nuocyte", "nan" +"pericyte cell", "nan" +"plasma cell", "nan" +"plasmacytoid dendritic cell", "nan" +"proliferative cell", "nan" +"pulmonary interstitial fibroblast", "nan" +"regulatory T cell", "nan" +"respiratory basal cell", "nan" +"smooth muscle cell of the pulmonary artery", "nan" +"type I pneumocyte", "nan" +"type II pneumocyte", "nan" +"vein endothelial cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv b/sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv new file mode 100644 index 000000000..f0c14d10b --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv @@ -0,0 +1,12 @@ +"name","id" +"macrophage", "CL:0000235" +"leydig cell", "nan" +"elongating spermatid", "nan" +"erythroblast", "nan" +"pre-sertoli cell", "nan" +"sertoli cell", "nan" +"preleptotene spermatogonia", "nan" +"spermatogonia", "nan" +"spermatocyte", "nan" +"spermatid", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv b/sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv new file mode 100644 index 000000000..88f6eb93d --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv @@ -0,0 +1,14 @@ +"name","id" +"B cell", "CL:0000236" +"basal cell", "CL:0000646" +"endothelial cell", "CL:0000115" +"luminal epithelial cell of mammary gland", "CL:0002326" +"luminal progenitor cell", "CL:0002326" +"macrophage", "CL:0000235" +"stromal cell", "CL:0000499" +"T cell", "CL:0000084" +"dendritic cell", "nan" +"proliferative cell", "nan" +"NK cell", "CL:0000623" +"stem and progenitor cell", "CL:0000623" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv b/sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv new file mode 100644 index 000000000..955f4e225 --- /dev/null +++ 
b/sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv @@ -0,0 +1,17 @@ +"name","id" +"B cell", "CL:0000236" +"dendritic cell", "nan" +"endothelial cell", "CL:0000115" +"erythroblast", "nan" +"macrophage", "CL:0000235" +"mesenchymal stem cell", "CL:0000134" +"monocyte progenitor", "nan" +"muscle cell", "nan" +"neutrophil", "nan" +"Schwann cell", "CL:0002573" +"smooth muscle cell", "CL:0000192" +"stromal cell", "nan" +"skeletal muscle cell", "CL:0000192" +"skeletal muscle satellite cell", "CL:0000594" +"T cell", "CL:0000084" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv b/sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv new file mode 100644 index 000000000..4e54b44b0 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv @@ -0,0 +1,11 @@ +"name","id" +"cumulus cell", "nan" +"granulosa cell", "nan" +"large luteal cell", "nan" +"macrophage", "nan" +"small luteal cell", "nan" +"epithelial cell of ovarian surface", "nan" +"endothelial cell of ovarian surface", "nan" +"stromal cell", "nan" +"thecal cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv b/sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv new file mode 100644 index 000000000..4fba2e83f --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv @@ -0,0 +1,20 @@ +"name","id" +"b cell", "nan" +"dendritic cell", "nan" +"endothelial cell", "CL:0000115" +"erythroblast", "nan" +"glial cell", "nan" +"granulocyte", "nan" +"macrophage", "nan" +"pancreatic A cell", "CL:0000171" +"pancreatic acinar cell", "CL:0002064" +"pancreatic B cell", "CL:0000169" +"pancreatic D cell", "CL:0000173" +"pancreatic ductal cell", "CL:0002079" +"pancreatic PP cell", "CL:0002275" +"pancreatic stellate cell", "CL:0002410" +"smooth muscle cell", "nan" +"stromal cell", "nan" +"t cell", "nan" +"lymphatic endothelial 
cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv b/sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv new file mode 100644 index 000000000..20d6b4d7a --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv @@ -0,0 +1,24 @@ +"name","id" +"B cell", "CL:0000236" +"endothelial cell", "CL:0000115" +"macrophage", "CL:0000235" +"stromal cell", "CL:0000499" +"erythroblast", "nan" +"granulocyte", "nan" +"basophil", "nan" +"decidual stromal cell", "nan" +"dendritic cell", "nan" +"endodermal cell", "nan" +"monocyte progenitor", "nan" +"HSPC", "nan" +"megakaryocte", "nan" +"monocyte", "nan" +"NK cell", "nan" +"NKT cell", "nan" +"PE lineage cell", "nan" +"trophoblast progenitor", "nan" +"labyrinthine trophoblast", "nan" +"spiral artery trophoblast giant cells", "nan" +"invasive spongiotrophoblast", "nan" +"spongiotrophoblast", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv b/sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv new file mode 100644 index 000000000..a27a30f99 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv @@ -0,0 +1,8 @@ +"name","id" +"epithelial cell", "nan" +"glandular epithelial cell", "nan" +"T cell", "CL:0000084" +"glandular cell", "nan" +"stromal cell", "nan" +"dendritic cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/rib.csv b/sfaira/versions/celltype_versions/target_universes/mouse/rib.csv new file mode 100644 index 000000000..274cad925 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/rib.csv @@ -0,0 +1,16 @@ +"name","id" +"B cell", "CL:0000236" +"endothelial cell", "CL:0000115" +"macrophage", "CL:0000235" +"stromal cell", "CL:0000499" +"proliferative cell", "nan" +"cartilage cell", "nan" +"erythroblast", "nan" +"granulocyte", "nan" +"muscle 
cell", "nan" +"neuron", "nan" +"neutrophil", "nan" +"osteoblast", "nan" +"osteoclast", "nan" +"oligodendrocyte", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/skin.csv b/sfaira/versions/celltype_versions/target_universes/mouse/skin.csv new file mode 100644 index 000000000..30d107948 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/skin.csv @@ -0,0 +1,10 @@ +"name","id" +"basal cell of epidermis", "CL:0002187" +"bulge keratinocyte", "nan" +"epidermal cell", "CL:0000362" +"fibroblast", "nan" +"keratinocyte stem cell", "CL:0002337" +"macrophage", "nan" +"stem cell of epidermis", "CL:1000428" +"T cell", "CL:0000084" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv b/sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv new file mode 100644 index 000000000..4be417490 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv @@ -0,0 +1,18 @@ +"name","id" +"B cell", "CL:0000236" +"CD4-positive, alpha-beta T cell", "nan" +"CD8-positive, alpha-beta T cell", "nan" +"dendritic cell", "nan" +"erythroblast", "CL:0000765" +"granulocyte", "CL:0000094" +"immature NKT cell", "nan" +"macrophage", "nan" +"macrophage dendritic cell progenitor", "CL:0002009" +"mature NK T cell", "nan" +"megakaryocyte-erythroid progenitor cell", "nan" +"monocyte", "nan" +"neutrophil", "nan" +"NK cell", "CL:0000623" +"plasma cell", "nan" +"proerythroblast", "CL:0000547" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv b/sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv new file mode 100644 index 000000000..2d501a9bb --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv @@ -0,0 +1,14 @@ +"name","id" +"antral mucous cell", "nan" +"dendritic cell", "nan" +"G cell", "nan" +"gastric mucosal cell", "nan" +"epithelial cell", "nan" +"muscle 
cell", "nan" +"macrophage", "CL:0000235" +"parietal cell", "nan" +"pit cell", "nan" +"proliferative cell", "nan" +"stomach cell", "nan" +"tuft cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv b/sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv new file mode 100644 index 000000000..60d2c23d6 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv @@ -0,0 +1,16 @@ +"name","id" +"abT cell", "nan" +"B cell", "nan" +"dendritic cell", "nan" +"DN1 thymocyte", "nan" +"DN2 thymocyte", "nan" +"DN3 thymocyte", "nan" +"DN4 thymocyte", "nan" +"double positive T cell", "nan" +"endothelial cell", "CL:0000115" +"epithelial cell of thymus", "CL:0002293" +"fibroblast", "nan" +"gdT cell", "nan" +"macrophage", "nan" +"professional antigen presenting cell", "CL:0000145" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv b/sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv new file mode 100644 index 000000000..963780d95 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv @@ -0,0 +1,5 @@ +"name","id" +"basal cell of epidermis", "CL:0002187" +"keratinocyte", "CL:0000312" +"Langerhans cell", "nan" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv b/sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv new file mode 100644 index 000000000..6a197dcd3 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv @@ -0,0 +1,17 @@ +"name","id" +"basal epithelial cell of tracheobronchial tree", "CL:0002329" +"chondrocyte", "CL:0000138" +"ciliated columnar cell of tracheobronchial tree", "CL:0002145" +"endothelial cell", "CL:0000115" +"epithelial cell", "CL:0000066" +"fibroblast", "CL:0000057" +"granulocyte", "CL:0000094" +"keratinocyte", "nan" +"macrophage", "CL:0000235" +"mesenchymal cell", "nan" 
+"mesenchymal progenitor cell", "nan" +"mucus secreting cell", "CL:0000319" +"neuroendocrine cell", "nan" +"smooth muscle cell of trachea", "CL:0002600" +"T cell", "CL:0000084" +"unknown", "nan" diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv b/sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv new file mode 100644 index 000000000..55f6d8827 --- /dev/null +++ b/sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv @@ -0,0 +1,14 @@ +"name","id" +"B cell", "CL:0000236" +"dendritic cell", "nan" +"granulocyte", "nan" +"macrophage", "CL:0000235" +"NK cell", "CL:0000623" +"stromal cell", "nan" +"endothelial cell", "nan" +"glandular epithelial cell", "nan" +"keratinocyte", "nan" +"monocyte", "nan" +"muscle cell", "nan" +"smooth muscle cell", "nan" +"unknown", "nan" From f35f6bc54dbed2bc21a0ff920cfef2c3f708d790 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 4 Feb 2021 11:47:52 +0100 Subject: [PATCH 035/161] Improved co automation (#90) * improved ontology based fuzzy string matching * added generalised Obo ontologies * constrained anatomical meta data by Uberon * added uberon ontology constraint to anatomy and linked cell type prediction to anatomy of sample * added ebi web interface based protocol ontology * generalised usage of ontologies for meta data --- sfaira/__init__.py | 1 + sfaira/api/celltypes.py | 1 - sfaira/consts/adata_fields.py | 11 +- sfaira/data/base.py | 42 +- sfaira/estimators/external.py | 2 +- sfaira/interface/model_zoo.py | 2 +- sfaira/models/celltype/external.py | 4 - sfaira/models/celltype/marker.py | 9 +- sfaira/models/celltype/mlp.py | 9 +- sfaira/models/embedding/ae.py | 6 +- sfaira/models/embedding/external.py | 4 - sfaira/models/embedding/linear.py | 6 +- sfaira/models/embedding/vae.py | 6 +- sfaira/models/embedding/vaeiaf.py | 8 +- sfaira/models/embedding/vaevamp.py | 6 +- sfaira/train/summaries.py | 2 +- sfaira/unit_tests/external.py | 2 +- 
sfaira/unit_tests/test_celltype_universe.py | 12 +- sfaira/unit_tests/test_data_template.py | 3 +- sfaira/unit_tests/test_estimator.py | 2 +- sfaira/versions/__init__.py | 2 +- sfaira/versions/celltype_versions/__init__.py | 1 - sfaira/versions/celltype_versions/base.py | 316 ------- sfaira/versions/metadata/__init__.py | 7 + sfaira/versions/metadata/base.py | 822 ++++++++++++++++++ .../extensions/__init__.py | 0 .../extensions/obo_extension_human.py | 0 .../extensions/obo_extension_mouse.py | 0 .../human/__init__.py | 0 .../target_universes/__init__.py | 0 .../target_universes/human/__init__.py | 0 .../target_universes/human/adipose.csv | 0 .../target_universes/human/adrenalgland.csv | 0 .../target_universes/human/artery.csv | 0 .../target_universes/human/bladder.csv | 0 .../target_universes/human/blood.csv | 0 .../target_universes/human/bone.csv | 0 .../target_universes/human/brain.csv | 0 .../target_universes/human/calvaria.csv | 0 .../target_universes/human/cervix.csv | 0 .../human/chorionicvillus.csv | 0 .../target_universes/human/colon.csv | 0 .../target_universes/human/duodenum.csv | 0 .../target_universes/human/epityphlon.csv | 0 .../target_universes/human/esophagus.csv | 0 .../target_universes/human/eye.csv | 0 .../target_universes/human/fallopiantube.csv | 0 .../target_universes/human/femalegonad.csv | 0 .../target_universes/human/gallbladder.csv | 0 .../target_universes/human/heart.csv | 0 .../target_universes/human/hesc.csv | 0 .../target_universes/human/ileum.csv | 0 .../target_universes/human/jejunum.csv | 0 .../target_universes/human/kidney.csv | 0 .../target_universes/human/liver.csv | 0 .../target_universes/human/lung.csv | 0 .../target_universes/human/malegonad.csv | 0 .../target_universes/human/muscle.csv | 0 .../target_universes/human/omentum.csv | 0 .../target_universes/human/pancreas.csv | 0 .../target_universes/human/placenta.csv | 0 .../target_universes/human/pleura.csv | 0 .../target_universes/human/prostate.csv | 0 
.../target_universes/human/rectum.csv | 0 .../target_universes/human/rib.csv | 0 .../target_universes/human/skin.csv | 0 .../target_universes/human/spinalcord.csv | 0 .../target_universes/human/spleen.csv | 0 .../target_universes/human/stomach.csv | 0 .../target_universes/human/thymus.csv | 0 .../target_universes/human/thyroid.csv | 0 .../target_universes/human/trachea.csv | 0 .../target_universes/human/ureter.csv | 0 .../target_universes/human/uterus.csv | 0 .../target_universes/mouse/__init__.py | 0 .../target_universes/mouse/adipose.csv | 0 .../target_universes/mouse/bladder.csv | 0 .../target_universes/mouse/blood.csv | 0 .../target_universes/mouse/bone.csv | 0 .../target_universes/mouse/brain.csv | 0 .../target_universes/mouse/colon.csv | 0 .../target_universes/mouse/diaphragm.csv | 0 .../target_universes/mouse/heart.csv | 0 .../target_universes/mouse/ileum.csv | 0 .../target_universes/mouse/kidney.csv | 0 .../target_universes/mouse/liver.csv | 0 .../target_universes/mouse/lung.csv | 0 .../target_universes/mouse/malegonad.csv | 0 .../target_universes/mouse/mammarygland.csv | 0 .../target_universes/mouse/muscle.csv | 0 .../target_universes/mouse/ovary.csv | 0 .../target_universes/mouse/pancreas.csv | 0 .../target_universes/mouse/placenta.csv | 0 .../target_universes/mouse/prostate.csv | 0 .../target_universes/mouse/rib.csv | 0 .../target_universes/mouse/skin.csv | 0 .../target_universes/mouse/spleen.csv | 0 .../target_universes/mouse/stomach.csv | 0 .../target_universes/mouse/thymus.csv | 0 .../target_universes/mouse/tongue.csv | 0 .../target_universes/mouse/trachea.csv | 0 .../target_universes/mouse/uterus.csv | 0 102 files changed, 906 insertions(+), 380 deletions(-) delete mode 100644 sfaira/api/celltypes.py delete mode 100644 sfaira/models/celltype/external.py delete mode 100644 sfaira/models/embedding/external.py delete mode 100644 sfaira/versions/celltype_versions/__init__.py delete mode 100644 sfaira/versions/celltype_versions/base.py create mode 100644 
sfaira/versions/metadata/__init__.py create mode 100644 sfaira/versions/metadata/base.py rename sfaira/versions/{celltype_versions => metadata}/extensions/__init__.py (100%) rename sfaira/versions/{celltype_versions => metadata}/extensions/obo_extension_human.py (100%) rename sfaira/versions/{celltype_versions => metadata}/extensions/obo_extension_mouse.py (100%) rename sfaira/versions/{celltype_versions => metadata}/human/__init__.py (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/__init__.py (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/__init__.py (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/adipose.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/adrenalgland.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/artery.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/bladder.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/blood.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/bone.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/brain.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/calvaria.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/cervix.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/chorionicvillus.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/colon.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/duodenum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/epityphlon.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/esophagus.csv 
(100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/eye.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/fallopiantube.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/femalegonad.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/gallbladder.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/heart.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/hesc.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/ileum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/jejunum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/kidney.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/liver.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/lung.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/malegonad.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/muscle.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/omentum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/pancreas.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/placenta.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/pleura.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/prostate.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/rectum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/rib.csv (100%) rename sfaira/versions/{celltype_versions => 
metadata}/target_universes/human/skin.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/spinalcord.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/spleen.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/stomach.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/thymus.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/thyroid.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/trachea.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/ureter.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/human/uterus.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/__init__.py (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/adipose.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/bladder.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/blood.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/bone.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/brain.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/colon.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/diaphragm.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/heart.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/ileum.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/kidney.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/liver.csv (100%) rename 
sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/lung.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/malegonad.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/mammarygland.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/muscle.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/ovary.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/pancreas.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/placenta.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/prostate.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/rib.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/skin.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/spleen.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/stomach.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/thymus.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/tongue.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/trachea.csv (100%) rename sfaira/versions/{celltype_versions => metadata}/target_universes/mouse/uterus.csv (100%) diff --git a/sfaira/__init__.py b/sfaira/__init__.py index feb4fbd59..a51ea95e3 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -7,6 +7,7 @@ import sfaira.genomes import sfaira.data import sfaira.consts as consts +import sfaira.versions from ._version import get_versions __version__ = get_versions()['version'] diff --git a/sfaira/api/celltypes.py b/sfaira/api/celltypes.py deleted file mode 100644 index 0f68392e7..000000000 --- 
a/sfaira/api/celltypes.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.versions.celltype_versions import CelltypeUniverse, OntologyObo diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 0d022b955..d44d9e104 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,6 +1,9 @@ import numpy as np from typing import List +from sfaira.versions.metadata import CelltypeUniverse, OntologyList +from sfaira.versions.metadata import ONTOLOGY_UBERON, ONTOLOGY_HSAPDV, ONTOLOGY_MMUSDV, ONTOLOGY_SLC + """ The classes in this file are containers of field names and element entries that are used in streamlined adata objects in sfaira and in associated data bases. @@ -196,10 +199,10 @@ def __init__(self): self.dev_stage_allowed_entries = None self.ethnicity_allowed_entries = None self.normalization_allowed_entries = None - self.organ_allowed_entries = None - self.organism_allowed_entries = ["mouse", "human"] - self.protocol_allowed_entries = None - self.sex_allowed_entries = ["female", "male"] + self.organ_allowed_entries = ONTOLOGY_UBERON + self.organism_allowed_entries = OntologyList(terms=["mouse", "human"]) + self.protocol_allowed_entries = ONTOLOGY_SLC + self.sex_allowed_entries = OntologyList(terms=["female", "male"]) self.subtissue_allowed_entries = None self.year_allowed_entries = list(range(2000, 3000)) # Free fields that are not constrained: diff --git a/sfaira/data/base.py b/sfaira/data/base.py index e0a1fcb00..a22288936 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -15,7 +15,7 @@ import warnings from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.celltype_versions import CelltypeUniverse +from sfaira.versions.metadata import Ontology, CelltypeUniverse, ONTOLOGY_UBERON from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS UNS_STRING_META_IN_OBS = "__obs__" @@ -656,7 +656,12 @@ def fn_ontology_class_map_csv(self): """Standardised file name under which cell type 
conversion tables are saved.""" return self.doi_cleaned_id + ".csv" - def write_ontology_class_map(self, fn, protected_writing: bool = True): + def write_ontology_class_map( + self, + fn, + protected_writing: bool = True, + **kwargs + ): """ Load class maps of free text cell types to ontology classes. @@ -665,12 +670,13 @@ def write_ontology_class_map(self, fn, protected_writing: bool = True): :return: """ labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) - tab = self.ontology_celltypes.onto.fuzzy_match_nodes( + tab = self.ontology_celltypes.prepare_celltype_map_fuzzy( source=labels_original, match_only=False, - include_old=False, - include_synonyms=False, - remove=self._unknown_celltype_identifiers, + anatomical_constraint=self.organ, + include_synonyms=True, + omit_list=self._unknown_celltype_identifiers, + **kwargs ) if not os.path.exists(fn) or not protected_writing: tab.to_csv(fn, index=None) @@ -1371,23 +1377,33 @@ def __erasing_protection(self, attr, val_old, val_new): raise ValueError(f"attempted to set erasing protected attribute {attr}: " f"previously was {str(val_old)}, attempted to set {str(val_new)}") - def __value_protection(self, attr, allowed, attempted): + def __value_protection( + self, + attr: str, + allowed: Union[Ontology, bool, int, float, str, List[bool], List[int], List[float], List[str]], + attempted + ): """ Check whether value is from set of allowed values. Does not check if allowed is None. - :param attr: - :param allowed: - :param attempted: + :param attr: Attribute to set. + :param allowed: Constraint for values of `attr`. + Either ontology instance used to constrain entries, or list of allowed values. + :param attempted: Value to attempt to set in `attr`. 
:return: """ if allowed is not None: if not isinstance(attempted, list) and not isinstance(attempted, tuple): attempted = [attempted] - for x in attempted: - if x not in allowed: - raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + if isinstance(allowed, Ontology): + for x in attempted: + allowed.validate_node(x) + else: + for x in attempted: + if x not in allowed: + raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") def subset_cells(self, key, values): """ diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py index 14c612215..70d179770 100644 --- a/sfaira/estimators/external.py +++ b/sfaira/estimators/external.py @@ -1,4 +1,4 @@ -from sfaira.versions.celltype_versions import CelltypeUniverse +from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies from sfaira.models.base import BasicModel diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 5eaed4dc2..71c2ae34a 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -7,7 +7,7 @@ import pandas as pd from typing import List, Union -from sfaira.versions.celltype_versions import CelltypeUniverse +from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.topology_versions import Topologies diff --git a/sfaira/models/celltype/external.py b/sfaira/models/celltype/external.py deleted file mode 100644 index 46629b4d4..000000000 --- a/sfaira/models/celltype/external.py +++ /dev/null @@ -1,4 +0,0 @@ -import sfaira.versions.celltype_versions as celltype_versions -from sfaira.versions.topology_versions import Topologies -from sfaira.models.base import BasicModel -from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index f06661d2c..318937651 100644 --- 
a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -2,9 +2,10 @@ import tensorflow as tf from typing import List, Union -from sfaira.models.celltype.external import BasicModel -from sfaira.models.celltype.external import PreprocInput -from sfaira.models.celltype.external import celltype_versions, Topologies +import sfaira.versions.metadata as metadata +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput class LearnedThresholdLayer(tf.keras.layers.Layer): @@ -94,7 +95,7 @@ def __init__( class CellTypeMarkerVersioned(CellTypeMarker): - cell_type_version: celltype_versions.CelltypeUniverse + cell_type_version: metadata.CelltypeUniverse def __init__( self, diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index fadd3ee99..fceb9aa55 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -2,9 +2,10 @@ import tensorflow as tf from typing import List, Union -from sfaira.models.celltype.external import BasicModel -from sfaira.models.celltype.external import PreprocInput -from sfaira.models.celltype.external import celltype_versions, Topologies +import sfaira.versions.metadata as metadata +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput class CellTypeMlp(BasicModel): @@ -69,7 +70,7 @@ def __init__( class CellTypeMlpVersioned(CellTypeMlp): - cell_type_version: celltype_versions.CelltypeUniverse + cell_type_version: metadata.CelltypeUniverse def __init__( self, diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index 58fa5dc47..44b6a6649 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -4,9 +4,9 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, 
GaussianConstStdOutput -from sfaira.models.embedding.external import BasicModel -from sfaira.models.embedding.external import PreprocInput -from sfaira.models.embedding.external import Topologies +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput class Encoder(tf.keras.layers.Layer): diff --git a/sfaira/models/embedding/external.py b/sfaira/models/embedding/external.py deleted file mode 100644 index c831cdb04..000000000 --- a/sfaira/models/embedding/external.py +++ /dev/null @@ -1,4 +0,0 @@ -from sfaira.versions.topology_versions import Topologies -from sfaira.models.base import BasicModel -from sfaira.models.pp_layer import PreprocInput -from sfaira.models.made import MaskingDense diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 72ac6f8e2..60da08920 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -4,9 +4,9 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.models.embedding.external import BasicModel -from sfaira.models.embedding.external import PreprocInput -from sfaira.models.embedding.external import Topologies +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput class EncoderLinear(tf.keras.layers.Layer): diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index f122d670b..433372226 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -4,9 +4,9 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.models.embedding.external import BasicModel -from 
sfaira.models.embedding.external import PreprocInput -from sfaira.models.embedding.external import Topologies +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput class Sampling(tf.keras.layers.Layer): diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 80436e1a3..3dacf965b 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -4,10 +4,10 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.models.embedding.external import BasicModel -from sfaira.models.embedding.external import PreprocInput -from sfaira.models.embedding.external import Topologies -from sfaira.models.embedding.external import MaskingDense +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput +from sfaira.models.made import MaskingDense class Sampling(tf.keras.layers.Layer): diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index db9d75c81..6edace92a 100644 --- a/sfaira/models/embedding/vaevamp.py +++ b/sfaira/models/embedding/vaevamp.py @@ -4,9 +4,9 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.models.embedding.external import BasicModel -from sfaira.models.embedding.external import PreprocInput -from sfaira.models.embedding.external import Topologies +from sfaira.versions.topology_versions import Topologies +from sfaira.models.base import BasicModel +from sfaira.models.pp_layer import PreprocInput def log_sum_of_exponentials(x, axis): diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 
398110f02..3ff61eba9 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -886,7 +886,7 @@ def plot_best_classwise_heatmap( dataset.load() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - assert False, "depreceat celltype_versions code here" + assert False, "depreceat metadata code here" celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids diff --git a/sfaira/unit_tests/external.py b/sfaira/unit_tests/external.py index 6e2cfddc3..179f7c4d3 100644 --- a/sfaira/unit_tests/external.py +++ b/sfaira/unit_tests/external.py @@ -1,6 +1,6 @@ from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding from sfaira.interface.user_interface import UserInterface -import sfaira.versions.celltype_versions as celltype_versions +import sfaira.versions.metadata as celltype_versions from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.topology_versions import Topologies diff --git a/sfaira/unit_tests/test_celltype_universe.py b/sfaira/unit_tests/test_celltype_universe.py index e5f5030dd..5e149f359 100644 --- a/sfaira/unit_tests/test_celltype_universe.py +++ b/sfaira/unit_tests/test_celltype_universe.py @@ -2,7 +2,7 @@ import pandas as pd import unittest -from sfaira.versions.celltype_versions import OntologyObo, ORGANISM_DICT +from sfaira.versions.metadata import OntologyObo, ORGANISM_DICT class TestCellTypeUniverse(unittest.TestCase): @@ -16,9 +16,9 @@ def test_debugging(self, reduced=False): for k, v in ORGANISM_DICT.items(): for kk, vv in v.items(): universe = vv.celltype_universe["0"] - tab = onto.fuzzy_match_nodes(universe, match_only=True) + tab = onto.find_nodes_fuzzy(universe, match_only=True) if not np.all(tab["matched"].values): - tab2 = 
onto.fuzzy_match_nodes(universe, match_only=False, include_old=True, remove=["unkown"]) + tab2 = onto.find_nodes_fuzzy(universe, match_only=False, include_old=True, omit_list=["unkown"]) if not reduced: tab2.to_csv( self.dir_debugging + k + "_" + kk + "_universe.csv", @@ -36,9 +36,9 @@ def test_debugging2(self): for k, v in ORGANISM_DICT.items(): for kk, vv in v.items(): names = list(vv.ontology["0"]["names"].keys()) - tab = onto.fuzzy_match_nodes(names, match_only=True) + tab = onto.find_nodes_fuzzy(names, match_only=True) if not np.all(tab["matched"].values): - tab = onto.fuzzy_match_nodes(names, match_only=False, include_old=True, remove=["unkown"]) + tab = onto.find_nodes_fuzzy(names, match_only=False, include_old=True, omit_list=["unkown"]) tab.to_csv( self.dir_debugging2 + k + "_" + kk + "_universe.csv", index=False, quoting=csv.QUOTE_NONE, sep=";" @@ -61,7 +61,7 @@ def test_only(self): for k, v in ORGANISM_DICT.items(): for kk, vv in v.items(): universe = vv.celltype_universe["0"] - tab = onto.fuzzy_match_nodes(universe, match_only=True) + tab = onto.find_nodes_fuzzy(universe, match_only=True) print(tab.loc[tab["matched"].values == False]) assert np.all(tab["matched"].values), f"{k} {kk}" diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index dfe76b321..a11e3569c 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -39,7 +39,8 @@ def test_load(self): # Write this directly into sfaira installation so that it can be committed via git. v.write_ontology_class_map( fn=os.path.join("/".join(FILE_PATH.split("/")[:-1]), v.fn_ontology_class_map_csv), - protected_writing=False + protected_writing=False, + n_suggest=10, ) # ToDo: conflicts are not automatically resolved, please go back to https://www.ebi.ac.uk/ols/ontologies/cl # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. 
diff --git a/sfaira/unit_tests/test_estimator.py b/sfaira/unit_tests/test_estimator.py index ddb711dad..9dd68a194 100644 --- a/sfaira/unit_tests/test_estimator.py +++ b/sfaira/unit_tests/test_estimator.py @@ -6,7 +6,7 @@ import unittest from sfaira.unit_tests.external import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.unit_tests.external import celltype_versions, SuperGenomeContainer, Topologies +from sfaira.unit_tests.external import metadata, SuperGenomeContainer, Topologies class _TestEstimator: diff --git a/sfaira/versions/__init__.py b/sfaira/versions/__init__.py index 7840c39b1..e46a788f9 100644 --- a/sfaira/versions/__init__.py +++ b/sfaira/versions/__init__.py @@ -1,3 +1,3 @@ -from . import celltype_versions +from . import metadata from . import genome_versions from . import topology_versions diff --git a/sfaira/versions/celltype_versions/__init__.py b/sfaira/versions/celltype_versions/__init__.py deleted file mode 100644 index 3b6fd15ce..000000000 --- a/sfaira/versions/celltype_versions/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .base import CelltypeUniverse, OntologyObo diff --git a/sfaira/versions/celltype_versions/base.py b/sfaira/versions/celltype_versions/base.py deleted file mode 100644 index 7fef5c622..000000000 --- a/sfaira/versions/celltype_versions/base.py +++ /dev/null @@ -1,316 +0,0 @@ -import abc -import networkx -import numpy as np -import obonet -import pandas as pd -from typing import Dict, List, Tuple, Union -import warnings - -from sfaira.versions.celltype_versions.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE - - -class OntologyBase: - leaves: list - - @abc.abstractmethod - def set_leaves(self, nodes: list = None): - pass - - @abc.abstractmethod - def get_ancestors(self, node: str) -> List[str]: - pass - - def map_to_leaves(self, node: str, return_type: str = "elements", include_self: bool = True): - """ - Map a given list of nodes to leave nodes. 
- - :param node: - :param return_type: - - "elements": names of mapped leave nodes - "idx": indicies in leave note list of of mapped leave nodes - :param include_self: whether to include node itself - :return: - """ - assert self.leaves is not None - ancestors = self.get_ancestors(node) - if include_self: - ancestors = ancestors + [node] - if return_type == "elements": - return [x for x in self.leaves if x in ancestors] - if return_type == "idx": - return np.array([i for i, (x, y) in enumerate(self.leaves) if x in ancestors]) - - -class OntologyDict(OntologyBase): - - def __init__(self, onto: dict): - self.onto = onto - - def set_leaves(self, nodes: list = None): - self.leaves = nodes - - def get_ancestors(self, node: str) -> List[str]: - return self.onto[node] if node in self.onto.keys() else [node] - - -class OntologyObo(OntologyBase): - - graph: networkx.MultiDiGraph - - def __init__(self, obo: str = "http://purl.obolibrary.org/obo/cl.obo", **kwargs): - self.graph = obonet.read_obo(obo) - self._check_graph() - - def _check_graph(self): - # ToDo OBO from obolibrary is not DAG? - if not networkx.is_directed_acyclic_graph(self.graph): - warnings.warn("DAG was broken") - - @property - def nodes(self): - return self.graph.nodes() - - def set_leaves(self, nodes: list = None): - # ToDo check that these are not include parents of each other! - if nodes is not None: - for x in nodes: - assert x in self.graph.nodes, f"{x} not found" - self.leaves = nodes - else: - self.leaves = self.get_all_roots() - - def get_all_roots(self) -> List[str]: - return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] - - def get_ancestors(self, node: str) -> List[str]: - return list(networkx.ancestors(self.graph, node)) - - def map_class_to_id(self, x): - """ - Map ontology class to ID. - :param x: - :return: - """ - assert False # ToDo - - def map_id_to_class(self, x): - """ - Map ontology ID to class. 
- :param x: - :return: - """ - assert False # ToDo - - def fuzzy_match_nodes( - self, - source, - match_only: bool = False, - include_old: bool = False, - include_synonyms: bool = True, - remove: list = [] - ) -> pd.DataFrame: - """ - Map free text node names to ontology node names. - - :param source: Free text node labels which are to be matched to ontology nodes. - :param match_only: Whether to include strict matches only in output. - :param include_old: Whether to include previous (free text) node label in output. - :param include_synonyms: Whether to include synonym nodes. - :param remove: Free text node labels to omit in map. - :return: Table with source and target node names. Columns: "source", "target" - """ - from fuzzywuzzy import fuzz - matches = [] - nodes = [(k, v) for k, v in self.graph.nodes.items()] - include = [] - if isinstance(source, pd.DataFrame): - source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) - for x in source: - if not isinstance(x, list) and not isinstance(x, tuple): - x = [x, "nan"] - scores = np.array([ - np.max([ - fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) - ] + [ - fuzz.ratio(x[0].lower().strip("'").strip("\"").strip("]").strip("["), yy.lower()) - for yy in y[1]["synonym"] - ]) if "synonym" in y[1].keys() and include_synonyms else - np.max([ - fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) - ]) - if "name" in y[1].keys() else 0 # ToDo: these are empty nodes, where are they coming from? 
- for y in nodes - ]) - include.append(x[0].lower().strip("'").strip("\"") not in remove) - if match_only: - matches.append(np.any(scores == 100)) # perfect match - else: - if np.any(scores == 100): - matches.append([(nodes[i][1]["name"], nodes[i][0]) for i in np.where(scores == 100)[0]]) - else: - matchesi = [( - nodes[i][1]["name"] + "[" + ";".join([ - yy.strip("'").strip("\"").strip("]").strip("[") - for yy in nodes[i][1]["synonym"] - ]) + "}" - if "synonym" in nodes[i][1].keys() and include_synonyms else nodes[i][1]["name"], - nodes[i][0] - ) for i in np.argsort(scores)[-10:]] - if include_old: - matchesi = matchesi + [(x[0].upper(), x[1])] - matches.append(matchesi) - if match_only: - tab = pd.DataFrame({"source": source, "target": matches}) - else: - tab = pd.DataFrame({ - "source": source, - "target": [" ".join([",".join(zz) for zz in z]) for z in matches] - }) - return tab.loc[include] - - -class OntologyExtendedObo(OntologyObo): - """ - Basic .obo ontology extended by additional nodes and edges without breaking DAG. - """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.add_extension(dict_ontology=ONTOLOGIY_EXTENSION_HUMAN) # ToDo distinguish here - - def add_extension(self, dict_ontology: Dict[str, List[str]]): - """ - Extend ontology by additional edges and nodes defined in a dictionary. - - Checks that DAG is not broken after graph assembly. - - :param dict_ontology: Dictionary of nodes and edges to add to ontology. Parsing: - - - keys: parent nodes (which must be in ontology) - - values: children nodes (which can be in ontology), must be given as list of stringd. - If these are in the ontology, an edge is added, otherwise, an edge and the node are added. 
- :return: - """ - for k, v in dict_ontology.items(): - assert isinstance(v, list), "dictionary values should be list of strings" - # Check that parent node is present: - if k not in self.nodes: - raise ValueError(f"key {k} was not in reference ontology") - # Check if edge is added only, or edge and node. - for child_node in v: - if child_node not in self.nodes: # Add node. - self.graph.add_node(child_node) - # Add edge. - self.graph.add_edge(k, child_node) - # Check that DAG was not broken: - self._check_graph() - - -class CelltypeUniverse: - """ - Cell type universe (list) and ontology (hierarchy) container class. - - - Basic checks on the organ specific instance are performed in the constructor. - """ - ontology: OntologyBase - _target_universe: Union[List[str], None] - - def __init__(self, organism: str, **kwargs): - """ - - :param organism: Organism, defines ontology extension used. - :param kwargs: - """ - self.onto = OntologyExtendedObo(**kwargs) - self._target_universe = None - self._set_extension(organism=organism) - - def _set_extension(self, organism): - """ - - :param organism: Organism, defines ontology extension used. - """ - if organism == "human": - self.onto.add_extension(ONTOLOGIY_EXTENSION_HUMAN) - elif organism == "mouse": - self.onto.add_extension(ONTOLOGIY_EXTENSION_MOUSE) - else: - raise ValueError(f"organism {organism} not found") - - @property - def target_universe(self): - """ - Ontology classes of target universe (understandable cell type names). 
- - :return: - """ - return self._target_universe - - @target_universe.setter - def target_universe(self, x: List[str]): - # Check that all nodes are valid: - for xx in x: - if xx not in self.onto.nodes: - raise ValueError(f"cell type {xx} was not in ontology") - # Default universe is the full set of leave nodes of ontology: - self.target_universe = self.onto.leaves - self.onto.set_leaves(self.target_universe) - - @property - def target_universe_ids(self): - """ - Ontology IDs of target universe (codified cell type names). - - :return: - """ - return [self.onto.map_class_to_id(x) for x in self._target_universe] - - @property - def ntypes(self): - """ - Number of different cell types in target universe. - """ - return len(self.target_universe) - - def __validate_target_universe_table(self, tab: pd.DataFrame): - assert len(tab.columns) == 2 - assert tab.columns[0] == "name" and tab.columns[1] == "id" - - def load_target_universe(self, organ): - """ - - :param organ: Anatomic structure to load target universe for. - :return: - """ - # ToDo: Use pydoc based query of universes stored in ./target_universes/.. - tab = None - self.__validate_target_universe_table(tab=tab) - self.target_universe = None # ToDo - - def read_target_universe_csv(self, fn): - """ - - :param fn: File containing target universe. - :return: - """ - tab = pd.read_csv(fn) - self.__validate_target_universe_table(tab=tab) - self.target_universe = tab["name"].values - - def map_to_target_leaves( - self, - nodes: List[str], - return_type: str = "elements" - ): - """ - Map a given list of nodes to leave nodes defined for this ontology. 
- :param nodes: - :param return_type: - - "elements": names of mapped leave nodes - "idx": indices in leave note list of of mapped leave nodes - :return: - """ - return [self.onto.map_to_leaves(x, return_type=return_type) for x in nodes] diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py new file mode 100644 index 000000000..bb253513f --- /dev/null +++ b/sfaira/versions/metadata/__init__.py @@ -0,0 +1,7 @@ +from .base import Ontology, OntologyList, OntologyObo, CelltypeUniverse, \ + OntologyCelltypes, OntologyUberon, OntologyHancestro, OntologyHsapdv, OntologyMmusdv, \ + OntologySinglecellLibraryConstruction +ONTOLOGY_UBERON = OntologyUberon() +ONTOLOGY_HSAPDV = OntologyHsapdv() +ONTOLOGY_MMUSDV = OntologyMmusdv() +ONTOLOGY_SLC = OntologySinglecellLibraryConstruction() diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py new file mode 100644 index 000000000..cd04e5ff2 --- /dev/null +++ b/sfaira/versions/metadata/base.py @@ -0,0 +1,822 @@ +import abc +import networkx +import numpy as np +import obonet +#import owlready2 +import pandas as pd +import requests +from typing import Dict, List, Tuple, Union +import warnings + +from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE + +""" +Ontology managament classes. + +We consider any structured collection of meta data identifiers an ontology and define classes to interact with such +data here. + +- All classes inherit from Ontology() +- Onotlogies can be read as follows: + - from string lists which are typically hardcoded in sfaira (OntologyList), + - from .obo files which are emitted by obofoundry for example (OntologyObo)), + - ToDo from .owl files which are emitted from EBI for example (OntologyOwl)), + - from the EBI web API via direct queries (OntologyEbi)). + +ToDo explain usage of ontology extension. 
+""" + + +class Ontology: + leaves: List[str] + + @abc.abstractmethod + def node_names(self): + pass + + @abc.abstractmethod + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): + """ + Map free text node name to ontology node names via fuzzy string matching. + + :param x: Free text node label which is to be matched to ontology nodes. + :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :return List of proposed matches in ontology. + """ + pass + + def validate_node(self, x: str): + if x not in self.node_names: + suggestions = self.map_node_suggestion(x=x, include_synonyms=False) + raise ValueError(f"Node label {x} not found. Did you mean any of {suggestions}?") + + +class OntologyList(Ontology): + """ + Basic unordered ontology container + """ + + def __init__( + self, + terms: List[str], + **kwargs + ): + self.nodes = terms + + @property + def node_names(self): + return self.nodes + + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): + """ + Map free text node name to ontology node names via fuzzy string matching. + + :param x: Free text node label which is to be matched to ontology nodes. + :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :return List of proposed matches in ontology. + """ + from fuzzywuzzy import fuzz + scores = np.array([ + np.max([ + fuzz.ratio(x.lower(), y.lower()) + ]) + for y in self.node_names + ]) + # Suggest top n_suggest hits by string match: + return [self.node_names[i] for i in np.argsort(scores)[-n_suggest:]][::-1] + + def synonym_node_properties(self) -> List[str]: + return [] + + +class OntologyEbi(Ontology): + """ + Recursively assembles ontology by querying EBI web interface. + + Not recommended for large ontologies. 
+ """ + + def __init__( + self, + ontology: str, + root_term: str, + **kwargs + ): + def get_url(iri): + return f"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/" \ + f"http%253A%252F%252Fwww.ebi.ac.uk%252F{ontology}%252F{iri}/children" + + def recursive_search(iri): + terms = requests.get(get_url(iri=iri)).json()["_embedded"]["terms"] + nodes_new = {} + for x in terms: + nodes_new[x["iri"].split("/")[-1]] = { + "name": x["label"], + "description": x["description"], + "synonyms": x["synonyms"], + "has_children": x["has_children"], + } + if x["has_children"]: + nodes_new.update(recursive_search(iri=x["iri"].split("/")[-1])) + return nodes_new + + self.nodes = recursive_search(iri=root_term) + + @property + def node_names(self): + return [v["name"] for k, v in self.nodes.items()] + + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): + """ + Map free text node name to ontology node names via fuzzy string matching. + + :param x: Free text node label which is to be matched to ontology nodes. + :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :return List of proposed matches in ontology. 
+ """ + from fuzzywuzzy import fuzz + scores = np.array([ + np.max( + [ + fuzz.ratio(x.lower(), v["name"].lower()) + ] + [ + fuzz.ratio(x.lower(), yyy.lower()) + for yy in self.synonym_node_properties if yy in v.keys() for yyy in v[yy] + ] + ) if include_synonyms else + np.max([ + fuzz.ratio(x.lower(), v["name"].lower()) + ]) + for k, v in self.nodes.items() + ]) + # Suggest top n_suggest hits by string match: + return [self.node_names[i] for i in np.argsort(scores)[-n_suggest:]][::-1] + + def synonym_node_properties(self) -> List[str]: + return ["synonyms"] + +# class OntologyOwl(Ontology): +# +# onto: owlready2.Ontology +# +# def __init__( +# self, +# owl: str, +# **kwargs +# ): +# self.onto = owlready2.get_ontology(owl) +# self.onto.load() +# # ToDo build support here +# +# @property +# def node_names(self): +# pass + + +class OntologyObo(Ontology): + + graph: networkx.MultiDiGraph + leaves: List[str] + + def __init__( + self, + obo: str, + **kwargs + ): + self.graph = obonet.read_obo(obo) + + def _check_graph(self): + if not networkx.is_directed_acyclic_graph(self.graph): + warnings.warn("DAG was broken") + + @property + def nodes(self): + return list(self.graph.nodes.items()) + + @property + def nodes_dict(self): + return self.graph.nodes.items() + + @property + def node_names(self): + return [x["name"] for x in self.graph.nodes.values()] + + @property + def node_ids(self): + return list(self.graph.nodes()) + + def id_from_name(self, x: str): + self.validate_node(x=x) + return [k for k, v in self.graph.nodes.items() if v["name"] == x][0] + + def set_leaves(self, nodes: list = None): + # ToDo check that these are not include parents of each other! 
+ if nodes is not None: + for x in nodes: + assert x in self.graph.nodes, f"{x} not found" + self.leaves = nodes + else: + self.leaves = self.get_all_roots() + + def get_all_roots(self) -> List[str]: + return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] + + def get_ancestors(self, node: str) -> List[str]: + return list(networkx.ancestors(self.graph, node)) + + def map_to_leaves(self, node: str, return_type: str = "elements", include_self: bool = True): + """ + Map a given list of nodes to leave nodes. + + :param node: + :param return_type: + + "elements": names of mapped leave nodes + "idx": indicies in leave note list of of mapped leave nodes + :param include_self: whether to include node itself + :return: + """ + assert self.leaves is not None + ancestors = self.get_ancestors(node) + if include_self: + ancestors = ancestors + [node] + if return_type == "elements": + return [x for x in self.leaves if x in ancestors] + if return_type == "idx": + return np.array([i for i, (x, y) in enumerate(self.leaves) if x in ancestors]) + + @abc.abstractmethod + def synonym_node_properties(self) -> List[str]: + pass + + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): + """ + Map free text node name to ontology node names via fuzzy string matching. + + :param x: Free text node label which is to be matched to ontology nodes. + :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :return List of proposed matches in ontology. 
+ """ + from fuzzywuzzy import fuzz + scores = np.array([ + np.max( + [ + fuzz.ratio(x.lower().strip("'").strip("\""), y[1]["name"].lower()) + ] + [ + fuzz.ratio(x.lower().strip("'").strip("\"").strip("]").strip("["), yyy.lower()) + for yy in self.synonym_node_properties if yy in y[1].keys() for yyy in y[1][yy] + ] + ) if "synonym" in y[1].keys() and include_synonyms else + np.max([ + fuzz.ratio(x.lower().strip("'").strip("\""), y[1]["name"].lower()) + ]) + for y in self.nodes + ]) + # Suggest top n_suggest hits by string match: + return [self.nodes[i][1]["name"] for i in np.argsort(scores)[-n_suggest:]][::-1] + + +class OntologyExtendedObo(OntologyObo): + """ + Basic .obo ontology extended by additional nodes and edges without breaking DAG. + """ + + def __init__(self, obo, **kwargs): + super().__init__(obo=obo, **kwargs) + # ToDo distinguish here: + self.add_extension(dict_ontology=ONTOLOGIY_EXTENSION_HUMAN) + + def add_extension(self, dict_ontology: Dict[str, List[str]]): + """ + Extend ontology by additional edges and nodes defined in a dictionary. + + Checks that DAG is not broken after graph assembly. + + :param dict_ontology: Dictionary of nodes and edges to add to ontology. Parsing: + + - keys: parent nodes (which must be in ontology) + - values: children nodes (which can be in ontology), must be given as list of stringd. + If these are in the ontology, an edge is added, otherwise, an edge and the node are added. + :return: + """ + for k, v in dict_ontology.items(): + assert isinstance(v, list), "dictionary values should be list of strings" + # Check that parent node is present: + if k not in self.nodes: + raise ValueError(f"key {k} was not in reference ontology") + # Check if edge is added only, or edge and node. + for child_node in v: + if child_node not in self.nodes: # Add node. + self.graph.add_node(child_node) + # Add edge. 
+ self.graph.add_edge(k, child_node) + # Check that DAG was not broken: + self._check_graph() + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + +class OntologyUberon(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + super().__init__(obo="http://purl.obolibrary.org/obo/uberon.obo") + + # Clean up nodes: + nodes_to_delete = [] + for k, v in self.graph.nodes.items(): + # Only retain nodes which are "anatomical collection" 'UBERON:0034925': + # ToDo this seems to narrow, need to check if we need to constrain the nodes we use. + if "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + # Clean up edges: + # The graph object can hold different types of edges, + # and multiple types are loaded from the obo, not all of which are relevant for us: + # All edge types (based on previous download, assert below that this is not extended): + edge_types = [ + 'aboral_to', + 'adjacent_to', + 'anastomoses_with', + 'anterior_to', + 'anteriorly_connected_to', + 'attaches_to', + 'attaches_to_part_of', + 'bounding_layer_of', + 'branching_part_of', + 'channel_for', + 'channels_from', + 'channels_into', + 'composed_primarily_of', + 'conduit_for', + 'connected_to', + 'connects', + 'contains', + 'continuous_with', + 'contributes_to_morphology_of', + 'deep_to', + 'developmentally_induced_by', + 'developmentally_replaces', + 'develops_from', # developmental DAG -> include because it reflects the developmental hierarchy + 'develops_from_part_of', # developmental DAG -> include because it reflects the developmental hierarchy + 'develops_in', + 'directly_develops_from', # developmental DAG -> include because it reflects the developmental hierarchy + 'distal_to', + 'distally_connected_to', + 'distalmost_part_of', + 'dorsal_to', + 'drains', + 'ends', + 'ends_with', + 'existence_ends_during', + 'existence_ends_during_or_before', + 'existence_ends_with', + 
'existence_starts_and_ends_during', + 'existence_starts_during', + 'existence_starts_during_or_after', + 'existence_starts_with', + 'extends_fibers_into', + 'filtered_through', + 'has_boundary', + 'has_component', + 'has_developmental_contribution_from', + 'has_fused_element', + 'has_member', + 'has_muscle_antagonist', + 'has_muscle_insertion', + 'has_muscle_origin', + 'has_part', + 'has_potential_to_develop_into', + 'has_potential_to_developmentally_contribute_to', + 'has_skeleton', + 'immediate_transformation_of', + 'immediately_anterior_to', + 'immediately_deep_to', + 'immediately_posterior_to', + 'immediately_preceded_by', + 'immediately_superficial_to', + 'in_anterior_side_of', + 'in_central_side_of', + 'in_deep_part_of', + 'in_distal_side_of', + 'in_dorsal_side_of', + 'in_innermost_side_of', + 'in_lateral_side_of', + 'in_left_side_of', + 'in_outermost_side_of', + 'in_posterior_side_of', + 'in_proximal_side_of', + 'in_right_side_of', + 'in_superficial_part_of', + 'in_ventral_side_of', + 'indirectly_supplies', + 'innervated_by', + 'innervates', + 'intersects_midsagittal_plane_of', + 'is_a', + 'layer_part_of', + 'located_in', # anatomic DAG -> include because it reflects the anatomic coarseness / hierarchy + 'location_of', + 'lumen_of', + 'luminal_space_of', + 'overlaps', + 'part_of', # anatomic DAG -> include because it reflects the anatomic coarseness / hierarchy + 'postaxialmost_part_of', + 'posterior_to', + 'posteriorly_connected_to', + 'preaxialmost_part_of', + 'preceded_by', + 'precedes', + 'produced_by', + 'produces', + 'protects', + 'proximal_to', + 'proximally_connected_to', + 'proximalmost_part_of', + 'seeAlso', + 'serially_homologous_to', + 'sexually_homologous_to', + 'skeleton_of', + 'starts', + 'starts_with', + 'subdivision_of', + 'superficial_to', + 'supplies', + 'surrounded_by', + 'surrounds', + 'transformation_of', + 'tributary_of', + 'trunk_part_of', + 'ventral_to' + ] + edges_to_delete = [] + for i, x in enumerate(self.graph.edges): + assert 
x[2] in edge_types, x + if x[2] not in [ + "develops_from", + "located_in", + "part_of", + ]: + edges_to_delete.append((x[0], x[1])) + for x in edges_to_delete: + self.graph.remove_edge(u=x[0], v=x[1]) + self._check_graph() + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym", "latin term", "has relational adjective"] + + +class OntologyCelltypes(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + super().__init__(obo="http://purl.obolibrary.org/obo/cl.obo") + + # Clean up nodes: + nodes_to_delete = [] + for k, v in self.graph.nodes.items(): + if "namespace" not in v.keys() or v["namespace"] != "cell": + nodes_to_delete.append(k) + elif "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + # Clean up edges: + # The graph object can hold different types of edges, + # and multiple types are loaded from the obo, not all of which are relevant for us: + # All edge types (based on previous download, assert below that this is not extended): + edge_types = [ + 'is_a', # nomenclature DAG -> include because of annotation coarseness differences + 'develops_from', # developmental DAG -> include because of developmental differences + 'has_part', # ? + 'develops_into', # inverse developmental DAG -> do not include + 'RO:0002120', # ? + 'RO:0002103', # ? + 'lacks_plasma_membrane_part' # ? 
+ ] + edges_to_delete = [] + for i, x in enumerate(self.graph.edges): + assert x[2] in edge_types, x + if x[2] not in ["is_a", "develops_from"]: + edges_to_delete.append((x[0], x[1])) + for x in edges_to_delete: + self.graph.remove_edge(u=x[0], v=x[1]) + self._check_graph() + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + +class OntologyHancestro(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + super().__init__(obo="http://purl.obolibrary.org/obo/hancestro.obo") + + # Clean up nodes: + nodes_to_delete = [] + for k, v in self.graph.nodes.items(): + if "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + # Clean up edges: + # The graph object can hold different types of edges, + # and multiple types are loaded from the obo, not all of which are relevant for us: + # All edge types (based on previous download, assert below that this is not extended): + edge_types = [] # ToDo + edges_to_delete = [] + for i, x in enumerate(self.graph.edges): + assert x[2] in edge_types, x + if x[2] not in []: + edges_to_delete.append((x[0], x[1])) + for x in edges_to_delete: + self.graph.remove_edge(u=x[0], v=x[1]) + self._check_graph() + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + +class OntologyHsapdv(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + super().__init__(obo="http://purl.obolibrary.org/obo/hsapdv.obo") + + # Clean up nodes: + nodes_to_delete = [] + for k, v in self.graph.nodes.items(): + if "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + +class OntologyMmusdv(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + super().__init__(obo="http://purl.obolibrary.org/obo/mmusdv.obo") + + # Clean up nodes: + nodes_to_delete = [] + for k, v in 
self.graph.nodes.items(): + if "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + +class OntologySinglecellLibraryConstruction(OntologyEbi): + + def __init__( + self, + ontology: str = "efo", + root_term: str = "EFO_0010183", + ): + super().__init__(ontology=ontology, root_term=root_term) + + +class CelltypeUniverse: + """ + Cell type universe (list) and ontology (hierarchy) container class. + + + Basic checks on the organ specific instance are performed in the constructor. + """ + ontology: OntologyCelltypes + _target_universe: Union[List[str], None] + + def __init__(self, organism: str, **kwargs): + """ + + :param organism: Organism, defines ontology extension used. + :param kwargs: + """ + self.onto_cl = OntologyCelltypes(**kwargs) + self.onto_anatomy = OntologyUberon(**kwargs) + self._target_universe = None + self._set_extension(organism=organism) + + def _set_extension(self, organism): + """ + + :param organism: Organism, defines ontology extension used. + """ + if organism == "human": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) + elif organism == "mouse": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) + else: + raise ValueError(f"organism {organism} not found") + + @property + def target_universe(self): + """ + Ontology classes of target universe (understandable cell type names). 
+ + :return: + """ + return self._target_universe + + @target_universe.setter + def target_universe(self, x: List[str]): + # Check that all nodes are valid: + for xx in x: + if xx not in self.onto_cl.nodes: + raise ValueError(f"cell type {xx} was not in ontology") + # Default universe is the full set of leave nodes of ontology: + self.target_universe = self.onto_cl.leaves + self.onto_cl.set_leaves(self.target_universe) + + @property + def target_universe_ids(self): + """ + Ontology IDs of target universe (codified cell type names). + + :return: + """ + return [self.onto_cl.map_class_to_id(x) for x in self._target_universe] + + @property + def ntypes(self): + """ + Number of different cell types in target universe. + """ + return len(self.target_universe) + + def __validate_target_universe_table(self, tab: pd.DataFrame): + assert len(tab.columns) == 2 + assert tab.columns[0] == "name" and tab.columns[1] == "id" + + def load_target_universe(self, organ): + """ + + :param organ: Anatomic structure to load target universe for. + :return: + """ + # ToDo: Use pydoc based query of universes stored in ./target_universes/.. + tab = None + self.__validate_target_universe_table(tab=tab) + self.target_universe = None # ToDo + + def read_target_universe_csv(self, fn): + """ + + :param fn: File containing target universe. + :return: + """ + tab = pd.read_csv(fn) + self.__validate_target_universe_table(tab=tab) + self.target_universe = tab["name"].values + + def map_to_target_leaves( + self, + nodes: List[str], + return_type: str = "elements" + ): + """ + Map a given list of nodes to leave nodes defined for this ontology. 
+ :param nodes: + :param return_type: + + "elements": names of mapped leave nodes + "idx": indices in leave note list of of mapped leave nodes + :return: + """ + return [self.onto_cl.map_to_leaves(x, return_type=return_type) for x in nodes] + + def prepare_celltype_map_fuzzy( + self, + source, + match_only: bool = False, + include_synonyms: bool = True, + anatomical_constraint: Union[str, None] = None, + omit_list: list = [], + n_suggest: int = 10, + ) -> pd.DataFrame: + """ + Map free text node names to ontology node names via fuzzy string matching. + + If this function does not yield good matches, consider querying this web interface: + https://www.ebi.ac.uk/ols/index + + :param source: Free text node labels which are to be matched to ontology nodes. + :param match_only: Whether to include strict matches only in output. + :param include_synonyms: Whether to include synonyms of nodes in string search. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. + :param omit_list: Free text node labels to omit in map. + :param n_suggest: Number of cell types to suggest. + :return: Table with source and target node names. 
Columns: "source", "target" + """ + from fuzzywuzzy import fuzz + matches = [] + nodes = self.onto_cl.nodes + include = [] + if isinstance(source, pd.DataFrame): + source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) + for x in source: + if not isinstance(x, list) and not isinstance(x, tuple): + x = [x, "nan"] + scores = np.array([ + np.max([ + fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + ] + [ + fuzz.ratio(x[0].lower().strip("'").strip("\"").strip("]").strip("["), yy.lower()) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + np.max([ + fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + ]) + for y in nodes + ]) + include.append(x[0].lower().strip("'").strip("\"") not in omit_list) + if match_only: + matches.append(np.any(scores == 100)) # perfect match + else: + if np.any(scores == 100): + matches.append([nodes[i][1]["name"] for i in np.where(scores == 100)[0]]) + else: + if anatomical_constraint is not None: + # Check that anatomical constraint is a term in UBERON and get UBERON ID: + anatomical_constraint_id = self.onto_anatomy.id_from_name(anatomical_constraint) + # Select up to 5 nodes which match the anatomical constraint: + # The entries look as follows: + # node.value['relationship'] = ['part_of UBERON:0001885'] + # Find nodes that can be matched to UBERON: + anatomical_subselection = [ + "relationship" in y[1].keys() and + np.any(["part_of UBERON" in yy for yy in y[1]["relationship"]]) and + np.any([ + yy.split("part_of ")[-1] in self.onto_anatomy.node_ids + for yy in y[1]["relationship"] + ]) + for y in nodes + ] + uberon_ids = [ + y[1]["relationship"][ + np.where(["part_of UBERON" in yy for yy in y[1]["relationship"]])[0][0] + ].split("part_of ")[1] + if z else None + for y, z in zip(nodes, anatomical_subselection) + ] + # Check relationship in UBERON. 
Select for: + # a) parent -> a more general setting across anatomies from which one was sampled + # b) child -> a sub anatomy of the sampled tissue. + # Check this by checking if one is an ancestor of the other: + anatomical_subselection = [ + z and ( + anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or + y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) + ) + for y, z in zip(uberon_ids, anatomical_subselection) + ] + # Iterate over nodes sorted by string match score and masked by constraint: + matchesi = [ + nodes[i][1]["name"] + for i in np.argsort(scores) + if anatomical_subselection[i] + ][-5:][::-1] + # Select best remaining matches until n_suggests: + matchesi = matchesi + [ + nodes[i][1]["name"] + for i in np.argsort(scores) + if nodes[i][1]["name"] not in matchesi + ][-np.max(n_suggest - len(matchesi), 0):][::-1] + else: + # Suggest top 10 hits by string match: + matchesi = [nodes[i][1]["name"] for i in np.argsort(scores)[-n_suggest:]][::-1] + matches.append(matchesi) + tab = pd.DataFrame({ + "source": source, + "target": [":".join(z) for z in matches] + }) + return tab.loc[include] diff --git a/sfaira/versions/celltype_versions/extensions/__init__.py b/sfaira/versions/metadata/extensions/__init__.py similarity index 100% rename from sfaira/versions/celltype_versions/extensions/__init__.py rename to sfaira/versions/metadata/extensions/__init__.py diff --git a/sfaira/versions/celltype_versions/extensions/obo_extension_human.py b/sfaira/versions/metadata/extensions/obo_extension_human.py similarity index 100% rename from sfaira/versions/celltype_versions/extensions/obo_extension_human.py rename to sfaira/versions/metadata/extensions/obo_extension_human.py diff --git a/sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py b/sfaira/versions/metadata/extensions/obo_extension_mouse.py similarity index 100% rename from sfaira/versions/celltype_versions/extensions/obo_extension_mouse.py rename to 
sfaira/versions/metadata/extensions/obo_extension_mouse.py diff --git a/sfaira/versions/celltype_versions/human/__init__.py b/sfaira/versions/metadata/human/__init__.py similarity index 100% rename from sfaira/versions/celltype_versions/human/__init__.py rename to sfaira/versions/metadata/human/__init__.py diff --git a/sfaira/versions/celltype_versions/target_universes/__init__.py b/sfaira/versions/metadata/target_universes/__init__.py similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/__init__.py rename to sfaira/versions/metadata/target_universes/__init__.py diff --git a/sfaira/versions/celltype_versions/target_universes/human/__init__.py b/sfaira/versions/metadata/target_universes/human/__init__.py similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/__init__.py rename to sfaira/versions/metadata/target_universes/human/__init__.py diff --git a/sfaira/versions/celltype_versions/target_universes/human/adipose.csv b/sfaira/versions/metadata/target_universes/human/adipose.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/adipose.csv rename to sfaira/versions/metadata/target_universes/human/adipose.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv b/sfaira/versions/metadata/target_universes/human/adrenalgland.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/adrenalgland.csv rename to sfaira/versions/metadata/target_universes/human/adrenalgland.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/artery.csv b/sfaira/versions/metadata/target_universes/human/artery.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/artery.csv rename to sfaira/versions/metadata/target_universes/human/artery.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/bladder.csv 
b/sfaira/versions/metadata/target_universes/human/bladder.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/bladder.csv rename to sfaira/versions/metadata/target_universes/human/bladder.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/blood.csv b/sfaira/versions/metadata/target_universes/human/blood.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/blood.csv rename to sfaira/versions/metadata/target_universes/human/blood.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/bone.csv b/sfaira/versions/metadata/target_universes/human/bone.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/bone.csv rename to sfaira/versions/metadata/target_universes/human/bone.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/brain.csv b/sfaira/versions/metadata/target_universes/human/brain.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/brain.csv rename to sfaira/versions/metadata/target_universes/human/brain.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/calvaria.csv b/sfaira/versions/metadata/target_universes/human/calvaria.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/calvaria.csv rename to sfaira/versions/metadata/target_universes/human/calvaria.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/cervix.csv b/sfaira/versions/metadata/target_universes/human/cervix.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/cervix.csv rename to sfaira/versions/metadata/target_universes/human/cervix.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv b/sfaira/versions/metadata/target_universes/human/chorionicvillus.csv similarity index 100% rename from 
sfaira/versions/celltype_versions/target_universes/human/chorionicvillus.csv rename to sfaira/versions/metadata/target_universes/human/chorionicvillus.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/colon.csv b/sfaira/versions/metadata/target_universes/human/colon.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/colon.csv rename to sfaira/versions/metadata/target_universes/human/colon.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/duodenum.csv b/sfaira/versions/metadata/target_universes/human/duodenum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/duodenum.csv rename to sfaira/versions/metadata/target_universes/human/duodenum.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv b/sfaira/versions/metadata/target_universes/human/epityphlon.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/epityphlon.csv rename to sfaira/versions/metadata/target_universes/human/epityphlon.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/esophagus.csv b/sfaira/versions/metadata/target_universes/human/esophagus.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/esophagus.csv rename to sfaira/versions/metadata/target_universes/human/esophagus.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/eye.csv b/sfaira/versions/metadata/target_universes/human/eye.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/eye.csv rename to sfaira/versions/metadata/target_universes/human/eye.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv b/sfaira/versions/metadata/target_universes/human/fallopiantube.csv similarity index 100% rename from 
sfaira/versions/celltype_versions/target_universes/human/fallopiantube.csv rename to sfaira/versions/metadata/target_universes/human/fallopiantube.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv b/sfaira/versions/metadata/target_universes/human/femalegonad.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/femalegonad.csv rename to sfaira/versions/metadata/target_universes/human/femalegonad.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv b/sfaira/versions/metadata/target_universes/human/gallbladder.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/gallbladder.csv rename to sfaira/versions/metadata/target_universes/human/gallbladder.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/heart.csv b/sfaira/versions/metadata/target_universes/human/heart.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/heart.csv rename to sfaira/versions/metadata/target_universes/human/heart.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/hesc.csv b/sfaira/versions/metadata/target_universes/human/hesc.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/hesc.csv rename to sfaira/versions/metadata/target_universes/human/hesc.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/ileum.csv b/sfaira/versions/metadata/target_universes/human/ileum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/ileum.csv rename to sfaira/versions/metadata/target_universes/human/ileum.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/jejunum.csv b/sfaira/versions/metadata/target_universes/human/jejunum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/jejunum.csv rename to 
sfaira/versions/metadata/target_universes/human/jejunum.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/kidney.csv b/sfaira/versions/metadata/target_universes/human/kidney.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/kidney.csv rename to sfaira/versions/metadata/target_universes/human/kidney.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/liver.csv b/sfaira/versions/metadata/target_universes/human/liver.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/liver.csv rename to sfaira/versions/metadata/target_universes/human/liver.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/lung.csv b/sfaira/versions/metadata/target_universes/human/lung.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/lung.csv rename to sfaira/versions/metadata/target_universes/human/lung.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/malegonad.csv b/sfaira/versions/metadata/target_universes/human/malegonad.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/malegonad.csv rename to sfaira/versions/metadata/target_universes/human/malegonad.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/muscle.csv b/sfaira/versions/metadata/target_universes/human/muscle.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/muscle.csv rename to sfaira/versions/metadata/target_universes/human/muscle.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/omentum.csv b/sfaira/versions/metadata/target_universes/human/omentum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/omentum.csv rename to sfaira/versions/metadata/target_universes/human/omentum.csv diff --git 
a/sfaira/versions/celltype_versions/target_universes/human/pancreas.csv b/sfaira/versions/metadata/target_universes/human/pancreas.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/pancreas.csv rename to sfaira/versions/metadata/target_universes/human/pancreas.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/placenta.csv b/sfaira/versions/metadata/target_universes/human/placenta.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/placenta.csv rename to sfaira/versions/metadata/target_universes/human/placenta.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/pleura.csv b/sfaira/versions/metadata/target_universes/human/pleura.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/pleura.csv rename to sfaira/versions/metadata/target_universes/human/pleura.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/prostate.csv b/sfaira/versions/metadata/target_universes/human/prostate.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/prostate.csv rename to sfaira/versions/metadata/target_universes/human/prostate.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/rectum.csv b/sfaira/versions/metadata/target_universes/human/rectum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/rectum.csv rename to sfaira/versions/metadata/target_universes/human/rectum.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/rib.csv b/sfaira/versions/metadata/target_universes/human/rib.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/rib.csv rename to sfaira/versions/metadata/target_universes/human/rib.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/skin.csv 
b/sfaira/versions/metadata/target_universes/human/skin.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/skin.csv rename to sfaira/versions/metadata/target_universes/human/skin.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv b/sfaira/versions/metadata/target_universes/human/spinalcord.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/spinalcord.csv rename to sfaira/versions/metadata/target_universes/human/spinalcord.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/spleen.csv b/sfaira/versions/metadata/target_universes/human/spleen.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/spleen.csv rename to sfaira/versions/metadata/target_universes/human/spleen.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/stomach.csv b/sfaira/versions/metadata/target_universes/human/stomach.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/stomach.csv rename to sfaira/versions/metadata/target_universes/human/stomach.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/thymus.csv b/sfaira/versions/metadata/target_universes/human/thymus.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/thymus.csv rename to sfaira/versions/metadata/target_universes/human/thymus.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/thyroid.csv b/sfaira/versions/metadata/target_universes/human/thyroid.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/thyroid.csv rename to sfaira/versions/metadata/target_universes/human/thyroid.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/trachea.csv b/sfaira/versions/metadata/target_universes/human/trachea.csv similarity index 100% rename from 
sfaira/versions/celltype_versions/target_universes/human/trachea.csv rename to sfaira/versions/metadata/target_universes/human/trachea.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/ureter.csv b/sfaira/versions/metadata/target_universes/human/ureter.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/ureter.csv rename to sfaira/versions/metadata/target_universes/human/ureter.csv diff --git a/sfaira/versions/celltype_versions/target_universes/human/uterus.csv b/sfaira/versions/metadata/target_universes/human/uterus.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/human/uterus.csv rename to sfaira/versions/metadata/target_universes/human/uterus.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/__init__.py b/sfaira/versions/metadata/target_universes/mouse/__init__.py similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/__init__.py rename to sfaira/versions/metadata/target_universes/mouse/__init__.py diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv b/sfaira/versions/metadata/target_universes/mouse/adipose.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/adipose.csv rename to sfaira/versions/metadata/target_universes/mouse/adipose.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv b/sfaira/versions/metadata/target_universes/mouse/bladder.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/bladder.csv rename to sfaira/versions/metadata/target_universes/mouse/bladder.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/blood.csv b/sfaira/versions/metadata/target_universes/mouse/blood.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/blood.csv rename to 
sfaira/versions/metadata/target_universes/mouse/blood.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/bone.csv b/sfaira/versions/metadata/target_universes/mouse/bone.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/bone.csv rename to sfaira/versions/metadata/target_universes/mouse/bone.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/brain.csv b/sfaira/versions/metadata/target_universes/mouse/brain.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/brain.csv rename to sfaira/versions/metadata/target_universes/mouse/brain.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/colon.csv b/sfaira/versions/metadata/target_universes/mouse/colon.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/colon.csv rename to sfaira/versions/metadata/target_universes/mouse/colon.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv b/sfaira/versions/metadata/target_universes/mouse/diaphragm.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/diaphragm.csv rename to sfaira/versions/metadata/target_universes/mouse/diaphragm.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/heart.csv b/sfaira/versions/metadata/target_universes/mouse/heart.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/heart.csv rename to sfaira/versions/metadata/target_universes/mouse/heart.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv b/sfaira/versions/metadata/target_universes/mouse/ileum.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/ileum.csv rename to sfaira/versions/metadata/target_universes/mouse/ileum.csv diff --git 
a/sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv b/sfaira/versions/metadata/target_universes/mouse/kidney.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/kidney.csv rename to sfaira/versions/metadata/target_universes/mouse/kidney.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/liver.csv b/sfaira/versions/metadata/target_universes/mouse/liver.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/liver.csv rename to sfaira/versions/metadata/target_universes/mouse/liver.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/lung.csv b/sfaira/versions/metadata/target_universes/mouse/lung.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/lung.csv rename to sfaira/versions/metadata/target_universes/mouse/lung.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv b/sfaira/versions/metadata/target_universes/mouse/malegonad.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/malegonad.csv rename to sfaira/versions/metadata/target_universes/mouse/malegonad.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv b/sfaira/versions/metadata/target_universes/mouse/mammarygland.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/mammarygland.csv rename to sfaira/versions/metadata/target_universes/mouse/mammarygland.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv b/sfaira/versions/metadata/target_universes/mouse/muscle.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/muscle.csv rename to sfaira/versions/metadata/target_universes/mouse/muscle.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv 
b/sfaira/versions/metadata/target_universes/mouse/ovary.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/ovary.csv rename to sfaira/versions/metadata/target_universes/mouse/ovary.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv b/sfaira/versions/metadata/target_universes/mouse/pancreas.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/pancreas.csv rename to sfaira/versions/metadata/target_universes/mouse/pancreas.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv b/sfaira/versions/metadata/target_universes/mouse/placenta.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/placenta.csv rename to sfaira/versions/metadata/target_universes/mouse/placenta.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv b/sfaira/versions/metadata/target_universes/mouse/prostate.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/prostate.csv rename to sfaira/versions/metadata/target_universes/mouse/prostate.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/rib.csv b/sfaira/versions/metadata/target_universes/mouse/rib.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/rib.csv rename to sfaira/versions/metadata/target_universes/mouse/rib.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/skin.csv b/sfaira/versions/metadata/target_universes/mouse/skin.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/skin.csv rename to sfaira/versions/metadata/target_universes/mouse/skin.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv b/sfaira/versions/metadata/target_universes/mouse/spleen.csv similarity index 100% rename from 
sfaira/versions/celltype_versions/target_universes/mouse/spleen.csv rename to sfaira/versions/metadata/target_universes/mouse/spleen.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv b/sfaira/versions/metadata/target_universes/mouse/stomach.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/stomach.csv rename to sfaira/versions/metadata/target_universes/mouse/stomach.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv b/sfaira/versions/metadata/target_universes/mouse/thymus.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/thymus.csv rename to sfaira/versions/metadata/target_universes/mouse/thymus.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv b/sfaira/versions/metadata/target_universes/mouse/tongue.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/tongue.csv rename to sfaira/versions/metadata/target_universes/mouse/tongue.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv b/sfaira/versions/metadata/target_universes/mouse/trachea.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/trachea.csv rename to sfaira/versions/metadata/target_universes/mouse/trachea.csv diff --git a/sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv b/sfaira/versions/metadata/target_universes/mouse/uterus.csv similarity index 100% rename from sfaira/versions/celltype_versions/target_universes/mouse/uterus.csv rename to sfaira/versions/metadata/target_universes/mouse/uterus.csv From afc694d2bfa373d7044a2082a77409ca515682dc Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 4 Feb 2021 20:15:03 +0100 Subject: [PATCH 036/161] File wise celltype maps (#98) * added celltype maps for d10_1016_j_cmet_2019_01_021 * improved automatic cell type mapping * allowed single mapping csv per data set group in a single py file --- sfaira/data/base.py | 71 ++++++++++- .../mouse_pancreas_2019_10x_thompson_x.csv | 14 +++ .../mouse_pancreas_2019_10x_thompson_x.py | 21 +--- sfaira/data/dataloaders/super_group.py | 16 ++- sfaira/unit_tests/test_data_template.py | 115 ++++++++++++++++-- sfaira/versions/metadata/base.py | 92 ++++++++++---- 6 files changed, 269 insertions(+), 60 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv diff --git a/sfaira/data/base.py b/sfaira/data/base.py index a22288936..ecb8b2c8f 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -679,7 +679,7 @@ def write_ontology_class_map( **kwargs ) if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=None) + tab.to_csv(fn, index=False) def load_ontology_class_map(self, fn): """ @@ -708,6 +708,11 @@ def project_celltypes_to_ontology(self): else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x for x in labels_original ] + # Validate mapped IDs based on ontology: + # This aborts with a readable error if there was a target in the mapping file that does not match the + # ontology. 
+ for x in labels_mapped: + self.ontology_celltypes.onto_cl.validate_node(x) del self.adata.obs[self.obs_key_cellontology_original] self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = labels_mapped self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = labels_original @@ -1574,6 +1579,10 @@ def __init__(self, datasets: dict): self.datasets = datasets self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() + @property + def _unknown_celltype_identifiers(self): + return np.unqiue(np.concatenate([v._unknown_celltype_identifiers for _, v in self.datasets.items()])) + def _load_group(self, load_raw: bool): """ @@ -1695,6 +1704,39 @@ def load_tobacked( except FileNotFoundError: del self.datasets[x] + def write_ontology_class_map( + self, + fn, + protected_writing: bool = True, + **kwargs + ): + """ + Write cell type maps of free text cell types to ontology classes. + + :param fn: File name of csv to load class maps from. + :param protected_writing: Only write if file was not already found. 
+ :return: + """ + tab = [] + for k, v in self.datasets.items(): + labels_original = np.sort(np.unique(np.concatenate([ + v.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values + ]))) + tab.append(v.ontology_celltypes.prepare_celltype_map_fuzzy( + source=labels_original, + match_only=False, + anatomical_constraint=v.organ, + include_synonyms=True, + omit_list=v._unknown_celltype_identifiers, + **kwargs + )) + tab = pandas.concat(tab, axis=0) + # Take out columns with the same source: + tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() + tab = tab.sort_values("source") + if not os.path.exists(fn) or not protected_writing: + tab.to_csv(fn, index=False) + @property def ids(self): return list(self.datasets.keys()) @@ -1818,6 +1860,23 @@ def ncells(self, annotated_only: bool = False): cells = self.ncells_bydataset(annotated_only=annotated_only) return np.sum(cells) + @property + def ontology_celltypes(self): + organism = np.unique([v.organism for _, v in self.datasets.items()]) + if len(organism) > 1: + # ToDo: think about whether this should be handled differently. + warnings.warn("found more than one organism in group, this could cause problems with using a joined cell " + "type ontology. Using only the ontology of the first data set in the group.") + return self.datasets[self.ids[0]].ontology_celltypes + + def project_celltypes_to_ontology(self): + """ + Project free text cell type names to ontology based on mapping table. + :return: + """ + for _, v in self.datasets.items(): + v.project_celltypes_to_ontology() + def subset(self, key, values): """ Subset list of adata objects based on sample-wise properties. 
@@ -1948,7 +2007,7 @@ def __init__( datasets_f.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) # Load cell type maps: for x in datasets_f: - x.load_ontology_class_map(fn=os.path.join(cwd, x.fn_ontology_class_map_csv)) + x.load_ontology_class_map(fn=os.path.join(cwd, file_module + ".csv")) datasets.extend(datasets_f) keys = [x.id for x in datasets] @@ -2241,3 +2300,11 @@ def subset_cells(self, key, values: Union[str, List[str]]): """ for x in self.dataset_groups.ids: self.dataset_groups[x].subset_cells(key=key, values=values) + + def project_celltypes_to_ontology(self): + """ + Project free text cell type names to ontology based on mapping table. + :return: + """ + for _, v in self.dataset_groups: + v.project_celltypes_to_ontology() diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv new file mode 100644 index 000000000..27e4ae739 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv @@ -0,0 +1,14 @@ +source,target +acinar,pancreatic acinar cell +alpha,pancreatic A cell +beta,type B pancreatic cell +delta,pancreatic D cell +ductal,pancreatic ductal cell +endothelial,endothelial cell +erythroblast,erythroblast +fibroblast,fibroblast +leukocyte,leukocyte +lymphatic endothelial cell,endothelial cell of lymphatic vessel +pp,pancreatic PP cell +smooth_muscle,smooth muscle cell +stellate cell,pancreatic stellate cell diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index fbdcdada9..06fe407e2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -32,6 +32,7 @@ def __init__( f"10.1016/j.cmet.2019.01.021" self.download = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" + self.download_meta = "private" self.author = "Bhushan" self.doi = "10.1016/j.cmet.2019.01.021" @@ -39,7 +40,7 @@ def __init__( self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "diabetic" self.year = 2019 @@ -47,24 +48,6 @@ def __init__( self.obs_key_cellontology_original = "celltypes" - self.class_maps = { - "0": { - "acinar": "pancreatic acinar cell", - "ductal": "pancreatic ductal cell", - "leukocyte": "leukocyte", - "T cell(Pancreas)": "t cell", - "B cell(Pancreas)": "b cell", - "beta": "pancreatic B cell", - "alpha": "pancreatic A cell", - "delta": "pancreatic D cell", - "pp": "pancreatic PP cell", - "smooth_muscle": "smooth muscle cell", - "stellate cell": "pancreatic stellate cell", - "fibroblast": "stromal cell", - "endothelial": "endothelial cell" - }, - } - def _load(self, fn=None): path_base = os.path.join(self.path, "mouse", "pancreas") celltypes = pandas.read_csv(os.path.join(path_base, self.sample_fn + "_annotation.csv"), index_col=0) diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py index 2a549254b..9c0321dc2 100644 --- a/sfaira/data/dataloaders/super_group.py +++ b/sfaira/data/dataloaders/super_group.py @@ -1,5 +1,10 @@ from typing import Union +try: + import sfaira_extension as sfairae +except ImportError: + sfairae = None + from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders from sfaira.data.dataloaders.databases import DatasetSuperGroupDatabases from sfaira.data import DatasetSuperGroup @@ -21,7 +26,7 @@ def __init__( :param meta_path: :param cache_path: """ - super().__init__(dataset_groups=[ + dsgs = [ DatasetSuperGroupLoaders( path=path, 
meta_path=meta_path, @@ -32,4 +37,11 @@ def __init__( meta_path=meta_path, cache_path=cache_path, ) - ]) + ] + if sfairae is not None: + dsgs.append(sfairae.data.loaders.DatasetSuperGroupLoaders( + path=path, + meta_path=meta_path, + cache_path=cache_path, + )) + super().__init__(dataset_groups=dsgs) diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index a11e3569c..1bd4d44a5 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -1,19 +1,41 @@ import os +import pydoc import unittest -from sfaira.data import DatasetGroupDirectoryOriented +from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup class TestDatasetTemplate(unittest.TestCase): + dir_template: str = "./template_data" def test_load(self): """ - Address ToDos before running test to customize to your data set. + Unit test to assist with data set contribution. + + The workflow for contributing a data set with this data loader is as follows: + + 1. Write a data loader and add it into the loader directory of your local sfaira installation. + 2. Address ToDos below. + 3. Run this unit test until you are not getting errors from your data loader anymore. + + In the process of this unit test, this data loader will have written putative cell type maps from your + annotation to the cell ontology. + + 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. + Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be + differentially successfull. The proposed IDs groups are separate by ":|||:" strings to give you a visial anchor + when going through these lists. You need to delete all of these division strings and all labels in the second + columns other than the best fit label. 
Do not change the first column, + (Note that columns are separated by ",") + You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl + 5. Run this unit test for a last time to check the cell type maps. + :return: """ remove_gene_version = True match_to_reference = None + classmap_by_file = True # ToDo build one class map per file or per data loader (potentially many per file) # ToDo: add correct module here as "YOUR_STUDY": # Addition coming soon: This path can either be in sfaira or in sfaira_extensions. # So far, this still has to be in sfaira. @@ -34,17 +56,86 @@ def test_load(self): allow_caching=True # tests caching ) # Create cell type conversion table: - for k, v in ds.datasets.items(): - v.load() - # Write this directly into sfaira installation so that it can be committed via git. - v.write_ontology_class_map( - fn=os.path.join("/".join(FILE_PATH.split("/")[:-1]), v.fn_ontology_class_map_csv), - protected_writing=False, - n_suggest=10, - ) - # ToDo: conflicts are not automatically resolved, please go back to https://www.ebi.ac.uk/ols/ontologies/cl - # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. + cwd = os.path.dirname(FILE_PATH) + dataset_module = str(cwd.split("/")[-1]) + if classmap_by_file: + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + DatasetFound = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dataset_module + "." + + file_module + ".Dataset") + # Check if global objects are available: + # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles + # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + sample_fns = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dataset_module + "." 
+ + file_module + ".SAMPLE_FNS") + sample_ids = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dataset_module + "." + + file_module + ".SAMPLE_IDS") + if sample_fns is not None and sample_ids is None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_fn=x, + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template + ) + for x in sample_fns + ] + elif sample_fns is None and sample_ids is not None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_id=x, + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template + ) + for x in sample_ids + ] + elif sample_fns is not None and sample_ids is not None: + raise ValueError(f"sample_fns and sample_ids both found for {f}") + else: + datasets_f = [DatasetFound( + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template + )] + # Build a data set group from the already loaded data sets and use the group ontology writing + # function. + current_ids = [x.id for x in datasets_f] + dsg_f = DatasetGroup(datasets=dict([(x, ds.datasets[x]) for x in current_ids])) + # Write this directly into sfaira installation so that it can be committed via git. + dsg_f.write_ontology_class_map( + fn=os.path.join(cwd, file_module + ".csv"), + protected_writing=True, + n_suggest=10, + ) + else: + for k, v in ds.datasets.items(): + # Write this directly into sfaira installation so that it can be committed via git. + v.write_ontology_class_map( + fn=os.path.join("/".join(FILE_PATH.split("/")[:-1]), v.fn_ontology_class_map_csv), + protected_writing=True, + n_suggest=10, + ) + + # ToDo: conflicts are not automatically resolved, please go back to + # https://www.ebi.ac.uk/ols/ontologies/cl + # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. 
+ # Test loading from cache: + ds = DatasetGroupDirectoryOriented( + file_base=FILE_PATH, + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template + ) ds.load( remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index cd04e5ff2..7f2558051 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -2,7 +2,6 @@ import networkx import numpy as np import obonet -#import owlready2 import pandas as pd import requests from typing import Dict, List, Tuple, Union @@ -18,7 +17,7 @@ - All classes inherit from Ontology() - Onotlogies can be read as follows: - - from string lists which are typically hardcoded in sfaira (OntologyList), + - from string lists which are typically hardcoded in sfaira (OntologyList), - from .obo files which are emitted by obofoundry for example (OntologyObo)), - ToDo from .owl files which are emitted from EBI for example (OntologyOwl)), - from the EBI web API via direct queries (OntologyEbi)). 
@@ -64,7 +63,7 @@ def __init__( self.nodes = terms @property - def node_names(self): + def node_names(self) -> List[str]: return self.nodes def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): @@ -123,7 +122,7 @@ def recursive_search(iri): self.nodes = recursive_search(iri=root_term) @property - def node_names(self): + def node_names(self) -> List[str]: return [v["name"] for k, v in self.nodes.items()] def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): @@ -138,14 +137,14 @@ def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: scores = np.array([ np.max( [ - fuzz.ratio(x.lower(), v["name"].lower()) + fuzz.partial_ratio(x.lower(), v["name"].lower()) ] + [ - fuzz.ratio(x.lower(), yyy.lower()) + fuzz.partial_ratio(x.lower(), yyy.lower()) for yy in self.synonym_node_properties if yy in v.keys() for yyy in v[yy] ] ) if include_synonyms else np.max([ - fuzz.ratio(x.lower(), v["name"].lower()) + fuzz.partial_ratio(x.lower(), v["name"].lower()) ]) for k, v in self.nodes.items() ]) @@ -190,22 +189,22 @@ def _check_graph(self): warnings.warn("DAG was broken") @property - def nodes(self): + def nodes(self) -> List[Tuple[str, dict]]: return list(self.graph.nodes.items()) @property - def nodes_dict(self): + def nodes_dict(self) -> dict: return self.graph.nodes.items() @property - def node_names(self): + def node_names(self) -> List[str]: return [x["name"] for x in self.graph.nodes.values()] @property - def node_ids(self): + def node_ids(self) -> List[str]: return list(self.graph.nodes()) - def id_from_name(self, x: str): + def id_from_name(self, x: str) -> str: self.validate_node(x=x) return [k for k, v in self.graph.nodes.items() if v["name"] == x][0] @@ -729,6 +728,15 @@ def prepare_celltype_map_fuzzy( If this function does not yield good matches, consider querying this web interface: https://www.ebi.ac.uk/ols/index + We use anatomical constraints as follows; + An anatomic 
constraint is a name of an anatomical structure that can be mapped to UBERON. + 1. We select cell types expected in this UBERON clade based on the link between CL and UBERON. + 2. We perform an additional fuzzy string matching with the anatomical structure added to the proposed label. + This is often beneficial because analysts do not always prefix such extension (e.g. pancreatic) to the + free text cell type labels if the entire sample consists only of cells from this anatomical structure. + Note that if the maps from 1) were perfect, this would not be necessary. In practice, we find this to still + recover some hits that are otherwise missed. + :param source: Free text node labels which are to be matched to ontology nodes. :param match_only: Whether to include strict matches only in output. :param include_synonyms: Whether to include synonyms of nodes in string search. @@ -746,26 +754,44 @@ def prepare_celltype_map_fuzzy( for x in source: if not isinstance(x, list) and not isinstance(x, tuple): x = [x, "nan"] + term = x[0].lower().strip("'").strip("\"").strip("]").strip("[") + # fuzz ratio and partial_ratio capture different types of matches well, we use both here: scores = np.array([ np.max([ - fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + fuzz.ratio(term, y[1]["name"].lower()), + fuzz.partial_ratio(term, y[1]["name"].lower()) ] + [ - fuzz.ratio(x[0].lower().strip("'").strip("\"").strip("]").strip("["), yy.lower()) + fuzz.ratio(term, yy.lower()) + for yy in y[1]["synonym"] + ] + [ + fuzz.partial_ratio(term, yy.lower()) for yy in y[1]["synonym"] ]) if "synonym" in y[1].keys() and include_synonyms else np.max([ - fuzz.ratio(x[0].lower().strip("'").strip("\""), y[1]["name"].lower()) + fuzz.ratio(term, y[1]["name"].lower()), + fuzz.partial_ratio(term, y[1]["name"].lower()) ]) for y in nodes ]) include.append(x[0].lower().strip("'").strip("\"") not in omit_list) - if match_only: + if match_only and not anatomical_constraint: 
matches.append(np.any(scores == 100)) # perfect match else: - if np.any(scores == 100): + if np.any(scores == 100) and not anatomical_constraint: matches.append([nodes[i][1]["name"] for i in np.where(scores == 100)[0]]) else: if anatomical_constraint is not None: + # Select best overall matches: + matchesi = ":".join([ + nodes[i][1]["name"] + for i in np.argsort(scores) + ][-np.max(n_suggest - 7, 0):][::-1]) + + # Use anatomical constraints two fold: + # 1. Select cell types that are in the correct ontology. + # 2. Run a second string matching with the anatomical word included. + + # 1. Select cell types that are in the correct ontology. # Check that anatomical constraint is a term in UBERON and get UBERON ID: anatomical_constraint_id = self.onto_anatomy.id_from_name(anatomical_constraint) # Select up to 5 nodes which match the anatomical constraint: @@ -800,23 +826,39 @@ def prepare_celltype_map_fuzzy( for y, z in zip(uberon_ids, anatomical_subselection) ] # Iterate over nodes sorted by string match score and masked by constraint: - matchesi = [ + matchesi = matchesi + ":|||:" + ":".join([ nodes[i][1]["name"] for i in np.argsort(scores) - if anatomical_subselection[i] - ][-5:][::-1] - # Select best remaining matches until n_suggests: - matchesi = matchesi + [ + if anatomical_subselection[i] and nodes[i][1]["name"] not in matchesi + ][-4:][::-1]) + + # 2. Run a second string matching with the anatomical word included. 
+ modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]").\ + strip("[") + scores_anatomy = np.array([ + np.max([ + fuzz.partial_ratio(modified_term, y[1]["name"].lower()) + ] + [ + fuzz.partial_ratio(modified_term, yy.lower()) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + np.max([ + fuzz.partial_ratio(modified_term, y[1]["name"].lower()) + ]) + for y in nodes + ]) + matchesi = matchesi + ":|||:" + ":".join([ nodes[i][1]["name"] - for i in np.argsort(scores) + for i in np.argsort(scores_anatomy) if nodes[i][1]["name"] not in matchesi - ][-np.max(n_suggest - len(matchesi), 0):][::-1] + ][-7:][::-1]) else: # Suggest top 10 hits by string match: matchesi = [nodes[i][1]["name"] for i in np.argsort(scores)[-n_suggest:]][::-1] + matchesi = ":".join(matchesi) matches.append(matchesi) tab = pd.DataFrame({ "source": source, - "target": [":".join(z) for z in matches] + "target": matches }) return tab.loc[include] From 3d89f688d9a91704fce49aa7648e8359eac21f0c Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Sat, 6 Feb 2021 17:26:34 +0100 Subject: [PATCH 037/161] File wise celltype maps (#99) * added cell type map writing script * took out train, ui and models module to fix import * first pass on dataset-wise meta data projection to UBERON and EBI experiment ontology --- sfaira/__init__.py | 6 +- .../human_pancreas_2017_smartseq2_enge_001.py | 2 +- .../mouse_bone_2018_microwell_001.py | 2 +- ...ouse_femalegonad_2018_microwell_han_001.py | 4 +- ...ouse_femalegonad_2018_microwell_han_002.py | 4 +- .../mouse_malegonad_2018_microwell_han_001.py | 4 +- .../mouse_malegonad_2018_microwell_han_002.py | 4 +- ...use_mammarygland_2018_microwell_han_001.py | 2 +- ...use_mammarygland_2018_microwell_han_002.py | 2 +- ...use_mammarygland_2018_microwell_han_003.py | 2 +- ...use_mammarygland_2018_microwell_han_004.py | 2 +- .../mouse_muscle_2018_microwell_han_001.py | 2 +- .../human_colon_2019_10x_kinchen_001.py | 2 +- .../human_colon_2019_10x_smilie_001.py | 2 +- .../human_ileum_2019_10x_martin_001.py | 2 +- .../human_prostate_2018_10x_henry_001.py | 2 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 2 +- .../human_lung_2020_10x_miller_001.py | 2 +- .../human_brain_2017_DroNcSeq_habib_001.py | 2 +- .../human_malegonad_2018_10x_guo_001.py | 6 +- .../human_liver_2018_10x_macparland_001.py | 2 +- .../human_mixed_2019_10x_szabo_001.py | 6 +- .../human_eye_2019_10x_menon_001.py | 2 +- .../human_liver_2019_10x_ramachandran_001.py | 2 +- .../human_liver_2019_10x_popescu_001.py | 2 +- .../human_bone_2020_microwell_han_001.py | 2 +- .../human_bone_2020_microwell_han_002.py | 2 +- ...uman_femalegonad_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_002.py | 4 +- .../human_malegonad_2020_microwell_han_001.py | 4 +- .../human_malegonad_2020_microwell_han_002.py | 4 +- .../human_muscle_2020_microwell_han_001.py | 2 +- .../human_muscle_2020_microwell_han_002.py | 2 +- .../human_colon_2020_10x_james_001.py | 2 +- .../human_lung_2019_10x_braga_001.py | 2 +- 
.../human_lung_2019_dropseq_braga_001.py | 2 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 2 +- .../human_kidney_2020_10x_liao_001.py | 2 +- .../human_eye_2019_10x_voigt_001.py | 2 +- .../human_x_2019_10x_wang_001.py | 2 +- .../human_lung_2020_10x_lukassen_001.py | 2 +- .../human_lung_2020_10x_habermann_001.py | 2 +- .../human_kidney_2019_10x_stewart_001.py | 2 +- .../human_thymus_2020_10x_park_001.py | 2 +- .../human_x_2019_10x_madissoon_001.py | 2 +- .../human_eye_2019_10x_lukowski_001.py | 2 +- .../d_nan/human_blood_2018_10x_ica_001.py | 2 +- .../human_blood_2019_10x_10xGenomics_001.py | 2 +- .../d_nan/human_bone_2018_10x_ica_001.py | 4 +- sfaira/data/utils/create_celltype_maps.py | 96 +++++++++++++++++++ sfaira/unit_tests/test_data_template.py | 6 +- sfaira/versions/metadata/base.py | 23 ++++- 52 files changed, 185 insertions(+), 68 deletions(-) create mode 100644 sfaira/data/utils/create_celltype_maps.py diff --git a/sfaira/__init__.py b/sfaira/__init__.py index a51ea95e3..d0b93c7f9 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- """A Data and Model Zoo for Single-Cell Genomics.""" -import sfaira.interface as ui -import sfaira.train -import sfaira.models +# import sfaira.interface as ui +# import sfaira.train +# import sfaira.models import sfaira.genomes import sfaira.data import sfaira.consts as consts diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index 75444b20e..5f264d8a2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -30,7 +30,7 @@ def __init__( self.doi = "10.1016/j.cell.2017.09.004" self.healthy = True self.normalization = "raw" - self.protocol = 
"Smartseq2" + self.protocol = "Smart-seq2" self.organ = "pancreas" # ToDo: "islet of Langerhans" self.organism = "human" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py index baa4cb60e..2034bcbbd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py @@ -16,7 +16,7 @@ def __init__( self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bone" + self.organ = "bone tissue" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py index db4d0801e..1faf7544d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py @@ -13,9 +13,9 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" + self.organ = "ovary" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py index affa74f21..9355c7acc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py @@ -13,9 +13,9 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "femalegonad" + self.organ = "ovary" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py index 32b6e3f18..0a1fd12e8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py @@ -13,9 +13,9 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" + self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" + self.organ = "testis" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py index 
67da67428..166dd785d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py @@ -13,9 +13,9 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" + self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "malegonad" + self.organ = "testis" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py index 350514e31..bc523124f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" + self.organ = "mammary gland" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py index a8e2bca14..738329bad 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" + self.organ = "mammary gland" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py index 5f3bab9bd..384ff9f30 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" + self.organ = "mammary gland" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py index b6c0351df..6dcab9d3d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "mammarygland" + self.organ = "mammary gland" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py index 34860727d..21d3019f0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "muscle" + self.organ = "skeletal muscle organ" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index e40cb5c55..272b24426 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -26,7 +26,7 @@ def __init__( self.normalization = "raw" self.organ = "colon" # ToDo: "lamina propria of mucosa of colon" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.year = 2019 self.var_symbol_col = "names" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py index 9c4f27a3e..0e612bc21 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "colon" # ToDo: "colonic epithelium" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py index bba8ea11c..12a891bca 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "ileum" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py index c5222d562..b2ab03366 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -29,7 +29,7 @@ def __init__( self.state_exact = "healthy" self.organ = "prostate" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.year = 2018 self.var_symbol_col = "index" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index d03786716..0cce5039d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -26,7 +26,7 @@ def __init__( self.normalization = "raw" self.organ = "pancreas" self.organism = "human" - self.protocol = "Smartseq2" + self.protocol = "Smart-seq2" self.year = 2016 self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py index 184392a49..2bcadaa7b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py index 15283deff..6487341e8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py @@ -27,7 +27,7 @@ def __init__( self.normalization = "raw" self.organ = "brain" self.organism = "human" - self.protocol = "DroNcSeq" + self.protocol = "DroNc-seq" self.state_exact = "healthy" self.year = 2017 diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py index 1a22b898a..deb1b9fbb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py @@ -17,7 +17,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_malegonad_2018_10x_guo_001_10.1038/s41422-018-0099-2" + self.id = "human_testis_2018_10x_guo_001_10.1038/s41422-018-0099-2" self.download = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_meta = None @@ -26,9 +26,9 @@ def __init__( self.doi = "10.1038/s41422-018-0099-2" self.healthy = True self.normalization = "raw" - self.organ = "malegonad" + self.organ = "testis" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2018 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py index 67945d2ce..1c994714f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py @@ -27,7 +27,7 @@ def __init__( self.normalization = "raw" self.organ = "liver" # ToDo: "caudate lobe" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2018 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 
f9d31966d..2b56b50c8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -48,7 +48,7 @@ def __init__( self.healthy = True self.normalization = "raw" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 @@ -81,7 +81,7 @@ def _load(self, fn=None): self.adata.obs["organ"] = "Lung" elif "PP003" in fn or "PP004" in fn: self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Bone Marrow" + self.adata.obs["organ"] = "bone marrow" elif "PP005" in fn or "PP006" in fn: self.adata.obs["donor"] = "Donor1" self.adata.obs["organ"] = "Lymph Node" @@ -90,7 +90,7 @@ def _load(self, fn=None): self.adata.obs["organ"] = "Lung" elif "PP011" in fn or "PP012" in fn: self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Bone Marrow" + self.adata.obs["organ"] = "bone marrow" elif "PP013" in fn or "PP014" in fn: self.adata.obs["donor"] = "Donor2" self.adata.obs["organ"] = "Lymph Node" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py index 8ce5afabe..7242c7d03 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py @@ -26,7 +26,7 @@ def __init__( self.normalization = "raw" self.organ = "eye" # ToDo: "retina" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py index 
cf932bc48..041fa36ac 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py @@ -50,7 +50,7 @@ def __init__( self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.year = 2019 self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py index e70da4873..7cda429eb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py @@ -26,7 +26,7 @@ def __init__( self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py index 9f04b99d6..c3b3bd450 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "bone" + self.organ = "bone tissue" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py index 3bf057903..8debbac63 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "bone" + self.organ = "bone tissue" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py index 1fcb34991..7d00681f8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py @@ -12,8 +12,8 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "femalegonad" + self.id = "human_ovary_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "ovary" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py index 257d7750a..b64a40fb6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py @@ -12,8 +12,8 @@ def __init__( **kwargs ): super().__init__(path=path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "femalegonad" + self.id = "human_ovary_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "ovary" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py index 9acf2ac6e..322a7d9e1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py @@ -12,8 +12,8 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "malegonad" + self.id = "human_testis_2020_microwell_han_001_10.1038/s41586-020-2157-4" + self.organ = "testis" self.class_maps = { "0": { "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py index 8964fa222..4067551fb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py @@ -12,8 +12,8 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "malegonad" + self.id = "human_testis_2020_microwell_han_002_10.1038/s41586-020-2157-4" + self.organ = "testis" 
self.class_maps = { "0": { "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py index 1ac3d1e92..28f9e252d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "muscle" + self.organ = "skeletal muscle organ" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py index 9af4d2f15..3e4f75b71 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "muscle" + self.organ = "skeletal muscle organ" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py index 681dc6abd..2bd3c8b6c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "colon" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py index 2bbb8b5f3..4654b0eb2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py @@ -34,7 +34,7 @@ def __init__( # ToDo: 1->"alveoli, parenchyma" # ToDo: 2->"bronchi" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 self.normalization = "norm" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index a83c74dec..6808f4fbc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -27,7 +27,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" # ToDo: "parenchymal lung and distal airway specimens" self.organism = "human" - self.protocol = "dropseq" + self.protocol = "Drop-seq" self.state_exact = "uninvolved areas of tumour resection material" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py index 4ad06f4ec..e0a831c7e 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -29,7 +29,7 @@ def __init__( self.normalization = "raw" self.organ = "brain" self.organism = "mouse" - self.protocol = "microwell" + self.protocol = "microwell-seq" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py index 81e9cf13c..d5939f778 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -29,7 +29,7 @@ def __init__( self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2020 self.doi = "10.1038/s41597-019-0351-8" diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py index 929d0094b..30a2c4f34 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -27,7 +27,7 @@ def __init__( self.normalization = "norm" self.organ = "eye" # ToDo: "retina" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py index ad7e68dc9..05858a633 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py @@ -35,7 +35,7 @@ def __init__( self.normalization = "raw" self.organ = "colon" if organ == "colon" else "ileum" if organ == "ileum" else "rectum" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py index d31cbb9b6..7caf00e07 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -34,7 +34,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py index 51d8cc958..ce1359fa7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -30,7 +30,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" # ToDo: "parenchyma" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.year = 2020 self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py index 0e5dd65f6..d36eeec6a 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -30,7 +30,7 @@ def __init__( self.normalization = "norm" self.organ = "kidney" # ToDo: "renal medulla, renal pelvis, ureter, cortex of kidney" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py index 5f9715507..671c19742 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -27,7 +27,7 @@ def __init__( self.normalization = "norm" self.organ = "thymus" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index 25ae0c22f..a52c8b4bc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -46,7 +46,7 @@ def __init__( self.normalization = "raw" self.organ = organ self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py index 9527f968a..f05e0a9b7 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "eye" # ToDo: "retina" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py index 67f12d467..6f5a1f1f4 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py @@ -28,7 +28,7 @@ def __init__( self.normalization = "raw" self.organ = "blood" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2018 diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py index c624bbd5b..5e226c2f8 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -38,7 +38,7 @@ def __init__( self.normalization = "raw" self.organ = "blood" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py index d799cacb2..5c0005607 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py @@ -26,9 +26,9 @@ def __init__( self.doi = "no_doi" self.healthy = True self.normalization = "raw" - self.organ 
= "bone" + self.organ = "bone tissue" self.organism = "human" - self.protocol = "10x" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2018 diff --git a/sfaira/data/utils/create_celltype_maps.py b/sfaira/data/utils/create_celltype_maps.py new file mode 100644 index 000000000..a1ecfc49a --- /dev/null +++ b/sfaira/data/utils/create_celltype_maps.py @@ -0,0 +1,96 @@ +import os +import pydoc +import sfaira +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021 import FILE_PATH + +print(tf.__version__) + +# Set global variables. +print("sys.argv", sys.argv) + +path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +processes = int(str(sys.argv[4])) + +# Data loaders with one file per data set rather than one summary file: +# ToDo: not supported yet. +# TMS d10_1101_661728 +# MCA d10_1016_j_cell_2018_02_001 +# HCL d10_1038_s41586_020_2157_4 +studys_separate_csvs = [] +# "d10_1101_661728", +# "d10_1016_j_cell_2018_02_001", +# "d10_1038_s41586_020_2157_4" +# ] + +dir_sfaira_dataloaders = os.path.join(*str(os.path.dirname(FILE_PATH)).split("/")[:-1]) + +dir_prefix = "d" +dir_exlcude = [] +for dir_study in os.listdir(dir_sfaira_dataloaders): + if os.path.isdir(os.path.join(dir_sfaira_dataloaders, dir_study)): # only directories + # Narrow down to data set directories: + if dir_study[:len(dir_prefix)] == dir_prefix and dir_study not in dir_exlcude: + for f_dataset in os.listdir(os.path.join(dir_sfaira_dataloaders, dir_study)): + if os.path.isfile(os.path.join(dir_sfaira_dataloaders, dir_study, f_dataset)): # only files + # Narrow down to data set files: + if f_dataset.split(".")[-1] == "py" and \ + f_dataset.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(str(f_dataset).split(".")[:-1]) + DatasetFound = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." 
+ file_module + ".Dataset") + # Check if global objects are available: + # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles + # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + sample_fns = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".SAMPLE_FNS") + sample_ids = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".SAMPLE_IDS") + if sample_fns is not None and sample_ids is None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_fn=x, + path=path, + meta_path=path_meta, + cache_path=path_cache + ) + for x in sample_fns + ] + elif sample_fns is None and sample_ids is not None: + # DatasetBaseGroupLoadingOneFile: + datasets_f = [ + DatasetFound( + sample_id=x, + path=path, + meta_path=path_meta, + cache_path=path_cache + ) + for x in sample_ids + ] + elif sample_fns is not None and sample_ids is not None: + raise ValueError(f"sample_fns and sample_ids both found for {f_dataset}") + else: + datasets_f = [DatasetFound( + path=path, + meta_path=path_meta, + cache_path=path_cache + )] + dsg_f = sfaira.data.DatasetGroup(datasets=dict([(x.id, x) for x in datasets_f])) + dsg_f.load( + load_raw=False, + allow_caching=True + ) + if str(dir_study) in studys_separate_csvs: + pass + else: + dsg_f.write_ontology_class_map( + fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".csv"), + protected_writing=True, + n_suggest=10, + ) diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index 1bd4d44a5..50ae803a2 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -102,9 +102,9 @@ def test_load(self): raise ValueError(f"sample_fns and sample_ids both found for {f}") else: datasets_f = [DatasetFound( - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template + path=self.dir_template, + meta_path=self.dir_template, + cache_path=self.dir_template
)] # Build a data set group from the already loaded data sets and use the group ontology writing # function. diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 7f2558051..73fd26a79 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -93,14 +93,28 @@ class OntologyEbi(Ontology): Recursively assembles ontology by querying EBI web interface. Not recommended for large ontologies. + Yields unstructured list of terms. """ def __init__( self, ontology: str, root_term: str, + additional_terms: Union[Dict[str, Dict[str, str]], None] = None, **kwargs ): + """ + + :param ontology: + :param root_term: + :param additional_terms: Dictionary with additional terms, values should be + + - "name" necessary + - "description" optional + - "synonyms" optional + - "has_children" optional + :param kwargs: + """ def get_url(iri): return f"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/" \ f"http%253A%252F%252Fwww.ebi.ac.uk%252F{ontology}%252F{iri}/children" @@ -120,6 +134,7 @@ def recursive_search(iri): return nodes_new self.nodes = recursive_search(iri=root_term) + self.nodes.update(additional_terms) @property def node_names(self) -> List[str]: @@ -601,7 +616,13 @@ def __init__( ontology: str = "efo", root_term: str = "EFO_0010183", ): - super().__init__(ontology=ontology, root_term=root_term) + super().__init__( + ontology=ontology, + root_term=root_term, + additional_terms={ + "microwell-seq": {"name": "microwell-seq"} + } + ) class CelltypeUniverse: From 409a7967f7a4d44843a83760a30a3de2ba857ed1 Mon Sep 17 00:00:00 2001 From: le-ander <20015434+le-ander@users.noreply.github.com> Date: Mon, 8 Feb 2021 13:06:37 +0100 Subject: [PATCH 038/161] lint --- sfaira/unit_tests/test_celltype_universe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sfaira/unit_tests/test_celltype_universe.py b/sfaira/unit_tests/test_celltype_universe.py index 5e149f359..7838f64cd 100644 --- 
a/sfaira/unit_tests/test_celltype_universe.py +++ b/sfaira/unit_tests/test_celltype_universe.py @@ -25,7 +25,7 @@ def test_debugging(self, reduced=False): index=False, quoting=csv.QUOTE_NONE, sep=";" ) else: - tab2.loc[tab["matched"].values == False].to_csv( + tab2.loc[tab["matched"].values is False].to_csv( self.dir_debugging + k + "_" + kk + "_universe.csv", index=False, quoting=csv.QUOTE_NONE ) @@ -62,7 +62,7 @@ def test_only(self): for kk, vv in v.items(): universe = vv.celltype_universe["0"] tab = onto.find_nodes_fuzzy(universe, match_only=True) - print(tab.loc[tab["matched"].values == False]) + print(tab.loc[tab["matched"].values is False]) assert np.all(tab["matched"].values), f"{k} {kk}" From 2f269bd3f59b404e381e8405d6941ec73cd64b22 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 9 Feb 2021 13:22:26 +0100 Subject: [PATCH 039/161] added sfaira.data.utils and put cell type search into there, improved fuzzy match search (#107) --- requirements.txt | 1 + setup.py | 1 + sfaira/data/__init__.py | 1 + sfaira/data/base.py | 4 +- sfaira/data/utils.py | 53 ++++++++ .../create_celltype_maps.py | 2 +- .../{utils => utils_scripts}/create_meta.py | 0 .../create_meta_and_cache.py | 0 .../write_backed_human.py | 0 .../write_backed_mouse.py | 0 sfaira/unit_tests/test_data_template.py | 2 +- sfaira/unit_tests/test_data_utils.py | 14 ++ sfaira/versions/metadata/base.py | 122 ++++++++++++++---- 13 files changed, 169 insertions(+), 31 deletions(-) create mode 100644 sfaira/data/utils.py rename sfaira/data/{utils => utils_scripts}/create_celltype_maps.py (98%) rename sfaira/data/{utils => utils_scripts}/create_meta.py (100%) rename sfaira/data/{utils => utils_scripts}/create_meta_and_cache.py (100%) rename sfaira/data/{utils => utils_scripts}/write_backed_human.py (100%) rename sfaira/data/{utils => utils_scripts}/write_backed_mouse.py (100%) create mode 100644 sfaira/unit_tests/test_data_utils.py diff --git a/requirements.txt b/requirements.txt index 
d0722796f..901fa721c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ numpy>=1.14.0 obonet openpyxl pandas +pytest python-Levenshtein scanpy scipy diff --git a/setup.py b/setup.py index 91f191226..ad706615c 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ 'numpy>=1.16.4', 'obonet', 'pandas', + 'pytest', 'python-Levenshtein', 'scipy>=1.2.1', 'tqdm', diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index f5ff50212..963d64839 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -4,3 +4,4 @@ from . import dataloaders from .dataloaders import DatasetSuperGroupSfaira from .interactive import DatasetInteractive +from . import utils diff --git a/sfaira/data/base.py b/sfaira/data/base.py index ecb8b2c8f..1d396782d 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -670,7 +670,7 @@ def write_ontology_class_map( :return: """ labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) - tab = self.ontology_celltypes.prepare_celltype_map_fuzzy( + tab = self.ontology_celltypes.prepare_celltype_map_tab( source=labels_original, match_only=False, anatomical_constraint=self.organ, @@ -1722,7 +1722,7 @@ def write_ontology_class_map( labels_original = np.sort(np.unique(np.concatenate([ v.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values ]))) - tab.append(v.ontology_celltypes.prepare_celltype_map_fuzzy( + tab.append(v.ontology_celltypes.prepare_celltype_map_tab( source=labels_original, match_only=False, anatomical_constraint=v.organ, diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py new file mode 100644 index 000000000..46da63f9b --- /dev/null +++ b/sfaira/data/utils.py @@ -0,0 +1,53 @@ +from typing import Dict, List, Union + +from sfaira.versions.metadata import CelltypeUniverse + + +def map_celltype_to_ontology( + source: str, + organism: str, + include_synonyms: bool = True, + anatomical_constraint: Union[str, None] = None, + n_suggest: int = 4, + 
choices_for_perfect_match: bool = True, + keep_strategy: bool = False, + **kwargs +) -> Union[List[str], Dict[str, List[str]], str]: + """ + Map free text node name to ontology node names via sfaira cell type matching. + + For details, see also sfaira.versions.metadata.CelltypeUniverse.prepare_celltype_map_fuzzy() + + :param source: Free text node label which is to be matched to ontology nodes. Must not be a a list or tuple. + :param organism: Organism, defines ontology extension used. + :param include_synonyms: Whether to include synonyms of nodes in string search. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. + :param n_suggest: Number of cell types to suggest per search strategy. + :param choices_for_perfect_match: Whether to give additional matches if a perfect match was found. Note that there + are cases in which an apparent perfect match corresponds to a general term which could be specified knowing the + anatomic location of the sample. If this is False and a perfect match is found, only this perfect match is + returned as a string, rather than as a list. + :param keep_strategy: Whether to keep search results structured by search strategy. + For details, see also sfaira.versions.metadata.CelltypeUniverse.prepare_celltype_map_fuzzy() + :param **kwargs: Additional parameters to CelltypeUniverse. + :return: List of high priority matches or perfect match (see choices_for_perfect_match) or, if keep_strategy, + dictionary of lists of search strategies named by strategy name. 
+ """ + assert isinstance(source, str) + cu = CelltypeUniverse(organism=organism, **kwargs) + matches = cu.prepare_celltype_map_fuzzy( + source=[source], + match_only=False, + include_synonyms=include_synonyms, + anatomical_constraint=anatomical_constraint, + omit_list=[], + n_suggest=n_suggest, + )[0][0] + # Flatten list of lists: + matches_flat = [x for xx in matches.values() for x in xx] + if not choices_for_perfect_match and source in matches_flat: + return source + elif keep_strategy: + return matches + else: + return matches_flat diff --git a/sfaira/data/utils/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps.py similarity index 98% rename from sfaira/data/utils/create_celltype_maps.py rename to sfaira/data/utils_scripts/create_celltype_maps.py index a1ecfc49a..f13bf37a1 100644 --- a/sfaira/data/utils/create_celltype_maps.py +++ b/sfaira/data/utils_scripts/create_celltype_maps.py @@ -92,5 +92,5 @@ dsg_f.write_ontology_class_map( fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".csv"), protected_writing=True, - n_suggest=10, + n_suggest=4, ) diff --git a/sfaira/data/utils/create_meta.py b/sfaira/data/utils_scripts/create_meta.py similarity index 100% rename from sfaira/data/utils/create_meta.py rename to sfaira/data/utils_scripts/create_meta.py diff --git a/sfaira/data/utils/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py similarity index 100% rename from sfaira/data/utils/create_meta_and_cache.py rename to sfaira/data/utils_scripts/create_meta_and_cache.py diff --git a/sfaira/data/utils/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py similarity index 100% rename from sfaira/data/utils/write_backed_human.py rename to sfaira/data/utils_scripts/write_backed_human.py diff --git a/sfaira/data/utils/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py similarity index 100% rename from sfaira/data/utils/write_backed_mouse.py rename to 
sfaira/data/utils_scripts/write_backed_mouse.py diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index 50ae803a2..572c1f9a6 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -114,7 +114,7 @@ def test_load(self): dsg_f.write_ontology_class_map( fn=os.path.join(cwd, file_module + ".csv"), protected_writing=True, - n_suggest=10, + n_suggest=4, ) else: for k, v in ds.datasets.items(): diff --git a/sfaira/unit_tests/test_data_utils.py b/sfaira/unit_tests/test_data_utils.py new file mode 100644 index 000000000..3fb3c7a80 --- /dev/null +++ b/sfaira/unit_tests/test_data_utils.py @@ -0,0 +1,14 @@ +import pytest + +from sfaira.data.utils import map_celltype_to_ontology + + +@pytest.mark.parametrize("perfect_match", [True, False]) +def test_map_celltype_to_ontology(perfect_match: bool): + trial_cell_type = "T cell" if perfect_match else "T cells" + x = map_celltype_to_ontology(source=trial_cell_type, organism="human", choices_for_perfect_match=False) + print(x) + if perfect_match: + assert isinstance(x, str) + else: + assert isinstance(x, list) diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 73fd26a79..55cc46c6b 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -741,10 +741,13 @@ def prepare_celltype_map_fuzzy( include_synonyms: bool = True, anatomical_constraint: Union[str, None] = None, omit_list: list = [], - n_suggest: int = 10, - ) -> pd.DataFrame: + n_suggest: int = 4, + ) -> Tuple[ + List[Dict[str, List[str]]], + List[bool] + ]: """ - Map free text node names to ontology node names via fuzzy string matching. 
+ Map free text node names to ontology node names via fuzzy string matching and return as list If this function does not yield good matches, consider querying this web interface: https://www.ebi.ac.uk/ols/index @@ -763,21 +766,37 @@ def prepare_celltype_map_fuzzy( :param include_synonyms: Whether to include synonyms of nodes in string search. :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. :param omit_list: Free text node labels to omit in map. - :param n_suggest: Number of cell types to suggest. - :return: Table with source and target node names. Columns: "source", "target" + :param n_suggest: Number of cell types to suggest per search strategy. + :return: Tuple + + - List with matches for each source, each entry is a dictionary, + of lists of search strategies named by strategy name. + - List with boolean indicator whether or not this output should be reported. """ from fuzzywuzzy import fuzz matches = [] nodes = self.onto_cl.nodes - include = [] + include_terms = [] if isinstance(source, pd.DataFrame): source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) for x in source: if not isinstance(x, list) and not isinstance(x, tuple): x = [x, "nan"] term = x[0].lower().strip("'").strip("\"").strip("]").strip("[") + # Test for perfect string matching: + scores_strict = np.array([ + np.max([ + 100 if term == y[1]["name"].lower() else 0 + ] + [ + 100 if term == yy.lower() else 0 + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + 100 if term == y[1]["name"].lower() else 0 + for y in nodes + ]) + # Test for partial string matching: # fuzz ratio and partial_ratio capture different types of matches well, we use both here: - scores = np.array([ + scores_lenient = np.array([ np.max([ fuzz.ratio(term, y[1]["name"].lower()), fuzz.partial_ratio(term, y[1]["name"].lower()) @@ -794,19 +813,29 @@ def prepare_celltype_map_fuzzy( ]) for y in nodes ]) - 
include.append(x[0].lower().strip("'").strip("\"") not in omit_list) + include_terms.append(x[0].lower().strip("'").strip("\"") not in omit_list) if match_only and not anatomical_constraint: - matches.append(np.any(scores == 100)) # perfect match + matches.append({"perfect_match": [ + [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]] + ]}) # perfect match else: - if np.any(scores == 100) and not anatomical_constraint: - matches.append([nodes[i][1]["name"] for i in np.where(scores == 100)[0]]) + matchesi = {} + if np.any(scores_strict == 100) and not anatomical_constraint: + matchesi.update({"perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]]}) else: if anatomical_constraint is not None: - # Select best overall matches: - matchesi = ":".join([ + # Select best overall matches based on lenient and strict matching: + matchesi.update({"perfect_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_strict) + ][-n_suggest:][::-1]}) + matchesi.update({"lenient_match": [ nodes[i][1]["name"] - for i in np.argsort(scores) - ][-np.max(n_suggest - 7, 0):][::-1]) + for i in np.argsort(scores_lenient) + if not np.any( + nodes[i][1]["name"] in v for v in matchesi.values() + ) + ][-n_suggest:][::-1]}) # Use anatomical constraints two fold: # 1. Select cell types that are in the correct ontology. @@ -847,11 +876,13 @@ def prepare_celltype_map_fuzzy( for y, z in zip(uberon_ids, anatomical_subselection) ] # Iterate over nodes sorted by string match score and masked by constraint: - matchesi = matchesi + ":|||:" + ":".join([ + matchesi = matchesi.update({"anatomic_onotolgy_match": [ nodes[i][1]["name"] - for i in np.argsort(scores) - if anatomical_subselection[i] and nodes[i][1]["name"] not in matchesi - ][-4:][::-1]) + for i in np.argsort(scores_lenient) + if anatomical_subselection[i] and not np.any( + nodes[i][1]["name"] in v for v in matchesi.values() + ) + ][-n_suggest:][::-1]}) # 2. 
Run a second string matching with the anatomical word included. modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]").\ @@ -868,18 +899,55 @@ def prepare_celltype_map_fuzzy( ]) for y in nodes ]) - matchesi = matchesi + ":|||:" + ":".join([ + matchesi = matchesi.update({"anatomic_string_match": [ nodes[i][1]["name"] for i in np.argsort(scores_anatomy) - if nodes[i][1]["name"] not in matchesi - ][-7:][::-1]) + if nodes[i][1]["name"] and not np.any( + nodes[i][1]["name"] in v for v in matchesi.values() + ) + ][-n_suggest:][::-1]}) else: # Suggest top 10 hits by string match: - matchesi = [nodes[i][1]["name"] for i in np.argsort(scores)[-n_suggest:]][::-1] - matchesi = ":".join(matchesi) - matches.append(matchesi) + matchesi.update({"lenient_match": [ + [nodes[i][1]["name"] for i in np.argsort(scores_lenient)[-n_suggest:]][::-1] + ]}) + matches.append(matchesi) + return matches, include_terms + + def prepare_celltype_map_tab( + self, + source, + match_only: bool = False, + include_synonyms: bool = True, + anatomical_constraint: Union[str, None] = None, + omit_list: list = [], + n_suggest: int = 10, + separator_suggestions: str = ":", + separator_groups: str = ":|||:", + ) -> pd.DataFrame: + """ + Map free text node names to ontology node names via fuzzy string matching and return as matching table. + + :param source: Free text node labels which are to be matched to ontology nodes. + :param match_only: Whether to include strict matches only in output. + :param include_synonyms: Whether to include synonyms of nodes in string search. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. + :param omit_list: Free text node labels to omit in map. + :param n_suggest: Number of cell types to suggest per search strategy. + :param separator_suggestions: String separator for matches of a single strategy in output target column. 
+ :param separator_groups: String separator for search strategy grouped matches in output target column. + :return: Table with source and target node names. Columns: "source", "target" + """ + matches, include_terms = self.prepare_celltype_map_fuzzy( + source=source, + match_only=match_only, + include_synonyms=include_synonyms, + anatomical_constraint=anatomical_constraint, + omit_list=omit_list, + n_suggest=n_suggest, + ) tab = pd.DataFrame({ "source": source, - "target": matches + "target": [separator_groups.join([separator_suggestions.join(v) for v in x.values()]) for x in matches] }) - return tab.loc[include] + return tab.loc[include_terms] From 6b74875398cc9f609404cb2da9d51929c40bde13 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 10 Feb 2021 11:21:06 +0100 Subject: [PATCH 040/161] Fix bugs encountered in using new data loaders (#104) * fixed but in meta data reading * added progress reporting into cellmaps file * keep old cell type column in .obs after projection * excluded non annotated data sets from ontology map writing * fixed bug that annotated was not bool but array when loading from cache * fixed bug in meta data access after caching * speciefied anatomical structures in ToDos to UBERON IDs * fixed protocol misspellings * fixed bug in TMS loader * fixed bug in anatomy based CO matching * fixed bladder related uberon terms * fixed marrow related organ bugs * fixed tms heart * fixed skin annotation --- sfaira/data/base.py | 73 ++++++++++--------- .../human_pancreas_2017_smartseq2_enge_001.py | 2 +- .../mouse_bladder_2018_microwell_han_001.py | 2 +- .../human_colon_2019_10x_kinchen_001.py | 2 +- .../human_colon_2019_10x_smilie_001.py | 2 +- .../human_liver_2018_10x_macparland_001.py | 2 +- .../human_mixed_2019_10x_szabo_001.py | 8 +- .../human_eye_2019_10x_menon_001.py | 2 +- .../human_placenta_2018_10x_ventotormo_001.py | 5 +- .../human_bladder_2020_microwell_han_001.py | 2 +- .../human_bladder_2020_microwell_han_002.py | 2 +- 
...uman_gallbladder_2020_microwell_han_001.py | 2 +- .../human_skin_2020_microwell_han_001.py | 2 +- .../human_skin_2020_microwell_han_002.py | 2 +- .../human_lung_2020_10x_travaglini_001.py | 2 +- .../human_lung_2019_10x_braga_001.py | 4 +- .../human_lung_2019_dropseq_braga_001.py | 2 +- .../human_eye_2019_10x_voigt_001.py | 2 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 28 +++---- .../human_lung_2020_10x_habermann_001.py | 2 +- .../human_kidney_2019_10x_stewart_001.py | 2 +- .../human_x_2019_10x_madissoon_001.py | 7 +- .../human_eye_2019_10x_lukowski_001.py | 2 +- .../d_nan/human_bone_2018_10x_ica_001.py | 2 +- .../utils_scripts/create_celltype_maps.py | 4 +- sfaira/versions/metadata/base.py | 4 +- 26 files changed, 86 insertions(+), 83 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 1d396782d..f8555cd0d 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -513,13 +513,10 @@ def _set_metadata_in_adata(self): ): if x is None and z is None: self.adata.uns[y] = None - elif x is not None and z is not None: - raise ValueError(f"attribute {y} of data set {self.id} was set both for full data set and per cell, " - f"only set one of the two or neither.") elif x is not None and z is None: # Attribute supplied per data set: Write into .uns. self.adata.uns[y] = x - elif x is None and z is not None: + elif z is not None: # Attribute supplied per cell: Write into .obs. # Search for direct match of the sought-after column name or for attribute specific obs key. if z not in self.adata.obs.keys(): @@ -669,17 +666,20 @@ def write_ontology_class_map( :param protected_writing: Only write if file was not already found. 
:return: """ - labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) - tab = self.ontology_celltypes.prepare_celltype_map_tab( - source=labels_original, - match_only=False, - anatomical_constraint=self.organ, - include_synonyms=True, - omit_list=self._unknown_celltype_identifiers, - **kwargs - ) - if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=False) + if not self.annotated: + warnings.warn(f"attempted to write ontology classmaps for data set {self.id} without annotation") + else: + labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) + tab = self.ontology_celltypes.prepare_celltype_map_tab( + source=labels_original, + match_only=False, + anatomical_constraint=self.organ, + include_synonyms=True, + omit_list=self._unknown_celltype_identifiers, + **kwargs + ) + if not os.path.exists(fn) or not protected_writing: + tab.to_csv(fn, index=False) def load_ontology_class_map(self, fn): """ @@ -713,7 +713,6 @@ def project_celltypes_to_ontology(self): # ontology. 
for x in labels_mapped: self.ontology_celltypes.onto_cl.validate_node(x) - del self.adata.obs[self.obs_key_cellontology_original] self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = labels_mapped self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = labels_original @@ -754,7 +753,7 @@ def load_meta(self, fn: Union[PathLike, str, None]): for k, v in self._META_DATA_FIELDS.items(): if k in meta.columns: if meta[k].values[0] is not None: - meta[k] = v(meta[k]) + meta[k] = np.asarray(meta[k].values, dtype=v) self.meta = meta.fillna("None").replace({"None": None}) def write_meta( @@ -859,7 +858,7 @@ def annotated(self) -> Union[bool, None]: if self.meta is None: self.load_meta(fn=None) if self.meta is not None and self._ADATA_IDS_SFAIRA.annotated in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.annotated] + return self.meta[self._ADATA_IDS_SFAIRA.annotated].values[0] elif self.loaded: # If data set was loaded and there is still no annotation indicated, it is declared unannotated. 
return False @@ -1719,23 +1718,27 @@ def write_ontology_class_map( """ tab = [] for k, v in self.datasets.items(): - labels_original = np.sort(np.unique(np.concatenate([ - v.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values - ]))) - tab.append(v.ontology_celltypes.prepare_celltype_map_tab( - source=labels_original, - match_only=False, - anatomical_constraint=v.organ, - include_synonyms=True, - omit_list=v._unknown_celltype_identifiers, - **kwargs - )) - tab = pandas.concat(tab, axis=0) - # Take out columns with the same source: - tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() - tab = tab.sort_values("source") - if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=False) + if v.annotated: + labels_original = np.sort(np.unique(np.concatenate([ + v.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values + ]))) + tab.append(v.ontology_celltypes.prepare_celltype_map_tab( + source=labels_original, + match_only=False, + anatomical_constraint=v.organ, + include_synonyms=True, + omit_list=v._unknown_celltype_identifiers, + **kwargs + )) + if len(tab) == 0: + warnings.warn(f"attempted to write ontology classmaps for group without annotated data sets") + else: + tab = pandas.concat(tab, axis=0) + # Take out columns with the same source: + tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() + tab = tab.sort_values("source") + if not os.path.exists(fn) or not protected_writing: + tab.to_csv(fn, index=False) @property def ids(self): diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index 5f264d8a2..3ad62fc30 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -31,7 +31,7 @@ def __init__( self.healthy = True self.normalization = "raw" self.protocol = "Smart-seq2" - self.organ = "pancreas" # ToDo: "islet of Langerhans" + self.organ = "islet of Langerhans" self.organism = "human" self.state_exact = "healthy" self.year = 2017 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py index 347120a7d..6854aa37e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -15,7 +15,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.organ = "bladder" + self.organ = "bladder organ" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index 272b24426..a3073137e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -24,7 +24,7 @@ def __init__( self.author = "Simmons" self.doi = "10.1016/j.cell.2018.08.067" self.normalization = "raw" - self.organ = "colon" # ToDo: "lamina propria of mucosa of colon" + self.organ = "lamina propria of mucosa of colon" self.organism = "human" self.protocol = "10X sequencing" self.year = 2019 diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py index 0e612bc21..863e9e540 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -26,7 +26,7 @@ def __init__( self.doi = "10.1016/j.cell.2019.06.029" self.healthy = True self.normalization = "raw" - self.organ = "colon" # ToDo: "colonic epithelium" + self.organ = "colonic epithelium" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py index 1c994714f..75c3c24b1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py @@ -25,7 +25,7 @@ def __init__( self.doi = "10.1038/s41467-018-06318-7" self.healthy = True self.normalization = "raw" - self.organ = "liver" # ToDo: "caudate lobe" + self.organ = "caudate lobe of liver" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 2b56b50c8..d92829246 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -78,22 +78,22 @@ def _load(self, fn=None): self.adata.var = var if "PP001" in fn or 
"PP002" in fn: self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Lung" + self.adata.obs["organ"] = "lung" elif "PP003" in fn or "PP004" in fn: self.adata.obs["donor"] = "Donor1" self.adata.obs["organ"] = "bone marrow" elif "PP005" in fn or "PP006" in fn: self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "Lymph Node" + self.adata.obs["organ"] = "lymph node" elif "PP009" in fn or "PP010" in fn: self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Lung" + self.adata.obs["organ"] = "lung" elif "PP011" in fn or "PP012" in fn: self.adata.obs["donor"] = "Donor2" self.adata.obs["organ"] = "bone marrow" elif "PP013" in fn or "PP014" in fn: self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "Lymph Node" + self.adata.obs["organ"] = "lymph node" self.adata.obs.index = fn.split("_")[1].split("s")[0] + "nskept." + self.adata.obs.index self.adata.obs["cell_ontology_class"] = "Unknown" df1 = pd.read_csv(fn_annot[0], sep="\t", index_col=0, header=None) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py index 7242c7d03..c82ee0ce2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py @@ -24,7 +24,7 @@ def __init__( self.doi = "10.1038/s41467-019-12780-8" self.healthy = True self.normalization = "raw" - self.organ = "eye" # ToDo: "retina" + self.organ = "retina" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py index 0d23e723d..b8c06db99 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py @@ -32,10 +32,10 @@ def __init__( self.author = "Teichmann" self.healthy = True self.normalization = "raw" - self.organ = "placenta,decidua,blood" # ToDo: move this into .obs_key_organ? + self.organ = "placenta" self.organism = "human" self.doi = "10.1038/s41586-018-0698-6" - self.protocol = protocol + self.protocol = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" self.state_exact = "healthy" self.year = 2018 @@ -43,7 +43,6 @@ def __init__( self.var_ensembl_col = "ensembl" self.obs_key_cellontology_original = "annotation" - # ToDo: further anatomical information for subtissue in "location" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py index 60f46cc6c..0798a4801 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "bladder" + self.organ = "bladder organ" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py index 80fce100f..ac0024c36 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "bladder" + self.organ = "bladder organ" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py index b3674ca67..26886e888 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "gallbladder" + self.organ = "gall bladder" self.class_maps = { "0": {}, } diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py index 26d732174..2cf2cdd88 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "skin" + self.organ = "skin of body" self.class_maps = { "0": { "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py index 591ed500f..913b88fc4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "skin" + self.organ = "skin of body" self.class_maps = { "0": { "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index 9064f6b1c..e75dc8091 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -53,7 +53,7 @@ def __init__( self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = protocol + self.protocol = "10X sequencing" if self.sample_fn.split("_")[0] == "droplet" else "Smart-seq2" self.state_exact = "healthy" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py index 4654b0eb2..f90020c16 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py @@ -30,9 +30,7 @@ def __init__( self.author = "Teichmann" self.doi = 
"10.1038/s41591-019-0468-5" self.healthy = True - self.organ = "lung" - # ToDo: 1->"alveoli, parenchyma" - # ToDo: 2->"bronchi" + self.organ = "bronchus" if sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 6808f4fbc..248094396 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -25,7 +25,7 @@ def __init__( self.doi = "10.1038/s41591-019-0468-5" self.healthy = True self.normalization = "raw" - self.organ = "lung" # ToDo: "parenchymal lung and distal airway specimens" + self.organ = "lung" self.organism = "human" self.protocol = "Drop-seq" self.state_exact = "uninvolved areas of tumour resection material" diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py index 30a2c4f34..ac457f8b9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -25,7 +25,7 @@ def __init__( self.doi = "10.1073/pnas.1914143116" self.healthy = True self.normalization = "norm" - self.organ = "eye" # ToDo: "retina" + self.organ = "retina" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 7668f9782..b6bd1dded 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -59,28 +59,30 @@ def __init__( ): super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) protocol = "10x" if sample_fn.split("-")[3] == "droplet" else "smartseq2" - organ = sample_fn.split("-")[-1].split(".")[0].lower() - organ = "adipose" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ - "bladder" if organ in ["bladder"] else \ - "marrow" if organ in ["marrow"] else \ + organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() + organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ + "aorta" if organ in ["aorta"] else \ + "bladder organ" if organ in ["bladder"] else \ + "bone marrow" if organ in ["marrow"] else \ "brain" if organ in ["brain_non-myeloid", "brain_myeloid"] else \ "colon" if organ in ["large_intestine"] else \ "diaphragm" if organ in ["diaphragm"] else \ - "heart" if organ in ["heart_and_aorta", "heart", "aorta"] else \ + "heart" if organ in ["heart_and_aorta", "heart"] else \ "kidney" if organ in ["kidney"] else \ "liver" if organ in ["liver"] else \ "lung" if organ in ["lung"] else \ - "mammary_gland" if organ in ["mammary_gland"] else \ - "muscle" if organ in ["limb_muscle"] else \ + "mammary gland" if organ in ["mammary_gland"] else \ + "muscle organ" if organ in ["limb_muscle"] else \ "pancreas" if organ in ["pancreas"] else \ - "skin" if organ in ["skin"] else \ + "skin of body" if organ in ["skin"] else \ "spleen" if organ in ["spleen"] else \ "thymus" if organ in ["thymus"] else \ "tongue" if organ in ["tongue"] else \ - "trachea" if organ in ["trachea"] else "error" + "trachea" if organ in ["trachea"] else organ + # ToDo: heart_and_aorta could be a distinct UBERON term, e.g. cardiovascular system? 
- self.id = f"mouse_{organ}_2019_{protocol}_pisco_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - f"10.1101/661728" + self.id = f"mouse_{''.join(organ.split(' '))}_2019_{protocol}_pisco_" \ + f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1101/661728" self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" @@ -88,7 +90,7 @@ def __init__( self.obs_key_age = "age" self.obs_key_dev_stage = "development_stage" # not given in all data sets self.obs_key_sex = "sex" - # ToDo: further anatomical information for subtissue in "subtissue" + # ToDo: further anatomical information for subtissue in "subtissue"? self.author = "Quake" self.doi = "10.1101/661728" @@ -96,7 +98,7 @@ def __init__( self.normalization = "norm" self.organism = "mouse" self.organ = organ - self.protocol = protocol + self.protocol = "10X sequencing" if sample_fn.split("-")[3] == "droplet" else "Smart-seq2" self.state_exact = "healthy" self.year = 2019 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py index ce1359fa7..ad50957ba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -28,7 +28,7 @@ def __init__( self.author = "Kropski" self.doi = "10.1101/753806" self.normalization = "raw" - self.organ = "lung" # ToDo: "parenchyma" + self.organ = "lung parenchyma" self.organism = "human" self.protocol = "10X sequencing" self.year = 2020 diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py index d36eeec6a..6e8781ef6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -28,7 +28,7 @@ def __init__( self.doi = "10.1126/science.aat5031" self.healthy = True self.normalization = "norm" - self.organ = "kidney" # ToDo: "renal medulla, renal pelvis, ureter, cortex of kidney" + self.organ = "kidney" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index a52c8b4bc..2f206b907 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -23,10 +23,9 @@ def __init__( **kwargs ): super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - organ = "lung" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ - "oesophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" - # ToDo: lung to "parenchyma"? 
- self.id = f"human_{organ}_2019_10x_madissoon_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ + organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ + "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" + self.id = f"human_{"".join(organ.split(" "))}_2019_10x_madissoon_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1186/s13059-019-1906-x" if self.sample_fn == "madissoon19_lung.processed.h5ad": diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py index f05e0a9b7..494ae8a4b 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -26,7 +26,7 @@ def __init__( self.doi = "10.15252/embj.2018100811" self.healthy = True self.normalization = "raw" - self.organ = "eye" # ToDo: "retina" + self.organ = "retina" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py index 5c0005607..9db2f1359 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py @@ -26,7 +26,7 @@ def __init__( self.doi = "no_doi" self.healthy = True self.normalization = "raw" - self.organ = "bone tissue" + self.organ = "bone marrow" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" diff --git a/sfaira/data/utils_scripts/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps.py index f13bf37a1..2457865f1 100644 --- a/sfaira/data/utils_scripts/create_celltype_maps.py +++ 
b/sfaira/data/utils_scripts/create_celltype_maps.py @@ -28,16 +28,18 @@ # "d10_1038_s41586_020_2157_4" # ] -dir_sfaira_dataloaders = os.path.join(*str(os.path.dirname(FILE_PATH)).split("/")[:-1]) +dir_sfaira_dataloaders = "/" + str(os.path.join(*str(os.path.dirname(FILE_PATH)).split("/")[:-1])) dir_prefix = "d" dir_exlcude = [] for dir_study in os.listdir(dir_sfaira_dataloaders): + print(dir_study) if os.path.isdir(os.path.join(dir_sfaira_dataloaders, dir_study)): # only directories # Narrow down to data set directories: if dir_study[:len(dir_prefix)] == dir_prefix and dir_study not in dir_exlcude: for f_dataset in os.listdir(os.path.join(dir_sfaira_dataloaders, dir_study)): if os.path.isfile(os.path.join(dir_sfaira_dataloaders, dir_study, f_dataset)): # only files + print(f_dataset) # Narrow down to data set files: if f_dataset.split(".")[-1] == "py" and \ f_dataset.split(".")[0] not in ["__init__", "base", "group"]: diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 55cc46c6b..bfd002177 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -876,7 +876,7 @@ def prepare_celltype_map_fuzzy( for y, z in zip(uberon_ids, anatomical_subselection) ] # Iterate over nodes sorted by string match score and masked by constraint: - matchesi = matchesi.update({"anatomic_onotolgy_match": [ + matchesi.update({"anatomic_onotolgy_match": [ nodes[i][1]["name"] for i in np.argsort(scores_lenient) if anatomical_subselection[i] and not np.any( @@ -899,7 +899,7 @@ def prepare_celltype_map_fuzzy( ]) for y in nodes ]) - matchesi = matchesi.update({"anatomic_string_match": [ + matchesi.update({"anatomic_string_match": [ nodes[i][1]["name"] for i in np.argsort(scores_anatomy) if nodes[i][1]["name"] and not np.any( From cfb988f803ff337ef9f70253b186e36e10899d64 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 10 Feb 2021 14:01:49 +0100 Subject: [PATCH 041/161] Fix lenient ontology matching (#110) * improved lenient CO string matching --- sfaira/data/utils.py | 61 ++++++-- sfaira/unit_tests/test_data_utils.py | 30 ++-- sfaira/versions/metadata/base.py | 199 ++++++++++++++++++--------- 3 files changed, 201 insertions(+), 89 deletions(-) diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index 46da63f9b..6c87f02e1 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -4,13 +4,16 @@ def map_celltype_to_ontology( - source: str, + queries: Union[str, List[str]], organism: str, include_synonyms: bool = True, anatomical_constraint: Union[str, None] = None, + omit_target_list: list = ["cell"], n_suggest: int = 4, choices_for_perfect_match: bool = True, keep_strategy: bool = False, + always_return_list: bool = False, + threshold_for_partial_matching: float = 90., **kwargs ) -> Union[List[str], Dict[str, List[str]], str]: """ @@ -18,10 +21,12 @@ def map_celltype_to_ontology( For details, see also sfaira.versions.metadata.CelltypeUniverse.prepare_celltype_map_fuzzy() - :param source: Free text node label which is to be matched to ontology nodes. Must not be a a list or tuple. + :param queries: Free text node label which is to be matched to ontology nodes. + Can also be a list of strings to query. :param organism: Organism, defines ontology extension used. :param include_synonyms: Whether to include synonyms of nodes in string search. :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. + :param omit_target_list: Ontology nodes to not match to. :param n_suggest: Number of cell types to suggest per search strategy. :param choices_for_perfect_match: Whether to give additional matches if a perfect match was found. 
Note that there are cases in which an apparent perfect match corresponds to a general term which could be specified knowing the @@ -29,25 +34,53 @@ def map_celltype_to_ontology( returned as a string, rather than as a list. :param keep_strategy: Whether to keep search results structured by search strategy. For details, see also sfaira.versions.metadata.CelltypeUniverse.prepare_celltype_map_fuzzy() + :param always_return_list: Also return a list over queries if only one query was given. + :param threshold_for_partial_matching: Maximum fuzzy match score below which lenient matching (ratio) is + extended through partial_ratio. :param **kwargs: Additional parameters to CelltypeUniverse. - :return: List of high priority matches or perfect match (see choices_for_perfect_match) or, if keep_strategy, - dictionary of lists of search strategies named by strategy name. + :return: List over queries, each entry is: + A list of high priority matches or perfect match (see choices_for_perfect_match) or, if keep_strategy, + dictionary of lists of search strategies named by strategy name. If a search strategy yields perfect matches, it + does not return a list of strings but just a single string. 
""" - assert isinstance(source, str) + if isinstance(queries, str): + queries = [queries] cu = CelltypeUniverse(organism=organism, **kwargs) + matches_to_return = {} matches = cu.prepare_celltype_map_fuzzy( - source=[source], + source=queries, match_only=False, include_synonyms=include_synonyms, anatomical_constraint=anatomical_constraint, + choices_for_perfect_match=choices_for_perfect_match, omit_list=[], + omit_target_list=omit_target_list, n_suggest=n_suggest, - )[0][0] - # Flatten list of lists: - matches_flat = [x for xx in matches.values() for x in xx] - if not choices_for_perfect_match and source in matches_flat: - return source - elif keep_strategy: - return matches + threshold_for_partial_matching=threshold_for_partial_matching, + ) + # Prepare the output: + for x, matches_i in zip(queries, matches): + matches_i = matches_i[0] + # Flatten list of lists: + # Flatten dictionary of lists and account for string rather than list entries. + if len(matches_i.values()) == 1 and isinstance(list(matches_i.values())[0], str): + matches_flat = list(matches_i.values())[0] + else: + matches_flat = [] + for xx in matches_i.values(): + if isinstance(xx, list): + matches_flat.extend(xx) + else: + assert isinstance(xx, str) + matches_flat.append(xx) + if not choices_for_perfect_match and x in matches_flat: + matches_to_return.update({x: x}) + elif keep_strategy: + matches_to_return.update({x: matches_i}) + else: + matches_to_return.update({x: matches_flat}) + # Only return a list over queries if more than one query was given. 
+ if len(queries) == 1 and not always_return_list: + return matches_to_return else: - return matches_flat + return matches_to_return[queries[0]] diff --git a/sfaira/unit_tests/test_data_utils.py b/sfaira/unit_tests/test_data_utils.py index 3fb3c7a80..bd59a7f29 100644 --- a/sfaira/unit_tests/test_data_utils.py +++ b/sfaira/unit_tests/test_data_utils.py @@ -1,14 +1,28 @@ import pytest +from typing import Union from sfaira.data.utils import map_celltype_to_ontology -@pytest.mark.parametrize("perfect_match", [True, False]) -def test_map_celltype_to_ontology(perfect_match: bool): - trial_cell_type = "T cell" if perfect_match else "T cells" - x = map_celltype_to_ontology(source=trial_cell_type, organism="human", choices_for_perfect_match=False) - print(x) - if perfect_match: - assert isinstance(x, str) +@pytest.mark.parametrize("perfectly_matched_query", [True, False]) +@pytest.mark.parametrize("choices_for_perfect_match", [True, False]) +@pytest.mark.parametrize("anatomical_constraint", [None, "pancreas"]) +def test_map_celltype_to_ontology( + perfectly_matched_query: bool, + choices_for_perfect_match: bool, + anatomical_constraint: Union[str, None] +): + trial_cell_type = "type B pancreatic cell" if perfectly_matched_query else "beta" + x = map_celltype_to_ontology( + queries=[trial_cell_type], + organism="human", + include_synonyms=True, + anatomical_constraint=anatomical_constraint, + choices_for_perfect_match=choices_for_perfect_match + ) + if perfectly_matched_query and not choices_for_perfect_match: + assert isinstance(x[trial_cell_type], str), x + assert x[trial_cell_type] == "type B pancreatic cell" else: - assert isinstance(x, list) + assert isinstance(x[trial_cell_type], list), x + assert "type B pancreatic cell" in x[trial_cell_type] diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index bfd002177..548caad67 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -740,10 +740,13 @@ def 
prepare_celltype_map_fuzzy( match_only: bool = False, include_synonyms: bool = True, anatomical_constraint: Union[str, None] = None, + choices_for_perfect_match: bool = True, omit_list: list = [], + omit_target_list: list = ["cell"], n_suggest: int = 4, + threshold_for_partial_matching: float = 90., ) -> Tuple[ - List[Dict[str, List[str]]], + List[Dict[str, Union[List[str], str]]], List[bool] ]: """ @@ -752,37 +755,60 @@ def prepare_celltype_map_fuzzy( If this function does not yield good matches, consider querying this web interface: https://www.ebi.ac.uk/ols/index - We use anatomical constraints as follows; + Search strategies: + + - exact_match: Only exact string matches to name or synonym in ontology. This is the only strategy that is + enabled if match_only is True. + - lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of match errors + ((fuzz.ratio). + - very_lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of matches + characters from query (fuzz.partial_ratio) + + Search strategies with anatomical constraints: An anatomic constraint is a name of an anatomical structure that can be mapped to UBERON. - 1. We select cell types expected in this UBERON clade based on the link between CL and UBERON. - 2. We perform an additional fuzzy string matching with the anatomical structure added to the proposed label. - This is often beneficial because analysts do not always prefix such extension (e.g. pancreatic) to the - free text cell type labels if the entire sample consists only of cells from this anatomical structure. - Note that if the maps from 1) were perfect, this would not be necessary. In practice, we find this to still - recover some hits that are otherwise missed. + + - anatomic_onotolgy_match: + We select cell types expected in this UBERON clade based on the link between CL and UBERON. 
+ - anatomic_string_match: + We perform an additional fuzzy string matching with the anatomical structure added to the proposed + label. This is often beneficial because analysts do not always prefix such extension (e.g. pancreatic) + to the free text cell type labels if the entire sample consists only of cells from this anatomical + structure. Note that if the maps from 1) were perfect, this would not be necessary. In practice, we + find this to still recover some hits that are otherwise missed. + + Note that matches are shadowed in lower priorty strategies, ie a perfect match will not show up in the list + of hits of any other strategy. :param source: Free text node labels which are to be matched to ontology nodes. :param match_only: Whether to include strict matches only in output. :param include_synonyms: Whether to include synonyms of nodes in string search. - :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined + within UBERON. + :param choices_for_perfect_match: Whether to give additional matches if a perfect match was found and an + anatomical_constraint is not not defined. This is overridden by match_only. :param omit_list: Free text node labels to omit in map. + :param omit_target_list: Ontology nodes to not match to. :param n_suggest: Number of cell types to suggest per search strategy. + :param threshold_for_partial_matching: Maximum fuzzy match score below which lenient matching (ratio) is + extended through partial_ratio. :return: Tuple - List with matches for each source, each entry is a dictionary, - of lists of search strategies named by strategy name. + of lists of search strategies named by strategy name. If a search strategy yields perfect matches, it + does not return a list of strings but just a single string. 
- List with boolean indicator whether or not this output should be reported. """ from fuzzywuzzy import fuzz matches = [] nodes = self.onto_cl.nodes + nodes = [x for x in nodes if x[1]["name"] not in omit_target_list] include_terms = [] if isinstance(source, pd.DataFrame): source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) for x in source: if not isinstance(x, list) and not isinstance(x, tuple): x = [x, "nan"] - term = x[0].lower().strip("'").strip("\"").strip("]").strip("[") + term = x[0].lower().strip("'").strip("\"").strip("'").strip("\"").strip("]").strip("[") # Test for perfect string matching: scores_strict = np.array([ np.max([ @@ -795,48 +821,57 @@ def prepare_celltype_map_fuzzy( for y in nodes ]) # Test for partial string matching: - # fuzz ratio and partial_ratio capture different types of matches well, we use both here: + # fuzz ratio and partial_ratio capture different types of matches well, we use both here and decide below + # which scores are used in which scenario defined through the user input. 
+ # Formatting of synonyms: These are richly annotated, we strip references following after either: + # BROAD, EXACT + # in the synonym string and characters: "' + + def synonym_string_processing(y): + return y.lower().split("broad")[0].split("exact")[0].lower().strip("'").strip("\"").split("\" ")[0] + scores_lenient = np.array([ - np.max([ - fuzz.ratio(term, y[1]["name"].lower()), - fuzz.partial_ratio(term, y[1]["name"].lower()) - ] + [ - fuzz.ratio(term, yy.lower()) + np.max([fuzz.ratio(term, y[1]["name"].lower())] + [ + fuzz.ratio(term, synonym_string_processing(yy)) for yy in y[1]["synonym"] - ] + [ - fuzz.partial_ratio(term, yy.lower()) + ]) if "synonym" in y[1].keys() and include_synonyms else + fuzz.ratio(term, y[1]["name"].lower()) + for y in nodes + ]) + scores_very_lenient = np.array([ + np.max([fuzz.partial_ratio(term, y[1]["name"].lower())] + [ + fuzz.partial_ratio(term, synonym_string_processing(yy)) for yy in y[1]["synonym"] ]) if "synonym" in y[1].keys() and include_synonyms else - np.max([ - fuzz.ratio(term, y[1]["name"].lower()), - fuzz.partial_ratio(term, y[1]["name"].lower()) - ]) + fuzz.partial_ratio(term, y[1]["name"].lower()) for y in nodes ]) - include_terms.append(x[0].lower().strip("'").strip("\"") not in omit_list) + include_terms.append(term not in omit_list) if match_only and not anatomical_constraint: - matches.append({"perfect_match": [ - [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]] - ]}) # perfect match + # Explicitly trying to report perfect matches (match_only is True). 
+ matches.append({"perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0]}) else: - matchesi = {} + matches_i = {} if np.any(scores_strict == 100) and not anatomical_constraint: - matchesi.update({"perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]]}) + # Perfect match and not additional information through anatomical_constraint, ie no reason to assume + # that the user is not looking for this hit. + matches_i.update({ + "perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0] + }) + if choices_for_perfect_match: + matches_i.update({"lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest]}) + if np.max(scores_lenient) < threshold_for_partial_matching: + matches_i.update({"very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest]}) else: if anatomical_constraint is not None: - # Select best overall matches based on lenient and strict matching: - matchesi.update({"perfect_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_strict) - ][-n_suggest:][::-1]}) - matchesi.update({"lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_lenient) - if not np.any( - nodes[i][1]["name"] in v for v in matchesi.values() - ) - ][-n_suggest:][::-1]}) - # Use anatomical constraints two fold: # 1. Select cell types that are in the correct ontology. # 2. Run a second string matching with the anatomical word included. 
@@ -870,48 +905,70 @@ def prepare_celltype_map_fuzzy( # Check this by checking if one is an ancestor of the other: anatomical_subselection = [ z and ( - anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or - y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) + anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or + y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) ) for y, z in zip(uberon_ids, anatomical_subselection) ] # Iterate over nodes sorted by string match score and masked by constraint: - matchesi.update({"anatomic_onotolgy_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_lenient) - if anatomical_subselection[i] and not np.any( - nodes[i][1]["name"] in v for v in matchesi.values() - ) + matches_i.update({ + "anatomic_onotolgy_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_lenient) + if anatomical_subselection[i] and not + np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) ][-n_suggest:][::-1]}) # 2. Run a second string matching with the anatomical word included. - modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]").\ + modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]"). 
\ strip("[") scores_anatomy = np.array([ np.max([ fuzz.partial_ratio(modified_term, y[1]["name"].lower()) ] + [ - fuzz.partial_ratio(modified_term, yy.lower()) + fuzz.partial_ratio(modified_term, synonym_string_processing(yy)) for yy in y[1]["synonym"] ]) if "synonym" in y[1].keys() and include_synonyms else - np.max([ - fuzz.partial_ratio(modified_term, y[1]["name"].lower()) - ]) + fuzz.partial_ratio(modified_term, y[1]["name"].lower()) for y in nodes ]) - matchesi.update({"anatomic_string_match": [ + matches_i.update({ + "anatomic_string_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_anatomy) + if nodes[i][1]["name"] and not + np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][-n_suggest:][::-1] + }) + + # Select best overall matches based on lenient and strict matching: + matches_i.update({"perfect_match": [ nodes[i][1]["name"] - for i in np.argsort(scores_anatomy) - if nodes[i][1]["name"] and not np.any( - nodes[i][1]["name"] in v for v in matchesi.values() - ) - ][-n_suggest:][::-1]}) + for i in np.argsort(scores_strict)[::-1] + ][:n_suggest]}) + matches_i.update({"lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest]}) + if np.max(scores_lenient) < threshold_for_partial_matching: + matches_i.update({"very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest]}) else: - # Suggest top 10 hits by string match: - matchesi.update({"lenient_match": [ - [nodes[i][1]["name"] for i in np.argsort(scores_lenient)[-n_suggest:]][::-1] - ]}) - matches.append(matchesi) + # Suggest top hits by string match: + matches_i.update({"lenient_match": [ + nodes[i][1]["name"] for i in np.argsort(scores_lenient)[::-1] + ][:n_suggest]}) + if np.max(scores_lenient) < threshold_for_partial_matching: + 
matches_i.update({"very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest]}) + matches.append(matches_i) return matches, include_terms def prepare_celltype_map_tab( @@ -943,11 +1000,19 @@ def prepare_celltype_map_tab( match_only=match_only, include_synonyms=include_synonyms, anatomical_constraint=anatomical_constraint, + choices_for_perfect_match=False, omit_list=omit_list, n_suggest=n_suggest, ) tab = pd.DataFrame({ "source": source, - "target": [separator_groups.join([separator_suggestions.join(v) for v in x.values()]) for x in matches] + "target": [ + separator_groups.join([ + separator_suggestions.join(v) + if isinstance(v, list) else v + for v in x.values() + ]) + for x in matches + ] }) return tab.loc[include_terms] From 4a29bbb811d683b89ee1fa0f0c222b1bb71c6d57 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 10 Feb 2021 16:47:16 +0100 Subject: [PATCH 042/161] fix bugs in data loading (#112) * fixed raw file location of TMS --- .../loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index b6bd1dded..640bc3ee2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -106,7 +106,7 @@ def __init__( self.var_symbol_col = "index" def _load(self, fn): - base_path = os.path.join(self.path, "mouse", self.organ) + base_path = os.path.join(self.path, "raw", self.directory_formatted_doi) fn = os.path.join(base_path, self.sample_fn) self.adata = anndata.read_h5ad(fn) From 197b8f31fa08d80095284b6ab76596c9ac0c4dad Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Fri, 12 Feb 
2021 00:14:43 +0100 Subject: [PATCH 043/161] automated dataset_download (#78) * refactor download and download_meta into download_url_data and download_url_meta * fix overindentation * fix mouse pancreas dataset for automated download * simplify hcl dataloader for automated dataset download (directly eats zipfile without extracting now) * simplify mousebrain dataloader for automated dataset download (directly eats zipfile without extracting now) * add filename to download url if download is private * add download functions * linting * add dataset id info to donwload error * fix mouse pancreas dataloading * reanme variables in loader * enable direct reading from downloaded object for MCA * add mera download field to mca loader * adjust path fo mca dataloaders * switch paths in datasets to use doi-based directory structure and automatically set the directory path * rename Dataset.full_path to Dataset.doi_path * fix and update dataloader templates * change _load_full to use self.adata attribute rather than returning it * convert ica dataloader to use manysamplesonefile structure * rename bone marrow tissue * fix synapse download * fix dataloaders * skip download if file already present and add download to datasetgroup and supergroup * change cache path * fix automatic doi_path setting [skip ci] * set self.doi_path only upon call of download method rather than in init [skip ci] * fix human liver protocol accodring to ontology [skip ci] * fix human liver protocol accodring to ontology [skip ci] * fix human kidney protocol according to ontology [skip ci] * fix human kidney protocol according to ontology [skip ci] * rename prostate according to uberon ontology [skip ci] * rename epityphlon according to uberon ontology [skip ci] * ahndle download urls correclty [skip ci] * handle download urls correctly [skip ci] * create dataset folder if it does not exist [skip ci] * catch unverified ssl certificates * improve printing during download [ci skip] * fix handling of private 
files in automatic download [skip ci] * improve sypse error messages [skip ci] * improve sypse error messages [skip ci] * improve synapse logging [skip ci] * improve synapse logging [skip ci] * improve synapse logging [skip ci] * improve synapse logging [skip ci] * Revert "improve synapse logging [skip ci]" This reverts commit 2480d6f4 * improve synapse logging [skip ci] * monkey-patching warnings [skip ci] * Revert "monkey-patching warnings [skip ci]" This reverts commit cd99bcbb * fix download url attributes [skip ci] * fix human placenta dataloader [skip ci] * fix human ileum dataloader [skip ci] --- sfaira/consts/adata_fields.py | 22 +- sfaira/consts/meta_data_files.py | 4 +- sfaira/data/base.py | 190 +++++++++++++----- .../databases/cellxgene/cellxgene_loader.py | 5 +- .../human_pancreas_2017_smartseq2_enge_001.py | 15 +- .../d10_1016_j_cell_2018_02_001/base.py | 23 ++- .../mouse_bladder_2018_microwell_han_001.py | 11 +- .../mouse_blood_2018_microwell_han_001.py | 11 +- .../mouse_blood_2018_microwell_han_002.py | 11 +- .../mouse_blood_2018_microwell_han_003.py | 11 +- .../mouse_blood_2018_microwell_han_004.py | 11 +- .../mouse_blood_2018_microwell_han_005.py | 11 +- .../mouse_bone_2018_microwell_001.py | 12 +- .../mouse_brain_2018_microwell_han_001.py | 11 +- .../mouse_brain_2018_microwell_han_002.py | 11 +- ...ouse_femalegonad_2018_microwell_han_001.py | 11 +- ...ouse_femalegonad_2018_microwell_han_002.py | 11 +- .../mouse_ileum_2018_microwell_han_001.py | 11 +- .../mouse_ileum_2018_microwell_han_002.py | 11 +- .../mouse_ileum_2018_microwell_han_003.py | 11 +- .../mouse_kidney_2018_microwell_han_001.py | 12 +- .../mouse_kidney_2018_microwell_han_002.py | 12 +- .../mouse_liver_2018_microwell_han_001.py | 12 +- .../mouse_liver_2018_microwell_han_002.py | 12 +- .../mouse_lung_2018_microwell_han_001.py | 11 +- .../mouse_lung_2018_microwell_han_002.py | 11 +- .../mouse_lung_2018_microwell_han_003.py | 11 +- .../mouse_malegonad_2018_microwell_han_001.py | 11 +- 
.../mouse_malegonad_2018_microwell_han_002.py | 11 +- ...use_mammarygland_2018_microwell_han_001.py | 11 +- ...use_mammarygland_2018_microwell_han_002.py | 11 +- ...use_mammarygland_2018_microwell_han_003.py | 11 +- ...use_mammarygland_2018_microwell_han_004.py | 11 +- .../mouse_muscle_2018_microwell_han_001.py | 11 +- .../mouse_pancreas_2018_microwell_han_001.py | 11 +- .../mouse_placenta_2018_microwell_han_001.py | 11 +- .../mouse_placenta_2018_microwell_han_002.py | 11 +- .../mouse_prostate_2018_microwell_han_001.py | 11 +- .../mouse_prostate_2018_microwell_han_002.py | 11 +- .../mouse_rib_2018_microwell_han_001.py | 11 +- .../mouse_rib_2018_microwell_han_002.py | 11 +- .../mouse_rib_2018_microwell_han_003.py | 11 +- .../mouse_spleen_2018_microwell_han_001.py | 11 +- .../mouse_stomach_2018_microwell_han_001.py | 11 +- .../mouse_thymus_2018_microwell_han_001.py | 11 +- .../mouse_uterus_2018_microwell_han_001.py | 11 +- .../mouse_uterus_2018_microwell_han_002.py | 11 +- .../human_colon_2019_10x_kinchen_001.py | 20 +- .../human_colon_2019_10x_smilie_001.py | 9 +- .../human_ileum_2019_10x_martin_001.py | 11 +- .../human_prostate_2018_10x_henry_001.py | 11 +- .../human_pancreas_2016_indrop_baron_001.py | 9 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 15 +- .../mouse_pancreas_2019_10x_thompson_x.py | 33 ++- .../human_lung_2020_10x_miller_001.py | 9 +- .../human_brain_2017_DroNcSeq_habib_001.py | 10 +- .../human_malegonad_2018_10x_guo_001.py | 9 +- .../human_liver_2018_10x_macparland_001.py | 15 +- .../human_kidney_2019_10xSn_lake_001.py | 21 +- .../human_mixed_2019_10x_szabo_001.py | 41 ++-- .../human_eye_2019_10x_menon_001.py | 9 +- .../human_placenta_2018_10x_ventotormo_001.py | 13 +- ... 
human_liver_2019_CELseq2_aizarani_001.py} | 17 +- .../human_liver_2019_10x_ramachandran_001.py | 9 +- .../human_liver_2019_10x_popescu_001.py | 9 +- .../d10_1038_s41586_020_2157_4/base.py | 41 +--- .../human_adipose_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_002.py | 4 +- ...man_adrenalgland_2020_microwell_han_003.py | 4 +- ...man_adrenalgland_2020_microwell_han_004.py | 4 +- ...man_adrenalgland_2020_microwell_han_005.py | 4 +- ...man_adrenalgland_2020_microwell_han_006.py | 4 +- .../human_artery_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_002.py | 4 +- .../human_bladder_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_001.py | 4 +- .../human_blood_2020_microwell_han_002.py | 4 +- .../human_blood_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_004.py | 4 +- .../human_blood_2020_microwell_han_005.py | 4 +- .../human_blood_2020_microwell_han_006.py | 4 +- .../human_blood_2020_microwell_han_007.py | 4 +- .../human_bone_2020_microwell_han_001.py | 4 +- .../human_bone_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_001.py | 4 +- .../human_brain_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_003.py | 4 +- .../human_brain_2020_microwell_han_004.py | 4 +- .../human_brain_2020_microwell_han_005.py | 4 +- .../human_brain_2020_microwell_han_006.py | 4 +- .../human_calvaria_2020_microwell_han_001.py | 4 +- .../human_cervix_2020_microwell_han_001.py | 4 +- ..._chorionicvillus_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_002.py | 4 +- .../human_colon_2020_microwell_han_003.py | 4 +- .../human_colon_2020_microwell_han_004.py | 4 +- .../human_duodenum_2020_microwell_han_001.py | 4 +- ...human_epityphlon_2020_microwell_han_001.py | 6 +- 
.../human_esophagus_2020_microwell_han_001.py | 4 +- .../human_esophagus_2020_microwell_han_002.py | 4 +- .../human_eye_2020_microwell_han_001.py | 4 +- ...an_fallopiantube_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_002.py | 4 +- ...uman_gallbladder_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_002.py | 4 +- .../human_heart_2020_microwell_han_003.py | 4 +- .../human_heart_2020_microwell_han_004.py | 4 +- .../human_hesc_2020_microwell_han_001.py | 4 +- .../human_ileum_2020_microwell_han_001.py | 4 +- .../human_jejunum_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_002.py | 4 +- .../human_kidney_2020_microwell_han_003.py | 4 +- .../human_kidney_2020_microwell_han_004.py | 4 +- .../human_kidney_2020_microwell_han_005.py | 4 +- .../human_kidney_2020_microwell_han_006.py | 4 +- .../human_kidney_2020_microwell_han_007.py | 4 +- .../human_liver_2020_microwell_han_001.py | 4 +- .../human_liver_2020_microwell_han_002.py | 4 +- .../human_liver_2020_microwell_han_003.py | 4 +- .../human_liver_2020_microwell_han_004.py | 4 +- .../human_liver_2020_microwell_han_005.py | 4 +- .../human_lung_2020_microwell_han_001.py | 4 +- .../human_lung_2020_microwell_han_002.py | 4 +- .../human_lung_2020_microwell_han_003.py | 4 +- .../human_lung_2020_microwell_han_004.py | 4 +- .../human_lung_2020_microwell_han_005.py | 4 +- .../human_malegonad_2020_microwell_han_001.py | 4 +- .../human_malegonad_2020_microwell_han_002.py | 4 +- .../human_muscle_2020_microwell_han_001.py | 4 +- .../human_muscle_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_001.py | 4 +- .../human_omentum_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_001.py | 4 +- .../human_pancreas_2020_microwell_han_002.py 
| 4 +- .../human_pancreas_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_004.py | 4 +- .../human_placenta_2020_microwell_han_001.py | 4 +- .../human_pleura_2020_microwell_han_001.py | 4 +- .../human_prostate_2020_microwell_han_001.py | 6 +- .../human_rectum_2020_microwell_han_001.py | 4 +- .../human_rib_2020_microwell_han_001.py | 4 +- .../human_rib_2020_microwell_han_002.py | 4 +- .../human_skin_2020_microwell_han_001.py | 4 +- .../human_skin_2020_microwell_han_002.py | 4 +- ...human_spinalcord_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_001.py | 4 +- .../human_stomach_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_003.py | 4 +- .../human_stomach_2020_microwell_han_004.py | 4 +- .../human_stomach_2020_microwell_han_005.py | 4 +- .../human_stomach_2020_microwell_han_006.py | 4 +- .../human_stomach_2020_microwell_han_007.py | 4 +- .../human_stomach_2020_microwell_han_008.py | 4 +- .../human_stomach_2020_microwell_han_009.py | 4 +- .../human_stomach_2020_microwell_han_010.py | 4 +- .../human_thymus_2020_microwell_han_001.py | 4 +- .../human_thymus_2020_microwell_han_002.py | 4 +- .../human_thyroid_2020_microwell_han_001.py | 4 +- .../human_thyroid_2020_microwell_han_002.py | 4 +- .../human_trachea_2020_microwell_han_001.py | 4 +- .../human_ureter_2020_microwell_han_001.py | 4 +- .../human_uterus_2020_microwell_han_001.py | 4 +- .../human_lung_2020_10x_travaglini_001.py | 14 +- .../human_colon_2020_10x_james_001.py | 9 +- ..._001.py => human_lung_2019_10x_braga_x.py} | 8 +- .../human_lung_2019_dropseq_braga_001.py | 11 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 43 ++-- .../human_kidney_2020_10x_liao_001.py | 9 +- .../human_eye_2019_10x_voigt_001.py | 9 +- .../human_x_2019_10x_wang_001.py | 8 +- .../human_lung_2020_10x_lukassen_001.py | 8 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 9 
+- .../human_lung_2020_10x_habermann_001.py | 19 +- .../human_kidney_2019_10x_stewart_001.py | 15 +- .../human_thymus_2020_10x_park_001.py | 9 +- .../human_x_2019_10x_madissoon_001.py | 20 +- .../human_eye_2019_10x_lukowski_001.py | 9 +- .../d_nan/human_blood_2018_10x_ica_001.py | 48 ----- .../human_blood_2019_10x_10xGenomics_001.py | 10 +- .../d_nan/human_bone_2018_10x_ica_001.py | 48 ----- .../loaders/d_nan/human_x_2018_10x_ica_001.py | 60 ++++++ sfaira/data/interactive/loader.py | 6 +- .../your_dataset_file_1.py | 9 +- .../your_dataset_file_2.py | 9 +- .../your_dataset_file.py | 13 +- .../your_dataset_file.py | 20 +- .../your_dataset_file_1.py | 9 +- 196 files changed, 830 insertions(+), 1052 deletions(-) rename sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/{human_liver_2019_mCELSeq2_aizarani_001.py => human_liver_2019_CELseq2_aizarani_001.py} (82%) rename sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/{human_lung_2019_10x_braga_001.py => human_lung_2019_10x_braga_x.py} (94%) delete mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py create mode 100644 sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index d44d9e104..a4ab0c236 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -20,8 +20,8 @@ class ADATA_IDS_BASE: _cell_ontology_class: str _cell_ontology_id: str _doi: str - _download: str - _download_meta: str + _download_url_data: str + _download_url_meta: str _dataset: str _dataset_group: str _gene_id_ensembl: str @@ -69,12 +69,12 @@ def doi(self) -> str: return self._doi @property - def download(self) -> str: - return self._download + def download_url_data(self) -> str: + return self._download_url_data @property - def download_meta(self) -> str: - return self._download_meta + def 
download_url_meta(self) -> str: + return self._download_url_meta @property def gene_id_ensembl(self) -> str: @@ -170,8 +170,8 @@ def __init__(self): self._doi = "doi" self._dataset = "dataset" self._dataset_group = "dataset_group" - self._download = "download" - self._download_meta = "download_meta" + self._download_url_data = "download_url_data" + self._download_url_meta = "download_url_meta" self._gene_id_ensembl = "ensembl" self._gene_id_index = "ensembl" self._gene_id_names = "names" @@ -206,7 +206,7 @@ def __init__(self): self.subtissue_allowed_entries = None self.year_allowed_entries = list(range(2000, 3000)) # Free fields that are not constrained: - # _author, _download, _download_meta, _doi, _id, _state_exact + # _author, _download_url_data, _download_url_meta, _doi, _id, _state_exact self.unknown_celltype_name = "unknown" self.unknown_celltype_identifiers = ["nan", "none", "unknown", np.nan, None] @@ -240,8 +240,8 @@ def __init__(self): self._doi = "" # TODO self._dataset = "dataset" self._dataset_group = "dataset_group" - self._download = "" # TODO - self._download_meta = "" # never necessary as we interface via anndata objects + self._download_url_data = "" # TODO + self._download_url_meta = "" # never necessary as we interface via anndata objects self._gene_id_ensembl = "" # TODO self._gene_id_index = "ensembl" self._gene_id_names = "" # TODO diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index cdff4c828..0a0ecc1ef 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -7,8 +7,8 @@ "author": str, "cell_ontology_class": str, "doi": str, - "download": str, - "download_meta": str, + "download_url_data": str, + "download_url_meta": str, "healthy": str, "id": str, "ncells": int, diff --git a/sfaira/data/base.py b/sfaira/data/base.py index f8555cd0d..9d95f92bb 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -13,6 +13,11 @@ import scipy.sparse from typing import Dict, List, Tuple, 
Union import warnings +import urllib.request +import urllib.parse +import urllib.error +import cgi +import ssl from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse, ONTOLOGY_UBERON @@ -55,6 +60,7 @@ class DatasetBase(abc.ABC): path: Union[None, str] meta_path: Union[None, str] cache_path: Union[None, str] + doi_path: Union[None, str] id: Union[None, str] genome: Union[None, str] @@ -62,8 +68,8 @@ class DatasetBase(abc.ABC): _author: Union[None, str] _dev_stage: Union[None, str] _doi: Union[None, str] - _download: Union[Tuple[List[None]], Tuple[List[str]]] - _download_meta: Union[Tuple[List[None]], Tuple[List[str]]] + _download_url_data: Union[Tuple[List[None]], Tuple[List[str]], None] + _download_url_meta: Union[Tuple[List[None]], Tuple[List[str]], None] _ethnicity: Union[None, str] _healthy: Union[None, bool] _id: Union[None, str] @@ -115,13 +121,14 @@ def __init__( self.path = path self.meta_path = meta_path self.cache_path = cache_path + self.doi_path = None self._age = None self._author = None self._dev_stage = None self._doi = None - self._download = None - self._download_meta = None + self._download_url_data = None + self._download_url_meta = None self._ethnicity = None self._healthy = None self._id = None @@ -160,10 +167,7 @@ def __init__( self._ontology_class_map = None @abc.abstractmethod - def _load(self, fn): - pass - - def _download(self, fn): + def _load(self): pass @property @@ -180,12 +184,93 @@ def clear(self): self.adata = None gc.collect() - def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None) -> bool: + def download(self, **kwargs): + assert self.download_url_data is not None, f"The `download_url_data` attribute of dataset {self.id} " \ + f"is not set, cannot download dataset." + assert self.path is not None, f"No path was provided when instantiating the dataset container, " \ + f"cannot download datasets." 
+ + self.doi_path = os.path.join(self.path, "raw", self.directory_formatted_doi) + if not os.path.exists(self.doi_path): + os.makedirs(self.doi_path) + + urls = self.download_url_data[0][0] + self.download_url_meta[0][0] + + for url in urls: + if url is None: + continue + if url.split(",")[0] == 'private': + if "," in url: + fn = ','.join(url.split(',')[1:]) + if os.path.isfile(os.path.join(self.doi_path, fn)): + print(f"File {fn} already found on disk, skipping download.") + else: + warnings.warn(f"Dataset {self.id} is not available for automatic download, please manually " + f"copy the file {fn} to the following location: " + f"{self.doi_path}") + else: + warnings.warn(f"A file for dataset {self.id} is not available for automatic download, please" + f"manually copy the associated file to the following location: {self.doi_path}") + + elif url.split(",")[0].startswith('syn'): + fn = ",".join(url.split(",")[1:]) + if os.path.isfile(os.path.join(self.doi_path, fn)): + print(f"File {fn} already found on disk, skipping download.") + else: + self._download_synapse(url.split(",")[0], fn, **kwargs) + + else: + url = urllib.parse.unquote(url) + + # Catch SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: + # unable to get local issuer certificate (_ssl.c:1124) + try: + urllib.request.urlopen(url) + except urllib.error.URLError: + ssl._create_default_https_context = ssl._create_unverified_context + + if 'Content-Disposition' in urllib.request.urlopen(url).info().keys(): + fn = cgi.parse_header(urllib.request.urlopen(url).info()['Content-Disposition'])[1]["filename"] + else: + fn = url.split("/")[-1] + if os.path.isfile(os.path.join(self.doi_path, fn)): + print(f"File {fn} already found on disk, skipping download.") + else: + print(f"Downloading: {fn}") + urllib.request.urlretrieve(url, os.path.join(self.doi_path, fn)) + + def _download_synapse(self, synapse_entity, fn, **kwargs): + try: + import synapseclient + except ImportError: + 
warnings.warn("synapseclient python package not found. This package is required to download some of the " + "selected datasets. Run `pip install synapseclient` to install it. Skipping download of the " + f"following dataset: {self.id}") + return + import shutil + import logging + logging.captureWarnings(False) # required to properly display warning messages below with sypaseclient loaded + + if "synapse_user" not in kwargs.keys(): + warnings.warn(f"No synapse username provided, skipping download of synapse dataset {fn}." + f"Provide your synapse username as the `synapse_user` argument to the download method.") + return + if "synapse_pw" not in kwargs.keys(): + warnings.warn(f"No synapse password provided, skipping download of synapse dataset {fn}." + f"Provide your synapse password as the `synapse_pw` argument to the download method.") + return + + print(f"Downloading from synapse: {fn}") + syn = synapseclient.Synapse() + syn.login(kwargs['synapse_user'], kwargs['synapse_pw']) + dataset = syn.get(entity=synapse_entity) + shutil.move(dataset.path, os.path.join(self.doi_path, fn)) + + def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None) -> bool: """ Only relevant for DatasetBaseGroupLoading but has to be a method of this class because it is used in DatasetGroup. - :param fn: :param adata_group: :return: Whether group loading is used. """ @@ -224,8 +309,8 @@ def _get_cache_fn(): cache = os.path.join( self.cache_path, - self.directory_formatted_doi, "cache", + self.directory_formatted_doi, self._directory_formatted_id + ".h5ad" ) return cache @@ -237,9 +322,9 @@ def _cached_reading(fn, fn_cache): else: warnings.warn(f"Cached loading enabled, but cache file {fn_cache} not found. 
" f"Loading from raw files.") - self._load(fn=fn) + self._load() else: - self._load(fn=fn) + self._load() def _cached_writing(fn_cache): if fn_cache is not None: @@ -248,15 +333,12 @@ def _cached_writing(fn_cache): os.makedirs(dir_cache) self.adata.write_h5ad(fn_cache) - if fn is None and self.path is None: - raise ValueError("provide either fn in load or path in constructor") - if load_raw and allow_caching: - self._load(fn=fn) + self._load() fn_cache = _get_cache_fn() _cached_writing(fn_cache) elif load_raw and not allow_caching: - self._load(fn=fn) + self._load() elif not load_raw and allow_caching: fn_cache = _get_cache_fn() _cached_reading(fn, fn_cache) @@ -297,10 +379,16 @@ def load( genome = "Mus_musculus_GRCm38_97" warnings.warn(f"using default genome {genome}") else: - raise ValueError(f"genome was not supplied and organism {self.organism} " - f"was not matched to a default choice") + raise ValueError(f"genome was not supplied and no default genome found for organism {self.organism}") self._set_genome(genome=genome) + # Set path to dataset directory + if fn is None: + if self.doi_path is None: + raise ValueError("Neither sfaira data repo path nor custom dataset path provided.") + else: + self.doi_path = fn + # Run data set-specific loading script: self._load_cached(fn=fn, load_raw=load_raw, allow_caching=allow_caching) # Set data-specific meta data in .adata: @@ -492,8 +580,8 @@ def _set_metadata_in_adata(self): self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated self.adata.uns[self._ADATA_IDS_SFAIRA.author] = self.author self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = self.doi - self.adata.uns[self._ADATA_IDS_SFAIRA.download] = self.download - self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta] = self.download_meta + self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_data] = self.download_url_data + self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_meta] = self.download_url_meta self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id 
self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = self.normalization self.adata.uns[self._ADATA_IDS_SFAIRA.year] = self.year @@ -798,8 +886,8 @@ def write_meta( self._ADATA_IDS_SFAIRA.annotated: self.adata.uns[self._ADATA_IDS_SFAIRA.annotated], self._ADATA_IDS_SFAIRA.author: self.adata.uns[self._ADATA_IDS_SFAIRA.author], self._ADATA_IDS_SFAIRA.doi: self.adata.uns[self._ADATA_IDS_SFAIRA.doi], - self._ADATA_IDS_SFAIRA.download: self.adata.uns[self._ADATA_IDS_SFAIRA.download], - self._ADATA_IDS_SFAIRA.download_meta: self.adata.uns[self._ADATA_IDS_SFAIRA.download_meta], + self._ADATA_IDS_SFAIRA.download_url_data: self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_data], + self._ADATA_IDS_SFAIRA.download_url_meta: self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_meta], self._ADATA_IDS_SFAIRA.id: self.adata.uns[self._ADATA_IDS_SFAIRA.id], self._ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization], @@ -922,67 +1010,67 @@ def directory_formatted_doi(self) -> str: return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) @property - def download(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + def download_url_data(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: """ Data download website(s). Save as tuple with single element, which is a list of all download websites relevant to dataset. 
:return: """ - if self._download is not None: - x = self._download + if self._download_url_data is not None: + x = self._download_url_data else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[self._ADATA_IDS_SFAIRA.download] + x = self.meta[self._ADATA_IDS_SFAIRA.download_url_data] if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): x = (x,) return x - @download.setter - def download(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): - self.__erasing_protection(attr="download", val_old=self._download, val_new=x) + @download_url_data.setter + def download_url_data(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download_url_data", val_old=self._download_url_data, val_new=x) # Formats to tuple with single element, which is a list of all download websites relevant to dataset, # which can be used as a single element column in a pandas data frame. if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): x = (x,) - self._download = (x,) + self._download_url_data = (x,) @property - def download_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: + def download_url_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: """ Meta data download website(s). Save as tuple with single element, which is a list of all download websites relevant to dataset. 
:return: """ - x = self._download_meta - # if self._download_meta is not None: # TODO add this back in once download_meta is routinely set in datasets - # x = self._download_meta + x = self._download_url_meta + # if self._download_url_meta is not None: # TODO add this back in once download_meta is set in all datasets + # x = self._download_url_meta # else: # if self.meta is None: # self.load_meta(fn=None) - # x = self.meta[self._ADATA_IDS_SFAIRA.download_meta] + # x = self.meta[self._ADATA_IDS_SFAIRA.download_url_meta] if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): x = (x,) return x - @download_meta.setter - def download_meta(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): - self.__erasing_protection(attr="download_meta", val_old=self._download_meta, val_new=x) + @download_url_meta.setter + def download_url_meta(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): + self.__erasing_protection(attr="download_url_meta", val_old=self._download_url_meta, val_new=x) # Formats to tuple with single element, which is a list of all download websites relevant to dataset, # which can be used as a single element column in a pandas data frame. if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): x = (x,) - self._download_meta = (x,) + self._download_url_meta = (x,) @property def ethnicity(self) -> Union[None, str]: @@ -1482,20 +1570,20 @@ def sample_id(self): return self._sample_id @abc.abstractmethod - def _load_full(self, fn=None) -> anndata.AnnData: + def _load_full(self) -> anndata.AnnData: """ Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. - Override this method in the Dataset if this is relevant. + Overload this method in the Dataset if this is relevant. 
:return: adata_group """ pass - def set_raw_full_group_object(self, fn=None, adata_group: Union[None, anndata.AnnData] = None): + def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None): if self.adata is None and adata_group is not None: self.adata = adata_group elif self.adata is None and adata_group is not None: - self.adata = self._load_full(fn=fn) + self._load_full() elif self.adata is not None and self._unprocessed_full_group_object: pass else: @@ -1528,8 +1616,8 @@ def _subset_from_group( for k, v in subset_items: self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] - def _load(self, fn): - _ = self.set_raw_full_group_object(fn=fn, adata_group=None) + def _load(self): + _ = self.set_raw_full_group_object(adata_group=None) if self._unprocessed_full_group_object: self._load_from_group() self._unprocessed_full_group_object = False @@ -1740,6 +1828,10 @@ def write_ontology_class_map( if not os.path.exists(fn) or not protected_writing: tab.to_csv(fn, index=False) + def download(self, **kwargs): + for _, v in self.datasets.items(): + v.download(**kwargs) + @property def ids(self): return list(self.datasets.keys()) @@ -2107,6 +2199,10 @@ def flatten(self) -> DatasetGroup: ds[k] = v return DatasetGroup(datasets=ds) + def download(self, **kwargs): + for x in self.dataset_groups: + x.download(**kwargs) + def load_all( self, annotated_only: bool = False, diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index 48429f7ea..5265bf761 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -44,13 +44,12 @@ def __init__( "0": {}, } - def _load(self, fn=None): + def _load(self): """ Note that in contrast to data set specific data loaders, here, the core attributes are only identified from the data in this function and are not already set in the 
constructor. These attributes can still be used through meta data containers after the data was loaded once. - :param fn: :return: """ fn = os.path.join(self.path, self.fn) @@ -60,7 +59,7 @@ def _load(self, fn=None): self.author = adata.uns[self._ADATA_IDS_CELLXGENE.author][self._ADATA_IDS_CELLXGENE.author_names] self.doi = adata.uns[self._ADATA_IDS_CELLXGENE.doi] - self.download = self.download + self.download_url_data = self.download_url_data self.id = self.id self.normalization = 'raw' self.organ = str(self.fn).split("_")[3] # TODO interface this properly diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index 3ad62fc30..0c455ca07 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -23,8 +23,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" - self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" self.author = "Quake" self.doi = "10.1016/j.cell.2017.09.004" @@ -52,12 +52,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "GSE81547_RAW.tar"), - os.path.join(self.path, "human", "pancreas", "GSE81547_series_matrix.txt.gz") - ] + def 
_load(self): + fn = [ + os.path.join(self.doi_path, "GSE81547_RAW.tar"), + os.path.join(self.doi_path, "GSE81547_series_matrix.txt.gz") + ] dfs = [] with tarfile.open(fn[0]) as tar: for member in tar.getmembers(): diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py index e397eecf3..b05f4b670 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py @@ -3,6 +3,9 @@ import pandas from typing import Union from sfaira.data import DatasetBase +import zipfile +import tarfile +import os class Dataset_d10_1016_j_cell_2018_02_001(DatasetBase): @@ -19,6 +22,9 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.download_url_meta = None + self.obs_key_cellontology_class = "Annotation" self.obs_key_cellontology_original = "Annotation" @@ -33,11 +39,20 @@ def __init__( self.var_symbol_col = "index" - def _load_generalized(self, fn, fn_meta): - celltypes = pandas.read_csv(fn_meta, index_col=1) - celltypes = celltypes.drop(["Unnamed: 0"], axis=1) + def _load_generalized(self, samplename): + fn = os.path.join(self.doi_path, '5435866.zip') + + with zipfile.ZipFile(fn) as archive: + celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) + celltypes = celltypes.drop(["Unnamed: 0"], axis=1) + + with tarfile.open(fileobj=archive.open('MCA_500more_dge.tar.gz')) as tar: + data = pandas.read_csv(tar.extractfile(f'500more_dge/{samplename}.txt.gz'), + compression="gzip", + sep=" ", + header=0 + ) - data = pandas.read_csv(fn, sep=" ", header=0) self.adata = anndata.AnnData(data.T) self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() self.adata.obs = 
celltypes.loc[self.adata.obs_names, :] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py index 6854aa37e..447e4d9a8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "bladder organ" self.class_maps = { @@ -38,11 +37,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Bladder_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Bladder_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py index 4c4aa21e8..6c75c8abe 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download 
= "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.class_maps = { @@ -43,11 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PeripheralBlood1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py index 980024416..b5db9ab9d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.class_maps = { @@ -43,11 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PeripheralBlood2_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py index 10069693f..6af3d7d50 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.class_maps = { @@ -43,11 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PeripheralBlood3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py index c0602d28a..1a751a682 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.class_maps = { @@ -43,11 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PeripheralBlood4_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py index ae8f80e2f..7d719f861 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "blood" self.class_maps = { @@ -43,11 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PeripheralBlood5_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PeripheralBlood5_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py index 2034bcbbd..78bd0c28d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py @@ -13,9 +13,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "bone tissue" self.class_maps = { @@ -40,11 +38,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "BoneMarrow1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="BoneMarrow1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py index a96602c7c..50d165781 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "brain" self.class_maps = { @@ -36,11 +35,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Brain1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py index 7d188840b..118636b13 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "brain" self.class_maps = { @@ -36,11 +35,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Brain2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Brain2_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py index 1faf7544d..36f5186de 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ovary" self.class_maps = { @@ -36,11 +35,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Ovary1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py index 9355c7acc..4b6fb475b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ovary" self.class_maps = { @@ -36,11 +35,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Ovary2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Ovary2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py index 2f817c510..45c66cd04 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.class_maps = { @@ -45,11 +44,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="SmallIntestine1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py index 61fb53c0e..ea2187180 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.class_maps = { @@ -45,11 +44,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="SmallIntestine2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py index 08303f3a1..fdbf281f4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "ileum" self.class_maps = { @@ -45,11 +44,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "SmallIntestine3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="SmallIntestine3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py index 365e62c50..c4405ac4b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py @@ -13,9 +13,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "kidney" self.class_maps = { @@ -25,11 +23,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Kidney1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py index 6b1aa65cd..eb3c94f50 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py @@ -13,9 +13,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "kidney" self.class_maps = { @@ -56,11 +54,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Kidney2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Kidney2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py index 345d8a0eb..f43f66f63 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py @@ -13,9 +13,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "liver" self.class_maps = { @@ -43,11 +41,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Liver1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py index e9223074a..77d52004c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py @@ -13,9 +13,7 @@ def __init__( **kwargs ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "liver" self.class_maps = { @@ -37,11 +35,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Liver2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Liver2_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py index 8926dbf10..8fbf204a0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.class_maps = { @@ -54,11 +53,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Lung1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py index 3f82240d9..70491637a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.class_maps = { @@ -54,11 +53,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Lung2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py index 19b8775f3..451d13f92 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "lung" self.class_maps = { @@ -54,11 +53,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Lung3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Lung3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py index 0a1fd12e8..7813b9eb6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "testis" self.class_maps = { @@ -41,11 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Testis1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py index 166dd785d..de97e10fb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "testis" self.class_maps = { @@ 
-41,11 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Testis2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Testis2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py index bc523124f..f7663789c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammary gland" self.class_maps = { @@ -40,11 +39,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="MammaryGland.Virgin1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py index 738329bad..8ce2a1d9b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammary gland" self.class_maps = { @@ -40,11 +39,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="MammaryGland.Virgin2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py index 384ff9f30..e85e955e1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammary gland" self.class_maps = { @@ -40,11 +39,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="MammaryGland.Virgin3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py index 6dcab9d3d..45bab81ff 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "mammary gland" self.class_maps = { @@ -40,11 +39,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "MammaryGland.Virgin4_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="MammaryGland.Virgin4_dge") diff 
--git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py index 21d3019f0..7dd8b82e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "skeletal muscle organ" self.class_maps = { @@ -39,11 +38,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Muscle_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Muscle_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py index 58acfa317..e485c8aa8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "pancreas" self.class_maps = { @@ -44,11 +43,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Pancreas_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Pancreas_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py index 12e4fcd7a..948eadbbb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "placenta" self.class_maps = { @@ -50,11 +49,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PlacentaE14.1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py index e62aaa0af..ddf097bfd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "placenta" self.class_maps = { @@ -50,11 +49,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "PlacentaE14.2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="PlacentaE14.2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py index 52baaaa0b..d3804d531 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "prostate" self.class_maps = { @@ -28,11 +27,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Prostate1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py index ddcaa51ec..092da9594 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "prostate" self.class_maps = { @@ -28,11 +27,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Prostate2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Prostate2_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py index 7947f5881..5abd5e54a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" self.class_maps = { @@ -47,11 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="NeonatalRib1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py index 9dbbab288..0ea19de0b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" self.class_maps = { @@ -47,11 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="NeonatalRib2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py index d1461dd33..0c5fbc008 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "rib" self.class_maps = { @@ -47,11 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "NeonatalRib3_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="NeonatalRib3_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py index dba49eeff..c33512c7f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "spleen" self.class_maps = { @@ -33,11 +32,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Spleen_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Spleen_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py index 184f6cb13..dcf8f06ed 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "stomach" self.class_maps = { @@ -39,11 +38,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas", "500more_dge", "Stomach_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Stomach_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py index 4f5f041a5..4a51ac334 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "thymus" self.class_maps = { @@ -30,11 +29,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Thymus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Thymus1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py index fd148575f..fcfca4c3c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.download = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "uterus" self.class_maps = { @@ -41,11 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus1_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Uterus1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py index b9c6ae41c..82465d647 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py @@ -14,7 +14,6 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.download = 
"https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.organ = "uterus" self.class_maps = { @@ -41,11 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - if self.path is None: - raise ValueError("provide either fn in load or path in constructor") - fn = os.path.join(self.path, "mouse", "temp_mouse_atlas/500more_dge", "Uterus2_dge.txt.gz") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_atlas", "MCA_CellAssignments.csv") - - self._load_generalized(fn=fn, fn_meta=fn_meta) + def _load(self): + self._load_generalized(samplename="Uterus2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index a3073137e..5ef304272 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -18,8 +18,11 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" - self.download = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" - self.download_meta = "private" + self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" + self.download_url_meta = [ + "private,uc_meta_data_stromal_with_donor.txt", + "private,hc_meta_data_stromal_with_donor.txt", + ] self.author = "Simmons" self.doi = "10.1016/j.cell.2018.08.067" @@ -56,13 +59,12 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "colon", "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.path, "human", "colon", 
"uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.path, "human", "colon", "hc_meta_data_stromal_with_donor.txt") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.doi_path, "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.doi_path, "hc_meta_data_stromal_with_donor.txt") + ] adata = anndata.read_loom(fn[0]) ctuc = pd.read_csv(fn[1], sep="\t") cthealthy = pd.read_csv(fn[2], sep="\t") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py index 863e9e540..1c3a4e0ad 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - self.download = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_url_meta = None self.author = "Regev" self.doi = "10.1016/j.cell.2019.06.029" @@ -79,9 +79,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "colon", "smillie19_epi.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "smillie19_epi.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py index 12a891bca..6b73a9549 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -19,11 +19,11 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" - self.download = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" + self.download_url_meta = None self.author = "Kenigsberg" - self.doi = "v" + self.doi = "10.1016/j.cell.2019.08.008" self.healthy = True self.normalization = "raw" self.organ = "ileum" @@ -60,9 +60,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "ileum", "martin19.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "martin19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py index b2ab03366..58b441b85 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -19,15 +19,15 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" - self.download = 
"https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" + self.download_url_meta = None self.author = "Strand" self.doi = "10.1016/j.celrep.2018.11.086" self.healthy = True self.normalization = "raw" self.state_exact = "healthy" - self.organ = "prostate" + self.organ = "prostate gland" self.organism = "human" self.protocol = "10X sequencing" self.year = 2018 @@ -49,9 +49,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "prostate", "henry18_0.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "henry18_0.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 6b28e777a..4dc1de175 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" - self.download = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" + self.download_url_meta = None self.author = "Yanai" self.doi = "10.1016/j.cels.2016.08.011" @@ -55,9 +55,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "pancreas", "baron16.processed.h5ad") + def 
_load(self): + fn = os.path.join(self.doi_path, "baron16.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index 0cce5039d..2ca2712b7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -18,8 +18,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" - self.download = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" - self.download_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" + self.download_url_data = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" + self.download_url_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" self.author = "Sandberg" self.doi = "10.1016/j.cmet.2016.08.020" @@ -56,12 +56,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.processed.1.zip"), - os.path.join(self.path, "human", "pancreas", "E-MTAB-5061.sdrf.txt") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "E-MTAB-5061.processed.1.zip"), + os.path.join(self.doi_path, "E-MTAB-5061.sdrf.txt") + ] df = pd.read_csv(fn[0], sep="\t") df.index = df.index.get_level_values(0) df = df.drop("#samples", axis=1) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 06fe407e2..8d48d2f07 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -1,7 +1,9 @@ import anndata -import numpy as np +import tarfile +import gzip +import scipy.io import os -import pandas +import pandas as pd from typing import Union from sfaira.data import DatasetBaseGroupLoadingManyFiles @@ -31,8 +33,8 @@ def __init__( self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_" \ f"10.1016/j.cmet.2019.01.021" - self.download = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117770" - self.download_meta = "private" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117770/suppl/GSE117770_RAW.tar" + self.download_url_meta = f"private,{self.sample_fn}_annotation.csv" self.author = "Bhushan" self.doi = "10.1016/j.cmet.2019.01.021" @@ -48,13 +50,22 @@ def __init__( self.obs_key_cellontology_original = "celltypes" - def _load(self, fn=None): - path_base = os.path.join(self.path, "mouse", "pancreas") - celltypes = pandas.read_csv(os.path.join(path_base, self.sample_fn + "_annotation.csv"), index_col=0) - - self.adata = anndata.read_mtx(os.path.join(path_base, self.sample_fn + "_matrix.mtx.gz")).transpose() - self.adata.var_names = np.genfromtxt(os.path.join(path_base, self.sample_fn + "_genes.tsv.gz"), dtype=str)[:, 1] - self.adata.obs_names = np.genfromtxt(os.path.join(path_base, self.sample_fn + "_barcodes.tsv.gz"), dtype=str) + def _load(self): + with tarfile.open(os.path.join(self.doi_path, 'GSE117770_RAW.tar')) as tar: + for member in tar.getmembers(): + if "_matrix.mtx.gz" in member.name and self.sample_fn in member.name: + name 
= "_".join(member.name.split("_")[:-1]) + with gzip.open(tar.extractfile(member), "rb") as mm: + x = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, + sep="\t", index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(name + "_genes.tsv.gz"), compression="gzip", header=None, + sep="\t") + var.columns = ["ensembl", "names"] + var.index = var["ensembl"].values + self.adata = anndata.AnnData(X=x, obs=obs, var=var) self.adata.var_names_make_unique() + celltypes = pd.read_csv(os.path.join(self.doi_path, self.sample_fn + "_annotation.csv"), index_col=0) self.adata = self.adata[celltypes.index] self.adata.obs["celltypes"] = celltypes diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py index 2bcadaa7b..e45740617 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" - self.download = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" + self.download_url_meta = None self.author = "Spence" self.doi = "10.1016/j.devcel.2020.01.033" @@ -66,9 +66,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", "miller20.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "miller20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py index 6487341e8..499ec0128 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py @@ -18,8 +18,9 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" - self.download = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" - self.download_meta = None + + self.download_url_data = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" + self.download_url_meta = None self.author = "Regev" self.doi = "10.1038/nmeth.4407" @@ -55,9 +56,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "brain", "habib17.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "habib17.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py index deb1b9fbb..4117cd103 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_testis_2018_10x_guo_001_10.1038/s41422-018-0099-2" - self.download = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" + self.download_url_meta = None self.author = "Cairns" self.doi = "10.1038/s41422-018-0099-2" @@ -52,9 +52,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "malegonad", "guo18_donor.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "guo18_donor.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py index 75c3c24b1..3667a0ebc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py @@ -18,8 +18,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" - self.download = "private" - self.download_meta = "private" + self.download_url_data = "private,GSE115469.csv.gz" + self.download_url_meta = "private,GSE115469_labels.txt" self.author = "McGilvray" self.doi = "10.1038/s41467-018-06318-7" @@ -60,12 +60,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE115469.csv.gz"), - os.path.join(self.path, "human", "liver", "GSE115469_labels.txt") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "GSE115469.csv.gz"), + os.path.join(self.doi_path, 
"GSE115469_labels.txt") + ] self.adata = anndata.read_csv(fn[0]).T celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName") self.adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in self.adata.obs.index] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py index 2e27a91c5..2a414198d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py @@ -18,10 +18,10 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/" \ - "GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" - self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/" \ - "GSE121nnn/GSE121862/suppl/GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ + "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ + "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" self.author = "Jain" self.doi = "10.1038/s41467-019-10861-2" @@ -29,7 +29,7 @@ def __init__( self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.protocol = "10xSn" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 @@ -69,12 +69,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", 
"kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.path, "human", "kidney", "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), + os.path.join(self.doi_path, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") + ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) annot = pd.read_csv(fn[1], index_col=0, dtype="category") self.adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in self.adata.obs.index] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index d92829246..6e749196d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -40,8 +40,11 @@ def __init__( super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_10.1038/s41467-019-12464-3" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" - self.download_meta = "private" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" + self.download_url_meta = [ + "private,donor1.annotation.txt", + "private,donor2.annotation.txt" + ] self.author = "Sims" self.doi = "10.1038/s41467-019-12464-3" @@ -62,13 +65,13 @@ def __init__( "0": {}, } - def _load(self, fn=None): - fn_tar = os.path.join(self.path, "human", "mixed", "GSE126030_RAW.tar"), - fn_annot = [ - os.path.join(self.path, "human", "mixed", "donor1.annotation.txt"), - 
os.path.join(self.path, "human", "mixed", "donor2.annotation.txt") + def _load(self): + fn = [ + os.path.join(self.doi_path, "GSE126030_RAW.tar"), + os.path.join(self.doi_path, "donor1.annotation.txt"), + os.path.join(self.doi_path, "donor2.annotation.txt") ] - with tarfile.open(fn_tar) as tar: + with tarfile.open(fn[0]) as tar: df = pd.read_csv(tar.extractfile(self.sample_fn), compression="gzip", sep="\t") df.index = [i.split(".")[0] for i in df["Accession"]] var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) @@ -76,28 +79,28 @@ def _load(self, fn=None): df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) self.adata = anndata.AnnData(df.T) self.adata.var = var - if "PP001" in fn or "PP002" in fn: + if "PP001" in self.sample_fn or "PP002" in self.sample_fn: self.adata.obs["donor"] = "Donor1" self.adata.obs["organ"] = "lung" - elif "PP003" in fn or "PP004" in fn: + elif "PP003" in self.sample_fn or "PP004" in self.sample_fn: self.adata.obs["donor"] = "Donor1" self.adata.obs["organ"] = "bone marrow" - elif "PP005" in fn or "PP006" in fn: + elif "PP005" in self.sample_fn or "PP006" in self.sample_fn: self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "lymph node" - elif "PP009" in fn or "PP010" in fn: + self.adata.obs["organ"] = "lymph Node" + elif "PP009" in self.sample_fn or "PP010" in self.sample_fn: self.adata.obs["donor"] = "Donor2" self.adata.obs["organ"] = "lung" - elif "PP011" in fn or "PP012" in fn: + elif "PP011" in self.sample_fn or "PP012" in self.sample_fn: self.adata.obs["donor"] = "Donor2" self.adata.obs["organ"] = "bone marrow" - elif "PP013" in fn or "PP014" in fn: + elif "PP013" in self.sample_fn or "PP014" in self.sample_fn: self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "lymph node" - self.adata.obs.index = fn.split("_")[1].split("s")[0] + "nskept." + self.adata.obs.index + self.adata.obs["organ"] = "lymph Node" + self.adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." 
+ self.adata.obs.index self.adata.obs["cell_ontology_class"] = "Unknown" - df1 = pd.read_csv(fn_annot[0], sep="\t", index_col=0, header=None) - df2 = pd.read_csv(fn_annot[1], sep="\t", index_col=0, header=None) + df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) + df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) for i in df1.index: self.adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] for i in df2.index: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py index c82ee0ce2..de23161c6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py @@ -17,8 +17,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" - self.download = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" + self.download_url_meta = None self.author = "Hafler" self.doi = "10.1038/s41467-019-12780-8" @@ -48,7 +48,6 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "eye", "menon19.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "menon19.processed.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py index b8c06db99..d5615a621 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py @@ -26,8 +26,10 @@ def __init__( self.id = f"human_placenta_2018_{protocol}_ventotormo_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41586-018-0698-6" - self.download = f"https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/{self.sample_fn}.1.zip" - self.download_meta = f"https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6701/{self.sample_fn}.2.zip" + self.download_url_data = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ + f"{self.sample_fn}.1.zip" + self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ + f"{self.sample_fn}.2.zip" self.author = "Teichmann" self.healthy = True @@ -81,11 +83,10 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", "placenta") + def _load(self): fn = [ - os.path.join(base_path, f"{self.sample_fn}.1.zip"), - os.path.join(base_path, f"{self.sample_fn}.2.zip"), + os.path.join(self.doi_path, f"{self.sample_fn}.1.zip"), + os.path.join(self.doi_path, f"{self.sample_fn}.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) df = pd.read_csv(fn[1], sep="\t") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_mCELSeq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py similarity index 82% rename from sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_mCELSeq2_aizarani_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py index 9df9d2693..4459a8b12 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_mCELSeq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py @@ -18,8 +18,8 @@ 
def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" - self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" self.author = "Gruen" self.doi = "10.1038/s41586-019-1373-2" @@ -27,7 +27,7 @@ def __init__( self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "mCEL-Seq2" + self.protocol = "CEL-seq2" self.state_exact = "healthy" self.year = 2019 @@ -79,12 +79,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "liver", "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.path, "human", "liver", "GSE124395_clusterpartition.txt.gz") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.doi_path, "GSE124395_clusterpartition.txt.gz") + ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) celltype_df = pd.read_csv(fn[1], sep=" ") self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py index 041fa36ac..984f0f41a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py @@ -42,8 +42,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" - self.download = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" - self.download_meta = None + self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" + self.download_url_meta = None self.author = "Henderson" self.doi = "10.1038/s41586-019-1631-3" @@ -77,7 +77,6 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "liver", "ramachandran.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "ramachandran.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py index 7cda429eb..504907193 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py @@ -17,8 +17,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" - self.download = "private" - self.download_meta = "private" + self.download_url_data = "private,fetal_liver_alladata_.h5ad" + self.download_url_meta = None self.author = "Haniffa" self.doi = "10.1038/s41586-019-1652-y" @@ -66,7 +66,6 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "liver", "fetal_liver_alladata_.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py index a54472e61..9f885b0bf 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -4,7 +4,6 @@ import pandas as pd import scipy.sparse from typing import Union -import urllib.request import zipfile from sfaira.data import DatasetBase @@ -24,8 +23,8 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.download = "https://ndownloader.figshare.com/files/17727365" - self.download_meta = [ + self.download_url_data = "https://ndownloader.figshare.com/files/17727365" + self.download_url_meta = [ "https://ndownloader.figshare.com/files/21758835", "https://ndownloader.figshare.com/files/22447898", ] @@ -46,29 +45,7 @@ def __init__( self.var_symbol_col = "index" - def _download(self): - # download required files from loaders cell landscape publication data: https://figshare.com/articles/HCL_DGE_Data/7235471 - print(urllib.request.urlretrieve( - "https://ndownloader.figshare.com/files/17727365", - os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad") - )) - print(urllib.request.urlretrieve( - "https://ndownloader.figshare.com/files/21758835", - os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx") - )) - - print(urllib.request.urlretrieve( - "https://ndownloader.figshare.com/files/22447898", - os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") - )) - # extract the downloaded zip archive - with zipfile.ZipFile( - os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip"), - "r" - ) as zip_ref: - zip_ref.extractall(os.path.join(self.path, self.directory_formatted_doi)) - - def _load_generalized(self, fn, sample_id: str): + 
def _load_generalized(self, sample_id: str): """ Attempt to find file, cache entire HCL if file was not found. @@ -112,13 +89,11 @@ def _load_generalized(self, fn, sample_id: str): df = pd.DataFrame( columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) - for f in os.listdir( - os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417") - ): - df1 = pd.read_csv( - os.path.join( - self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417", f - ), encoding="unicode_escape") + archive = zipfile.ZipFile( + os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") + ) + for f in archive.namelist(): + df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") df = pd.concat([df, df1], sort=True) df = df.set_index("Cell_id") adata = adata[[i in df.index for i in adata.obs.index]].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py index 1ca7d67ed..f2ea60ba9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultAdipose_1") + def _load(self): + self._load_generalized(sample_id="AdultAdipose_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py index 3bbf998fe..30b10e000 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="NeonatalAdrenalGland_1") + def _load(self): + self._load_generalized(sample_id="NeonatalAdrenalGland_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py index f103be794..026663baa 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_2") + def _load(self): + self._load_generalized(sample_id="FetalAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py index 28fa28b71..24e4c0ff1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_3") + def _load(self): + self._load_generalized(sample_id="FetalAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py index 9250204e6..14946ce44 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_3") + def _load(self): + self._load_generalized(sample_id="AdultAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py index 6df0c6ba7..749d5f460 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalAdrenalGland_4") + def _load(self): + self._load_generalized(sample_id="FetalAdrenalGland_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py index 6bdad262b..f53816965 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultAdrenalGland_2") + def _load(self): + self._load_generalized(sample_id="AdultAdrenalGland_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py index e8fef6576..835cbc675 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultArtery_1") + def _load(self): + self._load_generalized(sample_id="AdultArtery_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py index 0798a4801..16562bba3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultBladder_1") + def _load(self): + self._load_generalized(sample_id="AdultBladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py index ac0024c36..9c668e5c3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultBladder_2") + def _load(self): + self._load_generalized(sample_id="AdultBladder_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py index 6a275fd56..1072a4f5e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultGallbladder_2") + def _load(self): + self._load_generalized(sample_id="AdultGallbladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py index f42cdfd46..35ca6f204 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_3") + def _load(self): + self._load_generalized(sample_id="AdultPeripheralBlood_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py index f12385f4c..5ff3f876e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="PeripheralBlood_1") + def _load(self): + self._load_generalized(sample_id="PeripheralBlood_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py index 2b5b470ee..b211b53c6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="CordBlood_2") + def _load(self): + self._load_generalized(sample_id="CordBlood_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py index ad3e7090c..5a4549db8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultPeripheralBlood_4") + def _load(self): + self._load_generalized(sample_id="AdultPeripheralBlood_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py index 9eb937bef..e196c17e5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="CordBloodCD34P_1") + def _load(self): + self._load_generalized(sample_id="CordBloodCD34P_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py index 015d311ee..7ee3358ff 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="CordBloodCD34P_2") + def _load(self): + self._load_generalized(sample_id="CordBloodCD34P_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py index a3ada5b3f..7fa17dbda 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="CordBlood_1") + def _load(self): + self._load_generalized(sample_id="CordBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py index c3b3bd450..be2880f7e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="BoneMarrow_1") + def _load(self): + self._load_generalized(sample_id="BoneMarrow_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py index 8debbac63..f7cf296ba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="BoneMarrow_2") + def _load(self): + self._load_generalized(sample_id="BoneMarrow_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py index 373b2c325..248957421 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalBrain_4") + def _load(self): + self._load_generalized(sample_id="FetalBrain_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py index f6b377c91..58b188adb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalBrain_5") + def _load(self): + self._load_generalized(sample_id="FetalBrain_5") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py index 78487bf6f..c3a7c5daa 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalBrain_3") + def _load(self): + self._load_generalized(sample_id="FetalBrain_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py index bd4c0cfde..44828ba45 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultTemporalLobe_1") + def _load(self): + self._load_generalized(sample_id="AdultTemporalLobe_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py index ea462000c..36b91969e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalBrain_6") + def _load(self): + self._load_generalized(sample_id="FetalBrain_6") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py index 749d7d71f..68bbe1688 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultCerebellum_1") + def _load(self): + self._load_generalized(sample_id="AdultCerebellum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py index 8524c25e4..0e831f3a6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalCalvaria_1") + def _load(self): + self._load_generalized(sample_id="FetalCalvaria_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py index 521bb924e..540856e10 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultCervix_1") + def _load(self): + self._load_generalized(sample_id="AdultCervix_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py index 560e297aa..f0d147e57 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="ChorionicVillus_1") + def _load(self): + self._load_generalized(sample_id="ChorionicVillus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py index 01ce9f3ee..4da28b14d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py @@ -46,5 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultAscendingColon_1") + def _load(self): + self._load_generalized(sample_id="AdultAscendingColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py index 1082dfa60..a2dc18aaa 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py @@ -46,5 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultColon_1") + def _load(self): + self._load_generalized(sample_id="AdultColon_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py index 099d79147..310d25720 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py @@ -46,5 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultTransverseColon_2") + def _load(self): + self._load_generalized(sample_id="AdultTransverseColon_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py index 30da95dfe..cbbd3bdef 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py @@ -46,5 +46,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultSigmoidColon_1") + def _load(self): + self._load_generalized(sample_id="AdultSigmoidColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py index 122bd7bf8..b77ca89cb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultDuodenum_1") + def _load(self): + self._load_generalized(sample_id="AdultDuodenum_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py index c50ae3fac..928c8546c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py @@ -13,10 +13,10 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "epityphlon" + self.organ = "caecum" self.class_maps = { "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultEpityphlon_1") + def _load(self): + self._load_generalized(sample_id="AdultEpityphlon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py index 2f948e5a0..ec4aedccb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py @@ -43,5 +43,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultEsophagus_1") + def _load(self): + self._load_generalized(sample_id="AdultEsophagus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py index af43f661a..c71780471 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py @@ -43,5 +43,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultEsophagus_2") + def _load(self): + self._load_generalized(sample_id="AdultEsophagus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py index 10dde6d24..7b50b2b82 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py @@ -42,5 +42,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalEyes_1") + def _load(self): + self._load_generalized(sample_id="FetalEyes_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py index fec492e2e..abd15c99e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultFallopiantube_1") + def _load(self): + self._load_generalized(sample_id="AdultFallopiantube_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py index 7d00681f8..bdbde700e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_2") + def _load(self): + self._load_generalized(sample_id="FetalFemaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py index b64a40fb6..89e8c73e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalFemaleGonad_1") + def _load(self): + self._load_generalized(sample_id="FetalFemaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py index 26886e888..e4eb1de58 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultGallbladder_1") + def _load(self): + self._load_generalized(sample_id="AdultGallbladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py index 4b25db497..13f46e501 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalHeart_2") + def _load(self): + self._load_generalized(sample_id="FetalHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py index 839528da7..35ad6721f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultHeart_2") + def _load(self): + self._load_generalized(sample_id="AdultHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py index a4cc0fdfb..948c6f94d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultHeart_1") + def _load(self): + self._load_generalized(sample_id="AdultHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py index 7439b9fec..2b853e0de 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalHeart_1") + def _load(self): + self._load_generalized(sample_id="FetalHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py index 625583aa7..57c4d33ed 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py @@ -15,5 +15,5 @@ def __init__( self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "hesc" - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="HESC_1") + def _load(self): + self._load_generalized(sample_id="HESC_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py index 66d4209fc..2b34d00d5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py @@ -45,5 +45,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultIleum_2") + def _load(self): + self._load_generalized(sample_id="AdultIleum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py index 9db8f62a8..0b0918710 
100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultJejunum_2") + def _load(self): + self._load_generalized(sample_id="AdultJejunum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py index 77b2e117e..356e7bd2e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultKidney_2") + def _load(self): + self._load_generalized(sample_id="AdultKidney_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py index 42b21e827..4e581306f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultKidney_3") + def _load(self): + self._load_generalized(sample_id="AdultKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py index 2811fe77f..179ca418d 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultKidney_4") + def _load(self): + self._load_generalized(sample_id="AdultKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py index 07cad1336..e2aa9d48b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalKidney_3") + def _load(self): + self._load_generalized(sample_id="FetalKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py index 751e9470f..d9ab0ed8e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalKidney_4") + def _load(self): + self._load_generalized(sample_id="FetalKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py index a1b0a195e..c45c8c6e6 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalKidney_5") + def _load(self): + self._load_generalized(sample_id="FetalKidney_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py index 9793a4b2e..a8048080b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py @@ -68,5 +68,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalKidney_6") + def _load(self): + self._load_generalized(sample_id="FetalKidney_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py index 1318b3dba..0a0fc8d22 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLiver_1") + def _load(self): + self._load_generalized(sample_id="AdultLiver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py index 59b07abf1..0046eea36 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLiver_2") + def _load(self): + self._load_generalized(sample_id="AdultLiver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py index 1701446f1..137f41e39 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLiver_4") + def _load(self): + self._load_generalized(sample_id="AdultLiver_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py index 119f16030..f6b2063d6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="Liver_1") + def _load(self): + self._load_generalized(sample_id="Liver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py index 94fd323db..610ef167e 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="Liver_2") + def _load(self): + self._load_generalized(sample_id="Liver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py index 2487b789a..c481cc965 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py @@ -69,5 +69,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalLung_1") + def _load(self): + self._load_generalized(sample_id="FetalLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py index d1d6a73c4..53ca247f2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py @@ -69,5 +69,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLung_3") + def _load(self): + self._load_generalized(sample_id="AdultLung_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py index 9dc30ed7e..7253b7022 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py 
+++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py @@ -69,5 +69,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLung_2") + def _load(self): + self._load_generalized(sample_id="AdultLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py index 6a85c3db0..ad9252b4d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py @@ -69,5 +69,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultLung_1") + def _load(self): + self._load_generalized(sample_id="AdultLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py index 0083c8e5b..2bbde67cb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py @@ -69,5 +69,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalLung_2") + def _load(self): + self._load_generalized(sample_id="FetalLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py index 322a7d9e1..82dc9ce21 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalMaleGonad_1") + def _load(self): + self._load_generalized(sample_id="FetalMaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py index 4067551fb..fbfc7385b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py @@ -47,5 +47,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalMaleGonad_2") + def _load(self): + self._load_generalized(sample_id="FetalMaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py index 28f9e252d..9fca53de1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalMuscle_1") + def _load(self): + self._load_generalized(sample_id="FetalMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py index 3e4f75b71..cf1e0aa69 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultMuscle_1") + def _load(self): + self._load_generalized(sample_id="AdultMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py index 82e0aa456..9bb9dc54d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultOmentum_2") + def _load(self): + self._load_generalized(sample_id="AdultOmentum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py index c903a8e78..3f0392086 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultOmentum_3") + def _load(self): + self._load_generalized(sample_id="AdultOmentum_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py index 37652c8ca..ebdd68863 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultOmentum_1") + def _load(self): + self._load_generalized(sample_id="AdultOmentum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py index a5b273cbd..c32932cdd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py @@ -57,5 +57,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultPancreas_1") + def _load(self): + self._load_generalized(sample_id="AdultPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py index c8eb43976..8a47851fa 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py @@ -57,5 +57,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalPancreas_1") + def _load(self): + self._load_generalized(sample_id="FetalPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py index aa5ba9d05..698a47c1e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py @@ -57,5 +57,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalPancreas_2") + def _load(self): + self._load_generalized(sample_id="FetalPancreas_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py index 02662bd9f..8d51a6d78 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py @@ -57,5 +57,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalPancreas_3") + def _load(self): + self._load_generalized(sample_id="FetalPancreas_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py index 537345671..d753b3923 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py @@ -50,5 +50,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="Placenta_1") + def _load(self): + self._load_generalized(sample_id="Placenta_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py index efcd5c949..51816a2af 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultPleura_1") + def _load(self): + self._load_generalized(sample_id="AdultPleura_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py index 1a6bef219..5e8efb0ca 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py @@ -13,7 +13,7 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "prostate" + self.organ = "prostate gland" self.class_maps = { "0": { "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", @@ -39,5 +39,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultProstate_1") + def _load(self): + self._load_generalized(sample_id="AdultProstate_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py index 25af7abb2..88c466404 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py @@ -34,5 +34,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultRectum_1") + def _load(self): + self._load_generalized(sample_id="AdultRectum_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py index 1df96a84c..c65d99fa4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalRib_2") + def _load(self): + self._load_generalized(sample_id="FetalRib_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py index d37bf1bca..dde7f741c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalRib_3") + def _load(self): + self._load_generalized(sample_id="FetalRib_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py index 2cf2cdd88..f42150090 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py @@ -48,5 +48,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalSkin_2") + def _load(self): + self._load_generalized(sample_id="FetalSkin_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py index 913b88fc4..ee3816042 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py @@ -48,5 +48,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalSkin_3") + def _load(self): + self._load_generalized(sample_id="FetalSkin_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py index 935cad23b..7311407cd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalSpinalCord_1") + def _load(self): + self._load_generalized(sample_id="FetalSpinalCord_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py index f5dad107a..59703acd1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py @@ -40,5 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultSpleenParenchyma_1") + def _load(self): + self._load_generalized(sample_id="AdultSpleenParenchyma_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py index fd40ef79a..1f0aa4b37 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py @@ -40,5 +40,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultSpleen_1") + def _load(self): + self._load_generalized(sample_id="AdultSpleen_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py index 997a1795d..efd42e009 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultStomach_1") + def _load(self): + self._load_generalized(sample_id="AdultStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py index c32a24ee2..f0d1fd859 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalStomach_1") + def _load(self): + self._load_generalized(sample_id="FetalStomach_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py index d93ed24d7..838e3ae81 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalIntestine_1") + def _load(self): + self._load_generalized(sample_id="FetalIntestine_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py index 9707559a3..e0f2155d5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalIntestine_3") + def _load(self): + self._load_generalized(sample_id="FetalIntestine_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py index 5319b9ce6..645153043 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalStomach_2") + def _load(self): + self._load_generalized(sample_id="FetalStomach_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py index 76b06f3dd..b8eb86df2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalIntestine_2") + def _load(self): + self._load_generalized(sample_id="FetalIntestine_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py index 9de6d6c5a..533e6f808 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalIntestine_5") + def _load(self): + self._load_generalized(sample_id="FetalIntestine_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py index 115dd11ea..9368f66ea 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultStomach_3") + def _load(self): + self._load_generalized(sample_id="AdultStomach_3") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py index d26759d0d..7930c3791 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultStomach_2") + def _load(self): + self._load_generalized(sample_id="AdultStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py index 58741fd2b..76e3154fd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalIntestine_4") + def _load(self): + self._load_generalized(sample_id="FetalIntestine_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py index 9ec801179..915751810 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py @@ -32,5 +32,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalThymus_2") + def _load(self): + self._load_generalized(sample_id="FetalThymus_2") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py index 9dd3bf713..f89e0d1a7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py @@ -32,5 +32,5 @@ def __init__( }, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="FetalThymus_1") + def _load(self): + self._load_generalized(sample_id="FetalThymus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py index 74b94bff4..d8fd72f71 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultThyroid_2") + def _load(self): + self._load_generalized(sample_id="AdultThyroid_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py index 2e932292b..042ec8262 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultThyroid_1") + def _load(self): + self._load_generalized(sample_id="AdultThyroid_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py index 368c39418..774ae4876 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultTrachea_2") + def _load(self): + self._load_generalized(sample_id="AdultTrachea_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py index 0ddad9999..e8fdbc5bd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultUreter_1") + def _load(self): + self._load_generalized(sample_id="AdultUreter_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py index 82efa82fb..be40e1071 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py @@ -18,5 +18,5 @@ def __init__( "0": {}, } - def _load(self, fn=None): - self._load_generalized(fn=fn, sample_id="AdultUterus_1") + def _load(self): + self._load_generalized(sample_id="AdultUterus_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index e75dc8091..bcd843a4b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -44,8 +44,13 @@ def __init__( self.id = f"human_lung_2020_{protocol}_travaglini_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41586-020-2922-4" - self.download = "https://www.synapse.org/#!Synapse:syn21041850" - self.download_meta = None + synapse_id = { + "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625095", + "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625142" + } + + self.download_url_data = f"{synapse_id[self.sample_fn]},{self.sample_fn}" + self.download_url_meta = None self.author = "Krasnow" self.doi = "10.1038/s41586-020-2922-4" @@ -193,9 +198,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "lung", self.sample_fn) + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) if self.sample_fn.split("_")[0] == "droplet": norm_const = 1000000 else: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py index 2bd3c8b6c..d6a1b4dfd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" - self.download = 
"https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" + self.download_url_meta = None self.author = "Teichmann" self.doi = "10.1038/s41590-020-0602-z" @@ -67,9 +67,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "colon", "james20.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "james20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py similarity index 94% rename from sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py index f90020c16..9becde666 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py @@ -25,7 +25,8 @@ def __init__( self.id = f"human_lung_2019_10x_braga_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41591-019-0468-5" - self.download = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_meta = None self.author = "Teichmann" self.doi = "10.1038/s41591-019-0468-5" @@ -88,9 +89,8 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", "placenta") - fn = os.path.join(base_path, self.sample_fn) + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) self.adata = 
anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 248094396..36ac1cba3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -18,8 +18,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" - self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" self.author = "Teichmann" self.doi = "10.1038/s41591-019-0468-5" @@ -53,11 +53,10 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", "placenta") + def _load(self): fn = [ - os.path.join(base_path, "GSE130148_raw_counts.csv.gz"), - os.path.join(base_path, "GSE130148_barcodes_cell_types.txt.gz"), + os.path.join(self.doi_path, "GSE130148_raw_counts.csv.gz"), + os.path.join(self.doi_path, "GSE130148_barcodes_cell_types.txt.gz"), ] self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py index e0a831c7e..d293c180b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -2,6 +2,8 @@ import numpy as np import os import pandas +import zipfile +import scipy.io from typing import Union from sfaira.data import DatasetBase @@ -18,10 +20,10 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4" - self.download = \ - "www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" - self.download_meta = \ - "www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" + self.download_url_data = \ + "https://www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" + self.download_url_meta = \ + "https://www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" self.author = "Movahedi" self.doi = "10.1038/s41593-019-0393-4" @@ -48,29 +50,32 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "matrix.mtx") - fn_barcodes = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "barcodes.tsv") - fn_var = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "genes.tsv") - fn_meta = os.path.join(self.path, "mouse", "temp_mouse_brain_atlas", "annot_fullAggr.csv") + def _load(self): + fn = [ + os.path.join(self.doi_path, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), + os.path.join(self.doi_path, "annot_fullAggr.csv") + ] - self.adata = anndata.read_mtx(fn) - self.adata = anndata.AnnData(self.adata.X.T) - var = pandas.read_csv(fn_var, sep="\t", header=None) - var.columns = ["ensembl", "name"] - obs_names = 
pandas.read_csv(fn_barcodes, sep="\t", header=None)[0].values + with zipfile.ZipFile(fn[0]) as archive: + x = scipy.io.mmread(archive.open('filtered_gene_bc_matrices_mex/mm10/matrix.mtx')).T.tocsr() + self.adata = anndata.AnnData(x) + var = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/genes.tsv'), sep="\t", header=None) + var.columns = ["ensembl", "name"] + obs_names = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/barcodes.tsv'), + sep="\t", + header=None + )[0].values assert len(obs_names) == self.adata.shape[0] assert var.shape[0] == self.adata.shape[1] - obs = pandas.read_csv(self.path + fn_meta) + obs = pandas.read_csv(fn[1]) # Match annotation to raw data. obs.index = obs["cell"].values - obs = obs.loc[[x in obs_names for x in obs.index], :] - idx_tokeep = np.where([x in obs.index for x in obs_names])[0] + obs = obs.loc[[i in obs_names for i in obs.index], :] + idx_tokeep = np.where([i in obs.index for i in obs_names])[0] self.adata = self.adata[idx_tokeep, :] obs_names = obs_names[idx_tokeep] - idx_map = np.array([obs.index.tolist().index(x) for x in obs_names]) + idx_map = np.array([obs.index.tolist().index(i) for i in obs_names]) self.adata = self.adata[idx_map, :] obs_names = obs_names[idx_map] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py index d5939f778..47ab93b69 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -21,8 +21,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" - self.download = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" -
self.download_meta = None + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" + self.download_url_meta = None self.author = "Mo" self.healthy = True @@ -41,9 +41,8 @@ def __init__( "0": {}, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "kidney", "GSE131685_RAW.tar") + def _load(self): + fn = os.path.join(self.doi_path, "GSE131685_RAW.tar") adatas = [] with tarfile.open(fn) as tar: for member in tar.getmembers(): diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py index ac457f8b9..86f2c6906 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -18,8 +18,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" - self.download = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" + self.download_url_meta = None self.author = "Mullins" self.doi = "10.1073/pnas.1914143116" @@ -51,8 +51,7 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "eye", "voigt19.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "voigt19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py index 05858a633..d5bd563d8 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py @@ -27,7 +27,8 @@ def __init__( organ = self.sample_fn.split("_")[1].split(".")[0] self.id = f"human_{organ}_2019_10x_wang_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1084/jem.20191130" - self.download = f"https://covid19.cog.sanger.ac.uk/wang20_{organ}.processed.h5ad" + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/wang20_{organ}.processed.h5ad" + self.download_url_meta = None self.author = "Chen" self.doi = "10.1084/jem.20191130" @@ -80,9 +81,8 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", self.organ) - fn = os.path.join(base_path, self.sample_fn) + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py index 7caf00e07..637d4c7c3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -26,7 +26,8 @@ def __init__( self.id = f"human_lung_2020_10x_lukassen_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1101/2020.03.13.991455" - self.download = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_meta = None self.author = "Eils" self.doi = "10.1101/2020.03.13.991455" @@ -76,9 +77,8 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", 
"lung") - fn = os.path.join(base_path, self.sample_fn) + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 640bc3ee2..51c421d00 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -84,7 +84,8 @@ def __init__( self.id = f"mouse_{''.join(organ.split(' '))}_2019_{protocol}_pisco_" \ f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1101/661728" - self.download = "https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" + self.download_url_data = f"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/{sample_fn}" + self.download_url_meta = None self.obs_key_cellontology_original = "free_annotation" self.obs_key_age = "age" @@ -105,10 +106,8 @@ def __init__( self.var_ensembl_col = None self.var_symbol_col = "index" - def _load(self, fn): - base_path = os.path.join(self.path, "raw", self.directory_formatted_doi) - fn = os.path.join(base_path, self.sample_fn) - + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) self.adata = anndata.read_h5ad(fn) self.adata.X = self.adata.raw.X self.adata.var = self.adata.raw.var diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py index ad50957ba..6f541d5d4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -18,12 +18,12 @@ def __init__( 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" - self.download = [ + self.download_url_data = [ "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fbarcodes%2Etsv%2Egz" ] - self.download_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" self.author = "Kropski" self.doi = "10.1101/753806" @@ -76,14 +76,13 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "lung", "GSE135893_matrix.mtx.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_genes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_barcodes.tsv.gz"), - os.path.join(self.path, "human", "lung", "GSE135893_IPF_metadata.csv.gz"), - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "GSE135893_matrix.mtx.gz"), + os.path.join(self.doi_path, "GSE135893_genes.tsv.gz"), + os.path.join(self.doi_path, "GSE135893_barcodes.tsv.gz"), + os.path.join(self.doi_path, "GSE135893_IPF_metadata.csv.gz"), + ] self.adata = anndata.read_mtx(fn[0]).T self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=["barcodes"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py index 6e8781ef6..efdad7127 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py 
+++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -18,11 +18,11 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" - self.download = [ + self.download_url_data = [ "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" ] - self.download_meta = None + self.download_url_meta = None self.author = "Clatworthy" self.doi = "10.1126/science.aat5031" @@ -114,12 +114,11 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = [ - os.path.join(self.path, "human", "kidney", "Mature_Full_v2.1.h5ad"), - os.path.join(self.path, "human", "kidney", "Fetal_full.h5ad") - ] + def _load(self): + fn = [ + os.path.join(self.doi_path, "Mature_Full_v2.1.h5ad"), + os.path.join(self.doi_path, "Fetal_full.h5ad") + ] adult = anndata.read(fn[0]) fetal = anndata.read(fn[1]) adult.obs["development"] = "adult" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py index 671c19742..f6f81511d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -18,8 +18,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" - self.download = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_url_meta = None self.author = "Teichmann" self.doi = "10.1126/science.aay3224" @@ -84,8 +84,7 @@ def __init__( }, } - def 
_load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "thymus", "park20.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "park20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index 2f206b907..263a18f37 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -25,20 +25,23 @@ def __init__( super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" - self.id = f"human_{"".join(organ.split(" "))}_2019_10x_madissoon_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - f"10.1186/s13059-019-1906-x" + self.id = f"human_{''.join(organ.split(' '))}_2019_10x_madissoon_" \ + f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1186/s13059-019-1906-x" if self.sample_fn == "madissoon19_lung.processed.h5ad": - "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" self.var_ensembl_col = "gene.ids.HCATisStab7509734" elif self.sample_fn == "oesophagus.cellxgene.h5ad": - self.download = "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" - # Associated HCA project: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.download_url_data = \ + "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" + # Associated DCP: 
https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 self.var_ensembl_col = "gene_ids-HCATisStab7413619" else: - "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.download_url_data = \ + "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" self.var_ensembl_col = "gene_ids-HCATisStab7463846" + self.download_url_meta = None self.author = "Meyer" self.doi = "10.1186/s13059-019-1906-x" self.healthy = True @@ -140,9 +143,8 @@ def __init__( }, } - def _load(self, fn=None): - base_path = os.path.join(self.path, "human", self.organ) - fn = os.path.join(base_path, self.sample_fn) + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) self.adata = anndata.read(fn) if self.sample_fn == "oesophagus.cellxgene.h5ad" or self.sample_fn == "spleen.cellxgene.h5ad": self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py index 494ae8a4b..6ddcbe096 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -19,8 +19,8 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" - self.download = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" - self.download_meta = None + self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" + self.download_url_meta = None self.author = "Wong" self.doi = "10.15252/embj.2018100811" @@ -56,9 +56,8 @@ def __init__( }, } - def _load(self, fn=None): - if fn is None: - fn = 
os.path.join(self.path, "human", "eye", "lukowski19.processed.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "lukowski19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py deleted file mode 100644 index 6f5a1f1f4..000000000 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2018_10x_ica_001.py +++ /dev/null @@ -1,48 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_blood_2018_10x_ica_001_unknown" - - self.download = "https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_meta = None - - self.author = "Regev" - self.doi = "no_doi" - self.healthy = True - self.normalization = "raw" - self.organ = "blood" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2018 - - self.var_symbol_col = "index" - self.var_ensembl_col = "Accession" - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "blood", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "umbilical cord blood").values, - (self.adata.obs["emptydrops_is_cell"] == "t").values) - self.adata = self.adata[idx].copy() diff --git 
a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py index 5e226c2f8..a52f4fbc6 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -29,8 +29,9 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2019_10x_10xGenomics_001_unknown" - self.download = "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" - self.download_meta = None + self.download_url_data = \ + "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" + self.download_url_meta = None self.author = "10x Genomics" self.doi = "no_doi" @@ -49,7 +50,6 @@ def __init__( "0": {}, } - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "blood", "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + def _load(self): + fn = os.path.join(self.doi_path, "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py deleted file mode 100644 index 9db2f1359..000000000 --- a/sfaira/data/dataloaders/loaders/d_nan/human_bone_2018_10x_ica_001.py +++ /dev/null @@ -1,48 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_bone_2018_10x_ica_unknown" - - self.download = 
"https://data.humancellatlas.org/project-assets/project-matrices/cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" - self.download_meta = None - - self.author = "Regev" - self.doi = "no_doi" - self.healthy = True - self.normalization = "raw" - self.organ = "bone marrow" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2018 - - self.var_symbol_col = "index" - self.var_ensembl_col = "Accession" - - self.class_maps = { - "0": {}, - } - - def _load(self, fn=None): - if fn is None: - fn = os.path.join(self.path, "human", "bone", "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - idx = np.logical_and((self.adata.obs["derived_organ_parts_label"] == "bone marrow").values, - (self.adata.obs["emptydrops_is_cell"] == "t").values) - self.adata = self.adata[idx].copy() diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py new file mode 100644 index 000000000..f75a33303 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py @@ -0,0 +1,60 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingOneFile + +SAMPLE_IDS = [ + "umbilical cord blood", + "bone marrow" +] + + +class Dataset(DatasetBaseGroupLoadingOneFile): + + def __init__( + self, + sample_id: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__( + sample_id=sample_id, + path=path, + meta_path=meta_path, + cache_path=cache_path, + **kwargs + ) + + self.obs_key_sample = "derived_organ_parts_label" + self.id = f"human_{'blood' if sample_id == 'umbilical cord blood' else 'bone'}_2018_10x_ica_" \ + f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_unknown" + + self.download_url_data = 
"https://data.humancellatlas.org/project-assets/project-matrices/" \ + "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" + self.download_url_meta = None + + self.author = "Regev" + self.doi = "no_doi" + self.healthy = True + self.normalization = "raw" + self.organ = "blood" if sample_id == "umbilical cord blood" else "bone marrow" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + self.var_ensembl_col = "Accession" + + self.class_maps = { + "0": {}, + } + + def _load_full(self): + fn = os.path.join(self.doi_path, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + self.adata = anndata.read_loom(fn) + self.adata = self.adata[self.adata.obs["emptydrops_is_cell"] == "t"].copy() diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 77c1294ac..3f561bbc7 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -41,8 +41,8 @@ def __init__( self.author = "interactive_dataset" self.doi = "interactive_dataset" - self.download = "." - self.download_meta = "." + self.download_url_data = "." + self.download_url_meta = "." 
# self.age # not currently supported # self.dev_stage # not currently supported @@ -75,5 +75,5 @@ def __init__( self.adata = data - def _load(self, fn=None): + def _load(self): pass diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py index 896deaa18..65d162b48 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py @@ -1,5 +1,5 @@ from typing import Union - +import os from sfaira.data import DatasetBase @@ -14,7 +14,8 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # ToDo Add you meta data here. + # ToDo: Add you meta data here. - def _load(self, fn): - pass # ToDo: load file fn into self.adata. + def _load(self): + fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # ToDo: add code that loads to raw file into an AnnData object diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py index 896deaa18..65d162b48 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py @@ -1,5 +1,5 @@ from typing import Union - +import os from sfaira.data import DatasetBase @@ -14,7 +14,8 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # ToDo Add you meta data here. + # ToDo: Add you meta data here. - def _load(self, fn): - pass # ToDo: load file fn into self.adata. 
+ def _load(self): + fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # ToDo: add code that loads to raw file into an AnnData object diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py index 26eba3e66..0652c60ae 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py @@ -1,4 +1,5 @@ from typing import Union +import os from sfaira.data import DatasetBaseGroupLoadingManyFiles @@ -20,9 +21,15 @@ def __init__( ): super().__init__( sample_fn=sample_fn, - path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + path=path, + meta_path=meta_path, + cache_path=cache_path, + **kwargs + ) + self.id = f"sth_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_doi" # ToDo: Index the Dataset ID by the file. # ToDo Add you meta data here. - def _load_any_object(self, fn=None): - pass # ToDo: load file fn into self.adata, using self.sample_fn, ie the current sample file. + def _load(self): + fn = os.path.join(self.doi_path, self.sample_fn) + # ToDo: load file fn into self.adata, self.sample_fn represents the current filename. diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py index 91052ce5d..7a3aa1320 100644 --- a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py +++ b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py @@ -1,7 +1,8 @@ import anndata from typing import Union +import os -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBaseGroupLoadingOneFile SAMPLE_IDS = [ # ToDo Add correct sample IDs here. 
"your_sample_id_1", @@ -9,23 +10,28 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBaseGroupLoadingOneFile): def __init__( self, - sample_fn: str, + sample_id: str, path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): super().__init__( - sample_fn=sample_fn, - path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + sample_id=sample_id, + path=path, + meta_path=meta_path, + cache_path=cache_path, + **kwargs + ) # ToDo Add you meta data here. self.obs_key_sample = 'Sample' # ToDo: Make sure to include this attribute which indicates the column in # self.adata in which you saved the sample IDs based on which the full adata object is subsetted. - def _load_full_group_object(self, fn=None) -> anndata.AnnData: - pass # ToDo: load full data object and return (no subsetting!) + def _load_full(self) -> anndata.AnnData: + fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # ToDo: load full data into AnnData object (no subsetting!) diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py index 896deaa18..65d162b48 100644 --- a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py +++ b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py @@ -1,5 +1,5 @@ from typing import Union - +import os from sfaira.data import DatasetBase @@ -14,7 +14,8 @@ def __init__( ): super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # ToDo Add you meta data here. + # ToDo: Add you meta data here. - def _load(self, fn): - pass # ToDo: load file fn into self.adata. 
+ def _load(self): + fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # ToDo: add code that loads to raw file into an AnnData object From 94562c74c3065eac23b30eb018dc9a41daf956ec Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 12 Feb 2021 15:52:42 +0100 Subject: [PATCH 044/161] Improved data path handling (#117) * fixed template unit test bugs * enabled template unit test to work on sfaira_extension data sets * depreceated optional data path in load functions in base (alread removed in data loaders) --- sfaira/data/base.py | 109 ++++--- .../anatomical_groups/human/human_adipose.py | 4 +- .../human/human_adrenalgland.py | 4 +- .../anatomical_groups/human/human_artery.py | 4 +- .../anatomical_groups/human/human_bladder.py | 4 +- .../anatomical_groups/human/human_blood.py | 4 +- .../anatomical_groups/human/human_bone.py | 4 +- .../anatomical_groups/human/human_brain.py | 4 +- .../anatomical_groups/human/human_calvaria.py | 4 +- .../anatomical_groups/human/human_cervix.py | 4 +- .../human/human_chorionicvillus.py | 4 +- .../anatomical_groups/human/human_colon.py | 4 +- .../anatomical_groups/human/human_duodenum.py | 4 +- .../human/human_epityphlon.py | 4 +- .../human/human_esophagus.py | 4 +- .../anatomical_groups/human/human_eye.py | 4 +- .../human/human_fallopiantube.py | 4 +- .../human/human_femalegonad.py | 4 +- .../human/human_gallbladder.py | 4 +- .../anatomical_groups/human/human_heart.py | 4 +- .../anatomical_groups/human/human_hesc.py | 4 +- .../anatomical_groups/human/human_ileum.py | 4 +- .../anatomical_groups/human/human_jejunum.py | 4 +- .../anatomical_groups/human/human_kidney.py | 4 +- .../anatomical_groups/human/human_liver.py | 4 +- .../anatomical_groups/human/human_lung.py | 4 +- .../human/human_malegonad.py | 4 +- .../anatomical_groups/human/human_muscle.py | 4 +- .../anatomical_groups/human/human_omentum.py | 4 +- .../anatomical_groups/human/human_pancreas.py | 4 +- .../anatomical_groups/human/human_placenta.py 
| 4 +- .../anatomical_groups/human/human_pleura.py | 4 +- .../anatomical_groups/human/human_prostate.py | 4 +- .../anatomical_groups/human/human_rectum.py | 4 +- .../anatomical_groups/human/human_rib.py | 4 +- .../anatomical_groups/human/human_skin.py | 4 +- .../human/human_spinalcord.py | 4 +- .../anatomical_groups/human/human_spleen.py | 4 +- .../anatomical_groups/human/human_stomach.py | 4 +- .../anatomical_groups/human/human_thymus.py | 4 +- .../anatomical_groups/human/human_thyroid.py | 4 +- .../anatomical_groups/human/human_trachea.py | 4 +- .../anatomical_groups/human/human_ureter.py | 4 +- .../anatomical_groups/human/human_uterus.py | 4 +- .../anatomical_groups/mouse/mouse_adipose.py | 4 +- .../anatomical_groups/mouse/mouse_bladder.py | 4 +- .../anatomical_groups/mouse/mouse_blood.py | 4 +- .../anatomical_groups/mouse/mouse_bone.py | 4 +- .../anatomical_groups/mouse/mouse_brain.py | 4 +- .../anatomical_groups/mouse/mouse_colon.py | 4 +- .../mouse/mouse_diaphragm.py | 4 +- .../mouse/mouse_femalegonad.py | 4 +- .../anatomical_groups/mouse/mouse_heart.py | 4 +- .../anatomical_groups/mouse/mouse_ileum.py | 4 +- .../anatomical_groups/mouse/mouse_kidney.py | 4 +- .../anatomical_groups/mouse/mouse_liver.py | 4 +- .../anatomical_groups/mouse/mouse_lung.py | 4 +- .../mouse/mouse_malegonad.py | 4 +- .../mouse/mouse_mammarygland.py | 4 +- .../anatomical_groups/mouse/mouse_muscle.py | 4 +- .../anatomical_groups/mouse/mouse_pancreas.py | 4 +- .../anatomical_groups/mouse/mouse_placenta.py | 4 +- .../anatomical_groups/mouse/mouse_prostate.py | 4 +- .../anatomical_groups/mouse/mouse_rib.py | 4 +- .../anatomical_groups/mouse/mouse_skin.py | 4 +- .../anatomical_groups/mouse/mouse_spleen.py | 4 +- .../anatomical_groups/mouse/mouse_stomach.py | 4 +- .../anatomical_groups/mouse/mouse_thymus.py | 4 +- .../anatomical_groups/mouse/mouse_tongue.py | 4 +- .../anatomical_groups/mouse/mouse_trachea.py | 4 +- .../anatomical_groups/mouse/mouse_uterus.py | 4 +- 
.../databases/cellxgene/cellxgene_group.py | 2 +- .../databases/cellxgene/cellxgene_loader.py | 8 +- sfaira/data/dataloaders/loaders/__init__.py | 1 + .../human_pancreas_2017_smartseq2_enge_001.py | 8 +- .../d10_1016_j_cell_2018_02_001/base.py | 6 +- .../mouse_bladder_2018_microwell_han_001.py | 4 +- .../mouse_blood_2018_microwell_han_001.py | 4 +- .../mouse_blood_2018_microwell_han_002.py | 4 +- .../mouse_blood_2018_microwell_han_003.py | 4 +- .../mouse_blood_2018_microwell_han_004.py | 4 +- .../mouse_blood_2018_microwell_han_005.py | 4 +- .../mouse_bone_2018_microwell_001.py | 4 +- .../mouse_brain_2018_microwell_han_001.py | 4 +- .../mouse_brain_2018_microwell_han_002.py | 4 +- ...ouse_femalegonad_2018_microwell_han_001.py | 4 +- ...ouse_femalegonad_2018_microwell_han_002.py | 4 +- .../mouse_ileum_2018_microwell_han_001.py | 4 +- .../mouse_ileum_2018_microwell_han_002.py | 4 +- .../mouse_ileum_2018_microwell_han_003.py | 4 +- .../mouse_kidney_2018_microwell_han_001.py | 4 +- .../mouse_kidney_2018_microwell_han_002.py | 4 +- .../mouse_liver_2018_microwell_han_001.py | 4 +- .../mouse_liver_2018_microwell_han_002.py | 4 +- .../mouse_lung_2018_microwell_han_001.py | 4 +- .../mouse_lung_2018_microwell_han_002.py | 4 +- .../mouse_lung_2018_microwell_han_003.py | 4 +- .../mouse_malegonad_2018_microwell_han_001.py | 4 +- .../mouse_malegonad_2018_microwell_han_002.py | 4 +- ...use_mammarygland_2018_microwell_han_001.py | 4 +- ...use_mammarygland_2018_microwell_han_002.py | 4 +- ...use_mammarygland_2018_microwell_han_003.py | 4 +- ...use_mammarygland_2018_microwell_han_004.py | 4 +- .../mouse_muscle_2018_microwell_han_001.py | 4 +- .../mouse_pancreas_2018_microwell_han_001.py | 4 +- .../mouse_placenta_2018_microwell_han_001.py | 4 +- .../mouse_placenta_2018_microwell_han_002.py | 4 +- .../mouse_prostate_2018_microwell_han_001.py | 4 +- .../mouse_prostate_2018_microwell_han_002.py | 4 +- .../mouse_rib_2018_microwell_han_001.py | 4 +- .../mouse_rib_2018_microwell_han_002.py 
| 4 +- .../mouse_rib_2018_microwell_han_003.py | 4 +- .../mouse_spleen_2018_microwell_han_001.py | 4 +- .../mouse_stomach_2018_microwell_han_001.py | 4 +- .../mouse_thymus_2018_microwell_han_001.py | 4 +- .../mouse_uterus_2018_microwell_han_001.py | 4 +- .../mouse_uterus_2018_microwell_han_002.py | 4 +- .../human_colon_2019_10x_kinchen_001.py | 10 +- .../human_colon_2019_10x_smilie_001.py | 6 +- .../human_ileum_2019_10x_martin_001.py | 6 +- .../human_prostate_2018_10x_henry_001.py | 6 +- .../human_pancreas_2016_indrop_baron_001.py | 6 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 8 +- .../mouse_pancreas_2019_10x_thompson_x.py | 8 +- .../human_lung_2020_10x_miller_001.py | 6 +- .../human_brain_2017_DroNcSeq_habib_001.py | 6 +- .../human_malegonad_2018_10x_guo_001.py | 6 +- .../human_liver_2018_10x_macparland_001.py | 8 +- .../human_kidney_2019_10xSn_lake_001.py | 8 +- .../human_mixed_2019_10x_szabo_001.py | 10 +- .../human_eye_2019_10x_menon_001.py | 6 +- .../human_placenta_2018_10x_ventotormo_001.py | 8 +- .../human_liver_2019_CELseq2_aizarani_001.py | 8 +- .../human_liver_2019_10x_ramachandran_001.py | 8 +- .../human_liver_2019_10x_popescu_001.py | 6 +- .../d10_1038_s41586_020_2157_4/base.py | 10 +- .../human_adipose_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_001.py | 4 +- ...man_adrenalgland_2020_microwell_han_002.py | 4 +- ...man_adrenalgland_2020_microwell_han_003.py | 4 +- ...man_adrenalgland_2020_microwell_han_004.py | 4 +- ...man_adrenalgland_2020_microwell_han_005.py | 4 +- ...man_adrenalgland_2020_microwell_han_006.py | 4 +- .../human_artery_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_001.py | 4 +- .../human_bladder_2020_microwell_han_002.py | 4 +- .../human_bladder_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_001.py | 4 +- .../human_blood_2020_microwell_han_002.py | 4 +- .../human_blood_2020_microwell_han_003.py | 4 +- .../human_blood_2020_microwell_han_004.py | 4 +- 
.../human_blood_2020_microwell_han_005.py | 4 +- .../human_blood_2020_microwell_han_006.py | 4 +- .../human_blood_2020_microwell_han_007.py | 4 +- .../human_bone_2020_microwell_han_001.py | 4 +- .../human_bone_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_001.py | 4 +- .../human_brain_2020_microwell_han_002.py | 4 +- .../human_brain_2020_microwell_han_003.py | 4 +- .../human_brain_2020_microwell_han_004.py | 4 +- .../human_brain_2020_microwell_han_005.py | 4 +- .../human_brain_2020_microwell_han_006.py | 4 +- .../human_calvaria_2020_microwell_han_001.py | 4 +- .../human_cervix_2020_microwell_han_001.py | 4 +- ..._chorionicvillus_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_001.py | 4 +- .../human_colon_2020_microwell_han_002.py | 4 +- .../human_colon_2020_microwell_han_003.py | 4 +- .../human_colon_2020_microwell_han_004.py | 4 +- .../human_duodenum_2020_microwell_han_001.py | 4 +- ...human_epityphlon_2020_microwell_han_001.py | 4 +- .../human_esophagus_2020_microwell_han_001.py | 4 +- .../human_esophagus_2020_microwell_han_002.py | 4 +- .../human_eye_2020_microwell_han_001.py | 4 +- ...an_fallopiantube_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_001.py | 4 +- ...uman_femalegonad_2020_microwell_han_002.py | 4 +- ...uman_gallbladder_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_001.py | 4 +- .../human_heart_2020_microwell_han_002.py | 4 +- .../human_heart_2020_microwell_han_003.py | 4 +- .../human_heart_2020_microwell_han_004.py | 4 +- .../human_hesc_2020_microwell_han_001.py | 4 +- .../human_ileum_2020_microwell_han_001.py | 4 +- .../human_jejunum_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_001.py | 4 +- .../human_kidney_2020_microwell_han_002.py | 4 +- .../human_kidney_2020_microwell_han_003.py | 4 +- .../human_kidney_2020_microwell_han_004.py | 4 +- .../human_kidney_2020_microwell_han_005.py | 4 +- .../human_kidney_2020_microwell_han_006.py | 4 
+- .../human_kidney_2020_microwell_han_007.py | 4 +- .../human_liver_2020_microwell_han_001.py | 4 +- .../human_liver_2020_microwell_han_002.py | 4 +- .../human_liver_2020_microwell_han_003.py | 4 +- .../human_liver_2020_microwell_han_004.py | 4 +- .../human_liver_2020_microwell_han_005.py | 4 +- .../human_lung_2020_microwell_han_001.py | 4 +- .../human_lung_2020_microwell_han_002.py | 4 +- .../human_lung_2020_microwell_han_003.py | 4 +- .../human_lung_2020_microwell_han_004.py | 4 +- .../human_lung_2020_microwell_han_005.py | 4 +- .../human_malegonad_2020_microwell_han_001.py | 4 +- .../human_malegonad_2020_microwell_han_002.py | 4 +- .../human_muscle_2020_microwell_han_001.py | 4 +- .../human_muscle_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_001.py | 4 +- .../human_omentum_2020_microwell_han_002.py | 4 +- .../human_omentum_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_001.py | 4 +- .../human_pancreas_2020_microwell_han_002.py | 4 +- .../human_pancreas_2020_microwell_han_003.py | 4 +- .../human_pancreas_2020_microwell_han_004.py | 4 +- .../human_placenta_2020_microwell_han_001.py | 4 +- .../human_pleura_2020_microwell_han_001.py | 4 +- .../human_prostate_2020_microwell_han_001.py | 4 +- .../human_rectum_2020_microwell_han_001.py | 4 +- .../human_rib_2020_microwell_han_001.py | 4 +- .../human_rib_2020_microwell_han_002.py | 4 +- .../human_skin_2020_microwell_han_001.py | 4 +- .../human_skin_2020_microwell_han_002.py | 4 +- ...human_spinalcord_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_001.py | 4 +- .../human_spleen_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_001.py | 4 +- .../human_stomach_2020_microwell_han_002.py | 4 +- .../human_stomach_2020_microwell_han_003.py | 4 +- .../human_stomach_2020_microwell_han_004.py | 4 +- .../human_stomach_2020_microwell_han_005.py | 4 +- .../human_stomach_2020_microwell_han_006.py | 4 +- 
.../human_stomach_2020_microwell_han_007.py | 4 +- .../human_stomach_2020_microwell_han_008.py | 4 +- .../human_stomach_2020_microwell_han_009.py | 4 +- .../human_stomach_2020_microwell_han_010.py | 4 +- .../human_thymus_2020_microwell_han_001.py | 4 +- .../human_thymus_2020_microwell_han_002.py | 4 +- .../human_thyroid_2020_microwell_han_001.py | 4 +- .../human_thyroid_2020_microwell_han_002.py | 4 +- .../human_trachea_2020_microwell_han_001.py | 4 +- .../human_ureter_2020_microwell_han_001.py | 4 +- .../human_uterus_2020_microwell_han_001.py | 4 +- .../human_lung_2020_10x_travaglini_001.py | 8 +- .../human_colon_2020_10x_james_001.py | 6 +- .../human_lung_2019_10x_braga_x.py | 6 +- .../human_lung_2019_dropseq_braga_001.py | 8 +- ...mouse_brain_2019_mouse_brain_atlas_temp.py | 8 +- .../human_kidney_2020_10x_liao_001.py | 6 +- .../human_eye_2019_10x_voigt_001.py | 6 +- .../human_x_2019_10x_wang_001.py | 6 +- .../human_lung_2020_10x_lukassen_001.py | 6 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 6 +- .../human_lung_2020_10x_habermann_001.py | 12 +- .../human_kidney_2019_10x_stewart_001.py | 8 +- .../human_thymus_2020_10x_park_001.py | 6 +- .../human_x_2019_10x_madissoon_001.py | 6 +- .../human_eye_2019_10x_lukowski_001.py | 6 +- .../human_blood_2019_10x_10xGenomics_001.py | 8 +- .../loaders/d_nan/human_x_2018_10x_ica_001.py | 6 +- .../data/dataloaders/loaders/super_group.py | 6 +- sfaira/data/dataloaders/super_group.py | 10 +- sfaira/data/interactive/loader.py | 6 +- .../your_dataset_file_1.py | 7 +- .../your_dataset_file_2.py | 7 +- .../your_dataset_file.py | 7 +- .../your_dataset_file.py | 7 +- .../your_dataset_file_1.py | 7 +- .../utils_scripts/create_celltype_maps.py | 8 +- sfaira/data/utils_scripts/create_meta.py | 4 +- .../utils_scripts/create_meta_and_cache.py | 4 +- .../data/utils_scripts/write_backed_human.py | 2 +- .../data/utils_scripts/write_backed_mouse.py | 2 +- sfaira/interface/user_interface.py | 2 +- sfaira/unit_tests/test_data_template.py | 
276 +++++++++--------- sfaira/unit_tests/test_dataset.py | 12 +- sfaira/versions/metadata/base.py | 7 +- 275 files changed, 818 insertions(+), 812 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 9d95f92bb..babcf49eb 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -57,10 +57,9 @@ class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict _meta: Union[None, pandas.DataFrame] - path: Union[None, str] + data_path: Union[None, str] meta_path: Union[None, str] cache_path: Union[None, str] - doi_path: Union[None, str] id: Union[None, str] genome: Union[None, str] @@ -107,7 +106,7 @@ class DatasetBase(abc.ABC): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs @@ -118,10 +117,9 @@ def __init__( self.adata = None self.meta = None self.genome = None - self.path = path + self.data_path = data_path self.meta_path = meta_path self.cache_path = cache_path - self.doi_path = None self._age = None self._author = None @@ -187,12 +185,11 @@ def clear(self): def download(self, **kwargs): assert self.download_url_data is not None, f"The `download_url_data` attribute of dataset {self.id} " \ f"is not set, cannot download dataset." - assert self.path is not None, f"No path was provided when instantiating the dataset container, " \ - f"cannot download datasets." + assert self.data_path is not None, "No path was provided when instantiating the dataset container, " \ + "cannot download datasets." 
- self.doi_path = os.path.join(self.path, "raw", self.directory_formatted_doi) - if not os.path.exists(self.doi_path): - os.makedirs(self.doi_path) + if not os.path.exists(self.data_dir): + os.makedirs(self.data_dir) urls = self.download_url_data[0][0] + self.download_url_meta[0][0] @@ -202,19 +199,19 @@ def download(self, **kwargs): if url.split(",")[0] == 'private': if "," in url: fn = ','.join(url.split(',')[1:]) - if os.path.isfile(os.path.join(self.doi_path, fn)): + if os.path.isfile(os.path.join(self.data_dir, fn)): print(f"File {fn} already found on disk, skipping download.") else: warnings.warn(f"Dataset {self.id} is not available for automatic download, please manually " f"copy the file {fn} to the following location: " - f"{self.doi_path}") + f"{self.data_dir}") else: warnings.warn(f"A file for dataset {self.id} is not available for automatic download, please" - f"manually copy the associated file to the following location: {self.doi_path}") + f"manually copy the associated file to the following location: {self.data_dir}") elif url.split(",")[0].startswith('syn'): fn = ",".join(url.split(",")[1:]) - if os.path.isfile(os.path.join(self.doi_path, fn)): + if os.path.isfile(os.path.join(self.data_dir, fn)): print(f"File {fn} already found on disk, skipping download.") else: self._download_synapse(url.split(",")[0], fn, **kwargs) @@ -233,11 +230,11 @@ def download(self, **kwargs): fn = cgi.parse_header(urllib.request.urlopen(url).info()['Content-Disposition'])[1]["filename"] else: fn = url.split("/")[-1] - if os.path.isfile(os.path.join(self.doi_path, fn)): + if os.path.isfile(os.path.join(self.data_dir, fn)): print(f"File {fn} already found on disk, skipping download.") else: print(f"Downloading: {fn}") - urllib.request.urlretrieve(url, os.path.join(self.doi_path, fn)) + urllib.request.urlretrieve(url, os.path.join(self.data_dir, fn)) def _download_synapse(self, synapse_entity, fn, **kwargs): try: @@ -264,7 +261,7 @@ def _download_synapse(self, 
synapse_entity, fn, **kwargs): syn = synapseclient.Synapse() syn.login(kwargs['synapse_user'], kwargs['synapse_pw']) dataset = syn.get(entity=synapse_entity) - shutil.move(dataset.path, os.path.join(self.doi_path, fn)) + shutil.move(dataset.data_path, os.path.join(self.data_dir, fn)) def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None) -> bool: """ @@ -278,7 +275,6 @@ def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = def _load_cached( self, - fn: str, load_raw: bool, allow_caching: bool, ): @@ -315,7 +311,7 @@ def _get_cache_fn(): ) return cache - def _cached_reading(fn, fn_cache): + def _cached_reading(fn_cache): if fn_cache is not None: if os.path.exists(fn_cache): self.adata = anndata.read_h5ad(fn_cache) @@ -341,15 +337,14 @@ def _cached_writing(fn_cache): self._load() elif not load_raw and allow_caching: fn_cache = _get_cache_fn() - _cached_reading(fn, fn_cache) + _cached_reading(fn_cache) _cached_writing(fn_cache) else: # not load_raw and not allow_caching fn_cache = _get_cache_fn() - _cached_reading(fn, fn_cache) + _cached_reading(fn_cache) def load( self, - fn: Union[str, None] = None, remove_gene_version: bool = True, match_to_reference: Union[str, None] = None, load_raw: bool = False, @@ -357,7 +352,6 @@ def load( ): """ - :param fn: Optional target file name, otherwise infers from defined directory structure. :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. :param match_to_reference: Reference genomes name. 
@@ -383,14 +377,11 @@ def load( self._set_genome(genome=genome) # Set path to dataset directory - if fn is None: - if self.doi_path is None: - raise ValueError("Neither sfaira data repo path nor custom dataset path provided.") - else: - self.doi_path = fn + if self.data_dir is None: + raise ValueError("No sfaira data repo path provided in constructor.") # Run data set-specific loading script: - self._load_cached(fn=fn, load_raw=load_raw, allow_caching=allow_caching) + self._load_cached(load_raw=load_raw, allow_caching=allow_caching) # Set data-specific meta data in .adata: self._set_metadata_in_adata() # Set loading hyper-parameter-specific meta data: @@ -632,7 +623,6 @@ def load_tobacked( adata_backed: anndata.AnnData, genome: str, idx: np.ndarray, - fn: Union[None, str] = None, load_raw: bool = False, allow_caching: bool = True ): @@ -647,13 +637,11 @@ def load_tobacked( :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a shuffled object. :param keys: - :param fn: :param load_raw: See .load(). :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. """ self.load( - fn=fn, remove_gene_version=True, match_to_reference=genome, load_raw=load_raw, @@ -848,7 +836,6 @@ def write_meta( self, fn_meta: Union[None, str] = None, dir_out: Union[None, str] = None, - fn_data: Union[None, str] = None, ): """ Write meta data object for data set. @@ -857,7 +844,6 @@ def write_meta( :param fn_meta: File to write to, selects automatically based on self.meta_path and self.id otherwise. :param dir_out: Path to write to, file name is selected automatically based on self.id. 
- :param fn_data: See .load() :return: """ if fn_meta is not None and dir_out is not None: @@ -875,7 +861,6 @@ def write_meta( if self.adata is None: self.load( - fn=fn_data, remove_gene_version=False, match_to_reference=None, load_raw=True, @@ -971,6 +956,16 @@ def author(self, x: str): self.__erasing_protection(attr="author", val_old=self._author, val_new=x) self._author = x + @property + def data_dir(self): + # Data is either directly in user supplied directory or in a sub directory if the overall directory is managed + # by sfaira: In this case, the sub directory is named after the doi of the data set. + sfaira_path = os.path.join(self.data_path, self.directory_formatted_doi) + if os.path.exists(sfaira_path): + return sfaira_path + else: + return self.data_path + @property def dev_stage(self) -> Union[None, str]: if self._dev_stage is not None: @@ -1556,12 +1551,12 @@ class DatasetBaseGroupLoadingOneFile(DatasetBase): def __init__( self, sample_id: str, - path: Union[str, None], + data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self._unprocessed_full_group_object = False self._sample_id = sample_id @@ -1633,12 +1628,12 @@ class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self._sample_fn = sample_fn @property @@ -1739,7 +1734,7 @@ def func(dataset, **kwargs_func): datasets_to_remove = [] for k, v in self.datasets.items(): print(f"loading {k}") - 
group_loading = v.set_raw_full_group_object(fn=None, adata_group=adata_group) + group_loading = v.set_raw_full_group_object(adata_group=adata_group) if adata_group is None and group_loading: # cache full adata object for subsequent Datasets adata_group = v.adata.copy() x = map_fn(tuple([v] + args)) @@ -1819,7 +1814,7 @@ def write_ontology_class_map( **kwargs )) if len(tab) == 0: - warnings.warn(f"attempted to write ontology classmaps for group without annotated data sets") + warnings.warn("attempted to write ontology classmaps for group without annotated data sets") else: tab = pandas.concat(tab, axis=0) # Take out columns with the same source: @@ -2031,7 +2026,7 @@ class DatasetGroupDirectoryOriented(DatasetGroup): def __init__( self, file_base: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, ): @@ -2042,7 +2037,7 @@ def __init__( here. :param file_base: - :param path: + :param data_path: :param meta_path: :param cache_path: """ @@ -2050,10 +2045,11 @@ def __init__( datasets = [] cwd = os.path.dirname(file_base) dataset_module = str(cwd.split("/")[-1]) + loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(cwd.split("/")[-5]) == "sfaira" else \ + "sfaira_extension.data.dataloaders.loaders." if "group.py" in os.listdir(cwd): - DatasetGroupFound = pydoc.locate( - "sfaira.data.dataloaders.loaders." 
+ dataset_module + ".group.DatasetGroup") - dsg = DatasetGroupFound(path=path, meta_path=meta_path, cache_path=cache_path) + DatasetGroupFound = pydoc.locate(loader_pydoc_path + dataset_module + ".group.DatasetGroup") + dsg = DatasetGroupFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path) datasets.extend(list(dsg.datasets.values)) else: for f in os.listdir(cwd): @@ -2062,24 +2058,20 @@ def __init__( if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: datasets_f = [] file_module = ".".join(f.split(".")[:-1]) - DatasetFound = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".Dataset") + DatasetFound = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".Dataset") # Check if global objects are available: # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile - sample_fns = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".SAMPLE_FNS") - sample_ids = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".SAMPLE_IDS") + sample_fns = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + + ".SAMPLE_FNS") + sample_ids = pydoc.locate(loader_pydoc_path + dataset_module + "." 
+ file_module + + ".SAMPLE_IDS") if sample_fns is not None and sample_ids is None: # DatasetBaseGroupLoadingManyFiles: datasets_f.extend([ DatasetFound( sample_fn=x, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, ) @@ -2090,7 +2082,7 @@ def __init__( datasets_f.extend([ DatasetFound( sample_id=x, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, ) @@ -2099,7 +2091,8 @@ def __init__( elif sample_fns is not None and sample_ids is not None: raise ValueError(f"sample_fns and sample_ids both found for {f}") else: - datasets_f.append(DatasetFound(path=path, meta_path=meta_path, cache_path=cache_path)) + datasets_f.append( + DatasetFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path)) # Load cell type maps: for x in datasets_f: x.load_ontology_class_map(fn=os.path.join(cwd, file_module + ".csv")) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py index e0b4f2aaa..7dee16978 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -8,11 +8,11 @@ class DatasetGroupAdipose(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py index be0505ea0..0f259eaa9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -8,11 +8,11 @@ class DatasetGroupAdrenalgland(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py index 8fa2ed83f..ee0f8128b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -8,11 +8,11 @@ class DatasetGroupArtery(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py index 00b4ef431..fd83fc20e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -8,11 +8,11 @@ class DatasetGroupBladder(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: 
Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py index 66c887cd4..88571e2c9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -8,11 +8,11 @@ class DatasetGroupBlood(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_blood_2018_10x_ica_001", "human_blood_2019_10x_10xGenomics_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py index c172fe8f5..3b0b34c4a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -8,11 +8,11 @@ class DatasetGroupBone(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_bone_2018_10x_ica_001", 
"human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py index 8e5b5d38e..91bc43110 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -8,11 +8,11 @@ class DatasetGroupBrain(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_brain_2017_DroNcSeq_habib_001", "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py index 057987d06..a5f4507bb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -8,11 +8,11 @@ class DatasetGroupCalvaria(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py index 6bc3ca986..100583c1f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py 
+++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -8,11 +8,11 @@ class DatasetGroupCervix(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py index b5b0221a0..ec0c91901 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -8,11 +8,11 @@ class DatasetGroupChorionicvillus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py index 2de1cc5fa..94d8cda78 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -8,11 +8,11 @@ class DatasetGroupColon(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_colon_2019_10x_kinchen_001", "human_colon_2019_10x_smilie_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py index 8e232a69c..55314e37b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py @@ -8,11 +8,11 @@ class DatasetGroupDuodenum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py index 56f86c06f..a379e4c9c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -8,11 +8,11 @@ class DatasetGroupEpityphlon(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py index 9d6679f27..fdcd7987d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -8,11 +8,11 @@ class DatasetGroupEsophagus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_esophagus_2019_10x_madissoon_001", "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py index a86666ada..d28ff3061 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -8,11 +8,11 @@ class DatasetGroupEye(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_eye_2019_10x_lukowski_001", "human_eye_2019_10x_menon_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py index 1f819c846..eb25058c5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -8,11 +8,11 @@ class DatasetGroupFallopiantube(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py index f10e5d03c..dc75d9f4d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -8,11 +8,11 @@ class DatasetGroupFemalegonad(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py index d8a90fe34..a7e5ccb2d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -8,11 +8,11 @@ class DatasetGroupGallbladder(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = 
None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py index 2f15fba64..56a81bed8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -8,11 +8,11 @@ class DatasetGroupHeart(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py index 16546f8c5..dd3189b0a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -8,11 +8,11 @@ class DatasetGroupHesc(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ 
"human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py index 8ecdbddbb..5a39d70ea 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -8,11 +8,11 @@ class DatasetGroupIleum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_ileum_2019_10x_martin_001", "human_ileum_2019_10x_wang_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py index f238e23b5..e0dc41cbd 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -8,11 +8,11 @@ class DatasetGroupJejunum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py index 72a9b5d86..7041b1361 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -8,11 +8,11 @@ class DatasetGroupKidney(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_kidney_2019_10xSn_lake_001", "human_kidney_2019_10x_stewart_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py index 1a525c7db..1d5b40fac 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -8,11 +8,11 @@ class DatasetGroupLiver(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_liver_2018_10x_macparland_001", "human_liver_2019_10x_popescu_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py index cea664c0d..8643327fe 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -8,11 +8,11 @@ class DatasetGroupLung(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, 
cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_lung_2019_10x_braga_001", "human_lung_2019_10x_braga_002", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py index ba33bb371..4b666f4aa 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -8,11 +8,11 @@ class DatasetGroupMalegonad(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_malegonad_2018_10x_guo_001", "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py index 697538b2b..1119b8fda 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -8,11 +8,11 @@ class DatasetGroupMuscle(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py index ece41406d..739712ca9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -8,11 +8,11 @@ class DatasetGroupOmentum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py index f654095f2..b5c5e135c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -8,11 +8,11 @@ class DatasetGroupPancreas(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_pancreas_2016_indrop_baron_001", "human_pancreas_2016_smartseq2_segerstolpe_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py index 1ea950c50..b9437b352 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -8,11 +8,11 @@ class DatasetGroupPlacenta(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_placenta_2018_smartseq2_ventotormo_001", "human_placenta_2018_10x_ventotormo_002", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py index 534a531d5..04f7da782 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -8,11 +8,11 @@ class DatasetGroupPleura(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py index ec73a1c9e..2517d62e7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -8,11 +8,11 @@ class DatasetGroupProstate(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_prostate_2018_10x_henry_001", "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py index a4d769c9d..d0da3c8b9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -8,11 +8,11 @@ class DatasetGroupRectum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_rectum_2019_10x_wang_001", "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py index 9af9c34ac..18af63f1b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -8,11 +8,11 @@ class DatasetGroupRib(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4", 
"human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py index 8d8ba3171..f38682fc9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -8,11 +8,11 @@ class DatasetGroupSkin(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py index c13172e37..1789c37e7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -8,11 +8,11 @@ class DatasetGroupSpinalcord(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py index 68d4b4da5..04e4005ed 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -8,11 +8,11 @@ class DatasetGroupSpleen(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_spleen_2019_10x_madissoon_001", "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py index f6f2f3a90..7135862de 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -8,11 +8,11 @@ class DatasetGroupStomach(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py index 661c08da4..f8ea9a05f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -8,11 +8,11 @@ class DatasetGroupThymus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + 
data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_thymus_2020_10x_park_001", "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py index 245f28f99..d85b6a021 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -8,11 +8,11 @@ class DatasetGroupThyroid(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py index 88a93df17..cf60b31f4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -8,11 +8,11 @@ class DatasetGroupTrachea(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py index 665fbc401..f95e77a98 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -8,11 +8,11 @@ class DatasetGroupUreter(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py index cbc85303c..7abeea47b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -8,11 +8,11 @@ class DatasetGroupUterus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py index 
59407309b..42356fc4a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -8,11 +8,11 @@ class DatasetGroupAdipose(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_adipose_2019_10x_pisco_001_10.1101/661728", "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py index 9b0a85f76..575964baa 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -8,11 +8,11 @@ class DatasetGroupBladder(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_bladder_2019_10x_pisco_001_10.1101/661728", "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py index 0a2901342..e49858759 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -8,11 +8,11 @@ class DatasetGroupBlood (DatasetGroup): def __init__( self, - path: Union[str, 
None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py index b9f1ac781..aad754e29 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -8,11 +8,11 @@ class DatasetGroupBone(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_bone_2019_10x_pisco_001_10.1101/661728", "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py index 7309eb1d9..d4a094bdc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -8,11 +8,11 @@ class DatasetGroupBrain(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728", "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py index 1b579aea7..93b7c2963 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -8,11 +8,11 @@ class DatasetGroupColon(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_colon_2019_10x_pisco_001_10.1101/661728", "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py index 4ec085dbf..c8ab2a149 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -8,11 +8,11 @@ class DatasetGroupDiaphragm(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py index dbe2e7bb8..4dbadb8b0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -8,11 +8,11 @@ class DatasetGroupFemalegonad(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py index c8fdc754f..b13e76723 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -8,11 +8,11 @@ class DatasetGroupHeart(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_heart_2019_10x_pisco_001_10.1101/661728", "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py index 91bfa7a5e..d41430f05 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -8,11 +8,11 @@ class DatasetGroupIleum(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py index 4c2008342..bb63a1b17 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -8,11 +8,11 @@ class DatasetGroupKidney(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_kidney_2019_10x_pisco_001_10.1101/661728", "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py index c9ea7d277..5dc85bd40 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -8,11 +8,11 @@ class DatasetGroupLiver(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, 
cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_liver_2019_10x_pisco_001_10.1101/661728", "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py index 41f9a46ad..29d064e7e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -8,11 +8,11 @@ class DatasetGroupLung(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_lung_2019_10x_pisco_001_10.1101/661728", "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py index 1da24a2b7..4d0e07b3c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -8,11 +8,11 @@ class DatasetGroupMalegonad(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ 
"mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py index 403b42c2f..c952ab20c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -8,11 +8,11 @@ class DatasetGroupMammaryGland(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728", "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py index 27121dcf2..073ec9f04 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -8,11 +8,11 @@ class DatasetGroupMuscle(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_muscle_2019_10x_pisco_001_10.1101/661728", "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py index ae25a6ccd..db927c906 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -8,11 +8,11 @@ class DatasetGroupPancreas(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_pancreas_2019_10x_pisco_001_10.1101/661728", "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py index ad4c12122..4515cdb98 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -8,11 +8,11 @@ class DatasetGroupPlacenta(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py index 0b1b91004..a932a1b5c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -8,11 +8,11 @@ class DatasetGroupProstate(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py index 285c33aca..c847d4474 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -8,11 +8,11 @@ class DatasetGroupRib(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py index 37ee74c8d..6c9f7a56d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -8,11 +8,11 @@ class DatasetGroupSkin(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: 
Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_skin_2019_10x_pisco_001_10.1101/661728", "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py index 5b265e5e8..31e7619df 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -8,11 +8,11 @@ class DatasetGroupSpleen(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_spleen_2019_10x_pisco_001_10.1101/661728", "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py index 183ed1c68..4a1c988d8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -8,11 +8,11 @@ class DatasetGroupStomach(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", 
values=[ "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py index 98044c2c2..1c052942c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -8,11 +8,11 @@ class DatasetGroupThymus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_thymus_2019_10x_pisco_001_10.1101/661728", "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py index fc54163f4..96e087e4f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -8,11 +8,11 @@ class DatasetGroupTongue(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_tongue_2019_10x_pisco_001_10.1101/661728", "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py index cbd9c4e8e..d7f9a812c 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -8,11 +8,11 @@ class DatasetGroupTrachea(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_trachea_2019_10x_pisco_001_10.1101/661728", "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py index d170301d5..c67a0893a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -8,11 +8,11 @@ class DatasetGroupUterus(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(path=path, meta_path=meta_path, cache_path=cache_path) + dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py index 41328d40c..068bd4c0b 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -19,7 +19,7 @@ def __init__( fn_ls = os.listdir(path) fn_ls = [x for x in fn_ls if x in 
self._ADATA_IDS_CELLXGENE.accepted_file_names] datasets = [ - Dataset(path=path, fn=x, meta_path=meta_path, cache_path=cache_path) + Dataset(data_path=path, fn=x, meta_path=meta_path, cache_path=cache_path) for x in fn_ls ] keys = [x.id for x in datasets] diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index 5265bf761..b93d8fbc4 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -9,19 +9,19 @@ class Dataset(DatasetBase): """ This is a dataloader for downloaded h5ad from cellxgene. - :param path: + :param data_path: :param meta_path: :param kwargs: """ def __init__( self, - path: Union[str, None], + data_path: Union[str, None], fn: str, meta_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, **kwargs) self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() self.fn = fn @@ -52,7 +52,7 @@ def _load(self): :return: """ - fn = os.path.join(self.path, self.fn) + fn = os.path.join(self.data_path, self.fn) adata = anndata.read(fn) adata.X = adata.raw.X # TODO delete raw? 
diff --git a/sfaira/data/dataloaders/loaders/__init__.py b/sfaira/data/dataloaders/loaders/__init__.py index cf0bdc722..542f4322f 100644 --- a/sfaira/data/dataloaders/loaders/__init__.py +++ b/sfaira/data/dataloaders/loaders/__init__.py @@ -1 +1,2 @@ from .super_group import DatasetSuperGroupLoaders +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index 0c455ca07..cb6794ff8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -15,12 +15,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" @@ -54,8 +54,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE81547_RAW.tar"), - os.path.join(self.doi_path, "GSE81547_series_matrix.txt.gz") + os.path.join(self.data_dir, "GSE81547_RAW.tar"), + os.path.join(self.data_dir, "GSE81547_series_matrix.txt.gz") ] dfs = [] with tarfile.open(fn[0]) as tar: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py index b05f4b670..ef0c0b7c5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py @@ -15,12 +15,12 @@ class Dataset_d10_1016_j_cell_2018_02_001(DatasetBase): def __init__( self, - path: Union[str, None], + data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.download_url_meta = None @@ -40,7 +40,7 @@ def __init__( self.var_symbol_col = "index" def _load_generalized(self, samplename): - fn = os.path.join(self.doi_path, '5435866.zip') + fn = os.path.join(self.data_dir, '5435866.zip') with zipfile.ZipFile(fn) as archive: celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py index 447e4d9a8..a7e5316ad 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "bladder organ" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py index 6c75c8abe..5b454a528 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py index b5db9ab9d..210f6484c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "blood" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py index 6af3d7d50..6ee887fd7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py index 1a751a682..554a13555 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.organ = "blood" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py index 7d719f861..a1be6338c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py index 78bd0c28d..0445e8a16 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "bone tissue" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py index 50d165781..08ef1acde 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "brain" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py index 118636b13..a2536bc13 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "brain" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py index 36f5186de..84774208a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "ovary" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py index 4b6fb475b..5e9742d70 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "ovary" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py index 45c66cd04..57d5f7d18 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "ileum" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py index ea2187180..e09ac957f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "ileum" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py index fdbf281f4..a8f724263 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.organ = "ileum" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py index c4405ac4b..90890161e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "kidney" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py index eb3c94f50..c65a14f74 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "kidney" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py index f43f66f63..bb995046e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "liver" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py index 77d52004c..c70c2e993 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "liver" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py index 8fbf204a0..b98fe2101 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "lung" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py index 70491637a..99f979473 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "lung" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py index 451d13f92..3d5161d4f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.organ = "lung" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py index 7813b9eb6..82ef7068f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "testis" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py index de97e10fb..71e82424b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "testis" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py index f7663789c..ba02424bb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "mammary gland" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py index 8ce2a1d9b..e500e6c76 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" 
self.organ = "mammary gland" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py index e85e955e1..51afe8cf4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.organ = "mammary gland" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py index 45bab81ff..a47d16a19 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" self.organ = "mammary gland" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py index 7dd8b82e7..f44375d58 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "skeletal muscle organ" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py index e485c8aa8..454f555b2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "pancreas" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py index 948eadbbb..dbec73b0b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "placenta" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py index ddf097bfd..1e379530a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "placenta" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py index d3804d531..652f5dcdc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "prostate" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py index 092da9594..ee81ccb81 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "prostate" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py index 5abd5e54a..6fe195e19 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "rib" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py index 0ea19de0b..9f40e597a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "rib" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py index 0c5fbc008..4e2964633 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" self.organ = "rib" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py index c33512c7f..e3330d6ff 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "spleen" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py index dcf8f06ed..9f412743a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "stomach" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py index 4a51ac334..62eb1158e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "thymus" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py index fcfca4c3c..43710000b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" self.organ = "uterus" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py index 82465d647..9c6654a9c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py @@ -7,12 +7,12 @@ class Dataset(Dataset_d10_1016_j_cell_2018_02_001): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" self.organ = "uterus" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index 5ef304272..f2a1fe8ae 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" @@ -61,9 +61,9 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.doi_path, "uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.doi_path, "hc_meta_data_stromal_with_donor.txt") + os.path.join(self.data_dir, "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), + os.path.join(self.data_dir, "uc_meta_data_stromal_with_donor.txt"), + os.path.join(self.data_dir, "hc_meta_data_stromal_with_donor.txt") ] adata = anndata.read_loom(fn[0]) ctuc = pd.read_csv(fn[1], sep="\t") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py index 1c3a4e0ad..fe69f193f 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" @@ -80,7 +80,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "smillie19_epi.processed.h5ad") + fn = os.path.join(self.data_dir, "smillie19_epi.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py index 6b73a9549..9d63f81cc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" @@ -61,7 +61,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "martin19.processed.h5ad") + fn = os.path.join(self.data_dir, "martin19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py index 58b441b85..40b5377bd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" @@ -50,7 +50,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "henry18_0.processed.h5ad") + fn = os.path.join(self.data_dir, "henry18_0.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 4dc1de175..c0cdaf49f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" self.download_url_data = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" @@ -56,7 +56,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "baron16.processed.h5ad") + fn = os.path.join(self.data_dir, "baron16.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index 2ca2712b7..bedcd2681 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" self.download_url_data = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" @@ -58,8 +58,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "E-MTAB-5061.processed.1.zip"), - os.path.join(self.doi_path, "E-MTAB-5061.sdrf.txt") + os.path.join(self.data_dir, "E-MTAB-5061.processed.1.zip"), + os.path.join(self.data_dir, "E-MTAB-5061.sdrf.txt") ] df = pd.read_csv(fn[0], sep="\t") df.index = df.index.get_level_values(0) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 8d48d2f07..5a882cd97 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -24,12 +24,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_" \ f"10.1016/j.cmet.2019.01.021" @@ -51,7 +51,7 @@ def __init__( self.obs_key_cellontology_original = "celltypes" def _load(self): - with tarfile.open(os.path.join(self.doi_path, 'GSE117770_RAW.tar')) as tar: + with 
tarfile.open(os.path.join(self.data_dir, 'GSE117770_RAW.tar')) as tar: for member in tar.getmembers(): if "_matrix.mtx.gz" in member.name and self.sample_fn in member.name: name = "_".join(member.name.split("_")[:-1]) @@ -66,6 +66,6 @@ def _load(self): var.index = var["ensembl"].values self.adata = anndata.AnnData(X=x, obs=obs, var=var) self.adata.var_names_make_unique() - celltypes = pd.read_csv(os.path.join(self.doi_path, self.sample_fn + "_annotation.csv"), index_col=0) + celltypes = pd.read_csv(os.path.join(self.data_dir, self.sample_fn + "_annotation.csv"), index_col=0) self.adata = self.adata[celltypes.index] self.adata.obs["celltypes"] = celltypes diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py index e45740617..ecfed8c7b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" @@ -67,7 +67,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "miller20.processed.h5ad") + fn = os.path.join(self.data_dir, "miller20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = 
self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py index 499ec0128..79aaa7162 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" self.download_url_data = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" @@ -57,7 +57,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "habib17.processed.h5ad") + fn = os.path.join(self.data_dir, "habib17.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py index 4117cd103..327975757 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, 
meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_testis_2018_10x_guo_001_10.1038/s41422-018-0099-2" self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" @@ -53,7 +53,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "guo18_donor.processed.h5ad") + fn = os.path.join(self.data_dir, "guo18_donor.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py index 3667a0ebc..3397212af 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" self.download_url_data = "private,GSE115469.csv.gz" @@ -62,8 +62,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE115469.csv.gz"), - os.path.join(self.doi_path, "GSE115469_labels.txt") + os.path.join(self.data_dir, "GSE115469.csv.gz"), + 
os.path.join(self.data_dir, "GSE115469_labels.txt") ] self.adata = anndata.read_csv(fn[0]).T celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py index 2a414198d..d008fa602 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ @@ -71,8 +71,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.doi_path, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") + os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), + os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) annot = pd.read_csv(fn[1], index_col=0, dtype="category") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 6e749196d..974eb109f 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -32,12 +32,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_10.1038/s41467-019-12464-3" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" @@ -67,9 +67,9 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE126030_RAW.tar"), - os.path.join(self.doi_path, "donor1.annotation.txt"), - os.path.join(self.doi_path, "donor2.annotation.txt") + os.path.join(self.data_dir, "GSE126030_RAW.tar"), + os.path.join(self.data_dir, "donor1.annotation.txt"), + os.path.join(self.data_dir, "donor2.annotation.txt") ] with tarfile.open(fn[0]) as tar: df = pd.read_csv(tar.extractfile(self.sample_fn), compression="gzip", sep="\t") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py index de23161c6..af55b0e8b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py @@ -9,12 +9,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, 
cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" @@ -49,5 +49,5 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "menon19.processed.h5ad") + fn = os.path.join(self.data_dir, "menon19.processed.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py index d5615a621..c98b8add7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py @@ -16,12 +16,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) protocol = "10x" if self.sample_fn == "E-MTAB-6678.processed" else "smartseq2" self.id = f"human_placenta_2018_{protocol}_ventotormo_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41586-018-0698-6" @@ -85,8 +85,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, f"{self.sample_fn}.1.zip"), - os.path.join(self.doi_path, f"{self.sample_fn}.2.zip"), + os.path.join(self.data_dir, f"{self.sample_fn}.1.zip"), + 
os.path.join(self.data_dir, f"{self.sample_fn}.2.zip"), ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) df = pd.read_csv(fn[1], sep="\t") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py index 4459a8b12..fe30a04eb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" @@ -81,8 +81,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.doi_path, "GSE124395_clusterpartition.txt.gz") + os.path.join(self.data_dir, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.data_dir, "GSE124395_clusterpartition.txt.gz") ] self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) celltype_df = pd.read_csv(fn[1], sep=" ") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py index 984f0f41a..d77b04481 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py @@ -27,19 +27,19 @@ class Dataset(DatasetBase): ## Notebook cell 3 sce.write("ramachandran.h5ad") - :param path: + :param data_path: :param meta_path: :param kwargs: """ def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" @@ -78,5 +78,5 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "ramachandran.h5ad") + fn = os.path.join(self.data_dir, "ramachandran.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py index 504907193..23ed85d8a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py @@ -9,12 +9,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" self.download_url_data = "private,fetal_liver_alladata_.h5ad" @@ -67,5 +67,5 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "fetal_liver_alladata_.h5ad") + fn = os.path.join(self.data_dir, "fetal_liver_alladata_.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py index 9f885b0bf..6fd3f2c14 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -16,12 +16,12 @@ class Dataset_d10_1038_s41586_020_2157_4(DatasetBase): def __init__( self, - path: Union[str, None], + data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = "https://ndownloader.figshare.com/files/17727365" self.download_url_meta = [ @@ -52,7 +52,7 @@ def _load_generalized(self, sample_id: str): :param fn: :return: """ - adata = anndata.read(os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) + adata = anndata.read(os.path.join(self.data_path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix adata.X = scipy.sparse.csr_matrix(adata.X).copy() @@ -69,7 +69,7 @@ def _load_generalized(self, sample_id: str): # load celltype labels and harmonise them # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: fig1_anno = pd.read_excel( - os.path.join(self.path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + os.path.join(self.data_path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), 
index_col="cellnames", engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables ) @@ -90,7 +90,7 @@ def _load_generalized(self, sample_id: str): columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) archive = zipfile.ZipFile( - os.path.join(self.path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") + os.path.join(self.data_path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") ) for f in archive.namelist(): df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py index f2ea60ba9..ecaacf71f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "adipose" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py index 30b10e000..eb2a01b0f 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "adrenalgland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py index 026663baa..5bf3a2f33 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "adrenalgland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py index 24e4c0ff1..5d35adeb7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "adrenalgland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py index 14946ce44..326e842a2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "adrenalgland" self.class_maps = { diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py index 749d5f460..00ffdf149 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "adrenalgland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py index f53816965..117678910 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" 
self.organ = "adrenalgland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py index 835cbc675..3d1ce0372 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "artery" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py index 16562bba3..8803259d1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" 
self.organ = "bladder organ" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py index 9c668e5c3..a4cd2c58c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "bladder organ" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py index 1072a4f5e..4585003c0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "bladder" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py index 35ca6f204..d41ceca66 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py index 5ff3f876e..751296492 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py index b211b53c6..3c59dbaad 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py index 5a4549db8..09fde31c4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py index e196c17e5..c21029981 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py index 7ee3358ff..497ce9180 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py index 7fa17dbda..bab12f9e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" self.organ = "blood" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py index be2880f7e..8e456f0e9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "bone tissue" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py index f7cf296ba..663b35876 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "bone tissue" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py index 248957421..8456273fd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py index 58b188adb..660b5ccc1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py index c3a7c5daa..10906fffc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py index 44828ba45..819dcfe4d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py index 36b91969e..93434bc1d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py index 68bbe1688..8b0d2f332 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" self.organ = "brain" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py index 0e831f3a6..6af2e2bb9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "calvaria" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py index 540856e10..eb768c2e0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "cervix" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py index f0d147e57..7ff919329 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "chorionicvillus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py index 4da28b14d..6dc78ba4e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Colon" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py index a2dc18aaa..38f5f1fc7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Colon" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py index 310d25720..c9fa5a835 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "Colon" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py index cbbd3bdef..b842d1193 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "Colon" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py index b77ca89cb..3eb457805 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "duodenum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py index 928c8546c..e8dbd84e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "caecum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py index ec4aedccb..081d03871 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Esophagus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py index c71780471..15c19836d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Esophagus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py index 7b50b2b82..ff60c338f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Eye" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py index abd15c99e..086ed6bf6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "fallopiantube" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py index bdbde700e..33ddc21bd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_ovary_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "ovary" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py index 89e8c73e7..d0fe905e0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_ovary_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "ovary" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py index e4eb1de58..883df3cd3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "gall bladder" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py index 13f46e501..5cfda3ef5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "heart" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py index 35ad6721f..6833761f4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "heart" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py index 948c6f94d..83ccaff7b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "heart" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py index 2b853e0de..992c673d6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "heart" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py index 57c4d33ed..d5dfe4cc8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "hesc" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py index 2b34d00d5..2c033b035 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "ileum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py index 0b0918710..fc9079ee0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "jejunum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py index 356e7bd2e..792f9f1f2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py index 4e581306f..9f27fe35d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py index 179ca418d..5f9f4aecb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py index e2aa9d48b..6954a5982 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py index d9ab0ed8e..73bef5db0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py index c45c8c6e6..e49320f26 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py index a8048080b..01b687783 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" self.organ = "Kidney" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py index 0a0fc8d22..9124fe50e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Liver" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py index 0046eea36..15b0e0686 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Liver" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py index 137f41e39..134ae0d1c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "Liver" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py index f6b2063d6..03cd609b4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "Liver" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py index 610ef167e..76ac33488 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "Liver" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py index c481cc965..8a8582238 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "lung" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py index 53ca247f2..d6307ba1b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "lung" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py index 7253b7022..d2bd1a332 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "lung" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py index ad9252b4d..27cb8d221 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "lung" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py index 2bbde67cb..7e916aece 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "lung" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py index 82dc9ce21..624a88655 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_testis_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "testis" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py index fbfc7385b..54531fcef 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_testis_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "testis" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py index 9fca53de1..37a2bb38c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = 
"human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "skeletal muscle organ" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py index cf1e0aa69..ff5b184c5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "skeletal muscle organ" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py index 9bb9dc54d..c57392c8e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "omentum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py index 3f0392086..4301ebc86 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "omentum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py index ebdd68863..1c162a52a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "omentum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py index c32932cdd..57f9b1020 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Pancreas" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py index 8a47851fa..816c8a8fa 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Pancreas" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py index 698a47c1e..0b9e723ba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "Pancreas" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py index 8d51a6d78..1f4ec1aef 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "Pancreas" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py index d753b3923..0c410bc5c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Placenta" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py index 51816a2af..7a39f5c02 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "pleura" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py index 5e8efb0ca..8d6e90def 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "prostate gland" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py index 88c466404..c5581b7f0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "rectum" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py index c65d99fa4..227f78b4b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "rib" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py index dde7f741c..1496d4560 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "rib" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py index f42150090..b61f225d2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "skin of body" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py index ee3816042..ed1ec23dd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "skin of body" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py index 7311407cd..53def3331 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "spinalcord" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py index 59703acd1..28c801822 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "Spleen" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py index 1f0aa4b37..ac2bcf79f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "Spleen" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py index efd42e009..53383af3e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py index f0d1fd859..e749f6ac1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py index 838e3ae81..1f00af568 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py index e0f2155d5..4deb7c5c3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py index 645153043..9021a3e3b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py index b8eb86df2..61fb96624 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py index 533e6f808..bef7c2130 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py index 9368f66ea..898570087 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py index 7930c3791..e6d0c4af3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py index 76e3154fd..31bf383f9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" self.organ = "stomach" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py index 915751810..6e9831bd2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "thymus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py index f89e0d1a7..2fa27ddab 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "thymus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py index d8fd72f71..db4fdff1c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "thyroid" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py index 042ec8262..fb89798bb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" self.organ = "thyroid" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py index 774ae4876..ab6db836a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "trachea" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py index e8fdbc5bd..55b21278f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "ureter" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py index be40e1071..b8b251eb0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py @@ -6,12 +6,12 @@ class Dataset(Dataset_d10_1038_s41586_020_2157_4): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" self.organ = "uterus" self.class_maps = { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index bcd843a4b..f397d69d1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -26,7 +26,7 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): syn21625095 = syn.get(entity="syn21625095") shutil.move(syn21625095.path, "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") - :param path: + :param data_path: :param meta_path: :param kwargs: """ @@ -34,12 +34,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) protocol = "10x" if self.sample_fn.split("_")[0] == "droplet" else "smartseq2" self.id = f"human_lung_2020_{protocol}_travaglini_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41586-020-2922-4" @@ -199,7 +199,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) if self.sample_fn.split("_")[0] == "droplet": norm_const = 1000000 else: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py index d6a1b4dfd..027c31bb5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -11,12 +11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" @@ -68,7 +68,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "james20.processed.h5ad") + fn = os.path.join(self.data_dir, "james20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py index 9becde666..dec0e4104 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py @@ -16,12 +16,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = f"human_lung_2019_10x_braga_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1038/s41591-019-0468-5" @@ -90,7 +90,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 36ac1cba3..488738311 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" @@ -55,8 +55,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE130148_raw_counts.csv.gz"), - os.path.join(self.doi_path, "GSE130148_barcodes_cell_types.txt.gz"), + os.path.join(self.data_dir, "GSE130148_raw_counts.csv.gz"), + os.path.join(self.data_dir, "GSE130148_barcodes_cell_types.txt.gz"), ] self.adata = anndata.read_csv(fn[0]).T self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py index d293c180b..e6160a50c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py @@ -12,12 +12,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4" self.download_url_data = \ @@ -52,8 +52,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), - os.path.join(self.doi_path, "annot_fullAggr.csv") + os.path.join(self.data_dir, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), + os.path.join(self.data_dir, "annot_fullAggr.csv") ] with zipfile.Zipfile(fn[0]) as archive: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py index 47ab93b69..2075c294d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -13,12 +13,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - 
super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" @@ -42,7 +42,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "GSE131685_RAW.tar") + fn = os.path.join(self.data_dir, "GSE131685_RAW.tar") adatas = [] with tarfile.open(fn) as tar: for member in tar.getmembers(): diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py index 86f2c6906..82afc496c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" @@ -52,6 +52,6 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "voigt19.processed.h5ad") + fn = os.path.join(self.data_dir, "voigt19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py index 
d5bd563d8..1249f6dba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py @@ -18,12 +18,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) organ = self.sample_fn.split("_")[1].split(".")[0] self.id = f"human_{organ}_2019_10x_wang_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1084/jem.20191130" @@ -82,7 +82,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py index 637d4c7c3..048cd06be 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -17,12 +17,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + 
super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = f"human_lung_2020_10x_lukassen_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ f"10.1101/2020.03.13.991455" @@ -78,7 +78,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 51c421d00..49db8f35a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -52,12 +52,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) protocol = "10x" if sample_fn.split("-")[3] == "droplet" else "smartseq2" organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ @@ -107,7 +107,7 @@ def __init__( self.var_symbol_col = "index" def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read_h5ad(fn) self.adata.X = self.adata.raw.X self.adata.var = self.adata.raw.var diff --git 
a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py index 6f541d5d4..ac228acf8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" self.download_url_data = [ @@ -78,10 +78,10 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "GSE135893_matrix.mtx.gz"), - os.path.join(self.doi_path, "GSE135893_genes.tsv.gz"), - os.path.join(self.doi_path, "GSE135893_barcodes.tsv.gz"), - os.path.join(self.doi_path, "GSE135893_IPF_metadata.csv.gz"), + os.path.join(self.data_dir, "GSE135893_matrix.mtx.gz"), + os.path.join(self.data_dir, "GSE135893_genes.tsv.gz"), + os.path.join(self.data_dir, "GSE135893_barcodes.tsv.gz"), + os.path.join(self.data_dir, "GSE135893_IPF_metadata.csv.gz"), ] self.adata = anndata.read_mtx(fn[0]).T self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py index efdad7127..a5ed37668 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -10,12 +10,12 @@ class 
Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" self.download_url_data = [ @@ -116,8 +116,8 @@ def __init__( def _load(self): fn = [ - os.path.join(self.doi_path, "Mature_Full_v2.1.h5ad"), - os.path.join(self.doi_path, "Fetal_full.h5ad") + os.path.join(self.data_dir, "Mature_Full_v2.1.h5ad"), + os.path.join(self.data_dir, "Fetal_full.h5ad") ] adult = anndata.read(fn[0]) fetal = anndata.read(fn[1]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py index f6f81511d..565199f07 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -10,12 +10,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" @@ -85,6 +85,6 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "park20.processed.h5ad") + fn = os.path.join(self.data_dir, "park20.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = 
np.expm1(self.adata.X) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index 263a18f37..3f738ff11 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -17,12 +17,12 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" self.id = f"human_{''.join(organ.split(' '))}_2019_10x_madissoon_" \ @@ -144,7 +144,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) if self.sample_fn == "oesophagus.cellxgene.h5ad" or self.sample_fn == "spleen.cellxgene.h5ad": self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py index 6ddcbe096..c43d8cc05 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -11,12 
+11,12 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" @@ -57,7 +57,7 @@ def __init__( } def _load(self): - fn = os.path.join(self.doi_path, "lukowski19.processed.h5ad") + fn = os.path.join(self.data_dir, "lukowski19.processed.h5ad") self.adata = anndata.read(fn) self.adata.X = np.expm1(self.adata.X) self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py index a52f4fbc6..4c3c6c323 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -14,19 +14,19 @@ class Dataset(DatasetBase): import scanpy scanpy.read_10x_h5("pbmc_10k_v3_filtered_feature_bc_matrix.h5").write("pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") - :param path: + :param data_path: :param meta_path: :param kwargs: """ def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "human_blood_2019_10x_10xGenomics_001_unknown" self.download_url_data = \ @@ -51,5 +51,5 @@ def __init__( } 
def _load(self): - fn = os.path.join(self.doi_path, "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") + fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py index f75a33303..f504fbf73 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py @@ -15,7 +15,7 @@ class Dataset(DatasetBaseGroupLoadingOneFile): def __init__( self, sample_id: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs @@ -23,7 +23,7 @@ def __init__( super().__init__( sample_id=sample_id, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs @@ -55,6 +55,6 @@ def __init__( } def _load_full(self): - fn = os.path.join(self.doi_path, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + fn = os.path.join(self.data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) self.adata = self.adata[self.adata.obs["emptydrops_is_cell"] == "t"].copy() diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py index dece9b82c..3c87e58dc 100644 --- a/sfaira/data/dataloaders/loaders/super_group.py +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -9,7 +9,7 @@ class DatasetSuperGroupLoaders(DatasetSuperGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, ): @@ -18,7 +18,7 @@ def __init__( :param file_base: :param dir_prefix: Prefix to sub-select directories by. Set to "" for no constraints. 
- :param path: + :param data_path: :param meta_path: :param cache_path: """ @@ -35,7 +35,7 @@ def __init__( if path_dsg is not None: dataset_groups.append(DatasetGroupDirectoryOriented( file_base=path_dsg, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path )) diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py index 9c0321dc2..5c3c1cfba 100644 --- a/sfaira/data/dataloaders/super_group.py +++ b/sfaira/data/dataloaders/super_group.py @@ -14,7 +14,7 @@ class DatasetSuperGroupSfaira(DatasetSuperGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, ): @@ -22,25 +22,25 @@ def __init__( Nested super group of data loaders, unifying data set wise data loader SuperGroup and the database interface SuperGroup. - :param path: + :param data_path: :param meta_path: :param cache_path: """ dsgs = [ DatasetSuperGroupLoaders( - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, ), DatasetSuperGroupDatabases( - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, ) ] if sfairae is not None: dsgs.append(sfairae.data.loaders.DatasetSuperGroupLoaders( - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, )) diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 3f561bbc7..742e3cad3 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -16,7 +16,7 @@ def __init__( obs_key_celltypes: Union[str, None] = None, class_maps: dict = {}, dataset_id: str = "interactive_dataset", - path: Union[str, None] = ".", + data_path: Union[str, None] = ".", meta_path: Union[str, None] = ".", cache_path: Union[str, None] = ".", ): @@ -31,11 +31,11 @@ def __init__( :param obs_key_celltypes: .obs column name which contains cell type labels. :param class_maps: Cell type class maps. 
:param dataset_id: Identifer of data set. - :param path: + :param data_path: :param meta_path: :param cache_path: """ - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path) self.id = dataset_id self.author = "interactive_dataset" diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py index 65d162b48..3fc8d9f6c 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py @@ -7,15 +7,16 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) # ToDo: Add you meta data here. 
def _load(self): - fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # fn = os.path.join(self.data_dir, ) # ToDo: add the name of the raw file # ToDo: add code that loads to raw file into an AnnData object + pass diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py index 65d162b48..3fc8d9f6c 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py @@ -7,15 +7,16 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) # ToDo: Add you meta data here. 
def _load(self): - fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # fn = os.path.join(self.data_dir, ) # ToDo: add the name of the raw file # ToDo: add code that loads to raw file into an AnnData object + pass diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py index 0652c60ae..b13fc28b7 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py @@ -14,14 +14,14 @@ class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, sample_fn: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): super().__init__( sample_fn=sample_fn, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs @@ -31,5 +31,6 @@ def __init__( # ToDo Add you meta data here. def _load(self): - fn = os.path.join(self.doi_path, self.sample_fn) + # fn = os.path.join(self.data_dir, self.sample_fn) # ToDo: add the name of the raw file # ToDo: load file fn into self.adata, self.sample_fn represents the current filename. 
+ pass diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py index 7a3aa1320..8be85b2a0 100644 --- a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py +++ b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py @@ -15,14 +15,14 @@ class Dataset(DatasetBaseGroupLoadingOneFile): def __init__( self, sample_id: str, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): super().__init__( sample_id=sample_id, - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs @@ -33,5 +33,6 @@ def __init__( # self.adata in which you saved the sample IDs based on which the full adata object is subsetted. def _load_full(self) -> anndata.AnnData: - fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # fn = os.path.join(self.data_dir,) # ToDo: add the name of the raw file # ToDo: load full data into AnnData object (no subsetting!) + pass diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py index 65d162b48..ed1c1d8d9 100644 --- a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py +++ b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py @@ -7,15 +7,16 @@ class Dataset(DatasetBase): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) # ToDo: Add you meta data here. 
def _load(self): - fn = os.path.join(self.doi_path, ) # ToDo: add the name of the raw file + # fn = os.path.join(self.data_dir,) # ToDo: add the name of the raw file # ToDo: add code that loads to raw file into an AnnData object + pass diff --git a/sfaira/data/utils_scripts/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps.py index 2457865f1..ae389e6a4 100644 --- a/sfaira/data/utils_scripts/create_celltype_maps.py +++ b/sfaira/data/utils_scripts/create_celltype_maps.py @@ -12,7 +12,7 @@ # Set global variables. print("sys.argv", sys.argv) -path = str(sys.argv[1]) +data_path = str(sys.argv[1]) path_meta = str(sys.argv[2]) path_cache = str(sys.argv[3]) processes = int(str(sys.argv[4])) @@ -58,7 +58,7 @@ datasets_f = [ DatasetFound( sample_fn=x, - path=path, + data_path=data_path, meta_path=path_meta, cache_path=path_cache ) @@ -69,7 +69,7 @@ datasets_f = [ DatasetFound( sample_id=x, - path=path, + data_path=data_path, meta_path=path_meta, cache_path=path_cache ) @@ -79,7 +79,7 @@ raise ValueError(f"sample_fns and sample_ids both found for {f_dataset}") else: datasets_f = [DatasetFound( - path=path, + data_path=data_path, meta_path=path_meta, cache_path=path_cache )] diff --git a/sfaira/data/utils_scripts/create_meta.py b/sfaira/data/utils_scripts/create_meta.py index 159d25dc9..e60981baa 100644 --- a/sfaira/data/utils_scripts/create_meta.py +++ b/sfaira/data/utils_scripts/create_meta.py @@ -13,12 +13,12 @@ def write_meta(args0, args1): # Set global variables. print("sys.argv", sys.argv) -path = str(sys.argv[1]) +data_path = str(sys.argv[1]) path_meta = str(sys.argv[2]) processes = int(str(sys.argv[3])) ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( - path=path, meta_path=path_meta, cache_path=path_meta + data_path=data_path, meta_path=path_meta, cache_path=path_meta ) dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. 
dsg.load( diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 50aad5181..68f540bdb 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -21,13 +21,13 @@ def write_meta(args0, args1): # Set global variables. print("sys.argv", sys.argv) -path = str(sys.argv[1]) +data_path = str(sys.argv[1]) path_meta = str(sys.argv[2]) path_cache = str(sys.argv[3]) processes = int(str(sys.argv[4])) ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( - path=path, meta_path=path_meta, cache_path=path_cache + data_path=data_path, meta_path=path_meta, cache_path=path_cache ) dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. # Write meta data, cache and test load from cache: diff --git a/sfaira/data/utils_scripts/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py index ec7b98766..d5f1ce497 100644 --- a/sfaira/data/utils_scripts/write_backed_human.py +++ b/sfaira/data/utils_scripts/write_backed_human.py @@ -14,7 +14,7 @@ path_meta = os.path.join(path, "meta") ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( - path=path, meta_path=path_meta, cache_path=path_meta + data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["human"]) ds.load_all_tobacked( diff --git a/sfaira/data/utils_scripts/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py index e8397186b..e3470a73f 100644 --- a/sfaira/data/utils_scripts/write_backed_mouse.py +++ b/sfaira/data/utils_scripts/write_backed_mouse.py @@ -14,7 +14,7 @@ path_meta = os.path.join(path, "meta") ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( - path=path, meta_path=path_meta, cache_path=path_meta + data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["mouse"]) ds.load_all_tobacked( diff --git a/sfaira/interface/user_interface.py 
b/sfaira/interface/user_interface.py index a7b70a7de..c98ebf1c0 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -316,7 +316,7 @@ def load_data( ) dataset.load( celltype_version=None, - fn=None, + data_dir=None, remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, load_raw=False, diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index 572c1f9a6..d4dabb8a1 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -1,150 +1,156 @@ import os import pydoc -import unittest from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup +try: + import sfaira_extension as sfairae +except ImportError: + sfairae = None -class TestDatasetTemplate(unittest.TestCase): +def test_load(dir_template: str = "./template_data"): + """ + Unit test to assist with data set contribution. - dir_template: str = "./template_data" + The workflow for contributing a data set with this data loader is as follows: - def test_load(self): - """ - Unit test to assist with data set contribution. + 1. Write a data loader and add it into the loader directory of your local sfaira installation. + 2. Address ToDos below. + 3. Run this unit test until you are not getting errors from your data loader anymore. - The workflow for contributing a data set with this data loader is as follows: + In the process of this unit test, this data loader will have written putative cell type maps from your + annotation to the cell ontology. - 1. Write a data loader and add it into the loader directory of your local sfaira installation. - 2. Address ToDos below. - 3. Run this unit test until you are not getting errors from your data loader anymore. + 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. 
+ Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be + differentially successfull. The proposed IDs groups are separate by ":|||:" strings to give you a visial anchor + when going through these lists. You need to delete all of these division strings and all labels in the second + columns other than the best fit label. Do not change the first column, + (Note that columns are separated by ",") + You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl + 5. Run this unit test for a last time to check the cell type maps. - In the process of this unit test, this data loader will have written putative cell type maps from your - annotation to the cell ontology. + :return: + """ + remove_gene_version = True + match_to_reference = None + classmap_by_file = True + # ToDo build one class map per file or per data loader (potentially many per file) - 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. - Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be - differentially successfull. The proposed IDs groups are separate by ":|||:" strings to give you a visial anchor - when going through these lists. You need to delete all of these division strings and all labels in the second - columns other than the best fit label. Do not change the first column, - (Note that columns are separated by ",") - You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl - 5. Run this unit test for a last time to check the cell type maps. + flattened_doi = "d10_1016_j_cmet_2019_01_021" # ToDo: add correct module here as "YOUR_STUDY" + # Define file names and loader paths in sfaira or sfaira_extension: + # Define base paths of loader collections in sfaira and sfaira_extension: + dir_loader_sfaira = "sfaira.data.dataloaders.loaders." 
+ file_path_sfaira = "/" + "/".join(pydoc.locate(dir_loader_sfaira + "FILE_PATH").split("/")[:-1]) + if sfairae is not None: + dir_loader_sfairae = "sfaira_extension.data.dataloaders.loaders." + file_path_sfairae = "/" + "/".join(pydoc.locate(dir_loader_sfairae + "FILE_PATH").split("/")[:-1]) + else: + file_path_sfairae = None + # Check if loader name is a directory either in sfaira or sfaira_extension loader collections: + if flattened_doi in os.listdir(file_path_sfaira): + dir_loader = dir_loader_sfaira + "." + flattened_doi + file_path = pydoc.locate(dir_loader + ".FILE_PATH") + elif flattened_doi in os.listdir(file_path_sfairae): + dir_loader = dir_loader_sfairae + "." + flattened_doi + else: + raise ValueError("data loader not found in sfaira and also not in sfaira_extension") + file_path = pydoc.locate(dir_loader + ".FILE_PATH") - :return: - """ - remove_gene_version = True - match_to_reference = None - classmap_by_file = True # ToDo build one class map per file or per data loader (potentially many per file) - # ToDo: add correct module here as "YOUR_STUDY": - # Addition coming soon: This path can either be in sfaira or in sfaira_extensions. - # So far, this still has to be in sfaira. - from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021 import FILE_PATH - ds = DatasetGroupDirectoryOriented( - file_base=FILE_PATH, - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template - ) - # Test raw loading and caching: - # You can set load_raw to True while debugging when caching works already to speed the test up, - # but be sure to set load_raw to True for final tests. 
- ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=False, # tests raw loading - allow_caching=True # tests caching - ) - # Create cell type conversion table: - cwd = os.path.dirname(FILE_PATH) - dataset_module = str(cwd.split("/")[-1]) - if classmap_by_file: - for f in os.listdir(cwd): - if os.path.isfile(os.path.join(cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(f.split(".")[:-1]) - DatasetFound = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".Dataset") - # Check if global objects are available: - # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles - # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile - sample_fns = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".SAMPLE_FNS") - sample_ids = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dataset_module + "." + - file_module + ".SAMPLE_IDS") - if sample_fns is not None and sample_ids is None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_fn=x, - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template - ) - for x in sample_fns - ] - elif sample_fns is None and sample_ids is not None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_id=x, - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template - ) - for x in sample_ids - ] - elif sample_fns is not None and sample_ids is not None: - raise ValueError(f"sample_fns and sample_ids both found for {f}") - else: - datasets_f = [DatasetFound( - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template - )] - # Build a data set group from the already loaded data sets and use the group ontology writing - # function. 
- current_ids = [x.id for x in datasets_f] - dsg_f = DatasetGroup(datasets=dict([(x, ds.datasets[x]) for x in current_ids])) - # Write this directly into sfaira installation so that it can be committed via git. - dsg_f.write_ontology_class_map( - fn=os.path.join(cwd, file_module + ".csv"), - protected_writing=True, - n_suggest=4, - ) - else: - for k, v in ds.datasets.items(): - # Write this directly into sfaira installation so that it can be committed via git. - v.write_ontology_class_map( - fn=os.path.join("/".join(FILE_PATH.split("/")[:-1]), v.fn_ontology_class_map_csv), - protected_writing=True, - n_suggest=10, - ) + ds = DatasetGroupDirectoryOriented( + file_base=file_path, + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template + ) + # Test raw loading and caching: + # You can set load_raw to True while debugging when caching works already to speed the test up, + # but be sure to set load_raw to True for final tests. + ds.load( + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=True, # tests raw loading + allow_caching=True # tests caching + ) + # Create cell type conversion table: + cwd = os.path.dirname(file_path) + dataset_module = str(cwd.split("/")[-1]) + if classmap_by_file: + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + DatasetFound = pydoc.locate(dir_loader + "." + file_module + ".Dataset") + # Check if global objects are available: + # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles + # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + sample_fns = pydoc.locate(dir_loader + "." + file_module + ".SAMPLE_FNS") + sample_ids = pydoc.locate(dir_loader + dataset_module + "." 
+ file_module + ".SAMPLE_IDS") + if sample_fns is not None and sample_ids is None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_fn=x, + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template + ) + for x in sample_fns + ] + elif sample_fns is None and sample_ids is not None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_id=x, + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template + ) + for x in sample_ids + ] + elif sample_fns is not None and sample_ids is not None: + raise ValueError(f"sample_fns and sample_ids both found for {f}") + else: + datasets_f = [DatasetFound( + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template + )] + # Build a data set group from the already loaded data sets and use the group ontology writing + # function. + dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) + # Write this directly into sfaira installation so that it can be committed via git. + dsg_f.write_ontology_class_map( + fn=os.path.join(cwd, file_module + ".csv"), + protected_writing=True, + n_suggest=4, + ) + else: + for k, v in ds.datasets.items(): + # Write this directly into sfaira installation so that it can be committed via git. + v.write_ontology_class_map( + fn=os.path.join("/".join(file_path.split("/")[:-1]), v.fn_ontology_class_map_csv), + protected_writing=True, + n_suggest=10, + ) - # ToDo: conflicts are not automatically resolved, please go back to - # https://www.ebi.ac.uk/ols/ontologies/cl - # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. + # ToDo: conflicts are not automatically resolved, please go back to + # https://www.ebi.ac.uk/ols/ontologies/cl + # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. 
- # Test loading from cache: - ds = DatasetGroupDirectoryOriented( - file_base=FILE_PATH, - path=self.dir_template, - meta_path=self.dir_template, - cache_path=self.dir_template - ) - ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=False, - allow_caching=False - ) - # Test concatenation: - _ = ds.adata - - -if __name__ == '__main__': - unittest.main() + # Test loading from cache: + ds = DatasetGroupDirectoryOriented( + file_base=file_path, + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template + ) + ds.load( + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=False, + allow_caching=False + ) + # Test concatenation: + _ = ds.adata diff --git a/sfaira/unit_tests/test_dataset.py b/sfaira/unit_tests/test_dataset.py index 763dc89f0..207a7e7cd 100644 --- a/sfaira/unit_tests/test_dataset.py +++ b/sfaira/unit_tests/test_dataset.py @@ -12,13 +12,13 @@ class TestDatasetGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load_all() def test_adata(self): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["bladder"]) _ = ds.adata @@ -29,21 +29,21 @@ class TestDatasetSuperGroups(unittest.TestCase): dir_meta: str = "./test_data/meta" def test_load(self): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, 
meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) ds.load_all() def test_adata(self): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) _ = ds.adata def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) @@ -57,7 +57,7 @@ def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): assert isinstance(ds.adata.X[:], np.ndarray), "%s" % type(ds.adata.X) def test_load_backed_sparse(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 548caad67..65982b0af 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -905,8 +905,8 @@ def synonym_string_processing(y): # Check this by checking if one is an ancestor of the other: anatomical_subselection = [ z and ( - anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or - y in 
self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) + anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or + y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) ) for y, z in zip(uberon_ids, anatomical_subselection) ] @@ -917,7 +917,8 @@ def synonym_string_processing(y): for i in np.argsort(scores_lenient) if anatomical_subselection[i] and not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][-n_suggest:][::-1]}) + ][-n_suggest:][::-1] + }) # 2. Run a second string matching with the anatomical word included. modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]"). \ From 9def399f9b1ae5fd7962eb60159b466221f1d8aa Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 12 Feb 2021 16:14:38 +0100 Subject: [PATCH 045/161] fixed one file many DS loading (#118) --- sfaira/data/base.py | 8 +++++--- .../loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py | 4 ---- .../dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py | 4 ---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index babcf49eb..2f3594973 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1565,7 +1565,7 @@ def sample_id(self): return self._sample_id @abc.abstractmethod - def _load_full(self) -> anndata.AnnData: + def _load_full(self): """ Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. 
@@ -1577,7 +1577,9 @@ def _load_full(self) -> anndata.AnnData: def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None): if self.adata is None and adata_group is not None: self.adata = adata_group - elif self.adata is None and adata_group is not None: + elif self.adata is None and adata_group is None: + self._load_full() + elif self.adata is not None and not self._unprocessed_full_group_object: self._load_full() elif self.adata is not None and self._unprocessed_full_group_object: pass @@ -1610,12 +1612,12 @@ def _subset_from_group( assert self.adata is not None, "this method should only be called if .adata is not None" for k, v in subset_items: self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] + self._unprocessed_full_group_object = False def _load(self): _ = self.set_raw_full_group_object(adata_group=None) if self._unprocessed_full_group_object: self._load_from_group() - self._unprocessed_full_group_object = False class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py index 4c3c6c323..fe62c7e8f 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py @@ -46,10 +46,6 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.class_maps = { - "0": {}, - } - def _load(self): fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") self.adata = anndata.read(fn) diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py index f504fbf73..40215c7f4 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py +++ 
b/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py @@ -50,10 +50,6 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "Accession" - self.class_maps = { - "0": {}, - } - def _load_full(self): fn = os.path.join(self.data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") self.adata = anndata.read_loom(fn) From 3c56ecdf6a39422832b710b40d636e00af385c2b Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 12 Feb 2021 16:33:20 +0100 Subject: [PATCH 046/161] fixed handling of no doi data sets (#120) * fixed handling of no doi data sets fixes #57 --- sfaira/data/base.py | 14 +++++++------- .../databases/cellxgene/cellxgene_loader.py | 2 +- .../loaders/d10_1038_s41586_020_2157_4/base.py | 6 +++--- .../{d_nan => dno_doi_10x_genomics}/__init__.py | 0 .../human_blood_2019_10x_10xGenomics_001.py | 2 +- .../human_x_2018_10x_ica_001.py | 4 +--- 6 files changed, 13 insertions(+), 15 deletions(-) rename sfaira/data/dataloaders/loaders/{d_nan => dno_doi_10x_genomics}/__init__.py (100%) rename sfaira/data/dataloaders/loaders/{d_nan => dno_doi_10x_genomics}/human_blood_2019_10x_10xGenomics_001.py (97%) rename sfaira/data/dataloaders/loaders/{d_nan => dno_doi_10x_genomics}/human_x_2018_10x_ica_001.py (97%) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 2f3594973..9d682575f 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -57,7 +57,7 @@ class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict _meta: Union[None, pandas.DataFrame] - data_path: Union[None, str] + data_dir_base: Union[None, str] meta_path: Union[None, str] cache_path: Union[None, str] id: Union[None, str] @@ -117,7 +117,7 @@ def __init__( self.adata = None self.meta = None self.genome = None - self.data_path = data_path + self.data_dir_base = data_path self.meta_path = meta_path self.cache_path = cache_path @@ -185,8 +185,8 @@ def clear(self): def download(self, **kwargs): assert self.download_url_data is not None, 
f"The `download_url_data` attribute of dataset {self.id} " \ f"is not set, cannot download dataset." - assert self.data_path is not None, "No path was provided when instantiating the dataset container, " \ - "cannot download datasets." + assert self.data_dir_base is not None, "No path was provided when instantiating the dataset container, " \ + "cannot download datasets." if not os.path.exists(self.data_dir): os.makedirs(self.data_dir) @@ -261,7 +261,7 @@ def _download_synapse(self, synapse_entity, fn, **kwargs): syn = synapseclient.Synapse() syn.login(kwargs['synapse_user'], kwargs['synapse_pw']) dataset = syn.get(entity=synapse_entity) - shutil.move(dataset.data_path, os.path.join(self.data_dir, fn)) + shutil.move(dataset.data_dir_base, os.path.join(self.data_dir, fn)) def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None) -> bool: """ @@ -960,11 +960,11 @@ def author(self, x: str): def data_dir(self): # Data is either directly in user supplied directory or in a sub directory if the overall directory is managed # by sfaira: In this case, the sub directory is named after the doi of the data set. - sfaira_path = os.path.join(self.data_path, self.directory_formatted_doi) + sfaira_path = os.path.join(self.data_dir_base, self.directory_formatted_doi) if os.path.exists(sfaira_path): return sfaira_path else: - return self.data_path + return self.data_dir_base @property def dev_stage(self) -> Union[None, str]: diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index b93d8fbc4..dc2c02e42 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -52,7 +52,7 @@ def _load(self): :return: """ - fn = os.path.join(self.data_path, self.fn) + fn = os.path.join(self.data_dir_base, self.fn) adata = anndata.read(fn) adata.X = adata.raw.X # TODO delete raw? 
diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py index 6fd3f2c14..fbedf1d90 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py @@ -52,7 +52,7 @@ def _load_generalized(self, sample_id: str): :param fn: :return: """ - adata = anndata.read(os.path.join(self.data_path, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) + adata = anndata.read(os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix adata.X = scipy.sparse.csr_matrix(adata.X).copy() @@ -69,7 +69,7 @@ def _load_generalized(self, sample_id: str): # load celltype labels and harmonise them # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: fig1_anno = pd.read_excel( - os.path.join(self.data_path, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), index_col="cellnames", engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables ) @@ -90,7 +90,7 @@ def _load_generalized(self, sample_id: str): columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) archive = zipfile.ZipFile( - os.path.join(self.data_path, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") + os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") ) for f in archive.namelist(): df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") diff --git a/sfaira/data/dataloaders/loaders/d_nan/__init__.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/__init__.py 
similarity index 100% rename from sfaira/data/dataloaders/loaders/d_nan/__init__.py rename to sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/__init__.py diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py similarity index 97% rename from sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py rename to sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py index fe62c7e8f..2e2d2d284 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py @@ -34,7 +34,7 @@ def __init__( self.download_url_meta = None self.author = "10x Genomics" - self.doi = "no_doi" + self.doi = "no_doi_10x_genomics" self.healthy = True self.normalization = "raw" self.organ = "blood" diff --git a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_x_2018_10x_ica_001.py similarity index 97% rename from sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py rename to sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_x_2018_10x_ica_001.py index 40215c7f4..0e2822443 100644 --- a/sfaira/data/dataloaders/loaders/d_nan/human_x_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_x_2018_10x_ica_001.py @@ -20,7 +20,6 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__( sample_id=sample_id, data_path=data_path, @@ -28,7 +27,6 @@ def __init__( cache_path=cache_path, **kwargs ) - self.obs_key_sample = "derived_organ_parts_label" self.id = f"human_{'blood' if sample_id == 'umbilical cord blood' else 'bone'}_2018_10x_ica_" \ f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_unknown" @@ -38,7 +36,7 @@ def __init__( 
self.download_url_meta = None self.author = "Regev" - self.doi = "no_doi" + self.doi = "no_doi_10x_genomics" self.healthy = True self.normalization = "raw" self.organ = "blood" if sample_id == "umbilical cord blood" else "bone marrow" From e63cd1f6d2840a200f6296fbad5bb71dccfa41cf Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 12 Feb 2021 16:58:55 +0100 Subject: [PATCH 047/161] fixed author (#121) --- .../human_kidney_2020_10x_liao_001.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py index 2075c294d..b94703a21 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -24,7 +24,7 @@ def __init__( self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None - self.author = "Mo" + self.author = "Liao" self.healthy = True self.normalization = "raw" self.organ = "kidney" @@ -37,10 +37,6 @@ def __init__( self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" - self.class_maps = { - "0": {}, - } - def _load(self): fn = os.path.join(self.data_dir, "GSE131685_RAW.tar") adatas = [] @@ -61,4 +57,3 @@ def _load(self): self.adata.obs["sample"] = name adatas.append(self.adata) self.adata = adatas[0].concatenate(adatas[1:]) - del self.adata.obs["batch"] From 4224291c012f558482cf8e6d04dcd2a27b388264 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 12 Feb 2021 17:12:05 +0100 Subject: [PATCH 048/161] fixed bugs in loading of mouse_brain_2019_10x_hove_001 (#122) --- ...brain_atlas_temp.py => mouse_brain_2019_10x_hove_001.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/{mouse_brain_2019_mouse_brain_atlas_temp.py => mouse_brain_2019_10x_hove_001.py} (96%) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py similarity index 96% rename from sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py index e6160a50c..587c33d3f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_mouse_brain_atlas_temp.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py @@ -25,13 +25,13 @@ def __init__( self.download_url_meta = \ "https://www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" - self.author = "Movahedi" + self.author = "Hove" self.doi = "10.1038/s41593-019-0393-4" self.healthy = True self.normalization = "raw" self.organ = "brain" self.organism = "mouse" - self.protocol = "microwell-seq" + self.protocol = "10X sequencing" self.state_exact = "healthy" self.year = 2019 @@ -56,7 +56,7 @@ def _load(self): os.path.join(self.data_dir, "annot_fullAggr.csv") ] - with zipfile.Zipfile(fn[0]) as archive: + with zipfile.ZipFile(fn[0]) as archive: x = scipy.io.mmread(archive.open('filtered_gene_bc_matrices_mex/mm10/matrix.mtx')).T.tocsr() self.adata = anndata.AnnData(x) var = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/genes.tsv'), sep="\t", header=None) From 92875e68cb7e00c3d6787ee11f113dad6883da7c Mon Sep 17 00:00:00 
2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Fri, 12 Feb 2021 17:26:55 +0100 Subject: [PATCH 049/161] dataloader updates (#116) * fix abstract method for onefile dataloader container * rename bladder organ in TMS * convert hcl dataloader to manysamplesonefile schema * tidy hcl dataloader Co-authored-by: david.seb.fischer --- .../mouse_bladder_2018_microwell_han_001.py | 2 +- .../d10_1038_s41586_020_2157_4/base.py | 131 ------- .../human_adipose_2020_microwell_han_001.py | 22 -- ...man_adrenalgland_2020_microwell_han_001.py | 22 -- ...man_adrenalgland_2020_microwell_han_002.py | 22 -- ...man_adrenalgland_2020_microwell_han_003.py | 22 -- ...man_adrenalgland_2020_microwell_han_004.py | 22 -- ...man_adrenalgland_2020_microwell_han_005.py | 22 -- ...man_adrenalgland_2020_microwell_han_006.py | 22 -- .../human_artery_2020_microwell_han_001.py | 22 -- .../human_bladder_2020_microwell_han_001.py | 22 -- .../human_bladder_2020_microwell_han_002.py | 22 -- .../human_bladder_2020_microwell_han_003.py | 22 -- .../human_blood_2020_microwell_han_001.py | 22 -- .../human_blood_2020_microwell_han_002.py | 22 -- .../human_blood_2020_microwell_han_003.py | 22 -- .../human_blood_2020_microwell_han_004.py | 22 -- .../human_blood_2020_microwell_han_005.py | 22 -- .../human_blood_2020_microwell_han_006.py | 22 -- .../human_blood_2020_microwell_han_007.py | 22 -- .../human_bone_2020_microwell_han_001.py | 22 -- .../human_bone_2020_microwell_han_002.py | 22 -- .../human_brain_2020_microwell_han_001.py | 54 --- .../human_brain_2020_microwell_han_002.py | 54 --- .../human_brain_2020_microwell_han_003.py | 54 --- .../human_brain_2020_microwell_han_004.py | 54 --- .../human_brain_2020_microwell_han_005.py | 54 --- .../human_brain_2020_microwell_han_006.py | 54 --- .../human_calvaria_2020_microwell_han_001.py | 22 -- .../human_cervix_2020_microwell_han_001.py | 22 -- ..._chorionicvillus_2020_microwell_han_001.py | 22 -- .../human_colon_2020_microwell_han_001.py | 50 
--- .../human_colon_2020_microwell_han_002.py | 50 --- .../human_colon_2020_microwell_han_003.py | 50 --- .../human_colon_2020_microwell_han_004.py | 50 --- .../human_duodenum_2020_microwell_han_001.py | 22 -- ...human_epityphlon_2020_microwell_han_001.py | 22 -- .../human_esophagus_2020_microwell_han_001.py | 47 --- .../human_esophagus_2020_microwell_han_002.py | 47 --- .../human_eye_2020_microwell_han_001.py | 46 --- ...an_fallopiantube_2020_microwell_han_001.py | 22 -- ...uman_femalegonad_2020_microwell_han_001.py | 22 -- ...uman_femalegonad_2020_microwell_han_002.py | 22 -- ...uman_gallbladder_2020_microwell_han_001.py | 22 -- .../human_heart_2020_microwell_han_001.py | 22 -- .../human_heart_2020_microwell_han_002.py | 22 -- .../human_heart_2020_microwell_han_003.py | 22 -- .../human_heart_2020_microwell_han_004.py | 22 -- .../human_hesc_2020_microwell_han_001.py | 19 - .../human_ileum_2020_microwell_han_001.py | 49 --- .../human_jejunum_2020_microwell_han_001.py | 22 -- .../human_kidney_2020_microwell_han_001.py | 72 ---- .../human_kidney_2020_microwell_han_002.py | 72 ---- .../human_kidney_2020_microwell_han_003.py | 72 ---- .../human_kidney_2020_microwell_han_004.py | 72 ---- .../human_kidney_2020_microwell_han_005.py | 72 ---- .../human_kidney_2020_microwell_han_006.py | 72 ---- .../human_kidney_2020_microwell_han_007.py | 72 ---- .../human_liver_2020_microwell_han_001.py | 51 --- .../human_liver_2020_microwell_han_002.py | 51 --- .../human_liver_2020_microwell_han_003.py | 51 --- .../human_liver_2020_microwell_han_004.py | 51 --- .../human_liver_2020_microwell_han_005.py | 51 --- .../human_lung_2020_microwell_han_001.py | 73 ---- .../human_lung_2020_microwell_han_002.py | 73 ---- .../human_lung_2020_microwell_han_003.py | 73 ---- .../human_lung_2020_microwell_han_004.py | 73 ---- .../human_lung_2020_microwell_han_005.py | 73 ---- .../human_malegonad_2020_microwell_han_001.py | 51 --- .../human_malegonad_2020_microwell_han_002.py | 51 --- 
.../human_muscle_2020_microwell_han_001.py | 22 -- .../human_muscle_2020_microwell_han_002.py | 22 -- .../human_omentum_2020_microwell_han_001.py | 22 -- .../human_omentum_2020_microwell_han_002.py | 22 -- .../human_omentum_2020_microwell_han_003.py | 22 -- .../human_pancreas_2020_microwell_han_001.py | 61 ---- .../human_pancreas_2020_microwell_han_002.py | 61 ---- .../human_pancreas_2020_microwell_han_003.py | 61 ---- .../human_pancreas_2020_microwell_han_004.py | 61 ---- .../human_placenta_2020_microwell_han_001.py | 54 --- .../human_pleura_2020_microwell_han_001.py | 22 -- .../human_prostate_2020_microwell_han_001.py | 43 --- .../human_rectum_2020_microwell_han_001.py | 38 -- .../human_rib_2020_microwell_han_001.py | 22 -- .../human_rib_2020_microwell_han_002.py | 22 -- .../human_skin_2020_microwell_han_001.py | 52 --- .../human_skin_2020_microwell_han_002.py | 52 --- ...human_spinalcord_2020_microwell_han_001.py | 22 -- .../human_spleen_2020_microwell_han_001.py | 44 --- .../human_spleen_2020_microwell_han_002.py | 44 --- .../human_stomach_2020_microwell_han_001.py | 22 -- .../human_stomach_2020_microwell_han_002.py | 22 -- .../human_stomach_2020_microwell_han_003.py | 22 -- .../human_stomach_2020_microwell_han_004.py | 22 -- .../human_stomach_2020_microwell_han_005.py | 22 -- .../human_stomach_2020_microwell_han_006.py | 22 -- .../human_stomach_2020_microwell_han_007.py | 22 -- .../human_stomach_2020_microwell_han_008.py | 22 -- .../human_stomach_2020_microwell_han_009.py | 22 -- .../human_stomach_2020_microwell_han_010.py | 22 -- .../human_thymus_2020_microwell_han_001.py | 36 -- .../human_thymus_2020_microwell_han_002.py | 36 -- .../human_thyroid_2020_microwell_han_001.py | 22 -- .../human_thyroid_2020_microwell_han_002.py | 22 -- .../human_trachea_2020_microwell_han_001.py | 22 -- .../human_ureter_2020_microwell_han_001.py | 22 -- .../human_uterus_2020_microwell_han_001.py | 22 -- .../human_x_2020_microwellseq_han_x.py | 331 ++++++++++++++++++ 
.../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 2 +- 109 files changed, 333 insertions(+), 4010 deletions(-) delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py delete mode 
100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py delete mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py 
delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py delete mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py delete mode 
100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py delete mode 
100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py index a7e5316ad..3b3ab7330 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py @@ -14,7 +14,7 @@ def __init__( ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "bladder organ" + self.organ = "urinary bladder" self.class_maps = { "0": { diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py deleted file mode 100644 index fbedf1d90..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/base.py +++ /dev/null @@ -1,131 +0,0 @@ -import anndata -import numpy as np -import os -import pandas as pd -import scipy.sparse -from typing import Union -import zipfile - -from sfaira.data import DatasetBase - - -class Dataset_d10_1038_s41586_020_2157_4(DatasetBase): - """ - This is 
a dataloader template for loaders cell landscape data. - """ - - def __init__( - self, - data_path: Union[str, None], - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - self.download_url_data = "https://ndownloader.figshare.com/files/17727365" - self.download_url_meta = [ - "https://ndownloader.figshare.com/files/21758835", - "https://ndownloader.figshare.com/files/22447898", - ] - - self.author = "Guo" - self.doi = "10.1038/s41586-020-2157-4" - self.healthy = True - self.normalization = "raw" - self.organism = "human" - self.protocol = "microwell-seq" - self.state_exact = "healthy" - self.year = 2020 - - self.obs_key_cellontology_original = "cell_ontology_class" - self.obs_key_dev_stage = "dev_stage" - self.obs_key_sex = "gender" - self.obs_key_age = "age" - - self.var_symbol_col = "index" - - def _load_generalized(self, sample_id: str): - """ - Attempt to find file, cache entire HCL if file was not found. 
- - :param fn: - :return: - """ - adata = anndata.read(os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_adata.h5ad")) - # convert to sparse matrix - adata.X = scipy.sparse.csr_matrix(adata.X).copy() - - # harmonise annotations - for col in ["batch", "tissue"]: - adata.obs[col] = adata.obs[col].astype("str") - adata.obs.index = adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( - "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( - "FetalFemaleGonald", "FetalFemaleGonad", regex=True) - adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", - "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) - adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] - - # load celltype labels and harmonise them - # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: - fig1_anno = pd.read_excel( - os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), - index_col="cellnames", - engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables - ) - fig1_anno.index = fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( - "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( - "FetalFemaleGonald", "FetalFemaleGonad", regex=True) - - # check that the order of cells and cell labels is the same - assert np.all(fig1_anno.index == adata.obs.index) - - # add annotations to adata object and rename columns - adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) - adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", - "celltype_global"] - - # add sample-wise annotations to the full adata object - df = pd.DataFrame( - columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", 
"Development_stage", "Method", - "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) - archive = zipfile.ZipFile( - os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "annotation_rmbatch_data_revised417.zip") - ) - for f in archive.namelist(): - df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") - df = pd.concat([df, df1], sort=True) - df = df.set_index("Cell_id") - adata = adata[[i in df.index for i in adata.obs.index]].copy() - a_idx = adata.obs.index.copy() - adata.obs = pd.concat([adata.obs, df[["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"]]], axis=1) - assert np.all(a_idx == adata.obs.index) - - # remove mouse cells from the object # ToDo: add this back in as mouse data sets? - adata = adata[adata.obs["Source"] != "MCA2.0"].copy() - - # tidy up the column names of the obs annotations - adata.obs.columns = ["sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", - "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", - "protocol", "source"] - - # create a tidy organ annotation which is then used in sfaira - adata.obs["organ"] = adata.obs["sub_tissue"] \ - .str.replace("Adult", "") \ - .str.replace("Fetal", "") \ - .str.replace("Neonatal", "") \ - .str.replace("Transverse", "") \ - .str.replace("Sigmoid", "") \ - .str.replace("Ascending", "") \ - .str.replace("Cord", "") \ - .str.replace("Peripheral", "") \ - .str.replace("CD34P", "") \ - .str.replace("Cerebellum", "Brain") \ - .str.replace("TemporalLobe", "Brain") \ - .str.replace("BoneMarrow", "Bone") \ - .str.replace("Spinal", "SpinalCord") \ - .str.replace("Intestine", "Stomach") \ - .str.replace("Eyes", "Eye") \ - .str.lower() - - self.adata = adata[adata.obs["sample"] == sample_id].copy() diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py deleted file mode 100644 index ecaacf71f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adipose_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "adipose" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultAdipose_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py deleted file mode 100644 index eb2a01b0f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="NeonatalAdrenalGland_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py deleted file mode 100644 index 5bf3a2f33..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py deleted file mode 100644 index 5d35adeb7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - 
self._load_generalized(sample_id="FetalAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py deleted file mode 100644 index 326e842a2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_004.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultAdrenalGland_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py deleted file mode 100644 index 00ffdf149..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_005.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = 
"adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalAdrenalGland_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py deleted file mode 100644 index 117678910..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_adrenalgland_2020_microwell_han_006.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = "adrenalgland" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultAdrenalGland_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py deleted file mode 100644 index 3d1ce0372..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_artery_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "artery" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultArtery_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py deleted file mode 100644 index 8803259d1..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "bladder organ" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultBladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py deleted file mode 100644 index a4cd2c58c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - 
self.id = "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "bladder organ" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultBladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py deleted file mode 100644 index 4585003c0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bladder_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "bladder" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultGallbladder_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py deleted file mode 100644 index d41ceca66..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) - self.id = "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultPeripheralBlood_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py deleted file mode 100644 index 751296492..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="PeripheralBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py deleted file mode 100644 index 3c59dbaad..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) - self.id = "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="CordBlood_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py deleted file mode 100644 index 09fde31c4..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_004.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultPeripheralBlood_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py deleted file mode 100644 index c21029981..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_005.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - 
self.id = "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="CordBloodCD34P_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py deleted file mode 100644 index 497ce9180..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_006.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="CordBloodCD34P_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py deleted file mode 100644 index bab12f9e7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_blood_2020_microwell_han_007.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = "blood" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="CordBlood_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py deleted file mode 100644 index 8e456f0e9..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "bone tissue" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="BoneMarrow_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py deleted file mode 100644 index 663b35876..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_bone_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "bone tissue" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="BoneMarrow_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py deleted file mode 100644 index 8456273fd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_001.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": 
"Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalBrain_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py deleted file mode 100644 index 660b5ccc1..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_002.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal 
enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalBrain_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py deleted file mode 100644 index 10906fffc..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_003.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", 
- "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalBrain_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py deleted file mode 100644 index 819dcfe4d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_004.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultTemporalLobe_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py deleted file mode 100644 index 93434bc1d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_005.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import 
Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalBrain_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py deleted file mode 100644 index 8b0d2f332..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_brain_2020_microwell_han_006.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = "brain" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Astrocyte": "Astrocyte", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Endothelial cell (APC)": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal endocrine cell": "Fetal endocrine cell", - "Fetal enterocyte ": "Fetal enterocyte ", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal Neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Oligodendrocyte": "Oligodendrocytes", - "Primordial germ cell": "Primordial germ cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - 
"Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "Neuronal stem cells" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultCerebellum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py deleted file mode 100644 index 6af2e2bb9..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_calvaria_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "calvaria" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalCalvaria_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py deleted file mode 100644 index eb768c2e0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_cervix_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, 
**kwargs) - self.id = "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "cervix" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultCervix_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py deleted file mode 100644 index 7ff919329..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_chorionicvillus_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "chorionicvillus" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="ChorionicVillus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py deleted file mode 100644 index 6dc78ba4e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_001.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Colon" - self.class_maps = { - "0": { - "Enterocyte progenitor": "Enterocyte Progenitors", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Enterocyte": "Enterocytes", - "Epithelial cell": "Epithelial cell", - "T cell": "T cell", - "Stromal cell": "Stromal", - "Macrophage": "Macrophage", - "B cell": "B cell", - "Smooth muscle cell": "Smooth Muscle", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "Endothelial cell": "Endothelial", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Enterocyte Progenitors", - "Fibroblast": "Fibroblast", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", - "Fetal stromal cell": "Stromal", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Monocyte": "Monocyte", - "Erythroid cell": "Erythroid cell", - "Fetal endocrine cell": "Enteroendocrine cells", - "Primordial germ cell": "Primordial germ cell", - "Fetal enterocyte": "Fetal enterocyte", - "M2 Macrophage": "Macrophage", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultAscendingColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py deleted file mode 100644 index 38f5f1fc7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_002.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: 
Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "Colon" - self.class_maps = { - "0": { - "Enterocyte progenitor": "Enterocyte Progenitors", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Enterocyte": "Enterocytes", - "Epithelial cell": "Epithelial cell", - "T cell": "T cell", - "Stromal cell": "Stromal", - "Macrophage": "Macrophage", - "B cell": "B cell", - "Smooth muscle cell": "Smooth Muscle", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "Endothelial cell": "Endothelial", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Enterocyte Progenitors", - "Fibroblast": "Fibroblast", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", - "Fetal stromal cell": "Stromal", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Monocyte": "Monocyte", - "Erythroid cell": "Erythroid cell", - "Fetal endocrine cell": "Enteroendocrine cells", - "Primordial germ cell": "Primordial germ cell", - "Fetal enterocyte": "Fetal enterocyte", - "M2 Macrophage": "Macrophage", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py deleted file mode 100644 index c9fa5a835..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_003.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class 
Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "Colon" - self.class_maps = { - "0": { - "Enterocyte progenitor": "Enterocyte Progenitors", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Enterocyte": "Enterocytes", - "Epithelial cell": "Epithelial cell", - "T cell": "T cell", - "Stromal cell": "Stromal", - "Macrophage": "Macrophage", - "B cell": "B cell", - "Smooth muscle cell": "Smooth Muscle", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "Endothelial cell": "Endothelial", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Enterocyte Progenitors", - "Fibroblast": "Fibroblast", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", - "Fetal stromal cell": "Stromal", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Monocyte": "Monocyte", - "Erythroid cell": "Erythroid cell", - "Fetal endocrine cell": "Enteroendocrine cells", - "Primordial germ cell": "Primordial germ cell", - "Fetal enterocyte": "Fetal enterocyte", - "M2 Macrophage": "Macrophage", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultTransverseColon_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py deleted file mode 100644 index b842d1193..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_colon_2020_microwell_han_004.py +++ 
/dev/null @@ -1,50 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "Colon" - self.class_maps = { - "0": { - "Enterocyte progenitor": "Enterocyte Progenitors", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Enterocyte": "Enterocytes", - "Epithelial cell": "Epithelial cell", - "T cell": "T cell", - "Stromal cell": "Stromal", - "Macrophage": "Macrophage", - "B cell": "B cell", - "Smooth muscle cell": "Smooth Muscle", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "Endothelial cell": "Endothelial", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Enterocyte Progenitors", - "Fibroblast": "Fibroblast", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial", - "Fetal stromal cell": "Stromal", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Monocyte": "Monocyte", - "Erythroid cell": "Erythroid cell", - "Fetal endocrine cell": "Enteroendocrine cells", - "Primordial germ cell": "Primordial germ cell", - "Fetal enterocyte": "Fetal enterocyte", - "M2 Macrophage": "Macrophage", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultSigmoidColon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py deleted file mode 100644 index 
3eb457805..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_duodenum_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "duodenum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultDuodenum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py deleted file mode 100644 index e8dbd84e7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_epityphlon_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "caecum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultEpityphlon_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py 
deleted file mode 100644 index 081d03871..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_001.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Esophagus" - self.class_maps = { - "0": { - "Fibroblast": "Fibroblast", - "Basal cell": "Basal cell", - "Stratified epithelial cell": "Stratified epithelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Macrophage": "Macrophage", - "B cell": "B cell", - "T cell": "T cell", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Stromal cell": "Stromal cell", - "Monocyte": "Monocyte", - "Smooth muscle cell": "Smooth muscle cell", - "Endothelial cell": "Endothelial cell", - "Neutrophil": "Neutrophil", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Fetal stromal cell": "Fetal stromal cell", - "CB CD34+": "CB CD34+", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Gastric endocrine cell": "Gastric endocrine cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Loop of Henle": "Loop of Henle", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultEsophagus_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py deleted file mode 100644 index 15c19836d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_esophagus_2020_microwell_han_002.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "Esophagus" - self.class_maps = { - "0": { - "Fibroblast": "Fibroblast", - "Basal cell": "Basal cell", - "Stratified epithelial cell": "Stratified epithelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Macrophage": "Macrophage", - "B cell": "B cell", - "T cell": "T cell", - "Dendritic cell": "Dendritic cell", - "Mast cell": "Mast cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Stromal cell": "Stromal cell", - "Monocyte": "Monocyte", - "Smooth muscle cell": "Smooth muscle cell", - "Endothelial cell": "Endothelial cell", - "Neutrophil": "Neutrophil", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Fetal stromal cell": "Fetal stromal cell", - "CB CD34+": "CB CD34+", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Gastric endocrine cell": "Gastric endocrine cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Loop of Henle": "Loop of Henle", - "Fetal mesenchymal progenitor": "Fetal 
mesenchymal progenitor", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultEsophagus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py deleted file mode 100644 index ff60c338f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_eye_2020_microwell_han_001.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Eye" - self.class_maps = { - "0": { - "Fetal neuron": "Fetal neuron", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Erythroid cell": "Erythroid cell", - "Primordial germ cell": "Primordial germ cell", - "Endothelial cell": "Endothelial cell", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "Fetal stromal cell": "Fetal stromal cell", - "Fetal fibroblast": "Fibroblast", - "Fetal Neuron": "Fetal neuron", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Dendritic cell": "Dendritic cell", - "Fetal endocrine cell": "Fetal endocrine cell", - "Macrophage": "Macrophage", - "T cell": "T cell", - "Basal cell": "Basal cell", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", - "Stratified epithelial 
cell": "Stratified epithelial cell", - "CB CD34+": "CB CD34_pos", - "hESC": "hESC" - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalEyes_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py deleted file mode 100644 index 086ed6bf6..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_fallopiantube_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "fallopiantube" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultFallopiantube_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py deleted file mode 100644 index 33ddc21bd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) 
- self.id = "human_ovary_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "ovary" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalFemaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py deleted file mode 100644 index d0fe905e0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_femalegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_ovary_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "ovary" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalFemaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py deleted file mode 100644 index 883df3cd3..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_gallbladder_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) - self.id = "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "gall bladder" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultGallbladder_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py deleted file mode 100644 index 5cfda3ef5..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "heart" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py deleted file mode 100644 index 6833761f4..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) - self.id = "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "heart" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultHeart_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py deleted file mode 100644 index 83ccaff7b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "heart" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py deleted file mode 100644 index 992c673d6..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_heart_2020_microwell_han_004.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) - self.id = "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "heart" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalHeart_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py deleted file mode 100644 index d5dfe4cc8..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_hesc_2020_microwell_han_001.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "hesc" - - def _load(self): - self._load_generalized(sample_id="HESC_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py deleted file mode 100644 index 2c033b035..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ileum_2020_microwell_han_001.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "ileum" - self.class_maps = { - "0": { - "B cell": "B cells", - "B cell (Plasmocyte)": "Plasma Cells", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte": "Enterocytes", - "Enterocyte progenitor": "Enterocytes", - "Epithelial cell": "Epithelial cell", - "Fetal Neuron": "Fetal neuron", - "Fetal enterocyte": "Enterocytes", - "Fetal epithelial progenitor": "Progenitors", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal neuron", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblasts", - "Hepatocyte/Endodermal cell": "Hepatocyte/Endodermal cell", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cells", - "Monocyte": "Monocyte", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Proliferating T cell": "T cells", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cells", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultIleum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py deleted file mode 100644 index fc9079ee0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_jejunum_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "jejunum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultJejunum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py deleted file mode 100644 index 792f9f1f2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_001.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal 
chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultKidney_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py deleted file mode 100644 index 9f27fe35d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_002.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class 
Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast 
cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py deleted file mode 100644 index 5f9f4aecb..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_003.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal 
transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultKidney_4") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py deleted file mode 100644 index 6954a5982..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_004.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": 
"Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalKidney_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py deleted file mode 100644 index 73bef5db0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_005.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule 
progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalKidney_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py deleted file mode 100644 index e49320f26..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_006.py +++ /dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid 
progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalKidney_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py deleted file mode 100644 index 01b687783..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_kidney_2020_microwell_han_007.py +++ 
/dev/null @@ -1,72 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = "Kidney" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Epithelial cell (intermediated)": "Intermediated cell", - "Erythroid cell": "Erythroid", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal chondrocyte": "Chondrocyte", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Stroma progenitor", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stroma progenitor", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Intercalated cell": "Intercalated cell", - "Intermediated cell": "Intermediated cell", - "Kidney intercalated cell": "Intercalated 
cell", - "Loop of Henle": "Loop of Henle", - "M2 Macrophage": "M2 Macrophage", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Smooth muscle cell": "Vascular Smooth Muscle Cells and pericytes", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalKidney_6") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py deleted file mode 100644 index 9124fe50e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_001.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Liver" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "Plasma B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - 
"Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Late Erythroid", - "Erythroid progenitor cell (RP high)": "Early Erythroid", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Non inflammatory macrophages", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", - "Smooth muscle cell": "Smooth muscle cell", - "T cell": "T cell" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLiver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py deleted file mode 100644 index 15b0e0686..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_002.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4" - 
self.organ = "Liver" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "Plasma B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Late Erythroid", - "Erythroid progenitor cell (RP high)": "Early Erythroid", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Non inflammatory macrophages", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", - "Smooth muscle cell": "Smooth muscle cell", - "T cell": "T cell" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLiver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py deleted file mode 100644 index 134ae0d1c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_003.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - 
data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "Liver" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "Plasma B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Late Erythroid", - "Erythroid progenitor cell (RP high)": "Early Erythroid", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Non inflammatory macrophages", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", - "Smooth muscle cell": "Smooth muscle cell", - "T cell": "T cell" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLiver_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py deleted file mode 100644 index 03cd609b4..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_004.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "Liver" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "Plasma B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Late Erythroid", - "Erythroid progenitor cell (RP high)": "Early Erythroid", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Non inflammatory macrophages", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", - "Smooth muscle cell": "Smooth muscle cell", - "T cell": "T cell" - }, - } - - def _load(self): - 
self._load_generalized(sample_id="Liver_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py deleted file mode 100644 index 76ac33488..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_liver_2020_microwell_han_005.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "Liver" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "Plasma B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Late Erythroid", - "Erythroid progenitor cell (RP high)": "Early Erythroid", - "Fetal enterocyte ": "Enterocyte ", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Non inflammatory macrophages", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Myeloid cell": "Myeloid cell", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": 
"Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Sinusoidal endothelial cell": "Liver sinusoidal endothelial cells", - "Smooth muscle cell": "Smooth muscle cell", - "T cell": "T cell" - }, - } - - def _load(self): - self._load_generalized(sample_id="Liver_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py deleted file mode 100644 index 8a8582238..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_001.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "lung" - self.class_maps = { - "0": { - "AT2 cell": "AT2", - "Antigen presenting cell (RPS high)": "unknown", - "B cell": "B cell lineage", - "B cell (Plasmocyte)": "B cell lineage", - "Basal cell": "Basal", - "CB CD34+": "Fetal airway progenitors", - "Chondrocyte": "1_Stroma", - "Dendritic cell": "Dendritic cells", - "Endothelial cell": "1_Endothelial", - "Endothelial cell (APC)": "1_Endothelial", - "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", - "Enterocyte progenitor": "1_Epithelial", - "Epithelial cell": "1_Epithelial", - "Epithelial cell (intermediated)": "1_Epithelial", - "Erythroid cell": "Erythrocytes", - "Erythroid progenitor cell (RP high)": "Erythrocytes", - "Fasciculata cell": "unknown", - "Fetal Neuron": "unknown", - "Fetal chondrocyte": 
"1_Stroma", - "Fetal endocrine cell": "unknown", - "Fetal enterocyte ": "1_Epithelial", - "Fetal epithelial progenitor": "1_Epithelial", - "Fetal fibroblast": "Fibroblasts", - "Fetal mesenchymal progenitor": "1_Stroma", - "Fetal neuron": "unknown", - "Fetal skeletal muscle cell": "unknown", - "Fetal stromal cell": "1_Stroma", - "Fibroblast": "Fibroblasts", - "Gastric endocrine cell": "unknown", - "Goblet cell": "Secretory", - "Kidney intercalated cell": "unknown", - "Loop of Henle": "unknown", - "M2 Macrophage": "Macrophages", - "Macrophage": "Macrophages", - "Mast cell": "Mast cells", - "Mesothelial cell": "Mast cells", - "Monocyte": "Monocytes", - "Myeloid cell": "2_Myeloid", - "Neutrophil": "Neutrophilic", - "Neutrophil (RPS high)": "Neutrophilic", - "Primordial germ cell": "unknown", - "Proliferating T cell": "T cell lineage", - "Proximal tubule progenitor": "unknown", - "Sinusoidal endothelial cell": "1_Endothelial", - "Smooth muscle cell": "2_Smooth Muscle", - "Stratified epithelial cell": "1_Epithelial", - "Stromal cell": "1_Stroma", - "T cell": "T cell lineage", - "Ventricle cardiomyocyte": "1_Stroma", - "hESC": "Fetal airway progenitors", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalLung_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py deleted file mode 100644 index d6307ba1b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_002.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "lung" - self.class_maps = { - "0": { - "AT2 cell": "AT2", - "Antigen presenting cell (RPS high)": "unknown", - "B cell": "B cell lineage", - "B cell (Plasmocyte)": "B cell lineage", - "Basal cell": "Basal", - "CB CD34+": "Fetal airway progenitors", - "Chondrocyte": "1_Stroma", - "Dendritic cell": "Dendritic cells", - "Endothelial cell": "1_Endothelial", - "Endothelial cell (APC)": "1_Endothelial", - "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", - "Enterocyte progenitor": "1_Epithelial", - "Epithelial cell": "1_Epithelial", - "Epithelial cell (intermediated)": "1_Epithelial", - "Erythroid cell": "Erythrocytes", - "Erythroid progenitor cell (RP high)": "Erythrocytes", - "Fasciculata cell": "unknown", - "Fetal Neuron": "unknown", - "Fetal chondrocyte": "1_Stroma", - "Fetal endocrine cell": "unknown", - "Fetal enterocyte ": "1_Epithelial", - "Fetal epithelial progenitor": "1_Epithelial", - "Fetal fibroblast": "Fibroblasts", - "Fetal mesenchymal progenitor": "1_Stroma", - "Fetal neuron": "unknown", - "Fetal skeletal muscle cell": "unknown", - "Fetal stromal cell": "1_Stroma", - "Fibroblast": "Fibroblasts", - "Gastric endocrine cell": "unknown", - "Goblet cell": "Secretory", - "Kidney intercalated cell": "unknown", - "Loop of Henle": "unknown", - "M2 Macrophage": "Macrophages", - "Macrophage": "Macrophages", - "Mast cell": "Mast cells", - "Mesothelial cell": "Mast cells", - "Monocyte": "Monocytes", - "Myeloid cell": "2_Myeloid", - "Neutrophil": "Neutrophilic", - "Neutrophil (RPS high)": "Neutrophilic", - "Primordial germ cell": "unknown", - "Proliferating T cell": "T cell lineage", - "Proximal tubule progenitor": "unknown", - "Sinusoidal endothelial cell": "1_Endothelial", - "Smooth muscle cell": "2_Smooth Muscle", - "Stratified epithelial cell": "1_Epithelial", - "Stromal cell": "1_Stroma", - "T cell": "T cell 
lineage", - "Ventricle cardiomyocyte": "1_Stroma", - "hESC": "Fetal airway progenitors", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLung_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py deleted file mode 100644 index d2bd1a332..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_003.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "lung" - self.class_maps = { - "0": { - "AT2 cell": "AT2", - "Antigen presenting cell (RPS high)": "unknown", - "B cell": "B cell lineage", - "B cell (Plasmocyte)": "B cell lineage", - "Basal cell": "Basal", - "CB CD34+": "Fetal airway progenitors", - "Chondrocyte": "1_Stroma", - "Dendritic cell": "Dendritic cells", - "Endothelial cell": "1_Endothelial", - "Endothelial cell (APC)": "1_Endothelial", - "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", - "Enterocyte progenitor": "1_Epithelial", - "Epithelial cell": "1_Epithelial", - "Epithelial cell (intermediated)": "1_Epithelial", - "Erythroid cell": "Erythrocytes", - "Erythroid progenitor cell (RP high)": "Erythrocytes", - "Fasciculata cell": "unknown", - "Fetal Neuron": "unknown", - "Fetal chondrocyte": "1_Stroma", - "Fetal endocrine cell": "unknown", - "Fetal enterocyte ": "1_Epithelial", - "Fetal epithelial progenitor": "1_Epithelial", - "Fetal fibroblast": 
"Fibroblasts", - "Fetal mesenchymal progenitor": "1_Stroma", - "Fetal neuron": "unknown", - "Fetal skeletal muscle cell": "unknown", - "Fetal stromal cell": "1_Stroma", - "Fibroblast": "Fibroblasts", - "Gastric endocrine cell": "unknown", - "Goblet cell": "Secretory", - "Kidney intercalated cell": "unknown", - "Loop of Henle": "unknown", - "M2 Macrophage": "Macrophages", - "Macrophage": "Macrophages", - "Mast cell": "Mast cells", - "Mesothelial cell": "Mast cells", - "Monocyte": "Monocytes", - "Myeloid cell": "2_Myeloid", - "Neutrophil": "Neutrophilic", - "Neutrophil (RPS high)": "Neutrophilic", - "Primordial germ cell": "unknown", - "Proliferating T cell": "T cell lineage", - "Proximal tubule progenitor": "unknown", - "Sinusoidal endothelial cell": "1_Endothelial", - "Smooth muscle cell": "2_Smooth Muscle", - "Stratified epithelial cell": "1_Epithelial", - "Stromal cell": "1_Stroma", - "T cell": "T cell lineage", - "Ventricle cardiomyocyte": "1_Stroma", - "hESC": "Fetal airway progenitors", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py deleted file mode 100644 index 27cb8d221..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_004.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "lung" - self.class_maps = { - "0": { - "AT2 
cell": "AT2", - "Antigen presenting cell (RPS high)": "unknown", - "B cell": "B cell lineage", - "B cell (Plasmocyte)": "B cell lineage", - "Basal cell": "Basal", - "CB CD34+": "Fetal airway progenitors", - "Chondrocyte": "1_Stroma", - "Dendritic cell": "Dendritic cells", - "Endothelial cell": "1_Endothelial", - "Endothelial cell (APC)": "1_Endothelial", - "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", - "Enterocyte progenitor": "1_Epithelial", - "Epithelial cell": "1_Epithelial", - "Epithelial cell (intermediated)": "1_Epithelial", - "Erythroid cell": "Erythrocytes", - "Erythroid progenitor cell (RP high)": "Erythrocytes", - "Fasciculata cell": "unknown", - "Fetal Neuron": "unknown", - "Fetal chondrocyte": "1_Stroma", - "Fetal endocrine cell": "unknown", - "Fetal enterocyte ": "1_Epithelial", - "Fetal epithelial progenitor": "1_Epithelial", - "Fetal fibroblast": "Fibroblasts", - "Fetal mesenchymal progenitor": "1_Stroma", - "Fetal neuron": "unknown", - "Fetal skeletal muscle cell": "unknown", - "Fetal stromal cell": "1_Stroma", - "Fibroblast": "Fibroblasts", - "Gastric endocrine cell": "unknown", - "Goblet cell": "Secretory", - "Kidney intercalated cell": "unknown", - "Loop of Henle": "unknown", - "M2 Macrophage": "Macrophages", - "Macrophage": "Macrophages", - "Mast cell": "Mast cells", - "Mesothelial cell": "Mast cells", - "Monocyte": "Monocytes", - "Myeloid cell": "2_Myeloid", - "Neutrophil": "Neutrophilic", - "Neutrophil (RPS high)": "Neutrophilic", - "Primordial germ cell": "unknown", - "Proliferating T cell": "T cell lineage", - "Proximal tubule progenitor": "unknown", - "Sinusoidal endothelial cell": "1_Endothelial", - "Smooth muscle cell": "2_Smooth Muscle", - "Stratified epithelial cell": "1_Epithelial", - "Stromal cell": "1_Stroma", - "T cell": "T cell lineage", - "Ventricle cardiomyocyte": "1_Stroma", - "hESC": "Fetal airway progenitors", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultLung_1") diff 
--git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py deleted file mode 100644 index 7e916aece..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_lung_2020_microwell_han_005.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "lung" - self.class_maps = { - "0": { - "AT2 cell": "AT2", - "Antigen presenting cell (RPS high)": "unknown", - "B cell": "B cell lineage", - "B cell (Plasmocyte)": "B cell lineage", - "Basal cell": "Basal", - "CB CD34+": "Fetal airway progenitors", - "Chondrocyte": "1_Stroma", - "Dendritic cell": "Dendritic cells", - "Endothelial cell": "1_Endothelial", - "Endothelial cell (APC)": "1_Endothelial", - "Endothelial cell (endothelial to mesenchymal transition)": "1_Endothelial", - "Enterocyte progenitor": "1_Epithelial", - "Epithelial cell": "1_Epithelial", - "Epithelial cell (intermediated)": "1_Epithelial", - "Erythroid cell": "Erythrocytes", - "Erythroid progenitor cell (RP high)": "Erythrocytes", - "Fasciculata cell": "unknown", - "Fetal Neuron": "unknown", - "Fetal chondrocyte": "1_Stroma", - "Fetal endocrine cell": "unknown", - "Fetal enterocyte ": "1_Epithelial", - "Fetal epithelial progenitor": "1_Epithelial", - "Fetal fibroblast": "Fibroblasts", - "Fetal mesenchymal progenitor": "1_Stroma", - "Fetal neuron": "unknown", - "Fetal skeletal muscle cell": "unknown", - "Fetal stromal cell": "1_Stroma", - "Fibroblast": 
"Fibroblasts", - "Gastric endocrine cell": "unknown", - "Goblet cell": "Secretory", - "Kidney intercalated cell": "unknown", - "Loop of Henle": "unknown", - "M2 Macrophage": "Macrophages", - "Macrophage": "Macrophages", - "Mast cell": "Mast cells", - "Mesothelial cell": "Mast cells", - "Monocyte": "Monocytes", - "Myeloid cell": "2_Myeloid", - "Neutrophil": "Neutrophilic", - "Neutrophil (RPS high)": "Neutrophilic", - "Primordial germ cell": "unknown", - "Proliferating T cell": "T cell lineage", - "Proximal tubule progenitor": "unknown", - "Sinusoidal endothelial cell": "1_Endothelial", - "Smooth muscle cell": "2_Smooth Muscle", - "Stratified epithelial cell": "1_Epithelial", - "Stromal cell": "1_Stroma", - "T cell": "T cell lineage", - "Ventricle cardiomyocyte": "1_Stroma", - "hESC": "Fetal airway progenitors", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalLung_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py deleted file mode 100644 index 624a88655..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_001.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_testis_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "testis" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - 
"Endothelial cell": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal acinar cell": "Fetal acinar cell", - "Fetal chondrocyte": "Fetal chondrocyte", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal fibroblast": "Fetal fibroblast", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal neuron", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "Fetal stromal cell": "Fetal stromal cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", - "Loop of Henle": "Loop of Henle", - "Macrophage": "Macrophages", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalMaleGonad_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py deleted file mode 100644 index 54531fcef..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_malegonad_2020_microwell_han_002.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"human_testis_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "testis" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cells", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fasciculata cell": "Fasciculata cell", - "Fetal acinar cell": "Fetal acinar cell", - "Fetal chondrocyte": "Fetal chondrocyte", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal fibroblast": "Fetal fibroblast", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal neuron": "Fetal neuron", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "Fetal stromal cell": "Fetal stromal cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Sertoli cells", - "Loop of Henle": "Loop of Henle", - "Macrophage": "Macrophages", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "Ureteric bud cell": "Ureteric bud cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalMaleGonad_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py deleted file mode 100644 index 37a2bb38c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class 
Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "skeletal muscle organ" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py deleted file mode 100644 index ff5b184c5..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_muscle_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_muscle_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "skeletal muscle organ" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultMuscle_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py deleted file mode 100644 index c57392c8e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import 
Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "omentum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultOmentum_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py deleted file mode 100644 index 4301ebc86..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "omentum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultOmentum_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py deleted file mode 100644 index 1c162a52a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_omentum_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base 
import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "omentum" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultOmentum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py deleted file mode 100644 index 57f9b1020..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_001.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Pancreas" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Erythroid cell", - 
"Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Mesenchymal Cell", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell" - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py deleted file mode 100644 index 816c8a8fa..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_002.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "Pancreas" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Mesenchymal Cell", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - }, - } - - def _load(self): - 
self._load_generalized(sample_id="FetalPancreas_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py deleted file mode 100644 index 0b9e723ba..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_003.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "Pancreas" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Mesenchymal Cell", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stromal cell", - "Fibroblast": 
"Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalPancreas_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py deleted file mode 100644 index 1f4ec1aef..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pancreas_2020_microwell_han_004.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "Pancreas" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell 
(APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte progenitor": "Enterocyte progenitor", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Neuron", - "Fetal acinar cell": "Acinar cell", - "Fetal endocrine cell": "Endocrine cell", - "Fetal enterocyte ": "Enterocyte", - "Fetal epithelial progenitor": "Epithelial progenitor", - "Fetal fibroblast": "Fibroblast", - "Fetal mesenchymal progenitor": "Mesenchymal Cell", - "Fetal neuron": "Neuron", - "Fetal skeletal muscle cell": "Skeletal muscle cell", - "Fetal stromal cell": "Stromal cell", - "Fibroblast": "Fibroblast", - "Gastric endocrine cell": "Gastric endocrine cell", - "Immature sertoli cell (Pre-Sertoli cell)": "Immature sertoli cell (Pre-Sertoli cell)", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Pancreas exocrine cell": "Pancreas exocrine cell", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "T cell", - "Proximal tubule progenitor": "Proximal tubule progenitor", - "Sinusoidal endothelial cell": "Endothelial cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalPancreas_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py deleted file mode 100644 index 0c410bc5c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_placenta_2020_microwell_han_001.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class 
Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Placenta" - self.class_maps = { - "0": { - "Fibroblast": "Fibroblast", - "Macrophage": "Macrophage", - "Epithelial cell": "Epithelial cell", - "Erythroid cell": "Erythroid cell", - "Fetal stromal cell": "Fetal stromal cell", - "Stromal cell": "Stromal cell", - "Smooth muscle cell": "Smooth muscle cell", - "Endothelial cell": "Endothelial cell", - "T cell": "T cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Intermediated cell": "Intermediated cell", - "Dendritic cell": "Dendritic cell", - "CB CD34+": "CB CD34+", - "Stratified epithelial cell": "Stratified epithelial cell", - "Fetal neuron": "Fetal neuron", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "B cell": "B cell", - "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", - "hESC": "hESC", - "Basal cell": "Basal cell", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell (endothelial to mesenchymal transition)", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal fibroblast": "Fetal fibroblast", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "M2 Macrophage": "M2 Macrophage", - "Myeloid cell": "Myeloid cell", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - }, - } - - def _load(self): - self._load_generalized(sample_id="Placenta_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py deleted file mode 100644 index 7a39f5c02..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_pleura_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "pleura" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultPleura_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py deleted file mode 100644 index 8d6e90def..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_prostate_2020_microwell_han_001.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "prostate gland" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - 
"Basal cell": "Basal cell", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell", - "Endothelial cell (endothelial to mesenchymal transition)": "Endothelial cell", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell (intermediated)": "Epithelial cell (intermediated)", - "Fasciculata cell": "Fasciculata cell", - "Fetal enterocyte": "Fetal enterocyte", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Gastric endocrine cell": "Gastric endocrine cell", - "Goblet cell": "Goblet cell", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "Primordial germ cell": "Primordial germ cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stratified epithelial cell": "Stratified epithelial cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultProstate_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py deleted file mode 100644 index c5581b7f0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rectum_2020_microwell_han_001.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "rectum" - self.class_maps = { - "0": { - "B cell": "B cell", - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Dendritic cell": "Dendritic cell", - "Endothelial cell 
(APC)": "Endothelial cell (APC)", - "Enterocyte": "Enterocyte", - "Enterocyte progenitor": "Enterocyte progenitor", - "Epithelial cell": "Epithelial cell", - "Erythroid cell": "Erythroid cell", - "Fetal stromal cell": "Fetal stromal cell", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultRectum_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py deleted file mode 100644 index 227f78b4b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "rib" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalRib_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py deleted file mode 100644 index 1496d4560..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_rib_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class 
Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "rib" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalRib_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py deleted file mode 100644 index b61f225d2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_001.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "skin of body" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Epithelial cell": "Epithelial cell", - "Erythroid cell": "Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal fibroblast": "Fetal fibroblast", 
- "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Kidney intercalated cell": "Kidney intercalated cell", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalSkin_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py deleted file mode 100644 index ed1ec23dd..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_skin_2020_microwell_han_002.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "skin of body" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "Basal cell": "Basal cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Endothelial cell": "Endothelial cell", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "Epithelial cell": "Epithelial cell", - "Erythroid cell": 
"Erythroid cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Fetal Neuron": "Fetal Neuron", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Fetal fibroblast": "Fetal fibroblast", - "Fetal mesenchymal progenitor": "Fetal mesenchymal progenitor", - "Fetal skeletal muscle cell": "Fetal skeletal muscle cell", - "Fetal stromal cell": "Fetal stromal cell", - "Fibroblast": "Fibroblast", - "Kidney intercalated cell": "Kidney intercalated cell", - "Macrophage": "Macrophage", - "Mast cell": "Mast cell", - "Monocyte": "Monocyte", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Primordial germ cell": "Primordial germ cell", - "Proliferating T cell": "Proliferating T cell", - "Smooth muscle cell": "Smooth muscle cell", - "Stromal cell": "Stromal cell", - "T cell": "T cell", - "hESC": "hESC", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalSkin_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py deleted file mode 100644 index 53def3331..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spinalcord_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "spinalcord" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalSpinalCord_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py deleted file mode 100644 index 28c801822..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_001.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "Spleen" - self.class_maps = { - "0": { - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "B cell": "B cell", - "Macrophage": "Macrophage", - "T cell": "T cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Dendritic cell": "Dendritic cell", - "CB CD34+": "CB CD34+", - "Erythroid cell": "Erythroid cell", - "Monocyte": "Monocyte", - "Endothelial cell": "Endothelial cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Proliferating T cell": "Proliferating T cell", - "Fibroblast": "Fibroblast", - "Stromal cell": "Stromal cell", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Mast cell": "Mast cell", - "Smooth muscle cell": "Smooth muscle cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultSpleenParenchyma_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py deleted file mode 100644 index ac2bcf79f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_spleen_2020_microwell_han_002.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "Spleen" - self.class_maps = { - "0": { - "B cell (Plasmocyte)": "B cell (Plasmocyte)", - "Neutrophil": "Neutrophil", - "Endothelial cell (APC)": "Endothelial cell (APC)", - "B cell": "B cell", - "Macrophage": "Macrophage", - "T cell": "T cell", - "Erythroid progenitor cell (RP high)": "Erythroid progenitor cell (RP high)", - "Dendritic cell": "Dendritic cell", - "CB CD34+": "CB CD34+", - "Erythroid cell": "Erythroid cell", - "Monocyte": "Monocyte", - "Endothelial cell": "Endothelial cell", - "Sinusoidal endothelial cell": "Sinusoidal endothelial cell", - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Proliferating T cell": "Proliferating T cell", - "Fibroblast": "Fibroblast", - "Stromal cell": "Stromal cell", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Mast cell": "Mast cell", - "Smooth muscle cell": "Smooth muscle cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="AdultSpleen_1") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py deleted file mode 100644 index 53383af3e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultStomach_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py deleted file mode 100644 index e749f6ac1..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalStomach_1") diff 
--git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py deleted file mode 100644 index 1f00af568..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_003.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalIntestine_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py deleted file mode 100644 index 4deb7c5c3..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_004.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - 
self._load_generalized(sample_id="FetalIntestine_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py deleted file mode 100644 index 9021a3e3b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_005.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py deleted file mode 100644 index 61fb96624..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_006.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def 
_load(self): - self._load_generalized(sample_id="FetalIntestine_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py deleted file mode 100644 index bef7c2130..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_007.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="FetalIntestine_5") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py deleted file mode 100644 index 898570087..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_008.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - 
- def _load(self): - self._load_generalized(sample_id="AdultStomach_3") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py deleted file mode 100644 index e6d0c4af3..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_009.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultStomach_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py deleted file mode 100644 index 31bf383f9..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_stomach_2020_microwell_han_010.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4" - self.organ = "stomach" - self.class_maps = { - "0": {}, - } 
- - def _load(self): - self._load_generalized(sample_id="FetalIntestine_4") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py deleted file mode 100644 index 6e9831bd2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_001.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "thymus" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Erythroid cell": "Ery", - "Erythroid progenitor cell (RP high)": "Ery", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Macrophage": "Mac", - "Monocyte": "Mono", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Proliferating T cell": "Proliferating T cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalThymus_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py deleted file mode 100644 index 2fa27ddab..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thymus_2020_microwell_han_002.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Union 
-from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "thymus" - self.class_maps = { - "0": { - "Antigen presenting cell (RPS high)": "Antigen presenting cell (RPS high)", - "B cell": "B cell", - "CB CD34+": "CB CD34+", - "Dendritic cell": "Dendritic cell", - "Erythroid cell": "Ery", - "Erythroid progenitor cell (RP high)": "Ery", - "Fetal epithelial progenitor": "Fetal epithelial progenitor", - "Macrophage": "Mac", - "Monocyte": "Mono", - "Neutrophil": "Neutrophil", - "Neutrophil (RPS high)": "Neutrophil (RPS high)", - "Proliferating T cell": "Proliferating T cell", - "T cell": "T cell", - }, - } - - def _load(self): - self._load_generalized(sample_id="FetalThymus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py deleted file mode 100644 index db4fdff1c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "thyroid" - self.class_maps = { - "0": {}, 
- } - - def _load(self): - self._load_generalized(sample_id="AdultThyroid_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py deleted file mode 100644 index fb89798bb..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_thyroid_2020_microwell_han_002.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4" - self.organ = "thyroid" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultThyroid_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py deleted file mode 100644 index ab6db836a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_trachea_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "trachea" - self.class_maps = { - "0": 
{}, - } - - def _load(self): - self._load_generalized(sample_id="AdultTrachea_2") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py deleted file mode 100644 index 55b21278f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_ureter_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "ureter" - self.class_maps = { - "0": {}, - } - - def _load(self): - self._load_generalized(sample_id="AdultUreter_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py deleted file mode 100644 index b8b251eb0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_uterus_2020_microwell_han_001.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -from .base import Dataset_d10_1038_s41586_020_2157_4 - - -class Dataset(Dataset_d10_1038_s41586_020_2157_4): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - self.organ = "uterus" - self.class_maps = { - "0": {}, - } 
- - def _load(self): - self._load_generalized(sample_id="AdultUterus_1") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py new file mode 100644 index 000000000..487df65d8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -0,0 +1,331 @@ +import anndata +import numpy as np +import os +import pandas as pd +import scipy.sparse +from typing import Union +import zipfile + +from sfaira.data import DatasetBaseGroupLoadingOneFile + +SAMPLE_IDS = [ + 'AdultAdipose_1', + 'AdultAdrenalGland_2', + 'AdultAdrenalGland_3', + 'AdultArtery_1', + 'AdultAscendingColon_1', + 'AdultBladder_1', + 'AdultBladder_2', + 'AdultCerebellum_1', + 'AdultCervix_1', + 'AdultColon_1', + 'AdultDuodenum_1', + 'AdultEpityphlon_1', + 'AdultEsophagus_1', + 'AdultEsophagus_2', + 'AdultFallopiantube_1', + 'AdultGallbladder_1', + 'AdultGallbladder_2', + 'AdultHeart_1', + 'AdultHeart_2', + 'AdultIleum_2', + 'AdultJejunum_2', + 'AdultKidney_2', + 'AdultKidney_3', + 'AdultKidney_4', + 'AdultLiver_1', + 'AdultLiver_2', + 'AdultLiver_4', + 'AdultLung_1', + 'AdultLung_2', + 'AdultLung_3', + 'AdultMuscle_1', + 'AdultOmentum_1', + 'AdultOmentum_2', + 'AdultOmentum_3', + 'AdultPancreas_1', + 'AdultPeripheralBlood_3', + 'AdultPeripheralBlood_4', + 'AdultPleura_1', + 'AdultProstate_1', + 'AdultRectum_1', + 'AdultSigmoidColon_1', + 'AdultSpleenParenchyma_1', + 'AdultSpleen_1', + 'AdultStomach_1', + 'AdultStomach_2', + 'AdultStomach_3', + 'AdultTemporalLobe_1', + 'AdultThyroid_1', + 'AdultThyroid_2', + 'AdultTrachea_2', + 'AdultTransverseColon_2', + 'AdultUreter_1', + 'AdultUterus_1', + 'BoneMarrow_1', + 'BoneMarrow_2', + 'ChorionicVillus_1', + 'CordBloodCD34P_1', + 'CordBloodCD34P_2', + 'CordBlood_1', + 'CordBlood_2', + 'FetalAdrenalGland_2', + 'FetalAdrenalGland_3', + 'FetalAdrenalGland_4', + 
'FetalBrain_3', + 'FetalBrain_4', + 'FetalBrain_5', + 'FetalBrain_6', + 'FetalCalvaria_1', + 'FetalEyes_1', + 'FetalFemaleGonad_1', + 'FetalFemaleGonad_2', + 'FetalHeart_1', + 'FetalHeart_2', + 'FetalIntestine_1', + 'FetalIntestine_2', + 'FetalIntestine_3', + 'FetalIntestine_4', + 'FetalIntestine_5', + 'FetalKidney_3', + 'FetalKidney_4', + 'FetalKidney_5', + 'FetalKidney_6', + 'FetalLung_1', + 'FetalLung_2', + 'FetalMaleGonad_1', + 'FetalMaleGonad_2', + 'FetalMuscle_1', + 'FetalPancreas_1', + 'FetalPancreas_2', + 'FetalPancreas_3', + 'FetalRib_2', + 'FetalRib_3', + 'FetalSkin_2', + 'FetalSkin_3', + 'FetalSpinalCord_1', + 'FetalStomach_1', + 'FetalStomach_2', + 'FetalThymus_1', + 'FetalThymus_2', + 'HESC_1', + 'Liver_1', + 'Liver_2', + 'NeonatalAdrenalGland_1', + 'PeripheralBlood_1', + 'Placenta_1' +] + + +class Dataset(DatasetBaseGroupLoadingOneFile): + + def __init__( + self, + sample_id: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + + super().__init__( + sample_id=sample_id, + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + **kwargs + ) + + sample_organ_dict = { + 'AdultAdipose_1': 'adipose tissue of abdominal region', + 'AdultAdrenalGland_2': 'adrenal gland', + 'AdultAdrenalGland_3': 'adrenal gland', + 'AdultArtery_1': 'artery', + 'AdultAscendingColon_1': 'ascending colon', + 'AdultBladder_1': 'urinary bladder', + 'AdultBladder_2': 'urinary bladder', + 'AdultCerebellum_1': 'cerebellum', + 'AdultCervix_1': 'uterine cervix', + 'AdultColon_1': 'colon', + 'AdultDuodenum_1': 'duodenum', + 'AdultEpityphlon_1': 'caecum', + 'AdultEsophagus_1': 'esophagus', + 'AdultEsophagus_2': 'esophagus', + 'AdultFallopiantube_1': 'fallopian tube', + 'AdultGallbladder_1': 'gall bladder', + 'AdultGallbladder_2': 'gall bladder', + 'AdultHeart_1': 'heart', + 'AdultHeart_2': 'heart', + 'AdultIleum_2': 'ileum', + 'AdultJejunum_2': 'jejunum', + 'AdultKidney_2': 'kidney', + 
'AdultKidney_3': 'kidney', + 'AdultKidney_4': 'kidney', + 'AdultLiver_1': 'liver', + 'AdultLiver_2': 'liver', + 'AdultLiver_4': 'liver', + 'AdultLung_1': 'lung', + 'AdultLung_2': 'lung', + 'AdultLung_3': 'lung', + 'AdultMuscle_1': 'skeletal muscle organ', + 'AdultOmentum_1': 'omentum', + 'AdultOmentum_2': 'omentum', + 'AdultOmentum_3': 'omentum', + 'AdultPancreas_1': 'pancreas', + 'AdultPeripheralBlood_3': 'blood', + 'AdultPeripheralBlood_4': 'blood', + 'AdultPleura_1': 'pleura', + 'AdultProstate_1': 'prostate gland', + 'AdultRectum_1': 'rectum', + 'AdultSigmoidColon_1': 'sigmoid colon', + 'AdultSpleenParenchyma_1': 'parenchyma of spleen', + 'AdultSpleen_1': 'spleen', + 'AdultStomach_1': 'stomach', + 'AdultStomach_2': 'stomach', + 'AdultStomach_3': 'stomach', + 'AdultTemporalLobe_1': 'temporal lobe', + 'AdultThyroid_1': 'thyroid gland', + 'AdultThyroid_2': 'thyroid gland', + 'AdultTrachea_2': 'trachea', + 'AdultTransverseColon_2': 'transverse colon', + 'AdultUreter_1': 'ureter', + 'AdultUterus_1': 'uterus', + 'BoneMarrow_1': 'bone marrow', + 'BoneMarrow_2': 'bone marrow', + 'ChorionicVillus_1': 'chorionic villus', + 'CordBloodCD34P_1': 'umbilical cord blood', + 'CordBloodCD34P_2': 'umbilical cord blood', + 'CordBlood_1': 'umbilical cord blood', + 'CordBlood_2': 'umbilical cord blood', + 'FetalAdrenalGland_2': 'adrenal gland', + 'FetalAdrenalGland_3': 'adrenal gland', + 'FetalAdrenalGland_4': 'adrenal gland', + 'FetalBrain_3': 'brain', + 'FetalBrain_4': 'brain', + 'FetalBrain_5': 'brain', + 'FetalBrain_6': 'brain', + 'FetalCalvaria_1': 'vault of skull', + 'FetalEyes_1': 'eye', + 'FetalFemaleGonad_1': 'ovary', + 'FetalFemaleGonad_2': 'ovary', + 'FetalHeart_1': 'heart', + 'FetalHeart_2': 'heart', + 'FetalIntestine_1': 'intestine', + 'FetalIntestine_2': 'intestine', + 'FetalIntestine_3': 'intestine', + 'FetalIntestine_4': 'intestine', + 'FetalIntestine_5': 'intestine', + 'FetalKidney_3': 'kidney', + 'FetalKidney_4': 'kidney', + 'FetalKidney_5': 'kidney', + 
'FetalKidney_6': 'kidney', + 'FetalLung_1': 'lung', + 'FetalLung_2': 'lung', + 'FetalMaleGonad_1': 'testis', + 'FetalMaleGonad_2': 'testis', + 'FetalMuscle_1': 'skeletal muscle organ', + 'FetalPancreas_1': 'pancreas', + 'FetalPancreas_2': 'pancreas', + 'FetalPancreas_3': 'pancreas', + 'FetalRib_2': 'rib', + 'FetalRib_3': 'rib', + 'FetalSkin_2': 'skin of body', + 'FetalSkin_3': 'skin of body', + 'FetalSpinalCord_1': 'spinal cord', + 'FetalStomach_1': 'stomach', + 'FetalStomach_2': 'stomach', + 'FetalThymus_1': 'thymus', + 'FetalThymus_2': 'thymus', + 'HESC_1': '', + 'Liver_1': 'liver', + 'Liver_2': 'liver', + 'NeonatalAdrenalGland_1': 'adrenal gland', + 'PeripheralBlood_1': 'blood', + 'Placenta_1': 'placenta', + } + + self.download_url_data = "https://ndownloader.figshare.com/files/17727365" + self.download_url_meta = [ + "https://ndownloader.figshare.com/files/21758835", + "https://ndownloader.figshare.com/files/22447898", + ] + + self.obs_key_sample = "sample" + + self.organ = sample_organ_dict[self.sample_id] + self.id = f"human_{self.organ}_2020_microwell_han_{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}" \ + f"_10.1038/s41586-020-2157-4" + + self.author = "Guo" + self.doi = "10.1038/s41586-020-2157-4" + self.healthy = True + self.normalization = "raw" + self.organism = "human" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2020 + + self.obs_key_cellontology_original = "cell_ontology_class" + self.obs_key_dev_stage = "dev_stage" + self.obs_key_sex = "gender" + self.obs_key_age = "age" + + self.var_symbol_col = "index" + + def _load_full(self): + self.adata = anndata.read(os.path.join(self.data_dir, "HCL_Fig1_self.adata.h5ad")) + # convert to sparse matrix + self.adata.X = scipy.sparse.csr_matrix(self.adata.X).copy() + + # harmonise annotations + for col in ["batch", "tissue"]: + self.adata.obs[col] = self.adata.obs[col].astype("str") + self.adata.obs.index = self.adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", 
regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + self.adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", + "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) + self.adata.obs.index = ["-".join(i.split("-")[:-1]) for i in self.adata.obs.index] + + # load celltype labels and harmonise them + # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: + fig1_anno = pd.read_excel( + os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + index_col="cellnames", + engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables + ) + fig1_anno.index = fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + + # check that the order of cells and cell labels is the same + assert np.all(fig1_anno.index == self.adata.obs.index) + + # add annotations to self.adata object and rename columns + self.adata.obs = pd.concat([self.adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) + self.adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", + "celltype_global"] + + # add sample-wise annotations to the full self.adata object + df = pd.DataFrame( + columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", + "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) + archive = zipfile.ZipFile(os.path.join(self.data_dir, "annotation_rmbatch_data_revised417.zip")) + for f in archive.namelist(): + df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") + df = pd.concat([df, df1], sort=True) + df = df.set_index("Cell_id") + self.adata = 
self.adata[[i in df.index for i in self.adata.obs.index]].copy() + a_idx = self.adata.obs.index.copy() + self.adata.obs = pd.concat([self.adata.obs, df[ + ["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"] + ]], axis=1) + assert np.all(a_idx == self.adata.obs.index) + + # remove mouse cells from the object # ToDo: add this back in as mouse data sets? + self.adata = self.adata[self.adata.obs["Source"] != "MCA2.0"].copy() + + # tidy up the column names of the obs annotations + self.adata.obs.columns = [ + "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", + "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 49db8f35a..6e66057eb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -62,7 +62,7 @@ def __init__( organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ "aorta" if organ in ["aorta"] else \ - "bladder organ" if organ in ["bladder"] else \ + "urinary bladder" if organ in ["bladder"] else \ "bone marrow" if organ in ["marrow"] else \ "brain" if organ in ["brain_non-myeloid", "brain_myeloid"] else \ "colon" if organ in ["large_intestine"] else \ From 8042b50b0551d8cd2745199d72c23a3a88e78b6b Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 12 Feb 2021 17:55:16 +0100 Subject: [PATCH 050/161] fixed d10_1038_s41593_019_0393_4 (#124) --- .../mouse_brain_2019_10x_hove_001.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py index 587c33d3f..3265b971b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py @@ -29,7 +29,6 @@ def __init__( self.doi = "10.1038/s41593-019-0393-4" self.healthy = True self.normalization = "raw" - self.organ = "brain" self.organism = "mouse" self.protocol = "10X sequencing" self.state_exact = "healthy" @@ -38,9 +37,8 @@ def __init__( self.var_ensembl_col = "ensembl" self.var_symbol_col = "names" - self.obs_key_cellontology_class = self._ADATA_IDS_SFAIRA.cell_ontology_class - self.obs_key_cellontology_id = self._ADATA_IDS_SFAIRA.cell_ontology_id - self.obs_key_cellontology_original = self._ADATA_IDS_SFAIRA.cell_ontology_class + self.obs_key_cellontology_original = "cluster" + self.obs_key_organ = "sample_anatomy" self.class_maps = { "0": { @@ -65,8 +63,8 @@ def _load(self): sep="\t", header=None )[0].values - assert len(obs_names) == self.adata.shape[0] - assert var.shape[0] == self.adata.shape[1] + assert len(obs_names) == self.adata.shape[0] # ToDo take asserts out + assert var.shape[0] == self.adata.shape[1] # ToDo take asserts out obs = pandas.read_csv(fn[1]) # Match annotation to raw data. @@ -79,8 +77,17 @@ def _load(self): self.adata = self.adata[idx_map, :] obs_names = obs_names[idx_map] + # Map anatomic locations to UBERON: + map_anatomy = { + "Choroid plexus": "choroid plexu", + "Dura mater": "dura mater", + 'Enr. 
SDM': "brain meninx", + "Whole brain": "brain", + } + obs["sample_anatomy"] = [map_anatomy[x] for x in obs["sample"].values] + # Assign attributes self.adata.obs_names = obs_names self.adata.var = var self.adata.obs = obs - assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) + assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) # ToDo take asserts out From 73f6e4c2516d3d96be6db42b680faa69f90958ed Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Fri, 12 Feb 2021 18:08:26 +0100 Subject: [PATCH 051/161] translated d10_1038_s41586_020_2157_4 to DatasetBaseGroupLoadingOneFile --- .../human_x_2020_microwellseq_han_x.py | 4 +- .../mouse_brain_2019_10x_hove_001.py | 39 ++++++++++++------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 487df65d8..379e5ab40 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -253,8 +253,8 @@ def __init__( self.obs_key_sample = "sample" self.organ = sample_organ_dict[self.sample_id] - self.id = f"human_{self.organ}_2020_microwell_han_{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}" \ - f"_10.1038/s41586-020-2157-4" + self.id = f"human_{''.join(self.organ.split(' '))}_2020_microwell_han_" \ + f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_10.1038/s41586-020-2157-4" self.author = "Guo" self.doi = "10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py index 3265b971b..d932be0f3 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py @@ -5,20 +5,38 @@ import zipfile import scipy.io from typing import Union -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingOneFile +SAMPLE_IDS = [ + "Choroid plexus", + "Dura mater", + "Enr. SDM", + "Whole brain", +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingOneFile): def __init__( self, + sample_id: str, data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_brain_2019_10x_hove_001_10.1038/s41593-019-0393-4" + super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + sample_organ_dict = { + "Choroid plexus": "choroid plexus", + "Dura mater": "dura mater", + "Enr. 
SDM": "brain meninx", + "Whole brain": "brain", + } + self.obs_key_sample = "sample" + self.organ = sample_organ_dict[self.sample_id] + + self.id = f"mouse_{''.join(self.organ.split(' '))}_2019_10x_hove_" \ + f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_10.1038/s41593-019-0393-4" self.download_url_data = \ "https://www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" @@ -35,7 +53,7 @@ def __init__( self.year = 2019 self.var_ensembl_col = "ensembl" - self.var_symbol_col = "names" + self.var_symbol_col = "name" self.obs_key_cellontology_original = "cluster" self.obs_key_organ = "sample_anatomy" @@ -48,7 +66,7 @@ def __init__( }, } - def _load(self): + def _load_full(self): fn = [ os.path.join(self.data_dir, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), os.path.join(self.data_dir, "annot_fullAggr.csv") @@ -77,15 +95,6 @@ def _load(self): self.adata = self.adata[idx_map, :] obs_names = obs_names[idx_map] - # Map anatomic locations to UBERON: - map_anatomy = { - "Choroid plexus": "choroid plexu", - "Dura mater": "dura mater", - 'Enr. 
SDM': "brain meninx", - "Whole brain": "brain", - } - obs["sample_anatomy"] = [map_anatomy[x] for x in obs["sample"].values] - # Assign attributes self.adata.obs_names = obs_names self.adata.var = var From 48d080167f988d6961bfd55d5c334bfb1368ceb7 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Fri, 12 Feb 2021 18:15:39 +0100 Subject: [PATCH 052/161] fixed _subset_from_group --- sfaira/data/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 9d682575f..b9d6853d3 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1610,7 +1610,7 @@ def _subset_from_group( :return: """ assert self.adata is not None, "this method should only be called if .adata is not None" - for k, v in subset_items: + for k, v in subset_items.items(): self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] self._unprocessed_full_group_object = False From 3b841b1781478f2437be7bb7f4171280733c4deb Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Fri, 12 Feb 2021 18:26:39 +0100 Subject: [PATCH 053/161] renaming changes --- ...nad_2018_10x_guo_001.py => human_testis_2018_10x_guo_001.py} | 0 .../human_lung_2020_10x_travaglini_001.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/{human_malegonad_2018_10x_guo_001.py => human_testis_2018_10x_guo_001.py} (100%) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_malegonad_2018_10x_guo_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index f397d69d1..da7dccdd4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -52,7 +52,7 @@ def __init__( self.download_url_data = f"{synapse_id[self.sample_fn]},{self.sample_fn}" self.download_url_meta = None - self.author = "Krasnow" + self.author = "Travaglini" self.doi = "10.1038/s41586-020-2922-4" self.healthy = True self.normalization = "raw" From b96bfd394bad498b73edd676a9274e4452a939ce Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Fri, 12 Feb 2021 18:43:27 +0100 Subject: [PATCH 054/161] updated protocol --- ...xSn_lake_001.py => human_kidney_2019_droncseq_lake_001.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/{human_kidney_2019_10xSn_lake_001.py => human_kidney_2019_droncseq_lake_001.py} (97%) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py similarity index 97% rename from sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index d008fa602..072aec106 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_10xSn_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -16,7 +16,7 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2019_10xSn_lake_001_10.1038/s41467-019-10861-2" + self.id = 
"human_kidney_2019_droncseq_lake_001_10.1038/s41467-019-10861-2" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" @@ -29,7 +29,7 @@ def __init__( self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.protocol = "10X sequencing" + self.protocol = "DroNc-seq" self.state_exact = "healthy" self.year = 2019 From 798f307821838396aa56542b82a83a87acc73271 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Fri, 12 Feb 2021 19:27:25 +0100 Subject: [PATCH 055/161] renamed HCL IDs --- .../d10_1016_j_cell_2018_02_001/base.py | 58 --- .../mouse_bladder_2018_microwell_han_001.py | 41 --- .../mouse_blood_2018_microwell_han_001.py | 46 --- .../mouse_blood_2018_microwell_han_002.py | 46 --- .../mouse_blood_2018_microwell_han_003.py | 46 --- .../mouse_blood_2018_microwell_han_004.py | 46 --- .../mouse_blood_2018_microwell_han_005.py | 46 --- .../mouse_bone_2018_microwell_001.py | 42 --- .../mouse_brain_2018_microwell_han_001.py | 39 -- .../mouse_brain_2018_microwell_han_002.py | 39 -- ...ouse_femalegonad_2018_microwell_han_001.py | 39 -- ...ouse_femalegonad_2018_microwell_han_002.py | 39 -- .../mouse_ileum_2018_microwell_han_001.py | 48 --- .../mouse_ileum_2018_microwell_han_002.py | 48 --- .../mouse_ileum_2018_microwell_han_003.py | 48 --- .../mouse_kidney_2018_microwell_han_001.py | 27 -- .../mouse_kidney_2018_microwell_han_002.py | 58 --- .../mouse_liver_2018_microwell_han_001.py | 45 --- .../mouse_liver_2018_microwell_han_002.py | 39 -- .../mouse_lung_2018_microwell_han_001.py | 57 --- .../mouse_lung_2018_microwell_han_002.py | 57 --- .../mouse_lung_2018_microwell_han_003.py | 57 --- .../mouse_malegonad_2018_microwell_han_001.py | 44 --- .../mouse_malegonad_2018_microwell_han_002.py | 44 --- ...use_mammarygland_2018_microwell_han_001.py | 43 --- ...use_mammarygland_2018_microwell_han_002.py | 43 --- 
...use_mammarygland_2018_microwell_han_003.py | 43 --- ...use_mammarygland_2018_microwell_han_004.py | 43 --- .../mouse_muscle_2018_microwell_han_001.py | 42 --- .../mouse_pancreas_2018_microwell_han_001.py | 47 --- .../mouse_placenta_2018_microwell_han_001.py | 53 --- .../mouse_placenta_2018_microwell_han_002.py | 53 --- .../mouse_prostate_2018_microwell_han_001.py | 31 -- .../mouse_prostate_2018_microwell_han_002.py | 31 -- .../mouse_rib_2018_microwell_han_001.py | 50 --- .../mouse_rib_2018_microwell_han_002.py | 50 --- .../mouse_rib_2018_microwell_han_003.py | 50 --- .../mouse_spleen_2018_microwell_han_001.py | 36 -- .../mouse_stomach_2018_microwell_han_001.py | 42 --- .../mouse_thymus_2018_microwell_han_001.py | 33 -- .../mouse_uterus_2018_microwell_han_001.py | 44 --- .../mouse_uterus_2018_microwell_han_002.py | 44 --- .../mouse_x_2018_microwellseq_han_x.py | 346 ++++++++++++++++++ .../human_x_2020_microwellseq_han_x.py | 2 +- 44 files changed, 347 insertions(+), 1878 deletions(-) delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py delete mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py 
delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py delete mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py deleted file mode 100644 index ef0c0b7c5..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/base.py +++ /dev/null @@ -1,58 +0,0 @@ -import anndata -import numpy as np -import pandas -from typing import Union -from sfaira.data import DatasetBase -import zipfile -import tarfile -import os - - -class Dataset_d10_1016_j_cell_2018_02_001(DatasetBase): - """ - This is a dataloader template for mca data. - """ - - def __init__( - self, - data_path: Union[str, None], - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - self.download_url_data = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" - self.download_url_meta = None - - self.obs_key_cellontology_class = "Annotation" - self.obs_key_cellontology_original = "Annotation" - - self.author = "Guo" - self.doi = "10.1016/j.cell.2018.02.001" - self.normalization = "raw" - self.healthy = True - self.organism = "mouse" - self.protocol = "microwell-seq" - self.state_exact = "healthy" - self.year = 2018 - - self.var_symbol_col = "index" - - def _load_generalized(self, samplename): - fn = os.path.join(self.data_dir, '5435866.zip') - - with zipfile.ZipFile(fn) as archive: - celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) - celltypes = celltypes.drop(["Unnamed: 0"], axis=1) - - with tarfile.open(fileobj=archive.open('MCA_500more_dge.tar.gz')) as tar: - data = pandas.read_csv(tar.extractfile(f'500more_dge/{samplename}.txt.gz'), - 
compression="gzip", - sep=" ", - header=0 - ) - - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py deleted file mode 100644 index 3b3ab7330..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bladder_2018_microwell_han_001.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "urinary bladder" - - self.class_maps = { - "0": { - "Endothelial cell_Ly6c1 high(Bladder)": "endothelial cell", - "Vascular endothelial cell(Bladder)": "endothelial cell", - "Urothelium(Bladder)": "bladder urothelial cell", - "Dendritic cell_Cd74 high(Bladder)": "dendritic cell", - "Dendritic cell_Lyz2 high(Bladder)": "dendritic cell", - "Macrophage_Pf4 high(Bladder)": "macrophage", - "NK cell(Bladder)": "NK cell", - "Basal epithelial cell(Bladder)": "basal epithelial cell", - "Epithelial cell_Upk3a high(Bladder)": "epithelial cell", - "Epithelial cell_Gm23935 high(Bladder)": "epithelial cell", - "Mesenchymal stromal cell(Bladder)": "mesenchymal stromal cell", - "Stromal cell_Dpt high(Bladder)": "stromal cell", - "Stromal cell_Car3 high(Bladder)": "stromal cell", - "Smooth muscle cell(Bladder)": "smooth muscle cell", - "Vascular smooth muscle 
progenitor cell(Bladder)": "smooth muscle cell", - "Umbrella cell(Bladder)": "umbrella cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Bladder_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py deleted file mode 100644 index 5b454a528..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_001.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "blood" - - self.class_maps = { - "0": { - "B cell_Igha high(Peripheral_Blood)": "B cell", - "B cell_Ly6d high(Peripheral_Blood)": "B cell", - "B cell_Rps27rt high(Peripheral_Blood)": "B cell", - "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", - "Basophil_Prss34 high(Peripheral_Blood)": "basophil", - "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", - "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", - "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", - "Macrophage_Ace high(Peripheral_Blood)": "macrophage", - "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", - "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", - "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", - "Monocyte_Elane high(Peripheral_Blood)": "monocyte", - "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", - "NK cell_Gzma high(Peripheral_Blood)": "NK cell", - "Neutrophil_Camp 
high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", - "T cell_Gm14303 high(Peripheral_Blood)": "T cell", - "T cell_Trbc2 high(Peripheral_Blood)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="PeripheralBlood1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py deleted file mode 100644 index 210f6484c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_002.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "blood" - - self.class_maps = { - "0": { - "B cell_Igha high(Peripheral_Blood)": "B cell", - "B cell_Ly6d high(Peripheral_Blood)": "B cell", - "B cell_Rps27rt high(Peripheral_Blood)": "B cell", - "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", - "Basophil_Prss34 high(Peripheral_Blood)": "basophil", - "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", - "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", - "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", - "Macrophage_Ace high(Peripheral_Blood)": "macrophage", - "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", - "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", - "Macrophage_S100a4 
high(Peripheral_Blood)": "macrophage", - "Monocyte_Elane high(Peripheral_Blood)": "monocyte", - "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", - "NK cell_Gzma high(Peripheral_Blood)": "NK cell", - "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", - "T cell_Gm14303 high(Peripheral_Blood)": "T cell", - "T cell_Trbc2 high(Peripheral_Blood)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="PeripheralBlood2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py deleted file mode 100644 index 6ee887fd7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_003.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.organ = "blood" - - self.class_maps = { - "0": { - "B cell_Igha high(Peripheral_Blood)": "B cell", - "B cell_Ly6d high(Peripheral_Blood)": "B cell", - "B cell_Rps27rt high(Peripheral_Blood)": "B cell", - "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", - "Basophil_Prss34 high(Peripheral_Blood)": "basophil", - "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", - "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", - "Erythroblast_Hba-a2 
high(Peripheral_Blood)": "erythroblast", - "Macrophage_Ace high(Peripheral_Blood)": "macrophage", - "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", - "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", - "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", - "Monocyte_Elane high(Peripheral_Blood)": "monocyte", - "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", - "NK cell_Gzma high(Peripheral_Blood)": "NK cell", - "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", - "T cell_Gm14303 high(Peripheral_Blood)": "T cell", - "T cell_Trbc2 high(Peripheral_Blood)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="PeripheralBlood3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py deleted file mode 100644 index 554a13555..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_004.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.organ = "blood" - - self.class_maps = { - "0": { - "B cell_Igha high(Peripheral_Blood)": "B cell", - "B cell_Ly6d high(Peripheral_Blood)": "B cell", - "B cell_Rps27rt high(Peripheral_Blood)": "B cell", - "B cell_Vpreb3 high(Peripheral_Blood)": "B 
cell", - "Basophil_Prss34 high(Peripheral_Blood)": "basophil", - "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", - "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", - "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", - "Macrophage_Ace high(Peripheral_Blood)": "macrophage", - "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", - "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", - "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", - "Monocyte_Elane high(Peripheral_Blood)": "monocyte", - "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", - "NK cell_Gzma high(Peripheral_Blood)": "NK cell", - "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", - "T cell_Gm14303 high(Peripheral_Blood)": "T cell", - "T cell_Trbc2 high(Peripheral_Blood)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="PeripheralBlood4_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py deleted file mode 100644 index a1be6338c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_blood_2018_microwell_han_005.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001" - self.organ = "blood" - - 
self.class_maps = { - "0": { - "B cell_Igha high(Peripheral_Blood)": "B cell", - "B cell_Ly6d high(Peripheral_Blood)": "B cell", - "B cell_Rps27rt high(Peripheral_Blood)": "B cell", - "B cell_Vpreb3 high(Peripheral_Blood)": "B cell", - "Basophil_Prss34 high(Peripheral_Blood)": "basophil", - "Dendritic cell_Siglech high(Peripheral_Blood)": "dendritic cell", - "Erythroblast_Car2 high(Peripheral_Blood)": "erythroblast", - "Erythroblast_Hba-a2 high(Peripheral_Blood)": "erythroblast", - "Macrophage_Ace high(Peripheral_Blood)": "macrophage", - "Macrophage_Flt-ps1 high(Peripheral_Blood)": "macrophage", - "Macrophage_Pf4 high(Peripheral_Blood)": "macrophage", - "Macrophage_S100a4 high(Peripheral_Blood)": "macrophage", - "Monocyte_Elane high(Peripheral_Blood)": "monocyte", - "Monocyte_F13a1 high(Peripheral_Blood)": "monocyte", - "NK cell_Gzma high(Peripheral_Blood)": "NK cell", - "Neutrophil_Camp high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Il1b high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Ltf high(Peripheral_Blood)": "neutrophil", - "Neutrophil_Retnlg high(Peripheral_Blood)": "neutrophil", - "T cell_Gm14303 high(Peripheral_Blood)": "T cell", - "T cell_Trbc2 high(Peripheral_Blood)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="PeripheralBlood5_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py deleted file mode 100644 index 0445e8a16..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_bone_2018_microwell_001.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_bone_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "bone tissue" - - self.class_maps = { - "0": { - "B cell_Igkc high(Bone-Marrow)": "naive B cell", - "Dendritic cell_H2-Eb1 high(Bone-Marrow)": "dendritic cell", - "Dendritic cell_Siglech high(Bone-Marrow)": "dendritic cell", - "Macrophage_Ms4a6c high(Bone-Marrow)": "macrophage", - "Macrophage_S100a4 high(Bone-Marrow)": "macrophage", - "Erythroblast(Bone-Marrow)": "erythroid progenitor", - "Mast cell(Bone-Marrow)": "mast cell", - "Monocyte_Mif high(Bone-Marrow)": "monocyte", - "Monocyte_Prtn3 high(Bone-Marrow)": "monocyte", - "Neutrophil progenitor(Bone-Marrow)": "neutrophil progenitor", - "Neutrophil_Cebpe high(Bone-Marrow)": "neutrophil", - "Neutrophil_Fcnb high(Bone-Marrow)": "neutrophil", - "Neutrophil_Mmp8 high(Bone-Marrow)": "neutrophil", - "Neutrophil_Ngp high(Bone-Marrow)": "neutrophil", - "Hematopoietic stem progenitor cell(Bone-Marrow)": "hematopoietic precursor cell", - "Pre-pro B cell(Bone-Marrow)": "early pro-B cell", - "T cell_Ms4a4b high(Bone-Marrow)": "CD4-positive, alpha-beta T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="BoneMarrow1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py deleted file mode 100644 index 08ef1acde..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_001.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "brain" - - self.class_maps = { - "0": { - "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial cell", - "Astrocyte_Atp1b2 high(Brain)": "astrocyte", - "Astrocyte_Mfe8 high(Brain)": "astrocyte", - "Astrocyte_Pla2g7 high(Brain)": "astrocyte", - "Granulocyte_Ngp high(Brain)": "granulocyte", - "Hypothalamic ependymal cell(Brain)": "ependymal cell", - "Macrophage_Klf2 high(Brain)": "macrophage", - "Macrophage_Lyz2 high(Brain)": "macrophage", - "Microglia(Brain)": "microglial cell", - "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", - "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", - "Neuron(Brain)": "neuron", - "Pan-GABAergic(Brain)": "GABAergic cell", - "Schwann cell(Brain)": "schwann cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Brain1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py deleted file mode 100644 index a2536bc13..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_brain_2018_microwell_han_002.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "brain" - - self.class_maps = { - "0": { - "Astroglial cell(Bergman glia)(Brain)": "Bergmann glial 
cell", - "Astrocyte_Atp1b2 high(Brain)": "astrocyte", - "Astrocyte_Mfe8 high(Brain)": "astrocyte", - "Astrocyte_Pla2g7 high(Brain)": "astrocyte", - "Granulocyte_Ngp high(Brain)": "granulocyte", - "Hypothalamic ependymal cell(Brain)": "ependymal cell", - "Macrophage_Klf2 high(Brain)": "macrophage", - "Macrophage_Lyz2 high(Brain)": "macrophage", - "Microglia(Brain)": "microglial cell", - "Myelinating oligodendrocyte(Brain)": "oligodendrocyte", - "Oligodendrocyte precursor cell(Brain)": "oligodendrocyte precursor cell", - "Neuron(Brain)": "neuron", - "Pan-GABAergic(Brain)": "GABAergic cell", - "Schwann cell(Brain)": "schwann cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Brain2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py deleted file mode 100644 index 84774208a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_ovary_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "ovary" - - self.class_maps = { - "0": { - "Cumulus cell_Car14 high(Ovary)": "cumulus cell", - "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", - "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", - "Granulosa cell_Inhba high(Ovary)": "granulosa cell", - "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", - "Large luteal cell(Ovary)": "large luteal cell", - 
"Macrophage_Lyz2 high(Ovary)": "macrophage", - "Marcrophage_Cd74 high(Ovary)": "macrophage", - "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", - "Ovarian vascular surface endothelium cell(Ovary)": "endothelial cell of ovarian surface", - "Small luteal cell(Ovary)": "small luteal cell", - "Stroma cell (Ovary)": "stromal cell", - "Thecal cell(Ovary)": "thecal cell", - "luteal cells(Ovary)": "luteal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Ovary1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py deleted file mode 100644 index 5e9742d70..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_femalegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_ovary_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "ovary" - - self.class_maps = { - "0": { - "Cumulus cell_Car14 high(Ovary)": "cumulus cell", - "Cumulus cell_Nupr1 high(Ovary)": "cumulus cell", - "Cumulus cell_Ube2c high(Ovary)": "cumulus cell", - "Granulosa cell_Inhba high(Ovary)": "granulosa cell", - "Granulosa cell_Kctd14 high(Ovary)": "granulosa cell", - "Large luteal cell(Ovary)": "large luteal cell", - "Macrophage_Lyz2 high(Ovary)": "macrophage", - "Marcrophage_Cd74 high(Ovary)": "macrophage", - "Ovarian surface epithelium cell(Ovary)": "epithelial cell of ovarian surface", - "Ovarian vascular surface 
endothelium cell(Ovary)": "endothelial cell of ovarian surface", - "Small luteal cell(Ovary)": "small luteal cell", - "Stroma cell (Ovary)": "stromal cell", - "Thecal cell(Ovary)": "thecal cell", - "luteal cells(Ovary)": "luteal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Ovary2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py deleted file mode 100644 index 57d5f7d18..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_001.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "ileum" - - self.class_maps = { - "0": { - "B cell_Ighd high(Small-Intestine)": "B cell", - "B cell_Igkv12-46 high(Small-Intestine)": "B cell", - "B cell_Jchain high(Small-Intestine)": "B cell", - "B cell_Ms4a1 high(Small-Intestine)": "B cell", - "Columnar epithelium(Small-Intestine)": "epithelial cell", - "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", - "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", - "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", - "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", - "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small 
intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", - "Macrophage_Apoe high(Small-Intestine)": "macrophage", - "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", - "Paneth cell(Small-Intestine)": "paneth cell", - "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", - "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", - "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", - "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", - "T cell_Ccl5 high(Small-Intestine)": "T cell", - "T cell_Icos high(Small-Intestine)": "T cell", - "T cell_Cd7 high(Small-Intestine)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="SmallIntestine1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py deleted file mode 100644 index e09ac957f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_002.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "ileum" - - self.class_maps = { - "0": { - "B cell_Ighd high(Small-Intestine)": "B cell", - "B cell_Igkv12-46 high(Small-Intestine)": "B cell", - "B cell_Jchain high(Small-Intestine)": "B cell", - "B cell_Ms4a1 high(Small-Intestine)": "B cell", - "Columnar epithelium(Small-Intestine)": 
"epithelial cell", - "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", - "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", - "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", - "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", - "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", - "Macrophage_Apoe high(Small-Intestine)": "macrophage", - "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", - "Paneth cell(Small-Intestine)": "paneth cell", - "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", - "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", - "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", - "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", - "T cell_Ccl5 high(Small-Intestine)": "T cell", - "T cell_Icos high(Small-Intestine)": "T cell", - "T cell_Cd7 high(Small-Intestine)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="SmallIntestine2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py deleted file mode 100644 index a8f724263..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_ileum_2018_microwell_han_003.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] 
= None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.organ = "ileum" - - self.class_maps = { - "0": { - "B cell_Ighd high(Small-Intestine)": "B cell", - "B cell_Igkv12-46 high(Small-Intestine)": "B cell", - "B cell_Jchain high(Small-Intestine)": "B cell", - "B cell_Ms4a1 high(Small-Intestine)": "B cell", - "Columnar epithelium(Small-Intestine)": "epithelial cell", - "Dendritic cell_Siglech high(Small-Intestine)": "dendritic cell", - "Dendrtic cell_Cst3 high(Small-Intestine)": "dendritic cell", - "Epithelial cell_Kcne3 high(Small-Intestine)": "epithelial cell", - "Epithelial cell_Sh2d6 high(Small-Intestine)": "epithelial cell", - "Epithelium of small intestinal villi_Fabp1 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_Fabp6 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_Gm23935 high(Small-Intestine)": "epithelial cell villi", - "Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine)": "epithelial cell villi", - "Macrophage_Apoe high(Small-Intestine)": "macrophage", - "Macrophage_Cxcl2 high(Small-Intestine)": "macrophage", - "Paneth cell(Small-Intestine)": "paneth cell", - "S cell_Chgb high(Small-Intestine)": "enteroendocrine cell", - "S cell_Gip high(Small-Intestine)": "enteroendocrine cell", - "Stromal cell_Adamdec1 high(Small-Intestine)": "stromal cell", - "Stromal cell_Dcn high(Small-Intestine)": "stromal cell", - "T cell_Ccl5 high(Small-Intestine)": "T cell", - "T cell_Icos high(Small-Intestine)": "T cell", - "T cell_Cd7 high(Small-Intestine)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="SmallIntestine3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py deleted file mode 100644 index 90890161e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_001.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "kidney" - - self.class_maps = { - "0": { - "Cell in cell cycle(Fetal_Kidney)": "fetal proliferative cell", - "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Kidney1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py deleted file mode 100644 index c65a14f74..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_kidney_2018_microwell_han_002.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "kidney" - - self.class_maps = { - "0": { - 
"Adipocyte(Fetal_Kidney)": "fetal adipocyte", - "B cell(Kidney)": "B cell", - "Dendritic cell_Ccr7 high(Kidney)": "dendritic cell", - "Dendritic cell_Cst3 high(Kidney)": "dendritic cell", - "Distal collecting duct principal cell_Cldn4 high(Kidney)": "kidney collecting duct principal cell", - "Distal collecting duct principal cell_Hsd11b2 high(Kidney)": "kidney collecting duct principal cell", - "Distal convoluted tubule_Pvalb high(Kidney)": "kidney distal convoluted tubule epithelial cell", - "Distal convoluted tubule_S100g high(Kidney)": "kidney distal convoluted tubule epithelial cell", - "Endothelial cell(Kidney)": "fenestrated cell", - "Epithelial cell_Cryab high(Kidney)": "epithelial cell", - "Fenestrated endothelial cell_Plvap high(Kidney)": "fenestrated cell", - "Fenestrated endothelial cell_Tm4sf1 high(Kidney)": "fenestrated cell", - "Glomerular epithelial cell_Aldh1a2 high(Fetal_Kidney)": "glomerular epithelial cell", - "Intercalated cells of collecting duct_Aqp6 high(Kidney)": "kidney collecting duct epithelial cell", - "Intercalated cells of collecting duct_Slc26a4 high(Kidney)": "kidney collecting duct epithelial cell", - "Macrophage_Ccl4 high (Kidney)": "macrophage", - "Macrophage_Lyz2 high(Kidney)": "macrophage", - "Metanephric mesenchyme(Fetal_Kidney)": "fetal mesenchymal cell", - "Neutrophil progenitor_S100a8 high(Kidney)": "neutrophil progenitor", - "Proximal tubule brush border cell(Kidney)": "brush cell", - "Proximal tubule cell_Cyp4a14 high(Kidney)": "epithelial cell of proximal tubule", - "Proximal tubule cell_Osgin1 high(Kidney)": "epithelial cell of proximal tubule", - "S1 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", - "S3 proximal tubule cells(Kidney)": "epithelial cell of proximal tubule", - "Stromal cell_Ankrd1 high(Kidney)": "fibroblast", - "Stromal cell_Cxcl10 high(Kidney)": "fibroblast", - "Stromal cell_Dcn high(Kidney)": "fibroblast", - "Stromal cell_Mgp high(Fetal_Kidney)": "fibroblast", - "Stromal cell_Mgp 
high(Kidney)": "fibroblast", - "Stromal cell_Ptgds high(Kidney)": "fibroblast", - "T cell(Kidney)": "T cell", - "Thick ascending limb of the loop of Henle(Kidney)": "kidney loop of Henle ascending limb epithelial cell", - "Ureteric epithelium(Kidney)": "ureteric epithelial cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Kidney2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py deleted file mode 100644 index bb995046e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_001.py +++ /dev/null @@ -1,45 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "liver" - - self.class_maps = { - "0": { - "B cell_Fcmr high(Liver)": "B cell", - "B cell_Jchain high(Liver)": "B cell", - "Dendritic cell_Cst3 high(Liver)": "dendritic cell", - "Dendritic cell_Siglech high(Liver)": "dendritic cell", - "Endothelial cell(Liver)": "endothelial cell of hepatic sinusoid", - "Epithelial cell(Liver)": "duct epithelial cell", - "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", - "Erythroblast_Hbb-bs high(Liver)": "erythroblast", - "Erythroblast_Hbb-bt high(Liver)": "erythroblast", - "Granulocyte(Liver)": "granulocyte", - "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", - "Hepatocyte_mt-Nd4 high(Liver)": "hepatocyte", - "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", - "Periportal (PP) 
hepatocyte(Liver)": "hepatocyte", - "Kuppfer cell(Liver)": "Kupffer cell", - "Macrophage_Chil3 high(Liver)": "macrophage", - "Neutrophil_Ngp high(Liver)": "neutrophil", - "Stromal cell(Liver)": "stromal cell", - "T cell_Gzma high(Liver)": "T cell", - "T cell_Trbc2 high(Liver)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Liver1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py deleted file mode 100644 index c70c2e993..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_liver_2018_microwell_han_002.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "liver" - - self.class_maps = { - "0": { - "B cell_Jchain high(Liver)": "B cell", - "Dendritic cell_Cst3 high(Liver)": "dendritic cell", - "Dendritic cell_Siglech high(Liver)": "dendritic cell", - "Epithelial cell(Liver)": "duct epithelial cell", - "Epithelia cell_Spp1 high(Liver)": "duct epithelial cell", - "Erythroblast_Hbb-bs high(Liver)": "erythroblast", - "Hepatocyte_Fabp1 high(Liver)": "hepatocyte", - "Pericentral (PC) hepatocytes(Liver)": "hepatocyte", - "Periportal (PP) hepatocyte(Liver)": "hepatocyte", - "Kuppfer cell(Liver)": "Kupffer cell", - "Macrophage_Chil3 high(Liver)": "macrophage", - "Stromal cell(Liver)": "stromal cell", - "T cell_Gzma high(Liver)": "T cell", - "T cell_Trbc2 high(Liver)": "T cell", - }, - } - 
- def _load(self): - self._load_generalized(samplename="Liver2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py deleted file mode 100644 index b98fe2101..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_001.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "lung" - - self.class_maps = { - "0": { - "AT1 Cell(Lung)": "alveolar epithelial cell type I", - "AT2 Cell(Lung)": "alveolar epithelial cell type II", - "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", - "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", - "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", - "B Cell(Lung)": "B cell", - "Basophil(Lung)": "basophil", - "Ciliated cell(Lung)": "ciliated cell", - "Clara Cell(Lung)": "clara cell", - "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", - "Dendritic cell_Naaa high(Lung)": "dendritic cell", - "Dividing T cells(Lung)": "T cell", - "Dividing cells(Lung)": "unknown", - "Dividing dendritic cells(Lung)": "dendritic cell", - "Endothelial cell_Kdr high(Lung)": "endothelial cell", - "Endothelial cell_Tmem100 high(Lung)": 
"endothelial cell", - "Endothelial cells_Vwf high(Lung)": "endothelial cell", - "Eosinophil granulocyte(Lung)": "eosinophil", - "Ig−producing B cell(Lung)": "B cell", - "Interstitial macrophage(Lung)": "lung macrophage", - "Monocyte progenitor cell(Lung)": "monocyte progenitor", - "NK Cell(Lung)": "NK cell", - "Neutrophil granulocyte(Lung)": "neutrophil", - "Nuocyte(Lung)": "nuocyte", - "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", - "Stromal cell_Acta2 high(Lung)": "stromal cell", - "Stromal cell_Dcn high(Lung)": "stromal cell", - "Stromal cell_Inmt high(Lung)": "stromal cell", - "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Lung1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py deleted file mode 100644 index 99f979473..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_002.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "lung" - - self.class_maps = { - "0": { - "AT1 Cell(Lung)": "alveolar epithelial cell type I", - "AT2 Cell(Lung)": "alveolar epithelial cell type II", - "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", - "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", - "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", - "B 
Cell(Lung)": "B cell", - "Basophil(Lung)": "basophil", - "Ciliated cell(Lung)": "ciliated cell", - "Clara Cell(Lung)": "clara cell", - "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", - "Dendritic cell_Naaa high(Lung)": "dendritic cell", - "Dividing T cells(Lung)": "T cell", - "Dividing cells(Lung)": "unknown", - "Dividing dendritic cells(Lung)": "dendritic cell", - "Endothelial cell_Kdr high(Lung)": "endothelial cell", - "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", - "Endothelial cells_Vwf high(Lung)": "endothelial cell", - "Eosinophil granulocyte(Lung)": "eosinophil", - "Ig−producing B cell(Lung)": "B cell", - "Interstitial macrophage(Lung)": "lung macrophage", - "Monocyte progenitor cell(Lung)": "monocyte progenitor", - "NK Cell(Lung)": "NK cell", - "Neutrophil granulocyte(Lung)": "neutrophil", - "Nuocyte(Lung)": "nuocyte", - "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", - "Stromal cell_Acta2 high(Lung)": "stromal cell", - "Stromal cell_Dcn high(Lung)": "stromal cell", - "Stromal cell_Inmt high(Lung)": "stromal cell", - "T Cell_Cd8b1 high(Lung)": "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Lung2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py deleted file mode 100644 index 3d5161d4f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_lung_2018_microwell_han_003.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - 
self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.organ = "lung" - - self.class_maps = { - "0": { - "AT1 Cell(Lung)": "alveolar epithelial cell type I", - "AT2 Cell(Lung)": "alveolar epithelial cell type II", - "Alveolar bipotent progenitor(Lung)": "alveolar bipotent progenitor", - "Alveolar macrophage_Ear2 high(Lung)": "alveolar macrophage", - "Alveolar macrophage_Pclaf high(Lung)": "alveolar macrophage", - "B Cell(Lung)": "B cell", - "Basophil(Lung)": "basophil", - "Ciliated cell(Lung)": "ciliated cell", - "Clara Cell(Lung)": "clara cell", - "Conventional dendritic cell_Gngt2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_H2-M2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Mgl2 high(Lung)": "dendritic cell", - "Conventional dendritic cell_Tubb5 high(Lung)": "dendritic cell", - "Dendritic cell_Naaa high(Lung)": "dendritic cell", - "Dividing T cells(Lung)": "T cell", - "Dividing cells(Lung)": "unknown", - "Dividing dendritic cells(Lung)": "dendritic cell", - "Endothelial cell_Kdr high(Lung)": "endothelial cell", - "Endothelial cell_Tmem100 high(Lung)": "endothelial cell", - "Endothelial cells_Vwf high(Lung)": "endothelial cell", - "Eosinophil granulocyte(Lung)": "eosinophil", - "Ig−producing B cell(Lung)": "B cell", - "Interstitial macrophage(Lung)": "lung macrophage", - "Monocyte progenitor cell(Lung)": "monocyte progenitor", - "NK Cell(Lung)": "NK cell", - "Neutrophil granulocyte(Lung)": "neutrophil", - "Nuocyte(Lung)": "nuocyte", - "Plasmacytoid dendritic cell(Lung)": "plasmacytoid dendritic cell", - "Stromal cell_Acta2 high(Lung)": "stromal cell", - "Stromal cell_Dcn high(Lung)": "stromal cell", - "Stromal cell_Inmt high(Lung)": "stromal cell", - "T Cell_Cd8b1 
high(Lung)": "CD8-positive, alpha-beta T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Lung3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py deleted file mode 100644 index 82ef7068f..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_001.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_testis_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "testis" - - self.class_maps = { - "0": { - "Elongating spermatid(Testis)": "elongating spermatid", - "Erythroblast_Hbb-bs high(Testis)": "erythroblast", - "Leydig cell(Testis)": "leydig cell", - "Macrophage_Lyz2 high(Testis)": "macrophage", - "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", - "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", - "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", - "Sertoli cell(Testis)": "sertoli cell", - "Spermatids_1700016P04Rik high(Testis)": "spermatid", - "Spermatids_Cst13 high(Testis)": "spermatid", - "Spermatids_Hmgb4 high(Testis)": "spermatid", - "Spermatids_Tnp1 high(Testis)": "spermatid", - "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", - "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", - "Spermatocyte_Calm2 high(Testis)": "spermatocyte", - "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", - "Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", - 
"Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", - "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" - }, - } - - def _load(self): - self._load_generalized(samplename="Testis1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py deleted file mode 100644 index 71e82424b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_malegonad_2018_microwell_han_002.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_testis_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "testis" - - self.class_maps = { - "0": { - "Elongating spermatid(Testis)": "elongating spermatid", - "Erythroblast_Hbb-bs high(Testis)": "erythroblast", - "Leydig cell(Testis)": "leydig cell", - "Macrophage_Lyz2 high(Testis)": "macrophage", - "Pre-Sertoli cell_Cst9 high(Testis)": "pre-sertoli cell", - "Pre-Sertoli cell_Ctsl high(Testis)": "pre-sertoli cell", - "Preleptotene spermatogonia(Testis)": "preleptotene spermatogonia", - "Sertoli cell(Testis)": "sertoli cell", - "Spermatids_1700016P04Rik high(Testis)": "spermatid", - "Spermatids_Cst13 high(Testis)": "spermatid", - "Spermatids_Hmgb4 high(Testis)": "spermatid", - "Spermatids_Tnp1 high(Testis)": "spermatid", - "Spermatocyte_1700001F09Rik high(Testis)": "spermatocyte", - "Spermatocyte_Cabs1 high(Testis)": "spermatocyte", - "Spermatocyte_Calm2 high(Testis)": "spermatocyte", - "Spermatocyte_Mesp1 high(Testis)": "spermatocyte", - 
"Spermatocyte_Slc2a3 high(Testis)": "spermatocyte", - "Spermatogonia_1700001P01Rik high(Testis)": "spermatogonia", - "Spermatogonia_Tbc1d23 high(Testis)": "spermatogonia" - }, - } - - def _load(self): - self._load_generalized(samplename="Testis2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py deleted file mode 100644 index ba02424bb..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_001.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "mammary gland" - - self.class_maps = { - "0": { - "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", - "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", - "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", - "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", - "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", - "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", - "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", - "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", - "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", - "NK 
cell(Mammary-Gland-Virgin)": "NK cell", - "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", - "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", - "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", - "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", - "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", - "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="MammaryGland.Virgin1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py deleted file mode 100644 index e500e6c76..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_002.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "mammary gland" - - self.class_maps = { - "0": { - "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", - "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", - "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", - "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", - "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", - "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal 
epithelial cell of mammary gland", - "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", - "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", - "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", - "NK cell(Mammary-Gland-Virgin)": "NK cell", - "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", - "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", - "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", - "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", - "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", - "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="MammaryGland.Virgin2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py deleted file mode 100644 index 51afe8cf4..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_003.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.organ = "mammary gland" - - self.class_maps = { - "0": { - "B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin)": "B cell", - "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", - "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", - "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Fscn1 
high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", - "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", - "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", - "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", - "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", - "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", - "NK cell(Mammary-Gland-Virgin)": "NK cell", - "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", - "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", - "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", - "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", - "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", - "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="MammaryGland.Virgin3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py deleted file mode 100644 index a47d16a19..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_mammarygland_2018_microwell_han_004.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001" - self.organ = "mammary gland" - - self.class_maps = { - "0": { - "B cell_Cd79a&Fcer2a 
high(Mammary-Gland-Virgin)": "B cell", - "B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin)": "B cell", - "B cell_Jchain high(Mammary-Gland-Virgin)": "B cell", - "Dendritic cell_Cst3 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Fscn1 high(Mammary-Gland-Virgin)": "dendritic cell", - "Dendritic cell_Siglech high(Mammary-Gland-Virgin)": "dendritic cell", - "Dividing cell(Mammary-Gland-Virgin)": "proliferative cell", - "Luminal cell_Krt19 high (Mammary-Gland-Virgin)": "luminal epithelial cell of mammary gland", - "Luminal progenitor(Mammary-Gland-Virgin)": "luminal progenitor cell", - "Macrophage_C1qc high(Mammary-Gland-Virgin)": "macrophage", - "Macrophage_Lyz1 high(Mammary-Gland-Virgin)": "macrophage", - "NK cell(Mammary-Gland-Virgin)": "NK cell", - "Stem and progenitor cell(Mammary-Gland-Virgin)": "stem and progenitor cell", - "Stromal cell_Col3a1 high(Mammary-Gland-Virgin)": "stromal cell", - "Stromal cell_Pi16 high(Mammary-Gland-Virgin)": "stromal cell", - "T cell_Cd8b1 high(Mammary-Gland-Virgin)": "T cell", - "T cell_Ly6c2 high(Mammary-Gland-Virgin)": "T cell", - "T-cells_Ctla4 high(Mammary-Gland-Virgin)": "T cell" - }, - } - - def _load(self): - self._load_generalized(samplename="MammaryGland.Virgin4_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py deleted file mode 100644 index f44375d58..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_muscle_2018_microwell_han_001.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "skeletal muscle organ" - - self.class_maps = { - "0": { - "B cell_Jchain high(Muscle)": "B cell", - "B cell_Vpreb3 high(Muscle)": "B cell", - "Dendritic cell(Muscle)": "dendritic cell", - "Endothelial cell(Muscle)": "endothelial cell", - "Erythroblast_Car1 high(Muscle)": "erythroblast", - "Erythroblast_Car2 high(Muscle)": "erythroblast", - "Granulocyte monocyte progenitor cell(Muscle)": "monocyte progenitor", - "Macrophage_Ms4a6c high(Muscle)": "macrophage", - "Macrophage_Retnla high(Muscle)": "macrophage", - "Muscle cell_Tnnc1 high(Muscle)": "muscle cell", - "Muscle cell_Tnnc2 high(Muscle)": "muscle cell", - "Muscle progenitor cell(Muscle)": "skeletal muscle satellite cell", - "Neutrophil_Camp high(Muscle)": "neutrophil", - "Neutrophil_Prg2 high(Muscle)": "neutrophil", - "Neutrophil_Retnlg high(Muscle)": "neutrophil", - "Stromal cell(Muscle)": "stromal cell", - "T cell(Muscle)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Muscle_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py deleted file mode 100644 index 454f555b2..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_pancreas_2018_microwell_han_001.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = 
"mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "pancreas" - - self.class_maps = { - "0": { - "Acinar cell(Pancreas)": "pancreatic acinar cell", - "Dendrtic cell(Pancreas)": "dendritic cell", - "Ductal cell(Pancreas)": "pancreatic ductal cell", - "Endocrine cell(Pancreas)": "endocrine cell", - "Dividing cell(Pancreas)": "endocrine cell", - "Endothelial cell_Fabp4 high(Pancreas)": "endothelial cell", - "Endothelial cell_Lrg1 high(Pancreas)": "endothelial cell", - "Endothelial cell_Tm4sf1 high(Pancreas)": "endothelial cell", - "Erythroblast_Hbb-bt high(Pancreas)": "erythroblast", - "Erythroblast_Igkc high(Pancreas)": "erythroblast", - "Granulocyte(Pancreas)": "granulocyte", - "Macrophage_Ly6c2 high(Pancreas)": "macrophage", - "Macrophage(Pancreas)": "macrophage", - "Glial cell(Pancreas)": "glial cell", - "Smooth muscle cell_Acta2 high(Pancreas)": "smooth muscle cell", - "Smooth muscle cell_Rgs5 high(Pancreas)": "smooth muscle cell", - "Stromal cell_Fn1 high(Pancreas)": "stromal cell", - "Stromal cell_Mfap4 high(Pancreas)": "stromal cell", - "Stromal cell_Smoc2 high(Pancreas)": "stromal cell", - "T cell(Pancreas)": "t cell", - "B cell(Pancreas)": "b cell", - "β-cell(Pancreas)": "pancreatic B cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Pancreas_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py deleted file mode 100644 index dbec73b0b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_001.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - 
cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "placenta" - - self.class_maps = { - "0": { - "B cell(Placenta)": "B cell", - "Basophil(Placenta)": "basophil", - "Decidual stromal cell(Placenta)": "decidual stromal cell", - "Dendritic cell(Placenta)": "dendritic cell", - "Endodermal cell_Afp high(Placenta)": "endodermal cell", - "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", - "Erythroblast_Hbb-y high(Placenta)": "erythroblast", - "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", - "Granulocyte_Neat1 high(Placenta)": "granulocyte", - "Granulocyte_S100a9 high(Placenta)": "granulocyte", - "HSPC_Lmo2 high(Placenta)": "HSPC", - "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", - "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", - "Macrophage_Apoe high(Placenta)": "macrophage", - "Macrophage_Spp1 high(Placenta)": "macrophage", - "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", - "Monocyte(Placenta)": "monocyte", - "NK cell(Placenta)": "NK cell", - "NKT cell(Placenta)": "NKT cell", - "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", - "PE lineage cell_S100g high(Placenta)": "PE lineage cell", - "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", - "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", - "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", - "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", - "Stromal cell(Placenta)": "stromal cell", - "Stromal cell_Acta2 high(Placenta)": "stromal cell", - "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", - }, - } - - def _load(self): - self._load_generalized(samplename="PlacentaE14.1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py deleted file mode 100644 index 1e379530a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_placenta_2018_microwell_han_002.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "placenta" - - self.class_maps = { - "0": { - "B cell(Placenta)": "B cell", - "Basophil(Placenta)": "basophil", - "Decidual stromal cell(Placenta)": "decidual stromal cell", - "Dendritic cell(Placenta)": "dendritic cell", - "Endodermal cell_Afp high(Placenta)": "endodermal cell", - "Endothelial cell_Maged2 high(Placenta)": "endothelial cell", - "Erythroblast_Hbb-y high(Placenta)": "erythroblast", - "Granulocyte monocyte progenitors(Placenta)": "monocyte progenitor", - "Granulocyte_Neat1 high(Placenta)": "granulocyte", - "Granulocyte_S100a9 high(Placenta)": "granulocyte", - "HSPC_Lmo2 high(Placenta)": "HSPC", - "Invasive spongiotrophoblast(Placenta)": "invasive spongiotrophoblast", - "Labyrinthine trophoblast(Placenta)": "labyrinthine trophoblast", - "Macrophage_Apoe high(Placenta)": "macrophage", - "Macrophage_Spp1 high(Placenta)": "macrophage", - "Megakaryocyte progenitor cell(Placenta)": "megakaryocte", - "Monocyte(Placenta)": "monocyte", - "NK cell(Placenta)": "NK cell", - "NKT cell(Placenta)": "NKT cell", - "PE lineage cell_Gkn2 high(Placenta)": "PE lineage cell", - "PE lineage 
cell_S100g high(Placenta)": "PE lineage cell", - "Progenitor trophoblast_Gjb3 high(Placenta)": "trophoblast progenitor", - "Spiral artery trophoblast giant cells(Placenta)": "spiral artery trophoblast giant cells", - "Spongiotrophoblast_Hsd11b2 high(Placenta)": "spongiotrophoblast", - "Spongiotrophoblast_Phlda2 high(Placenta)": "spongiotrophoblast", - "Stromal cell(Placenta)": "stromal cell", - "Stromal cell_Acta2 high(Placenta)": "stromal cell", - "Trophoblast progenitor_Taf7l high(Placenta)": "trophoblast progenitor", - }, - } - - def _load(self): - self._load_generalized(samplename="PlacentaE14.2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py deleted file mode 100644 index 652f5dcdc..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_001.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "prostate" - - self.class_maps = { - "0": { - "Dendritic cell(Prostate)": "dendritic cell", - "Epithelial cell(Prostate)": "epithelial cell", - "Glandular epithelium(Prostate)": "glandular epithelial cell", - "Prostate gland cell(Prostate)": "glandular cell", - "Stromal cell(Prostate)": "stromal cell", - "T cell(Prostate)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Prostate1_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py deleted file mode 100644 index ee81ccb81..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_prostate_2018_microwell_han_002.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "prostate" - - self.class_maps = { - "0": { - "Dendritic cell(Prostate)": "dendritic cell", - "Epithelial cell(Prostate)": "epithelial cell", - "Glandular epithelium(Prostate)": "glandular epithelial cell", - "Prostate gland cell(Prostate)": "glandular cell", - "Stromal cell(Prostate)": "stromal cell", - "T cell(Prostate)": "T cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Prostate2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py deleted file mode 100644 index 6fe195e19..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_001.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - 
**kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "rib" - - self.class_maps = { - "0": { - "B cell(Neonatal-Rib)": "B cell", - "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", - "Dividing cell(Neonatal-Rib)": "proliferative cell", - "Endothelial cell(Neonatal-Rib)": "endothelial cell", - "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", - "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", - "Granulocyte(Neonatal-Rib)": "granulocyte", - "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", - "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", - "Muscle cell(Neonatal-Rib)": "muscle cell", - "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", - "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", - "Neuron_Mpz high(Neonatal-Rib)": "neuron", - "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", - "Neutrophil(Neonatal-Rib)": "neutrophil", - "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", - "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", - "Osteoblast(Neonatal-Rib)": "osteoblast", - "Osteoclast(Neonatal-Rib)": "osteoclast", - "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", - "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="NeonatalRib1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py deleted file mode 100644 index 9f40e597a..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_002.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "rib" - - self.class_maps = { - "0": { - "B cell(Neonatal-Rib)": "B cell", - "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", - "Dividing cell(Neonatal-Rib)": "proliferative cell", - "Endothelial cell(Neonatal-Rib)": "endothelial cell", - "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", - "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", - "Granulocyte(Neonatal-Rib)": "granulocyte", - "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", - "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", - "Muscle cell(Neonatal-Rib)": "muscle cell", - "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", - "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", - "Neuron_Mpz high(Neonatal-Rib)": "neuron", - "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", - "Neutrophil(Neonatal-Rib)": "neutrophil", - "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", - "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", - "Osteoblast(Neonatal-Rib)": "osteoblast", - "Osteoclast(Neonatal-Rib)": "osteoclast", - "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", - "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal 
cell", - }, - } - - def _load(self): - self._load_generalized(samplename="NeonatalRib2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py deleted file mode 100644 index 4e2964633..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_rib_2018_microwell_han_003.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001" - self.organ = "rib" - - self.class_maps = { - "0": { - "B cell(Neonatal-Rib)": "B cell", - "Cartilage cell_Clu high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Col2a1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Cxcl14 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Ppa1 high(Neonatal-Rib)": "cartilage cell", - "Cartilage cell_Prg4 high(Neonatal-Rib)": "cartilage cell", - "Dividing cell(Neonatal-Rib)": "proliferative cell", - "Endothelial cell(Neonatal-Rib)": "endothelial cell", - "Erythroblast_Hba-a1 high(Neonatal-Rib)": "erythroblast", - "Erythroblast_Ttr high(Neonatal-Rib)": "erythroblast", - "Granulocyte(Neonatal-Rib)": "granulocyte", - "Macrophage_C1qc high(Neonatal-Rib)": "macrophage", - "Macrophage_Ctss high(Neonatal-Rib)": "macrophage", - "Muscle cell(Neonatal-Rib)": "muscle cell", - "Muscle cell_Acta2 high(Neonatal-Rib)": "muscle cell", - "Muscle cell_Actc1 high(Neonatal-Rib)": "muscle cell", - "Neuron_Mpz high(Neonatal-Rib)": "neuron", - "Neuron_Stmn2 high(Neonatal-Rib)": "neuron", 
- "Neutrophil(Neonatal-Rib)": "neutrophil", - "Neutrophil_Elane high(Neonatal-Rib)": "neutrophil", - "Oligodendrocyte(Neonatal-Rib)": "oligodendrocyte", - "Osteoblast(Neonatal-Rib)": "osteoblast", - "Osteoclast(Neonatal-Rib)": "osteoclast", - "Stromal cell_Acta1 high(Neonatal-Rib)": "stromal cell", - "Stromal cell_Tnmd high(Neonatal-Rib)": "stromal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="NeonatalRib3_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py deleted file mode 100644 index e3330d6ff..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_spleen_2018_microwell_han_001.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "spleen" - - self.class_maps = { - "0": { - "Erythroblast(Spleen)": "proerythroblast", - "Dendritic cell_S100a4 high(Spleen)": "dendritic cell", - "Dendritic cell_Siglech high(Spleen)": "dendritic cell", - "Granulocyte(Spleen)": "granulocyte", - "Macrophage(Spleen)": "macrophage", - "Monocyte(Spleen)": "monocyte", - "NK cell(Spleen)": "NK cell", - "Neutrophil(Spleen)": "neutrophil", - "Plasma cell(Spleen)": "plasma cell", - "T cell(Spleen)": "T cell", - "Marginal zone B cell(Spleen)": "B cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Spleen_dge") diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py deleted file mode 100644 index 9f412743a..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_stomach_2018_microwell_han_001.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "stomach" - - self.class_maps = { - "0": { - "Antral mucous cell (Stomach)": "antral mucous cell", - "Dendritic cell(Stomach)": "dendritic cell", - "Dividing cell(Stomach)": "proliferative cell", - "Epithelial cell_Gkn3 high(Stomach)": "epithelial cell", - "Epithelial cell_Krt20 high(Stomach)": "epithelial cell", - "Epithelial cell_Pla2g1b high(Stomach)": "epithelial cell", - "G cell(Stomach)": "G cell", - "Gastric mucosal cell(Stomach)": "gastric mucosal cell", - "Macrophage(Stomach)": "macrophage", - "Muscle cell(Stomach)": "muscle cell", - "Parietal cell (Stomach)": "parietal cell", - "Pit cell_Gm26917 high(Stomach)": "pit cell", - "Pit cell_Ifrd1 high(Stomach)": "pit cell", - "Stomach cell_Gkn2 high(Stomach)": "stomach cell", - "Stomach cell_Mt2 high(Stomach)": "stomach cell", - "Stomach cell_Muc5ac high(Stomach)": "stomach cell", - "Tuft cell(Stomach)": "tuft cell" - }, - } - - def _load(self): - self._load_generalized(samplename="Stomach_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py deleted file mode 100644 index 62eb1158e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_thymus_2018_microwell_han_001.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "thymus" - - self.class_maps = { - "0": { - "abT cell(Thymus)": "abT cell", - "B cell(Thymus)": "B cell", - "DPT cell(Thymus)": "double positive T cell", - "gdT cell (Thymus)": "gdT cell", - "Pre T cell(Thymus)": "immature T cell", - "Proliferating thymocyte(Thymus)": "immature T cell", - "T cell_Id2 high(Thymus)": "abT cell", # TODO check, not sure about this gene - "T cell_Ms4a4b high(Thymus)": "abT cell" # TODO check, not sure about this gene - }, - } - - def _load(self): - self._load_generalized(samplename="Thymus1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py deleted file mode 100644 index 43710000b..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_001.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] 
= None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - self.organ = "uterus" - - self.class_maps = { - "0": { - "B cell(Uterus)": "B cell", - "Dendritic cell(Uterus)": "dendritic cell", - "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", - "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", - "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", - "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", - "Granulocyte(Uterus)": "granulocyte", - "Keratinocyte(Uterus)": "keratinocyte", - "Macrophage(Uterus)": "macrophage", - "Monocyte(Uterus)": "monocyte", - "Muscle cell_Mgp high(Uterus)": "muscle cell", - "Muscle cell_Pcp4 high(Uterus)": "muscle cell", - "Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", - "NK cell(Uterus)": "NK cell", - "Stromal cell_Ccl11 high(Uterus)": "stromal cell", - "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", - "Stromal cell_Gm23935 high(Uterus)": "stromal cell", - "Stromal cell_Has1 high(Uterus)": "stromal cell", - "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Uterus1_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py deleted file mode 100644 index 9c6654a9c..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_uterus_2018_microwell_han_002.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union -from .base import Dataset_d10_1016_j_cell_2018_02_001 - - -class Dataset(Dataset_d10_1016_j_cell_2018_02_001): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - 
**kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - self.organ = "uterus" - - self.class_maps = { - "0": { - "B cell(Uterus)": "B cell", - "Dendritic cell(Uterus)": "dendritic cell", - "Endothelial cell_Cldn5 high(Uterus)": "endothelial cell", - "Endothelial cell_Tm4sf1 high(Uterus)": "endothelial cell", - "Glandular epithelium_Ltf high(Uterus)": "glandular epithelial cell", - "Glandular epithelium_Sprr2f high(Uterus)": "glandular epithelial cell", - "Granulocyte(Uterus)": "granulocyte", - "Keratinocyte(Uterus)": "keratinocyte", - "Macrophage(Uterus)": "macrophage", - "Monocyte(Uterus)": "monocyte", - "Muscle cell_Mgp high(Uterus)": "muscle cell", - "Muscle cell_Pcp4 high(Uterus)": "muscle cell", - "Smooth muscle cell_Rgs5 high(Uterus)": "smooth muscle cell", - "NK cell(Uterus)": "NK cell", - "Stromal cell_Ccl11 high(Uterus)": "stromal cell", - "Stromal cell_Cxcl14 high(Uterus)": "stromal cell", - "Stromal cell_Gm23935 high(Uterus)": "stromal cell", - "Stromal cell_Has1 high(Uterus)": "stromal cell", - "Stromal cell_Hsd11b2 high(Uterus)": "stromal cell", - }, - } - - def _load(self): - self._load_generalized(samplename="Uterus2_dge") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py new file mode 100644 index 000000000..973deaac0 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -0,0 +1,346 @@ +import anndata +import numpy as np +import pandas +from typing import Union +import zipfile +import tarfile +import os + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "Bladder_dge.txt.gz", + "BoneMarrow1_dge.txt.gz", + "BoneMarrow2_dge.txt.gz", + "BoneMarrow3_dge.txt.gz", + 
"BoneMarrowcKit1_dge.txt.gz", + "BoneMarrowcKit2_dge.txt.gz", + "BoneMarrowcKit3_dge.txt.gz", + "Brain1_dge.txt.gz", + "Brain2_dge.txt.gz", + # "CJ7.EB14.Ezh2.1_dge.txt.gz" # ToDo: sort out meta data for these + # "CJ7.EB14.WT.1_dge.txt.gz" # ToDo: sort out meta data for these + # "CJ7.EB14.WT.2_dge.txt.gz" # ToDo: sort out meta data for these + # "EB.Ezh2_dge.txt.gz" # ToDo: sort out meta data for these + # "EB.WT_dge.txt.gz" # ToDo: sort out meta data for these + "EmbryonicMesenchymeE14.5_dge.txt.gz", + "EmbryonicStemCell.CJ7_Deep_dge.txt.gz", + "EmbryonicStemCells_dge.txt.gz", + "FetalBrain_dge.txt.gz", + "FetalFemaleGonad_dge.txt.gz", + "FetalIntestine_dge.txt.gz", + "FetalKidney1_dge.txt.gz", + "FetalKidney2_dge.txt.gz", + "FetalLiverE14.1_dge.txt.gz", + "FetalLung_dge.txt.gz", + "FetalMaleGonad_dge.txt.gz", + "FetalPancreas_dge.txt.gz", + "FetalStomach_dge.txt.gz", + # "human-293T_dge.txt.gz" # ToDo: sort out meta data for these + "Kidney1_dge.txt.gz", + "Kidney2_dge.txt.gz", + "Liver1_dge.txt.gz", + "Liver2_dge.txt.gz", + "Lung1_dge.txt.gz", + "Lung2_dge.txt.gz", + "Lung3_dge.txt.gz", + "MammaryGland.Involution.CD45.1_dge.txt.gz", + "MammaryGland.Involution.CD45.2_dge.txt.gz", + "MammaryGland.Involution1_dge.txt.gz", + "MammaryGland.Involution2_dge.txt.gz", + "MammaryGland.Lactation1_dge.txt.gz", + "MammaryGland.Lactation2_dge.txt.gz", + "MammaryGland.Pregnancy_dge.txt.gz", + "MammaryGland.Virgin.CD45.1_dge.txt.gz", + "MammaryGland.Virgin.CD45.2_dge.txt.gz", + "MammaryGland.Virgin1_dge.txt.gz", + "MammaryGland.Virgin2_dge.txt.gz", + "MammaryGland.Virgin3_dge.txt.gz", + "MammaryGland.Virgin4_dge.txt.gz", + # "mES.CJ7_dge.txt.gz" # ToDo: sort out meta data for these + "MesenchymalStemCells_dge.txt.gz", + "MesenchymalStemCellsPrimary_dge.txt.gz", + # "mouse-3T3_dge.txt.gz" # ToDo: sort out meta data for these + "Muscle_dge.txt.gz", + "NeonatalCalvaria1_dge.txt.gz", + "NeonatalCalvaria2_dge.txt.gz", + "NeonatalHeart_dge.txt.gz", + "NeonatalMuscle1_dge.txt.gz", + "NeonatalMuscle2_dge.txt.gz", + 
"NeonatalPancreas_dge.txt.zip", + "NeonatalRib1_dge.txt.gz", + "NeonatalRib2_dge.txt.gz", + "NeonatalRib3_dge.txt.gz", + "NeonatalSkin_dge.txt.gz", + "NeontalBrain1_dge.txt.gz", + "NeontalBrain2_dge.txt.gz", + "Ovary1_dge.txt.gz", + "Ovary2_dge.txt.gz", + "Pancreas_dge.txt.gz", + "PeripheralBlood1_dge.txt.gz", + "PeripheralBlood2_dge.txt.gz", + "PeripheralBlood3_dge.txt.gz", + "PeripheralBlood4_dge.txt.gz", + "PeripheralBlood5_dge.txt.gz", + "PeripheralBlood6_dge.txt.gz", + "PlacentaE14.1_dge.txt.gz", + "PlacentaE14.2_dge.txt.gz", + "Prostate1_dge.txt.gz", + "Prostate2_dge.txt.gz", + "SmallIntestine.CD45_dge.txt.gz", + "SmallIntestine1_dge.txt.gz", + "SmallIntestine2_dge.txt.gz", + "SmallIntestine3_dge.txt.gz", + "Spleen_dge.txt.gz", + "Stomach_dge.txt.gz", + "Testis1_dge.txt.gz", + "Testis2_dge.txt.gz", + "Thymus1_dge.txt.gz", + "Thymus2_dge.txt.gz", + "TrophoblastStemCells_dge.txt.gz", + "Uterus1_dge.txt.gz", + "Uterus2_dge.txt.gz", +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__( + sample_fn=sample_fn, + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + **kwargs + ) + sample_organ_dict = { + "Bladder_dge.txt.gz": "urinary bladder", + "BoneMarrow1_dge.txt.gz": "bone marrow", + "BoneMarrow2_dge.txt.gz": "bone marrow", + "BoneMarrow3_dge.txt.gz": "bone marrow", + "BoneMarrowcKit1_dge.txt.gz": "bone marrow", + "BoneMarrowcKit2_dge.txt.gz": "bone marrow", + "BoneMarrowcKit3_dge.txt.gz": "bone marrow", + "Brain1_dge.txt.gz": "brain", + "Brain2_dge.txt.gz": "brain", + "CJ7.EB14.Ezh2.1_dge.txt.gz": None, + "CJ7.EB14.WT.1_dge.txt.gz": None, + "CJ7.EB14.WT.2_dge.txt.gz": None, + "EB.Ezh2_dge.txt.gz": None, + "EB.WT_dge.txt.gz": None, + "EmbryonicMesenchymeE14.5_dge.txt.gz": "mesenchyme", + "EmbryonicStemCell.CJ7_Deep_dge.txt.gz": "blastocyst", + "EmbryonicStemCells_dge.txt.gz": 
"blastocyst", + "FetalBrain_dge.txt.gz": "brain", + "FetalFemaleGonad_dge.txt.gz": "ovary", + "FetalIntestine_dge.txt.gz": "intestine", + "FetalKidney1_dge.txt.gz": "kidney", + "FetalKidney2_dge.txt.gz": "kidney", + "FetalLiverE14.1_dge.txt.gz": "liver", + "FetalLung_dge.txt.gz": "lung", + "FetalMaleGonad_dge.txt.gz": "testis", + "FetalPancreas_dge.txt.gz": "pancreas", + "FetalStomach_dge.txt.gz": "stomach", + "human-293T_dge.txt.gz": None, + "Kidney1_dge.txt.gz": "kidney", + "Kidney2_dge.txt.gz": "kidney", + "Liver1_dge.txt.gz": "liver", + "Liver2_dge.txt.gz": "liver", + "Lung1_dge.txt.gz": "lung", + "Lung2_dge.txt.gz": "lung", + "Lung3_dge.txt.gz": "lung", + "MammaryGland.Involution.CD45.1_dge.txt.gz": "mammary gland", + "MammaryGland.Involution.CD45.2_dge.txt.gz": "mammary gland", + "MammaryGland.Involution1_dge.txt.gz": "mammary gland", + "MammaryGland.Involution2_dge.txt.gz": "mammary gland", + "MammaryGland.Lactation1_dge.txt.gz": "mammary gland", + "MammaryGland.Lactation2_dge.txt.gz": "mammary gland", + "MammaryGland.Pregnancy_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin.CD45.1_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin.CD45.2_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin1_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin2_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin3_dge.txt.gz": "mammary gland", + "MammaryGland.Virgin4_dge.txt.gz": "mammary gland", + "mES.CJ7_dge.txt.gz": "blastocyst", + "MesenchymalStemCells_dge.txt.gz": "mesenchyme", + "MesenchymalStemCellsPrimary_dge.txt.gz": "mesenchyme", + "mouse-3T3_dge.txt.gz": None, + "Muscle_dge.txt.gz": "skeletal muscle organ", + "NeonatalCalvaria1_dge.txt.gz": "vault of skull", + "NeonatalCalvaria2_dge.txt.gz": "vault of skull", + "NeonatalHeart_dge.txt.gz": "heart", + "NeonatalMuscle1_dge.txt.gz": "skeletal muscle organ", + "NeonatalMuscle2_dge.txt.gz": "skeletal muscle organ", + "NeonatalPancreas_dge.txt.zip": "pancreas", + "NeonatalRib1_dge.txt.gz": "rib", + 
"NeonatalRib2_dge.txt.gz": "rib", + "NeonatalRib3_dge.txt.gz": "rib", + "NeonatalSkin_dge.txt.gz": "skin of body", + "NeontalBrain1_dge.txt.gz": "brain", + "NeontalBrain2_dge.txt.gz": "brain", + "Ovary1_dge.txt.gz": "ovary", + "Ovary2_dge.txt.gz": "ovary", + "Pancreas_dge.txt.gz": "pancreas", + "PeripheralBlood1_dge.txt.gz": "blood", + "PeripheralBlood2_dge.txt.gz": "blood", + "PeripheralBlood3_dge.txt.gz": "blood", + "PeripheralBlood4_dge.txt.gz": "blood", + "PeripheralBlood5_dge.txt.gz": "blood", + "PeripheralBlood6_dge.txt.gz": "blood", + "PlacentaE14.1_dge.txt.gz": "placenta", + "PlacentaE14.2_dge.txt.gz": "placenta", + "Prostate1_dge.txt.gz": "prostate", + "Prostate2_dge.txt.gz": "prostate", + "SmallIntestine.CD45_dge.txt.gz": "small intestine", + "SmallIntestine1_dge.txt.gz": "small intestine", + "SmallIntestine2_dge.txt.gz": "small intestine", + "SmallIntestine3_dge.txt.gz": "small intestine", + "Spleen_dge.txt.gz": "spleen", + "Stomach_dge.txt.gz": "stomach", + "Testis1_dge.txt.gz": "testis", + "Testis2_dge.txt.gz": "testis", + "Thymus1_dge.txt.gz": "testis", + "Thymus2_dge.txt.gz": "testis", + "TrophoblastStemCells_dge.txt.gz": "trophoblast", + "Uterus1_dge.txt.gz": "uterus", + "Uterus2_dge.txt.gz": "uterus", + } + sample_dev_stage_dict = { + "Bladder_dge.txt.gz": "adult", + "BoneMarrow1_dge.txt.gz": "adult", + "BoneMarrow2_dge.txt.gz": "adult", + "BoneMarrow3_dge.txt.gz": "adult", + "BoneMarrowcKit1_dge.txt.gz": "adult", + "BoneMarrowcKit2_dge.txt.gz": "adult", + "BoneMarrowcKit3_dge.txt.gz": "adult", + "Brain1_dge.txt.gz": "adult", + "Brain2_dge.txt.gz": "adult", + "CJ7.EB14.Ezh2.1_dge.txt.gz": None, + "CJ7.EB14.WT.1_dge.txt.gz": None, + "CJ7.EB14.WT.2_dge.txt.gz": None, + "EB.Ezh2_dge.txt.gz": None, + "EB.WT_dge.txt.gz": None, + "EmbryonicMesenchymeE14.5_dge.txt.gz": "embryonic", + "EmbryonicStemCell.CJ7_Deep_dge.txt.gz": "embryonic", + "EmbryonicStemCells_dge.txt.gz": "embryonic", + "FetalBrain_dge.txt.gz": "fetal", + "FetalFemaleGonad_dge.txt.gz": 
"fetal", + "FetalIntestine_dge.txt.gz": "fetal", + "FetalKidney1_dge.txt.gz": "fetal", + "FetalKidney2_dge.txt.gz": "fetal", + "FetalLiverE14.1_dge.txt.gz": "fetal", + "FetalLung_dge.txt.gz": "fetal", + "FetalMaleGonad_dge.txt.gz": "fetal", + "FetalPancreas_dge.txt.gz": "fetal", + "FetalStomach_dge.txt.gz": "fetal", + "human-293T_dge.txt.gz": None, + "Kidney1_dge.txt.gz": "adult", + "Kidney2_dge.txt.gz": "adult", + "Liver1_dge.txt.gz": "adult", + "Liver2_dge.txt.gz": "adult", + "Lung1_dge.txt.gz": "adult", + "Lung2_dge.txt.gz": "adult", + "Lung3_dge.txt.gz": "adult", + "MammaryGland.Involution.CD45.1_dge.txt.gz": "adult", + "MammaryGland.Involution.CD45.2_dge.txt.gz": "adult", + "MammaryGland.Involution1_dge.txt.gz": "adult", + "MammaryGland.Involution2_dge.txt.gz": "adult", + "MammaryGland.Lactation1_dge.txt.gz": "adult", + "MammaryGland.Lactation2_dge.txt.gz": "adult", + "MammaryGland.Pregnancy_dge.txt.gz": "adult", + "MammaryGland.Virgin.CD45.1_dge.txt.gz": "adult", + "MammaryGland.Virgin.CD45.2_dge.txt.gz": "adult", + "MammaryGland.Virgin1_dge.txt.gz": "adult", + "MammaryGland.Virgin2_dge.txt.gz": "adult", + "MammaryGland.Virgin3_dge.txt.gz": "adult", + "MammaryGland.Virgin4_dge.txt.gz": "adult", + "mES.CJ7_dge.txt.gz": "embryonic", + "MesenchymalStemCells_dge.txt.gz": "embryonic", + "MesenchymalStemCellsPrimary_dge.txt.gz": "embryonic", + "mouse-3T3_dge.txt.gz": None, + "Muscle_dge.txt.gz": "adult", + "NeonatalCalvaria1_dge.txt.gz": "neonatal", + "NeonatalCalvaria2_dge.txt.gz": "neonatal", + "NeonatalHeart_dge.txt.gz": "neonatal", + "NeonatalMuscle1_dge.txt.gz": "neonatal", + "NeonatalMuscle2_dge.txt.gz": "neonatal", + "NeonatalPancreas_dge.txt.zip": "neonatal", + "NeonatalRib1_dge.txt.gz": "neonatal", + "NeonatalRib2_dge.txt.gz": "neonatal", + "NeonatalRib3_dge.txt.gz": "neonatal", + "NeonatalSkin_dge.txt.gz": "neonatal", + "NeontalBrain1_dge.txt.gz": "neonatal", + "NeontalBrain2_dge.txt.gz": "neonatal", + "Ovary1_dge.txt.gz": "adult", + "Ovary2_dge.txt.gz": 
"adult", + "Pancreas_dge.txt.gz": "adult", + "PeripheralBlood1_dge.txt.gz": "adult", + "PeripheralBlood2_dge.txt.gz": "adult", + "PeripheralBlood3_dge.txt.gz": "adult", + "PeripheralBlood4_dge.txt.gz": "adult", + "PeripheralBlood5_dge.txt.gz": "adult", + "PeripheralBlood6_dge.txt.gz": "adult", + "PlacentaE14.1_dge.txt.gz": "adult", + "PlacentaE14.2_dge.txt.gz": "adult", + "Prostate1_dge.txt.gz": "adult", + "Prostate2_dge.txt.gz": "adult", + "SmallIntestine.CD45_dge.txt.gz": "adult", + "SmallIntestine1_dge.txt.gz": "adult", + "SmallIntestine2_dge.txt.gz": "adult", + "SmallIntestine3_dge.txt.gz": "adult", + "Spleen_dge.txt.gz": "adult", + "Stomach_dge.txt.gz": "adult", + "Testis1_dge.txt.gz": "adult", + "Testis2_dge.txt.gz": "adult", + "Thymus1_dge.txt.gz": "adult", + "Thymus2_dge.txt.gz": "adult", + "TrophoblastStemCells_dge.txt.gz": "embryonic", + "Uterus1_dge.txt.gz": "adult", + "Uterus2_dge.txt.gz": "adult", + } + + self.organ = sample_organ_dict[self.sample_fn] + self.id = f"mouse_{''.join(self.organ.split(' '))}_2018_microwellseq_han_" \ + f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1016/j.cell.2018.02.001" + + self.download_url_data = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" + self.download_url_meta = None + + self.author = "Guo" + self.dev_stage = sample_dev_stage_dict[self.sample_fn] + self.doi = "10.1016/j.cell.2018.02.001" + self.normalization = "raw" + self.healthy = True + self.organism = "mouse" + self.protocol = "microwell-seq" + self.state_exact = "healthy" + self.year = 2018 + + self.var_symbol_col = "index" + + self.obs_key_cellontology_original = "Annotation" + + def _load(self): + fn = os.path.join(self.data_dir, '5435866.zip') + with zipfile.ZipFile(fn) as archive: + celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) + celltypes = celltypes.drop(["Unnamed: 0"], axis=1) + + with tarfile.open(fileobj=archive.open('MCA_500more_dge.tar.gz')) as tar: + data = 
pandas.read_csv(tar.extractfile(f'500more_dge/{self.sample_fn}'), + compression="gzip", + sep=" ", + header=0 + ) + + self.adata = anndata.AnnData(data.T) + self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() + self.adata.obs = celltypes.loc[self.adata.obs_names, :] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 379e5ab40..50145a6a2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -253,7 +253,7 @@ def __init__( self.obs_key_sample = "sample" self.organ = sample_organ_dict[self.sample_id] - self.id = f"human_{''.join(self.organ.split(' '))}_2020_microwell_han_" \ + self.id = f"human_{''.join(self.organ.split(' '))}_2020_microwellseq_han_" \ f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_10.1038/s41586-020-2157-4" self.author = "Guo" From 707d2aa26ee2f49dd19c32fe181270bbc5593d2b Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Sat, 13 Feb 2021 18:24:04 +0100 Subject: [PATCH 056/161] removed dysfunctional loader 10.1038/s41586-019-1631-3 from auto discovery --- .../__init__.py | 0 .../human_liver_2019_10x_ramachandran_001.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sfaira/data/dataloaders/loaders/{d10_1038_s41586_019_1631_3 => _d10_1038_s41586_019_1631_3}/__init__.py (100%) rename sfaira/data/dataloaders/loaders/{d10_1038_s41586_019_1631_3 => _d10_1038_s41586_019_1631_3}/human_liver_2019_10x_ramachandran_001.py (100%) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/__init__.py similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py 
rename to sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/__init__.py diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py rename to sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py From ed745c8681c5fbf353bd4906f9996497ef1c362f Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 14:31:25 +0100 Subject: [PATCH 057/161] fixed bugs with handling list input to map_celltype_to_ontology fixes #101 --- sfaira/data/utils.py | 15 +++++------ sfaira/unit_tests/test_data_utils.py | 37 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index 6c87f02e1..a45ba4f0b 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -12,7 +12,7 @@ def map_celltype_to_ontology( n_suggest: int = 4, choices_for_perfect_match: bool = True, keep_strategy: bool = False, - always_return_list: bool = False, + always_return_dict: bool = False, threshold_for_partial_matching: float = 90., **kwargs ) -> Union[List[str], Dict[str, List[str]], str]: @@ -34,10 +34,10 @@ def map_celltype_to_ontology( returned as a string, rather than as a list. :param keep_strategy: Whether to keep search results structured by search strategy. For details, see also sfaira.versions.metadata.CelltypeUniverse.prepare_celltype_map_fuzzy() - :param always_return_list: Also return a list over queries if only one query was given. + :param always_return_dict: Also return a dictionary over queries if only one query was given. :param threshold_for_partial_matching: Maximum fuzzy match score below which lenient matching (ratio) is extended through partial_ratio. 
- :param **kwargs: Additional parameters to CelltypeUniverse. + :param kwargs: Additional parameters to CelltypeUniverse. :return: List over queries, each entry is: A list of high priority matches or perfect match (see choices_for_perfect_match) or, if keep_strategy, dictionary of lists of search strategies named by strategy name. If a search strategy yields perfect matches, it @@ -58,9 +58,10 @@ def map_celltype_to_ontology( n_suggest=n_suggest, threshold_for_partial_matching=threshold_for_partial_matching, ) + matches = matches[0] # Prepare the output: for x, matches_i in zip(queries, matches): - matches_i = matches_i[0] + matches_i = matches_i # Flatten list of lists: # Flatten dictionary of lists and account for string rather than list entries. if len(matches_i.values()) == 1 and isinstance(list(matches_i.values())[0], str): @@ -80,7 +81,7 @@ def map_celltype_to_ontology( else: matches_to_return.update({x: matches_flat}) # Only return a list over queries if more than one query was given. 
- if len(queries) == 1 and not always_return_list: - return matches_to_return - else: + if len(queries) == 1 and not always_return_dict: return matches_to_return[queries[0]] + else: + return matches_to_return diff --git a/sfaira/unit_tests/test_data_utils.py b/sfaira/unit_tests/test_data_utils.py index bd59a7f29..b67e3e191 100644 --- a/sfaira/unit_tests/test_data_utils.py +++ b/sfaira/unit_tests/test_data_utils.py @@ -4,25 +4,38 @@ from sfaira.data.utils import map_celltype_to_ontology -@pytest.mark.parametrize("perfectly_matched_query", [True, False]) +@pytest.mark.parametrize("trial_cell_type_labels", + ["type B pancreatic cell", "beta", ["type B pancreatic cell", "beta"]]) @pytest.mark.parametrize("choices_for_perfect_match", [True, False]) @pytest.mark.parametrize("anatomical_constraint", [None, "pancreas"]) def test_map_celltype_to_ontology( - perfectly_matched_query: bool, + trial_cell_type_labels: str, choices_for_perfect_match: bool, anatomical_constraint: Union[str, None] ): - trial_cell_type = "type B pancreatic cell" if perfectly_matched_query else "beta" - x = map_celltype_to_ontology( - queries=[trial_cell_type], + trial_cell_type_labels = [trial_cell_type_labels] if isinstance(trial_cell_type_labels, str) \ + else trial_cell_type_labels + perfectly_matched_query = ["type B pancreatic cell" == x for x in trial_cell_type_labels] + matches = map_celltype_to_ontology( + queries=trial_cell_type_labels, organism="human", include_synonyms=True, anatomical_constraint=anatomical_constraint, - choices_for_perfect_match=choices_for_perfect_match + choices_for_perfect_match=choices_for_perfect_match, + always_return_dict=False, ) - if perfectly_matched_query and not choices_for_perfect_match: - assert isinstance(x[trial_cell_type], str), x - assert x[trial_cell_type] == "type B pancreatic cell" - else: - assert isinstance(x[trial_cell_type], list), x - assert "type B pancreatic cell" in x[trial_cell_type] + for x, y in zip(trial_cell_type_labels, 
perfectly_matched_query): + if isinstance(matches, dict): # dictionary over queries with list of matches as value each + if y and not choices_for_perfect_match: + assert isinstance(matches[x], str), matches + assert matches[x] == "type B pancreatic cell" + else: + assert isinstance(matches[x], list), matches + assert "type B pancreatic cell" in matches[x] + else: # matches for single query + if y and not choices_for_perfect_match: + assert isinstance(matches, str), matches + assert matches == "type B pancreatic cell" + else: + assert isinstance(matches, list), matches + assert "type B pancreatic cell" in matches From 6b959540d1645d63d6465433ab459ba5c6edaea3 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 14:55:36 +0100 Subject: [PATCH 058/161] fixed d10_1186_s13059_019_1906_x loader --- .../human_x_2019_10x_madissoon_001.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index 3f738ff11..d056bb180 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -55,6 +55,8 @@ def __init__( self.var_symbol_col = "index" self.obs_key_cellontology_original = "Celltypes" + # ToDo: patient information in .obs["patient"] and sample information in .obs["sample"] (more samples than + # patients) if self.sample_fn == "madissoon19_lung.processed.h5ad": self.class_maps = { @@ -146,7 +148,10 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) - if self.sample_fn == "oesophagus.cellxgene.h5ad" or self.sample_fn == "spleen.cellxgene.h5ad": - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, 
None]))\ - .multiply(1 / 10000) + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + # Cell type column called differently in madissoon19_lung.processed.h5ad: + if self.sample_fn == "madissoon19_lung.processed.h5ad": + self.adata.obs["Celltypes"] = self.adata.obs["CellType"] + del self.adata.obs["CellType"] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) From 4c2ae220f8bc83a042c071564b2eb8e356282d56 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 16:21:11 +0100 Subject: [PATCH 059/161] repositioned cache directory and allowed loading without feature space mapping --- sfaira/data/base.py | 61 +++++++++++-------- .../utils_scripts/create_celltype_maps.py | 3 +- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index b9d6853d3..533f8d903 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -305,8 +305,8 @@ def _get_cache_fn(): cache = os.path.join( self.cache_path, - "cache", self.directory_formatted_doi, + "cache", self._directory_formatted_id + ".h5ad" ) return cache @@ -346,7 +346,7 @@ def _cached_writing(fn_cache): def load( self, remove_gene_version: bool = True, - match_to_reference: Union[str, None] = None, + match_to_reference: Union[str, bool, None] = None, load_raw: bool = False, allow_caching: bool = True, ): @@ -354,7 +354,7 @@ def load( :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. - :param match_to_reference: Reference genomes name. + :param match_to_reference: Reference genomes name or False to keep original feature space. :param load_raw: Loads unprocessed version of data if available in data loader. :param allow_caching: Whether to allow method to cache adata object for faster re-loading. :return: @@ -364,16 +364,21 @@ def load( "while not removing gene versions. 
this can lead to very poor matching results") # Set default genomes per organism if none provided: - if match_to_reference: + if isinstance(match_to_reference, str): genome = match_to_reference - elif self.organism == "human": - genome = "Homo_sapiens_GRCh38_97" - warnings.warn(f"using default genome {genome}") - elif self.organism == "mouse": - genome = "Mus_musculus_GRCm38_97" - warnings.warn(f"using default genome {genome}") + elif match_to_reference is None or (isinstance(match_to_reference, bool) and match_to_reference): + if self.organism == "human": + genome = "Homo_sapiens_GRCh38_97" + warnings.warn(f"using default genome {genome}") + elif self.organism == "mouse": + genome = "Mus_musculus_GRCm38_97" + warnings.warn(f"using default genome {genome}") + else: + raise ValueError(f"genome was not supplied and no default genome found for organism {self.organism}") + elif not match_to_reference: + genome = None else: - raise ValueError(f"genome was not supplied and no default genome found for organism {self.organism}") + raise ValueError(f"invalid choice for match_to_reference={match_to_reference}") self._set_genome(genome=genome) # Set path to dataset directory @@ -703,21 +708,23 @@ def set_unkown_class_id(self, ids: List[str]): [x for x in ids if x not in self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers] ) - def _set_genome(self, genome: str): - - if genome.lower().startswith("homo_sapiens"): - g = SuperGenomeContainer( - organism="human", - genome=genome - ) - elif genome.lower().startswith("mus_musculus"): - g = SuperGenomeContainer( - organism="mouse", - genome=genome - ) + def _set_genome(self, genome: Union[str, None]): + if genome is not None: + if genome.lower().startswith("homo_sapiens"): + g = SuperGenomeContainer( + organism="human", + genome=genome + ) + elif genome.lower().startswith("mus_musculus"): + g = SuperGenomeContainer( + organism="mouse", + genome=genome + ) + else: + raise ValueError(f"Genome {genome} not recognised. 
Needs to start with 'Mus_Musculus' or " + f"'Homo_Sapiens'.") else: - raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") - + g = None self.genome_container = g @property @@ -1679,7 +1686,7 @@ def load( self, annotated_only: bool = False, remove_gene_version: bool = True, - match_to_reference: Union[str, None] = None, + match_to_reference: Union[str, bool, None] = None, load_raw: bool = False, allow_caching: bool = True, processes: int = 1, @@ -2201,7 +2208,7 @@ def download(self, **kwargs): def load_all( self, annotated_only: bool = False, - match_to_reference: Union[str, None] = None, + match_to_reference: Union[str, bool, None] = None, remove_gene_version: bool = True, load_raw: bool = False, allow_caching: bool = True, diff --git a/sfaira/data/utils_scripts/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps.py index ae389e6a4..fd3467ba3 100644 --- a/sfaira/data/utils_scripts/create_celltype_maps.py +++ b/sfaira/data/utils_scripts/create_celltype_maps.py @@ -86,7 +86,8 @@ dsg_f = sfaira.data.DatasetGroup(datasets=dict([(x.id, x) for x in datasets_f])) dsg_f.load( load_raw=False, - allow_caching=True + allow_caching=True, + match_to_reference=False, ) if str(dir_study) in studys_separate_csvs: pass From 646ae6ae1a6a127ee5adc165d39b1f734699ff8c Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 16:29:55 +0100 Subject: [PATCH 060/161] fixed bug in d10_1186_s13059_019_1906_x --- .../human_x_2019_10x_madissoon_001.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index d056bb180..58406a482 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -45,7 +45,7 @@ def __init__( self.author = "Meyer" self.doi = "10.1186/s13059-019-1906-x" self.healthy = True - self.normalization = "raw" + self.normalization = "raw" # ToDo "madissoon19_lung.processed.h5ad" is close to integer but not quire (~1e-4) self.organ = organ self.organism = "human" self.protocol = "10X sequencing" @@ -148,8 +148,9 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) self.adata = anndata.read(fn) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + if self.sample_fn != "madissoon19_lung.processed.h5ad": + self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) # Cell type column called differently in madissoon19_lung.processed.h5ad: if self.sample_fn == "madissoon19_lung.processed.h5ad": self.adata.obs["Celltypes"] = self.adata.obs["CellType"] From 35b64c9fc48c715d89cbf54472d754825bce3910 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 16:43:41 +0100 Subject: [PATCH 061/161] improved raw feature space yielding --- sfaira/data/base.py | 118 ++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 60 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 533f8d903..4e47edf54 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -394,12 +394,14 @@ def load( self.adata.uns[self._ADATA_IDS_SFAIRA.mapped_features] = match_to_reference self.adata.uns[self._ADATA_IDS_SFAIRA.remove_gene_version] = remove_gene_version # Streamline feature space: - self._convert_and_set_var_names() + self._convert_and_set_var_names(match_to_reference=match_to_reference) self._collapse_gene_versions(remove_gene_version=remove_gene_version) - self._match_features_to_reference(match_to_reference=match_to_reference) + 
if match_to_reference: + self._match_features_to_reference() def _convert_and_set_var_names( self, + match_to_reference: Union[str, bool, None], symbol_col: str = None, ensembl_col: str = None, ): @@ -432,8 +434,8 @@ def _convert_and_set_var_names( {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, axis='columns' ) - # If only symbol or ensembl was supplied, the other one is inferred ia a genome mapping dictionary. - if not ensembl_col: + # If only symbol or ensembl was supplied, the other one is inferred from a genome mapping dictionary. + if not ensembl_col and match_to_reference: id_dict = self.genome_container.names_to_id_dict id_strip_dict = self.genome_container.strippednames_to_id_dict # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, @@ -449,20 +451,20 @@ def _convert_and_set_var_names( ensids.append('n/a') self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids - if not symbol_col: + if not symbol_col and match_to_reference: id_dict = self.genome_container.id_to_names_dict self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] ] - # Lastly, the index of .var is set to ensembl IDs. - try: # debugging - self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() - except KeyError as e: - raise KeyError(e) - - self.adata.var_names_make_unique() + if match_to_reference: + # Lastly, the index of .var is set to ensembl IDs. 
+ try: # debugging + self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() + except KeyError as e: + raise KeyError(e) + self.adata.var_names_make_unique() def _collapse_gene_versions(self, remove_gene_version): """ @@ -514,57 +516,53 @@ def _collapse_gene_versions(self, remove_gene_version): self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values - def _match_features_to_reference(self, match_to_reference): + def _match_features_to_reference(self): """ Match feature space to a genomes provided with sfaira - - :param match_to_reference: - :return: """ - if match_to_reference: - # Convert data matrix to csc matrix - if isinstance(self.adata.X, np.ndarray): - # Change NaN to zero. This occurs for example in concatenation of anndata instances. - if np.any(np.isnan(self.adata.X)): - self.adata.X[np.isnan(self.adata.X)] = 0 - x = scipy.sparse.csc_matrix(self.adata.X) - elif isinstance(self.adata.X, scipy.sparse.spmatrix): - x = self.adata.X.tocsc() - else: - raise ValueError(f"Data type {type(self.adata.X)} not recognized.") - - # Compute indices of genes to keep - data_ids = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values - idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] - idx_feature_map = np.array([self.genome_container.ensembl.index(x) - for x in data_ids[idx_feature_kept]]) - # Remove unmapped genes - x = x[:, idx_feature_kept] - - # Create reordered feature matrix based on reference and convert to csr - x_new = scipy.sparse.csc_matrix((x.shape[0], self.genome_container.ngenes), dtype=x.dtype) - # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: - # ... 
scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) - # ValueError: could not convert integer scalar - step = 2000 - if step < len(idx_feature_map): - for i in range(0, len(idx_feature_map), step): - x_new[:, idx_feature_map[i:i + step]] = x[:, i:i + step] - x_new[:, idx_feature_map[i + step:]] = x[:, i + step:] - else: - x_new[:, idx_feature_map] = x - - x_new = x_new.tocsr() - - self.adata = anndata.AnnData( - X=x_new, - obs=self.adata.obs, - obsm=self.adata.obsm, - var=pd.DataFrame(data={'names': self.genome_container.names, - self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, - index=self.genome_container.ensembl), - uns=self.adata.uns - ) + # Convert data matrix to csc matrix + if isinstance(self.adata.X, np.ndarray): + # Change NaN to zero. This occurs for example in concatenation of anndata instances. + if np.any(np.isnan(self.adata.X)): + self.adata.X[np.isnan(self.adata.X)] = 0 + x = scipy.sparse.csc_matrix(self.adata.X) + elif isinstance(self.adata.X, scipy.sparse.spmatrix): + x = self.adata.X.tocsc() + else: + raise ValueError(f"Data type {type(self.adata.X)} not recognized.") + + # Compute indices of genes to keep + data_ids = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values + idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] + idx_feature_map = np.array([self.genome_container.ensembl.index(x) + for x in data_ids[idx_feature_kept]]) + # Remove unmapped genes + x = x[:, idx_feature_kept] + + # Create reordered feature matrix based on reference and convert to csr + x_new = scipy.sparse.csc_matrix((x.shape[0], self.genome_container.ngenes), dtype=x.dtype) + # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: + # ... 
scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) + # ValueError: could not convert integer scalar + step = 2000 + if step < len(idx_feature_map): + for i in range(0, len(idx_feature_map), step): + x_new[:, idx_feature_map[i:i + step]] = x[:, i:i + step] + x_new[:, idx_feature_map[i + step:]] = x[:, i + step:] + else: + x_new[:, idx_feature_map] = x + + x_new = x_new.tocsr() + + self.adata = anndata.AnnData( + X=x_new, + obs=self.adata.obs, + obsm=self.adata.obsm, + var=pd.DataFrame(data={'names': self.genome_container.names, + self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, + index=self.genome_container.ensembl), + uns=self.adata.uns + ) def _set_metadata_in_adata(self): """ From fea7ea5d6bead0bd30ca060ee0fd9444ce1d8be1 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 16:54:11 +0100 Subject: [PATCH 062/161] set gene version removal in cellmap writing to false --- sfaira/data/base.py | 2 +- sfaira/data/utils_scripts/create_celltype_maps.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 4e47edf54..6a752bfeb 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -478,7 +478,7 @@ def _collapse_gene_versions(self, remove_gene_version): # Collapse if necessary: new_index_collapsed = list(np.unique(new_index)) if len(new_index_collapsed) < self.adata.n_vars: - print("WARNING: duplicate features detected after removing gene versions." + print("WARNING: duplicate features detected after removing gene versions. " "the code to collapse these features is implemented but not tested.") idx_map = np.array([new_index_collapsed.index(x) for x in new_index]) # Need reverse sorting to find index of last element in sorted list to split array using list index(). 
diff --git a/sfaira/data/utils_scripts/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps.py index fd3467ba3..f7aa7dd55 100644 --- a/sfaira/data/utils_scripts/create_celltype_maps.py +++ b/sfaira/data/utils_scripts/create_celltype_maps.py @@ -88,6 +88,7 @@ load_raw=False, allow_caching=True, match_to_reference=False, + remove_gene_version=False, ) if str(dir_study) in studys_separate_csvs: pass From d668219d13022f6cbef02ff4aa742c9d743d5795 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 15 Feb 2021 21:40:34 +0100 Subject: [PATCH 063/161] fixed d10_1016_j_cell_2018_02_001 --- .../mouse_x_2018_microwellseq_han_x.py | 182 +++++++++--------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 973deaac0..e096a5302 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -9,97 +9,97 @@ from sfaira.data import DatasetBaseGroupLoadingManyFiles SAMPLE_FNS = [ - "Bladder_dge.txt.gz" - "BoneMarrow1_dge.txt.gz" - "BoneMarrow2_dge.txt.gz" - "BoneMarrow3_dge.txt.gz" - "BoneMarrowcKit1_dge.txt.gz" - "BoneMarrowcKit2_dge.txt.gz" - "BoneMarrowcKit3_dge.txt.gz" - "Brain1_dge.txt.gz" - "Brain2_dge.txt.gz" - # "CJ7.EB14.Ezh2.1_dge.txt.gz" # ToDo: sort out meta data for these - # "CJ7.EB14.WT.1_dge.txt.gz" # ToDo: sort out meta data for these - # "CJ7.EB14.WT.2_dge.txt.gz" # ToDo: sort out meta data for these - # "EB.Ezh2_dge.txt.gz" # ToDo: sort out meta data for these - # "EB.WT_dge.txt.gz" # ToDo: sort out meta data for these - "EmbryonicMesenchymeE14.5_dge.txt.gz" - "EmbryonicStemCell.CJ7_Deep_dge.txt.gz" - "EmbryonicStemCells_dge.txt.gz" - "FetalBrain_dge.txt.gz" - 
"FetalFemaleGonad_dge.txt.gz" - "FetalIntestine_dge.txt.gz" - "FetalKidney1_dge.txt.gz" - "FetalKidney2_dge.txt.gz" - "FetalLiverE14.1_dge.txt.gz" - "FetalLung_dge.txt.gz" - "FetalMaleGonad_dge.txt.gz" - "FetalPancreas_dge.txt.gz" - "FetalStomach_dge.txt.gz" - # "human-293T_dge.txt.gz" # ToDo: sort out meta data for these - "Kidney1_dge.txt.gz" - "Kidney2_dge.txt.gz" - "Liver1_dge.txt.gz" - "Liver2_dge.txt.gz" - "Lung1_dge.txt.gz" - "Lung2_dge.txt.gz" - "Lung3_dge.txt.gz" - "MammaryGland.Involution.CD45.1_dge.txt.gz" - "MammaryGland.Involution.CD45.2_dge.txt.gz" - "MammaryGland.Involution1_dge.txt.gz" - "MammaryGland.Involution2_dge.txt.gz" - "MammaryGland.Lactation1_dge.txt.gz" - "MammaryGland.Lactation2_dge.txt.gz" - "MammaryGland.Pregnancy_dge.txt.gz" - "MammaryGland.Virgin.CD45.1_dge.txt.gz" - "MammaryGland.Virgin.CD45.2_dge.txt.gz" - "MammaryGland.Virgin1_dge.txt.gz" - "MammaryGland.Virgin2_dge.txt.gz" - "MammaryGland.Virgin3_dge.txt.gz" - "MammaryGland.Virgin4_dge.txt.gz" - # "mES.CJ7_dge.txt.gz" # ToDo: sort out meta data for these - "MesenchymalStemCells_dge.txt.gz" - "MesenchymalStemCellsPrimary_dge.txt.gz" - # "mouse-3T3_dge.txt.gz" # ToDo: sort out meta data for these - "Muscle_dge.txt.gz" - "NeonatalCalvaria1_dge.txt.gz" - "NeonatalCalvaria2_dge.txt.gz" - "NeonatalHeart_dge.txt.gz" - "NeonatalMuscle1_dge.txt.gz" - "NeonatalMuscle2_dge.txt.gz" - "NeonatalPancreas_dge.txt.zip" - "NeonatalRib1_dge.txt.gz" - "NeonatalRib2_dge.txt.gz" - "NeonatalRib3_dge.txt.gz" - "NeonatalSkin_dge.txt.gz" - "NeontalBrain1_dge.txt.gz" - "NeontalBrain2_dge.txt.gz" - "Ovary1_dge.txt.gz" - "Ovary2_dge.txt.gz" - "Pancreas_dge.txt.gz" - "PeripheralBlood1_dge.txt.gz" - "PeripheralBlood2_dge.txt.gz" - "PeripheralBlood3_dge.txt.gz" - "PeripheralBlood4_dge.txt.gz" - "PeripheralBlood5_dge.txt.gz" - "PeripheralBlood6_dge.txt.gz" - "PlacentaE14.1_dge.txt.gz" - "PlacentaE14.2_dge.txt.gz" - "Prostate1_dge.txt.gz" - "Prostate2_dge.txt.gz" - "SmallIntestine.CD45_dge.txt.gz" - 
"SmallIntestine1_dge.txt.gz" - "SmallIntestine2_dge.txt.gz" - "SmallIntestine3_dge.txt.gz" - "Spleen_dge.txt.gz" - "Stomach_dge.txt.gz" - "Testis1_dge.txt.gz" - "Testis2_dge.txt.gz" - "Thymus1_dge.txt.gz" - "Thymus2_dge.txt.gz" - "TrophoblastStemCells_dge.txt.gz" - "Uterus1_dge.txt.gz" - "Uterus2_dge.txt.gz" + "Bladder_dge.txt.gz", + "BoneMarrow1_dge.txt.gz", + "BoneMarrow2_dge.txt.gz", + "BoneMarrow3_dge.txt.gz", + "BoneMarrowcKit1_dge.txt.gz", + "BoneMarrowcKit2_dge.txt.gz", + "BoneMarrowcKit3_dge.txt.gz", + "Brain1_dge.txt.gz", + "Brain2_dge.txt.gz", + # "CJ7.EB14.Ezh2.1_dge.txt.gz", # ToDo: sort out meta data for these + # "CJ7.EB14.WT.1_dge.txt.gz", # ToDo: sort out meta data for these + # "CJ7.EB14.WT.2_dge.txt.gz", # ToDo: sort out meta data for these + # "EB.Ezh2_dge.txt.gz", # ToDo: sort out meta data for these + # "EB.WT_dge.txt.gz", # ToDo: sort out meta data for these + "EmbryonicMesenchymeE14.5_dge.txt.gz", + "EmbryonicStemCell.CJ7_Deep_dge.txt.gz", + "EmbryonicStemCells_dge.txt.gz", + "FetalBrain_dge.txt.gz", + "FetalFemaleGonad_dge.txt.gz", + "FetalIntestine_dge.txt.gz", + "FetalKidney1_dge.txt.gz", + "FetalKidney2_dge.txt.gz", + "FetalLiverE14.1_dge.txt.gz", + "FetalLung_dge.txt.gz", + "FetalMaleGonad_dge.txt.gz", + "FetalPancreas_dge.txt.gz", + "FetalStomach_dge.txt.gz", + # "human-293T_dge.txt.gz", # ToDo: sort out meta data for these + "Kidney1_dge.txt.gz", + "Kidney2_dge.txt.gz", + "Liver1_dge.txt.gz", + "Liver2_dge.txt.gz", + "Lung1_dge.txt.gz", + "Lung2_dge.txt.gz", + "Lung3_dge.txt.gz", + "MammaryGland.Involution.CD45.1_dge.txt.gz", + "MammaryGland.Involution.CD45.2_dge.txt.gz", + "MammaryGland.Involution1_dge.txt.gz", + "MammaryGland.Involution2_dge.txt.gz", + "MammaryGland.Lactation1_dge.txt.gz", + "MammaryGland.Lactation2_dge.txt.gz", + "MammaryGland.Pregnancy_dge.txt.gz", + "MammaryGland.Virgin.CD45.1_dge.txt.gz", + "MammaryGland.Virgin.CD45.2_dge.txt.gz", + "MammaryGland.Virgin1_dge.txt.gz", + "MammaryGland.Virgin2_dge.txt.gz", + 
"MammaryGland.Virgin3_dge.txt.gz", + "MammaryGland.Virgin4_dge.txt.gz", + # "mES.CJ7_dge.txt.gz", # ToDo: sort out meta data for these + "MesenchymalStemCells_dge.txt.gz", + "MesenchymalStemCellsPrimary_dge.txt.gz", + # "mouse-3T3_dge.txt.gz", # ToDo: sort out meta data for these + "Muscle_dge.txt.gz", + "NeonatalCalvaria1_dge.txt.gz", + "NeonatalCalvaria2_dge.txt.gz", + "NeonatalHeart_dge.txt.gz", + "NeonatalMuscle1_dge.txt.gz", + "NeonatalMuscle2_dge.txt.gz", + "NeonatalPancreas_dge.txt.zip", + "NeonatalRib1_dge.txt.gz", + "NeonatalRib2_dge.txt.gz", + "NeonatalRib3_dge.txt.gz", + "NeonatalSkin_dge.txt.gz", + "NeontalBrain1_dge.txt.gz", + "NeontalBrain2_dge.txt.gz", + "Ovary1_dge.txt.gz", + "Ovary2_dge.txt.gz", + "Pancreas_dge.txt.gz", + "PeripheralBlood1_dge.txt.gz", + "PeripheralBlood2_dge.txt.gz", + "PeripheralBlood3_dge.txt.gz", + "PeripheralBlood4_dge.txt.gz", + "PeripheralBlood5_dge.txt.gz", + "PeripheralBlood6_dge.txt.gz", + "PlacentaE14.1_dge.txt.gz", + "PlacentaE14.2_dge.txt.gz", + "Prostate1_dge.txt.gz", + "Prostate2_dge.txt.gz", + "SmallIntestine.CD45_dge.txt.gz", + "SmallIntestine1_dge.txt.gz", + "SmallIntestine2_dge.txt.gz", + "SmallIntestine3_dge.txt.gz", + "Spleen_dge.txt.gz", + "Stomach_dge.txt.gz", + "Testis1_dge.txt.gz", + "Testis2_dge.txt.gz", + "Thymus1_dge.txt.gz", + "Thymus2_dge.txt.gz", + "TrophoblastStemCells_dge.txt.gz", + "Uterus1_dge.txt.gz", + "Uterus2_dge.txt.gz", ] From 31db7435add2cb69f559cf55f3cc6423fe53fad6 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 16 Feb 2021 11:32:42 +0100 Subject: [PATCH 064/161] fixed prostate naming --- .../mouse_x_2018_microwellseq_han_x.py | 4 ++-- .../human_x_2020_microwellseq_han_x.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index e096a5302..939af2c2b 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -197,8 +197,8 @@ def __init__( "PeripheralBlood6_dge.txt.gz": "blood", "PlacentaE14.1_dge.txt.gz": "placenta", "PlacentaE14.2_dge.txt.gz": "placenta", - "Prostate1_dge.txt.gz": "prostate", - "Prostate2_dge.txt.gz": "prostate", + "Prostate1_dge.txt.gz": "prostate gland", + "Prostate2_dge.txt.gz": "prostate gland", "SmallIntestine.CD45_dge.txt.gz": "small intestine", "SmallIntestine1_dge.txt.gz": "small intestine", "SmallIntestine2_dge.txt.gz": "small intestine", diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 50145a6a2..5081beef6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -236,7 +236,7 @@ def __init__( 'FetalStomach_2': 'stomach', 'FetalThymus_1': 'thymus', 'FetalThymus_2': 'thymus', - 'HESC_1': '', + 'HESC_1': 'blastocyst', 'Liver_1': 'liver', 'Liver_2': 'liver', 'NeonatalAdrenalGland_1': 'adrenal gland', From 1938b4b9bb0904e20ef2181d1ee56782664c4a3e Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Tue, 16 Feb 2021 12:31:47 +0100 Subject: [PATCH 065/161] fixed MCA --- .../mouse_x_2018_microwellseq_han_x.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 939af2c2b..d939020e9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -11,7 +11,7 @@ SAMPLE_FNS = [ "Bladder_dge.txt.gz", "BoneMarrow1_dge.txt.gz", - "BoneMarrow2_dge.txt.gz", + # "BoneMarrow2_dge.txt.gz", # ToDo: not annotated, potentially bad quality? "BoneMarrow3_dge.txt.gz", "BoneMarrowcKit1_dge.txt.gz", "BoneMarrowcKit2_dge.txt.gz", From 930dc080c9bbeb8060949930823b85b68ed52380 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Wed, 17 Feb 2021 11:28:25 +0100 Subject: [PATCH 066/161] Update dataloaders (#128) * add automatic user agent adaptation to automatic download to prevent 403 error when downloading 10x PBMC dataset * read 10 PBMC dataset directly from raw downlaoded file * switch kinchen colon dataset to use GEO files instead of DCP files * fix DatasetBaseGroupLoadingManyFiles kinchen dataloader * rename path to data_path in remaining places * move ICA dataset to correct folder * fix directory creation in dataset download method * start counting dataset indices at 1 [skip ci] --- sfaira/data/base.py | 13 ++-- .../databases/cellxgene/cellxgene_group.py | 4 +- .../data/dataloaders/databases/super_group.py | 4 +- .../human_colon_2019_10x_kinchen_001.py | 77 ++++++++----------- .../mouse_pancreas_2019_10x_thompson_x.py | 2 +- .../human_mixed_2019_10x_szabo_001.py | 2 +- .../human_blood_2019_10x_10xGenomics_001.py | 43 +++++++---- .../loaders/dno_doi_regev/__init__.py | 1 + .../human_x_2018_10x_ica_001.py | 0 .../your_dataset_file.py | 2 +- 10 files changed, 76 insertions(+), 72 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/dno_doi_regev/__init__.py rename sfaira/data/dataloaders/loaders/{dno_doi_10x_genomics => dno_doi_regev}/human_x_2018_10x_ica_001.py (100%) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 6a752bfeb..22be2806d 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -188,8 +188,8 @@ def download(self, **kwargs): assert 
self.data_dir_base is not None, "No path was provided when instantiating the dataset container, " \ "cannot download datasets." - if not os.path.exists(self.data_dir): - os.makedirs(self.data_dir) + if not os.path.exists(os.path.join(self.data_dir_base, self.directory_formatted_doi)): + os.makedirs(os.path.join(self.data_dir_base, self.directory_formatted_doi)) urls = self.download_url_data[0][0] + self.download_url_meta[0][0] @@ -218,12 +218,13 @@ def download(self, **kwargs): else: url = urllib.parse.unquote(url) - - # Catch SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: - # unable to get local issuer certificate (_ssl.c:1124) try: urllib.request.urlopen(url) - except urllib.error.URLError: + except urllib.error.HTTPError as err: # modify headers if urllib useragent is blocked (eg.10x datasets) + opener = urllib.request.build_opener() + opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64)')] + urllib.request.install_opener(opener) + except urllib.error.URLError: # Catch SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1124) ssl._create_default_https_context = ssl._create_unverified_context if 'Content-Disposition' in urllib.request.urlopen(url).info().keys(): diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py index 068bd4c0b..f45dfbca7 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -10,13 +10,13 @@ class DatasetGroup(DatasetGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() - fn_ls = os.listdir(path) + fn_ls = os.listdir(data_path) fn_ls = [x for x in fn_ls if x in 
self._ADATA_IDS_CELLXGENE.accepted_file_names] datasets = [ Dataset(data_path=path, fn=x, meta_path=meta_path, cache_path=cache_path) diff --git a/sfaira/data/dataloaders/databases/super_group.py b/sfaira/data/dataloaders/databases/super_group.py index df0605579..cf748c851 100644 --- a/sfaira/data/dataloaders/databases/super_group.py +++ b/sfaira/data/dataloaders/databases/super_group.py @@ -8,14 +8,14 @@ class DatasetSuperGroupDatabases(DatasetSuperGroup): def __init__( self, - path: Union[str, None] = None, + data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, ): dataset_groups = [] # List all data bases here: dataset_groups.append(DatasetGroupCellxgene( - path=path, + data_path=data_path, meta_path=meta_path, cache_path=cache_path )) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index f2a1fe8ae..5b55300b0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -1,44 +1,50 @@ -import anndata import os from typing import Union import pandas as pd +import anndata as ad +import scipy.sparse +import numpy as np -from sfaira.data import DatasetBase +from sfaira.data import DatasetBaseGroupLoadingManyFiles +SAMPLE_FNS = [ + "HC", + "UC", +] -class Dataset(DatasetBase): + +class Dataset(DatasetBaseGroupLoadingManyFiles): def __init__( self, + sample_fn: str, data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, **kwargs ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2019_10x_kinchen_001_10.1016/j.cell.2018.08.067" + super().__init__(sample_fn=sample_fn, data_path=data_path, 
meta_path=meta_path, cache_path=cache_path, **kwargs) + self.id = f"human_colon_2019_10x_kinchen_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_10.1016/j.cell.2018.08.067" - self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom" - self.download_url_meta = [ - "private,uc_meta_data_stromal_with_donor.txt", - "private,hc_meta_data_stromal_with_donor.txt", - ] + self.download_url_data = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&" \ + f"file=GSE114374%5FHuman%5F{sample_fn}%5Fexpression%5Fmatrix%2Etxt%2Egz" + self.download_url_meta = f"private,{sample_fn.lower()}_meta_data_stromal_with_donor.txt" self.author = "Simmons" self.doi = "10.1016/j.cell.2018.08.067" - self.normalization = "raw" + self.normalization = "norm" self.organ = "lamina propria of mucosa of colon" self.organism = "human" self.protocol = "10X sequencing" self.year = 2019 - self.var_symbol_col = "names" - self.var_ensembl_col = "Accession" - - self.obs_key_state_exact = "donor_organism.diseases.ontology_label" + self.var_symbol_col = "index" + self.obs_key_state_exact = "state_exact" self.obs_key_healthy = self.obs_key_state_exact - self.healthy_state_healthy = "normal" - self.obs_key_cellontology_original = "celltype" + self.healthy_state_healthy = "healthy colon" + self.obs_key_cellontology_original = "Cluster" + self.obs_key_age = "Age" + self.obs_key_sex = "Sex" self.class_maps = { "0": { @@ -61,31 +67,14 @@ def __init__( def _load(self): fn = [ - os.path.join(self.data_dir, "f8aa201c-4ff1-45a4-890e-840d63459ca2.homo_sapiens.loom"), - os.path.join(self.data_dir, "uc_meta_data_stromal_with_donor.txt"), - os.path.join(self.data_dir, "hc_meta_data_stromal_with_donor.txt") + os.path.join(self.data_dir, f"GSE114374_Human_{self.sample_fn}_expression_matrix.txt.gz"), + os.path.join(self.data_dir, f"{self.sample_fn.lower()}_meta_data_stromal_with_donor.txt"), ] - adata = 
anndata.read_loom(fn[0]) - ctuc = pd.read_csv(fn[1], sep="\t") - cthealthy = pd.read_csv(fn[2], sep="\t") - adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() - adata = adata[adata.X.sum(axis=1).flatten() >= 250].copy() - uc = adata[adata.obs["donor_organism.diseases.ontology_label"] == "ulcerative colitis (disease)"].copy() - bcuc = [i.split("-")[0] for i in ctuc["Barcode"]] - seluc = [] - for i in uc.obs["barcode"]: - seluc.append((uc.obs["barcode"].str.count(i).sum() == 1) and i in bcuc) - uc = uc[seluc].copy() - ctuc.index = [i.split("-")[0] for i in ctuc["Barcode"]] - uc.obs["celltype"] = [ctuc.loc[i]["Cluster"] for i in uc.obs["barcode"]] - uc.var = uc.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") - healthy = adata[adata.obs["donor_organism.diseases.ontology_label"] == "normal"].copy() - bchealthy = [i.split("-")[0] for i in cthealthy["Barcode"]] - selhealthy = [] - for i in healthy.obs["barcode"]: - selhealthy.append((healthy.obs["barcode"].str.count(i).sum() == 1) and i in bchealthy) - healthy = healthy[selhealthy].copy() - cthealthy.index = [i.split("-")[0] for i in cthealthy["Barcode"]] - healthy.obs["celltype"] = [cthealthy.loc[i]["Cluster"] for i in healthy.obs["barcode"]] - healthy.var = healthy.var.reset_index().rename(columns={"index": "names"}).set_index("featurekey") - self.adata = healthy.concatenate(uc) + matrix = pd.read_csv(fn[0], sep="\t") + obs = pd.read_csv(fn[1], sep="\t", index_col=3) + self.adata = ad.AnnData(matrix.T) + self.adata.X = scipy.sparse.csc_matrix(np.expm1(self.adata.X)) + self.adata.obs = obs + self.adata.obs['state_exact'] = "healthy colon" if self.sample_fn == "HC" else "ulcerative colitis" + s_dict = {"F": "female", "M": "male"} + self.adata.obs['Sex'] = [s_dict[i] for i in self.adata.obs['Sex']] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 5a882cd97..0145cefec 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -30,7 +30,7 @@ def __init__( **kwargs ): super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_" \ + self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_" \ f"10.1016/j.cmet.2019.01.021" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117770/suppl/GSE117770_RAW.tar" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 974eb109f..61fb384c8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -38,7 +38,7 @@ def __init__( **kwargs ): super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_10.1038/s41467-019-12464-3" + self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_10.1038/s41467-019-12464-3" self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" self.download_url_meta = [ diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py index 
2e2d2d284..95ecc5013 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py @@ -1,23 +1,14 @@ -import anndata import os from typing import Union +import scipy.sparse +import anndata as ad +import numpy as np +import tables from sfaira.data import DatasetBase class Dataset(DatasetBase): - """ - This data loader requires manual preprocessing of the raw datafile. To download the data, use the link in the - `.download_website` attribute of this class. To create the file required by this dataloader, run the following - python code: - - import scanpy - scanpy.read_10x_h5("pbmc_10k_v3_filtered_feature_bc_matrix.h5").write("pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") - - :param data_path: - :param meta_path: - :param kwargs: - """ def __init__( self, @@ -47,5 +38,27 @@ def __init__( self.var_ensembl_col = "gene_ids" def _load(self): - fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5ad") - self.adata = anndata.read(fn) + fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5") + with tables.open_file(str(fn), 'r') as f: + dsets = {} + for node in f.walk_nodes('/matrix', 'Array'): + dsets[node.name] = node.read() + M, N = dsets['shape'] + data = dsets['data'] + if dsets['data'].dtype == np.dtype('int32'): + data = dsets['data'].view('float32') + data[:] = dsets['data'] + matrix = scipy.sparse.csr_matrix( + (data, dsets['indices'], dsets['indptr']), + shape=(N, M), + ) + self.adata = ad.AnnData( + matrix, + dict(obs_names=dsets['barcodes'].astype(str)), + dict( + var_names=dsets['name'].astype(str), + gene_ids=dsets['id'].astype(str), + feature_types=dsets['feature_type'].astype(str), + genome=dsets['genome'].astype(str), + ), + ) diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/__init__.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/__init__.py new file mode 100644 
index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py similarity index 100% rename from sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_x_2018_10x_ica_001.py rename to sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py index b13fc28b7..fc7d8ad3f 100644 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py +++ b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py @@ -27,7 +27,7 @@ def __init__( **kwargs ) - self.id = f"sth_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_doi" # ToDo: Index the Dataset ID by the file. + self.id = f"sth_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_doi" # ToDo: Index the Dataset ID by the file. # ToDo Add you meta data here. def _load(self): From 4648a2f37844422af132969c894a3cbc36b62ef2 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Wed, 17 Feb 2021 13:43:40 +0100 Subject: [PATCH 067/161] use default cache and meta paths if not user provided (#130) * catch specifically 403 https errors in download method * fix path in cellxgene group * use a default cache path if not explicitely set by the user. 
closes #127 * move handling of cache path into property of Dataset * use default meta path if none is provided --- sfaira/data/base.py | 85 +++++++++---------- .../databases/cellxgene/cellxgene_group.py | 2 +- 2 files changed, 40 insertions(+), 47 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 22be2806d..26acc1972 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -220,11 +220,15 @@ def download(self, **kwargs): url = urllib.parse.unquote(url) try: urllib.request.urlopen(url) - except urllib.error.HTTPError as err: # modify headers if urllib useragent is blocked (eg.10x datasets) - opener = urllib.request.build_opener() - opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64)')] - urllib.request.install_opener(opener) - except urllib.error.URLError: # Catch SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1124) + except urllib.error.HTTPError as err: + # modify headers if urllib useragent is blocked (eg.10x datasets) + if err.code == 403: + opener = urllib.request.build_opener() + opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64)')] + urllib.request.install_opener(opener) + except urllib.error.URLError: + # Catch SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable + # to get local issuer certificate (_ssl.c:1124) ssl._create_default_https_context = ssl._create_unverified_context if 'Content-Disposition' in urllib.request.urlopen(url).info().keys(): @@ -274,6 +278,19 @@ def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = """ return False + @property + def cache_fn(self): + if self.directory_formatted_doi is None or self._directory_formatted_id is None: + warnings.warn(f"Caching enabled, but Dataset.id or Dataset.doi not set. 
" + f"Disabling caching for now.") + return None + else: + if self.cache_path is None: + cache = self.data_dir + else: + cache = os.path.join(self.cache_path, self.directory_formatted_doi) + return os.path.join(cache, "cache", self._directory_formatted_id + ".h5ad") + def _load_cached( self, load_raw: bool, @@ -288,61 +305,35 @@ def _load_cached( :param allow_caching: Whether to allow method to cache adata object for faster re-loading. :return: """ - def _get_cache_fn(): - if None in [ - self.cache_path, - self.directory_formatted_doi, - self._directory_formatted_id - ]: - if self.cache_path is None: - w = "cache path" - elif self.directory_formatted_doi is None: - w = "self.doi" - else: # self._directory_formatted_id is None - w = "self.id" - warnings.warn(f"Caching enabled, but cannot find caching directory. Set {w} first. " - f"Disabling caching for now.") - return None - - cache = os.path.join( - self.cache_path, - self.directory_formatted_doi, - "cache", - self._directory_formatted_id + ".h5ad" - ) - return cache - def _cached_reading(fn_cache): - if fn_cache is not None: - if os.path.exists(fn_cache): - self.adata = anndata.read_h5ad(fn_cache) + def _cached_reading(filename): + if filename is not None: + if os.path.exists(filename): + self.adata = anndata.read_h5ad(filename) else: - warnings.warn(f"Cached loading enabled, but cache file {fn_cache} not found. " + warnings.warn(f"Cached loading enabled, but cache file {filename} not found. 
" f"Loading from raw files.") self._load() else: self._load() - def _cached_writing(fn_cache): - if fn_cache is not None: - dir_cache = os.path.dirname(fn_cache) + def _cached_writing(filename): + if filename is not None: + dir_cache = os.path.dirname(filename) if not os.path.exists(dir_cache): os.makedirs(dir_cache) - self.adata.write_h5ad(fn_cache) + self.adata.write_h5ad(filename) if load_raw and allow_caching: self._load() - fn_cache = _get_cache_fn() - _cached_writing(fn_cache) + _cached_writing(self.cache_fn) elif load_raw and not allow_caching: self._load() elif not load_raw and allow_caching: - fn_cache = _get_cache_fn() - _cached_reading(fn_cache) - _cached_writing(fn_cache) + _cached_reading(self.cache_fn) + _cached_writing(self.cache_fn) else: # not load_raw and not allow_caching - fn_cache = _get_cache_fn() - _cached_reading(fn_cache) + _cached_reading(self.cache_fn) def load( self, @@ -813,9 +804,11 @@ def citation(self): @property def meta_fn(self): if self.meta_path is None: - return None + meta = self.data_dir else: - return os.path.join(self.meta_path, self.doi_cleaned_id + "_meta.csv") + meta = os.path.join(self.meta_path, self.directory_formatted_doi) + + return os.path.join(meta, "meta", self.doi_cleaned_id + "_meta.csv") def load_meta(self, fn: Union[PathLike, str, None]): if fn is None: diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py index f45dfbca7..ee89ff3fd 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -19,7 +19,7 @@ def __init__( fn_ls = os.listdir(data_path) fn_ls = [x for x in fn_ls if x in self._ADATA_IDS_CELLXGENE.accepted_file_names] datasets = [ - Dataset(data_path=path, fn=x, meta_path=meta_path, cache_path=cache_path) + Dataset(data_path=data_path, fn=x, meta_path=meta_path, cache_path=cache_path) for x in fn_ls ] keys = [x.id for x in 
datasets] From d7f1438fa274084e442b8ec6ae7e01f1ce9a87af Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Wed, 17 Feb 2021 14:38:49 +0100 Subject: [PATCH 068/161] dataloaders' _load() returns adata instead of assigning self.adata attribute (#131) * dataloaders' _load() returns adata instead of assigning self.adata attribute * fix flake8 --- sfaira/data/base.py | 13 +++--- .../human_liver_2019_10x_ramachandran_001.py | 4 +- .../human_pancreas_2017_smartseq2_enge_001.py | 10 +++-- .../mouse_x_2018_microwellseq_han_x.py | 16 +++---- .../human_colon_2019_10x_kinchen_001.py | 12 +++--- .../human_colon_2019_10x_smilie_001.py | 9 ++-- .../human_ileum_2019_10x_martin_001.py | 11 ++--- .../human_prostate_2018_10x_henry_001.py | 9 ++-- .../human_pancreas_2016_indrop_baron_001.py | 9 ++-- ...pancreas_2016_smartseq2_segerstolpe_001.py | 8 ++-- .../mouse_pancreas_2019_10x_thompson_x.py | 10 +++-- .../human_lung_2020_10x_miller_001.py | 10 ++--- .../human_brain_2017_DroNcSeq_habib_001.py | 9 ++-- .../human_testis_2018_10x_guo_001.py | 9 ++-- .../human_liver_2018_10x_macparland_001.py | 6 ++- .../human_kidney_2019_droncseq_lake_001.py | 6 ++- .../human_mixed_2019_10x_szabo_001.py | 40 +++++++++--------- .../human_eye_2019_10x_menon_001.py | 4 +- .../human_placenta_2018_10x_ventotormo_001.py | 14 ++++--- .../human_liver_2019_CELseq2_aizarani_001.py | 8 ++-- .../human_liver_2019_10x_popescu_001.py | 4 +- .../human_x_2020_microwellseq_han_x.py | 42 ++++++++++--------- .../human_lung_2020_10x_travaglini_001.py | 12 +++--- .../human_colon_2020_10x_james_001.py | 9 ++-- .../human_lung_2019_10x_braga_x.py | 7 ++-- .../human_lung_2019_dropseq_braga_001.py | 7 ++-- .../mouse_brain_2019_10x_hove_001.py | 17 ++++---- .../human_kidney_2020_10x_liao_001.py | 10 +++-- .../human_eye_2019_10x_voigt_001.py | 6 ++- .../human_x_2019_10x_wang_001.py | 9 ++-- .../human_lung_2020_10x_lukassen_001.py | 10 ++--- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 16 
+++---- .../human_lung_2020_10x_habermann_001.py | 13 +++--- .../human_kidney_2019_10x_stewart_001.py | 6 ++- .../human_thymus_2020_10x_park_001.py | 6 ++- .../human_x_2019_10x_madissoon_001.py | 12 +++--- .../human_eye_2019_10x_lukowski_001.py | 9 ++-- .../human_blood_2019_10x_10xGenomics_001.py | 41 +++++++++--------- .../dno_doi_regev/human_x_2018_10x_ica_001.py | 6 ++- sfaira/train/summaries.py | 11 +++-- sfaira/train/train_model.py | 4 +- 41 files changed, 268 insertions(+), 206 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 26acc1972..ed4df4a24 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -165,7 +165,7 @@ def __init__( self._ontology_class_map = None @abc.abstractmethod - def _load(self): + def _load(self) -> anndata.AnnData: pass @property @@ -281,8 +281,7 @@ def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = @property def cache_fn(self): if self.directory_formatted_doi is None or self._directory_formatted_id is None: - warnings.warn(f"Caching enabled, but Dataset.id or Dataset.doi not set. " - f"Disabling caching for now.") + warnings.warn("Caching enabled, but Dataset.id or Dataset.doi not set. Disabling caching for now.") return None else: if self.cache_path is None: @@ -313,9 +312,9 @@ def _cached_reading(filename): else: warnings.warn(f"Cached loading enabled, but cache file {filename} not found. 
" f"Loading from raw files.") - self._load() + self.adata = self._load() else: - self._load() + self.adata = self._load() def _cached_writing(filename): if filename is not None: @@ -325,10 +324,10 @@ def _cached_writing(filename): self.adata.write_h5ad(filename) if load_raw and allow_caching: - self._load() + self.adata = self._load() _cached_writing(self.cache_fn) elif load_raw and not allow_caching: - self._load() + self.adata = self._load() elif not load_raw and allow_caching: _cached_reading(self.cache_fn) _cached_writing(self.cache_fn) diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py index d77b04481..ecb5706ba 100644 --- a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py @@ -79,4 +79,6 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "ramachandran.h5ad") - self.adata = anndata.read(fn) + adata = anndata.read(fn) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index cb6794ff8..98ee94d4f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -63,8 +63,8 @@ def _load(self): d = pd.read_csv(tar.extractfile(member), compression="gzip", header=None, sep="\t", index_col=0, names=[member.name.split("_")[0]]) dfs.append(d) - self.adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + adata = ad.AnnData(pd.concat(dfs, 
axis=1).iloc[1:-6].T) + adata.X = scipy.sparse.csc_matrix(adata.X) with gzip.open(fn[1]) as f: file_content = [i.decode("utf-8") for i in f.readlines()] inputstring = "" @@ -82,5 +82,7 @@ def _load(self): d = d.reset_index().set_index("ID_REF") d.columns.name = None d.index.name = None - self.adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in self.adata.obs.index] - self.adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in self.adata.obs.index] + adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in adata.obs.index] + adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in adata.obs.index] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index d939020e9..76eaeb544 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -133,8 +133,8 @@ def __init__( "CJ7.EB14.Ezh2.1_dge.txt.gz": None, "CJ7.EB14.WT.1_dge.txt.gz": None, "CJ7.EB14.WT.2_dge.txt.gz": None, - "EB.Ezh2_dge.txt.gz": None, - "EB.WT_dge.txt.gz": None, + "EB.Ezh2_dge.txt.gz": None, + "EB.WT_dge.txt.gz": None, "EmbryonicMesenchymeE14.5_dge.txt.gz": "mesenchyme", "EmbryonicStemCell.CJ7_Deep_dge.txt.gz": "blastocyst", "EmbryonicStemCells_dge.txt.gz": "blastocyst", @@ -226,8 +226,8 @@ def __init__( "CJ7.EB14.Ezh2.1_dge.txt.gz": None, "CJ7.EB14.WT.1_dge.txt.gz": None, "CJ7.EB14.WT.2_dge.txt.gz": None, - "EB.Ezh2_dge.txt.gz": None, - "EB.WT_dge.txt.gz": None, + "EB.Ezh2_dge.txt.gz": None, + "EB.WT_dge.txt.gz": None, "EmbryonicMesenchymeE14.5_dge.txt.gz": "embryonic", "EmbryonicStemCell.CJ7_Deep_dge.txt.gz": "embryonic", "EmbryonicStemCells_dge.txt.gz": "embryonic", @@ -341,6 
+341,8 @@ def _load(self): header=0 ) - self.adata = anndata.AnnData(data.T) - self.adata = self.adata[np.array([x in celltypes.index for x in self.adata.obs_names])].copy() - self.adata.obs = celltypes.loc[self.adata.obs_names, :] + adata = anndata.AnnData(data.T) + adata = adata[np.array([x in celltypes.index for x in adata.obs_names])].copy() + adata.obs = celltypes.loc[adata.obs_names, :] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py index 5b55300b0..9bf27bf17 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py @@ -72,9 +72,11 @@ def _load(self): ] matrix = pd.read_csv(fn[0], sep="\t") obs = pd.read_csv(fn[1], sep="\t", index_col=3) - self.adata = ad.AnnData(matrix.T) - self.adata.X = scipy.sparse.csc_matrix(np.expm1(self.adata.X)) - self.adata.obs = obs - self.adata.obs['state_exact'] = "healthy colon" if self.sample_fn == "HC" else "ulcerative colitis" + adata = ad.AnnData(matrix.T) + adata.X = scipy.sparse.csc_matrix(np.expm1(adata.X)) + adata.obs = obs + adata.obs['state_exact'] = "healthy colon" if self.sample_fn == "HC" else "ulcerative colitis" s_dict = {"F": "female", "M": "male"} - self.adata.obs['Sex'] = [s_dict[i] for i in self.adata.obs['Sex']] + adata.obs['Sex'] = [s_dict[i] for i in adata.obs['Sex']] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py index fe69f193f..388f37cd8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py @@ -81,7 +81,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "smillie19_epi.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py index 9d63f81cc..e184ad416 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py @@ -62,8 +62,9 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "martin19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) - self.adata = self.adata[self.adata.obs["CellType"] != "Doublets"].copy() + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + adata = adata[adata.obs["CellType"] != "Doublets"].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py index 40b5377bd..0f982892a 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py @@ -51,7 +51,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "henry18_0.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index c0cdaf49f..0837a024f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -57,7 +57,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "baron16.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index bedcd2681..9ee4974b9 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -65,7 +65,9 @@ def _load(self): df.index = df.index.get_level_values(0) df = df.drop("#samples", axis=1) df = df.T.iloc[:, :26178] - self.adata = anndata.AnnData(df) - self.adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[self.adata.obs.index] + adata = anndata.AnnData(df) + adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[adata.obs.index] # filter observations which are not cells (empty wells, low quality cells etc.) - self.adata = self.adata[self.adata.obs["Characteristics[cell type]"] != "not applicable"].copy() + adata = adata[adata.obs["Characteristics[cell type]"] != "not applicable"].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py index 0145cefec..18fae8841 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py @@ -64,8 +64,10 @@ def _load(self): sep="\t") var.columns = ["ensembl", "names"] var.index = var["ensembl"].values - self.adata = anndata.AnnData(X=x, obs=obs, var=var) - self.adata.var_names_make_unique() + adata = anndata.AnnData(X=x, obs=obs, var=var) + adata.var_names_make_unique() celltypes = pd.read_csv(os.path.join(self.data_dir, self.sample_fn + "_annotation.csv"), index_col=0) - self.adata = self.adata[celltypes.index] - self.adata.obs["celltypes"] = celltypes + adata = adata[celltypes.index] + adata.obs["celltypes"] = celltypes + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py index ecfed8c7b..dd39a4205 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py @@ -68,9 +68,9 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "miller20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None]))\ - .multiply(1 / 10000) - + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / 10000) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py index 79aaa7162..52840c0d3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py @@ -58,7 +58,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "habib17.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py index 327975757..dd9fcc50d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py @@ -54,7 +54,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "guo18_donor.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py index 3397212af..93778fdd9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py @@ -65,6 +65,8 @@ def _load(self): os.path.join(self.data_dir, "GSE115469.csv.gz"), os.path.join(self.data_dir, "GSE115469_labels.txt") ] - self.adata = anndata.read_csv(fn[0]).T + adata = anndata.read_csv(fn[0]).T celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName") - self.adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in self.adata.obs.index] + adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in adata.obs.index] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 072aec106..604c5abcf 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -74,6 +74,8 @@ def _load(self): os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) annot = pd.read_csv(fn[1], index_col=0, dtype="category") - self.adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in self.adata.obs.index] + adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in adata.obs.index] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py index 61fb384c8..d4eb2980e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py @@ -77,32 +77,34 @@ def _load(self): var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) if df.columns[-1].startswith("Un"): df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - self.adata = anndata.AnnData(df.T) - self.adata.var = var + adata = anndata.AnnData(df.T) + adata.var = var if "PP001" in self.sample_fn or "PP002" in self.sample_fn: - self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "lung" + adata.obs["donor"] = "Donor1" + adata.obs["organ"] = "lung" elif "PP003" in self.sample_fn or "PP004" in self.sample_fn: - 
self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "bone marrow" + adata.obs["donor"] = "Donor1" + adata.obs["organ"] = "bone marrow" elif "PP005" in self.sample_fn or "PP006" in self.sample_fn: - self.adata.obs["donor"] = "Donor1" - self.adata.obs["organ"] = "lymph Node" + adata.obs["donor"] = "Donor1" + adata.obs["organ"] = "lymph Node" elif "PP009" in self.sample_fn or "PP010" in self.sample_fn: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "lung" + adata.obs["donor"] = "Donor2" + adata.obs["organ"] = "lung" elif "PP011" in self.sample_fn or "PP012" in self.sample_fn: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "bone marrow" + adata.obs["donor"] = "Donor2" + adata.obs["organ"] = "bone marrow" elif "PP013" in self.sample_fn or "PP014" in self.sample_fn: - self.adata.obs["donor"] = "Donor2" - self.adata.obs["organ"] = "lymph Node" - self.adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." + self.adata.obs.index - self.adata.obs["cell_ontology_class"] = "Unknown" + adata.obs["donor"] = "Donor2" + adata.obs["organ"] = "lymph Node" + adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." 
+ adata.obs.index + adata.obs["cell_ontology_class"] = "Unknown" df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) for i in df1.index: - self.adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] + adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] for i in df2.index: - self.adata.obs["cell_ontology_class"].loc[i] = df2.loc[i][1] - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) + adata.obs["cell_ontology_class"].loc[i] = df2.loc[i][1] + adata.X = scipy.sparse.csc_matrix(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py index af55b0e8b..0b2409204 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py @@ -50,4 +50,6 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "menon19.processed.h5ad") - self.adata = anndata.read(fn) + adata = anndata.read(fn) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py index c98b8add7..38bb27c65 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py @@ -88,13 +88,15 @@ def _load(self): os.path.join(self.data_dir, f"{self.sample_fn}.1.zip"), os.path.join(self.data_dir, f"{self.sample_fn}.2.zip"), ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) df = pd.read_csv(fn[1], 
sep="\t") for i in df.columns: - self.adata.obs[i] = [df.loc[j][i] for j in self.adata.obs.index] + adata.obs[i] = [df.loc[j][i] for j in adata.obs.index] - self.adata.var["ensembl"] = [i.split("_")[1] for i in self.adata.var.index] - self.adata.var["names"] = [i.split("_")[0] for i in self.adata.var.index] - self.adata.var = self.adata.var.reset_index().reset_index().drop("index", axis=1) - self.adata = self.adata[:, ~self.adata.var.index.isin( + adata.var["ensembl"] = [i.split("_")[1] for i in adata.var.index] + adata.var["names"] = [i.split("_")[0] for i in adata.var.index] + adata.var = adata.var.reset_index().reset_index().drop("index", axis=1) + adata = adata[:, ~adata.var.index.isin( ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py index fe30a04eb..cd4d9021e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py @@ -84,7 +84,9 @@ def _load(self): os.path.join(self.data_dir, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), os.path.join(self.data_dir, "GSE124395_clusterpartition.txt.gz") ] - self.adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) celltype_df = pd.read_csv(fn[1], sep=" ") - self.adata = self.adata[[i in celltype_df.index for i in self.adata.obs.index]].copy() - self.adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in self.adata.obs.index] + adata = adata[[i in celltype_df.index for i in adata.obs.index]].copy() + adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in adata.obs.index] + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py index 23ed85d8a..57bbdde0d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py @@ -68,4 +68,6 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "fetal_liver_alladata_.h5ad") - self.adata = anndata.read(fn) + adata = anndata.read(fn) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 5081beef6..2b32ed892 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -246,7 +246,7 @@ def __init__( self.download_url_data = "https://ndownloader.figshare.com/files/17727365" self.download_url_meta = [ - "https://ndownloader.figshare.com/files/21758835", + "adata", "https://ndownloader.figshare.com/files/22447898", ] @@ -273,19 +273,19 @@ def __init__( self.var_symbol_col = "index" def _load_full(self): - self.adata = anndata.read(os.path.join(self.data_dir, "HCL_Fig1_self.adata.h5ad")) + adata = anndata.read(os.path.join(self.data_dir, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix - self.adata.X = scipy.sparse.csr_matrix(self.adata.X).copy() + adata.X = scipy.sparse.csr_matrix(adata.X).copy() # harmonise annotations for col in ["batch", "tissue"]: - self.adata.obs[col] = self.adata.obs[col].astype("str") - self.adata.obs.index = self.adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + adata.obs[col] = adata.obs[col].astype("str") + adata.obs.index = 
adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( "FetalFemaleGonald", "FetalFemaleGonad", regex=True) - self.adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", - "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) - self.adata.obs.index = ["-".join(i.split("-")[:-1]) for i in self.adata.obs.index] + adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", + "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) + adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] # load celltype labels and harmonise them # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: @@ -299,14 +299,14 @@ def _load_full(self): "FetalFemaleGonald", "FetalFemaleGonad", regex=True) # check that the order of cells and cell labels is the same - assert np.all(fig1_anno.index == self.adata.obs.index) + assert np.all(fig1_anno.index == adata.obs.index) - # add annotations to self.adata object and rename columns - self.adata.obs = pd.concat([self.adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) - self.adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", - "celltype_global"] + # add annotations to adata object and rename columns + adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) + adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", + "celltype_global"] - # add sample-wise annotations to the full self.adata object + # add sample-wise annotations to the full adata object df = pd.DataFrame( columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) @@ -315,17 
+315,19 @@ def _load_full(self): df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") df = pd.concat([df, df1], sort=True) df = df.set_index("Cell_id") - self.adata = self.adata[[i in df.index for i in self.adata.obs.index]].copy() - a_idx = self.adata.obs.index.copy() - self.adata.obs = pd.concat([self.adata.obs, df[ + adata = adata[[i in df.index for i in adata.obs.index]].copy() + a_idx = adata.obs.index.copy() + adata.obs = pd.concat([adata.obs, df[ ["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"] ]], axis=1) - assert np.all(a_idx == self.adata.obs.index) + assert np.all(a_idx == adata.obs.index) # remove mouse cells from the object # ToDo: add this back in as mouse data sets? - self.adata = self.adata[self.adata.obs["Source"] != "MCA2.0"].copy() + adata = adata[adata.obs["Source"] != "MCA2.0"].copy() # tidy up the column names of the obs annotations - self.adata.obs.columns = [ + adata.obs.columns = [ "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py index da7dccdd4..1d32cb3d0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py @@ -204,10 +204,10 @@ def _load(self): norm_const = 1000000 else: norm_const = 10000 - self.adata = anndata.read(fn) - self.adata.X = scipy.sparse.csc_matrix(self.adata.X) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nUMI"].values[:, None])) \ - .multiply(1 / norm_const) - + adata = anndata.read(fn) + adata.X = 
scipy.sparse.csc_matrix(adata.X) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / norm_const) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py index 027c31bb5..c697458b9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py @@ -69,7 +69,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "james20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py index dec0e4104..04b0f0bba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py @@ -91,7 +91,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 488738311..2656e52fe 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -58,7 +58,8 @@ def _load(self): os.path.join(self.data_dir, "GSE130148_raw_counts.csv.gz"), os.path.join(self.data_dir, "GSE130148_barcodes_cell_types.txt.gz"), ] - self.adata = anndata.read_csv(fn[0]).T - self.adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) - + adata = anndata.read_csv(fn[0]).T + adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py index d932be0f3..bc4040d6b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py @@ -74,29 +74,28 @@ def _load_full(self): with zipfile.ZipFile(fn[0]) as archive: x = scipy.io.mmread(archive.open('filtered_gene_bc_matrices_mex/mm10/matrix.mtx')).T.tocsr() - self.adata = anndata.AnnData(x) + adata = anndata.AnnData(x) var = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/genes.tsv'), sep="\t", header=None) var.columns = ["ensembl", "name"] obs_names = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/barcodes.tsv'), sep="\t", header=None )[0].values - assert len(obs_names) == self.adata.shape[0] # ToDo take asserts out - assert var.shape[0] == self.adata.shape[1] # ToDo take asserts out obs = pandas.read_csv(fn[1]) # Match annotation to 
raw data. obs.index = obs["cell"].values obs = obs.loc[[i in obs_names for i in obs.index], :] idx_tokeep = np.where([i in obs.index for i in obs_names])[0] - self.adata = self.adata[idx_tokeep, :] + adata = adata[idx_tokeep, :] obs_names = obs_names[idx_tokeep] idx_map = np.array([obs.index.tolist().index(i) for i in obs_names]) - self.adata = self.adata[idx_map, :] + adata = adata[idx_map, :] obs_names = obs_names[idx_map] # Assign attributes - self.adata.obs_names = obs_names - self.adata.var = var - self.adata.obs = obs - assert np.all(self.adata.obs_names == self.adata.obs["cell"].values) # ToDo take asserts out + adata.obs_names = obs_names + adata.var = var + adata.obs = obs + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py index b94703a21..e89f8a93c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py @@ -53,7 +53,9 @@ def _load(self): sep="\t").iloc[:, :2] var.columns = ["ensembl", "names"] var.index = var["ensembl"].values - self.adata = anndata.AnnData(X=X, obs=obs, var=var) - self.adata.obs["sample"] = name - adatas.append(self.adata) - self.adata = adatas[0].concatenate(adatas[1:]) + adata = anndata.AnnData(X=X, obs=obs, var=var) + adata.obs["sample"] = name + adatas.append(adata) + adata = adatas[0].concatenate(adatas[1:]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py index 82afc496c..0bfa8cdf0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py @@ -53,5 +53,7 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "voigt19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py index 1249f6dba..2c6c807e7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py @@ -83,7 +83,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py index 048cd06be..1d881ef43 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py @@ -79,9 +79,9 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["nCount_RNA"].values[:, None]))\ - .multiply(1 / 10000) - + adata = 
anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nCount_RNA"].values[:, None])).multiply(1 / 10000) self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 6e66057eb..8bb0d6b9e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -108,10 +108,12 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) - self.adata = anndata.read_h5ad(fn) - self.adata.X = self.adata.raw.X - self.adata.var = self.adata.raw.var - del self.adata.raw - self.adata.obsm = {} - self.adata.varm = {} - self.adata.uns = {} + adata = anndata.read_h5ad(fn) + adata.X = adata.raw.X + adata.var = adata.raw.var + del adata.raw + adata.obsm = {} + adata.varm = {} + adata.uns = {} + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py index ac228acf8..959b4bf24 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py @@ -83,11 +83,12 @@ def _load(self): os.path.join(self.data_dir, "GSE135893_barcodes.tsv.gz"), os.path.join(self.data_dir, "GSE135893_IPF_metadata.csv.gz"), ] - self.adata = anndata.read_mtx(fn[0]).T - self.adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) - self.adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=["barcodes"]) + adata = anndata.read_mtx(fn[0]).T + adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) + adata.obs = pd.read_csv(fn[2], index_col=0, header=None, 
names=["barcodes"]) obs = pd.read_csv(fn[3], index_col=0) - self.adata = self.adata[obs.index.tolist(), :].copy() - self.adata.obs = obs - + adata = adata[obs.index.tolist(), :].copy() + adata.obs = obs self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py index a5ed37668..74d31f688 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py @@ -123,5 +123,7 @@ def _load(self): fetal = anndata.read(fn[1]) adult.obs["development"] = "adult" fetal.obs["development"] = "fetal" - self.adata = adult.concatenate(fetal) - self.adata.X = np.expm1(self.adata.X) + adata = adult.concatenate(fetal) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py index 565199f07..31620b966 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py @@ -86,5 +86,7 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "park20.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py index 58406a482..7da8ee04d 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py @@ -147,12 +147,14 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) - self.adata = anndata.read(fn) + adata = anndata.read(fn) if self.sample_fn != "madissoon19_lung.processed.h5ad": - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) # Cell type column called differently in madissoon19_lung.processed.h5ad: if self.sample_fn == "madissoon19_lung.processed.h5ad": - self.adata.obs["Celltypes"] = self.adata.obs["CellType"] - del self.adata.obs["CellType"] + adata.obs["Celltypes"] = adata.obs["CellType"] + del adata.obs["CellType"] self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py index c43d8cc05..439e5e720 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py @@ -58,7 +58,8 @@ def __init__( def _load(self): fn = os.path.join(self.data_dir, "lukowski19.processed.h5ad") - self.adata = anndata.read(fn) - self.adata.X = np.expm1(self.adata.X) - self.adata.X = self.adata.X.multiply(scipy.sparse.csc_matrix(self.adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py index 95ecc5013..f4cbf2948 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py @@ -43,22 +43,25 @@ def _load(self): dsets = {} for node in f.walk_nodes('/matrix', 'Array'): dsets[node.name] = node.read() - M, N = dsets['shape'] - data = dsets['data'] - if dsets['data'].dtype == np.dtype('int32'): - data = dsets['data'].view('float32') - data[:] = dsets['data'] - matrix = scipy.sparse.csr_matrix( - (data, dsets['indices'], dsets['indptr']), - shape=(N, M), - ) - self.adata = ad.AnnData( - matrix, - dict(obs_names=dsets['barcodes'].astype(str)), - dict( - var_names=dsets['name'].astype(str), - gene_ids=dsets['id'].astype(str), - feature_types=dsets['feature_type'].astype(str), - genome=dsets['genome'].astype(str), - ), - ) + + M, N = dsets['shape'] + data = dsets['data'] + if dsets['data'].dtype == np.dtype('int32'): + data = dsets['data'].view('float32') + data[:] = dsets['data'] + matrix = scipy.sparse.csr_matrix( + (data, dsets['indices'], dsets['indptr']), + shape=(N, M), + ) + adata = ad.AnnData( + matrix, + dict(obs_names=dsets['barcodes'].astype(str)), + dict( + var_names=dsets['name'].astype(str), + gene_ids=dsets['id'].astype(str), + feature_types=dsets['feature_type'].astype(str), + genome=dsets['genome'].astype(str), + ), + ) + + return adata diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py index 0e2822443..298a7f2de 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py @@ -50,5 +50,7 @@ def 
__init__( def _load_full(self): fn = os.path.join(self.data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - self.adata = anndata.read_loom(fn) - self.adata = self.adata[self.adata.obs["emptydrops_is_cell"] == "t"].copy() + adata = anndata.read_loom(fn) + adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() + + return adata diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 3ff61eba9..aab593f21 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -884,9 +884,10 @@ def plot_best_classwise_heatmap( else: raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'loaders')")) dataset.load() - cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - assert False, "depreceat metadata code here" + raise NotImplementedError("deprecated metadata code here") + """ + cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids @@ -977,6 +978,7 @@ def plot_best_classwise_heatmap( cbar=False ) return fig, axs, sns_data_heatmap + """ def plot_best_classwise_scatter( self, @@ -1050,8 +1052,10 @@ def plot_best_classwise_scatter( else: raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) dataset.load() - cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() + raise NotImplementedError("deprecated metadata code here") + """ + cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() celltype_versions = ORGANISM_DICT.copy() celltype_versions[organism][organ].set_version(celltype_version) leafnodes = celltype_versions[organism][organ].ids @@ -1149,6 +1153,7 @@ def plot_best_classwise_scatter( ) return fig, axs, sns_data_scatter + """ class SummarizeGridsearchEmbedding(GridsearchContainer): diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index e3e334443..e95b20a55 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -375,9 +375,10 @@ def _save_specific( with open(fn + '_ontology_names.pickle', 'wb') as f: pickle.dump(obj=self.estimator.ids, file=f) + raise NotImplementedError("fix celltype versions code here, deprecated") + """ cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() - assert False, "fix celltyp versions code here, depreceate" celltype_versions[self.zoo.organism][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) leafnodes = celltype_versions[self.zoo.organism][self.zoo.organ].ids ontology = celltype_versions[self.zoo.organism][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] @@ -392,3 +393,4 @@ def _save_specific( del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) + """ From 61547eb782a89eeb94dc848e652263cffce03d2c Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 17 Feb 2021 15:04:30 +0100 Subject: [PATCH 069/161] Add sfaira commandline interface to manage dataloaders (#89) * add skeleton for CI 
Signed-off-by: Zethson * fix sfaira module path Signed-off-by: Zethson * fix sfaira module path Signed-off-by: Zethson * add template copying Signed-off-by: Zethson * add questionary boilerplate Signed-off-by: Zethson * add upgrade command Signed-off-by: Zethson * add some create boilerplate & CL Signed-off-by: Zethson * add create-dataloader logic order Signed-off-by: Zethson * add lint and test boilerplate Signed-off-by: Zethson * flake8 Signed-off-by: Zethson * flake8 Signed-off-by: Zethson * add prompting for dataloader template type Signed-off-by: Zethson * restructured creator into non-static class Signed-off-by: Zethson * add doi validation Signed-off-by: Zethson * add author Signed-off-by: Zethson * finish prompts Signed-off-by: Zethson * add template single_dataset Signed-off-by: Zethson * flake8 Signed-off-by: Zethson * add clean command Signed-off-by: Zethson * add clean docstrings Signed-off-by: Zethson * add future docs Signed-off-by: Zethson * add black to clean Signed-off-by: Zethson * flake8 Signed-off-by: Zethson * add create all templates Signed-off-by: Zethson * add author metadata Signed-off-by: Zethson * some lint boilerplate & docstrings Signed-off-by: Zethson * add linting functions Signed-off-by: Zethson * add _lint_load Signed-off-by: Zethson * add sphinx autodoc Signed-off-by: Zethson * add create_template wf Signed-off-by: Zethson * fix flake8 Signed-off-by: Zethson * add remaining templates to workflow Signed-off-by: Zethson * fix template creation wf Signed-off-by: Zethson * fix template creation wf Signed-off-by: Zethson * add doc for sfaira dl creation Signed-off-by: Zethson * add clean-dataloader to CL Signed-off-by: Zethson * relax requirements Signed-off-by: Zethson * add test-dataloader implementation Signed-off-by: Zethson * fix flake8 Signed-off-by: Zethson * remove docstring creation & lint Signed-off-by: Zethson * update cookiecutter dataloader templates * removed templates * update dataset templates to return anndata object 
in _load() Co-authored-by: david.seb.fischer Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> --- .github/workflows/create_templates.yml | 53 +++++++ docs/Makefile | 6 +- docs/_static/custom_sfaira.css | 141 +++++++++++++++++ docs/api/sfaira.data.DatasetBase.rst | 16 +- docs/api/sfaira.data.DatasetInteractive.rst | 16 +- docs/api/sfaira.data.DatasetSuperGroup.rst | 3 +- docs/changelog.rst | 7 +- docs/commandline_interface.rst | 6 + docs/conf.py | 6 +- docs/data.rst | 115 +++++++------- docs/index.rst | 3 +- docs/requirements.txt | 1 + requirements.txt | 23 ++- setup.cfg | 2 +- setup.py | 40 +++-- sfaira/__init__.py | 6 +- sfaira/cli.py | 117 ++++++++++++++ .../{data/templates => commands}/__init__.py | 0 sfaira/commands/clean_dataloader.py | 49 ++++++ sfaira/commands/create_dataloader.py | 148 ++++++++++++++++++ sfaira/commands/lint_dataloader.py | 147 +++++++++++++++++ sfaira/commands/questionary.py | 67 ++++++++ .../cookiecutter.json | 14 ++ .../__init__.py | 0 .../{{ cookiecutter.id_without_doi }}.py | 59 +++++++ .../cookiecutter.json | 14 ++ .../__init__.py | 0 .../{{ cookiecutter.id_without_doi }}.py | 70 +++++++++ .../cookiecutter.json | 14 ++ .../__init__.py | 0 .../{{ cookiecutter.id_without_doi }}.py | 68 ++++++++ .../single_dataset/cookiecutter.json | 14 ++ .../__init__.py | 0 .../{{ cookiecutter.id_without_doi }}.py | 59 +++++++ sfaira/commands/test_dataloader.py | 41 +++++ sfaira/commands/upgrade.py | 101 ++++++++++++ .../human_pancreas_2017_smartseq2_enge_001.py | 1 - .../__init__.py | 1 - .../your_dataset_file_1.py | 22 --- .../your_dataset_file_2.py | 22 --- .../__init__.py | 1 - .../your_dataset_file.py | 36 ----- .../many_samples_one_file/__init__.py | 1 - .../your_dataset_file.py | 38 ----- .../one_samples_one_files/__init__.py | 1 - .../your_dataset_file_1.py | 22 --- sfaira/unit_tests/conftest.py | 15 ++ sfaira/unit_tests/test_data_template.py | 6 +- sfaira/versions/metadata/base.py | 4 +- 49 files changed, 1340 
insertions(+), 256 deletions(-) create mode 100644 .github/workflows/create_templates.yml create mode 100644 docs/_static/custom_sfaira.css create mode 100644 docs/commandline_interface.rst create mode 100644 sfaira/cli.py rename sfaira/{data/templates => commands}/__init__.py (100%) create mode 100644 sfaira/commands/clean_dataloader.py create mode 100644 sfaira/commands/create_dataloader.py create mode 100644 sfaira/commands/lint_dataloader.py create mode 100644 sfaira/commands/questionary.py create mode 100644 sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json rename sfaira/{data/templates/dataloaders => commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}}/__init__.py (100%) create mode 100644 sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json create mode 100644 sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/__init__.py create mode 100644 sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json create mode 100644 sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py create mode 100644 sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/templates/single_dataset/cookiecutter.json create mode 100644 sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py create mode 100644 sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/test_dataloader.py 
create mode 100644 sfaira/commands/upgrade.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py delete mode 100644 sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py delete mode 100644 sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py delete mode 100644 sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py create mode 100644 sfaira/unit_tests/conftest.py diff --git a/.github/workflows/create_templates.yml b/.github/workflows/create_templates.yml new file mode 100644 index 000000000..62868a1fd --- /dev/null +++ b/.github/workflows/create_templates.yml @@ -0,0 +1,53 @@ +name: Create sfaira data-loader templates + +on: [push, pull_request] + +jobs: + create-templates: + runs-on: ${{ matrix.os }} + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + strategy: + matrix: + os: [ubuntu-latest] + python: [3.7, 3.8] + env: + PYTHONIOENCODING: utf-8 + + steps: + - uses: actions/checkout@v2 + name: Check out source-code repository + + - name: Setup Python + uses: actions/setup-python@v2.1.4 + with: + python-version: ${{ matrix.python }} + + - name: Upgrade and install pip + run: python -m pip install --upgrade pip + + - name: Build sfaira + run: pip install . + + - name: Create single_dataset template + run: | + cd .. 
+ echo -e "\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + rm -rf d10_1000_j_journal_2021_01_001/ + + - name: Create multiple_datasets_single_file template + run: | + cd .. + echo -e "\033[B\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + rm -rf d10_1000_j_journal_2021_01_001/ + + - name: Create multiple_datasets_streamlined template + run: | + cd .. + echo -e "\033[B\n\033[B\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + rm -rf d10_1000_j_journal_2021_01_001/ + + - name: Create multiple_datasets_not_streamlined template + run: | + cd .. + echo -e "\033[B\n\033[B\n\033[B\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + rm -rf d10_1000_j_journal_2021_01_001/ diff --git a/docs/Makefile b/docs/Makefile index 4a52c31ee..d637ccd38 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,10 +1,6 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = python -msphinx -SPHINXPROJ = system_intelligence +SPHINXPROJ = sfaira SOURCEDIR = . 
BUILDDIR = _build diff --git a/docs/_static/custom_sfaira.css b/docs/_static/custom_sfaira.css new file mode 100644 index 000000000..ff1006b23 --- /dev/null +++ b/docs/_static/custom_sfaira.css @@ -0,0 +1,141 @@ +@import "basic.css"; + +/*Color main components with a dark theme*/ +div, span, code { + background-color: #181a1b !important; +} + +.wy-side-nav-search { + background-color: #005fff !important; +} + +.wy-side-nav-search div { + background-color: #005fff !important; +} + +/*Font color is mainly white*/ +.rst-content p, li, h1, h2, h3, h4, h5, h6, .highlight-console, .n, .section { + color: #FFFFFF; +} + +/*The side menu is slightly more grey and lighter than the overall theme*/ +.wy-menu, .wy-menu-vertical { + background-color: #2D2E2F !important; +} + +.wy-side-scroll { + background-color: #2D2E2F !important; +} + +.wy-menu .caption-text { + background-color: #2D2E2F !important; +} + +.caption-text { + background-color: #181a1b !important; +} + +.figure p { + background-color: #181a1b !important; +} + +/*Toctree wrapper on index page has a dark background unlike the other menu items*/ +.toctree-wrapper .compound ul .toctree-l1, .toctree-wrapper .compound ul .toctree-l2 { + background-color: #181a1b !important; + color: #005fff !important; +} + +/*The current menu section has a blue background*/ +.wy-menu-vertical li.toctree-l1.current li.toctree-l2 > a:hover { + color: #FFFFFF !important; + background: #005fff !important; +} + +/*Subitems under the current section are displayed in grey again*/ +.wy-menu-vertical li.toctree-l1.current li.toctree-l2 > a { + color: #FFFFFF !important; + background: #2D2E2F !important; +} + +/*Current hovered item has a blue background*/ +.wy-menu-vertical li.toctree-l2.current li.toctree-l3 > a:hover { + color: #FFFFFF !important; + background: #005fff !important; +} + +.wy-menu-vertical li.toctree-l2.current > a:hover { + color: #FFFFFF !important; + background: #005fff !important; +} + +.wy-menu-vertical 
li.toctree-l2.current li.toctree-l3 > a { + color: #FFFFFF !important; + background: #2D2E2F !important; +} + +.wy-menu-vertical li.toctree-l1.current { + color: #FFFFFF !important; + background: #005fff !important; + border-color: #005fff !important; +} + +.wy-menu-vertical li.toctree-l1.current > a { + color: #FFFFFF !important; + background: #005fff !important; + border-color: #005fff !important; +} + +.wy-menu-vertical li.toctree-l1.current > a:hover { + color: #FFFFFF !important; + background: #005fff !important; + border-color: #005fff !important; +} + +/*The expand toctree items have the same background as its corresponding section*/ +.wy-menu-vertical li.toctree-l1 a span.toctree-expand { + background-color: #005fff !important; +} + +.wy-menu-vertical li.toctree-l2 a span.toctree-expand { + background-color: #2D2E2F !important; +} + +.wy-menu-vertical li.toctree-l2 a:hover span.toctree-expand { + background-color: #005fff !important; +} + +.wy-menu-vertical li.toctree-l3 a span.toctree-expand { + background-color: #2D2E2F !important; +} + +.wy-menu-vertical li.toctree-l3 a:hover span.toctree-expand { + background-color: #005fff !important; +} + +.code .docutils .literal .notranslate .pre { + background-color: #181a1b !important; +} + +/*Color footer separately corresponding to overall dark theme*/ +footer { + color: #FFFFFF; +} + +/*Color footer buttons in blue*/ +.rst-footer-buttons a, .rst-footer-buttons a:hover, .rst-footer-buttons span { + background-color: #005fff !important; +} + +.wy-side-nav-search a { + background-color: #005fff !important; +} + +.version { + background-color: #005fff !important; + color: #FFFFFF !important; +} + +/*Set max width to none so the theme uses all available width*/ +.wy-nav-content { + max-width: none; +} diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst index 22b767b80..edd05f7f8 100644 --- a/docs/api/sfaira.data.DatasetBase.rst +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -14,16 
+14,17 @@ .. autosummary:: ~DatasetBase.__init__ - ~DatasetBase.assert_celltype_version_key ~DatasetBase.clear ~DatasetBase.load ~DatasetBase.load_meta + ~DatasetBase.load_ontology_class_map ~DatasetBase.load_tobacked - ~DatasetBase.map_ontology_class - ~DatasetBase.set_default_type_version + ~DatasetBase.project_celltypes_to_ontology + ~DatasetBase.set_raw_full_group_object ~DatasetBase.set_unkown_class_id - ~DatasetBase.subset_organs + ~DatasetBase.subset_cells ~DatasetBase.write_meta + ~DatasetBase.write_ontology_class_map @@ -36,17 +37,19 @@ ~DatasetBase.age ~DatasetBase.annotated ~DatasetBase.author - ~DatasetBase.available_type_versions ~DatasetBase.citation ~DatasetBase.dev_stage + ~DatasetBase.directory_formatted_doi ~DatasetBase.doi ~DatasetBase.doi_cleaned_id ~DatasetBase.download ~DatasetBase.download_meta ~DatasetBase.ethnicity + ~DatasetBase.fn_ontology_class_map_csv ~DatasetBase.healthy ~DatasetBase.healthy_state_healthy ~DatasetBase.id + ~DatasetBase.loaded ~DatasetBase.meta ~DatasetBase.meta_fn ~DatasetBase.ncells @@ -60,8 +63,11 @@ ~DatasetBase.obs_key_organ ~DatasetBase.obs_key_organism ~DatasetBase.obs_key_protocol + ~DatasetBase.obs_key_sample ~DatasetBase.obs_key_sex ~DatasetBase.obs_key_state_exact + ~DatasetBase.ontology_celltypes + ~DatasetBase.ontology_class_map ~DatasetBase.organ ~DatasetBase.organism ~DatasetBase.protocol diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst index d1bda2a3f..c7f23fc1e 100644 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -14,16 +14,17 @@ .. 
autosummary:: ~DatasetInteractive.__init__ - ~DatasetInteractive.assert_celltype_version_key ~DatasetInteractive.clear ~DatasetInteractive.load ~DatasetInteractive.load_meta + ~DatasetInteractive.load_ontology_class_map ~DatasetInteractive.load_tobacked - ~DatasetInteractive.map_ontology_class - ~DatasetInteractive.set_default_type_version + ~DatasetInteractive.project_celltypes_to_ontology + ~DatasetInteractive.set_raw_full_group_object ~DatasetInteractive.set_unkown_class_id - ~DatasetInteractive.subset_organs + ~DatasetInteractive.subset_cells ~DatasetInteractive.write_meta + ~DatasetInteractive.write_ontology_class_map @@ -36,17 +37,19 @@ ~DatasetInteractive.age ~DatasetInteractive.annotated ~DatasetInteractive.author - ~DatasetInteractive.available_type_versions ~DatasetInteractive.citation ~DatasetInteractive.dev_stage + ~DatasetInteractive.directory_formatted_doi ~DatasetInteractive.doi ~DatasetInteractive.doi_cleaned_id ~DatasetInteractive.download ~DatasetInteractive.download_meta ~DatasetInteractive.ethnicity + ~DatasetInteractive.fn_ontology_class_map_csv ~DatasetInteractive.healthy ~DatasetInteractive.healthy_state_healthy ~DatasetInteractive.id + ~DatasetInteractive.loaded ~DatasetInteractive.meta ~DatasetInteractive.meta_fn ~DatasetInteractive.ncells @@ -60,8 +63,11 @@ ~DatasetInteractive.obs_key_organ ~DatasetInteractive.obs_key_organism ~DatasetInteractive.obs_key_protocol + ~DatasetInteractive.obs_key_sample ~DatasetInteractive.obs_key_sex ~DatasetInteractive.obs_key_state_exact + ~DatasetInteractive.ontology_celltypes + ~DatasetInteractive.ontology_class_map ~DatasetInteractive.organ ~DatasetInteractive.organism ~DatasetInteractive.protocol diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst index cdbb18916..e607ab926 100644 --- a/docs/api/sfaira.data.DatasetSuperGroup.rst +++ b/docs/api/sfaira.data.DatasetSuperGroup.rst @@ -24,9 +24,10 @@ ~DatasetSuperGroup.ncells 
~DatasetSuperGroup.ncells_bydataset ~DatasetSuperGroup.ncells_bydataset_flat + ~DatasetSuperGroup.project_celltypes_to_ontology ~DatasetSuperGroup.set_dataset_groups ~DatasetSuperGroup.subset - ~DatasetSuperGroup.subset_organs + ~DatasetSuperGroup.subset_cells diff --git a/docs/changelog.rst b/docs/changelog.rst index 598481d8d..b50a0b649 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,7 +11,12 @@ This project adheres to `Semantic Versioning `_. **Added** -* Initial release with online documentation. +* A commandline interface with Click, Rich and Questionary +* upgrade command, which checks whether the latest version of sfaira is installed on every sfaira startup and upgrades it if not. +* create-dataloader command which allows for the interactive creation of a sfaira dataloader script +* clean-dataloader command which cleans a dataloader script created with sfaira create-dataloader +* lint-dataloader command which runs static checks on the style and completeness of a dataloader script +* test-dataloader command which runs a unittest on a provided dataloader **Fixed** diff --git a/docs/commandline_interface.rst b/docs/commandline_interface.rst new file mode 100644 index 000000000..c70724ebd --- /dev/null +++ b/docs/commandline_interface.rst @@ -0,0 +1,6 @@ +Commandline interface +--------------------- + +.. 
click:: sfaira.cli:sfaira_cli + :prog: sfaira + :nested: full diff --git a/docs/conf.py b/docs/conf.py index 4497f1033..7ff074f47 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,7 +72,8 @@ 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.napoleon', - 'sphinx.ext.autosummary' + 'sphinx.ext.autosummary', + 'sphinx_click' ] # Generate the API documentation when building @@ -112,6 +113,9 @@ conf_py_path='/docs/', # Path in the checkout to the docs root ) html_static_path = ['_static'] +html_css_files = [ + 'custom_sfaira.css', +] html_show_sphinx = False gh_url = 'https://github.com/{github_user}/{github_repo}'.format_map(html_context) diff --git a/docs/data.rst b/docs/data.rst index df067ca83..7bb8386cc 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -28,12 +28,19 @@ Contact us for support of any other repositories. .. _cellxgene: https://cellxgene.cziscience.com/ -Add data sets -~~~~~~~~~~~~~ +Adding data sets +~~~~~~~~~~~~~~~~~ - 1. Write a data loader as outlined below. - 2. Identify the raw files as indicated in the data loader classes and copy them into your directory structure as required by your data loader. - 3. You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. +Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. +This process requires a couple of steps as outlined in the following sections. + + 1. Write a dataloader as outlined below. + 2. Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as required by your data loader. + 3. You can contribute the data loader to public sfaira, we do not manage data upload though. 
+ During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. + +The following sections will first describe the underlying design principles of sfaira dataloaders and +then explain how to interactively create, validate and test dataloaders. Use data loaders on existing data repository -------------------------------------------- @@ -45,21 +52,14 @@ Depending on the functionalities you want to use, you would often want to create first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. -Write data loaders ------------------- +Writing dataloaders +--------------------- The study-centric data loader module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the sfaira code, data loaders are organised into directories, which correspond to publications. All data loaders corresponding to data sets of one study are grouped into this directory. -This directory contains an `__init__.py` file which makes these data loaders visible to sfaira: - -.. code-block:: python - - FILE_PATH = __file__ - - Next, each data set is represented by one data loader python file in this directory. See below for more complex set ups with repetitive data loader code. @@ -71,8 +71,7 @@ The core data loader identified is the directory compatible doi, which is the doi with all special characters replaced by "_" and a "d" prefix is used: "10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029". Searching for this string should yield a match if it is already implemented, take care to look for both -preprint and publication DOIs if both are available. -We will also mention publication names in issues, you will however not find these in the code. +preprint and publication DOIs if both are available. 
We will also mention publication names in issues, you will however not find these in the code. .. _code: https://github.com/theislab/sfaira/tree/dev .. _issues: https://github.com/theislab/sfaira/issues @@ -83,8 +82,7 @@ The data loader python file Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. Each such class is in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class -looks very similar in parts to a cell in a juypter notebook that performs data loading. We suggest to copy a data loader -class file and simply adapt to the new data. The core features that must be included are: +looks very similar in parts to a cell in a Jupyter notebook that performs data loading. The core features that must be included are: 1. A constructor of the following form that can be used to interact with the data set before it is loaded into memory: @@ -199,34 +197,9 @@ Alternatively, we also provide the optional dependency sfaira_extensions (https: in which local data and cell type annotation can be managed separately but still be loaded as usual through sfaira. The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. -To get going, consider copying over code from our collection of template_ study-centric data loader directories. -In these templates, it is clearly annotated which code fragment can remain constant -and which have to be addressed by you. - -.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders - - -Map cell type labels to ontology -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with -the same name and directory as the python file in which the data loader is located. -This .csv contains two columns with one row for each unique cell type label. 
-The free text identifiers in the first column "source", -and the corresponding ontology term in the second column "target". -You can write this file entirely from scratch. -Sfaira also allows you to generate a first guess of this file using fuzzy string matching -which is automatically executed when you run the template data loader unit test for the first time with you new loader. -Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which -ontology term in the case of conflicts. -Still, this first guess usually drastically speeds up this annotation harmonization. - - -Repetitive data loader code -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are instances in which you find yourself copying code between data loader files corresponding to one study. -In most of these cases, you can avoid the copy operations and share the code more efficiently. +Handling multiple data sources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you have multiple data sets in a study which are all saved in separate files which come in similar formats: You can subclass `DatasetBaseGroupLoadingManyFiles` instead of `DatasetBase` and proceed as usual, @@ -234,12 +207,8 @@ only with adding `SAMPLE_FNS` in the data loader file name space, which is a list of all file names addressed with this file. You can then refer to an additional property of the Dataset class, `self.sample_fn` during loading or when dynamically defining meta data in the constructor. -Consider also this template_ and this example_. Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. -.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined -.. 
_example: https://github.com/theislab/sfaira/tree/dev/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130 - If you have multiple data sets in a study which are all saved in one file: You can subclass `DatasetBaseGroupLoadingOneFile` instead of `DatasetBase` and proceed as usual, only with adding `SAMPLE_IDS` in the data loader file name space, @@ -248,24 +217,50 @@ You can then refer to an additional property of the Dataset class, `self.sample_ or when dynamically defining meta data in the constructor. Note that `self.sample_id` refers to a `self.adata.obs` column in the loaded data set, this column has to be defined in `self.obs_key_sample`, which needs to be defined in the constructor. -Consider also this template_. Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. -.. _template: https://github.com/theislab/sfaira/tree/dev/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined +Creating dataloaders with the commandline interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +sfaira features an interactive way of creating, formatting and testing dataloaders. +The common workflow looks as follows: -Test your data loader -~~~~~~~~~~~~~~~~~~~~~ +1. Create a new dataloader with ``sfaira create-dataloader`` +2. Format and clean the dataloader with ``sfaira clean-dataloader <path>`` +3. Validate the dataloader with ``sfaira lint-dataloader <path>`` +4. Test the dataloader using ``sfaira test-dataloader <path>`` -Sfaira has a local data loader unit test_ with which you can debug your data loader and which aids with meta data -assignments, such as cell types. -You can use this test with pytest in an IDE. -You can simply place the raw data into `sfaira/unit_tests/template_data/` with the correct sub path, -as indicated in the `._load()`, -for the test to access this data. 
+When creating a dataloader with ``sfaira create-dataloader`` you are first asked for the dataloader type +which will be determined by the structure of your data (one vs many files etc). Next, common information such as +your name and email are prompted for followed by dataloader specific attributes such as organ, organism and many more. +If the requested information is not available simply hit enter and continue until done. If you have mixed organ or organism +data you will have to resolve this manually. Your dataloader template will be created in your current working directory +in a folder resembling your doi. -.. _test: https://github.com/theislab/sfaira/tree/dev/sfaira/unit_tests/test_data_template.py +Now simply fill in all missing properties in your dataloader script(s). Leave all unneeded properties commented out. +When done run ``sfaira clean-dataloader <path>`` on the just filled out dataloader script. +All unused attributes will be removed and the file is reformatted. + +Next validate the integrity of your dataloader with ``sfaira lint-dataloader <path>``. +All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information/function. + +Finally, test your dataloader with ``sfaira test-dataloader <path>``. +If all tests pass you can proceed to use your dataloader or to submit a pull request to sfaira. + +Map cell type labels to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with +the same name and directory as the python file in which the data loader is located. +This .csv contains two columns with one row for each unique cell type label. +The free text identifiers in the first column "source", +and the corresponding ontology term in the second column "target". +You can write this file entirely from scratch. 
+Sfaira also allows you to generate a first guess of this file using fuzzy string matching +which is automatically executed when you run the template data loader unit test for the first time with your new loader. +Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which +ontology term in the case of conflicts. +Still, this first guess usually drastically speeds up this annotation harmonization. Cell type ontology management ----------------------------- diff --git a/docs/index.rst b/docs/index.rst index 265bc7965..ed3256547 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,14 +30,13 @@ News Latest additions ---------------- -.. include:: release-latest.rst - .. toctree:: :maxdepth: 1 :hidden: installation api/index + commandline_interface tutorials data models diff --git a/docs/requirements.txt b/docs/requirements.txt index 5a833b05d..dcf07ea07 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,7 @@ sphinx matplotlib sphinx_rtd_theme sphinx-autodoc-typehints +sphinx-click jinja2 docutils -r ../requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 901fa721c..c508cf4a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,29 @@ anndata>=0.7 docutils fuzzywuzzy -jinja2 loompy matplotlib networkx -numpy>=1.14.0 +numpy>=1.16.4 obonet openpyxl pandas -pytest +pytest>=6.2.2 python-Levenshtein -scanpy -scipy +scanpy>=1.7.0 +scipy>=1.2.1 seaborn -sphinx -sphinx_rtd_theme -tensorflow # TODO remove as soon as # 70 is solved +tensorflow>=2.0.0 # TODO remove as soon as # 70 is solved tqdm requests +versioneer +h5py xlrd==1.* +rich>=9.10.0 +click>=7.1.2 +questionary>=1.8.1 +packaging>=20.8 +requests>=2.25.1 +switchlang>=0.1.0 +cookiecutter==1.7.2 +black>=20.8b1 diff --git a/setup.cfg b/setup.cfg index 2b1c2738b..2e3eb8d69 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ inplace = 1 [flake8] ignore=F401, W504 -exclude = docs 
+exclude = docs, sfaira/commands/templates max-line-length = 160 [aliases] diff --git a/setup.py b/setup.py index ad706615c..1132dcf46 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import os + from setuptools import setup, find_packages import versioneer @@ -8,6 +10,21 @@ with open("README.rst", "r") as fh: long_description = fh.read() +with open('requirements.txt') as f: + requirements = f.read().splitlines() + + +def package_files(directory): + paths = [] + for (path, directories, filenames) in os.walk(directory): + for filename in filenames: + paths.append(os.path.join('..', path, filename)) + return paths + + +WD = os.path.dirname(__file__) +templates = package_files(f'{WD}/sfaira/commands/templates') + setup( name='sfaira', author=author, @@ -24,21 +41,14 @@ 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], - packages=find_packages(), - install_requires=[ - 'anndata>=0.7', - 'fuzzywuzzy', - 'h5py', - 'networkx', - 'numpy>=1.16.4', - 'obonet', - 'pandas', - 'pytest', - 'python-Levenshtein', - 'scipy>=1.2.1', - 'tqdm', - 'tensorflow>=2.0.0' # TODO Remove and add to tensorflow profile - ], + packages=find_packages(include=['sfaira', 'sfaira.*']), + package_data={'': templates}, + entry_points={ + 'console_scripts': [ + 'sfaira=sfaira.cli:main', + ], + }, + install_requires=requirements, extras_require={ 'tensorflow': [ # 'tensorflow>=2.0.0' # TODO Add Tensorflow here again diff --git a/sfaira/__init__.py b/sfaira/__init__.py index d0b93c7f9..902930e82 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -18,9 +18,11 @@ ]) __author__ = ', '.join([ "Leander Dony", - "David S. Fischer" + "David S. 
Fischer", + "Lukas Heumos" ]) __email__ = ', '.join([ "leander.dony@helmholtz-muenchen.de", - "david.fischer@helmholtz-muenchen.de" + "david.fischer@helmholtz-muenchen.de", + "lukas.heumos@helmholtz-muenchen.de" ]) diff --git a/sfaira/cli.py b/sfaira/cli.py new file mode 100644 index 000000000..55b0b8dbd --- /dev/null +++ b/sfaira/cli.py @@ -0,0 +1,117 @@ +import logging +import os +import sys + +import click +import rich +import rich.logging +from rich import traceback +from rich import print +from sfaira.commands.test_dataloader import DataloaderTester + +from sfaira.commands.clean_dataloader import DataloaderCleaner +from sfaira.commands.lint_dataloader import DataloaderLinter + +import sfaira +from sfaira.commands.create_dataloader import DataloaderCreator +from sfaira.commands.upgrade import UpgradeCommand + +WD = os.path.dirname(__file__) +log = logging.getLogger() + + +def main(): + traceback.install(width=200, word_wrap=True) + print(r"""[bold blue] +███████ ███████  █████  ██ ██████  █████  +██      ██      ██   ██ ██ ██   ██ ██   ██  +███████ █████  ███████ ██ ██████  ███████  +     ██ ██     ██   ██ ██ ██   ██ ██   ██  +███████ ██  ██  ██ ██ ██  ██ ██  ██  +                                     + """) + + print('[bold blue]Run [green]sfaira --help [blue]for an overview of all commands\n') + + # Is the latest sfaira version installed? Upgrade if not! + if not UpgradeCommand.check_sfaira_latest(): + print('[bold blue]Run [green]sfaira upgrade [blue]to get the latest version.') + sfaira_cli() + + +@click.group() +@click.version_option(sfaira.__version__, message=click.style(f'sfaira Version: {sfaira.__version__}', fg='blue')) +@click.option('-v', '--verbose', is_flag=True, default=False, help='Enable verbose output (print debug statements).') +@click.option("-l", "--log-file", help="Save a verbose log to a file.") +@click.pass_context +def sfaira_cli(ctx, verbose, log_file): + """ + Create and manage sfaira dataloaders. 
+ """ + # Set the base logger to output DEBUG + log.setLevel(logging.DEBUG) + + # Set up logs to the console + log.addHandler( + rich.logging.RichHandler( + level=logging.DEBUG if verbose else logging.INFO, + console=rich.console.Console(file=sys.stderr), + show_time=True, + markup=True, + ) + ) + + # Set up logs to a file if we asked for one + if log_file: + log_fh = logging.FileHandler(log_file, encoding="utf-8") + log_fh.setLevel(logging.DEBUG) + log_fh.setFormatter(logging.Formatter("[%(asctime)s] %(name)-20s [%(levelname)-7s] %(message)s")) + log.addHandler(log_fh) + + +@sfaira_cli.command() +def create_dataloader() -> None: + """ + Interactively create a new sfaira dataloader. + """ + dataloader_creator = DataloaderCreator() + dataloader_creator.create_dataloader() + + +@sfaira_cli.command() +@click.argument('path', type=click.Path(exists=True)) +def clean_dataloader(path) -> None: + """ + Clean a just written sfaira dataloader to adhere to sfaira's standards. + + PATH to the dataloader script. + """ + dataloader_cleaner = DataloaderCleaner(path) + dataloader_cleaner.clean_dataloader() + + +@sfaira_cli.command() +@click.argument('path', type=click.Path(exists=True)) +def lint_dataloader(path) -> None: + """ + Verifies the dataloader against sfaira's requirements. + + PATH to the dataloader script. + """ + dataloader_linter = DataloaderLinter() + dataloader_linter.lint(path) + + +@sfaira_cli.command() +@click.argument('path', type=click.Path(exists=True)) +def test_dataloader(path) -> None: + """ + Runs a dataloader unit test. 
+ """ + dataloader_tester = DataloaderTester(path) + dataloader_tester.test_dataloader() + + +if __name__ == "__main__": + traceback.install() + sys.exit(main()) # pragma: no cover diff --git a/sfaira/data/templates/__init__.py b/sfaira/commands/__init__.py similarity index 100% rename from sfaira/data/templates/__init__.py rename to sfaira/commands/__init__.py diff --git a/sfaira/commands/clean_dataloader.py b/sfaira/commands/clean_dataloader.py new file mode 100644 index 000000000..43d08c0c5 --- /dev/null +++ b/sfaira/commands/clean_dataloader.py @@ -0,0 +1,49 @@ +import logging +from subprocess import Popen + +log = logging.getLogger(__name__) + + +class DataloaderCleaner: + + def __init__(self, path): + self.path = path + + def clean_dataloader(self) -> None: + """ + Removes any unwanted artifacts from a dataloader Python script and formats the code. + 1. Any line that starts with # self. <- outcommented attribute + 2. Any line that starts with # SFARA: <- explicitly marked full comments + 3. Any line with # something <- comments after attributes + 4. 
Runs black + """ + # Remove all unwanted artifacts + cleaned_content = [] + + with open(self.path, 'r') as data_loader_file: + content = data_loader_file.readlines() + for line in content: + line_stripped = line.strip() + if line_stripped.startswith('# self.'): + continue + elif line_stripped.startswith('# SFAIRA:'): + continue + else: + if '#' in line: + if len(line.split('#')) > 1: + try: + cleaned_content += f'{line.split("#")[0]}\n' + except KeyError: + cleaned_content += line + else: + cleaned_content += line + + with open(self.path, 'w') as data_loader_file: + for line in cleaned_content: + data_loader_file.write(line) + data_loader_file.write('\n') + + # run black + black = Popen(['black', self.path], + universal_newlines=True, shell=False, close_fds=True) + (black_stdout, black_stderr) = black.communicate() diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py new file mode 100644 index 000000000..f3cd5199f --- /dev/null +++ b/sfaira/commands/create_dataloader.py @@ -0,0 +1,148 @@ +import logging +import os +import re +from dataclasses import dataclass, asdict +from shutil import copyfile +from typing import Union + +from sfaira.commands.questionary import sfaira_questionary +from rich import print +from cookiecutter.main import cookiecutter + +log = logging.getLogger(__name__) + + +@dataclass +class TemplateAttributes: + dataloader_type: str = '' # One of single_dataset, multiple_datasets_single_file, multiple_datasets_streamlined, multiple_datasets_not_streamlined + id: str = '' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). 
+ id_without_doi: str = '' # complete id without the doi -> usually used to name the python scripts + + author: Union[str, list] = '' # author (list) who sampled / created the data set + doi: str = '' # doi of data set accompanying manuscript + doi_sfaira_repr: str = '' # internal representation with any special characters replaced with underscores + + download_url_data: str = '' # download website(s) of data files + download_url_meta: str = '' # download website(s) of meta data files + + organ: str = '' # (*, optional) organ (anatomical structure) + organism: str = '' # (*) species / organism + protocol: str = '' # (*, optional) protocol used to sample data (e.g. smart-seq2) + year: str = 2021 # year in which sample was acquired + number_of_datasets: str = 1 # Required to determine the file names + + +class DataloaderCreator: + + def __init__(self): + self.WD = os.path.dirname(__file__) + self.TEMPLATES_PATH = f'{self.WD}/templates' + self.template_attributes = TemplateAttributes() + + def create_dataloader(self): + """ + Prompts and guides the user through a number of possible dataloader choices. + Prompts the user for required attributes which must be present in the dataloader. + Finally creates the specific cookiecutter dataloader template. + """ + self._prompt_dataloader_template() + self._prompt_dataloader_configuration() + self._create_dataloader_template() + + def _prompt_dataloader_template(self) -> None: + """ + Guides the user to select the appropriate dataloader template for his dataset. 
+ Sets the dataloader_type + """ + number_datasets = sfaira_questionary(function='select', + question='How many datasets does your project have?', + choices=['One', 'More than one']) + # One dataset + if number_datasets == 'One': + self.template_attributes.dataloader_type = 'single_dataset' + return + # More than one dataset + dataset_counts = sfaira_questionary(function='select', + question='Are your datasets in a single file or is there one file per dataset?', + choices=['Single dataset file', 'Multiple dataset files']) + if dataset_counts == 'Single dataset file': + self.template_attributes.dataloader_type = 'multiple_datasets_single_file' + return + + # streamlined? + streamlined_datasets = sfaira_questionary(function='select', + question='Are your datasets in a similar format?', + choices=['Same format', 'Different formats']) + if streamlined_datasets == 'Same format': + self.template_attributes.dataloader_type = 'multiple_datasets_streamlined' + return + else: + self.template_attributes.dataloader_type = 'multiple_datasets_not_streamlined' + return + + def _prompt_dataloader_configuration(self): + """ + Prompts the user for all required attributes for a dataloader such as DOI, author, etc. 
+ """ + author = sfaira_questionary(function='text', + question='Author(s):', + default='Einstein, Albert; Hawking, Stephen') + self.template_attributes.author = author.split(';') if ';' in author else author + doi = sfaira_questionary(function='text', + question='DOI:', + default='10.1000/j.journal.2021.01.001') + while not re.match(r'\b10\.\d+/[\w.]+\b', doi): + print('[bold red]The entered DOI is malformed!') # noqa: W605 + doi = sfaira_questionary(function='text', + question='DOI:', + default='10.1000/j.journal.2021.01.001') + self.template_attributes.doi = doi + self.template_attributes.doi_sfaira_repr = f'd{doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + + self.template_attributes.organism = sfaira_questionary(function='text', + question='Organism:', + default='NA') + self.template_attributes.organ = sfaira_questionary(function='text', + question='Organ:', + default='NA') + self.template_attributes.protocol = sfaira_questionary(function='text', + question='Protocol:', + default='NA') + self.template_attributes.year = sfaira_questionary(function='text', + question='Year:', + default='2021') + first_author = author[0] if isinstance(author, list) else author + try: + first_author_lastname = first_author.split(',')[0] + except KeyError: + print('[bold yellow] First author was not in the expected format. 
Using full first author for the id.') + first_author_lastname = first_author + self.template_attributes.id_without_doi = f'{self.template_attributes.organism}_{self.template_attributes.organ}_' \ + f'{self.template_attributes.year}_{self.template_attributes.protocol}_' \ + f'{first_author_lastname}_001' + self.template_attributes.id = self.template_attributes.id_without_doi + f'_{self.template_attributes.doi_sfaira_repr}' + self.template_attributes.download_url_data = sfaira_questionary(function='text', + question='URL to download the data', + default='https://ftp.ncbi.nlm.nih.gov/geo/') + self.template_attributes.number_of_datasets = sfaira_questionary(function='text', + question='Number of datasets:', + default='1').zfill(3) + + def _template_attributes_to_dict(self) -> dict: + """ + Create a dict from the our Template Structure dataclass + :return: The dict containing all key-value pairs with non empty values + """ + return {key: val for key, val in asdict(self.template_attributes).items() if val != ''} + + def _create_dataloader_template(self): + template_path = f'{self.TEMPLATES_PATH}/{self.template_attributes.dataloader_type}' + cookiecutter(f'{template_path}', + no_input=True, + overwrite_if_exists=True, + extra_context=self._template_attributes_to_dict()) + + if self.template_attributes.dataloader_type == 'multiple_datasets_not_streamlined': + for i in range(2, int(self.template_attributes.number_of_datasets.lstrip('0')) + 1): + copyfile(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi}.py', + f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py') diff --git a/sfaira/commands/lint_dataloader.py b/sfaira/commands/lint_dataloader.py new file mode 100644 index 000000000..032ec6752 --- /dev/null +++ b/sfaira/commands/lint_dataloader.py @@ -0,0 +1,147 @@ +import logging + +import rich +from rich.panel import Panel +from rich.progress import Progress, BarColumn + +log 
= logging.getLogger(__name__) + + +class DataloaderLinter: + + def __init__(self, path='.'): + self.path: str = path + self.content: list = [] + self.passed: dict = {} + self.warned: dict = {} + self.failed: dict = {} + self.linting_functions: list = [ + '_lint_dataloader_object', + '_lint_required_attributes', + '_lint_sfaira_todos', + '_lint_load' + ] + + def lint(self, path) -> None: + """ + Statically verifies a dataloader against a predefined set of rules. + Every rule is a function defined in this class, which must be part of this class' linting_functions. + :param path: Path to an existing dataloader + """ + with open(path, 'r') as f: + self.content = list(map(lambda line: line.strip(), f.readlines())) + + progress = Progress("[bold green]{task.description}", BarColumn(bar_width=None), + "[bold yellow]{task.completed} of {task.total}[reset] [bold green]{task.fields[func_name]}") + with progress: + lint_progress = progress.add_task("Running lint checks", + total=len(self.linting_functions), + func_name=self.linting_functions) + for fun_name in self.linting_functions: + progress.update(lint_progress, advance=1, func_name=fun_name) + getattr(self, fun_name)() + + self._print_results() + + def _lint_dataloader_object(self): + """ + Verifies that the Dataloader Object itself (no the attributes) is valid + """ + # TODO Could be more strict by checking also whether the constructor is valid, but too much of a hazzle with Black formatting. + passed_lint_dataloader_object = True + + try: + line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('class Dataset(DatasetBaseGroupLoadingManyFiles):', + 'class Dataset(DatasetBase):')), enumerate(self.content)))[0] + except IndexError: + passed_lint_dataloader_object = False + self.failed['-1'] = 'Missing one of class Dataset(DatasetBase) or class Dataset(DatasetBaseGroupLoadingManyFiles)' + + if passed_lint_dataloader_object: + self.passed[line] = 'Passed dataloader object checks.' 
+ + def _lint_load(self): + """ + Verifies that the method _load_any_object(self, fn=None) is present. + """ + passed_load = True + + try: + line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('def _load_any_object(self, fn=None):', 'def _load(self, fn):')), + enumerate(self.content)))[0] + except IndexError: + passed_load = False + self.failed['-1'] = 'Missing one of methods _load_any_object(self, fn=None) or def _load(self, fn)' + + if passed_load: + self.passed[line] = 'Passed dataloader object checks.' + + def _lint_required_attributes(self): + """ + Verifies that all required attributes for every dataloader are present. + """ + passed_required_attributes = True + + attributes = ['self.id', + 'self.author', + 'self.doi', + 'self.download_url_data', + 'self.organ', + 'self.organism', + 'self.protocol', + 'self.year'] + + for attribute in attributes: + try: + line, attribute = list(filter(lambda line_attribute: line_attribute[1].startswith(attribute), enumerate(self.content)))[0] + except IndexError: + passed_required_attributes = False + self.failed['-1'] = 'One of required attributes id, author, doi, download_url_data, organ, organism, protocol, year is missing.' + + if passed_required_attributes: + self.passed[0] = 'Passed required dataloader attributes checks.' + + def _lint_sfaira_todos(self): + """ + Warns if any SFAIRA TODO: statements were found + """ + passed_sfaira_todos = True + + for index, line in enumerate(self.content): + if 'SFAIRA TODO' in line: + passed_sfaira_todos = False + self.warned[f'{index}'] = f'Line {index}: {line[2:]}' + + if passed_sfaira_todos: + self.passed['0'] = 'Passed sfaira TODOs checks.' 
+ + def _print_results(self): + console = rich.console.Console() + console.print() + console.rule("[bold green] LINT RESULTS") + console.print() + console.print( + f' [bold green][[\u2714]] {len(self.passed):>4} tests passed\n [bold yellow][[!]] {len(self.warned):>4} tests had warnings\n' + f' [bold red][[\u2717]] {len(self.failed):>4} tests failed', + overflow="ellipsis", + highlight=False, + ) + + def format_result(linting_results: dict, color): + results = [] + for line, result in linting_results.items(): + results.append(f'[bold {color}]Result: {result}') + return "\n".join(results) + + if len(self.passed) > 0: + console.print() + console.rule("[bold green][[\u2714]] Tests Passed", style='green') + console.print(Panel(format_result(self.passed, 'green'), style='green'), no_wrap=False, overflow='ellipsis') + if len(self.warned) > 0: + console.print() + console.rule("[bold yellow][[!]] Test Warnings", style='yellow') + console.print(Panel(format_result(self.warned, 'yellow'), style="yellow"), no_wrap=False, overflow='ellipsis') + if len(self.failed) > 0: + console.print() + console.rule("[bold red][[\u2717]] Test Failures", style='red') + console.print(Panel(format_result(self.failed, 'red'), style='red'), no_wrap=False, overflow='ellipsis') diff --git a/sfaira/commands/questionary.py b/sfaira/commands/questionary.py new file mode 100644 index 000000000..ad5db11c1 --- /dev/null +++ b/sfaira/commands/questionary.py @@ -0,0 +1,67 @@ +import logging +import sys +from typing import Optional, List, Union + +import questionary +from prompt_toolkit.styles import Style # type: ignore +from rich import print + +log = logging.getLogger(__name__) + +sfaira_style = Style([ + ('qmark', 'fg:#0000FF bold'), # token in front of the question + ('question', 'bold'), # question text + ('answer', 'fg:#008000 bold'), # submitted answer text behind the question + ('pointer', 'fg:#0000FF bold'), # pointer used in select and checkbox prompts + ('highlighted', 'fg:#0000FF bold'), # 
pointed-at choice in select and checkbox prompts + ('selected', 'fg:#008000'), # style for a selected item of a checkbox + ('separator', 'fg:#cc5454'), # separator in lists + ('instruction', ''), # user instructions for select, rawselect, checkbox + ('text', ''), # plain text + ('disabled', 'fg:#FF0000 italic') # disabled choices for select and checkbox prompts +]) + + +def sfaira_questionary(function: str, + question: str, + choices: Optional[List[str]] = None, + default: Optional[str] = None) -> Union[str, bool]: + """ + Custom selection based on Questionary. Handles keyboard interrupts and default values. + + :param function: The function of questionary to call (e.g. select or text). See https://github.com/tmbo/questionary for all available functions. + :param question: The question to prompt for. Should not include default values or colons. + :param choices: List of all possible choices. Usually only relevant with 'select'. + :param default: A set default value which will be chosen if the user does not enter anything. + :return: The chosen answer. + """ + answer: Optional[str] = '' + try: + if function == 'select': + if default not in choices: # type: ignore + log.debug(f'Default value {default} is not in the set of choices!') + answer = getattr(questionary, function)(f'{question}: ', choices=choices, style=sfaira_style).unsafe_ask() + elif function == 'password': + while not answer or answer == '': + answer = getattr(questionary, function)(f'{question}: ', style=sfaira_style).unsafe_ask() + elif function == 'text': + if not default: + log.debug('Tried to utilize default value in questionary prompt, but is None! 
Please set a default value.') + default = '' + answer = getattr(questionary, function)(f'{question} [{default}]: ', style=sfaira_style).unsafe_ask() + elif function == 'confirm': + default_value_bool = True if default == 'Yes' or default == 'yes' else False + answer = getattr(questionary, function)(f'{question} [{default}]: ', style=sfaira_style, default=default_value_bool).unsafe_ask() + else: + log.debug(f'Unsupported questionary function {function} used!') + + except KeyboardInterrupt: + print('[bold red] Aborted!') + sys.exit(1) + if answer is None or answer == '': + answer = default + + log.debug(f'User was asked the question: ||{question}|| as: {function}') + log.debug(f'User selected {answer}') + + return answer # type: ignore diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json new file mode 100644 index 000000000..92c82b0ec --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json @@ -0,0 +1,14 @@ +{ + "dataloader_author_name": "", + "dataloader_author_email": "", + "id": "", + "id_without_doi": "", + "author": "", + "doi": "", + "doi_sfaira_repr": "", + "download_url_data": "", + "organ": "", + "organism": "", + "protocol": "", + "year": "" + } diff --git a/sfaira/data/templates/dataloaders/__init__.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py similarity index 100% rename from sfaira/data/templates/dataloaders/__init__.py rename to sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py new file mode 100644 
index 000000000..c12ef73ff --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -0,0 +1,59 @@ +import os +from typing import Union +import anndata as ad + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # SFAIRA TODO Add your meta data here + self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + + self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set + self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript + + self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files + # self.download_url_meta = 'x' # download website(s) of meta data files + + self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) + self.organism = '{{ cookiecutter.organism }}' # (*) species / organism + self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. 
smart-seq2) + self.year = {{cookiecutter.year}} # year in which sample was acquired + # self.age = 'x' # (*, optional) age of sample + # self.dev_stage = x # (*, optional) developmental stage of organism + # self.ethnicity = x # (*, optional) ethnicity of sample + # self.healthy = x # (*, optional) whether sample represents a healthy organism + # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + # self.sex = x # (*, optional) sex + # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + + # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column + # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! + # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. + # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. 
+ # SFAIRA: name of column which contain streamlined cell ontology cell type classes: + # self.obs_key_cellontology_original = x # (optional) + + def _load(self) -> ad.AnnData: + # fn = os.path.join(self.data_dir, ) # SFAIRA ToDo: add the name of the raw file + # SFAIRA ToDo: add code that loads to raw file into an AnnData object and return it + pass diff --git a/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json new file mode 100644 index 000000000..92c82b0ec --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json @@ -0,0 +1,14 @@ +{ + "dataloader_author_name": "", + "dataloader_author_email": "", + "id": "", + "id_without_doi": "", + "author": "", + "doi": "", + "doi_sfaira_repr": "", + "download_url_data": "", + "organ": "", + "organism": "", + "protocol": "", + "year": "" + } diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py new file mode 100644 index 000000000..cee763d5b --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -0,0 +1,70 @@ +import os +import anndata as ad +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + + +# SFAIRA TODO: Add correct sample IDs here. 
+SAMPLE_IDS = [ + # "your_sample_id_1", + # "your_sample_id_2" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # SFAIRA TODO: Add you meta data here. + self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + + self.author = {{ cookiecutter.author }} # author (list) who sampled / created the data set + self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript + + self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files + # self.download_url_meta = 'x' # download website(s) of meta data files + + self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) + self.organism = '{{ cookiecutter.organism }}' # (*) species / organism + self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.year = {{ cookiecutter.year }} # year in which sample was acquired + # self.age = 'x' # (*, optional) age of sample + # self.dev_stage = x # (*, optional) developmental stage of organism + # self.ethnicity = x # (*, optional) ethnicity of sample + # self.healthy = x # (*, optional) whether sample represents a healthy organism + # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + # self.sex = x # (*, optional) sex + # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + + # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column + # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. 
You need to make sure this is loaded in the loading script)! + # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. + # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # SFAIRA: name of column which contain streamlined cell ontology cell type classes: + # self.obs_key_cellontology_original = x # (optional) + + # SFAIRA TODO: Make sure to include this attribute which indicates the column in self.adata in which you saved the sample IDs. + self.obs_key_sample = 'x' + + def _load_full(self) -> ad.AnnData: + # fn = os.path.join(self.data_dir,) # SFAIRA ToDo: add the name of the raw file + # SFAIRA ToDo: load full data into AnnData object (no subsetting!) 
and return it + pass diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json new file mode 100644 index 000000000..92c82b0ec --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json @@ -0,0 +1,14 @@ +{ + "dataloader_author_name": "", + "dataloader_author_email": "", + "id": "", + "id_without_doi": "", + "author": "", + "doi": "", + "doi_sfaira_repr": "", + "download_url_data": "", + "organ": "", + "organism": "", + "protocol": "", + "year": "" + } diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py new file mode 100644 index 000000000..b6c4e2b7a --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -0,0 +1,68 @@ +import anndata as ad +import os +from typing import Union + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + + +# SFARA TODO: Add correct sample IDs here. +SAMPLE_FNS = [ + "your_sample_fn_1", + "your_sample_fn_2" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # SFAIRA TODO: Add you meta data here. 
+ # SFAIRA TODO: Increase index ID by file -> f"sth_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_doi" + self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + + self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set + self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript + + self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files + # self.download_url_meta = 'x' # download website(s) of meta data files + + self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) + self.organism = '{{ cookiecutter.organism }}' # (*) species / organism + self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.year = {{cookiecutter.year}} # year in which sample was acquired + # self.age = 'x' # (*, optional) age of sample + # self.dev_stage = x # (*, optional) developmental stage of organism + # self.ethnicity = x # (*, optional) ethnicity of sample + # self.healthy = x # (*, optional) whether sample represents a healthy organism + # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + # self.sex = x # (*, optional) sex + # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + + # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column + # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! + # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
+ # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # SFAIRA: name of column which contain streamlined cell ontology cell type classes: + # self.obs_key_cellontology_original = x # (optional) + + def _load(self) -> ad.AnnData: + # fn = os.path.join(self.data_dir, self.sample_fn) # SFAIRA ToDo: add the name of the raw file + # SFAIRA ToDo: load file fn into self.adata and return it, self.sample_fn represents the current filename. 
+ pass \ No newline at end of file diff --git a/sfaira/commands/templates/single_dataset/cookiecutter.json b/sfaira/commands/templates/single_dataset/cookiecutter.json new file mode 100644 index 000000000..92c82b0ec --- /dev/null +++ b/sfaira/commands/templates/single_dataset/cookiecutter.json @@ -0,0 +1,14 @@ +{ + "dataloader_author_name": "", + "dataloader_author_email": "", + "id": "", + "id_without_doi": "", + "author": "", + "doi": "", + "doi_sfaira_repr": "", + "download_url_data": "", + "organ": "", + "organism": "", + "protocol": "", + "year": "" + } diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py new file mode 100644 index 000000000..e312ff68e --- /dev/null +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -0,0 +1,59 @@ +import os +from typing import Union +import anndata as ad + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + + # SFAIRA TODO Add your meta data here + self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). 
+ + self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set + self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript + + self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files + # self.download_url_meta = 'x' # download website(s) of meta data files + + self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) + self.organism = '{{ cookiecutter.organism }}' # (*) species / organism + self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.year = {{cookiecutter.year}} # year in which sample was acquired + # self.age = 'x' # (*, optional) age of sample + # self.dev_stage = x # (*, optional) developmental stage of organism + # self.ethnicity = x # (*, optional) ethnicity of sample + # self.healthy = x # (*, optional) whether sample represents a healthy organism + # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + # self.sex = x # (*, optional) sex + # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + + # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column + # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! + # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
+ # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # SFAIRA: name of column which contain streamlined cell ontology cell type classes: + # self.obs_key_cellontology_original = x # (optional) + + def _load(self) -> ad.AnnData: + # fn = os.path.join(self.data_dir,) # SFAIRA ToDo: add the name of the raw file + # SFAIRA ToDo: add code that loads to raw file into an AnnData object and return it + pass diff --git a/sfaira/commands/test_dataloader.py b/sfaira/commands/test_dataloader.py new file mode 100644 index 000000000..9b2a0157c --- /dev/null +++ b/sfaira/commands/test_dataloader.py @@ -0,0 +1,41 @@ +import logging +import os +from subprocess import Popen + +from rich import print +from sfaira.commands.questionary import sfaira_questionary + +log = logging.getLogger(__name__) + + +class DataloaderTester: + + def __init__(self, path): + self.WD = os.path.dirname(__file__) + self.path = path + self.doi = '' + self.doi_sfaira_repr = '' + + def test_dataloader(self): + """ + Runs a predefined unit test on a given dataloader. 
+ """ + print('[bold blue]Please ensure that your dataloader is in sfaira/dataloaders/loaders/.') + print('[bold blue]Please ensure that your test data is in sfaira/unit_tests/template_data/.') + self._prompt_doi() + self._run_unittest() + + def _prompt_doi(self): + self.doi = sfaira_questionary(function='text', + question='Enter your DOI', + default='10.1000/j.journal.2021.01.001') + self.doi_sfaira_repr = f'd{self.doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + + def _run_unittest(self): + print('[bold blue]Conflicts are not automatically resolved.') + print('[bold blue]Please go back to [bold]https://www.ebi.ac.uk/ols/ontologies/cl[blue] for every mismatch or conflicts ' + 'and add the correct cell ontology class name into the .csv "target" column.') + pytest = Popen(['pytest', '-s', self.path, '--doi_sfaira_repr', self.doi_sfaira_repr], + universal_newlines=True, shell=False, close_fds=True) + (pytest_stdout, pytest_stderr) = pytest.communicate() + print(pytest_stderr) diff --git a/sfaira/commands/upgrade.py b/sfaira/commands/upgrade.py new file mode 100644 index 000000000..f1670400c --- /dev/null +++ b/sfaira/commands/upgrade.py @@ -0,0 +1,101 @@ +import json +import logging +import urllib +import sys +from pkg_resources import parse_version + +import sfaira +from urllib.error import HTTPError, URLError +from subprocess import Popen, PIPE, check_call +from rich import print + +from sfaira.commands.questionary import sfaira_questionary + +log = logging.getLogger(__name__) + + +class UpgradeCommand: + """ + Responsible for checking for newer versions sfaira and upgrading it if required. + """ + + @staticmethod + def check_upgrade_sfaira() -> None: + """ + Checks whether the locally installed version of sfaira is the latest. + If not it prompts whether to upgrade and runs the upgrade command if desired. 
+ """ + if not UpgradeCommand.check_sfaira_latest(): + if sfaira_questionary(function='confirm', + question='Do you want to upgrade?', + default='y'): + UpgradeCommand.upgrade_sfaira() + + @classmethod + def check_sfaira_latest(cls) -> bool: + """ + Checks whether the locally installed version of sfaira is the latest available on PyPi. + + :return: True if locally version is the latest or PyPI is inaccessible, false otherwise + """ + latest_local_version = sfaira.__version__ + sliced_local_version = latest_local_version[:-9] if latest_local_version.endswith('-SNAPSHOT') else latest_local_version + log.debug(f'Latest local sfaira version is: {latest_local_version}.') + log.debug('Checking whether a new sfaira version exists on PyPI.') + try: + # Retrieve info on latest version + # Adding nosec (bandit) here, since we have a hardcoded https request + # It is impossible to access file:// or ftp:// + # See: https://stackoverflow.com/questions/48779202/audit-url-open-for-permitted-schemes-allowing-use-of-file-or-custom-schemes + req = urllib.request.Request('https://pypi.org/pypi/sfaira/json') # nosec + with urllib.request.urlopen(req, timeout=1) as response: # nosec + contents = response.read() + data = json.loads(contents) + latest_pypi_version = data['info']['version'] + except (HTTPError, TimeoutError, URLError): + print('[bold red]Unable to contact PyPI to check for the latest sfaira version. Do you have an internet connection?') + # Returning true by default since this is not a serious issue + return True + + if parse_version(sliced_local_version) > parse_version(latest_pypi_version): + print(f'[bold yellow]Installed version {latest_local_version} of sfaira is newer than the latest release {latest_pypi_version}!' + f' You are running a nightly version and features may break!') + elif parse_version(sliced_local_version) == parse_version(latest_pypi_version): + return True + else: + print(f'[bold red]Installed version {latest_local_version} of sfaira is outdated. 
Newest version is {latest_pypi_version}!') + return False + + return False + + @classmethod + def upgrade_sfaira(cls) -> None: + """ + Calls pip as a subprocess with the --upgrade flag to upgrade sfaira to the latest version. + """ + log.debug('Attempting to upgrade sfaira via pip install --upgrade sfaira .') + if not UpgradeCommand.is_pip_accessible(): + sys.exit(1) + try: + check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', 'sfaira']) + except Exception as e: + print('[bold red]Unable to upgrade sfaira') + print(f'[bold red]Exception: {e}') + + @classmethod + def is_pip_accessible(cls) -> bool: + """ + Verifies that pip is accessible and in the PATH. + + :return: True if accessible, false if not + """ + log.debug('Verifying that pip is accessible.') + pip_installed = Popen(['pip', '--version'], stdout=PIPE, stderr=PIPE, universal_newlines=True) + (git_installed_stdout, git_installed_stderr) = pip_installed.communicate() + if pip_installed.returncode != 0: + log.debug('Pip was not accessible!') + print('[bold red]Unable to find \'pip\' in the PATH. 
Is it installed?') + print('[bold red]Run command was [green]\'pip --version \'') + return False + + return True diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py index 98ee94d4f..b14dac445 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py @@ -1,4 +1,3 @@ -import anndata import os from typing import Union import tarfile diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py deleted file mode 100644 index b1d5b2c2b..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/__init__.py +++ /dev/null @@ -1 +0,0 @@ -FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py deleted file mode 100644 index 3fc8d9f6c..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_1.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -import os -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - # ToDo: Add you meta data here. 
- - def _load(self): - # fn = os.path.join(self.data_dir, ) # ToDo: add the name of the raw file - # ToDo: add code that loads to raw file into an AnnData object - pass diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py b/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py deleted file mode 100644 index 3fc8d9f6c..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_non_streamlined/your_dataset_file_2.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -import os -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - # ToDo: Add you meta data here. - - def _load(self): - # fn = os.path.join(self.data_dir, ) # ToDo: add the name of the raw file - # ToDo: add code that loads to raw file into an AnnData object - pass diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py deleted file mode 100644 index b1d5b2c2b..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/__init__.py +++ /dev/null @@ -1 +0,0 @@ -FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py deleted file mode 100644 index fc7d8ad3f..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_many_files_streamlined/your_dataset_file.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Union -import os - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ # ToDo Add 
correct sample file names here. - "your_sample_fn_1", - "your_sample_fn_2" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__( - sample_fn=sample_fn, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - **kwargs - ) - - self.id = f"sth_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_doi" # ToDo: Index the Dataset ID by the file. - # ToDo Add you meta data here. - - def _load(self): - # fn = os.path.join(self.data_dir, self.sample_fn) # ToDo: add the name of the raw file - # ToDo: load file fn into self.adata, self.sample_fn represents the current filename. - pass diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py b/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py deleted file mode 100644 index b1d5b2c2b..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_one_file/__init__.py +++ /dev/null @@ -1 +0,0 @@ -FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py b/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py deleted file mode 100644 index 8be85b2a0..000000000 --- a/sfaira/data/templates/dataloaders/many_samples_one_file/your_dataset_file.py +++ /dev/null @@ -1,38 +0,0 @@ -import anndata -from typing import Union -import os - -from sfaira.data import DatasetBaseGroupLoadingOneFile - -SAMPLE_IDS = [ # ToDo Add correct sample IDs here. 
- "your_sample_id_1", - "your_sample_id_2" -] - - -class Dataset(DatasetBaseGroupLoadingOneFile): - - def __init__( - self, - sample_id: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__( - sample_id=sample_id, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - **kwargs - ) - - # ToDo Add you meta data here. - self.obs_key_sample = 'Sample' # ToDo: Make sure to include this attribute which indicates the column in - # self.adata in which you saved the sample IDs based on which the full adata object is subsetted. - - def _load_full(self) -> anndata.AnnData: - # fn = os.path.join(self.data_dir,) # ToDo: add the name of the raw file - # ToDo: load full data into AnnData object (no subsetting!) - pass diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py b/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py deleted file mode 100644 index b1d5b2c2b..000000000 --- a/sfaira/data/templates/dataloaders/one_samples_one_files/__init__.py +++ /dev/null @@ -1 +0,0 @@ -FILE_PATH = __file__ diff --git a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py b/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py deleted file mode 100644 index ed1c1d8d9..000000000 --- a/sfaira/data/templates/dataloaders/one_samples_one_files/your_dataset_file_1.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union -import os -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - # ToDo: Add you meta data here. 
- - def _load(self): - # fn = os.path.join(self.data_dir,) # ToDo: add the name of the raw file - # ToDo: add code that loads to raw file into an AnnData object - pass diff --git a/sfaira/unit_tests/conftest.py b/sfaira/unit_tests/conftest.py new file mode 100644 index 000000000..94f90450e --- /dev/null +++ b/sfaira/unit_tests/conftest.py @@ -0,0 +1,15 @@ +""" +Parameterizing test according to https://stackoverflow.com/questions/40880259/how-to-pass-arguments-in-pytest-by-command-line +""" + + +def pytest_addoption(parser): + parser.addoption("--doi_sfaira_repr", action="store", default="d10_1016_j_cmet_2019_01_021") + + +def pytest_generate_tests(metafunc): + # This is called for every test. Only get/set command line arguments + # if the argument is specified in the list of test "fixturenames". + option_value = metafunc.config.option.name + if "doi_sfaira_repr" in metafunc.fixturenames and option_value is not None: + metafunc.parametrize("doi_sfaira_repr", [option_value]) diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/test_data_template.py index d4dabb8a1..352288c27 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/test_data_template.py @@ -8,7 +8,7 @@ sfairae = None -def test_load(dir_template: str = "./template_data"): +def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j.journal.2021.01.001"): """ Unit test to assist with data set contribution. @@ -23,7 +23,7 @@ def test_load(dir_template: str = "./template_data"): 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be - differentially successfull. The proposed IDs groups are separate by ":|||:" strings to give you a visial anchor + differentially successful. The proposed IDs groups are separate by ":|||:" strings to give you a visual anchor when going through these lists. 
You need to delete all of these division strings and all labels in the second columns other than the best fit label. Do not change the first column, (Note that columns are separated by ",") @@ -37,7 +37,7 @@ def test_load(dir_template: str = "./template_data"): classmap_by_file = True # ToDo build one class map per file or per data loader (potentially many per file) - flattened_doi = "d10_1016_j_cmet_2019_01_021" # ToDo: add correct module here as "YOUR_STUDY" + flattened_doi = doi_sfaira_repr # ToDo: add correct module here as "YOUR_STUDY" # Define file names and loader paths in sfaira or sfaira_extension: # Define base paths of loader collections in sfaira and sfaira_extension: dir_loader_sfaira = "sfaira.data.dataloaders.loaders." diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 65982b0af..a1a85008d 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -905,7 +905,7 @@ def synonym_string_processing(y): # Check this by checking if one is an ancestor of the other: anatomical_subselection = [ z and ( - anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or + anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or # noqa: E126 y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) ) for y, z in zip(uberon_ids, anatomical_subselection) @@ -918,7 +918,7 @@ def synonym_string_processing(y): if anatomical_subselection[i] and not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) ][-n_suggest:][::-1] - }) + }) # noqa: E122 # 2. Run a second string matching with the anatomical word included. modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]"). 
\ From dfe252c9719a48b924c0a5c065700360f4c06e2d Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Wed, 17 Feb 2021 20:02:49 +0100 Subject: [PATCH 070/161] fixed DatasetBaseGroupLoadingOneFile given new laod structure --- sfaira/data/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index ed4df4a24..87e7d45ed 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1578,7 +1578,7 @@ def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = elif self.adata is None and adata_group is None: self._load_full() elif self.adata is not None and not self._unprocessed_full_group_object: - self._load_full() + self.adata = self._load_full() elif self.adata is not None and self._unprocessed_full_group_object: pass else: From 17204113fa6e58b42e448ae22a5a7f2f7be7d291 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Wed, 17 Feb 2021 20:33:33 +0100 Subject: [PATCH 071/161] fixed additional bug in DatasetBaseGroupLoadingOneFile --- sfaira/data/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 87e7d45ed..51b12fd5e 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1576,7 +1576,7 @@ def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = if self.adata is None and adata_group is not None: self.adata = adata_group elif self.adata is None and adata_group is None: - self._load_full() + self.adata = self._load_full() elif self.adata is not None and not self._unprocessed_full_group_object: self.adata = self._load_full() elif self.adata is not None and self._unprocessed_full_group_object: From 8e4e28df2c56232cadd3311cc3c41a9686dc4ff6 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Wed, 17 Feb 2021 20:52:53 +0100 Subject: [PATCH 072/161] fixed bug in DatasetBaseGroupLoadingOneFile --- sfaira/data/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/sfaira/data/base.py b/sfaira/data/base.py index 51b12fd5e..d9221853a 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -1563,7 +1563,7 @@ def sample_id(self): return self._sample_id @abc.abstractmethod - def _load_full(self): + def _load_full(self) -> anndata.AnnData: """ Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. @@ -1612,10 +1612,11 @@ def _subset_from_group( self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] self._unprocessed_full_group_object = False - def _load(self): + def _load(self) -> anndata.AnnData: _ = self.set_raw_full_group_object(adata_group=None) if self._unprocessed_full_group_object: self._load_from_group() + return self.adata class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): From 58c808c56bcaec40597f46ab7b48580ba069c886 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 18 Feb 2021 10:17:34 +0100 Subject: [PATCH 073/161] miscelaneous fixes (#137) * fix #133 * assign adata object returned by _load_full() to self.adata attribute correctly * remove download helper scripts as we now have implemented automatic dataset downloads * fix Dataset doi properties. 
closes #136 --- .../{{ cookiecutter.id_without_doi }}.py | 4 +- .../{{ cookiecutter.id_without_doi }}.py | 4 +- .../{{ cookiecutter.id_without_doi }}.py | 4 +- .../{{ cookiecutter.id_without_doi }}.py | 4 +- sfaira/data/base.py | 2 +- .../get_batch_download_mouse.sh | 87 ------------------- 6 files changed, 9 insertions(+), 96 deletions(-) delete mode 100644 sfaira/data/download_scripts/get_batch_download_mouse.sh diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index c12ef73ff..a457cbc9e 100644 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -54,6 +54,6 @@ def __init__( # self.obs_key_cellontology_original = x # (optional) def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir, ) # SFAIRA ToDo: add the name of the raw file - # SFAIRA ToDo: add code that loads to raw file into an AnnData object and return it + # fn = os.path.join(self.data_dir, ) # SFAIRA TODO: add the name of the raw file + # SFAIRA TODO: add code that loads to raw file into an AnnData object and return it pass diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index cee763d5b..170c9742c 100644 --- a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr 
}}/{{ cookiecutter.id_without_doi }}.py @@ -65,6 +65,6 @@ def __init__( self.obs_key_sample = 'x' def _load_full(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir,) # SFAIRA ToDo: add the name of the raw file - # SFAIRA ToDo: load full data into AnnData object (no subsetting!) and return it + # fn = os.path.join(self.data_dir,) # SFAIRA TODO: add the name of the raw file + # SFAIRA TODO: load full data into AnnData object (no subsetting!) and return it pass diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index b6c4e2b7a..82b99a672 100644 --- a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -63,6 +63,6 @@ def __init__( # self.obs_key_cellontology_original = x # (optional) def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir, self.sample_fn) # SFAIRA ToDo: add the name of the raw file - # SFAIRA ToDo: load file fn into self.adata and return it, self.sample_fn represents the current filename. + # fn = os.path.join(self.data_dir, self.sample_fn) # SFAIRA TODO: add the name of the raw file + # SFAIRA TODO: load file fn into self.adata and return it, self.sample_fn represents the current filename. 
pass \ No newline at end of file diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index e312ff68e..5acce65f4 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -54,6 +54,6 @@ def __init__( # self.obs_key_cellontology_original = x # (optional) def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir,) # SFAIRA ToDo: add the name of the raw file - # SFAIRA ToDo: add code that loads to raw file into an AnnData object and return it + # fn = os.path.join(self.data_dir,) # SFAIRA TODO: add the name of the raw file + # SFAIRA TODO: add code that loads to raw file into an AnnData object and return it pass diff --git a/sfaira/data/base.py b/sfaira/data/base.py index d9221853a..b831d17ee 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -989,7 +989,7 @@ def doi(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is None or self._ADATA_IDS_SFAIRA.healthy not in self.meta.columns: + if self.meta is None or self._ADATA_IDS_SFAIRA.doi not in self.meta.columns: raise ValueError("doi must be set but was neither set in constructor nor in meta data") return self.meta[self._ADATA_IDS_SFAIRA.doi] diff --git a/sfaira/data/download_scripts/get_batch_download_mouse.sh b/sfaira/data/download_scripts/get_batch_download_mouse.sh deleted file mode 100644 index 58a7e3e96..000000000 --- a/sfaira/data/download_scripts/get_batch_download_mouse.sh +++ /dev/null @@ -1,87 +0,0 @@ -# tabula muris sense -## full objects -#wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-bbknn-processed-official-annotations.h5ad -#wget 
https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-official-raw-obj.h5ad -#wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-official-raw-obj.h5ad -## by organ -mkdir -p bladder -cd bladder -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad -mkdir -p ../brain -cd ../brain -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad -mkdir -p ../diaphragm -cd ../diaphragm -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad -mkdir -p ../fat -cd ../fat -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad -mkdir -p ../ -cd ../heart -wget 
https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad -mkdir -p ../kidney -cd ../kidney -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad -mkdir -p ../large_intestine -cd ../large_intestine -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad -mkdir -p ../ -cd ../limb_muscle -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad -mkdir -p ../liver -cd ../liver -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad -mkdir -p ../lung -cd ../lung -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad -wget 
https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad -mkdir -p ../mammary_gland -cd ../mammary_gland -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad -mkdir -p ../marrow -cd ../marrow -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad -mkdir -p ../pancreas -cd ../pancreas -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad -mkdir -p ../skin -cd ../skin -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad -mkdir -p ../spleen -cd ../spleen -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Spleen.h5ad -mkdir -p ../thymus -cd ../thymus -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad -wget 
https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad -mkdir -p ../tongue -cd ../tongue -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad -mkdir -p ../trachea -cd ../trachea -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad -wget https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad - -# ENCODE3 -## full objects -cd ../all -wget https://cells.ucsc.edu/mouse-limb/C1_200325/200315_C1_categorical.h5ad -wget https://cells.ucsc.edu/mouse-limb/10x/200120_10x.h5ad From af20e9c3be8dfa31d3b3a3bf0f4eaf292064f19a Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Fri, 26 Feb 2021 17:03:53 +0100 Subject: [PATCH 074/161] reduce comment duplication (#150) * add load repetitive code reduction Signed-off-by: Zethson * fix None comment Signed-off-by: Zethson * fix flake8 Signed-off-by: Zethson --- sfaira/data/base.py | 51 +++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index b831d17ee..e0f573beb 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -33,8 +33,7 @@ def map_fn(inputs): :param inputs: :return: None if function ran, error report otherwise """ - ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, \ - kwargs_func = inputs + ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, kwargs_func = inputs try: ds.load( remove_gene_version=remove_gene_version, @@ -52,8 +51,16 @@ def map_fn(inputs): return ds.id, e, -class 
DatasetBase(abc.ABC): +load_doc = \ + """ + :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. + :param match_to_reference: Reference genomes name or False to keep original feature space. + :param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. + """ + +class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict _meta: Union[None, pandas.DataFrame] @@ -341,15 +348,6 @@ def load( load_raw: bool = False, allow_caching: bool = True, ): - """ - - :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different - data sets are superimposed. - :param match_to_reference: Reference genomes name or False to keep original feature space. - :param load_raw: Loads unprocessed version of data if available in data loader. - :param allow_caching: Whether to allow method to cache adata object for faster re-loading. - :return: - """ if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" "while not removing gene versions. this can lead to very poor matching results") @@ -390,6 +388,8 @@ def load( if match_to_reference: self._match_features_to_reference() + load.__doc__ = load_doc + def _convert_and_set_var_names( self, match_to_reference: Union[str, bool, None], @@ -481,17 +481,14 @@ def _collapse_gene_versions(self, remove_gene_version): # last element of each block as block boundaries: # n_genes - 1 - idx_map_sorted_rev.index(x) # Note that the blocks are named as positive integers starting at 1, without gaps. 
- counts = np.concatenate([ - np.sum(x, axis=1, keepdims=True) - for x in np.split( - self.adata[:, idx_map_sorted_fwd].X, # forward ordered data - indices_or_sections=[ - n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order - for x in np.arange(0, len(new_index_collapsed) - 1) # -1: do not need end of last partition - ], - axis=1 - ) - ][::-1], axis=1) + counts = np.concatenate([np.sum(x, axis=1, keepdims=True) + for x in np.split(self.adata[:, idx_map_sorted_fwd].X, # forward ordered data + indices_or_sections=[ + n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order + for x in np.arange(0, len(new_index_collapsed) - 1)], # -1: do not need end of last partition + axis=1 + ) + ][::-1], axis=1) # Remove varm and populate var with first occurrence only: obs_names = self.adata.obs_names self.adata = anndata.AnnData( @@ -1694,10 +1691,6 @@ def load( In this setting, datasets are removed from memory after the function has been executed. :param annotated_only: - :param remove_gene_version: See .load(). - :param match_to_reference: See .load(). - :param load_raw: See .load(). - :param allow_caching: See .load(). :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset instance. The return can be empty: @@ -1706,7 +1699,6 @@ def func(dataset, **kwargs_func): # code manipulating dataset and generating output x. return x :param kwargs_func: Kwargs of func. - :return: """ args = [ remove_gene_version, @@ -1747,6 +1739,8 @@ def func(dataset, **kwargs_func): del self.datasets[k] del adata_group + load.__doc__ += load_doc + def load_tobacked( self, adata_backed: anndata.AnnData, @@ -1798,7 +1792,6 @@ def write_ontology_class_map( :param fn: File name of csv to load class maps from. :param protected_writing: Only write if file was not already found. 
- :return: """ tab = [] for k, v in self.datasets.items(): From bb68ca5a366b6dd1cc6bdeee833615c96972ab43 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 1 Mar 2021 14:49:44 +0100 Subject: [PATCH 075/161] Cell type mapping csv files (#102) * added cell type map files which map free text labels to ontologies * added testing code for cell type maps Co-authored-by: davidsebfischer Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> Co-authored-by: Zethson --- .gitignore | 3 +- docs/api/sfaira.data.DatasetBase.rst | 2 +- docs/api/sfaira.data.DatasetInteractive.rst | 2 +- setup.py | 11 - sfaira/__init__.py | 10 +- sfaira/commands/create_dataloader.py | 13 + sfaira/commands/lint_dataloader.py | 8 +- .../{{ cookiecutter.id_without_doi }}.py | 2 +- .../{{ cookiecutter.id_without_doi }}.py | 9 +- .../{{ cookiecutter.id_without_doi }}.py | 6 +- .../{{ cookiecutter.id_without_doi }}.py | 2 +- sfaira/consts/__init__.py | 6 +- sfaira/consts/adata_fields.py | 27 +- sfaira/consts/ontologies.py | 29 + sfaira/data/base.py | 574 +++++++++------- .../databases/cellxgene/cellxgene_group.py | 8 +- .../databases/cellxgene/cellxgene_loader.py | 41 +- .../databases/cellxgene/external.py | 3 - ...er_2019_10xsequencing_ramachandran_001.py} | 21 +- ...er_2019_10xsequencing_ramachandran_001.tsv | 13 + ...etoflangerhans_2017_smartseq2_enge_001.py} | 20 +- ...etoflangerhans_2017_smartseq2_enge_001.tsv | 7 + .../mouse_x_2018_microwellseq_han_x.py | 87 +-- .../mouse_x_2018_microwellseq_han_x.tsv | 451 +++++++++++++ ...ofcolon_2019_10xsequencing_kinchen_001.py} | 26 +- ...ofcolon_2019_10xsequencing_kinchen_001.tsv | 15 + .../human_colon_2019_10x_smilie_001.py | 88 --- ...pithelium_2019_10xsequencing_smilie_001.py | 44 ++ ...ithelium_2019_10xsequencing_smilie_001.tsv | 39 ++ ...an_ileum_2019_10xsequencing_martin_001.py} | 30 +- ...an_ileum_2019_10xsequencing_martin_001.tsv | 20 + ...tategland_2018_10xsequencing_henry_001.py} | 23 +- 
...tategland_2018_10xsequencing_henry_001.tsv | 9 + .../human_pancreas_2016_indrop_baron_001.py | 10 +- .../human_pancreas_2016_indrop_baron_001.tsv | 15 + ...pancreas_2016_smartseq2_segerstolpe_001.py | 29 +- ...ancreas_2016_smartseq2_segerstolpe_001.tsv | 13 + .../mouse_pancreas_2019_10x_thompson_x.csv | 14 - ...pancreas_2019_10xsequencing_thompson_x.py} | 11 +- ...pancreas_2019_10xsequencing_thompson_x.tsv | 14 + .../human_lung_2020_10x_miller_001.py | 76 --- ...uman_lung_2020_10xsequencing_miller_001.py | 50 ++ ...man_lung_2020_10xsequencing_miller_001.tsv | 22 + ...=> human_brain_2017_droncseq_habib_001.py} | 26 +- .../human_brain_2017_droncseq_habib_001.tsv | 15 + ...uman_testis_2018_10xsequencing_guo_001.py} | 21 +- ...uman_testis_2018_10xsequencing_guo_001.tsv | 13 + ...iver_2018_10xsequencing_macparland_001.py} | 30 +- ...iver_2018_10xsequencing_macparland_001.tsv | 21 + .../human_kidney_2019_droncseq_lake_001.py | 39 +- .../human_kidney_2019_droncseq_lake_001.tsv | 27 + ...> human_x_2019_10xsequencing_szabo_001.py} | 71 +- .../human_x_2019_10xsequencing_szabo_001.tsv | 21 + ...an_retina_2019_10xsequencing_menon_001.py} | 19 +- ...an_retina_2019_10xsequencing_menon_001.tsv | 10 + ...> human_placenta_2018_x_ventotormo_001.py} | 47 +- .../human_placenta_2018_x_ventotormo_001.tsv | 33 + .../human_liver_2019_CELseq2_aizarani_001.py | 92 --- .../human_liver_2019_celseq2_aizarani_001.py | 49 ++ .../human_liver_2019_celseq2_aizarani_001.tsv | 35 + .../human_liver_2019_10x_popescu_001.py | 73 --- ...an_liver_2019_10xsequencing_popescu_001.py | 40 ++ ...n_liver_2019_10xsequencing_popescu_001.tsv | 28 + .../human_x_2020_microwellseq_han_x.py | 46 +- .../human_x_2020_microwellseq_han_x.tsv | 618 ++++++++++++++++++ .../human_lung_2020_10x_travaglini_001.py | 213 ------ .../human_lung_2020_x_travaglini_001.py | 71 ++ .../human_lung_2020_x_travaglini_001.tsv | 140 ++++ ...man_colon_2020_10xsequencing_james_001.py} | 35 +- ...man_colon_2020_10xsequencing_james_001.tsv | 
26 + .../human_lung_2019_10x_braga_x.py | 98 --- .../human_lung_2019_dropseq_braga_001.py | 25 +- .../human_lung_2019_dropseq_braga_001.tsv | 14 + .../human_x_2019_10xsequencing_braga_x.py | 50 ++ .../human_x_2019_10xsequencing_braga_x.tsv | 22 + ...=> mouse_x_2019_10xsequencing_hove_001.py} | 15 +- .../mouse_x_2019_10xsequencing_hove_001.tsv | 15 + ...man_kidney_2020_10xsequencing_liao_001.py} | 4 +- ...an_retina_2019_10xsequencing_voigt_001.py} | 21 +- ...an_retina_2019_10xsequencing_voigt_001.tsv | 12 + .../human_x_2019_10x_wang_001.py | 90 --- .../human_x_2019_10xsequencing_wang_001.py | 54 ++ .../human_x_2019_10xsequencing_wang_001.tsv | 8 + .../human_lung_2020_10x_lukassen_001.py | 87 --- ...an_lung_2020_10xsequencing_lukassen_001.py | 52 ++ ...n_lung_2020_10xsequencing_lukassen_001.tsv | 23 + .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 13 +- .../mouse_x_2019_x_pisco_x.tsv | 154 +++++ ...chyma_2020_10xsequencing_habermann_001.py} | 43 +- ...chyma_2020_10xsequencing_habermann_001.tsv | 32 + .../human_kidney_2019_10x_stewart_001.py | 129 ---- ...n_kidney_2019_10xsequencing_stewart_001.py | 62 ++ ..._kidney_2019_10xsequencing_stewart_001.tsv | 64 ++ .../human_thymus_2020_10x_park_001.py | 92 --- ...uman_thymus_2020_10xsequencing_park_001.py | 42 ++ ...man_thymus_2020_10xsequencing_park_001.tsv | 45 ++ .../human_x_2019_10x_madissoon_001.py | 160 ----- ...uman_x_2019_10xsequencing_madissoon_001.py | 73 +++ ...man_x_2019_10xsequencing_madissoon_001.tsv | 61 ++ ...retina_2019_10xsequencing_lukowski_001.py} | 25 +- ...retina_2019_10xsequencing_lukowski_001.tsv | 13 + ...ood_2019_10xsequencing_10xgenomics_001.py} | 4 +- ...> human_x_2018_10xsequencing_regev_001.py} | 17 +- .../data/dataloaders/loaders/super_group.py | 4 +- sfaira/data/dataloaders/super_group.py | 2 +- sfaira/data/utils.py | 9 +- .../clean_celltype_maps_global.py | 25 + ...maps.py => create_celltype_maps_global.py} | 2 +- .../create_celltype_maps_selected.py | 87 +++ 
.../utils_scripts/create_meta_and_cache.py | 2 +- sfaira/estimators/callbacks.py | 5 +- sfaira/estimators/external.py | 4 - sfaira/estimators/keras.py | 37 +- sfaira/estimators/losses.py | 6 +- sfaira/estimators/metrics.py | 6 +- sfaira/interface/model_zoo.py | 8 +- sfaira/models/celltype/marker.py | 18 +- sfaira/models/celltype/mlp.py | 18 +- sfaira/models/embedding/ae.py | 5 +- sfaira/models/embedding/linear.py | 5 +- sfaira/models/embedding/output_layers.py | 5 +- sfaira/models/embedding/vae.py | 5 +- sfaira/models/embedding/vaeiaf.py | 5 +- sfaira/models/embedding/vaevamp.py | 5 +- sfaira/models/made.py | 58 +- sfaira/models/pp_layer.py | 5 +- sfaira/train/summaries.py | 48 +- sfaira/train/train_model.py | 162 +++-- .../metadata/human => unit_tests}/__init__.py | 0 sfaira/unit_tests/data/__init__.py | 0 .../data/test_clean_celltype_maps.py | 13 + .../unit_tests/{ => data}/test_data_utils.py | 0 sfaira/unit_tests/{ => data}/test_dataset.py | 10 +- .../unit_tests/data_contribution/__init__.py | 0 .../{ => data_contribution}/conftest.py | 0 .../test_data_template.py | 7 +- sfaira/unit_tests/estimators/__init__.py | 0 .../{ => estimators}/test_estimator.py | 4 +- sfaira/unit_tests/external.py | 6 - sfaira/unit_tests/interface/__init__.py | 0 .../{ => interface}/test_userinterface.py | 6 +- sfaira/unit_tests/models/__init__.py | 0 sfaira/unit_tests/{ => models}/test_models.py | 0 sfaira/unit_tests/test_celltype_universe.py | 70 -- sfaira/unit_tests/versions/__init__.py | 0 sfaira/unit_tests/versions/test_ontologies.py | 10 + sfaira/unit_tests/{ => versions}/test_zoo.py | 6 +- sfaira/versions/metadata/__init__.py | 7 +- sfaira/versions/metadata/base.py | 426 +----------- sfaira/versions/metadata/universe.py | 415 ++++++++++++ 150 files changed, 4136 insertions(+), 2761 deletions(-) create mode 100644 sfaira/consts/ontologies.py delete mode 100644 sfaira/data/dataloaders/databases/cellxgene/external.py rename 
sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/{human_liver_2019_10x_ramachandran_001.py => human_liver_2019_10xsequencing_ramachandran_001.py} (74%) create mode 100644 sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv rename sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/{human_pancreas_2017_smartseq2_enge_001.py => human_isletoflangerhans_2017_smartseq2_enge_001.py} (85%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv rename sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/{human_colon_2019_10x_kinchen_001.py => human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py} (68%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.tsv rename sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/{human_ileum_2019_10x_martin_001.py => human_ileum_2019_10xsequencing_martin_001.py} (56%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv rename sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/{human_prostate_2018_10x_henry_001.py => human_prostategland_2018_10xsequencing_henry_001.py} (70%) create mode 100644 
sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv rename sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/{mouse_pancreas_2019_10x_thompson_x.py => mouse_pancreas_2019_10xsequencing_thompson_x.py} (89%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/{human_brain_2017_DroNcSeq_habib_001.py => human_brain_2017_droncseq_habib_001.py} (56%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/{human_testis_2018_10x_guo_001.py => human_testis_2018_10xsequencing_guo_001.py} (59%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/{human_liver_2018_10x_macparland_001.py => human_caudatelobeofliver_2018_10xsequencing_macparland_001.py} (57%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.tsv 
create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/{human_mixed_2019_10x_szabo_001.py => human_x_2019_10xsequencing_szabo_001.py} (51%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/{human_eye_2019_10x_menon_001.py => human_retina_2019_10xsequencing_menon_001.py} (66%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/{human_placenta_2018_10x_ventotormo_001.py => human_placenta_2018_x_ventotormo_001.py} (51%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py create mode 100644 
sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/{human_colon_2020_10x_james_001.py => human_colon_2020_10xsequencing_james_001.py} (52%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/{mouse_brain_2019_10x_hove_001.py => mouse_x_2019_10xsequencing_hove_001.py} (85%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv rename sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/{human_kidney_2020_10x_liao_001.py => human_kidney_2020_10xsequencing_liao_001.py} (96%) rename sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/{human_eye_2019_10x_voigt_001.py => human_retina_2019_10xsequencing_voigt_001.py} (63%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.tsv delete mode 100644 
sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.tsv rename sfaira/data/dataloaders/loaders/d10_1101_753806/{human_lung_2020_10x_habermann_001.py => human_lungparenchyma_2020_10xsequencing_habermann_001.py} (53%) create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.tsv delete mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv rename sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/{human_eye_2019_10x_lukowski_001.py => human_retina_2019_10xsequencing_lukowski_001.py} (55%) create mode 100644 
sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv rename sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/{human_blood_2019_10x_10xGenomics_001.py => human_blood_2019_10xsequencing_10xgenomics_001.py} (96%) rename sfaira/data/dataloaders/loaders/dno_doi_regev/{human_x_2018_10x_ica_001.py => human_x_2018_10xsequencing_regev_001.py} (72%) create mode 100644 sfaira/data/utils_scripts/clean_celltype_maps_global.py rename sfaira/data/utils_scripts/{create_celltype_maps.py => create_celltype_maps_global.py} (99%) create mode 100644 sfaira/data/utils_scripts/create_celltype_maps_selected.py delete mode 100644 sfaira/estimators/external.py rename sfaira/{versions/metadata/human => unit_tests}/__init__.py (100%) create mode 100644 sfaira/unit_tests/data/__init__.py create mode 100644 sfaira/unit_tests/data/test_clean_celltype_maps.py rename sfaira/unit_tests/{ => data}/test_data_utils.py (100%) rename sfaira/unit_tests/{ => data}/test_dataset.py (93%) create mode 100644 sfaira/unit_tests/data_contribution/__init__.py rename sfaira/unit_tests/{ => data_contribution}/conftest.py (100%) rename sfaira/unit_tests/{ => data_contribution}/test_data_template.py (96%) create mode 100644 sfaira/unit_tests/estimators/__init__.py rename sfaira/unit_tests/{ => estimators}/test_estimator.py (97%) delete mode 100644 sfaira/unit_tests/external.py create mode 100644 sfaira/unit_tests/interface/__init__.py rename sfaira/unit_tests/{ => interface}/test_userinterface.py (91%) create mode 100644 sfaira/unit_tests/models/__init__.py rename sfaira/unit_tests/{ => models}/test_models.py (100%) delete mode 100644 sfaira/unit_tests/test_celltype_universe.py create mode 100644 sfaira/unit_tests/versions/__init__.py create mode 100644 sfaira/unit_tests/versions/test_ontologies.py rename sfaira/unit_tests/{ => versions}/test_zoo.py (92%) create mode 100644 sfaira/versions/metadata/universe.py diff --git a/.gitignore b/.gitignore index 
f9ffe2c3f..f2707b8fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ +cache/ontologies/cl/* sfaira/unit_tests/test_data_loaders/* sfaira/unit_tests/test_data/* sfaira/unit_tests/template_data/* -sfaira/unit_tests/test_data_template.py +sfaira/unit_tests/data_contribution/test_data_template.py git abuild cache sfaira.egg-info diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst index edd05f7f8..7e8dd3be0 100644 --- a/docs/api/sfaira.data.DatasetBase.rst +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -45,7 +45,7 @@ ~DatasetBase.download ~DatasetBase.download_meta ~DatasetBase.ethnicity - ~DatasetBase.fn_ontology_class_map_csv + ~DatasetBase.fn_ontology_class_map_tsv ~DatasetBase.healthy ~DatasetBase.healthy_state_healthy ~DatasetBase.id diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst index c7f23fc1e..0bb616838 100644 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -45,7 +45,7 @@ ~DatasetInteractive.download ~DatasetInteractive.download_meta ~DatasetInteractive.ethnicity - ~DatasetInteractive.fn_ontology_class_map_csv + ~DatasetInteractive.fn_ontology_class_map_tsv ~DatasetInteractive.healthy ~DatasetInteractive.healthy_state_healthy ~DatasetInteractive.id diff --git a/setup.py b/setup.py index 1132dcf46..09c36c2f8 100644 --- a/setup.py +++ b/setup.py @@ -53,22 +53,11 @@ def package_files(directory): 'tensorflow': [ # 'tensorflow>=2.0.0' # TODO Add Tensorflow here again ], - 'kipoi': [ - 'kipoi', - 'git-lfs' - ], 'plotting_deps': [ "seaborn", "matplotlib", "sklearn" ], - 'data': [ - "scanpy", - "loompy", - "requests", - "xlrd==1.*", - "openpyxl", - ], 'extension': [ "sfaira_extension", ], diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 902930e82..48839deaf 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- """A Data and Model Zoo for Single-Cell Genomics.""" 
-# import sfaira.interface as ui -# import sfaira.train -# import sfaira.models -import sfaira.genomes +import sfaira.consts import sfaira.data -import sfaira.consts as consts +import sfaira.genomes +import sfaira.interface as ui +import sfaira.models +import sfaira.train import sfaira.versions from ._version import get_versions diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py index f3cd5199f..c54fcccbb 100644 --- a/sfaira/commands/create_dataloader.py +++ b/sfaira/commands/create_dataloader.py @@ -142,7 +142,20 @@ def _create_dataloader_template(self): overwrite_if_exists=True, extra_context=self._template_attributes_to_dict()) + # multiple datasets not streamlined are not contained in a single file but in multiple files + # Hence, we create one copy per dataset and adapt the ID per dataloader script if self.template_attributes.dataloader_type == 'multiple_datasets_not_streamlined': for i in range(2, int(self.template_attributes.number_of_datasets.lstrip('0')) + 1): copyfile(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi}.py', f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py') + + # Replace the default ID of 1 with the file specific ID + with open(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py', 'r') as file: + content = file.readlines() + idx_fixed = list(map(lambda line: f' self.set_dataset_id(idx={i}) # autogenerated by sfaira' + if line.strip().startswith('self.set_dataset_id(idx=1)') + else line, + content)) + with open(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py', 'w') as file: + for line in idx_fixed: + file.write(line) diff --git a/sfaira/commands/lint_dataloader.py b/sfaira/commands/lint_dataloader.py index 032ec6752..d64d870cc 100644 --- a/sfaira/commands/lint_dataloader.py +++ 
b/sfaira/commands/lint_dataloader.py @@ -67,7 +67,9 @@ def _lint_load(self): passed_load = True try: - line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('def _load_any_object(self, fn=None):', 'def _load(self, fn):')), + line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('def _load_any_object(self, fn=None):', + 'def _load(self, fn):', + 'def _load(self)')), enumerate(self.content)))[0] except IndexError: passed_load = False @@ -82,7 +84,7 @@ def _lint_required_attributes(self): """ passed_required_attributes = True - attributes = ['self.id', + attributes = ['self.set_dataset_id', 'self.author', 'self.doi', 'self.download_url_data', @@ -96,7 +98,7 @@ def _lint_required_attributes(self): line, attribute = list(filter(lambda line_attribute: line_attribute[1].startswith(attribute), enumerate(self.content)))[0] except IndexError: passed_required_attributes = False - self.failed['-1'] = 'One of required attributes id, author, doi, download_url_data, organ, organism, protocol, year is missing.' + self.failed['-1'] = 'One of required attributes set_dataset_id, author, doi, download_url_data, organ, organism, protocol, year is missing.' if passed_required_attributes: self.passed[0] = 'Passed required dataloader attributes checks.' 
diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index a457cbc9e..128e8d722 100644 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -17,7 +17,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) # SFAIRA TODO Add your meta data here - self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + self.set_dataset_id(idx=1) # autogenerated by sfaira self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 170c9742c..9aaa6afc8 100644 --- a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -2,7 +2,7 @@ import anndata as ad from typing import Union -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBaseGroupLoadingOneFile # SFAIRA TODO: Add correct sample IDs here. 
@@ -12,7 +12,7 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBaseGroupLoadingOneFile): def __init__( self, @@ -22,10 +22,11 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, sample_ids=SAMPLE_IDS, meta_path=meta_path, cache_path=cache_path, + path=path, **kwargs) # SFAIRA TODO: Add you meta data here. - self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + self.set_dataset_id(idx=1) # autogenerated by sfaira # SFAIRA TODO: Increase index ID by file self.author = {{ cookiecutter.author }} # author (list) who sampled / created the data set self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 82b99a672..9e6f282bd 100644 --- a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -22,11 +22,11 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, meta_path=meta_path, cache_path=cache_path, + path=path, **kwargs) # SFAIRA TODO: Add you meta data here. 
- # SFAIRA TODO: Increase index ID by file -> f"sth_{str(SAMPLE_FNS.index(sample_fn)).zfill(3)}_doi" - self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + self.set_dataset_id(idx=1) # autogenerated by sfaira # SFAIRA TODO: Increase index ID by file self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 5acce65f4..b445b3319 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -17,7 +17,7 @@ def __init__( super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) # SFAIRA TODO Add your meta data here - self.id = '{{ cookiecutter.id }}' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). 
+ self.set_dataset_id(idx=1) # autogenerated by sfaira self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 1505f6ec2..6a4f59ac7 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,3 +1,5 @@ -from sfaira.consts.adata_fields import ADATA_IDS_BASE, ADATA_IDS_EXTENDED -from sfaira.consts.adata_fields import ADATA_IDS_CELLXGENE, ADATA_IDS_SFAIRA +from sfaira.consts.adata_fields import AdataIdsBase, AdataIdsExtended, AdataIdsSfaira, AdataIdsCellxgene from sfaira.consts.meta_data_files import META_DATA_FIELDS +from sfaira.consts.ontologies import OntologyContainerSfaira + +OCS = OntologyContainerSfaira() diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index a4ab0c236..c768be09f 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,16 +1,13 @@ import numpy as np from typing import List -from sfaira.versions.metadata import CelltypeUniverse, OntologyList -from sfaira.versions.metadata import ONTOLOGY_UBERON, ONTOLOGY_HSAPDV, ONTOLOGY_MMUSDV, ONTOLOGY_SLC - """ The classes in this file are containers of field names and element entries that are used in streamlined adata objects in sfaira and in associated data bases. """ -class ADATA_IDS_BASE: +class AdataIdsBase: """ Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ @@ -125,7 +122,7 @@ def year(self) -> str: return self._year -class ADATA_IDS_EXTENDED(ADATA_IDS_BASE): +class AdataIdsExtended(AdataIdsBase): """ Base class with extended set of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" @@ -156,7 +153,7 @@ def state_exact(self) -> str: return self._state_exact -class ADATA_IDS_SFAIRA(ADATA_IDS_EXTENDED): +class AdataIdsSfaira(AdataIdsExtended): """ Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ @@ -194,19 +191,9 @@ def __init__(self): self._mapped_features = "mapped_features" self._remove_gene_version = "remove_gene_version" - # Allowed field values: - self.age_allowed_entries = None - self.dev_stage_allowed_entries = None - self.ethnicity_allowed_entries = None - self.normalization_allowed_entries = None - self.organ_allowed_entries = ONTOLOGY_UBERON - self.organism_allowed_entries = OntologyList(terms=["mouse", "human"]) - self.protocol_allowed_entries = ONTOLOGY_SLC - self.sex_allowed_entries = OntologyList(terms=["female", "male"]) - self.subtissue_allowed_entries = None - self.year_allowed_entries = list(range(2000, 3000)) - # Free fields that are not constrained: - # _author, _download_url_data, _download_url_meta, _doi, _id, _state_exact + self.classmap_source_key = "source" + self.classmap_target_key = "target" + self.classmap_target_id_key = "target_id" self.unknown_celltype_name = "unknown" self.unknown_celltype_identifiers = ["nan", "none", "unknown", np.nan, None] @@ -224,7 +211,7 @@ def remove_gene_version(self) -> str: return self._remove_gene_version -class ADATA_IDS_CELLXGENE(ADATA_IDS_EXTENDED): +class AdataIdsCellxgene(AdataIdsExtended): """ Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene objects. 
diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py new file mode 100644 index 000000000..4639eb70e --- /dev/null +++ b/sfaira/consts/ontologies.py @@ -0,0 +1,29 @@ +from sfaira.versions.metadata import OntologyList, OntologyCelltypes +from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMmusdv, \ + OntologySinglecellLibraryConstruction + + +class OntologyContainerSfaira: + + def __init__(self): + self.ontology_age = None + self._ontology_cell_types = None + self.ontology_cell_types = "v2021-02-01" + self.ontology_dev_stage = None + self.ontology_ethnicity = None + self.ontology_healthy = [True, False] + self.ontology_normalization = None + self.ontology_organ = OntologyUberon() + self.ontology_organism = OntologyList(terms=["mouse", "human"]) + self.ontology_protocol = OntologySinglecellLibraryConstruction() + self.ontology_sex = OntologyList(terms=["female", "male"]) + self.ontology_subtissue = None + self.ontology_year = list(range(2000, 3000)) + + @property + def ontology_cell_types(self): + return self._ontology_cell_types + + @ontology_cell_types.setter + def ontology_cell_types(self, x: str): + self._ontology_cell_types = OntologyCelltypes(branch=x) diff --git a/sfaira/data/base.py b/sfaira/data/base.py index e0f573beb..b31a9e65c 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -20,8 +20,8 @@ import ssl from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.metadata import Ontology, CelltypeUniverse, ONTOLOGY_UBERON -from sfaira.consts import ADATA_IDS_SFAIRA, META_DATA_FIELDS +from sfaira.versions.metadata import Ontology, CelltypeUniverse +from sfaira.consts import AdataIdsSfaira, META_DATA_FIELDS, OCS UNS_STRING_META_IN_OBS = "__obs__" @@ -108,7 +108,7 @@ class DatasetBase(abc.ABC): _var_symbol_col: Union[None, str] _var_ensembl_col: Union[None, str] - _ontology_celltypes: Union[None, CelltypeUniverse] + _celltype_universe: Union[None, CelltypeUniverse] 
_ontology_class_map: Union[None, dict] def __init__( @@ -118,8 +118,8 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - self._META_DATA_FIELDS = META_DATA_FIELDS + self._adata_ids_sfaira = AdataIdsSfaira() + self._ontology_container_sfaira = OCS # Using a pre-instantiated version of this yields drastic speed-ups. self.adata = None self.meta = None @@ -166,9 +166,9 @@ def __init__( self._var_ensembl_col = None self.class_maps = {"0": {}} - self._unknown_celltype_identifiers = self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers + self._unknown_celltype_identifiers = self._adata_ids_sfaira.unknown_celltype_identifiers - self._ontology_celltypes = None + self._celltype_universe = None self._ontology_class_map = None @abc.abstractmethod @@ -379,9 +379,9 @@ def load( # Set data-specific meta data in .adata: self._set_metadata_in_adata() # Set loading hyper-parameter-specific meta data: - self.adata.uns[self._ADATA_IDS_SFAIRA.load_raw] = load_raw - self.adata.uns[self._ADATA_IDS_SFAIRA.mapped_features] = match_to_reference - self.adata.uns[self._ADATA_IDS_SFAIRA.remove_gene_version] = remove_gene_version + self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw + self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference + self.adata.uns[self._adata_ids_sfaira.remove_gene_version] = remove_gene_version # Streamline feature space: self._convert_and_set_var_names(match_to_reference=match_to_reference) self._collapse_gene_versions(remove_gene_version=remove_gene_version) @@ -409,20 +409,20 @@ def _convert_and_set_var_names( # If the IDs were contained in the index, a new column is added to .var. 
if symbol_col: if symbol_col == 'index': - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = self.adata.var.index.values.tolist() + self.adata.var[self._adata_ids_sfaira.gene_id_names] = self.adata.var.index.values.tolist() else: assert symbol_col in self.adata.var.columns, f"symbol_col {symbol_col} not found in .var" self.adata.var = self.adata.var.rename( - {symbol_col: self._ADATA_IDS_SFAIRA.gene_id_names}, + {symbol_col: self._adata_ids_sfaira.gene_id_names}, axis='columns' ) if ensembl_col: if ensembl_col == 'index': - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = self.adata.var.index.values.tolist() + self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = self.adata.var.index.values.tolist() else: assert ensembl_col in self.adata.var.columns, f"ensembl_col {ensembl_col} not found in .var" self.adata.var = self.adata.var.rename( - {ensembl_col: self._ADATA_IDS_SFAIRA.gene_id_ensembl}, + {ensembl_col: self._adata_ids_sfaira.gene_id_ensembl}, axis='columns' ) # If only symbol or ensembl was supplied, the other one is inferred from a genome mapping dictionary. 
@@ -433,26 +433,26 @@ def _convert_and_set_var_names( # match it straight away, if it is not in there we try to match everything in front of the first period in # the gene name with a dictionary that was modified in the same way, if there is still no match we append na ensids = [] - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names]: + for n in self.adata.var[self._adata_ids_sfaira.gene_id_names]: if n in id_dict.keys(): ensids.append(id_dict[n]) elif n.split(".")[0] in id_strip_dict.keys(): ensids.append(id_strip_dict[n.split(".")[0]]) else: ensids.append('n/a') - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = ensids + self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = ensids if not symbol_col and match_to_reference: id_dict = self.genome_container.id_to_names_dict - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_names] = [ + self.adata.var[self._adata_ids_sfaira.gene_id_names] = [ id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] + for n in self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] ] if match_to_reference: # Lastly, the index of .var is set to ensembl IDs. 
try: # debugging - self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_index].values.tolist() + self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_index].values.tolist() except KeyError as e: raise KeyError(e) self.adata.var_names_make_unique() @@ -501,8 +501,8 @@ def _collapse_gene_versions(self, remove_gene_version): self.adata.obs_names = obs_names self.adata.var_names = new_index_collapsed new_index = new_index_collapsed - self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = new_index - self.adata.var.index = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values + self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = new_index + self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values def _match_features_to_reference(self): """ @@ -520,7 +520,7 @@ def _match_features_to_reference(self): raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep - data_ids = self.adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].values + data_ids = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] idx_feature_map = np.array([self.genome_container.ensembl.index(x) for x in data_ids[idx_feature_kept]]) @@ -534,6 +534,7 @@ def _match_features_to_reference(self): # ValueError: could not convert integer scalar step = 2000 if step < len(idx_feature_map): + i = 0 for i in range(0, len(idx_feature_map), step): x_new[:, idx_feature_map[i:i + step]] = x[:, i:i + step] x_new[:, idx_feature_map[i + step:]] = x[:, i + step:] @@ -547,7 +548,7 @@ def _match_features_to_reference(self): obs=self.adata.obs, obsm=self.adata.obsm, var=pd.DataFrame(data={'names': self.genome_container.names, - self._ADATA_IDS_SFAIRA.gene_id_ensembl: self.genome_container.ensembl}, + self._adata_ids_sfaira.gene_id_ensembl: self.genome_container.ensembl}, index=self.genome_container.ensembl), 
uns=self.adata.uns ) @@ -559,27 +560,35 @@ def _set_metadata_in_adata(self): :return: """ # Set data set-wide attributes (.uns): - self.adata.uns[self._ADATA_IDS_SFAIRA.annotated] = self.annotated - self.adata.uns[self._ADATA_IDS_SFAIRA.author] = self.author - self.adata.uns[self._ADATA_IDS_SFAIRA.doi] = self.doi - self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_data] = self.download_url_data - self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_meta] = self.download_url_meta - self.adata.uns[self._ADATA_IDS_SFAIRA.id] = self.id - self.adata.uns[self._ADATA_IDS_SFAIRA.normalization] = self.normalization - self.adata.uns[self._ADATA_IDS_SFAIRA.year] = self.year + self.adata.uns[self._adata_ids_sfaira.annotated] = self.annotated + self.adata.uns[self._adata_ids_sfaira.author] = self.author + self.adata.uns[self._adata_ids_sfaira.doi] = self.doi + self.adata.uns[self._adata_ids_sfaira.download_url_data] = self.download_url_data + self.adata.uns[self._adata_ids_sfaira.download_url_meta] = self.download_url_meta + self.adata.uns[self._adata_ids_sfaira.id] = self.id + self.adata.uns[self._adata_ids_sfaira.normalization] = self.normalization + self.adata.uns[self._adata_ids_sfaira.year] = self.year # Set cell-wise or data set-wide attributes (.uns / .obs): # These are saved in .uns if they are data set wide to save memory. 
- for x, y, z in ( - [self.age, self._ADATA_IDS_SFAIRA.age, self.obs_key_age], - [self.dev_stage, self._ADATA_IDS_SFAIRA.dev_stage, self.obs_key_dev_stage], - [self.ethnicity, self._ADATA_IDS_SFAIRA.ethnicity, self.obs_key_ethnicity], - [self.healthy, self._ADATA_IDS_SFAIRA.healthy, self.obs_key_healthy], - [self.organ, self._ADATA_IDS_SFAIRA.organ, self.obs_key_organ], - [self.protocol, self._ADATA_IDS_SFAIRA.protocol, self.obs_key_protocol], - [self.sex, self._ADATA_IDS_SFAIRA.sex, self.obs_key_sex], - [self.organism, self._ADATA_IDS_SFAIRA.organism, self.obs_key_organism], - [self.state_exact, self._ADATA_IDS_SFAIRA.state_exact, self.obs_key_state_exact], + for x, y, z, v in ( + [self.age, self._adata_ids_sfaira.age, self.obs_key_age, + self._ontology_container_sfaira.ontology_age], + [self.dev_stage, self._adata_ids_sfaira.dev_stage, self.obs_key_dev_stage, + self._ontology_container_sfaira.ontology_dev_stage], + [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.obs_key_ethnicity, + self._ontology_container_sfaira.ontology_ethnicity], + [self.healthy, self._adata_ids_sfaira.healthy, self.obs_key_healthy, + self._ontology_container_sfaira.ontology_healthy], + [self.organ, self._adata_ids_sfaira.organ, self.obs_key_organ, + self._ontology_container_sfaira.ontology_organism], + [self.protocol, self._adata_ids_sfaira.protocol, self.obs_key_protocol, + self._ontology_container_sfaira.ontology_protocol], + [self.sex, self._adata_ids_sfaira.sex, self.obs_key_sex, + self._ontology_container_sfaira.ontology_sex], + [self.organism, self._adata_ids_sfaira.organism, self.obs_key_organism, + self._ontology_container_sfaira.ontology_organism], + [self.state_exact, self._adata_ids_sfaira.state_exact, self.obs_key_state_exact, None], ): if x is None and z is None: self.adata.uns[y] = None @@ -599,6 +608,8 @@ def _set_metadata_in_adata(self): # Include flag in .uns that this attribute is in .obs: self.adata.uns[y] = UNS_STRING_META_IN_OBS # Remove potential pd.Categorical 
formatting: + self._value_protection( + attr="obs", allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) self.adata.obs[y] = self.adata.obs[z].values.tolist() else: assert False, "switch option should not occur" @@ -627,7 +638,6 @@ def load_tobacked( :param genome: Genome name to use as refernce. :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a shuffled object. - :param keys: :param load_raw: See .load(). :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. @@ -651,8 +661,9 @@ def load_tobacked( adata_backed.X[np.sort(idx), :] = x_new[np.argsort(idx), :] for k in adata_backed.obs.columns: - if k == self._ADATA_IDS_SFAIRA.dataset: - adata_backed.obs.loc[np.sort(idx), self._ADATA_IDS_SFAIRA.dataset] = [self.id for i in range(len(idx))] + if k == self._adata_ids_sfaira.dataset: + adata_backed.obs.loc[np.sort(idx), self._adata_ids_sfaira.dataset] = [ + self.id for _ in range(len(idx))] elif k in self.adata.obs.columns: adata_backed.obs.loc[np.sort(idx), k] = self.adata.obs[k].values[np.argsort(idx)] elif k in list(self.adata.uns.keys()): @@ -672,7 +683,7 @@ def load_tobacked( adata_backed._n_obs = adata_backed.X.shape[0] # not automatically updated after append adata_backed.obs = adata_backed.obs.append( # .obs was not broadcasted to the right shape! 
pandas.DataFrame(dict([ - (k, [self.id for i in range(len(idx))]) if k == self._ADATA_IDS_SFAIRA.dataset + (k, [self.id for i in range(len(idx))]) if k == self._adata_ids_sfaira.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns else (k, [self.adata.uns[k] for _ in range(len(idx))]) if k in list(self.adata.uns.keys()) else (k, ["key_not_found" for _ in range(len(idx))]) @@ -683,7 +694,7 @@ def load_tobacked( else: raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def set_unkown_class_id(self, ids: List[str]): + def set_unknown_class_id(self, ids: List[str]): """ Sets list of custom identifiers of unknown cell types data annotation. @@ -691,7 +702,7 @@ def set_unkown_class_id(self, ids: List[str]): :return: """ self._unknown_celltype_identifiers.extend( - [x for x in ids if x not in self._ADATA_IDS_SFAIRA.unknown_celltype_identifiers] + [x for x in ids if x not in self._adata_ids_sfaira.unknown_celltype_identifiers] ) def _set_genome(self, genome: Union[str, None]): @@ -718,9 +729,9 @@ def doi_cleaned_id(self): return "_".join(self.id.split("_")[:-1]) @property - def fn_ontology_class_map_csv(self): + def fn_ontology_class_map_tsv(self): """Standardised file name under which cell type conversion tables are saved.""" - return self.doi_cleaned_id + ".csv" + return self.doi_cleaned_id + ".tsv" def write_ontology_class_map( self, @@ -738,8 +749,8 @@ def write_ontology_class_map( if not self.annotated: warnings.warn(f"attempted to write ontology classmaps for data set {self.id} without annotation") else: - labels_original = np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values)) - tab = self.ontology_celltypes.prepare_celltype_map_tab( + labels_original = np.sort(np.unique(self.adata.obs[self._adata_ids_sfaira.cell_types_original].values)) + tab = self.celltypes_universe.prepare_celltype_map_tab( source=labels_original, match_only=False, 
anatomical_constraint=self.organ, @@ -748,7 +759,31 @@ def write_ontology_class_map( **kwargs ) if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=False) + self._write_class_map(fn=fn, tab=tab) + + def _write_class_map(self, fn, tab): + """ + Write class map. + + :param fn: File name of csv to write class maps to. + :param tab: Table to write + :return: + """ + tab.to_csv(fn, index=False, sep="\t") + + def _read_class_map(self, fn) -> pd.DataFrame: + """ + Read class map. + + :param fn: File name of csv to load class maps from. + :return: + """ + try: + tab = pd.read_csv(fn, header=0, index_col=None, sep="\t") + except pandas.errors.ParserError as e: + print(f"{self.id}") + raise pandas.errors.ParserError(e) + return tab def load_ontology_class_map(self, fn): """ @@ -758,7 +793,7 @@ def load_ontology_class_map(self, fn): :return: """ if os.path.exists(fn): - self.ontology_class_map = pd.read_csv(fn, header=0, index_col=None) + self.cell_ontology_map = self._read_class_map(fn=fn) else: warnings.warn(f"file {fn} does not exist") @@ -771,19 +806,35 @@ def project_celltypes_to_ontology(self): :return: """ labels_original = self.adata.obs[self.obs_key_cellontology_original].values - if self.ontology_class_map is not None: # only if this was defined + if self.cell_ontology_map is not None: # only if this was defined labels_mapped = [ - self.ontology_class_map[x] if x in self.ontology_class_map.keys() - else self._ADATA_IDS_SFAIRA.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers + self.cell_ontology_map[x] if x in self.cell_ontology_map.keys() + else self._adata_ids_sfaira.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x for x in labels_original ] - # Validate mapped IDs based on ontology: - # This aborts with a readable error if there was a target in the mapping file that does not match the - # ontology. 
- for x in labels_mapped: - self.ontology_celltypes.onto_cl.validate_node(x) - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class] = labels_mapped - self.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original] = labels_original + else: + labels_mapped = labels_original + # Validate mapped IDs based on ontology: + # This aborts with a readable error if there was a target in the mapping file that does not match the + # ontology. + self._value_protection( + attr="celltypes", + allowed=self.ontology_celltypes, + attempted=np.unique(labels_mapped).tolist() + ) + self.adata.obs[self._adata_ids_sfaira.cell_ontology_class] = labels_mapped + self.adata.obs[self._adata_ids_sfaira.cell_types_original] = labels_original + # Add cell type IDs into object: + # The IDs are not read from a source file but inferred based on the class name. + # TODO this could be changed in the future, this allows this function to be used both on cell type name mapping + # files with and without the ID in the third column. + ids_mapped = [ + self._ontology_container_sfaira.ontology_cell_types.id_from_name(x) + if x != self._adata_ids_sfaira.unknown_celltype_name + else self._adata_ids_sfaira.unknown_celltype_name + for x in labels_mapped + ] + self.adata.obs[self._adata_ids_sfaira.cell_ontology_id] = ids_mapped @property def citation(self): @@ -818,10 +869,10 @@ def load_meta(self, fn: Union[PathLike, str, None]): if os.path.isfile(fn): meta = pandas.read_csv( fn, - usecols=list(self._META_DATA_FIELDS.keys()), + usecols=list(META_DATA_FIELDS.keys()), ) # using dtype in read_csv through errors some times. 
- for k, v in self._META_DATA_FIELDS.items(): + for k, v in META_DATA_FIELDS.items(): if k in meta.columns: if meta[k].values[0] is not None: meta[k] = np.asarray(meta[k].values, dtype=v) @@ -863,41 +914,67 @@ def write_meta( ) # Add data-set wise meta data into table: meta = pandas.DataFrame({ - self._ADATA_IDS_SFAIRA.annotated: self.adata.uns[self._ADATA_IDS_SFAIRA.annotated], - self._ADATA_IDS_SFAIRA.author: self.adata.uns[self._ADATA_IDS_SFAIRA.author], - self._ADATA_IDS_SFAIRA.doi: self.adata.uns[self._ADATA_IDS_SFAIRA.doi], - self._ADATA_IDS_SFAIRA.download_url_data: self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_data], - self._ADATA_IDS_SFAIRA.download_url_meta: self.adata.uns[self._ADATA_IDS_SFAIRA.download_url_meta], - self._ADATA_IDS_SFAIRA.id: self.adata.uns[self._ADATA_IDS_SFAIRA.id], - self._ADATA_IDS_SFAIRA.ncells: self.adata.n_obs, - self._ADATA_IDS_SFAIRA.normalization: self.adata.uns[self._ADATA_IDS_SFAIRA.normalization], - self._ADATA_IDS_SFAIRA.year: self.adata.uns[self._ADATA_IDS_SFAIRA.year], + self._adata_ids_sfaira.annotated: self.adata.uns[self._adata_ids_sfaira.annotated], + self._adata_ids_sfaira.author: self.adata.uns[self._adata_ids_sfaira.author], + self._adata_ids_sfaira.doi: self.adata.uns[self._adata_ids_sfaira.doi], + self._adata_ids_sfaira.download_url_data: self.adata.uns[self._adata_ids_sfaira.download_url_data], + self._adata_ids_sfaira.download_url_meta: self.adata.uns[self._adata_ids_sfaira.download_url_meta], + self._adata_ids_sfaira.id: self.adata.uns[self._adata_ids_sfaira.id], + self._adata_ids_sfaira.ncells: self.adata.n_obs, + self._adata_ids_sfaira.normalization: self.adata.uns[self._adata_ids_sfaira.normalization], + self._adata_ids_sfaira.year: self.adata.uns[self._adata_ids_sfaira.year], }, index=range(1)) # Expand table by variably cell-wise or data set-wise meta data: for x in [ - self._ADATA_IDS_SFAIRA.age, - self._ADATA_IDS_SFAIRA.dev_stage, - self._ADATA_IDS_SFAIRA.ethnicity, - 
self._ADATA_IDS_SFAIRA.healthy, - self._ADATA_IDS_SFAIRA.organ, - self._ADATA_IDS_SFAIRA.protocol, - self._ADATA_IDS_SFAIRA.sex, - self._ADATA_IDS_SFAIRA.organism, - self._ADATA_IDS_SFAIRA.state_exact, + self._adata_ids_sfaira.age, + self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.ethnicity, + self._adata_ids_sfaira.healthy, + self._adata_ids_sfaira.organ, + self._adata_ids_sfaira.protocol, + self._adata_ids_sfaira.sex, + self._adata_ids_sfaira.organism, + self._adata_ids_sfaira.state_exact, ]: if self.adata.uns[x] == UNS_STRING_META_IN_OBS: meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) else: meta[x] = self.adata.uns[x] # Add cell types into table if available: - if self._ADATA_IDS_SFAIRA.cell_ontology_class in self.adata.obs.keys(): - meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = str(( - np.sort(np.unique(self.adata.obs[self._ADATA_IDS_SFAIRA.cell_ontology_class].values)), + if self._adata_ids_sfaira.cell_ontology_class in self.adata.obs.keys(): + meta[self._adata_ids_sfaira.cell_ontology_class] = str(( + np.sort(np.unique(self.adata.obs[self._adata_ids_sfaira.cell_ontology_class].values)), )) else: - meta[self._ADATA_IDS_SFAIRA.cell_ontology_class] = " " + meta[self._adata_ids_sfaira.cell_ontology_class] = " " meta.to_csv(fn_meta) + def set_dataset_id( + self, + idx: int = 1 + ): + def clean(s): + if s is not None: + s = s.replace(' ', '').replace('-', '').replace('_', '').lower() + return s + + if hasattr(self, 'sample_idx'): + idx += self.sample_idx + idx = str(idx).zfill(3) + + if isinstance(self.author, List): + author = self.author[0] + else: + author = self.author + + self.id = f"{clean(self.organism)}_" \ + f"{clean(self.organ)}_" \ + f"{self.year}_" \ + f"{clean(self.protocol)}_" \ + f"{clean(author)}_" \ + f"{idx}_" \ + f"{self.doi}" + # Properties: @property @@ -907,15 +984,15 @@ def age(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.age in 
self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.age] + if self.meta is not None and self._adata_ids_sfaira.age in self.meta.columns: + return self.meta[self._adata_ids_sfaira.age] else: return None @age.setter def age(self, x: str): self.__erasing_protection(attr="age", val_old=self._age, val_new=x) - self.__value_protection(attr="age", allowed=self._ADATA_IDS_SFAIRA.age_allowed_entries, attempted=x) + self._value_protection(attr="age", allowed=self._ontology_container_sfaira.ontology_age, attempted=x) self._age = x @property @@ -925,8 +1002,8 @@ def annotated(self) -> Union[bool, None]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.annotated in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.annotated].values[0] + if self.meta is not None and self._adata_ids_sfaira.annotated in self.meta.columns: + return self.meta[self._adata_ids_sfaira.annotated].values[0] elif self.loaded: # If data set was loaded and there is still no annotation indicated, it is declared unannotated. 
return False @@ -942,9 +1019,9 @@ def author(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is None or self._ADATA_IDS_SFAIRA.author not in self.meta.columns: + if self.meta is None or self._adata_ids_sfaira.author not in self.meta.columns: raise ValueError("author must be set but was neither set in constructor nor in meta data") - return self.meta[self._ADATA_IDS_SFAIRA.author] + return self.meta[self._adata_ids_sfaira.author] @author.setter def author(self, x: str): @@ -968,15 +1045,16 @@ def dev_stage(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.dev_stage in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.dev_stage] + if self.meta is not None and self._adata_ids_sfaira.dev_stage in self.meta.columns: + return self.meta[self._adata_ids_sfaira.dev_stage] else: return None @dev_stage.setter def dev_stage(self, x: str): self.__erasing_protection(attr="dev_stage", val_old=self._dev_stage, val_new=x) - self.__value_protection(attr="dev_stage", allowed=self._ADATA_IDS_SFAIRA.dev_stage_allowed_entries, attempted=x) + self._value_protection(attr="dev_stage", allowed=self._ontology_container_sfaira.ontology_dev_stage, + attempted=x) self._dev_stage = x @property @@ -986,9 +1064,9 @@ def doi(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is None or self._ADATA_IDS_SFAIRA.doi not in self.meta.columns: + if self.meta is None or self._adata_ids_sfaira.doi not in self.meta.columns: raise ValueError("doi must be set but was neither set in constructor nor in meta data") - return self.meta[self._ADATA_IDS_SFAIRA.doi] + return self.meta[self._adata_ids_sfaira.doi] @doi.setter def doi(self, x: str): @@ -1012,7 +1090,7 @@ def download_url_data(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[self._ADATA_IDS_SFAIRA.download_url_data] + x = 
self.meta[self._adata_ids_sfaira.download_url_data] if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): @@ -1069,15 +1147,15 @@ def ethnicity(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.ethnicity in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.ethnicity] + if self.meta is not None and self._adata_ids_sfaira.ethnicity in self.meta.columns: + return self.meta[self._adata_ids_sfaira.ethnicity] else: return None @ethnicity.setter def ethnicity(self, x: str): self.__erasing_protection(attr="ethnicity", val_old=self._ethnicity, val_new=x) - self.__value_protection(attr="ethnicity", allowed=self._ADATA_IDS_SFAIRA.ethnicity_allowed_entries, attempted=x) + self._value_protection(attr="ethnicity", allowed=self._adata_ids_sfaira.ontology_ethnicity, attempted=x) self._ethnicity = x @property @@ -1087,8 +1165,8 @@ def healthy(self) -> Union[None, bool]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.healthy in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.healthy] + if self.meta is not None and self._adata_ids_sfaira.healthy in self.meta.columns: + return self.meta[self._adata_ids_sfaira.healthy] else: return None @@ -1111,9 +1189,8 @@ def id(self) -> str: if self._id is not None: return self._id else: - if self.meta is None: - self.load_meta(fn=None) - return self.meta[self._ADATA_IDS_SFAIRA.id] + raise AttributeError(f"Dataset ID was not set in dataloader in {self.doi}, please ensure the dataloader " + f"constructor of this dataset contains a call to self.set_dataset_id()") @id.setter def id(self, x: str): @@ -1137,14 +1214,14 @@ def meta(self, x: Union[None, pd.DataFrame]): if x is not None: for k, v in x.items(): v = v.tolist() # avoid numpy data types - if k not in self._META_DATA_FIELDS.keys(): + if k not in META_DATA_FIELDS.keys(): raise ValueError(f"did not find {k} in format 
look up table") else: if x[k] is not None: # None is always allowed. - if not isinstance(v[0], self._META_DATA_FIELDS[k]): + if not isinstance(v[0], META_DATA_FIELDS[k]): raise ValueError(f"key '{k}' of value `{v[0]}` and signature `{type(v[0])}` " f"in meta data table did not match signature " - f"{str(self._META_DATA_FIELDS[k])}") + f"{str(META_DATA_FIELDS[k])}") self._meta = x @property @@ -1157,7 +1234,7 @@ def ncells(self) -> int: else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[self._ADATA_IDS_SFAIRA.ncells] + x = self.meta[self._adata_ids_sfaira.ncells] return int(x) @property @@ -1167,16 +1244,16 @@ def normalization(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.normalization in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.normalization] + if self.meta is not None and self._adata_ids_sfaira.normalization in self.meta.columns: + return self.meta[self._adata_ids_sfaira.normalization] else: return None @normalization.setter def normalization(self, x: str): self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) - self.__value_protection(attr="normalization", allowed=self._ADATA_IDS_SFAIRA.normalization_allowed_entries, - attempted=x) + self._value_protection(attr="normalization", allowed=self._ontology_container_sfaira.ontology_normalization, + attempted=x) self._normalization = x @property @@ -1295,15 +1372,15 @@ def organ(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.organ in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.organ] + if self.meta is not None and self._adata_ids_sfaira.organ in self.meta.columns: + return self.meta[self._adata_ids_sfaira.organ] else: return None @organ.setter def organ(self, x: str): self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) - 
self.__value_protection(attr="organ", allowed=self._ADATA_IDS_SFAIRA.organ_allowed_entries, attempted=x) + self._value_protection(attr="organ", allowed=self._ontology_container_sfaira.ontology_organ, attempted=x) self._organ = x @property @@ -1313,15 +1390,15 @@ def organism(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.organism in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.organism] + if self.meta is not None and self._adata_ids_sfaira.organism in self.meta.columns: + return self.meta[self._adata_ids_sfaira.organism] else: return None @organism.setter def organism(self, x: str): self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) - self.__value_protection(attr="organism", allowed=self._ADATA_IDS_SFAIRA.organism_allowed_entries, attempted=x) + self._value_protection(attr="organism", allowed=self._ontology_container_sfaira.ontology_organism, attempted=x) self._organism = x @property @@ -1331,15 +1408,16 @@ def protocol(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.protocol in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.protocol] + if self.meta is not None and self._adata_ids_sfaira.protocol in self.meta.columns: + return self.meta[self._adata_ids_sfaira.protocol] else: return None @protocol.setter def protocol(self, x: str): self.__erasing_protection(attr="protocol", val_old=self._protocol, val_new=x) - self.__value_protection(attr="protocol", allowed=self._ADATA_IDS_SFAIRA.protocol_allowed_entries, attempted=x) + self._value_protection(attr="protocol", allowed=self._ontology_container_sfaira.ontology_protocol, + attempted=x) self._protocol = x @property @@ -1349,15 +1427,15 @@ def sex(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.sex in 
self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.sex] + if self.meta is not None and self._adata_ids_sfaira.sex in self.meta.columns: + return self.meta[self._adata_ids_sfaira.sex] else: return None @sex.setter def sex(self, x: str): self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) - self.__value_protection(attr="sex", allowed=self._ADATA_IDS_SFAIRA.sex_allowed_entries, attempted=x) + self._value_protection(attr="sex", allowed=self._ontology_container_sfaira.ontology_sex, attempted=x) self._sex = x @property @@ -1376,8 +1454,8 @@ def state_exact(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.state_exact in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.state_exact] + if self.meta is not None and self._adata_ids_sfaira.state_exact in self.meta.columns: + return self.meta[self._adata_ids_sfaira.state_exact] else: return None @@ -1411,38 +1489,49 @@ def year(self) -> Union[None, int]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._ADATA_IDS_SFAIRA.year in self.meta.columns: - return self.meta[self._ADATA_IDS_SFAIRA.year] + if self.meta is not None and self._adata_ids_sfaira.year in self.meta.columns: + return self.meta[self._adata_ids_sfaira.year] else: return None @year.setter def year(self, x: int): self.__erasing_protection(attr="year", val_old=self._year, val_new=x) - self.__value_protection(attr="year", allowed=self._ADATA_IDS_SFAIRA.year_allowed_entries, attempted=x) + self._value_protection(attr="year", allowed=self._ontology_container_sfaira.ontology_year, attempted=x) self._year = x @property def ontology_celltypes(self): - if self._ontology_celltypes is None: - assert self.organism is not None, "set organism before using ontology_celltypes" - self._ontology_celltypes = CelltypeUniverse(organism=self.organism) - return self._ontology_celltypes + return 
self._ontology_container_sfaira.ontology_cell_types @property - def ontology_class_map(self) -> dict: + def ontology_organ(self): + return self._ontology_container_sfaira.ontology_organ + + @property + def celltypes_universe(self): + if self._celltype_universe: + self._celltype_universe = CelltypeUniverse( + cl=self.ontology_celltypes, + uberon=self._ontology_container_sfaira.ontology_organ, + organism=self.organism, + ) + return self._celltype_universe + + @property + def cell_ontology_map(self) -> dict: return self._ontology_class_map - @ontology_class_map.setter - def ontology_class_map(self, x: pd.DataFrame): + @cell_ontology_map.setter + def cell_ontology_map(self, x: pd.DataFrame): self.__erasing_protection(attr="ontology_class_map", val_old=self._ontology_class_map, val_new=x) - assert x.shape[1] == 2 - assert x.columns[0] == "source" - assert x.columns[1] == "target" + assert x.shape[1] in [2, 3], f"{x.shape} in {self.id}" + assert x.columns[0] == self._adata_ids_sfaira.classmap_source_key + assert x.columns[1] == self._adata_ids_sfaira.classmap_target_key # Transform data frame into a mapping dictionary: self._ontology_class_map = dict(list(zip( - x["source"].values.tolist(), - x["target"].values.tolist() + x[self._adata_ids_sfaira.classmap_source_key].values.tolist(), + x[self._adata_ids_sfaira.classmap_target_key].values.tolist() ))) # Private methods: @@ -1459,7 +1548,7 @@ def __erasing_protection(self, attr, val_old, val_new): raise ValueError(f"attempted to set erasing protected attribute {attr}: " f"previously was {str(val_old)}, attempted to set {str(val_new)}") - def __value_protection( + def _value_protection( self, attr: str, allowed: Union[Ontology, bool, int, float, str, List[bool], List[int], List[float], List[str]], @@ -1473,7 +1562,7 @@ def __value_protection( :param attr: Attribut to set. :param allowed: Constraint for values of `attr`. Either ontology instance used to constrain entries, or list of allowed values. 
- :param attempted: Value to attempt to set in `attr`. + :param attempted: Value(s) to attempt to set in `attr`. :return: """ if allowed is not None: @@ -1546,6 +1635,7 @@ class DatasetBaseGroupLoadingOneFile(DatasetBase): def __init__( self, sample_id: str, + sample_ids: List, data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, @@ -1554,11 +1644,16 @@ def __init__( super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self._unprocessed_full_group_object = False self._sample_id = sample_id + self._SAMPLE_IDS = sample_ids @property def sample_id(self): return self._sample_id + @property + def sample_idx(self): + return self._SAMPLE_IDS.index(self.sample_id) + @abc.abstractmethod def _load_full(self) -> anndata.AnnData: """ @@ -1626,6 +1721,7 @@ class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): def __init__( self, sample_fn: str, + sample_fns: List, data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, @@ -1633,11 +1729,16 @@ def __init__( ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self._sample_fn = sample_fn + self._SAMPLE_FNS = sample_fns @property def sample_fn(self): return self._sample_fn + @property + def sample_idx(self): + return self._SAMPLE_FNS.index(self.sample_fn) + class DatasetGroup: """ @@ -1656,19 +1757,14 @@ class DatasetGroup: datasets: Dict def __init__(self, datasets: dict): + self._adata_ids_sfaira = AdataIdsSfaira() self.datasets = datasets - self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() @property def _unknown_celltype_identifiers(self): return np.unqiue(np.concatenate([v._unknown_celltype_identifiers for _, v in self.datasets.items()])) - def _load_group(self, load_raw: bool): - """ - - :param load_raw: See .load(). 
- :return: - """ + def _load_group(self, **kwargs): return None def load( @@ -1797,9 +1893,9 @@ def write_ontology_class_map( for k, v in self.datasets.items(): if v.annotated: labels_original = np.sort(np.unique(np.concatenate([ - v.adata.obs[self._ADATA_IDS_SFAIRA.cell_types_original].values + v.adata.obs[self._adata_ids_sfaira.cell_types_original].values ]))) - tab.append(v.ontology_celltypes.prepare_celltype_map_tab( + tab.append(v.celltypes_universe.prepare_celltype_map_tab( source=labels_original, match_only=False, anatomical_constraint=v.organ, @@ -1813,9 +1909,9 @@ def write_ontology_class_map( tab = pandas.concat(tab, axis=0) # Take out columns with the same source: tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() - tab = tab.sort_values("source") + tab = tab.sort_values(self._adata_ids_sfaira.classmap_source_key) if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=False) + tab.to_csv(fn, index=False, sep="\t") def download(self, **kwargs): for _, v in self.datasets.items(): @@ -1841,14 +1937,14 @@ def adata(self): adata_ls = self.adata_ls # Save uns attributes that are fixed for entire data set to .obs to retain during concatenation: for adata in adata_ls: - adata.obs[self._ADATA_IDS_SFAIRA.author] = adata.uns[self._ADATA_IDS_SFAIRA.author] - adata.obs[self._ADATA_IDS_SFAIRA.year] = adata.uns[self._ADATA_IDS_SFAIRA.year] - adata.obs[self._ADATA_IDS_SFAIRA.protocol] = adata.uns[self._ADATA_IDS_SFAIRA.protocol] - if self._ADATA_IDS_SFAIRA.normalization in adata.uns.keys(): - adata.obs[self._ADATA_IDS_SFAIRA.normalization] = adata.uns[self._ADATA_IDS_SFAIRA.normalization] - if self._ADATA_IDS_SFAIRA.dev_stage in adata.obs.columns: - adata.obs[self._ADATA_IDS_SFAIRA.dev_stage] = adata.uns[self._ADATA_IDS_SFAIRA.dev_stage] - adata.obs[self._ADATA_IDS_SFAIRA.annotated] = adata.uns[self._ADATA_IDS_SFAIRA.annotated] + adata.obs[self._adata_ids_sfaira.author] = 
adata.uns[self._adata_ids_sfaira.author] + adata.obs[self._adata_ids_sfaira.year] = adata.uns[self._adata_ids_sfaira.year] + adata.obs[self._adata_ids_sfaira.protocol] = adata.uns[self._adata_ids_sfaira.protocol] + if self._adata_ids_sfaira.normalization in adata.uns.keys(): + adata.obs[self._adata_ids_sfaira.normalization] = adata.uns[self._adata_ids_sfaira.normalization] + if self._adata_ids_sfaira.dev_stage in adata.obs.columns: + adata.obs[self._adata_ids_sfaira.dev_stage] = adata.uns[self._adata_ids_sfaira.dev_stage] + adata.obs[self._adata_ids_sfaira.annotated] = adata.uns[self._adata_ids_sfaira.annotated] # Workaround related to anndata bugs: # TODO remove this in future. for adata in adata_ls: # Fix 1: @@ -1858,13 +1954,13 @@ def adata(self): if adata.uns is not None: keys_to_keep = [ 'neighbors', - self._ADATA_IDS_SFAIRA.author, - self._ADATA_IDS_SFAIRA.year, - self._ADATA_IDS_SFAIRA.protocol, - self._ADATA_IDS_SFAIRA.normalization, - self._ADATA_IDS_SFAIRA.dev_stage, - self._ADATA_IDS_SFAIRA.annotated, - self._ADATA_IDS_SFAIRA.mapped_features, + self._adata_ids_sfaira.author, + self._adata_ids_sfaira.year, + self._adata_ids_sfaira.protocol, + self._adata_ids_sfaira.normalization, + self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.annotated, + self._adata_ids_sfaira.mapped_features, ] for k in list(adata.uns.keys()): if k not in keys_to_keep: @@ -1876,7 +1972,7 @@ def adata(self): # To preserve gene names in .var, the target gene names are copied into var_names and are then copied # back into .var. for adata in adata_ls: - adata.var.index = adata.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl].tolist() + adata.var.index = adata.var[self._adata_ids_sfaira.gene_id_ensembl].tolist() if len(adata_ls) > 1: # TODO: need to keep this? -> yes, still catching errors here (March 2020) # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. 
@@ -1884,27 +1980,27 @@ def adata(self): adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=self._ADATA_IDS_SFAIRA.dataset, + batch_key=self._adata_ids_sfaira.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) except ValueError: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=self._ADATA_IDS_SFAIRA.dataset, + batch_key=self._adata_ids_sfaira.dataset, batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] ) - adata_concat.var[self._ADATA_IDS_SFAIRA.gene_id_ensembl] = adata_concat.var.index + adata_concat.var[self._adata_ids_sfaira.gene_id_ensembl] = adata_concat.var.index - if len(set([a.uns[self._ADATA_IDS_SFAIRA.mapped_features] for a in adata_ls])) == 1: - adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = \ - adata_ls[0].uns[self._ADATA_IDS_SFAIRA.mapped_features] + if len(set([a.uns[self._adata_ids_sfaira.mapped_features] for a in adata_ls])) == 1: + adata_concat.uns[self._adata_ids_sfaira.mapped_features] = \ + adata_ls[0].uns[self._adata_ids_sfaira.mapped_features] else: - adata_concat.uns[self._ADATA_IDS_SFAIRA.mapped_features] = False + adata_concat.uns[self._adata_ids_sfaira.mapped_features] = False else: adata_concat = adata_ls[0] - adata_concat.obs[self._ADATA_IDS_SFAIRA.dataset] = self.ids[0] + adata_concat.obs[self._adata_ids_sfaira.dataset] = self.ids[0] adata_concat.var_names_make_unique() return adata_concat @@ -1923,9 +2019,9 @@ def obs_concat(self, keys: Union[list, None] = None): obs_concat = pandas.concat([pandas.DataFrame(dict( [ (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns - else (k, ["nan" for i in range(self.datasets[x].adata.obs.shape[0])]) + else (k, ["nan" for _ in range(self.datasets[x].adata.obs.shape[0])]) for k in keys - ] + [(self._ADATA_IDS_SFAIRA.dataset, [x for i in range(self.datasets[x].adata.obs.shape[0])])] + ] + [(self._adata_ids_sfaira.dataset, [x for _ in 
range(self.datasets[x].adata.obs.shape[0])])] )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat @@ -2017,6 +2113,8 @@ def subset_cells(self, key, values: Union[str, List[str]]): class DatasetGroupDirectoryOriented(DatasetGroup): + _cwd: os.PathLike + def __init__( self, file_base: str, @@ -2037,17 +2135,17 @@ def __init__( """ # Collect all data loaders from files in directory: datasets = [] - cwd = os.path.dirname(file_base) - dataset_module = str(cwd.split("/")[-1]) - loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(cwd.split("/")[-5]) == "sfaira" else \ + self._cwd = os.path.dirname(file_base) + dataset_module = str(self._cwd.split("/")[-1]) + loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(self._cwd.split("/")[-5]) == "sfaira" else \ "sfaira_extension.data.dataloaders.loaders." - if "group.py" in os.listdir(cwd): + if "group.py" in os.listdir(self._cwd): DatasetGroupFound = pydoc.locate(loader_pydoc_path + dataset_module + ".group.DatasetGroup") dsg = DatasetGroupFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path) datasets.extend(list(dsg.datasets.values)) else: - for f in os.listdir(cwd): - if os.path.isfile(os.path.join(cwd, f)): # only files + for f in os.listdir(self._cwd): + if os.path.isfile(os.path.join(self._cwd, f)): # only files # Narrow down to data set files: if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: datasets_f = [] @@ -2089,12 +2187,44 @@ def __init__( DatasetFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path)) # Load cell type maps: for x in datasets_f: - x.load_ontology_class_map(fn=os.path.join(cwd, file_module + ".csv")) + x.load_ontology_class_map(fn=os.path.join(self._cwd, file_module + ".tsv")) datasets.extend(datasets_f) keys = [x.id for x in datasets] super().__init__(datasets=dict(zip(keys, datasets))) + def clean_ontology_class_map(self): + """ + Finalises processed class maps of free text cell types 
to ontology classes. + + Checks that the assigned ontology class names appear in the ontology. + Adds a third column with the corresponding ontology IDs into the file. + + :return: + """ + for f in os.listdir(self._cwd): + if os.path.isfile(os.path.join(self._cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + fn_map = os.path.join(self._cwd, file_module + ".tsv") + if os.path.exists(fn_map): + # Access reading and value protection mechanisms from first data set loaded in group. + tab = list(self.datasets.values())[0]._read_class_map(fn=fn_map) + # Checks that the assigned ontology class names appear in the ontology. + list(self.datasets.values())[0]._value_protection( + attr="celltypes", + allowed=self.ontology_celltypes, + attempted=np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() + ) + # Adds a third column with the corresponding ontology IDs into the file. 
+ tab[self._adata_ids_sfaira.classmap_target_id_key] = [ + self.ontology_celltypes.id_from_name(x) if x != self._adata_ids_sfaira.unknown_celltype_name + else self._adata_ids_sfaira.unknown_celltype_name + for x in tab[self._adata_ids_sfaira.classmap_target_key].values + ] + list(self.datasets.values())[0]._write_class_map(fn=fn_map, tab=tab) + class DatasetSuperGroup: """ @@ -2105,26 +2235,32 @@ class DatasetSuperGroup: """ adata: Union[None, anndata.AnnData] fn_backed: Union[None, PathLike] - dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]] + dataset_groups: Union[list, List[DatasetGroup], List[DatasetSuperGroup]] def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetSuperGroup]]): self.adata = None self.fn_backed = None self.set_dataset_groups(dataset_groups=dataset_groups) - self._ADATA_IDS_SFAIRA = ADATA_IDS_SFAIRA() - - def set_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): - if isinstance(dataset_groups[0], DatasetGroup): - self.dataset_groups = dataset_groups - elif isinstance(dataset_groups[0], DatasetSuperGroup): - # Decompose super groups first - dataset_groups_proc = [] - for x in dataset_groups: - dataset_groups_proc.extend(x.dataset_groups) - self.dataset_groups = dataset_groups_proc + self._adata_ids_sfaira = AdataIdsSfaira() + + def set_dataset_groups(self, dataset_groups: Union[DatasetGroup, DatasetSuperGroup, List[DatasetGroup], + List[DatasetSuperGroup]]): + if isinstance(dataset_groups, DatasetGroup) or isinstance(dataset_groups, DatasetSuperGroup): + dataset_groups = [dataset_groups] + if len(dataset_groups) > 0: + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups = dataset_groups + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups = dataset_groups_proc + else: + assert False else: 
- assert False + self.dataset_groups = [] def extend_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): if isinstance(dataset_groups[0], DatasetGroup): @@ -2133,7 +2269,7 @@ def extend_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[D # Decompose super groups first dataset_groups_proc = [] for x in dataset_groups: - dataset_groups_proc.extend(x.dataset_groups) + dataset_groups_proc.extend(x.datasets) self.dataset_groups.extend(dataset_groups_proc) else: assert False @@ -2227,7 +2363,7 @@ def load_all( self.adata = self.dataset_groups[i].adata.concatenate( *[x.adata for x in self.dataset_groups[1:] if x is not None], join="outer", - batch_key=self._ADATA_IDS_SFAIRA.dataset_group + batch_key=self._adata_ids_sfaira.dataset_group ) def load_all_tobacked( @@ -2285,20 +2421,20 @@ def load_all_tobacked( X.indptr = X.indptr.astype(np.int64) self.adata.X = X keys = [ - self._ADATA_IDS_SFAIRA.annotated, - self._ADATA_IDS_SFAIRA.author, - self._ADATA_IDS_SFAIRA.dataset, - self._ADATA_IDS_SFAIRA.cell_ontology_class, - self._ADATA_IDS_SFAIRA.dev_stage, - self._ADATA_IDS_SFAIRA.normalization, - self._ADATA_IDS_SFAIRA.organ, - self._ADATA_IDS_SFAIRA.protocol, - self._ADATA_IDS_SFAIRA.state_exact, - self._ADATA_IDS_SFAIRA.year, + self._adata_ids_sfaira.annotated, + self._adata_ids_sfaira.author, + self._adata_ids_sfaira.dataset, + self._adata_ids_sfaira.cell_ontology_class, + self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.normalization, + self._adata_ids_sfaira.organ, + self._adata_ids_sfaira.protocol, + self._adata_ids_sfaira.state_exact, + self._adata_ids_sfaira.year, ] if scatter_update: self.adata.obs = pandas.DataFrame({ - k: ["nan" for x in range(n_cells)] for k in keys + k: ["nan" for _ in range(n_cells)] for k in keys }) else: for k in keys: @@ -2384,8 +2520,8 @@ def subset_cells(self, key, values: Union[str, List[str]]): :param values: Classes to overlap to. 
:return: """ - for x in self.dataset_groups.ids: - self.dataset_groups[x].subset_cells(key=key, values=values) + for i in range(len(self.dataset_groups)): + self.dataset_groups[i].subset_cells(key=key, values=values) def project_celltypes_to_ontology(self): """ diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py index ee89ff3fd..8224aaff1 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -1,7 +1,8 @@ import os from typing import Union -from .external import ADATA_IDS_CELLXGENE, DatasetGroup +from sfaira.data import DatasetGroup +from sfaira.consts import AdataIdsCellxgene from .cellxgene_loader import Dataset @@ -14,10 +15,9 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() - + self._adata_ids_cellxgene = AdataIdsCellxgene() fn_ls = os.listdir(data_path) - fn_ls = [x for x in fn_ls if x in self._ADATA_IDS_CELLXGENE.accepted_file_names] + fn_ls = [x for x in fn_ls if x in self._adata_ids_cellxgene.accepted_file_names] datasets = [ Dataset(data_path=data_path, fn=x, meta_path=meta_path, cache_path=cache_path) for x in fn_ls diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index dc2c02e42..104a0d6a9 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -1,8 +1,9 @@ import anndata import os from typing import Union -from .external import DatasetBase -from .external import ADATA_IDS_CELLXGENE + +from sfaira.data import DatasetBase +from sfaira.consts import AdataIdsCellxgene class Dataset(DatasetBase): @@ -22,23 +23,23 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, 
**kwargs) - self._ADATA_IDS_CELLXGENE = ADATA_IDS_CELLXGENE() + self._self._adata_ids_cellxgene = AdataIdsCellxgene() self.fn = fn - self.obs_key_cellontology_class = self._ADATA_IDS_CELLXGENE.cell_ontology_class - self.obs_key_cellontology_id = self._ADATA_IDS_CELLXGENE.cell_ontology_id - self.obs_key_cellontology_original = self._ADATA_IDS_CELLXGENE.cell_types_original - self.obs_key_dev_stage = self._ADATA_IDS_CELLXGENE.dev_stage - self.obs_key_ethnicity = self._ADATA_IDS_CELLXGENE.ethnicity - self.obs_key_healthy = self._ADATA_IDS_CELLXGENE.healthy - self.obs_key_sex = self._ADATA_IDS_CELLXGENE.sex - self.obs_key_organism = self._ADATA_IDS_CELLXGENE.organism - self.obs_key_state_exact = self._ADATA_IDS_CELLXGENE.state_exact + self.obs_key_cellontology_class = self._adata_ids_cellxgene.cell_ontology_class + self.obs_key_cellontology_id = self._adata_ids_cellxgene.cell_ontology_id + self.obs_key_cellontology_original = self._adata_ids_cellxgene.cell_types_original + self.obs_key_dev_stage = self._adata_ids_cellxgene.dev_stage + self.obs_key_ethnicity = self._adata_ids_cellxgene.ethnicity + self.obs_key_healthy = self._adata_ids_cellxgene.healthy + self.obs_key_sex = self._adata_ids_cellxgene.sex + self.obs_key_organism = self._adata_ids_cellxgene.organism + self.obs_key_state_exact = self._adata_ids_cellxgene.state_exact - self.healthy_state_healthy = self._ADATA_IDS_CELLXGENE.disease_state_healthy + self.healthy_state_healthy = self._adata_ids_cellxgene.disease_state_healthy - self.var_ensembl_col = self._ADATA_IDS_CELLXGENE.gene_id_ensembl - self.var_symbol_col = self._ADATA_IDS_CELLXGENE.gene_id_names + self.var_ensembl_col = self._adata_ids_cellxgene.gene_id_ensembl + self.var_symbol_col = self._adata_ids_cellxgene.gene_id_names self.class_maps = { "0": {}, @@ -57,13 +58,13 @@ def _load(self): adata.X = adata.raw.X # TODO delete raw? 
- self.author = adata.uns[self._ADATA_IDS_CELLXGENE.author][self._ADATA_IDS_CELLXGENE.author_names] - self.doi = adata.uns[self._ADATA_IDS_CELLXGENE.doi] + self.author = adata.uns[self._adata_ids_cellxgene.author][self._adata_ids_cellxgene.author_names] + self.doi = adata.uns[self._adata_ids_cellxgene.doi] self.download_url_data = self.download_url_data self.id = self.id self.normalization = 'raw' self.organ = str(self.fn).split("_")[3] # TODO interface this properly # self.organ = adata.obs["tissue"].values[0] - self.organism = adata.obs[self._ADATA_IDS_CELLXGENE.organism].values[0] - self.protocol = adata.obs[self._ADATA_IDS_CELLXGENE.protocol].values[0] - self.year = adata.uns[self._ADATA_IDS_CELLXGENE.year] + self.organism = adata.obs[self._adata_ids_cellxgene.organism].values[0] + self.protocol = adata.obs[self._adata_ids_cellxgene.protocol].values[0] + self.year = adata.uns[self._adata_ids_cellxgene.year] diff --git a/sfaira/data/dataloaders/databases/cellxgene/external.py b/sfaira/data/dataloaders/databases/cellxgene/external.py deleted file mode 100644 index c7a6982b7..000000000 --- a/sfaira/data/dataloaders/databases/cellxgene/external.py +++ /dev/null @@ -1,3 +0,0 @@ -from sfaira.data import DatasetBase, DatasetGroup -from sfaira.consts import ADATA_IDS_SFAIRA, ADATA_IDS_CELLXGENE -from sfaira.consts import META_DATA_FIELDS diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py similarity index 74% rename from sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py rename to sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index ecb5706ba..3f396a8ba 100644 --- a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10x_ramachandran_001.py +++ 
b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -40,12 +40,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2019_10x_ramachandran_001_10.1038/s41586-019-1631-3" - self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" self.download_url_meta = None - self.author = "Henderson" + self.author = "Ramachandran" self.doi = "10.1038/s41586-019-1631-3" self.normalization = "raw" self.organ = "liver" @@ -60,22 +58,7 @@ def __init__( self.obs_key_healthy = self.obs_key_state_exact self.healthy_state_healthy = "Uninjured" - self.class_maps = { - "0": { - "MPs": "MP", - "Tcells": "Tcells", - "ILCs": "ILC", - "Endothelia": "Endothelia", - "Bcells": "Bcells", - "pDCs": "pDCs", - "Plasma Bcells": "Plasma B cell", - "Mast cells": "Mast cell", - "Mesenchyme": "Mesenchyme", - "Cholangiocytes": "Cholangiocytes", - "Hepatocytes": "Hepatocytes", - "Mesothelia": "Mesothelia", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "ramachandran.h5ad") diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv new file mode 100644 index 000000000..b9bd8d703 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv @@ -0,0 +1,13 @@ +source target +Bcells B cell +Cholangiocytes intrahepatic cholangiocyte +Endotheliaendothelial cell +Hepatocytes hepatocyte +ILCs innate lymphoid cell +MPs mononuclear phagocytes # ToDo this are subclustered in the manuscript, is this annotated in the object? 
+Mast cells mast cell +Mesenchyme mesenchymal cell +Mesothelia mesothelial cell +Plasma Bcells plasma cell +Tcells T cell +pDCs plasmacytoid dendritic cell diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py similarity index 85% rename from sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index b14dac445..8a2e9e6f1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_pancreas_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -20,12 +20,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2017_smartseq2_enge_001_10.1016/j.cell.2017.09.004" - self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" - self.author = "Quake" + self.author = "Enge" self.doi = "10.1016/j.cell.2017.09.004" self.healthy = True self.normalization = "raw" @@ -34,22 +32,10 @@ def __init__( self.organism = "human" self.state_exact = "healthy" self.year = 2017 - self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" - self.class_maps = { - "0": { - "alpha": "Alpha cell", - "acinar": "Acinar cell", - "ductal": "Ductal cell", - "beta": "Beta cell", - "unsure": "Unknown", - "delta": "Delta cell", - "mesenchymal": "Mesenchymal Cell" - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -84,4 +70,6 @@ def _load(self): 
adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in adata.obs.index] adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in adata.obs.index] + self.set_unknown_class_id(ids=["unsure"]) + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv new file mode 100644 index 000000000..74cd9094d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv @@ -0,0 +1,7 @@ +source target target_id +acinar pancreatic acinar cell CL:0002064 +alpha pancreatic A cell CL:0000171 +beta type B pancreatic cell CL:0000169 +delta pancreatic D cell CL:0000173 +ductal pancreatic ductal cell CL:0002079 +mesenchymal mesenchymal cell CL:0008019 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 76eaeb544..c776900b3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -11,21 +11,21 @@ SAMPLE_FNS = [ "Bladder_dge.txt.gz", "BoneMarrow1_dge.txt.gz", - # "BoneMarrow2_dge.txt.gz", # ToDo: not annotated, potentially bad quality? 
- "BoneMarrow3_dge.txt.gz", + # "BoneMarrow2_dge.txt.gz", # not annotated, potentially bad quality + # "BoneMarrow3_dge.txt.gz", # not annotated, potentially bad quality "BoneMarrowcKit1_dge.txt.gz", "BoneMarrowcKit2_dge.txt.gz", "BoneMarrowcKit3_dge.txt.gz", "Brain1_dge.txt.gz", "Brain2_dge.txt.gz", - # "CJ7.EB14.Ezh2.1_dge.txt.gz", # ToDo: sort out meta data for these - # "CJ7.EB14.WT.1_dge.txt.gz", # ToDo: sort out meta data for these - # "CJ7.EB14.WT.2_dge.txt.gz", # ToDo: sort out meta data for these - # "EB.Ezh2_dge.txt.gz", # ToDo: sort out meta data for these - # "EB.WT_dge.txt.gz", # ToDo: sort out meta data for these - "EmbryonicMesenchymeE14.5_dge.txt.gz", - "EmbryonicStemCell.CJ7_Deep_dge.txt.gz", - "EmbryonicStemCells_dge.txt.gz", + # "CJ7.EB14.Ezh2.1_dge.txt.gz", # TODO: sort out meta data for these + # "CJ7.EB14.WT.1_dge.txt.gz", # TODO: sort out meta data for these + # "CJ7.EB14.WT.2_dge.txt.gz", # TODO: sort out meta data for these + # "EB.Ezh2_dge.txt.gz", # TODO: sort out meta data for these + # "EB.WT_dge.txt.gz", # TODO: sort out meta data for these + # "EmbryonicMesenchymeE14.5_dge.txt.gz", # TODO: sort out meta data for these + # "EmbryonicStemCell.CJ7_Deep_dge.txt.gz", # TODO: sort out meta data for these + # "EmbryonicStemCells_dge.txt.gz", # TODO: sort out meta data for these "FetalBrain_dge.txt.gz", "FetalFemaleGonad_dge.txt.gz", "FetalIntestine_dge.txt.gz", @@ -44,30 +44,30 @@ "Lung1_dge.txt.gz", "Lung2_dge.txt.gz", "Lung3_dge.txt.gz", - "MammaryGland.Involution.CD45.1_dge.txt.gz", - "MammaryGland.Involution.CD45.2_dge.txt.gz", - "MammaryGland.Involution1_dge.txt.gz", - "MammaryGland.Involution2_dge.txt.gz", - "MammaryGland.Lactation1_dge.txt.gz", - "MammaryGland.Lactation2_dge.txt.gz", - "MammaryGland.Pregnancy_dge.txt.gz", - "MammaryGland.Virgin.CD45.1_dge.txt.gz", - "MammaryGland.Virgin.CD45.2_dge.txt.gz", + # "MammaryGland.Involution.CD45.1_dge.txt.gz", # TODO not annotated? 
+ # "MammaryGland.Involution.CD45.2_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Involution1_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Involution2_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Lactation1_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Lactation2_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Pregnancy_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Virgin.CD45.1_dge.txt.gz", # TODO not annotated? + # "MammaryGland.Virgin.CD45.2_dge.txt.gz", # TODO not annotated? "MammaryGland.Virgin1_dge.txt.gz", "MammaryGland.Virgin2_dge.txt.gz", "MammaryGland.Virgin3_dge.txt.gz", "MammaryGland.Virgin4_dge.txt.gz", - # "mES.CJ7_dge.txt.gz", # ToDo: sort out meta data for these + # "mES.CJ7_dge.txt.gz", # TODO: sort out meta data for these "MesenchymalStemCells_dge.txt.gz", "MesenchymalStemCellsPrimary_dge.txt.gz", - # "mouse-3T3_dge.txt.gz", # ToDo: sort out meta data for these + # "mouse-3T3_dge.txt.gz", # TODO: sort out meta data for these "Muscle_dge.txt.gz", "NeonatalCalvaria1_dge.txt.gz", "NeonatalCalvaria2_dge.txt.gz", "NeonatalHeart_dge.txt.gz", "NeonatalMuscle1_dge.txt.gz", "NeonatalMuscle2_dge.txt.gz", - "NeonatalPancreas_dge.txt.zip", + # "NeonatalPancreas_dge.txt.zip", # TODO enable zip file here "NeonatalRib1_dge.txt.gz", "NeonatalRib2_dge.txt.gz", "NeonatalRib3_dge.txt.gz", @@ -113,13 +113,8 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__( - sample_fn=sample_fn, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - **kwargs - ) + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) sample_organ_dict = { "Bladder_dge.txt.gz": "urinary bladder", "BoneMarrow1_dge.txt.gz": "bone marrow", @@ -288,8 +283,8 @@ def __init__( "PeripheralBlood4_dge.txt.gz": "adult", "PeripheralBlood5_dge.txt.gz": "adult", "PeripheralBlood6_dge.txt.gz": "adult", - "PlacentaE14.1_dge.txt.gz": 
"adult", - "PlacentaE14.2_dge.txt.gz": "adult", + "PlacentaE14.1_dge.txt.gz": "fetal", + "PlacentaE14.2_dge.txt.gz": "fetal", "Prostate1_dge.txt.gz": "adult", "Prostate2_dge.txt.gz": "adult", "SmallIntestine.CD45_dge.txt.gz": "adult", @@ -308,13 +303,11 @@ def __init__( } self.organ = sample_organ_dict[self.sample_fn] - self.id = f"mouse_{''.join(self.organ.split(' '))}_2018_microwellseq_han_" \ - f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1016/j.cell.2018.02.001" self.download_url_data = "https://ndownloader.figshare.com/articles/5435866?private_link=865e694ad06d5857db4b" self.download_url_meta = None - self.author = "Guo" + self.author = "Han" self.dev_stage = sample_dev_stage_dict[self.sample_fn] self.doi = "10.1016/j.cell.2018.02.001" self.normalization = "raw" @@ -326,7 +319,17 @@ def __init__( self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Annotation" + # Only adult and neonatal samples are annotated: + self.obs_key_cellontology_original = "Annotation" \ + if sample_dev_stage_dict[self.sample_fn] in ["adult", "neonatal"] and \ + self.sample_fn not in [ + "NeontalBrain1_dge.txt.gz", + "NeontalBrain2_dge.txt.gz", + "SmallIntestine.CD45_dge.txt.gz", + "Thymus2_dge.txt.gz", + ] else None + + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, '5435866.zip') @@ -342,7 +345,17 @@ def _load(self): ) adata = anndata.AnnData(data.T) - adata = adata[np.array([x in celltypes.index for x in adata.obs_names])].copy() - adata.obs = celltypes.loc[adata.obs_names, :] + annotated_cells = np.array([x in celltypes.index for x in adata.obs_names]) + # Subset to annotated cells if any are annotated: + if np.sum(annotated_cells) > 0: + adata = adata[annotated_cells].copy() + adata.obs = celltypes.loc[adata.obs_names, :] + + self.set_unknown_class_id(ids=[ + "Cell in cell cycle(Fetal_Kidney)", "Stomach cell_Gkn2 high(Stomach)", "Stomach cell_Mt2 high(Stomach)", + "Dividing cell(Mammary-Gland-Virgin)", "Dividing 
cell(Neonatal-Heart)", "Dividing cell(Neonatal-Rib)", + "Dividing cell(Neonatal-Skin)", "Dividing cell(Pancreas)", "Dividing cell(Stomach)", "Dividing cells(Lung)", + "Dividng cell(Neonatal-Calvaria)" + ]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv new file mode 100644 index 000000000..66a69cacd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv @@ -0,0 +1,451 @@ +source target target_id +AT1 Cell(Lung) type I pneumocyte CL:0002062 +AT2 Cell(Lung) type II pneumocyte CL:0002063 +Acinar cell(Pancreas) pancreatic acinar cell CL:0002064 +Adipocyte(Fetal_Kidney) fat cell CL:0000136 +Adipocyte(Neonatal-Skin) fat cell CL:0000136 +Adipocyte (Neonatal-Muscle) fat cell CL:0000136 +Alveolar bipotent progenitor(Lung) pneumocyte CL:0000322 +Alveolar macrophage_Ear2 high(Lung) alveolar macrophage CL:0000583 +Alveolar macrophage_Pclaf high(Lung) alveolar macrophage CL:0000583 +Antral mucous cell (Stomach) mucous cell of stomach CL:0002180 +Astrocyte_Atp1b2 high(Brain) astrocyte CL:0000127 +Astrocyte_Mfe8 high(Brain) astrocyte CL:0000127 +Astrocyte_Pla2g7 high(Brain) astrocyte CL:0000127 +Astroglial cell(Bergman glia)(Brain) Bergmann glial cell CL:0000644 +Atrial cardiomyocyte(Neonatal-Heart) regular atrial cardiac myocyte CL:0002129 +Atrial cardiomyocyte_Acta2 high(Neonatal-Heart) regular atrial cardiac myocyte CL:0002129 +B Cell(Lung) B cell CL:0000236 +B cell(Bone-Marrow_c-kit) B cell CL:0000236 +B cell(Kidney) B cell CL:0000236 +B cell(Neonatal-Muscle) B cell CL:0000236 +B cell(Neonatal-Rib) B cell CL:0000236 +B cell(Pancreas) B cell CL:0000236 +B cell(Thymus) B cell CL:0000236 +B cell(Uterus) B cell CL:0000236 +B cell_Cd79a&Fcer2a high(Mammary-Gland-Virgin) B cell CL:0000236 +B cell_Cd79a&Iglc2 high(Mammary-Gland-Virgin) B cell CL:0000236 
+B cell_Fcmr high(Liver) B cell CL:0000236 +B cell_Igha high(Peripheral_Blood) B cell CL:0000236 +B cell_Ighd high(Small-Intestine) B cell CL:0000236 +B cell_Igkc high(Bone-Marrow) B cell CL:0000236 +B cell_Igkv12-46 high(Small-Intestine) B cell CL:0000236 +B cell_Jchain high(Liver) B cell CL:0000236 +B cell_Jchain high(Mammary-Gland-Virgin) B cell CL:0000236 +B cell_Jchain high(Muscle) B cell CL:0000236 +B cell_Jchain high(Small-Intestine) B cell CL:0000236 +B cell_Ly6d high(Peripheral_Blood) B cell CL:0000236 +B cell_Ms4a1 high(Small-Intestine) B cell CL:0000236 +B cell_Rps27rt high(Peripheral_Blood) B cell CL:0000236 +B cell_Vpreb3 high(Muscle) B cell CL:0000236 +B cell_Vpreb3 high(Peripheral_Blood) B cell CL:0000236 +Basal epithelial cell(Bladder) bladder urothelial cell CL:1001428 +Basophil(Bone-Marrow_c-kit) basophil CL:0000767 +Basophil(Lung) basophil CL:0000767 +Basophil_Prss34 high(Peripheral_Blood) basophil CL:0000767 +Brown adipose tissue(Neonatal-Muscle) brown fat cell CL:0000449 +Brown adipose tissue_Cidea high(Neonatal-Skin) brown fat cell CL:0000449 +Brown adipose tissue_Cox8b high(Neonatal-Skin) brown fat cell CL:0000449 +Brown adipose tissue_mt-Nd5 high(Neonatal-Skin) brown fat cell CL:0000449 +Cardiac muscle cell(Neonatal-Heart) cardiac muscle cell CL:0000746 +Cartilage cell_Clu high(Neonatal-Rib) chondrocyte CL:0000138 +Cartilage cell_Col2a1 high(Neonatal-Rib) chondrocyte CL:0000138 +Cartilage cell_Cxcl14 high(Neonatal-Rib) chondrocyte CL:0000138 +Cartilage cell_Ppa1 high(Neonatal-Rib) chondrocyte CL:0000138 +Cartilage cell_Prg4 high(Neonatal-Rib) chondrocyte CL:0000138 +Chondrocyte(Neonatal-Muscle) chondrocyte CL:0000138 +Ciliated cell(Lung) ciliated cell CL:0000064 +Clara Cell(Lung) club cell CL:0000158 +Columnar epithelium(Small-Intestine) epithelial cell CL:0000066 +Conventional dendritic cell_Gngt2 high(Lung) conventional dendritic cell CL:0000990 +Conventional dendritic cell_H2-M2 high(Lung) conventional dendritic cell CL:0000990 
+Conventional dendritic cell_Mgl2 high(Lung) conventional dendritic cell CL:0000990 +Conventional dendritic cell_Tubb5 high(Lung) conventional dendritic cell CL:0000990 +Cumulus cell_Car14 high(Ovary) cumulus cell CL:0000711 +Cumulus cell_Nupr1 high(Ovary) cumulus cell CL:0000711 +Cumulus cell_Ube2c high(Ovary) cumulus cell CL:0000711 +DPT cell(Thymus) double-positive, alpha-beta thymocyte CL:0000809 +Dendritic cell(Muscle) dendritic cell CL:0000451 +Dendritic cell(Neonatal-Heart) dendritic cell CL:0000451 +Dendritic cell(Neonatal-Muscle) dendritic cell CL:0000451 +Dendritic cell(Prostate) dendritic cell CL:0000451 +Dendritic cell(Stomach) dendritic cell CL:0000451 +Dendritic cell(Uterus) dendritic cell CL:0000451 +Dendritic cell_Ccr7 high(Kidney) dendritic cell CL:0000451 +Dendritic cell_Cd74 high(Bladder) dendritic cell CL:0000451 +Dendritic cell_Cst3 high(Kidney) dendritic cell CL:0000451 +Dendritic cell_Cst3 high(Liver) dendritic cell CL:0000451 +Dendritic cell_Cst3 high(Mammary-Gland-Virgin) dendritic cell CL:0000451 +Dendritic cell_Fscn1 high(Mammary-Gland-Virgin) dendritic cell CL:0000451 +Dendritic cell_H2-Eb1 high(Bone-Marrow) dendritic cell CL:0000451 +Dendritic cell_Lyz2 high(Bladder) dendritic cell CL:0000451 +Dendritic cell_Naaa high(Lung) dendritic cell CL:0000451 +Dendritic cell_S100a4 high(Spleen) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Bone-Marrow) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Liver) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Mammary-Gland-Virgin) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Peripheral_Blood) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Small-Intestine) dendritic cell CL:0000451 +Dendritic cell_Siglech high(Spleen) dendritic cell CL:0000451 +Dendrtic cell(Pancreas) dendritic cell CL:0000451 +Dendrtic cell_Cst3 high(Small-Intestine) dendritic cell CL:0000451 +Distal collecting duct principal cell_Cldn4 high(Kidney) kidney collecting duct principal cell 
CL:1001431 +Distal collecting duct principal cell_Hsd11b2 high(Kidney) kidney collecting duct principal cell CL:1001431 +Distal convoluted tubule_Pvalb high(Kidney) kidney distal convoluted tubule epithelial cell CL:1000849 +Distal convoluted tubule_S100g high(Kidney) kidney distal convoluted tubule epithelial cell CL:1000849 +Dividing T cells(Lung) T cell CL:0000084 +Dividing dendritic cells(Lung) dendritic cell CL:0000451 +Ductal cell(Pancreas) pancreatic ductal cell CL:0002079 +Elongating spermatid(Testis) spermatid CL:0000018 +Endocrine cell(Pancreas) pancreatic endocrine cell CL:0008024 +Endothelial cell(Kidney) endothelial cell CL:0000115 +Endothelial cell(Liver) endothelial cell CL:0000115 +Endothelial cell(Muscle) endothelial cell CL:0000115 +Endothelial cell(Neonatal-Calvaria) endothelial cell CL:0000115 +Endothelial cell(Neonatal-Muscle) endothelial cell CL:0000115 +Endothelial cell(Neonatal-Rib) endothelial cell CL:0000115 +Endothelial cell(Neonatal-Skin) endothelial cell CL:0000115 +Endothelial cell_Cldn5 high(Uterus) endothelial cell CL:0000115 +Endothelial cell_Eln high(Neonatal-Heart) endothelial cell CL:0000115 +Endothelial cell_Enpp2 high(Neonatal-Heart) endothelial cell CL:0000115 +Endothelial cell_Fabp4 high(Pancreas) endothelial cell CL:0000115 +Endothelial cell_Igfbp5 high(Neonatal-Heart) endothelial cell CL:0000115 +Endothelial cell_Kdr high(Lung) endothelial cell CL:0000115 +Endothelial cell_Lrg1 high(Pancreas) endothelial cell CL:0000115 +Endothelial cell_Ly6c1 high(Bladder) endothelial cell CL:0000115 +Endothelial cell_Tm4sf1 high(Pancreas) endothelial cell CL:0000115 +Endothelial cell_Tm4sf1 high(Uterus) endothelial cell CL:0000115 +Endothelial cell_Tmem100 high(Lung) endothelial cell CL:0000115 +Endothelial cells_Vwf high(Lung) endothelial cell CL:0000115 +Eosinophil granulocyte(Lung) eosinophil CL:0000771 +Eosinophil progenitor cell(Bone-Marrow_c-kit) eosinophil progenitor cell CL:0000611 +Epithelia cell_Spp1 high(Liver) epithelial cell 
CL:0000066 +Epithelial cell(Liver) epithelial cell CL:0000066 +Epithelial cell(Neonatal-Heart) epithelial cell CL:0000066 +Epithelial cell(Neonatal-Muscle) epithelial cell CL:0000066 +Epithelial cell(Neonatal-Skin) epithelial cell CL:0000066 +Epithelial cell(Prostate) epithelial cell of prostatic duct CL:0002232 +Epithelial cell_Cryab high(Kidney) kidney epithelial cell CL:0002518 +Epithelial cell_Gkn3 high(Stomach) epithelial cell CL:0000066 +Epithelial cell_Gm23935 high(Bladder) bladder urothelial cell CL:1001428 +Epithelial cell_Kcne3 high(Small-Intestine) epithelial cell CL:0000066 +Epithelial cell_Krt20 high(Stomach) epithelial cell CL:0000066 +Epithelial cell_Pla2g1b high(Stomach) epithelial cell CL:0000066 +Epithelial cell_Sh2d6 high(Small-Intestine) epithelial cell CL:0000066 +Epithelial cell_Upk3a high(Bladder) bladder urothelial cell CL:1001428 +Epithelium of small intestinal villi_Fabp1 high(Small-Intestine) epithelial cell of small intestine CL:0002254 +Epithelium of small intestinal villi_Fabp6 high(Small-Intestine) epithelial cell of small intestine CL:0002254 +Epithelium of small intestinal villi_Gm23935 high(Small-Intestine) epithelial cell of small intestine CL:0002254 +Epithelium of small intestinal villi_S100g high(Small-Intestine) epithelial cell of small intestine CL:0002254 +Epithelium of small intestinal villi_mt-Nd1 high(Small-Intestine) epithelial cell of small intestine CL:0002254 +Erythroblast(Bone-Marrow) erythroblast CL:0000765 +Erythroblast(Neonatal-Calvaria) erythroblast CL:0000765 +Erythroblast(Neonatal-Skin) erythroblast CL:0000765 +Erythroblast(Small-Intestine) erythroblast CL:0000765 +Erythroblast(Spleen) erythroblast CL:0000765 +Erythroblast_Car1 high(Muscle) erythroblast CL:0000765 +Erythroblast_Car2 high(Muscle) erythroblast CL:0000765 +Erythroblast_Car2 high(Neonatal-Muscle) erythroblast CL:0000765 +Erythroblast_Car2 high(Peripheral_Blood) erythroblast CL:0000765 +Erythroblast_Hba-a1 high(Neonatal-Rib) erythroblast CL:0000765 
+Erythroblast_Hba-a2 high(Peripheral_Blood) erythroblast CL:0000765 +Erythroblast_Hbb-bs high(Liver) erythroblast CL:0000765 +Erythroblast_Hbb-bs high(Neonatal-Muscle) erythroblast CL:0000765 +Erythroblast_Hbb-bs high(Testis) erythroblast CL:0000765 +Erythroblast_Hbb-bt high(Liver) erythroblast CL:0000765 +Erythroblast_Hbb-bt high(Pancreas) erythroblast CL:0000765 +Erythroblast_Igkc high(Pancreas) erythroblast CL:0000765 +Erythroblast_Ttr high(Neonatal-Rib) erythroblast CL:0000765 +Erythrocyte progenitor_Car1 high(Bone-Marrow_c-kit) megakaryocyte-erythroid progenitor cell CL:0000050 +Erythrocyte progenitor_Hba-a1 high(Bone-Marrow_c-kit) megakaryocyte-erythroid progenitor cell CL:0000050 +Fenestrated endothelial cell_Plvap high(Kidney) glomerular endothelial cell CL:0002188 +Fenestrated endothelial cell_Tm4sf1 high(Kidney) glomerular endothelial cell CL:0002188 +G cell(Stomach) type G enteroendocrine cell CL:0000508 +Gastric mucosal cell(Stomach) mucous cell of stomach CL:0002180 +Glandular epithelium(Prostate) endocrine-paracrine cell of prostate gland CL:0002313 +Glandular epithelium_Ltf high(Uterus) uterine cervix glandular cell CL:1001587 +Glandular epithelium_Sprr2f high(Uterus) uterine cervix glandular cell CL:1001587 +Glial cell(Neonatal-Muscle) glial cell CL:0000125 +Glial cell(Pancreas) glial cell CL:0000125 +Glomerular epithelial cell_Aldh1a2 high(Fetal_Kidney) kidney glomerular epithelial cell CL:1000510 +Granulocyte monocyte progenitor cell(Muscle) granulocyte monocyte progenitor cell CL:0000557 +Granulocyte monocyte progenitor cell(Neonatal-Calvaria) granulocyte monocyte progenitor cell CL:0000557 +Granulocyte(Liver) granulocyte CL:0000094 +Granulocyte(Neonatal-Rib) granulocyte CL:0000094 +Granulocyte(Pancreas) granulocyte CL:0000094 +Granulocyte(Spleen) granulocyte CL:0000094 +Granulocyte(Uterus) granulocyte CL:0000094 +Granulocyte_Il33 high(Brain) granulocyte CL:0000094 +Granulocyte_Ngp high(Brain) granulocyte CL:0000094 +Granulosa cell_Inhba 
high(Ovary) granulosa cell CL:0000501 +Granulosa cell_Kctd14 high(Ovary) granulosa cell CL:0000501 +Hematopoietic stem progenitor cell(Bone-Marrow) hematopoietic stem cell CL:0000037 +Hepatocyte_Fabp1 high(Liver) hepatocyte CL:0000182 +Hepatocyte_mt-Nd4 high(Liver) hepatocyte CL:0000182 +Hypothalamic ependymal cell(Brain) ependymal cell CL:0000065 +Ig−producing B cell(Lung) plasmablast CL:0000980 +Intercalated cells of collecting duct_Aqp6 high(Kidney) renal intercalated cell CL:0005010 +Intercalated cells of collecting duct_Slc26a4 high(Kidney) renal intercalated cell CL:0005010 +Interstitial macrophage(Lung) macrophage CL:0000235 +Keratinocyte(Neonatal-Skin) keratinocyte CL:0000312 +Keratinocyte(Uterus) keratinocyte CL:0000312 +Kuppfer cell(Liver) Kupffer cell CL:0000091 +Large luteal cell(Ovary) large luteal cell CL:0000592 +Left ventricle cardiomyocyte_Myl2 high(Neonatal-Heart) ventricular cardiac muscle cell CL:2000046 +Leydig cell(Testis) Leydig cell CL:0000178 +Luminal cell_Krt19 high (Mammary-Gland-Virgin) mammary alveolar cell CL:0002325 +Luminal progenitor(Mammary-Gland-Virgin) mammary alveolar cell CL:0002325 +Lymphatic vessel endothelial cell(Neonatal-Skin) dermis lymphatic vessel endothelial cell CL:2000011 +Macrophage(Neonatal-Heart) macrophage CL:0000235 +Macrophage(Pancreas) macrophage CL:0000235 +Macrophage(Spleen) macrophage CL:0000235 +Macrophage(Stomach) macrophage CL:0000235 +Macrophage(Uterus) macrophage CL:0000235 +Macrophage_Ace high(Peripheral_Blood) macrophage CL:0000235 +Macrophage_Apoe high(Small-Intestine) macrophage CL:0000235 +Macrophage_C1qc high(Bone-Marrow_c-kit) macrophage CL:0000235 +Macrophage_C1qc high(Mammary-Gland-Virgin) macrophage CL:0000235 +Macrophage_C1qc high(Neonatal-Rib) macrophage CL:0000235 +Macrophage_Ccl4 high (Kidney) macrophage CL:0000235 +Macrophage_Cd74 high(Bone-Marrow_c-kit) macrophage CL:0000235 +Macrophage_Cd74 high(Neonatal-Skin) macrophage CL:0000235 +Macrophage_Chil3 high(Liver) macrophage CL:0000235 
+Macrophage_Ctss high(Neonatal-Rib) macrophage CL:0000235 +Macrophage_Cxcl2 high(Small-Intestine) macrophage CL:0000235 +Macrophage_Flt-ps1 high(Peripheral_Blood) macrophage CL:0000235 +Macrophage_G0s2 high(Small-Intestine) macrophage CL:0000235 +Macrophage_Klf2 high(Brain) macrophage CL:0000235 +Macrophage_Ly6c2 high(Pancreas) macrophage CL:0000235 +Macrophage_Lyz1 high(Mammary-Gland-Virgin) macrophage CL:0000235 +Macrophage_Lyz2 high(Brain) macrophage CL:0000235 +Macrophage_Lyz2 high(Kidney) macrophage CL:0000235 +Macrophage_Lyz2 high(Neonatal-Muscle) macrophage CL:0000235 +Macrophage_Lyz2 high(Neonatal-Skin) macrophage CL:0000235 +Macrophage_Lyz2 high(Ovary) macrophage CL:0000235 +Macrophage_Lyz2 high(Testis) macrophage CL:0000235 +Macrophage_Ms4a6c high(Bone-Marrow) macrophage CL:0000235 +Macrophage_Ms4a6c high(Muscle) macrophage CL:0000235 +Macrophage_Pf4 high(Bladder) macrophage CL:0000235 +Macrophage_Pf4 high(Neonatal-Muscle) macrophage CL:0000235 +Macrophage_Pf4 high(Neonatal-Skin) macrophage CL:0000235 +Macrophage_Pf4 high(Peripheral_Blood) macrophage CL:0000235 +Macrophage_Retnla high(Muscle) macrophage CL:0000235 +Macrophage_S100a4 high(Bone-Marrow) macrophage CL:0000235 +Macrophage_S100a4 high(Bone-Marrow_c-kit) macrophage CL:0000235 +Macrophage_S100a4 high(Peripheral_Blood) macrophage CL:0000235 +Marcrophage_Cd74 high(Ovary) macrophage CL:0000235 +Marginal zone B cell(Spleen) marginal zone B cell CL:0000845 +Mast cell(Bone-Marrow) mast cell CL:0000097 +Mast cell(Neonatal-Calvaria) mast cell CL:0000097 +Mast cell(Neonatal-Muscle) mast cell CL:0000097 +Mast cell(Neonatal-Skin) mast cell CL:0000097 +Mast cell(Small-Intestine) mast cell CL:0000097 +Megakaryocyte progenitor cell(Bone-Marrow_c-kit) megakaryocyte progenitor cell CL:0000553 +Melanocyte(Neonatal-Skin) dermal melanocyte CL:0002482 +Mesenchymal cell(Neonatal-Muscle) mesenchymal cell CL:0008019 +Mesenchymal stromal cell(Bladder) mesenchymal cell CL:0008019 +Metanephric mesenchyme(Fetal_Kidney) 
metanephric mesenchyme stem cell CL:0000324 +Microglia(Brain) microglial cell CL:0000129 +Monocyte progenitor cell(Lung) granulocyte monocyte progenitor cell CL:0000557 +Monocyte progenitor cell_Ctsg high(Bone-Marrow_c-kit) granulocyte monocyte progenitor cell CL:0000557 +Monocyte progenitor cell_Prtn3 high(Bone-Marrow_c-kit) granulocyte monocyte progenitor cell CL:0000557 +Monocyte progenitor(Bone-Marrow_c-kit) granulocyte monocyte progenitor cell CL:0000557 +Monocyte(Spleen) monocyte CL:0000576 +Monocyte(Uterus) monocyte CL:0000576 +Monocyte_Elane high(Peripheral_Blood) monocyte CL:0000576 +Monocyte_F13a1 high(Peripheral_Blood) monocyte CL:0000576 +Monocyte_Mif high(Bone-Marrow) monocyte CL:0000576 +Monocyte_Prtn3 high(Bone-Marrow) monocyte CL:0000576 +Multipotent progenitor_Ctla2a high(Bone-Marrow_c-kit) hematopoietic multipotent progenitor cell CL:0000837 +Muscle cell(Neonatal-Rib) cell of skeletal muscle CL:0000188 +Muscle cell(Stomach) cell of skeletal muscle CL:0000188 +Muscle cell_Acta2 high(Neonatal-Rib) cell of skeletal muscle CL:0000188 +Muscle cell_Actc1 high(Neonatal-Calvaria) cell of skeletal muscle CL:0000188 +Muscle cell_Actc1 high(Neonatal-Muscle) cell of skeletal muscle CL:0000188 +Muscle cell_Actc1 high(Neonatal-Rib) cell of skeletal muscle CL:0000188 +Muscle cell_Actc1 high(Neonatal-Skin) cell of skeletal muscle CL:0000188 +Muscle cell_Junb high(Neonatal-Calvaria) cell of skeletal muscle CL:0000188 +Muscle cell_Lrrc15 high(Neonatal-Skin) cell of skeletal muscle CL:0000188 +Muscle cell_Mgp high(Uterus) myometrial cell CL:0002366 +Muscle cell_Myl9 high(Neonatal-Calvaria) cell of skeletal muscle CL:0000188 +Muscle cell_Myl9 high(Neonatal-Muscle) cell of skeletal muscle CL:0000188 +Muscle cell_Pcp4 high(Uterus) myometrial cell CL:0002366 +Muscle cell_Tnnc1 high(Muscle) cell of skeletal muscle CL:0000188 +Muscle cell_Tnnc2 high(Muscle) cell of skeletal muscle CL:0000188 +Muscle progenitor cell(Muscle) cell of skeletal muscle CL:0000188 +Myelinating 
oligodendrocyte(Brain) oligodendrocyte CL:0000128 +NK Cell(Lung) mature natural killer cell CL:0000824 +NK cell(Bladder) mature natural killer cell CL:0000824 +NK cell(Mammary-Gland-Virgin) mature natural killer cell CL:0000824 +NK cell(Spleen) mature natural killer cell CL:0000824 +NK cell(Uterus) mature natural killer cell CL:0000824 +NK cell_Gzma high(Peripheral_Blood) mature natural killer cell CL:0000824 +Neuron(Brain) neuron CL:0000540 +Neuron(Neonatal-Calvaria) neuron CL:0000540 +Neuron(Neonatal-Muscle) neuron CL:0000540 +Neuron(Neonatal-Skin) neuron CL:0000540 +Neuron_Mpz high(Neonatal-Rib) neuron CL:0000540 +Neuron_Stmn2 high(Neonatal-Rib) neuron CL:0000540 +Neutrophil granulocyte(Lung) neutrophil CL:0000775 +Neutrophil progenitor(Bone-Marrow) neutrophil progenitor cell CL:0000834 +Neutrophil progenitor_S100a8 high(Kidney) neutrophil progenitor cell CL:0000834 +Neutrophil(Neonatal-Calvaria) neutrophil CL:0000775 +Neutrophil(Neonatal-Rib) neutrophil CL:0000775 +Neutrophil(Neonatal-Skin) neutrophil CL:0000775 +Neutrophil(Spleen) neutrophil CL:0000775 +Neutrophil_Camp high(Muscle) neutrophil CL:0000775 +Neutrophil_Camp high(Peripheral_Blood) neutrophil CL:0000775 +Neutrophil_Cebpe high(Bone-Marrow) neutrophil CL:0000775 +Neutrophil_Elane high(Neonatal-Muscle) neutrophil CL:0000775 +Neutrophil_Elane high(Neonatal-Rib) neutrophil CL:0000775 +Neutrophil_Fcnb high(Bone-Marrow) neutrophil CL:0000775 +Neutrophil_Fcnb high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Hmox1 high(Neonatal-Muscle) neutrophil CL:0000775 +Neutrophil_Ighg1 high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Il1b high(Peripheral_Blood) neutrophil CL:0000775 +Neutrophil_Lcn2 high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Ltf high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Ltf high(Peripheral_Blood) neutrophil CL:0000775 +Neutrophil_Mmp8 high(Bone-Marrow) neutrophil CL:0000775 +Neutrophil_Ngp high(Bone-Marrow) neutrophil CL:0000775 +Neutrophil_Ngp 
high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Ngp high(Liver) neutrophil CL:0000775 +Neutrophil_Ngp high(Neonatal-Heart) neutrophil CL:0000775 +Neutrophil_Prg2 high(Muscle) neutrophil CL:0000775 +Neutrophil_Retnlg high(Bone-Marrow_c-kit) neutrophil CL:0000775 +Neutrophil_Retnlg high(Muscle) neutrophil CL:0000775 +Neutrophil_Retnlg high(Neonatal-Heart) neutrophil CL:0000775 +Neutrophil_Retnlg high(Peripheral_Blood) neutrophil CL:0000775 +Neutrophil_Stfa3 high(Neonatal-Muscle) neutrophil CL:0000775 +Nuocyte(Lung) group 2 innate lymphoid cell CL:0001069 +Oligodendrocyte precursor cell(Brain) oligodendrocyte precursor cell CL:0002453 +Oligodendrocyte(Neonatal-Rib) oligodendrocyte CL:0000128 +Osteoblast(Neonatal-Rib) osteoblast CL:0000062 +Osteoblast_Dlk1 high(Neonatal-Skin) osteoblast CL:0000062 +Osteoblast_Ppic high(Neonatal-Skin) osteoblast CL:0000062 +Osteoblast (Neonatal-Calvaria) calvarial osteoblast CL:2000058 +Osteoclast(Neonatal-Rib) osteoclast CL:0000092 +Ovarian surface epithelium cell(Ovary) ovarian surface epithelial cell CL:2000064 +Ovarian vascular surface endothelium cell(Ovary) ovarian microvascular endothelial cell CL:2000065 +Pan-GABAergic(Brain) GABAergic neuron CL:0000617 +Paneth cell(Small-Intestine) paneth cell of epithelium of small intestine CL:1000343 +Parietal cell (Stomach) parietal cell CL:0000162 +Pericentral (PC) hepatocytes(Liver) centrilobular region hepatocyte CL:0019029 +Periportal (PP) hepatocyte(Liver) periportal region hepatocyte CL:0019026 +Pit cell_Gm26917 high(Stomach) natural killer cell CL:0000623 +Pit cell_Ifrd1 high(Stomach) natural killer cell CL:0000623 +Plasma cell(Spleen) plasma cell CL:0000786 +Plasmacytoid dendritic cell(Lung) plasmacytoid dendritic cell CL:0000784 +Pre T cell(Thymus) thymocyte CL:0000893 +Pre-Sertoli cell_Cst9 high(Testis) Sertoli cell CL:0000216 +Pre-Sertoli cell_Ctsl high(Testis) Sertoli cell CL:0000216 +Pre-pro B cell(Bone-Marrow) Fraction A pre-pro B cell CL:0002045 +Preleptotene 
spermatogonia(Testis) primary spermatocyte CL:0000656 +Preosteoblast/Osteoblast/Bone cell/Cartilage cell(Neonatal-Calvaria) non-terminally differentiated cell CL:0000055 +Proliferating thymocyte(Thymus) thymocyte CL:0000893 +Prostate gland cell(Prostate) endocrine-paracrine cell of prostate gland CL:0002313 +Proximal tubule brush border cell(Kidney) epithelial cell of proximal tubule CL:0002306 +Proximal tubule cell_Cyp4a14 high(Kidney) epithelial cell of proximal tubule CL:0002306 +Proximal tubule cell_Osgin1 high(Kidney) epithelial cell of proximal tubule CL:0002306 +S cell_Chgb high(Small-Intestine) type S enteroendocrine cell CL:0002281 +S cell_Gip high(Small-Intestine) type S enteroendocrine cell CL:0002281 +S1 proximal tubule cells(Kidney) epithelial cell of proximal tubule CL:0002306 +S3 proximal tubule cells(Kidney) epithelial cell of proximal tubule CL:0002306 +Schwann cell(Brain) Schwann cell CL:0002573 +Sertoli cell(Testis) Sertoli cell CL:0000216 +Skeletal muscle cell_Myl1 high(Neonatal-Muscle) cell of skeletal muscle CL:0000188 +Skeletal muscle cell_Tnnc2 high(Neonatal-Muscle) cell of skeletal muscle CL:0000188 +Small luteal cell(Ovary) small luteal cell CL:0000590 +Smooth muscle cell(Bladder) smooth muscle cell of bladder CL:0002597 +Smooth muscle cell(Neonatal-Heart) smooth muscle cell CL:0000192 +Smooth muscle cell_Acta2 high(Neonatal-Skin) smooth muscle cell CL:0000192 +Smooth muscle cell_Acta2 high(Pancreas) smooth muscle cell CL:0000192 +Smooth muscle cell_Mylk high(Neonatal-Muscle) cell of skeletal muscle CL:0000188 +Smooth muscle cell_Rgs5 high(Pancreas) smooth muscle cell CL:0000192 +Smooth muscle cell_Rgs5 high(Uterus) uterine smooth muscle cell CL:0002601 +Spermatids_1700016P04Rik high(Testis) spermatid CL:0000018 +Spermatids_Cst13 high(Testis) spermatid CL:0000018 +Spermatids_Hmgb4 high(Testis) spermatid CL:0000018 +Spermatids_Tnp1 high(Testis) spermatid CL:0000018 +Spermatocyte_1700001F09Rik high(Testis) spermatocyte CL:0000017 
+Spermatocyte_Cabs1 high(Testis) spermatocyte CL:0000017 +Spermatocyte_Calm2 high(Testis) spermatocyte CL:0000017 +Spermatocyte_Mesp1 high(Testis) spermatocyte CL:0000017 +Spermatocyte_Slc2a3 high(Testis) spermatocyte CL:0000017 +Spermatogonia_1700001P01Rik high(Testis) primary spermatocyte CL:0000656 +Spermatogonia_Tbc1d23 high(Testis) primary spermatocyte CL:0000656 +Stem and progenitor cell(Mammary-Gland-Virgin) stem cell CL:0000034 +Stomach cell_Muc5ac high(Stomach) mucous cell of stomach CL:0002180 +Stroma cell (Ovary) stromal cell of ovary CL:0002132 +Stromal cell(Liver) stromal cell CL:0000499 +Stromal cell(Muscle) stromal cell CL:0000499 +Stromal cell(Prostate) prostate stromal cell CL:0002622 +Stromal cell_Acta1 high(Neonatal-Rib) stromal cell CL:0000499 +Stromal cell_Acta2 high(Lung) stromal cell CL:0000499 +Stromal cell_Adamdec1 high(Small-Intestine) stromal cell CL:0000499 +Stromal cell_Akr1c18 high(Neonatal-Skin) stromal cell CL:0000499 +Stromal cell_Ankrd1 high(Kidney) stromal cell CL:0000499 +Stromal cell_Car3 high(Bladder) stromal cell CL:0000499 +Stromal cell_Ccl11 high(Uterus) stromal cell CL:0000499 +Stromal cell_Col1a1 high(Neonatal-Calvaria) stromal cell CL:0000499 +Stromal cell_Col3a1 high(Mammary-Gland-Virgin) stromal cell CL:0000499 +Stromal cell_Col3a1 high(Neonatal-Calvaria) stromal cell CL:0000499 +Stromal cell_Col3a1 high(Neonatal-Heart) stromal cell CL:0000499 +Stromal cell_Cxcl10 high(Kidney) stromal cell CL:0000499 +Stromal cell_Cxcl14 high(Uterus) stromal cell CL:0000499 +Stromal cell_Dcn high(Kidney) stromal cell CL:0000499 +Stromal cell_Dcn high(Lung) stromal cell CL:0000499 +Stromal cell_Dcn high(Small-Intestine) stromal cell CL:0000499 +Stromal cell_Dpt high(Bladder) stromal cell CL:0000499 +Stromal cell_Fmod high(Neonatal-Heart) stromal cell CL:0000499 +Stromal cell_Fn1 high(Pancreas) stromal cell of pancreas CL:0002574 +Stromal cell_Gas6 high(Neonatal-Skin) stromal cell CL:0000499 +Stromal cell_Gm23935 high(Uterus) stromal cell 
CL:0000499 +Stromal cell_Has1 high(Uterus) stromal cell CL:0000499 +Stromal cell_Hsd11b2 high(Uterus) stromal cell CL:0000499 +Stromal cell_Inmt high(Lung) stromal cell CL:0000499 +Stromal cell_Mfap4 high(Pancreas) stromal cell CL:0000499 +Stromal cell_Mgp high(Fetal_Kidney) stromal cell CL:0000499 +Stromal cell_Mgp high(Kidney) stromal cell CL:0000499 +Stromal cell_Ndn high(Neonatal-Muscle) stromal cell CL:0000499 +Stromal cell_Pi16 high(Mammary-Gland-Virgin) stromal cell CL:0000499 +Stromal cell_Ptgds high(Kidney) stromal cell CL:0000499 +Stromal cell_Smoc2 high(Pancreas) stromal cell of pancreas CL:0002574 +Stromal cell_Spp1 high(Neonatal-Muscle) stromal cell CL:0000499 +Stromal cell_Tnmd high(Neonatal-Rib) stromal cell CL:0000499 +T Cell_Cd8b1 high(Lung) CD8-positive, alpha-beta T cell CL:0000625 +T cell(Kidney) T cell CL:0000084 +T cell(Muscle) T cell CL:0000084 +T cell(Neonatal-Muscle) T cell CL:0000084 +T cell(Pancreas) T cell CL:0000084 +T cell(Prostate) T cell CL:0000084 +T cell(Spleen) T cell CL:0000084 +T cell_Ccl5 high(Small-Intestine) T cell CL:0000084 +T cell_Cd7 high(Small-Intestine) T cell CL:0000084 +T cell_Cd8b1 high(Mammary-Gland-Virgin) CD8-positive, alpha-beta T cell CL:0000625 +T cell_Gm14303 high(Peripheral_Blood) T cell CL:0000084 +T cell_Gzma high(Liver) T cell CL:0000084 +T cell_Icos high(Small-Intestine) T cell CL:0000084 +T cell_Id2 high(Thymus) T cell CL:0000084 +T cell_Ly6c2 high(Mammary-Gland-Virgin) T cell CL:0000084 +T cell_Ms4a4b high(Bone-Marrow) T cell CL:0000084 +T cell_Ms4a4b high(Small-Intestine) T cell CL:0000084 +T cell_Ms4a4b high(Thymus) T cell CL:0000084 +T cell_Trbc2 high(Liver) T cell CL:0000084 +T cell_Trbc2 high(Peripheral_Blood) T cell CL:0000084 +T-cells_Ctla4 high(Mammary-Gland-Virgin) T cell CL:0000084 +Tendon stem/progenitor cell(Neonatal-Muscle) preosteoblast CL:0007010 +Thecal cell(Ovary) theca cell CL:0000503 +Thick ascending limb of the loop of Henle(Kidney) kidney loop of Henle thick ascending limb 
epithelial cell CL:1001106 +Tuft cell(Stomach) brush cell CL:0002204 +Umbrella cell(Bladder) bladder urothelial cell CL:1001428 +Ureteric epithelium(Kidney) epithelial cell CL:0000066 +Urothelium(Bladder) bladder urothelial cell CL:1001428 +Vascular endothelial cell(Bladder) endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell(Neonatal-Heart) endothelial cell of vascular tree CL:0002139 +Vascular smooth muscle progenitor cell(Bladder) vascular associated smooth muscle cell CL:0000359 +Ventricle cardiomyocyte_Kcnj8 high(Neonatal-Heart) ventricular cardiac muscle cell CL:2000046 +abT cell(Thymus) immature alpha-beta T cell CL:0000790 +gdT cell (Thymus) gamma-delta thymocyte CL:0002405 +luteal cells(Ovary) luteal cell CL:0000175 +β-cell(Pancreas) type B pancreatic cell CL:0000169 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py similarity index 68% rename from sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py index 9bf27bf17..ad7d805ca 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_colon_2019_10x_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py @@ -23,14 +23,13 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"human_colon_2019_10x_kinchen_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_10.1016/j.cell.2018.08.067" - + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, 
meta_path=meta_path, + cache_path=cache_path, **kwargs) self.download_url_data = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&" \ f"file=GSE114374%5FHuman%5F{sample_fn}%5Fexpression%5Fmatrix%2Etxt%2Egz" self.download_url_meta = f"private,{sample_fn.lower()}_meta_data_stromal_with_donor.txt" - self.author = "Simmons" + self.author = "Kinchen" self.doi = "10.1016/j.cell.2018.08.067" self.normalization = "norm" self.organ = "lamina propria of mucosa of colon" @@ -46,24 +45,7 @@ def __init__( self.obs_key_age = "Age" self.obs_key_sex = "Sex" - self.class_maps = { - "0": { - "Endothelial 1": "Endothelial", - "Endothelial 2": "Endothelial", - "Glial": "Glial cells", - "Myofibroblasts": "Myofibroblasts", - "Pericyte 1": "Pericytes", - "Pericyte 2": "Pericytes", - "Pericytes": "Pericytes", - "Plasma Cells": "Plasma Cells", - "Smooth Muscle": "Smooth Muscle", - "Stromal 1": "Stromal", - "Stromal 2a": "Stromal", - "Stromal 2b": "Stromal", - "Stromal 3": "Stromal", - "Stromal 4": "Stromal", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.tsv new file mode 100644 index 000000000..67c929db1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.tsv @@ -0,0 +1,15 @@ +source target target_id +Endothelial 1 endothelial cell CL:0000115 +Endothelial 2 endothelial cell CL:0000115 +Glial glial cell CL:0000125 +Myofibroblasts myofibroblast cell CL:0000186 +Pericyte 1 pericyte cell CL:0000669 +Pericyte 2 pericyte cell CL:0000669 +Pericytes pericyte cell CL:0000669 +Plasma Cells plasma cell CL:0000786 +Smooth Muscle smooth muscle cell of large intestine CL:1000279 +Stromal 1 stromal cell 
CL:0000499 +Stromal 2a stromal cell CL:0000499 +Stromal 2b stromal cell CL:0000499 +Stromal 3 stromal cell CL:0000499 +Stromal 4 stromal cell CL:0000499 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py deleted file mode 100644 index 388f37cd8..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colon_2019_10x_smilie_001.py +++ /dev/null @@ -1,88 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - - self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" - self.download_url_meta = None - - self.author = "Regev" - self.doi = "10.1016/j.cell.2019.06.029" - self.healthy = True - self.normalization = "raw" - self.organ = "colonic epithelium" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "Cycling TA": "Cycling TA", - "TA 1": "TA 1", - "TA 2": "TA 2", - "Immature Enterocytes 2": "Immature Enterocytes 2", - "Immature Enterocytes 1": "Immature Enterocytes 1", - "Enterocyte Progenitors": "Enterocyte Progenitors", - "Immature Goblet": "Immature Goblet", - "Enterocytes": "Enterocytes", - "Secretory TA": "Secretory TA", - "Best4+ Enterocytes": "Best4+ Enterocytes", - "CD8+ IELs": "CD8+ IELs", - "Goblet": "Goblet cells", - "Stem": "Stem 
cells", - "Tuft": "Tuft", - "Follicular": "Follicular", - "Enteroendocrine": "Enteroendocrine cells", - "Plasma": "Plasma Cells", - "CD4+ Memory": "CD4+ Memory", - "CD8+ LP": "CD8+ LP", - "CD69- Mast": "CD69- Mast", - "Macrophages": "Macrophage", - "GC": "Glial cells", - "Cycling B": "B cell cycling", - "CD4+ Activated Fos-hi": "CD4+ T Activated Fos-hi", - "CD4+ Activated Fos-lo": "CD4+ T Activated Fos-lo", - "NKs": "NK", - "Cycling T": "Cycling T", - "M cells": "M cells", - "CD69+ Mast": "CD69+ Mast", - "MT-hi": "MT-hi", - "CD8+ IL17+": "CD8+ IL17+", - "CD4+ PD1+": "CD4+ PD1+", - "DC2": "DC2", - "Treg": "Treg", - "ILCs": "ILC", - "DC1": "DC1", - "WNT2B+ Fos-lo 1": "WNT2B+ Fos-lo 1", - "WNT5B+ 2": "WNT5B+ 2", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, "smillie19_epi.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py new file mode 100644 index 000000000..027400290 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -0,0 +1,44 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" + self.download_url_meta = None + + 
self.author = "Smilie" + self.doi = "10.1016/j.cell.2019.06.029" + self.healthy = True + self.normalization = "raw" + self.organ = "colonic epithelium" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "CellType" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, "smillie19_epi.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.tsv new file mode 100644 index 000000000..3cc9c58c8 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.tsv @@ -0,0 +1,39 @@ +source target target_id +Best4+ Enterocytes enterocyte CL:0000584 +CD4+ Activated Fos-hi activated CD4-positive, alpha-beta T cell CL:0000896 +CD4+ Activated Fos-lo activated CD4-positive, alpha-beta T cell CL:0000896 +CD4+ Memory CD4-positive, alpha-beta memory T cell CL:0000897 +CD4+ PD1+ CD4-positive, alpha-beta T cell CL:0000624 +CD69+ Mast mast cell CL:0000097 +CD69- Mast mast cell CL:0000097 +CD8+ IELs CD8-alpha-beta-positive, alpha-beta intraepithelial T cell CL:0000796 +CD8+ IL17+ CD8-positive, alpha-beta thymocyte CL:0000811 +CD8+ LP CD8-positive, alpha-beta thymocyte CL:0000811 +Cycling B B cell CL:0000236 +Cycling T T cell CL:0000084 +Cycling TA transit amplifying cell of large intestine CL:0009011 +DC1 dendritic cell CL:0000451 +DC2 dendritic cell CL:0000451 +Enterocyte Progenitors enterocyte CL:0000584 +Enterocytes enterocyte CL:0000584 +Enteroendocrine 
enteroendocrine cell CL:0000164 +Follicular follicular B cell CL:0000843 +GC goblet cell CL:0000160 +Goblet goblet cell CL:0000160 +ILCs innate lymphoid cell CL:0001065 +Immature Enterocytes 1 enterocyte CL:0000584 +Immature Enterocytes 2 enterocyte CL:0000584 +Immature Goblet goblet cell CL:0000160 +M cells M cell of gut CL:0000682 +MT-hi T cell CL:0000084 +Macrophages macrophage CL:0000235 +NKs natural killer cell CL:0000623 +Plasma plasma cell CL:0000786 +Secretory TA transit amplifying cell of large intestine CL:0009011 +Stem intestinal crypt stem cell CL:0002250 +TA 1 transit amplifying cell of large intestine CL:0009011 +TA 2 transit amplifying cell of large intestine CL:0009011 +Tregs regulatory T cell CL:0000815 +Tuft intestinal tuft cell CL:0019032 +WNT2B+ Fos-lo 1 fibroblast CL:0000057 +WNT5B+ 2 fibroblast CL:0000057 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py similarity index 56% rename from sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index e184ad416..b1248a41c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10x_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -17,12 +17,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_ileum_2019_10x_martin_001_10.1016/j.cell.2019.08.008" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" self.download_url_meta = None - self.author = "Kenigsberg" + self.author = "Martin" self.doi = "10.1016/j.cell.2019.08.008" self.healthy = True 
self.normalization = "raw" @@ -35,30 +33,7 @@ def __init__( self.var_ensembl_col = "gene_ids" self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "T cells": "T cells", - "Plasma cells": "Plasma Cells", - "B cells": "B cells", - "MNP": "MNP", - "ILC": "ILC", - "Enterocytes": "Enterocytes", - "Fibs": "Fibroblasts", - "CD36+ endothelium": "CD36+ endothelium", - "Progenitors": "Progenitors", - "Goblets": "Goblet cells", - "Glial cells": "Glial cells", - "Cycling": "Cycling", - "ACKR1+ endothelium": "ACKR1+ endothelium", - "Pericytes": "Pericytes", - "Lymphatics": "Lymphatics", - "Mast cells": "Mast cells", - "SM": "Smooth muscle cell", - "TA": "TA", - "Paneth cells": "Paneth cells", - "Enteroendocrines": "Enteroendocrine cells", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "martin19.processed.h5ad") @@ -66,5 +41,6 @@ def _load(self): adata.X = np.expm1(adata.X) adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) adata = adata[adata.obs["CellType"] != "Doublets"].copy() + self.set_unknown_class_id(ids=["Cycling"]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv new file mode 100644 index 000000000..9ca20f7a7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv @@ -0,0 +1,20 @@ +source target target_id +ACKR1+ endothelium endothelial cell CL:0000115 +B cells B cell CL:0000236 +CD36+ endothelium endothelial cell CL:0000115 +Enterocytes enterocyte CL:0000584 +Enteroendocrines enteroendocrine cell CL:0000164 +Fibs fibroblast CL:0000057 +Glial cells glial cell CL:0000125 +Goblets ileal goblet cell CL:1000326 +ILC innate lymphoid cell CL:0001065 +Lymphatics endothelial cell of lymphatic 
vessel CL:0002138 +MNP mononuclear phagocyte CL:0000113 +Mast cells mast cell CL:0000097 +Paneth cells paneth cell CL:0000510 +Pericytes pericyte cell CL:0000669 +Plasma cells plasma cell CL:0000786 +Progenitors intestinal crypt stem cell CL:0002250 +SM smooth muscle cell of small intestine CL:1000275 +T cells T cell CL:0000084 +TA transit amplifying cell of large intestine CL:0009011 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py similarity index 70% rename from sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index 0f982892a..ae6b33ed3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostate_2018_10x_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -8,6 +8,11 @@ class Dataset(DatasetBase): + """ + ToDo: revisit these cell type maps, Club and Hillock are described in this paper. 
+ Club,epithelial cell of prostate + Hillock,epithelial cell of prostate + """ def __init__( self, @@ -17,12 +22,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_prostate_2018_10x_henry_001_10.1016/j.celrep.2018.11.086" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" self.download_url_meta = None - self.author = "Strand" + self.author = "Henry" self.doi = "10.1016/j.celrep.2018.11.086" self.healthy = True self.normalization = "raw" @@ -33,21 +36,9 @@ def __init__( self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "Basal": "Basal cell", - "Hillock": "Hillock", - "Luminal": "Luminal", - "Endothelia": "Endothelial cell", - "Club": "Club", - "Fibroblast": "Fibroblast", - "Smooth muscle": "Smooth muscle cell", - "Leukocytes": "Leukocytes", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "henry18_0.processed.h5ad") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.tsv new file mode 100644 index 000000000..182443a59 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.tsv @@ -0,0 +1,9 @@ +source target target_id +Basal basal cell of prostate epithelium CL:0002341 +Club epithelial cell of prostate CL:0002231 +Endothelia endothelial cell CL:0000115 +Fibroblast fibroblast CL:0000057 +Hillock epithelial cell of prostate CL:0002231 +Leukocytes leukocyte CL:0000738 +Luminal luminal cell of prostate epithelium CL:0002340 +Smooth muscle smooth muscle cell of prostate CL:1000487 diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 0837a024f..3fd237a09 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -8,6 +8,9 @@ class Dataset(DatasetBase): + """ + ToDo: revisit gamma cell missing in CO + """ def __init__( self, @@ -17,12 +20,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2016_indrop_baron_001_10.1016/j.cels.2016.08.011" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" self.download_url_meta = None - self.author = "Yanai" + self.author = "Baron" self.doi = "10.1016/j.cels.2016.08.011" self.healthy = True self.normalization = "raw" @@ -33,9 +34,10 @@ def __init__( self.year = 2016 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.set_dataset_id(idx=1) + self.class_maps = { "0": { "t_cell": "T cell", diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.tsv new file mode 100644 index 000000000..0ba8de392 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.tsv @@ -0,0 +1,15 @@ +source target target_id +acinar pancreatic acinar cell CL:0002064 +activated_stellate pancreatic stellate cell CL:0002410 +alpha pancreatic A cell CL:0000171 +beta type B pancreatic cell CL:0000169 +delta pancreatic D cell CL:0000173 +ductal pancreatic ductal cell CL:0002079 +endothelial endothelial cell CL:0000115 +epsilon pancreatic epsilon cell 
CL:0005019 +gamma pancreatic endocrine cell CL:0008024 +macrophage macrophage CL:0000235 +mast mast cell CL:0000097 +quiescent_stellate pancreatic stellate cell CL:0002410 +schwann Schwann cell CL:0002573 +t_cell T cell CL:0000084 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index 9ee4974b9..110f13997 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -7,6 +7,9 @@ class Dataset(DatasetBase): + """ + ToDo: revisit gamma cell missing in CO + """ def __init__( self, @@ -16,12 +19,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_pancreas_2016_smartseq2_segerstolpe_001_10.1016/j.cmet.2016.08.020" - self.download_url_data = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" self.download_url_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" - self.author = "Sandberg" + self.author = "Segerstolpe" self.doi = "10.1016/j.cmet.2016.08.020" self.normalization = "raw" self.organ = "pancreas" @@ -30,31 +31,12 @@ def __init__( self.year = 2016 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Characteristics[cell type]" self.obs_key_state_exact = "Characteristics[disease]" self.obs_key_healthy = self.obs_key_state_exact - self.healthy_state_healthy = "normal" - self.class_maps = { - "0": { - "alpha cell": "Alpha cell", - "ductal cell": "Ductal cell", - "beta cell": "Beta cell", - "gamma cell": "Gamma cell", - "acinar cell": "Acinar cell", - "delta cell": "Delta cell", - "PSC cell": "PSC cell", - "unclassified endocrine cell": 
"Unclassified endocrine cell", - "co-expression cell": "Co-expression cell", - "endothelial cell": "Endothelial cell", - "epsilon cell": "Epsilon cell", - "mast cell": "Mast cell", - "MHC class II cell": "MHC class II cell", - "unclassified cell": "Unknown", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -69,5 +51,6 @@ def _load(self): adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[adata.obs.index] # filter observations which are not cells (empty wells, low quality cells etc.) adata = adata[adata.obs["Characteristics[cell type]"] != "not applicable"].copy() + self.set_unknown_class_id(ids=["unclassified cell", "MHC class II cell"]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv new file mode 100644 index 000000000..48da67f57 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv @@ -0,0 +1,13 @@ +source target target_id +PSC cell pancreatic stellate cell CL:0002410 +acinar cell pancreatic acinar cell CL:0002064 +alpha cell pancreatic A cell CL:0000171 +beta cell type B pancreatic cell CL:0000169 +co-expression cell pancreatic endocrine cell CL:0008024 +delta cell pancreatic D cell CL:0000173 +ductal cell pancreatic ductal cell CL:0002079 +endothelial cell endothelial cell CL:0000115 +epsilon cell pancreatic epsilon cell CL:0005019 +gamma cell pancreatic endocrine cell CL:0008024 +mast cell mast cell CL:0000097 +unclassified endocrine cell pancreatic endocrine cell CL:0008024 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv deleted file mode 100644 index 27e4ae739..000000000 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.csv +++ /dev/null @@ -1,14 +0,0 @@ -source,target -acinar,pancreatic acinar cell -alpha,pancreatic A cell -beta,type B pancreatic cell -delta,pancreatic D cell -ductal,pancreatic ductal cell -endothelial,endothelial cell -erythroblast,erythroblast -fibroblast,fibroblast -leukocyte,leukocyte -lymphatic endothelial cell,endothelial cell of lymphatic vessel -pp,pancreatic PP cell -smooth_muscle,smooth muscle cell -stellate cell,pancreatic stellate cell diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py similarity index 89% rename from sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py rename to sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 18fae8841..d37a9ee58 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10x_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -29,14 +29,12 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"mouse_pancreas_2019_10x_thompson_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_" \ - f"10.1016/j.cmet.2019.01.021" - + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117770/suppl/GSE117770_RAW.tar" self.download_url_meta = f"private,{self.sample_fn}_annotation.csv" - self.author = "Bhushan" + self.author = "Thompson" self.doi = 
"10.1016/j.cmet.2019.01.021" self.healthy = False self.normalization = "raw" @@ -47,9 +45,10 @@ def __init__( self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltypes" + self.set_dataset_id(idx=1) + def _load(self): with tarfile.open(os.path.join(self.data_dir, 'GSE117770_RAW.tar')) as tar: for member in tar.getmembers(): diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.tsv new file mode 100644 index 000000000..c519ca087 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.tsv @@ -0,0 +1,14 @@ +source target target_id +acinar pancreatic acinar cell CL:0002064 +alpha pancreatic A cell CL:0000171 +beta type B pancreatic cell CL:0000169 +delta pancreatic D cell CL:0000173 +ductal pancreatic ductal cell CL:0002079 +endothelial endothelial cell CL:0000115 +erythroblast erythroblast CL:0000765 +fibroblast fibroblast CL:0000057 +leukocyte leukocyte CL:0000738 +lymphatic endothelial cell endothelial cell of lymphatic vessel CL:0002138 +pp pancreatic PP cell CL:0002275 +smooth_muscle smooth muscle cell CL:0000192 +stellate cell pancreatic stellate cell CL:0002410 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py deleted file mode 100644 index dd39a4205..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10x_miller_001.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: 
Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_10x_miller_001_10.1016/j.devcel.2020.01.033" - - self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" - self.download_url_meta = None - - self.author = "Spence" - self.doi = "10.1016/j.devcel.2020.01.033" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "Cell_type" - - self.class_maps = { - "0": { - "Airway Smooth Muscle": "Airway smooth muscle", - "Basal cell": "Basal", - "Bud tip adjacent": "Fetal airway progenitors", - "Bud tip progenitor": "Fetal airway progenitors", - "Cartilage": "Cartilage", - "Club-like secretory": "Secretory", - "Endothelial": "1_Endothelial", - "Epithelial": "1_Epithelial", - "Goblet-like secretory": "Secretory", - "Hematopoietic, B Cells": "B cell lineage", - "Hematopoietic, Macrophage": "Macrophages", - "Hematopoietic, Natural Killer Cell": "Innate lymphoid cells", - "Hematopoietic, T Cells": "T cell lineage", - "Immune": "1_Immune", - "Intermediate ciliated": "Multiciliated lineage", - "Mesenchyme RSPO2+": "1_Stroma", - "Mesenchyme SERPINF1-high": "1_Stroma", - "Multiciliated cell": "Multiciliated lineage", - "Multiciliated precursor": "Multiciliated lineage", - "Neuroendocrine": "Rare", - "Pericyte": "Fibroblasts", - "RBC": "Erythrocytes", - "Secretory progenitor": "Secretory", - "Submucosal gland": "Submucosal Secretory", - "Submucosal gland basal": "Submucosal Secretory", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, "miller20.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, 
None])).multiply(1 / 10000) - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py new file mode 100644 index 000000000..c6e3fed8d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -0,0 +1,50 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" + self.download_url_meta = None + + self.author = "Miller" + self.doi = "10.1016/j.devcel.2020.01.033" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "Cell_type" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, "miller20.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / 10000) + self.set_unknown_class_id(ids=[ + "Bud tip adjacent", + "Bud tip progenitor", + "Submucosal gland", + "Submucosal gland basal", + ]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv 
b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv new file mode 100644 index 000000000..219d954f4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv @@ -0,0 +1,22 @@ +source target target_id +Airway Smooth Muscle bronchial smooth muscle cell CL:0002598 +Basal cell respiratory basal cell CL:0002633 +Cartilage chondrocyte CL:0000138 +Club-like secretory secretory cell CL:0000151 +Endothelial endothelial cell CL:0000115 +Epithelial epithelial cell of lung CL:0000082 +Goblet-like secretory secretory cell CL:0000151 +Hematopoietic B Cells B cell CL:0000236 +Hematopoietic Macrophage macrophage CL:0000235 +Hematopoietic Natural Killer Cell natural killer cell CL:0000623 +Hematopoietic T Cells T cell CL:0000084 +Immune leukocyte CL:0000738 +Intermediate ciliated ciliated cell CL:0000064 +Mesenchyme RSPO2+ mesenchymal cell CL:0008019 +Mesenchyme SERPINF1-high mesenchymal cell CL:0008019 +Multiciliated cell multi-ciliated epithelial cell CL:0005012 +Multiciliated precursor multi-ciliated epithelial cell CL:0005012 +Neuroendocrine neuroendocrine cell CL:0000165 +Pericyte pericyte cell CL:0000669 +RBC erythrocyte CL:0000232 +Secretory progenitor secretory cell CL:0000151 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py similarity index 56% rename from sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index 52840c0d3..be308522f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_DroNcSeq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -17,12 +17,10 @@ def __init__( 
**kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_brain_2017_DroNcSeq_habib_001_10.1038/nmeth.4407" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" self.download_url_meta = None - self.author = "Regev" + self.author = "Habib" self.doi = "10.1038/nmeth.4407" self.healthy = True self.normalization = "raw" @@ -33,28 +31,9 @@ def __init__( self.year = 2017 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "exPFC1": "Glutamatergic neurons from the PFC 1", - "exPFC2": "Glutamatergic neurons from the PFC 2", - "exDG": "Granule neurons from the hip dentate gyrus region", - "GABA1": "GABAergic interneurons 1", - "GABA2": "GABAergic interneurons 2", - "exCA3": "Pyramidal neurons from the hip CA region 1", - "exCA1": "Pyramidal neurons from the hip CA region 2", - "ODC1": "Oligodendrocytes", - "ASC1": "Astrocytes 1", - "OPC": "Oligodendrocyte precursors", - "ASC2": "Astrocytes 2", - "Unclassified": "Unknown", - "MG": "Microglia", - "NSC": "Neuronal stem cells", - "END": "Endothelial cells", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "habib17.processed.h5ad") @@ -62,4 +41,5 @@ def _load(self): adata.X = np.expm1(adata.X) adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + self.set_unknown_class_id(ids=["Unclassified"]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv new file mode 100644 index 000000000..b15f31bdf --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv @@ -0,0 +1,15 @@ +source target target_id +ASC1 astrocyte CL:0000127 +ASC2 astrocyte CL:0000127 +END endothelial cell 
CL:0000115 +GABA1 GABAergic interneuron CL:0011005 +GABA2 GABAergic interneuron CL:0011005 +MG microglial cell CL:0000129 +NSC neuronal stem cell CL:0000047 +ODC1 oligodendrocyte CL:0000128 +OPC oligodendrocyte precursor cell CL:0002453 +exCA1 hippocampal pyramidal neuron CL:1001571 +exCA3 hippocampal pyramidal neuron CL:1001571 +exDG dentate gyrus of hippocampal formation granule cell CL:2000089 +exPFC1 primary motor cortex pyramidal cell CL:2000049 +exPFC2 primary motor cortex pyramidal cell CL:2000049 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py similarity index 59% rename from sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index dd9fcc50d..65520ea3d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10x_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -17,12 +17,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_testis_2018_10x_guo_001_10.1038/s41422-018-0099-2" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_url_meta = None - self.author = "Cairns" + self.author = "Guo" self.doi = "10.1038/s41422-018-0099-2" self.healthy = True self.normalization = "raw" @@ -33,24 +31,9 @@ def __init__( self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "Elongated Spermatids": "Elongated Spermatids", - "Leydig cells": "Leydig cells", - "Early Primary Spermatocytes": "Early Primary Spermatocytes", - "Round Spermatids": "Round 
Spermatids", - "Endothelial cells": "Endothelial cells", - "Macrophages": "Macrophages", - "Myoid cells": "Myoid cells", - "Differentiating Spermatogonia": "Differentiating Spermatogonia", - "Late primary Spermatocytes": "Late primary Spermatocytes", - "Spermatogonial Stem cell": "Spermatogonial Stem cell", - "Sertoli cells": "Sertoli cells", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "guo18_donor.processed.h5ad") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.tsv new file mode 100644 index 000000000..bbb34ee72 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.tsv @@ -0,0 +1,13 @@ +source target target_id +Differentiating Spermatogonia spermatogonium CL:0000020 +Early Primary Spermatocytes primary spermatocyte CL:0000656 +Elongated Spermatids spermatid CL:0000018 +Endothelial cells endothelial cell CL:0000115 +Late primary Spermatocytes primary spermatocyte CL:0000656 +Leydig cells Leydig cell CL:0000178 +Macrophages macrophage CL:0000235 +Myoid cells peritubular myoid cell CL:0002481 +Round Spermatids spermatid CL:0000018 +Sertoli cells Sertoli cell CL:0000216 +Sperm sperm CL:0000019 +Spermatogonial Stem cell male germ line stem cell (sensu Vertebrata) CL:0000089 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py similarity index 57% rename from sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index 93778fdd9..6cace3eb6 
100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_liver_2018_10x_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -16,12 +16,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2018_10x_macparland_001_10.1038/s41467-018-06318-7" - self.download_url_data = "private,GSE115469.csv.gz" self.download_url_meta = "private,GSE115469_labels.txt" - self.author = "McGilvray" + self.author = "MacParland" self.doi = "10.1038/s41467-018-06318-7" self.healthy = True self.normalization = "raw" @@ -32,33 +30,9 @@ def __init__( self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" - self.class_maps = { - "0": { - "1": "Hepatocyte 1", - "2": "Alpha beta T cells", - "3": "Hepatocyte 2", - "4": "Inflammatory macrophages", - "5": "Hepatocyte 3", - "6": "Hepatocyte 4", - "7": "Plasma cells", - "8": "NK cell", - "9": "Gamma delta T cells 1", - "10": "Non inflammatory macrophages", - "11": "Periportal LSECs", - "12": "Central venous LSECs", - "13": "Endothelial cell", - "14": "Hepatocyte 5", - "15": "Hepatocyte 6", - "16": "Mature B cells", - "17": "Cholangiocytes", - "18": "Gamma delta T cells 2", - "19": "Erythroid cells", - "20": "Hepatic stellate cells" - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.tsv new file mode 100644 index 000000000..78ac4a487 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.tsv @@ -0,0 +1,21 @@ +source target target_id +1 hepatocyte 
CL:0000182 +10 macrophage CL:0000235 +11 endothelial cell of periportal hepatic sinusoid CL:0019021 +12 endothelial cell of pericentral hepatic sinusoid CL:0019022 +13 endothelial cell CL:0000115 +14 hepatocyte CL:0000182 +15 hepatocyte CL:0000182 +16 mature B cell CL:0000785 +17 cholangiocyte CL:1000488 +18 gamma-delta T cell CL:0000798 +19 erythroid lineage cell CL:0000764 +2 alpha-beta T cell CL:0000789 +20 hepatic stellate cell CL:0000632 +3 hepatocyte CL:0000182 +4 inflammatory macrophage CL:0000863 +5 hepatocyte CL:0000182 +6 hepatocyte CL:0000182 +7 plasma cell CL:0000786 +8 natural killer cell CL:0000623 +9 gamma-delta T cell CL:0000798 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 604c5abcf..a364add29 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -16,14 +16,12 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2019_droncseq_lake_001_10.1038/s41467-019-10861-2" - self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" - self.author = "Jain" + self.author = "Lake" self.doi = "10.1038/s41467-019-10861-2" self.healthy = True self.normalization = "raw" @@ -34,40 +32,9 @@ def __init__( self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" - self.class_maps = { - "0": { - "Collecting 
Duct - Intercalated Cells Type A (cortex)": "Collecting Duct - Intercalated Cells Type A (cortex)", - "Collecting Duct - Intercalated Cells Type A (medulla)": "Collecting Duct - Intercalated Cells Type A (medulla)", - "Collecting Duct - Intercalated Cells Type B": "Collecting Duct - Intercalated Cells Type B", - "Collecting Duct - PCs - Stressed Dissoc Subset": "Collecting Duct - PCs - Stressed Dissoc Subset", - "Collecting Duct - Principal Cells (cortex)": "Collecting Duct - Principal Cells (cortex)", - "Collecting Duct - Principal Cells (medulla)": "Collecting Duct - Principal Cells (medulla)", - "Connecting Tubule": "Connecting tubule", - "Decending Limb": "Decending Limb", - "Distal Convoluted Tubule": "Distal Convoluted Tubule", - "Endothelial Cells (unassigned)": "Endothelial Cells (unassigned)", - "Endothelial Cells - AEA & DVR ": "Endothelial Cells - AEA & DVR", - "Endothelial Cells - AVR": "Endothelial Cells - AVR", - "Endothelial Cells - glomerular capillaries": "Endothelial Cells - glomerular capillaries", - "Epithelial Cells (unassigned)": "Epithelial Cells (unassigned)", - "Immune Cells - Macrophages": "Macrophage", - "Interstitium": "Interstitium", - "Mesangial Cells": "Mesangial Cells", - "Podocytes": "Podocyte", - "Proximal Tubule Epithelial Cells (S1)": "Proximal Tubule Epithelial Cells (S1)", - "Proximal Tubule Epithelial Cells (S2)": "Proximal Tubule Epithelial Cells (S2)", - "Proximal Tubule Epithelial Cells (S3)": "Proximal Tubule Epithelial Cells (S3)", - "Proximal Tubule Epithelial Cells - Fibrinogen+ (S3 )": "Proximal Tubule Epithelial Cells - Fibrinogen+ (S3)", - "Proximal Tubule Epithelial Cells - Stress/Inflam": "Proximal Tubule Epithelial Cells - Stress/Inflam", - "Thick Ascending Limb": "Thick ascending limb of Loop of Henle", - "Thin ascending limb": "Thin ascending limb", - "Unknown - Novel PT CFH+ Subpopulation (S2)": "Unknown - Novel PT CFH+ Subpopulation (S2)", - "Vascular Smooth Muscle Cells and pericytes": "Vascular Smooth Muscle 
Cells and pericytes", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -78,4 +45,6 @@ def _load(self): annot = pd.read_csv(fn[1], index_col=0, dtype="category") adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in adata.obs.index] + self.set_unknown_class_id(ids=["Unknown"]) + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv new file mode 100644 index 000000000..86af4d6cd --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv @@ -0,0 +1,27 @@ +source target target_id +Collecting Duct - Intercalated Cells Type A (cortex) kidney collecting duct intercalated cell CL:1001432 +Collecting Duct - Intercalated Cells Type A (medulla) kidney collecting duct intercalated cell CL:1001432 +Collecting Duct - Intercalated Cells Type B kidney collecting duct intercalated cell CL:1001432 +Collecting Duct - PCs - Stressed Dissoc Subset kidney collecting duct principal cell CL:1001431 +Collecting Duct - Principal Cells (cortex) kidney collecting duct principal cell CL:1001431 +Collecting Duct - Principal Cells (medulla) kidney collecting duct principal cell CL:1001431 +Connecting Tubule kidney connecting tubule epithelial cell CL:1000768 +Decending Limb kidney loop of Henle descending limb epithelial cell CL:1001021 +Distal Convoluted Tubule kidney distal convoluted tubule epithelial cell CL:1000849 +Endothelial Cells (unassigned) endothelial cell CL:0000115 +Endothelial Cells - AEA & DVR kidney blood vessel cell CL:1000854 +Endothelial Cells - AVR kidney blood vessel cell CL:1000854 +Endothelial Cells - glomerular capillaries glomerular capillary endothelial cell CL:1001005 +Epithelial Cells (unassigned) renal intercalated cell CL:0005010 +Immune Cells - Macrophages macrophage CL:0000235 +Interstitium 
kidney interstitial cell CL:1000500 +Mesangial Cells mesangial cell CL:0000650 +Podocytes glomerular visceral epithelial cell CL:0000653 +Proximal Tubule Epithelial Cells (S1) epithelial cell of proximal tubule CL:0002306 +Proximal Tubule Epithelial Cells (S2) epithelial cell of proximal tubule CL:0002306 +Proximal Tubule Epithelial Cells (S3) epithelial cell of proximal tubule CL:0002306 +Proximal Tubule Epithelial Cells - Fibrinogen+ (S3) epithelial cell of proximal tubule CL:0002306 +Proximal Tubule Epithelial Cells - Stress/Inflam epithelial cell of proximal tubule CL:0002306 +Thick Ascending Limb kidney loop of Henle thick ascending limb epithelial cell CL:1001106 +Thin ascending limb kidney loop of Henle thin ascending limb epithelial cell CL:1001107 +Vascular Smooth Muscle Cells and pericytes kidney pelvis smooth muscle cell CL:1000702 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py similarity index 51% rename from sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index d4eb2980e..b70005f30 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_mixed_2019_10x_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -37,33 +37,49 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"human_mixed_2019_10x_szabo_{str(SAMPLE_FNS.index(sample_fn)+1).zfill(3)}_10.1038/s41467-019-12464-3" - + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) 
self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" self.download_url_meta = [ "private,donor1.annotation.txt", "private,donor2.annotation.txt" ] - self.author = "Sims" + self.sample_dict = { + "GSM3589406_PP001swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "healthy"], + "GSM3589407_PP002swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "stimulated"], + "GSM3589408_PP003swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "healthy"], + "GSM3589409_PP004swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "stimulated"], + "GSM3589410_PP005swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "healthy"], + "GSM3589411_PP006swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "stimulated"], + "GSM3589412_PP009swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "healthy"], + "GSM3589413_PP010swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "stimulated"], + "GSM3589414_PP011swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "healthy"], + "GSM3589415_PP012swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "stimulated"], + "GSM3589416_PP013swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "healthy"], + "GSM3589417_PP014swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "stimulated"], + "GSM3589418_PP017swap.filtered.matrix.txt.gz": ["blood", "Donor A", "stimulated"], + "GSM3589419_PP018swap.filtered.matrix.txt.gz": ["blood", "Donor A", "healthy"], + "GSM3589420_PP019swap.filtered.matrix.txt.gz": ["blood", "Donor B", "stimulated"], + "GSM3589421_PP020swap.filtered.matrix.txt.gz": ["blood", "Donor B", "healthy"], + } + + self.author = "Szabo" self.doi = "10.1038/s41467-019-12464-3" - self.healthy = True self.normalization = "raw" + self.organ = self.sample_dict[self.sample_fn][0] self.organism = "human" self.protocol = "10X sequencing" - self.state_exact = "healthy" + self.state_exact = self.sample_dict[self.sample_fn][2] + self.healthy = self.sample_dict[self.sample_fn][2] == 
"healthy" self.year = 2019 self.var_symbol_col = "Gene" self.var_ensembl_col = "Accession" - self.obs_key_cellontology_original = "cell_ontology_class" self.obs_key_organ = "organ" - self.class_maps = { - "0": {}, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -73,32 +89,15 @@ def _load(self): ] with tarfile.open(fn[0]) as tar: df = pd.read_csv(tar.extractfile(self.sample_fn), compression="gzip", sep="\t") - df.index = [i.split(".")[0] for i in df["Accession"]] - var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) - if df.columns[-1].startswith("Un"): - df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - adata = anndata.AnnData(df.T) - adata.var = var - if "PP001" in self.sample_fn or "PP002" in self.sample_fn: - adata.obs["donor"] = "Donor1" - adata.obs["organ"] = "lung" - elif "PP003" in self.sample_fn or "PP004" in self.sample_fn: - adata.obs["donor"] = "Donor1" - adata.obs["organ"] = "bone marrow" - elif "PP005" in self.sample_fn or "PP006" in self.sample_fn: - adata.obs["donor"] = "Donor1" - adata.obs["organ"] = "lymph Node" - elif "PP009" in self.sample_fn or "PP010" in self.sample_fn: - adata.obs["donor"] = "Donor2" - adata.obs["organ"] = "lung" - elif "PP011" in self.sample_fn or "PP012" in self.sample_fn: - adata.obs["donor"] = "Donor2" - adata.obs["organ"] = "bone marrow" - elif "PP013" in self.sample_fn or "PP014" in self.sample_fn: - adata.obs["donor"] = "Donor2" - adata.obs["organ"] = "lymph Node" - adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." 
+ adata.obs.index - adata.obs["cell_ontology_class"] = "Unknown" + df.index = [i.split(".")[0] for i in df["Accession"]] + var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) + if df.columns[-1].startswith("Un"): + df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) + adata = anndata.AnnData(df.T) + adata.var = var + adata.obs["donor"] = self.sample_dict[self.sample_fn][1] + adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." + adata.obs.index + adata.obs["cell_ontology_class"] = "unknown" df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) for i in df1.index: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv new file mode 100644 index 000000000..908ddf806 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv @@ -0,0 +1,21 @@ +source target target_id +1.CD4rest CD4-positive, alpha-beta T cell CL:0000624 +10.CD8EM/TRMact effector memory CD8-positive, alpha-beta T cell CL:0000913 +10.CD8TEMRAact effector memory CD8-positive, alpha-beta T cell CL:0000913 +11.CD8TEMRA effector memory CD8-positive, alpha-beta T cell CL:0000913 +2.CD4act1 CD4-positive, alpha-beta T cell CL:0000624 +2.CD4rest2 CD4-positive, alpha-beta T cell CL:0000624 +3.CD4act1 CD4-positive, alpha-beta T cell CL:0000624 +3.CD4act2 CD4-positive, alpha-beta T cell CL:0000624 +4.CD4act2 CD4-positive, alpha-beta T cell CL:0000624 +4.CD4act3 CD4-positive, alpha-beta T cell CL:0000624 +5.CD4TRMrest CD4-positive, alpha-beta T cell CL:0000624 +5.CD4act3 CD4-positive, alpha-beta T cell CL:0000624 +6.CD4TRMact CD4-positive, alpha-beta T cell CL:0000624 +6.CD4Treg CD4-positive, CD25-positive, alpha-beta regulatory T cell CL:0000792 +7.CD4Treg CD4-positive, CD25-positive, 
alpha-beta regulatory T cell CL:0000792 +7.CD8EM/TRMrest effector memory CD8-positive, alpha-beta T cell CL:0000913 +8.CD8EM/TRMact effector memory CD8-positive, alpha-beta T cell CL:0000913 +8.CD8EM/TRMrest effector memory CD8-positive, alpha-beta T cell CL:0000913 +9.CD8TEMRArest effector memory CD8-positive, alpha-beta T cell CL:0000913 +9.CD8TRMrest effector memory CD8-positive, alpha-beta T cell CL:0000913 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py similarity index 66% rename from sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 0b2409204..84240f33b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_eye_2019_10x_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -15,12 +15,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_eye_2019_10x_menon_001_10.1038/s41467-019-12780-8" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" self.download_url_meta = None - self.author = "Hafler" + self.author = "Menon" self.doi = "10.1038/s41467-019-12780-8" self.healthy = True self.normalization = "raw" @@ -31,22 +29,9 @@ def __init__( self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "ACs": "Amacrine cell", - "BPs": "BPs", - "Cones": "Retinal cone cell", - "Endo": "Endothelial cell", - "HCs": "Horizontal cells", - "Macroglia": "Macroglia", - "Microglia": "Microglia", - "RGCs": "Retinal ganglion cell", - "Rods": "Rods", - }, - } + 
self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "menon19.processed.h5ad") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.tsv new file mode 100644 index 000000000..11901244d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.tsv @@ -0,0 +1,10 @@ +source target target_id +ACs amacrine cell CL:0000561 +BPs cone retinal bipolar cell CL:0000752 +Cones retinal cone cell CL:0000573 +Endo retinal blood vessel endothelial cell CL:0002585 +HCs retina horizontal cell CL:0000745 +Macroglia macroglial cell CL:0000126 +Microglia microglial cell CL:0000129 +RGCs retinal ganglion cell CL:0000740 +Rods retinal rod cell CL:0000604 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py similarity index 51% rename from sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 38bb27c65..3410fca6d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_10x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -21,17 +21,14 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - protocol = "10x" if self.sample_fn == "E-MTAB-6678.processed" else "smartseq2" - self.id = f"human_placenta_2018_{protocol}_ventotormo_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - 
f"10.1038/s41586-018-0698-6" - + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) self.download_url_data = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.1.zip" self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.2.zip" - self.author = "Teichmann" + self.author = "Ventotormo" self.healthy = True self.normalization = "raw" self.organ = "placenta" @@ -43,45 +40,9 @@ def __init__( self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" - self.obs_key_cellontology_original = "annotation" - self.class_maps = { - "0": { - "DC1": "Dendritic Cells 1", - "DC2": "Dendritic Cells 2", - "EVT": "Extravillous Trophoblasts", - "Endo (f)": "Endothelial Cells f", - "Endo (m)": "Endothelial Cells m", - "Endo L": "Endothelial Cells L", - "Epi1": "Epithelial Glandular Cells 1", - "Epi2": "Epithelial Glandular Cells 2", - "Granulocytes": "Granulocytes", - "HB": "Hofbauer Cells", - "ILC3": "ILC3", - "MO": "Monocyte", - "NK CD16+": "NK Cells CD16+", - "NK CD16-": "NK Cells CD16-", - "Plasma": "B cell (Plasmocyte)", - "SCT": "Syncytiotrophoblasts", - "Tcells": "T cell", - "VCT": "Villous Cytotrophoblasts", - "dM1": "Decidual Macrophages 1", - "dM2": "Decidual Macrophages 2", - "dM3": "Decidual Macrophages 3", - "dNK p": "Decidual NK Cells p", - "dNK1": "Decidual NK Cells 1", - "dNK2": "Decidual NK Cells 2", - "dNK3": "Decidual NK Cells 3", - "dP1": "Perivascular Cells 1", - "dP2": "Perivascular Cells 2", - "dS1": "Decidual Stromal Cells 1", - "dS2": "Decidual Stromal Cells 2", - "dS3": "Decidual Stromal Cells 3", - "fFB1": "Fibroblasts 1", - "fFB2": "Fibroblasts 2", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.tsv 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.tsv new file mode 100644 index 000000000..179972c0a --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.tsv @@ -0,0 +1,33 @@ +source target target_id +DC1 dendritic cell CL:0000451 +DC2 dendritic cell CL:0000451 +EVT extravillous trophoblast CL:0008036 +Endo (f) endothelial cell CL:0000115 +Endo (m) endothelial cell CL:0000115 +Endo L endothelial cell CL:0000115 +Epi1 placental epithelial cell CL:0002577 +Epi2 placental epithelial cell CL:0002577 +Granulocytes granulocyte CL:0000094 +HB Hofbauer cell CL:3000001 +ILC3 group 3 innate lymphoid cell CL:0001071 +MO monocyte CL:0000576 +NK CD16+ natural killer cell CL:0000623 +NK CD16- natural killer cell CL:0000623 +Plasma plasma cell CL:0000786 +SCT syncytiotrophoblast cell CL:0000525 +Tcells T cell CL:0000084 +VCT placental villous trophoblast CL:2000060 +dM1 macrophage CL:0000235 +dM2 macrophage CL:0000235 +dM3 macrophage CL:0000235 +dNK p decidual natural killer cell, human CL:0002343 +dNK1 decidual natural killer cell, human CL:0002343 +dNK2 decidual natural killer cell, human CL:0002343 +dNK3 decidual natural killer cell, human CL:0002343 +dP1 smooth muscle cell CL:0000192 +dP2 smooth muscle cell CL:0000192 +dS1 decidual cell CL:2000002 +dS2 decidual cell CL:2000002 +dS3 decidual cell CL:2000002 +fFB1 fibroblast CL:0000057 +fFB2 fibroblast CL:0000057 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py deleted file mode 100644 index cd4d9021e..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_CELseq2_aizarani_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import os -from typing import Union -import pandas as pd - -from sfaira.data import 
DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2019_mCELSeq2_aizarani_001_10.1038/s41586-019-1373-2" - - self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" - self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" - - self.author = "Gruen" - self.doi = "10.1038/s41586-019-1373-2" - self.healthy = True - self.normalization = "raw" - self.organ = "liver" - self.organism = "human" - self.protocol = "CEL-seq2" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - self.class_maps = { - "0": { - "1": "NK, NKT and T cells", - "2": "Kupffer Cell", - "3": "NK, NKT and T cells", - "4": "Cholangiocytes", - "5": "NK, NKT and T cells", - "6": "Kupffer Cell", - "7": "Cholangiocytes", - "8": "B Cell", - "9": "Liver sinusoidal endothelial cells", - "10": "Macrovascular endothelial cells", - "11": "Hepatocyte", - "12": "NK, NKT and T cells", - "13": "Liver sinusoidal endothelial cells", - "14": "Hepatocyte", - "15": "Other endothelial cells", - "16": "Unknown", - "17": "Hepatocyte", - "18": "NK, NKT and T cells", - "19": "Unknown", - "20": "Liver sinusoidal endothelial cells", - "21": "Macrovascular endothelial cells", - "22": "B Cell", - "23": "Kupffer Cell", - "24": "Cholangiocytes", - "25": "Kupffer Cell", - "26": "Other endothelial cells", - "27": "Unknown", - "28": "NK, NKT and T cells", - "29": "Macrovascular endothelial cells", - "30": "Hepatocyte", - "31": "Kupffer Cell", - "32": "Liver sinusoidal endothelial cells", - "33": "Hepatic stellate cells", - 
"34": "B Cell", - "35": "Other endothelial cells", - "36": "Unknown", - "37": "Unknown", - "38": "B Cell", - "39": "Cholangiocytes" - }, - } - - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.data_dir, "GSE124395_clusterpartition.txt.gz") - ] - adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) - celltype_df = pd.read_csv(fn[1], sep=" ") - adata = adata[[i in celltype_df.index for i in adata.obs.index]].copy() - adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in adata.obs.index] - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py new file mode 100644 index 000000000..dfe3473d7 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -0,0 +1,49 @@ +import anndata +import os +from typing import Union +import pandas as pd + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" + self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" + + self.author = "Aizarani" + self.doi = "10.1038/s41586-019-1373-2" + self.healthy = True + self.normalization = "raw" + self.organ = "liver" + self.organism = "human" + self.protocol = "CEL-seq2" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + 
self.obs_key_cellontology_original = "CellType" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = [ + os.path.join(self.data_dir, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(self.data_dir, "GSE124395_clusterpartition.txt.gz") + ] + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + celltype_df = pd.read_csv(fn[1], sep=" ") + adata = adata[[i in celltype_df.index for i in adata.obs.index]].copy() + adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in adata.obs.index] + + self.set_unknown_class_id(ids=["16", "19", "27", "36", "37"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv new file mode 100644 index 000000000..883651b69 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv @@ -0,0 +1,35 @@ +source target target_id +1 alpha-beta T cell CL:0000789 +10 endothelial cell CL:0000115 +11 hepatocyte CL:0000182 +12 alpha-beta T cell CL:0000789 +13 endothelial cell of hepatic sinusoid CL:1000398 +14 hepatocyte CL:0000182 +15 endothelial cell CL:0000115 +17 hepatocyte CL:0000182 +18 alpha-beta T cell CL:0000789 +2 Kupffer cell CL:0000091 +20 endothelial cell of hepatic sinusoid CL:1000398 +21 endothelial cell CL:0000115 +22 B cell CL:0000236 +23 Kupffer cell CL:0000091 +24 cholangiocyte CL:1000488 +25 Kupffer cell CL:0000091 +26 endothelial cell CL:0000115 +28 alpha-beta T cell CL:0000789 +29 endothelial cell of vascular tree CL:0002139 +3 alpha-beta T cell CL:0000789 +30 hepatocyte CL:0000182 +31 Kupffer cell CL:0000091 +32 endothelial cell of hepatic sinusoid CL:1000398 +33 hepatic stellate cell CL:0000632 +34 B cell CL:0000236 +35 endothelial cell CL:0000115 +38 B cell CL:0000236 +39 cholangiocyte CL:1000488 +4 cholangiocyte CL:1000488 +5 alpha-beta T cell 
CL:0000789 +6 Kupffer cell CL:0000091 +7 cholangiocyte CL:1000488 +8 B cell CL:0000236 +9 endothelial cell of hepatic sinusoid CL:1000398 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py deleted file mode 100644 index 57bbdde0d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10x_popescu_001.py +++ /dev/null @@ -1,73 +0,0 @@ -import anndata -import os -from typing import Union - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_liver_2019_10x_popescu_001_10.1038/s41586-019-1652-y" - - self.download_url_data = "private,fetal_liver_alladata_.h5ad" - self.download_url_meta = None - - self.author = "Haniffa" - self.doi = "10.1038/s41586-019-1652-y" - self.healthy = True - self.normalization = "raw" - self.organ = "liver" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "cell.labels" - - self.class_maps = { - "0": { - "B cell": "Mature B cells", - "DC1": "Dendritic cell 1", - "DC2": "Dendritic cell 2", - "DC precursor": "Dendritic cell precursor", - "Early Erythroid": "Early Erythroid", - "Early lymphoid_T lymphocyte": "Early lymphoid T lymphocyte", - "Endothelial cell": "Endothelial cell", - "Fibroblast": "Fibroblast", - "HSC_MPP": "HSC MPP", - "Hepatocyte": "Hepatocyte", - "ILC precursor": "ILC precursor", - "Kupffer Cell": "Kupffer Cell", - "Late Erythroid": "Late Erythroid", - "MEMP": "MEMP", - "Mast cell": "Mast cell", - "Megakaryocyte": 
"Megakaryocyte", - "Mid Erythroid": "Mid Erythroid", - "Mono-Mac": "Mono Macrophage", - "Monocyte": "Monocyte", - "Monocyte precursor": "Monocyte precursor", - "NK": "NK cell", - "Neutrophil-myeloid progenitor": "Neutrophil myeloid progenitor", - "Pre pro B cell": "Pre pro B cell", - "VCAM1+ EI macrophage": "VCAM1pos EI macrophage", - "pDC precursor": "pDendritic cell precursor", - "pre-B cell": "pre B cell", - "pro-B cell": "pro B cell" - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, "fetal_liver_alladata_.h5ad") - adata = anndata.read(fn) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py new file mode 100644 index 000000000..931081b0c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -0,0 +1,40 @@ +import anndata +import os +from typing import Union + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "private,fetal_liver_alladata_.h5ad" + self.download_url_meta = None + + self.author = "Popescu" + self.doi = "10.1038/s41586-019-1652-y" + self.healthy = True + self.normalization = "raw" + self.organ = "liver" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "cell.labels" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, "fetal_liver_alladata_.h5ad") + adata = anndata.read(fn) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.tsv new file mode 100644 index 000000000..56d02a20e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.tsv @@ -0,0 +1,28 @@ +source target target_id +B cell B cell CL:0000236 +DC precursor common dendritic progenitor CL:0001029 +DC1 dendritic cell CL:0000451 +DC2 dendritic cell CL:0000451 +Early Erythroid erythroid progenitor cell CL:0000038 +Early lymphoid_T lymphocyte early lymphoid progenitor CL:0000936 +Endothelial cell endothelial cell CL:0000115 +Fibroblast fibroblast CL:0000057 +HSC_MPP hematopoietic multipotent progenitor cell CL:0000837 +Hepatocyte hepatocyte CL:0000182 +ILC precursor immature innate lymphoid cell CL:0001082 +Kupffer Cell Kupffer cell CL:0000091 +Late Erythroid erythroid progenitor cell CL:0000038 +MEMP megakaryocyte-erythroid progenitor cell CL:0000050 +Mast cell mast cell CL:0000097 +Megakaryocyte megakaryocyte CL:0000556 +Mid Erythroid erythroid progenitor cell CL:0000038 +Mono-Mac myeloid leukocyte CL:0000766 +Monocyte monocyte CL:0000576 +Monocyte precursor granulocyte monocyte progenitor cell CL:0000557 +NK natural killer cell CL:0000623 +Neutrophil-myeloid progenitor common myeloid progenitor CL:0000049 +Pre pro B cell Fraction A pre-pro B cell CL:0002045 +VCAM1+ EI macrophage macrophage CL:0000235 +pDC precursor plasmacytoid dendritic cell CL:0000784 +pre-B cell precursor B cell CL:0000817 +pro-B cell pro-B cell CL:0000826 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 2b32ed892..fbdfa14b0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py 
+++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -128,13 +128,8 @@ def __init__( **kwargs ): - super().__init__( - sample_id=sample_id, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - **kwargs - ) + super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) sample_organ_dict = { 'AdultAdipose_1': 'adipose tissue of abdominal region', @@ -246,17 +241,15 @@ def __init__( self.download_url_data = "https://ndownloader.figshare.com/files/17727365" self.download_url_meta = [ - "adata", + "https://ndownloader.figshare.com/files/21758835", "https://ndownloader.figshare.com/files/22447898", ] self.obs_key_sample = "sample" self.organ = sample_organ_dict[self.sample_id] - self.id = f"human_{''.join(self.organ.split(' '))}_2020_microwellseq_han_" \ - f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_10.1038/s41586-020-2157-4" - self.author = "Guo" + self.author = "Han" self.doi = "10.1038/s41586-020-2157-4" self.healthy = True self.normalization = "raw" @@ -265,13 +258,14 @@ def __init__( self.state_exact = "healthy" self.year = 2020 - self.obs_key_cellontology_original = "cell_ontology_class" + self.obs_key_cellontology_original = "celltype_specific" self.obs_key_dev_stage = "dev_stage" self.obs_key_sex = "gender" self.obs_key_age = "age" - self.var_symbol_col = "index" + self.set_dataset_id(idx=1) + def _load_full(self): adata = anndata.read(os.path.join(self.data_dir, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix @@ -290,7 +284,7 @@ def _load_full(self): # load celltype labels and harmonise them # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: fig1_anno = pd.read_excel( - os.path.join(self.data_dir_base, "human", self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), + os.path.join(self.data_dir_base, self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), 
index_col="cellnames", engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables ) @@ -330,4 +324,28 @@ def _load_full(self): "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] + self.set_unknown_class_id( + ids=[ + "0", + "Unknown1", + "Unknown2", + "Intermediated cell", + "MT high", + "MT-gene high cell", + "Proliferating cell", + "Proliferating cell", + "Proliferating cell_C7 high", + "Proliferating cell_CCNB1 high", + "Proliferating cell_FABP5 high", + "Proliferating cell_HMGB2 high", + "Proliferating cell_KIAA0101 high", + "Proliferating cell_KIAA0101_high", + "Proliferating cell_PTTG1 high", + "Proliferating cell_TOP2A high", + "Proliferating cell_UBE2C high", + "Proliferating cell_UBE2C high", + "Proliferating cell_UBE2C_high" + ] + ) + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv new file mode 100644 index 000000000..920b736b1 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv @@ -0,0 +1,618 @@ +source target target_id +AT1 cell type I pneumocyte CL:0002062 +AT1 cell type I pneumocyte CL:0002062 +AT2 cell type II pneumocyte CL:0002063 +Absorptive cell absorptive cell CL:0000212 +Acinar cell_CLPS high acinar cell CL:0000622 +Acinar cell_CPA1 high acinar cell CL:0000622 +Acinar cell_CPA2 high acinar cell CL:0000622 +Acinar cell_REG1B high acinar cell CL:0000622 +Acniar cell_ANXA4 high acinar cell CL:0000622 +Activated T cell T cell CL:0000084 +Actived T cell T cell CL:0000084 +Adenocarcinoma epithelial cell abnormal cell CL:0001061 +Adipocyte _FGR high fat cell CL:0000136 +Adipocyte_SPP1 high fat cell CL:0000136 +Adipose tissue fat cell CL:0000136 
+Adrenocortical cell cortical cell of adrenal gland CL:0002097 +Adrenocortical cell_NOV high cortical cell of adrenal gland CL:0002097 +Adrenocortical cell_TPM2 high cortical cell of adrenal gland CL:0002097 +Airway smooth muscle cell smooth muscle cell of trachea CL:0002600 +Alpha cell pancreatic A cell CL:0000171 +Alveolar bipotent/intermediate cell pneumocyte CL:0000322 +Antigen presenting cell professional antigen presenting cell CL:0000145 +Antigen-presenting cell professional antigen presenting cell CL:0000145 +Arterial endothelial cell endothelial cell of artery CL:1000413 +Arterial endothelial cell_GJA5 high endothelial cell of artery CL:1000413 +Artry endothelial cell endothelial cell of artery CL:1000413 +Astrocyte astrocyte CL:0000127 +Astrocyte(Bergmann glia) Bergmann glial cell CL:0000644 +Atrial cardiomyocyte_MT high regular atrial cardiac myocyte CL:0002129 +Atrial cardiomyocyte_MYH6 high regular atrial cardiac myocyte CL:0002129 +Atrial cardiomyocyte_NPPA high regular atrial cardiac myocyte CL:0002129 +B cell B cell CL:0000236 +B cell (Centrocyte) B cell CL:0000236 +B cell (Plasmocyte) plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA/HG high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA/HM high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA/HM_IGK high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA/HM_IGL high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA_IGK high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHA_IGL high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG/HA high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG/HA_IGK high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG3 high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG_IGK high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHG_IGL high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHM/HA_IGK high plasma cell CL:0000786 +B cell (Plasmocyte)_IGHM/HA_IGL high plasma 
cell CL:0000786 +B cell (Plasmocyte)_IGHM/HG high plasma cell CL:0000786 +B cell (Plasmocyte)_IGLV3-1 high plasma cell CL:0000786 +B cell (centrocyte) B cell CL:0000236 +B cell(Centrocyte) B cell CL:0000236 +B cell(Centrocyte)_IGLC2 high B cell CL:0000236 +B cell(Centrocyte)_IGLL1 high B cell CL:0000236 +B cell(Plasmocyte) plasma cell CL:0000786 +B cell(Plasmocyte)_IGHG1 high plasma cell CL:0000786 +B cell(Plasmocyte)_IGHG4 high plasma cell CL:0000786 +B cell(Plasmocyte)_IGHM high plasma cell CL:0000786 +B cell(Plasmocyte)_IGKC high plasma cell CL:0000786 +B cell(Plasmocyte)_IGLC3 high plasma cell CL:0000786 +B cell(Plasmocyte)_IGLL5 high plasma cell CL:0000786 +B cell(Unknown) B cell CL:0000236 +B cell(plasmocyte)_IGHA1 high plasma cell CL:0000786 +B cell(plasmocyte)_IGHG3 high plasma cell CL:0000786 +B cell_Contamination B cell CL:0000236 +B cell_JCHAIN high B cell CL:0000236 +Basal cell basal cell CL:0000646 +Basal cell basal cell CL:0000646 +Basal cell_KRT6A high basal cell CL:0000646 +Basal/Epithelial cell basal cell CL:0000646 +Basal/epithelial cell basal cell CL:0000646 +Beta cell type B pancreatic cell CL:0000169 +CD4_T cell CD4-positive, alpha-beta T cell CL:0000624 +CD8 T cell CD8-positive, alpha-beta T cell CL:0000625 +CD8+ T cell CD8-positive, alpha-beta T cell CL:0000625 +CD8_T cell CD8-positive, alpha-beta T cell CL:0000625 +Cardiomyocyte cardiac muscle cell CL:0000746 +Cervical Mesothelial cell mesothelial cell CL:0000077 +Cholangiocyte cholangiocyte CL:1000488 +Chondrocyte chondrocyte CL:0000138 +Chondrocyte_CYTL1 high chondrocyte CL:0000138 +Chondrocyte_PANX3 high chondrocyte CL:0000138 +Chromaffin cell chromaffin cell CL:0000166 +Chromaffin cell_SPOCK3 high chromaffin cell CL:0000166 +Chromaffin cell_VIP high chromaffin cell CL:0000166 +Ciliated epithelial cell ciliated epithelial cell CL:0000067 +Ciliated cell ciliated cell CL:0000064 +Club cell club cell CL:0000158 +Club cell_BPIFB1 high club cell CL:0000158 +Club cell_KLK11 high club cell 
CL:0000158 +Collecting duct cell kidney collecting duct cell CL:1001225 +Collecting duct cell_CRABP1 high kidney collecting duct cell CL:1001225 +Conventional dendritic cell conventional dendritic cell CL:0000990 +Conventional dendritic cell_FECER1A high conventional dendritic cell CL:0000990 +Conventional dendritic cell_IL8 high conventional dendritic cell CL:0000990 +Cytotrophoblast mononuclear cytotrophoblast cell CL:0000523 +Cytotrophoblast_PAGE4 high mononuclear cytotrophoblast cell CL:0000523 +Cytotrophoblast_PEG10 high mononuclear cytotrophoblast cell CL:0000523 +D cell/ X/A cell type D cell of stomach CL:0002267 +Dendritic cell dendritic cell CL:0000451 +Dendritic cell dendritic cell CL:0000451 +Dendritic cell_CPVL high dendritic cell CL:0000451 +Dendritic cell_FCER1A high dendritic cell CL:0000451 +Dendritic cell_HLA-DRA high dendritic cell CL:0000451 +Dendritic cell_LGALS2 high dendritic cell CL:0000451 +Dendritic cell_LYZ high dendritic cell CL:0000451 +Dendritic cell_WDFY4 high dendritic cell CL:0000451 +Dermis fibroblast fibroblast of dermis CL:0002551 +Distal progenitor cell epithelial cell of distal tubule CL:0002305 +Distal tubule cell epithelial cell of distal tubule CL:0002305 +Distal tubule cell_SLC12A3 high epithelial cell of distal tubule CL:0002305 +Distal tubule progenitor cell epithelial cell of distal tubule CL:0002305 +Duct cell pancreatic ductal cell CL:0002079 +Ductal cell pancreatic ductal cell CL:0002079 +ES_ANXA1 high embryonic stem cell CL:0002322 +ES_S100A6 high embryonic stem cell CL:0002322 +ES_TERF1 high embryonic stem cell CL:0002322 +Effector T cell effector T cell CL:0000911 +Endocardial cell endocardial cell CL:0002350 +Endocervix Mesothelial cell epithelial cell of cervix CL:0002535 +Endocrine cell pancreatic endocrine cell CL:0008024 +Endometrial cell stromal cell of endometrium CL:0002255 +Endothelial cell gut endothelial cell CL:0000131 +Endothelial cell endothelial cell CL:0000115 +Endothelial cell (non-professional APC) 
endothelial cell CL:0000115 +Endothelial cell in EMT endothelial cell CL:0000115 +Endothelial cell_A2M high endothelial cell CL:0000115 +Endothelial cell_ACKR1 high endothelial cell CL:0000115 +Endothelial cell_APC endothelial cell CL:0000115 +Endothelial cell_APLNR high endothelial cell CL:0000115 +Endothelial cell_CCL2 high endothelial cell CL:0000115 +Endothelial cell_CCL21 high endothelial cell CL:0000115 +Endothelial cell_COL15A1 high endothelial cell CL:0000115 +Endothelial cell_Col4A1 high endothelial cell CL:0000115 +Endothelial cell_EMCN high endothelial cell CL:0000115 +Endothelial cell_ESAM high endothelial cell CL:0000115 +Endothelial cell_ESM1 high endothelial cell CL:0000115 +Endothelial cell_FABP4 high endothelial cell CL:0000115 +Endothelial cell_GJA4 high endothelial cell CL:0000115 +Endothelial cell_IGFBP3 high endothelial cell CL:0000115 +Endothelial cell_IGFBP5 high endothelial cell CL:0000115 +Endothelial cell_IL6 high endothelial cell CL:0000115 +Endothelial cell_NEAT1 high endothelial cell CL:0000115 +Endothelial cell_NTS high endothelial cell CL:0000115 +Endothelial cell_PLVAP high endothelial cell CL:0000115 +Endothelial cell_PODXL high endothelial cell CL:0000115 +Endothelial cell_SELE high endothelial cell CL:0000115 +Endothelial cell_SOCS3 high endothelial cell CL:0000115 +Endothelial cell_SPARCL1 high endothelial cell CL:0000115 +Endothelial cell_STC1 high endothelial cell CL:0000115 +Endothelial cell_TM4SF1 high endothelial cell CL:0000115 +Endothelial cell_TMEM100 high endothelial cell CL:0000115 +Endothelial cell_VWF high endothelial cell CL:0000115 +Endothelial progenitor cell endothelial cell CL:0000115 +Enteric glial cell glial cell CL:0000125 +Enteric nerval cell neuron CL:0000540 +Enterocyte enterocyte CL:0000584 +Enterocyte enterocyte CL:0000584 +Enterocyte progenitor enterocyte CL:0000584 +Enterocyte progenitor_APOA4 high enterocyte CL:0000584 +Enterocyte progenitor_OLFM4 high enterocyte CL:0000584 +Enterocyte progenitor_REG1A 
high enterocyte CL:0000584 +Enterocyte_AGR2 high enterocyte CL:0000584 +Enterocyte_APOA1 high enterocyte CL:0000584 +Enterocyte_APOA4 high enterocyte CL:0000584 +Enterocyte_BEST4 high enterocyte CL:0000584 +Enterocyte_CA1 high enterocyte CL:0000584 +Enterocyte_CA7 high enterocyte CL:0000584 +Enterocyte_MT gene high enterocyte CL:0000584 +Enterocyte_OTOP2 high enterocyte CL:0000584 +Enterocyte_PHGR1 high enterocyte CL:0000584 +Enterocyte_RBP2 high enterocyte CL:0000584 +Enterocyte_RN7SK high enterocyte CL:0000584 +Enterocyte_SELENBP1 high enterocyte CL:0000584 +Enterocyte_SLC26A3 high enterocyte CL:0000584 +Enteroendocrine cell enteroendocrine cell CL:0000164 +Eosinophil eosinophil CL:0000771 +Ependymal cell ependymal cell CL:0000065 +Epithelial cell epithelial cell CL:0000066 +Epithelial cell_CCL21 high epithelial cell CL:0000066 +Epithelial cell_CCNB1 high epithelial cell CL:0000066 +Epithelial cell_CD24 high epithelial cell CL:0000066 +Epithelial cell_CLCA1 high epithelial cell CL:0000066 +Epithelial cell_CYSTM1 high epithelial cell CL:0000066 +Epithelial cell_EDN1 high epithelial cell CL:0000066 +Epithelial cell_FABP1 high epithelial cell CL:0000066 +Epithelial cell_HP high epithelial cell CL:0000066 +Epithelial cell_IGFBP5 high epithelial cell CL:0000066 +Epithelial cell_ITLN high epithelial cell CL:0000066 +Epithelial cell_KRT13 high epithelial cell of esophagus CL:0002252 +Epithelial cell_KRT14 high epithelial cell of esophagus CL:0002252 +Epithelial cell_KRT16 high epithelial cell of esophagus CL:0002252 +Epithelial cell_KRT17 high epithelial cell of esophagus CL:0002252 +Epithelial cell_KRT4 high epithelial cell of esophagus CL:0002252 +Epithelial cell_KRT7 high epithelial cell of esophagus CL:0002252 +Epithelial cell_MMP10 high epithelial cell of lower respiratory tract CL:0002632 +Epithelial cell_MMP7 high epithelial cell of esophagus CL:0002252 +Epithelial cell_MT1G high epithelial cell CL:0000066 +Epithelial cell_MT1L high epithelial cell CL:0000066 
+Epithelial cell_NR4A2 high epithelial cell CL:0000066 +Epithelial cell_PLA2G2A high epithelial cell of lung CL:0000082 +Epithelial cell_S100A2 high epithelial cell of lung CL:0000082 +Epithelial cell_SCGB3A1 high epithelial cell CL:0000066 +Epithelial cell_TM4SF4 high epithelial cell CL:0000066 +Epithelial cell_TPPP3 high epithelial cell CL:0000066 +Epithelial progenitor cell epithelial cell CL:0000066 +Epithelial_cell_NUPR1 high epithelial cell CL:0000066 +Epithelial¨Cmesenchymal transition epithelial cell CL:0000066 +Erythroid cell erythroid lineage cell CL:0000764 +"Erythroid cell +" erythroid lineage cell CL:0000764 +Erythroid cell_AHSP high erythroid lineage cell CL:0000764 +Erythroid cell_HBA1 high erythroid lineage cell CL:0000764 +Erythroid cell_HBB high erythroid lineage cell CL:0000764 +Erythroid cell_HBB_high erythroid lineage cell CL:0000764 +Erythroid cell_HBE1 high erythroid lineage cell CL:0000764 +Erythroid cell_HBM high erythroid lineage cell CL:0000764 +Erythroid cell_HBZ high erythroid lineage cell CL:0000764 +Erythroid cell_HBZ_high erythroid lineage cell CL:0000764 +Erythroid cell_PRDX2 high erythroid lineage cell CL:0000764 +Erythroid cell_SLC4A1 high erythroid lineage cell CL:0000764 +Erythroid cell__HBD high erythroid lineage cell CL:0000764 +Erythroid progenitor cell erythroblast CL:0000765 +Erythroid progenitor cell_NPM1 high erythroblast CL:0000765 +Erythroid progenitor cell_REXO2 high erythroblast CL:0000765 +Erythroid/Basophil Progenitor erythroblast CL:0000765 +Excitatory neuron excitatory neuron CL:0008030 +Exocrine cell pancreas exocrine glandular cell CL:1001599 +Exocrine cell_SAA1 high pancreas exocrine glandular cell CL:1001599 +Extravillous trophoblast_AOC1 high extravillous trophoblast CL:0008036 +Extravillous trophoblast_HPGD high extravillous trophoblast CL:0008036 +Extravillous trophoblast_LAIR2 high extravillous trophoblast CL:0008036 +Extravillous trophoblast_MMP12 high extravillous trophoblast CL:0008036 +Extravillous 
trophoblast_NOTUM high extravillous trophoblast CL:0008036 +Extravillous trophoblast_TIMP3 high extravillous trophoblast CL:0008036 +Fast skeletal muscle cell fast muscle cell CL:0000190 +Fenestrated endothelial cell_EMCN high fenestrated cell CL:0000666 +Fenestrated endothelial cell_SELE high fenestrated cell CL:0000666 +Fetal leydig cell Leydig cell CL:0000178 +Fibroblast fibroblast CL:0000057 +Fibroblast cell_MFAP5 high fibroblast CL:0000057 +Fibroblast_ CXCL12 high fibroblast CL:0000057 +Fibroblast_A2M high fibroblast CL:0000057 +Fibroblast_APOD high fibroblast CL:0000057 +Fibroblast_COL1A1 high fibroblast of choroid plexus CL:0002549 +Fibroblast_COL3A1 high fibroblast CL:0000057 +Fibroblast_COL3A1 high fibroblast CL:0000057 +Fibroblast_COL5A2 high fibroblast of dermis CL:0002551 +Fibroblast_DCN high fibroblast CL:0000057 +Fibroblast_EFEMP1 high fibroblast of dermis CL:0002551 +Fibroblast_FBLN1 high fibroblast CL:0000057 +Fibroblast_FNDC1 high fibroblast CL:0000057 +Fibroblast_HMGB2 high fibroblast CL:0000057 +Fibroblast_IBSP_high fibroblast CL:0000057 +Fibroblast_LUM high fibroblast CL:0000057 +Fibroblast_MFAP4 high fibroblast CL:0000057 +Fibroblast_MFAP5 high fibroblast CL:0000057 +Fibroblast_MGP high fibroblast CL:0000057 +Fibroblast_MGP_high fibroblast CL:0000057 +Fibroblast_PENK high fibroblast CL:0000057 +Fibroblast_POSTN high fibroblast CL:0000057 +Fibroblast_PTX3 high fibroblast CL:0000057 +Fibroblast_SFPR4 high fibroblast of dermis CL:0002551 +Fibroblast_SFRP high fibroblast CL:0000057 +Fibroblast_ZFAND2A high fibroblast CL:0000057 +Follicular dendritic cell follicular dendritic cell CL:0000442 +Ganglion cell neural cell CL:0002319 +Gastric chief cell peptic cell CL:0000155 +Gastric chief cell_CHIA high peptic cell CL:0000155 +Gastric chief cell_MT high peptic cell CL:0000155 +Gastric chief cell_PGA3 high peptic cell CL:0000155 +Gastric mucosa cell mucous cell of stomach CL:0002180 +Germ cell germ cell CL:0000586 +Glial cell glial cell CL:0000125 
+Glomerular endothelial cell_AQP1 high glomerular endothelial cell CL:0002188 +Goblet cell goblet cell CL:0000160 +Goblet cell_FCGBP high goblet cell CL:0000160 +Goblet cell_PIGR high goblet cell CL:0000160 +Granulocyte granulocyte CL:0000094 +HSPC hematopoietic stem cell CL:0000037 +Hepatocyte hepatocyte CL:0000182 +Hepatocyte-like cell hepatocyte CL:0000182 +Hepatocyte_FGB high hepatocyte CL:0000182 +Hepatocyte_GSTA1 high hepatocyte CL:0000182 +Hepatocyte_HP high hepatocyte CL:0000182 +Hepatocyte_TF high hepatocyte CL:0000182 +Hofbauer cell Hofbauer cell CL:3000001 +Hypertrophic chondrocyte hypertrophic chondrocyte CL:0000743 +IC-tran-PC kidney cell CL:1000497 +Immature sertoli cell(Pre-Sertoli cell) Sertoli cell CL:0000216 +Immune response stromal cell stromal cell CL:0000499 +Inflammatory cell inflammatory cell CL:0009002 +Inflammatory stromal cell_HSPA1A high inflammatory cell CL:0009002 +Inflammatory stromal cell_MT-RNR2 high inflammatory cell CL:0009002 +Inflammed epithelial cell inflammatory cell CL:0009002 +Inhibitory neuron inhibitory neuron CL:0008029 +Intercalated cell renal intercalated cell CL:0005010 +Intercalated cell_SLC26A4 high renal intercalated cell CL:0005010 +Intercalated cell_SPINK1 high renal intercalated cell CL:0005010 +Intermediate Epithelial cell intermediate epitheliocyte CL:0002209 +Interneuron interneuron CL:0000099 +Interstitial cell_POSTN high kidney interstitial cell CL:1000500 +Interstitial cell_PTN high kidney interstitial cell CL:1000500 +Interstitial progenitor cell kidney interstitial cell CL:1000500 +Intra-adrenal chromoblast pigment cell CL:0000147 +Intra-adrenal ganglion neuron neuron CL:0000540 +Keratinocyte keratinocyte CL:0000312 +Kerationcyte keratinocyte CL:0000312 +Kidney Epithelial cell kidney epithelial cell CL:0002518 +Kupffer cell Kupffer cell CL:0000091 +Kuppfer Cell Kupffer cell CL:0000091 +Kuppfer cell Kupffer cell CL:0000091 +Lens epithelial cell lens epithelial cell CL:0002224 +Loop of Henle (Thick ascending 
limb) kidney loop of Henle thick ascending limb epithelial cell CL:1001106 +Loop of henle _ANXA1 high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle _KNG1 high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle _UMOD high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle progenitor cell kidney loop of Henle epithelial cell CL:1000909 +Loop of henle_SFN high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle_SLPI high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle_SOD3 high kidney loop of Henle epithelial cell CL:1000909 +Loop of henle_SPP1 high kidney loop of Henle epithelial cell CL:1000909 +Luminal cell luminal epithelial cell of prostatic duct CL:0002237 +Luminal epithelium placental epithelial cell CL:0002577 +Lung mesenchyme cell (cardiopulmonary progenitor) mesenchymal cell CL:0008019 +Lymphatic endothelial cell endothelial cell CL:0000115 +Lymphatic endothelial cell_CCL21 high endothelial cell CL:0000115 +Lymphatic endothelial cell_NTS high endothelial cell CL:0000115 +Lymphocyte lymphocyte CL:0000542 +Lymphoid progenitor cell lymphoid lineage restricted progenitor cell CL:0000838 +M1 Macrophage inflammatory macrophage CL:0000863 +M1 Macrophage_CCL2_high inflammatory macrophage CL:0000863 +M1 Macrophage_CXCL8_high inflammatory macrophage CL:0000863 +M2 Macrophage alternatively activated macrophage CL:0000890 +M2 macrophage_CXCL8 high alternatively activated macrophage CL:0000890 +M2 macrophage_MALAT1 high alternatively activated macrophage CL:0000890 +Macrophage macrophage CL:0000235 +Macrophage_APOC1 high macrophage CL:0000235 +Macrophage_C1QB high macrophage CL:0000235 +Macrophage_CCL20 high macrophage CL:0000235 +Macrophage_CCL3L3 high macrophage CL:0000235 +Macrophage_CD52 high macrophage CL:0000235 +Macrophage_CTSB high macrophage CL:0000235 +Macrophage_CXCL2 high macrophage CL:0000235 +Macrophage_CXCL8 high macrophage CL:0000235 +Macrophage_FABP5 high macrophage CL:0000235 
+Macrophage_FCGR3A high macrophage CL:0000235 +Macrophage_GPR183 high macrophage CL:0000235 +Macrophage_HLA-DRA high macrophage CL:0000235 +Macrophage_IL1B high macrophage CL:0000235 +Macrophage_M2 macrophage CL:0000235 +Macrophage_RGS1 high macrophage CL:0000235 +Macrophage_RNASE1 high macrophage CL:0000235 +Macrophage_SDS high macrophage CL:0000235 +Macrophage_SPP1 high macrophage CL:0000235 +Macrophage_TPSB2 high macrophage CL:0000235 +Macrophage_VSIG4 high macrophage CL:0000235 +Mast mast cell CL:0000097 +Mast cell mast cell CL:0000097 +Mast progenitor cell mast cell CL:0000097 +Megakaryocyte megakaryocyte CL:0000556 +Megakaryocyte megakaryocyte CL:0000556 +Megakaryocyte/Erythroid Progenitor megakaryocyte-erythroid progenitor cell CL:0000050 +Megakaryocyte/Erythtoid progenitor cell megakaryocyte-erythroid progenitor cell CL:0000050 +Melanocyte melanocyte CL:0000148 +Melanocyte_MLANA high melanocyte CL:0000148 +Melanocyte_S100B high melanocyte CL:0000148 +Mesangial cell mesangial cell CL:0000650 +Mesenchymal cell mesenchymal cell CL:0008019 +Mesothelial cell mesothelial cell of peritoneum CL:1000490 +Mesothelial cell_CPA3 high mesothelial cell of peritoneum CL:1000490 +Mesothelial cell_ITLN high mesothelial cell of peritoneum CL:1000490 +Mesothelial cell_MT gene high mesothelial cell of peritoneum CL:1000490 +Microglia microglial cell CL:0000129 +Microglia_ALOX5AP high microglial cell CL:0000129 +Microglia_C3 high microglial cell CL:0000129 +Microglia_SPP1_high microglial cell CL:0000129 +Microvascular endothelial cell microvascular endothelial cell CL:2000008 +Monocyte monocyte CL:0000576 +Monocyte/DC progenitor macrophage dendritic cell progenitor CL:0002009 +Monocyte_CXCL8 high monocyte CL:0000576 +Monocyte_CXCR2 high monocyte CL:0000576 +Monocyte_FCGR3A high monocyte CL:0000576 +Monocyte_FTL high monocyte CL:0000576 +Monocyte_IGHG4 high monocyte CL:0000576 +Monocyte_ISG15 high monocyte CL:0000576 +Monocyte_S100A12 high monocyte CL:0000576 +Monocyte_S100A9 
high monocyte CL:0000576 +Monocyte_TPPP3 high monocyte CL:0000576 +Motile liver macrophage macrophage CL:0000235 +Motor neuron motor neuron CL:0000100 +Mucosal aquamous Epithelial cell mucous cell of stomach CL:0002180 +Mucous Epithelial cell epithelial cell CL:0000066 +Mucous Epithelial cell_REG1A high epithelial cell CL:0000066 +Mucous Epithelial cell_TFF1 high epithelial cell CL:0000066 +Mucous neck cell mucous neck cell CL:0000651 +Multipotential progenitor cell multi fate stem cell CL:0000048 +Muscle progenitor cell cell of skeletal muscle CL:0000188 +Myelinating oligodendrocyte oligodendrocyte CL:0000128 +Myeloid cell myeloid cell CL:0000763 +Myocyte muscle cell CL:0000187 +Myoepithelial cell myoepithelial cell CL:0000185 +Myofibroblast myofibroblast cell CL:0000186 +Myofibroblast_ELN high myofibroblast cell CL:0000186 +Myofibroblast_POSTN high myofibroblast cell CL:0000186 +Myofibroblast_TAGLN high myofibroblast cell CL:0000186 +Myogenic precursor cell cell of skeletal muscle CL:0000188 +M¨¹ller glia Mueller cell CL:0000636 +NK cell natural killer cell CL:0000623 +Natural killer cell natural killer cell CL:0000623 +Nephrogenic mesenchyme cell nephrogenic mesenchyme stem cell CL:0000383 +Nephrogenic mesenchyme cell_DAPL1 high nephrogenic mesenchyme stem cell CL:0000383 +Neuroendocrine cell neuroendocrine cell CL:0000165 +Neuroendocrine cell_ACPP high neuroendocrine cell CL:0000165 +Neuroendocrine cell_CCL21 high neuroendocrine cell CL:0000165 +Neuroendocrine cell_SST high neuroendocrine cell CL:0000165 +Neuron neuron CL:0000540 +Neuron_GAP43 high neuron CL:0000540 +Neuron_LINC00682 HIGH neuron CL:0000540 +Neuron_NEUROD6 high neuron CL:0000540 +Neuron_PENK high neuron CL:0000540 +Neuron_PLP1 high neuron CL:0000540 +Neuron_PPP1R17 high neuron CL:0000540 +Neuron_RELN high neuron CL:0000540 +Neuron_S100B high neuron CL:0000540 +Neuron_TMEM233 high neuron CL:0000540 +Neuron_XPR1 high neuron CL:0000540 +Neutriophil neutrophil CL:0000775 +Neutrophil neutrophil 
CL:0000775 +Neutrophil neutrophil CL:0000775 +Neutrophil _S100A8 high neutrophil CL:0000775 +Neutrophil_CAMP high neutrophil CL:0000775 +Neutrophil_CAMP_high neutrophil CL:0000775 +Neutrophil_CD177 high neutrophil CL:0000775 +Neutrophil_CXCL3 high neutrophil CL:0000775 +Neutrophil_DEFA3 high neutrophil CL:0000775 +Neutrophil_DEFA4 high neutrophil CL:0000775 +Neutrophil_DEFA4_high neutrophil CL:0000775 +Neutrophil_ELANE high neutrophil CL:0000775 +Neutrophil_FCGR3B high neutrophil CL:0000775 +Neutrophil_IL1B high neutrophil CL:0000775 +Neutrophil_LCN2 high neutrophil CL:0000775 +Neutrophil_LTF high neutrophil CL:0000775 +Neutrophil_LYZ high neutrophil CL:0000775 +Neutrophil_MMP high neutrophil CL:0000775 +Neutrophil_MMP9 high neutrophil CL:0000775 +Neutrophil_MPO high neutrophil CL:0000775 +Neutrophil_MPO_high neutrophil CL:0000775 +Neutrophil_OLFM4 high neutrophil CL:0000775 +Neutrophil_OLFM42 high neutrophil CL:0000775 +Neutrophil_PRTN3 high neutrophil CL:0000775 +Neutrophil_RNASE2 high neutrophil CL:0000775 +Neutrophil_S100A12 high neutrophil CL:0000775 +Neutrophil_S100A8 high neutrophil CL:0000775 +Neutrophil_S100A9 high neutrophil CL:0000775 +Neutrophil_S100P high neutrophil CL:0000775 +Neutrophil__S100A12 high neutrophil CL:0000775 +Oligodendrocyte oligodendrocyte CL:0000128 +Oligodendrocyte progenitor cell oligodendrocyte CL:0000128 +Oligodendrocyte_MT gene high oligodendrocyte CL:0000128 +Oligodendrocyte_TF high oligodendrocyte CL:0000128 +Osteoblast osteoblast CL:0000062 +Osteoblast_IBSP high osteoblast CL:0000062 +Osteoclast osteoclast CL:0000092 +Osteoclast_ACP5 high osteoclast CL:0000092 +Oviductal epithelial cell epithelial cell CL:0000066 +Paneth cell paneth cell CL:0000510 +Paneth cell_DEFA5 high paneth cell CL:0000510 +Paneth cell_REG3A high paneth cell CL:0000510 +Parietal cell parietal cell CL:0000162 +Parietal cell_ATP4B high parietal cell CL:0000162 +Parietal cell_GIF high parietal cell CL:0000162 +Pericyte pericyte cell CL:0000669 +Pit cell 
hepatic pit cell CL:2000054 +Pit cell_FOXQ1 high hepatic pit cell CL:2000054 +Pit cell_MUC6 high hepatic pit cell CL:2000054 +Pit cell_TFF2 high hepatic pit cell CL:2000054 +Pit cell_WFDC21P high hepatic pit cell CL:2000054 +Plasmacytoid dendritic cell plasmacytoid dendritic cell, human CL:0001058 +Podocyte glomerular visceral epithelial cell CL:0000653 +Pre B cell precursor B cell CL:0000817 +Primordial germ cell primordial germ cell CL:0000670 +Primordial germ cell primordial germ cell CL:0000670 +Primordial germ cell_GTSF1 high primordial germ cell CL:0000670 +Primordial germ cell_TCL1A high primordial germ cell CL:0000670 +Principle cell renal principal cell CL:0005009 +Proliferating B cell B cell CL:0000236 +Proliferating keratinocyte keratinocyte CL:0000312 +Proliferating B cell B cell CL:0000236 +Proliferating Intra-adrenal ganglion neuron neuron CL:0000540 +Proliferating T cell T cell CL:0000084 +Proliferating alveolar bipotent progenitor cell pneumocyte CL:0000322 +Proliferating endothelial cell endothelial cell CL:0000115 +Proliferating fibroblast fibroblast CL:0000057 +Proliferating lung mesenchyme cell_HIST1H4C high mesenchymal cell CL:0008019 +Proliferating lung mesenchyme cell_UBE2C high mesenchymal cell CL:0008019 +Proliferating mesothelial cell mesothelial cell of pleura CL:1000491 +Proliferating monocyte monocyte CL:0000576 +Proliferating radial glia radial glial cell CL:0000681 +Proliferating smooth muscle cell smooth muscle cell CL:0000192 +Proliferating stromal cell stromal cell CL:0000499 +Proximal progenitor cell epithelial cell of lung CL:0000082 +Proximal progenitor cell_GRP high epithelial cell of lung CL:0000082 +Proximal progenitor cell_SCGB3A2 high epithelial cell of lung CL:0000082 +Proximal tubule cell epithelial cell of proximal tubule CL:0002306 +Proximal tubule cell_ALDOB high epithelial cell of proximal tubule CL:0002306 +Proximal tubule cell_MT1G high epithelial cell of proximal tubule CL:0002306 +Proximal tubule cell_SOX4 high 
epithelial cell of proximal tubule CL:0002306 +Proximal tubule progenitor cell epithelial cell of proximal tubule CL:0002306 +Purkinje cell Purkinje cell CL:0000121 +Radial glia radial glial cell CL:0000681 +Radial glia_HES1 high radial glial cell CL:0000681 +Radial glia_HES1_high radial glial cell CL:0000681 +Retinal pigment epithelal cell retinal pigment epithelial cell CL:0002586 +Retinal pigment epithelal cell_MLANA high retinal pigment epithelial cell CL:0002586 +Retinal progenitor cell_ATP1A2 high retinal progenitor cell CL:0002672 +Retinal progenitor cell_FABP7 high retinal progenitor cell CL:0002672 +S-shaped body cell kidney cell CL:1000497 +S-shaped body cell_CFAP126 high kidney cell CL:1000497 +S-shaped body cell_LINC01158 high kidney cell CL:1000497 +S-shaped body medial cell kidney cell CL:1000497 +Secretory epithelial cell serous cell of epithelium of trachea CL:1000330 +Sertoli cell_DLK1 high Sertoli cell CL:0000216 +Sinusoidal endothelial cell endothelial cell of hepatic sinusoid CL:1000398 +Sinusoidal endothelial cell_FCN1 high endothelial cell of hepatic sinusoid CL:1000398 +Smooth muscel cell smooth muscle cell CL:0000192 +Smooth muscle cell smooth muscle cell CL:0000192 +Smooth muscle cell_ACTA2 high smooth muscle cell CL:0000192 +Smooth muscle cell_ACTG2 high smooth muscle cell CL:0000192 +Smooth muscle cell_ADIRF high smooth muscle cell CL:0000192 +Smooth muscle cell_CCL19 high smooth muscle cell CL:0000192 +Smooth muscle cell_CCL4L2 high smooth muscle cell CL:0000192 +Smooth muscle cell_CYCS high smooth muscle cell of the pulmonary artery CL:0002591 +Smooth muscle cell_MYL9 high smooth muscle cell CL:0000192 +Smooth muscle cell_PDK4 high smooth muscle cell CL:0000192 +Smooth muscle cell_TAGLN high smooth muscle cell CL:0000192 +Soomth muscle cell smooth muscle cell CL:0000192 +Spermatocyte spermatocyte CL:0000017 +Stomach fundus cell mucous cell of stomach CL:0002180 +Striated muscle cell striated muscle cell CL:0000737 +Stromal cell stromal 
cell CL:0000499 +Stromal cell stromal cell CL:0000499 +Stromal cell_APOD high stromal cell CL:0000499 +Stromal cell_ASPN high stromal cell CL:0000499 +Stromal cell_CLEC3B high stromal cell CL:0000499 +Stromal cell_COL1A1 high stromal cell CL:0000499 +Stromal cell_COL3A1 high stromal cell CL:0000499 +Stromal cell_CXCL14 high stromal cell CL:0000499 +Stromal cell_DCN high stromal cell CL:0000499 +Stromal cell_ERRFI1 high stromal cell of endometrium CL:0002255 +Stromal cell_GPX3 high stromal cell CL:0000499 +Stromal cell_HES1 high stromal cell CL:0000499 +Stromal cell_LOX high stromal cell CL:0000499 +Stromal cell_LUM high stromal cell CL:0000499 +Stromal cell_MFAP4 high stromal cell CL:0000499 +Stromal cell_MFAP5 high stromal cell CL:0000499 +Stromal cell_MGP high stromal cell CL:0000499 +Stromal cell_NOV high stromal cell CL:0000499 +Stromal cell_PCP4 high stromal cell CL:0000499 +Stromal cell_PLA2G2A high stromal cell CL:0000499 +Stromal cell_PRG4 high stromal cell CL:0000499 +Stromal cell_PTGDS high stromal cell CL:0000499 +Stromal cell_PTGDS_high stromal cell CL:0000499 +Stromal cell_SFRP2 high stromal cell CL:0000499 +Stromal cell_SULT1E1 high stromal cell CL:0000499 +Stromal cell_TSLP high stromal cell CL:0000499 +Superficial cell ureteral cell CL:1000601 +Sympathetic neuron sympathetic neuron CL:0011103 +Syncytiotrophoblast syncytiotrophoblast cell CL:0000525 +Syncytiotrophoblast cell syncytiotrophoblast cell CL:0000525 +T cell T cell CL:0000084 +T cell_CCL4 high T cell CL:0000084 +T cell_CCL5 high T cell CL:0000084 +T cell_GNLY high T cell CL:0000084 +T cell_GZMA high T cell CL:0000084 +T cell_IFNG high T cell CL:0000084 +T cell_IL7R high T cell CL:0000084 +T cell_LEPROTL1 high T cell CL:0000084 +T cell_TRAC high T cell CL:0000084 +Tendon cell_GCG high tendon cell CL:0000388 +Theca cell theca cell CL:0000503 +Thyroid follicular cell thyroid follicular cell CL:0002258 +Treg cell regulatory T cell CL:0000815 +Unknown Epithelial cell_EFNA1 high epithelial cell 
CL:0000066 +Unknown Epithelial cell_FOS high epithelial cell CL:0000066 +Ureteric Epithelial cell epithelial cell CL:0000066 +Ureteric bud cell epithelial cell CL:0000066 +Ureteric epithelial cell epithelial cell CL:0000066 +Ureteric smooth muscle cell ureter smooth muscle cell CL:1000979 +Urothelial cell bladder urothelial cell CL:1001428 +Urothelial cell bladder urothelial cell CL:1001428 +Vascular endothelial cell endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_A2M high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_AQP1 high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_CD34 high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_FABP4 high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_FABP5 high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_IGFBP3 high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_SELE high endothelial cell of vascular tree CL:0002139 +Vascular endothelial cell_VWF high endothelial cell of vascular tree CL:0002139 +Vascular epithelial cell epithelial cell CL:0000066 +Vascular smooth muscle cell vascular associated smooth muscle cell CL:0000359 +Ventricle Cardiomyocyte_MYL2 high cardiac muscle cell CL:0000746 +Ventricle cardiomyocyte cardiac muscle cell CL:0000746 +Ventricle cardiomyocyte_CSRP3 high cardiac muscle cell CL:0000746 +Ventricle cardiomyocyte_MB high cardiac muscle cell CL:0000746 +Ventricle cardiomyocyte_MT gene high cardiac muscle cell CL:0000746 +Villous trophoblast cell trophoblast cell CL:0000351 +Zona fasciculata cell cortical cell of adrenal gland CL:0002097 +Zona fasciculata cell_CYP11B2 high cortical cell of adrenal gland CL:0002097 +Zona fasciculata cell_FDX1 high cortical cell of adrenal gland CL:0002097 +Zona fasciculata cell_GSTA1 high cortical cell of adrenal gland CL:0002097 +Zona fasciculata cell_HSD3B2 high cortical cell of adrenal gland 
CL:0002097 +activative T cell T cell CL:0000084 +inflammatory cell inflammatory cell CL:0009002 +lymphatic endothelial cell endothelial cell of lymphatic vessel CL:0002138 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py deleted file mode 100644 index 1d32cb3d0..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_10x_travaglini_001.py +++ /dev/null @@ -1,213 +0,0 @@ -import anndata -import os -from typing import Union -import scipy.sparse -import numpy as np - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad", - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - """ - This data loader directly processes the data file provided under the download link. - To obtain the file, you need to create a free account at https://www.synapse.org. 
- You can then use those login credentials to download the file with python using the synapse client, - installable via `pip install synapseclient`: - - import synapseclient - import shutil - syn = synapseclient.Synapse() - syn.login("synapse_username","password") - syn21625095 = syn.get(entity="syn21625095") - shutil.move(syn21625095.path, "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad") - - :param data_path: - :param meta_path: - :param kwargs: - """ - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - protocol = "10x" if self.sample_fn.split("_")[0] == "droplet" else "smartseq2" - self.id = f"human_lung_2020_{protocol}_travaglini_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - f"10.1038/s41586-020-2922-4" - - synapse_id = { - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625095", - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625142" - } - - self.download_url_data = f"{synapse_id[self.sample_fn]},{self.sample_fn}" - self.download_url_meta = None - - self.author = "Travaglini" - self.doi = "10.1038/s41586-020-2922-4" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" - self.organism = "human" - self.protocol = "10X sequencing" if self.sample_fn.split("_")[0] == "droplet" else "Smart-seq2" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.class_maps = { - "0": { - "Adventitial Fibroblast_P1": "Fibroblasts", - "Adventitial Fibroblast_P2": "Fibroblasts", - "Adventitial Fibroblast_P3": "Fibroblasts", - "Airway Smooth Muscle_P1": "Airway smooth muscle", - "Airway Smooth Muscle_P2": "Airway smooth muscle", - "Airway Smooth Muscle_P3": "Airway smooth muscle", - "Alveolar Epithelial Type 1_P1": "AT1", - "Alveolar Epithelial Type 1_P2": 
"AT1", - "Alveolar Epithelial Type 1_P3": "AT1", - "Alveolar Epithelial Type 2_P1": "AT2", - "Alveolar Epithelial Type 2_P2": "AT2", - "Alveolar Epithelial Type 2_P3": "AT2", - "Alveolar Fibroblast_P1": "Fibroblasts", - "Alveolar Fibroblast_P2": "Fibroblasts", - "Alveolar Fibroblast_P3": "Fibroblasts", - "Artery_P1": "Arterial", - "Artery_P2": "Arterial", - "Artery_P3": "Arterial", - "B_P1": "B cell lineage", - "B_P2": "B cell lineage", - "B_P3": "B cell lineage", - "Basal_P1": "Basal", - "Basal_P2": "Basal", - "Basal_P3": "Basal", - "Basophil/Mast 1_P1": "Mast cells", - "Basophil/Mast 1_P2": "Mast cells", - "Basophil/Mast 1_P3": "Mast cells", - "Basophil/Mast 2_P3": "Mast cells", - "Bronchial Vessel 1_P1": "Bronchial Vessel 1", - "Bronchial Vessel 1_P3": "Bronchial Vessel 1", - "Bronchial Vessel 2_P1": "Bronchial Vessel 2", - "Bronchial Vessel 2_P3": "Bronchial Vessel 2", - "CD4+ Memory/Effector T_P1": "T cell lineage", - "CD4+ Memory/Effector T_P2": "T cell lineage", - "CD4+ Memory/Effector T_P3": "T cell lineage", - "CD4+ Naive T_P1": "T cell lineage", - "CD4+ Naive T_P2": "T cell lineage", - "CD4+ Naive T_P3": "T cell lineage", - "CD8+ Memory/Effector T_P1": "T cell lineage", - "CD8+ Memory/Effector T_P2": "T cell lineage", - "CD8+ Memory/Effector T_P3": "T cell lineage", - "CD8+ Naive T_P1": "T cell lineage", - "CD8+ Naive T_P2": "T cell lineage", - "CD8+ Naive T_P3": "T cell lineage", - "Capillary Aerocyte_P1": "Capillary", - "Capillary Aerocyte_P2": "Capillary", - "Capillary Aerocyte_P3": "Capillary", - "Capillary Intermediate 1_P2": "Capillary Intermediate 1", - "Capillary Intermediate 2_P2": "Capillary Intermediate 2", - "Capillary_P1": "Capillary", - "Capillary_P2": "Capillary", - "Capillary_P3": "Capillary", - "Ciliated_P1": "Multiciliated lineage", - "Ciliated_P2": "Multiciliated lineage", - "Ciliated_P3": "Multiciliated lineage", - "Classical Monocyte_P1": "Monocytes", - "Classical Monocyte_P2": "Monocytes", - "Classical Monocyte_P3": "Monocytes", - 
"Club_P1": "Secretory", - "Club_P2": "Secretory", - "Club_P3": "Secretory", - "Differentiating Basal_P1": "Basal", - "Differentiating Basal_P3": "Basal", - "EREG+ Dendritic_P1": "Macrophages", - "EREG+ Dendritic_P2": "Macrophages", - "Fibromyocyte_P3": "Fibromyocyte", - "Goblet_P3": "Secretory", - "IGSF21+ Dendritic_P1": "Macrophages", - "IGSF21+ Dendritic_P2": "Macrophages", - "IGSF21+ Dendritic_P3": "Macrophages", - "Intermediate Monocyte_P2": "Monocytes", - "Ionocyte_P3": "Rare", - "Lipofibroblast_P1": "Fibroblasts", - "Lymphatic_P1": "Lymphatic EC", - "Lymphatic_P2": "Lymphatic EC", - "Lymphatic_P3": "Lymphatic EC", - "Macrophage_P1": "Macrophages", - "Macrophage_P2": "Macrophages", - "Macrophage_P3": "Macrophages", - "Mesothelial_P1": "Mesothelium", - "Mucous_P2": "Submucosal Secretory", - "Mucous_P3": "Submucosal Secretory", - "Myeloid Dendritic Type 1_P1": "Dendritic cells", - "Myeloid Dendritic Type 1_P2": "Dendritic cells", - "Myeloid Dendritic Type 1_P3": "Dendritic cells", - "Myeloid Dendritic Type 2_P1": "Dendritic cells", - "Myeloid Dendritic Type 2_P2": "Dendritic cells", - "Myeloid Dendritic Type 2_P3": "Dendritic cells", - "Myofibroblast_P1": "Myofibroblasts", - "Myofibroblast_P2": "Myofibroblasts", - "Myofibroblast_P3": "Myofibroblasts", - "Natural Killer T_P2": "T cell lineage", - "Natural Killer T_P3": "T cell lineage", - "Natural Killer_P1": "Innate lymphoid cells", - "Natural Killer_P2": "Innate lymphoid cells", - "Natural Killer_P3": "Innate lymphoid cells", - "Neuroendocrine_P3": "Rare", - "Nonclassical Monocyte_P1": "Monocytes", - "Nonclassical Monocyte_P2": "Monocytes", - "Nonclassical Monocyte_P3": "Monocytes", - "OLR1+ Classical Monocyte_P2": "Monocytes", - "Pericyte_P1": "Fibroblasts", - "Pericyte_P2": "Fibroblasts", - "Pericyte_P3": "Fibroblasts", - "Plasma_P1": "B cell lineage", - "Plasma_P3": "B cell lineage", - "Plasmacytoid Dendritic_P1": "Dendritic cells", - "Plasmacytoid Dendritic_P2": "Dendritic cells", - "Plasmacytoid 
Dendritic_P3": "Dendritic cells", - "Platelet/Megakaryocyte_P1": "Megakaryocytes", - "Platelet/Megakaryocyte_P3": "Megakaryocytes", - "Proliferating Basal_P1": "Basal", - "Proliferating Basal_P3": "Basal", - "Proliferating Macrophage_P1": "Macrophages", - "Proliferating Macrophage_P2": "Macrophages", - "Proliferating Macrophage_P3": "Macrophages", - "Proliferating NK/T_P2": "Innate lymphoid cells", - "Proliferating NK/T_P3": "Innate lymphoid cells", - "Proximal Basal_P3": "Basal", - "Proximal Ciliated_P3": "Multiciliated lineage", - "Serous_P3": "Submucosal Secretory", - "Signaling Alveolar Epithelial Type 2_P3": "AT2", - "TREM2+ Dendritic_P1": "Macrophages", - "TREM2+ Dendritic_P3": "Macrophages", - "Vascular Smooth Muscle_P2": "2_Smooth Muscle", - "Vascular Smooth Muscle_P3": "2_Smooth Muscle", - "Vein_P1": "Venous", - "Vein_P2": "Venous", - "Vein_P3": "Venous", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - if self.sample_fn.split("_")[0] == "droplet": - norm_const = 1000000 - else: - norm_const = 10000 - adata = anndata.read(fn) - adata.X = scipy.sparse.csc_matrix(adata.X) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / norm_const) - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py new file mode 100644 index 000000000..32e0a11c9 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py @@ -0,0 +1,71 @@ +import anndata +import os +from typing import Union +import scipy.sparse +import numpy as np + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad", + 
"facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + """ + ToDo split by sample / patient in obs columns: + bio replicates droplet file "orig.ident"+"sample"+"magnetic.selection", + bio replicates facs file "patient"+"sample" + tech replicates droplet file "channel", + tech replicates facs file "plate.barcode" + """ + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) + synapse_id = { + "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625095", + "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625142" + } + + self.download_url_data = f"{synapse_id[self.sample_fn]},{self.sample_fn}" + self.download_url_meta = None + + self.author = "Travaglini" + self.doi = "10.1038/s41586-020-2922-4" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10X sequencing" if self.sample_fn.split("_")[0] == "droplet" else "Smart-seq2" + self.state_exact = "healthy" + self.year = 2020 + + self.obs_key_cellontology_original = "free_annotation" + self.var_symbol_col = "index" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, self.sample_fn) + if self.sample_fn.split("_")[0] == "droplet": + norm_const = 10000 + sf_key = "nUMI" + else: + norm_const = 1000000 + sf_key = "nReads" + adata = anndata.read(fn) + adata.X = scipy.sparse.csc_matrix(adata.X) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs[sf_key].values[:, None])).multiply(1 / norm_const) + self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.tsv new file mode 100644 index 000000000..17f26b130 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.tsv @@ -0,0 +1,140 @@ +source target target_id +Adventitial Fibroblast_P1 adventitial cell CL:0002503 +Adventitial Fibroblast_P2 adventitial cell CL:0002503 +Adventitial Fibroblast_P3 adventitial cell CL:0002503 +Airway Smooth Muscle_P1 tracheobronchial smooth muscle cell CL:0019019 +Airway Smooth Muscle_P2 tracheobronchial smooth muscle cell CL:0019019 +Airway Smooth Muscle_P3 tracheobronchial smooth muscle cell CL:0019019 +Alveolar Epithelial Type 1_P1 type I pneumocyte CL:0002062 +Alveolar Epithelial Type 1_P2 type I pneumocyte CL:0002062 +Alveolar Epithelial Type 1_P3 type I pneumocyte CL:0002062 +Alveolar Epithelial Type 2_P1 type II pneumocyte CL:0002063 +Alveolar Epithelial Type 2_P2 type II pneumocyte CL:0002063 +Alveolar Epithelial Type 2_P3 type II pneumocyte CL:0002063 +Alveolar Fibroblast_P1 fibroblast of lung CL:0002553 +Alveolar Fibroblast_P2 fibroblast of lung CL:0002553 +Alveolar Fibroblast_P3 fibroblast of lung CL:0002553 +Artery_P1 endothelial cell CL:0000115 +Artery_P2 endothelial cell CL:0000115 +Artery_P3 endothelial cell CL:0000115 +B_P1 B cell CL:0000236 +B_P2 B cell CL:0000236 +B_P3 B cell CL:0000236 +Basal_P1 respiratory basal cell CL:0002633 +Basal_P2 respiratory basal cell CL:0002633 +Basal_P3 respiratory basal cell CL:0002633 +Basophil/Mast 1_P1 myeloid leukocyte CL:0000766 +Basophil/Mast 1_P2 myeloid leukocyte CL:0000766 +Basophil/Mast 1_P3 myeloid leukocyte CL:0000766 +Basophil/Mast 2_P3 myeloid leukocyte CL:0000766 +Bronchial Vessel 1_P1 endothelial cell CL:0000115 +Bronchial Vessel 1_P3 endothelial cell CL:0000115 +Bronchial Vessel 2_P1 endothelial cell CL:0000115 +Bronchial Vessel 
2_P3 endothelial cell CL:0000115 +CD4+ Memory/Effector T_P1 effector memory CD4-positive, alpha-beta T cell, terminally differentiated CL:0001087 +CD4+ Memory/Effector T_P2 effector memory CD4-positive, alpha-beta T cell, terminally differentiated CL:0001087 +CD4+ Memory/Effector T_P3 effector memory CD4-positive, alpha-beta T cell, terminally differentiated CL:0001087 +CD4+ Naive T_P1 naive thymus-derived CD4-positive, alpha-beta T cell CL:0000895 +CD4+ Naive T_P2 naive thymus-derived CD4-positive, alpha-beta T cell CL:0000895 +CD4+ Naive T_P3 naive thymus-derived CD4-positive, alpha-beta T cell CL:0000895 +CD8+ Memory/Effector T_P1 effector memory CD8-positive, alpha-beta T cell, terminally differentiated CL:0001062 +CD8+ Memory/Effector T_P2 effector memory CD8-positive, alpha-beta T cell, terminally differentiated CL:0001062 +CD8+ Memory/Effector T_P3 effector memory CD8-positive, alpha-beta T cell, terminally differentiated CL:0001062 +CD8+ Naive T_P1 naive thymus-derived CD8-positive, alpha-beta T cell CL:0000900 +CD8+ Naive T_P2 naive thymus-derived CD8-positive, alpha-beta T cell CL:0000900 +CD8+ Naive T_P3 naive thymus-derived CD8-positive, alpha-beta T cell CL:0000900 +Capillary Aerocyte_P1 endothelial cell CL:0000115 +Capillary Aerocyte_P2 endothelial cell CL:0000115 +Capillary Aerocyte_P3 endothelial cell CL:0000115 +Capillary Intermediate 1_P2 endothelial cell CL:0000115 +Capillary Intermediate 2_P2 endothelial cell CL:0000115 +Capillary_P1 endothelial cell CL:0000115 +Capillary_P2 endothelial cell CL:0000115 +Capillary_P3 endothelial cell CL:0000115 +Ciliated_P1 lung ciliated cell CL:1000271 +Ciliated_P2 lung ciliated cell CL:1000271 +Ciliated_P3 lung ciliated cell CL:1000271 +Classical Monocyte_P1 classical monocyte CL:0000860 +Classical Monocyte_P2 classical monocyte CL:0000860 +Classical Monocyte_P3 classical monocyte CL:0000860 +Club_P1 club cell CL:0000158 +Club_P2 club cell CL:0000158 +Club_P3 club cell CL:0000158 +Dendritic_P1 dendritic cell 
CL:0000451 +Differentiating Basal_P1 respiratory basal cell CL:0002633 +Differentiating Basal_P3 respiratory basal cell CL:0002633 +EREG+ Dendritic_P1 dendritic cell CL:0000451 +EREG+ Dendritic_P2 dendritic cell CL:0000451 +Fibromyocyte_P3 myofibroblast cell CL:0000186 +Goblet_P1 lung goblet cell CL:1000143 +Goblet_P2 lung goblet cell CL:1000143 +Goblet_P3 lung goblet cell CL:1000143 +IGSF21+ Dendritic_P1 dendritic cell CL:0000451 +IGSF21+ Dendritic_P2 dendritic cell CL:0000451 +IGSF21+ Dendritic_P3 dendritic cell CL:0000451 +Intermediate Monocyte_P2 intermediate monocyte CL:0002393 +Intermediate Monocyte_P3 intermediate monocyte CL:0002393 +Ionocyte_P3 ionocyte CL:0005006 +Lipofibroblast_P1 fibroblast of lung CL:0002553 +Lymphatic_P1 endothelial cell of lymphatic vessel CL:0002138 +Lymphatic_P2 endothelial cell of lymphatic vessel CL:0002138 +Lymphatic_P3 endothelial cell of lymphatic vessel CL:0002138 +Macrophage_P1 lung macrophage CL:1001603 +Macrophage_P2 lung macrophage CL:1001603 +Macrophage_P3 lung macrophage CL:1001603 +Mesothelial_P1 mesothelial cell CL:0000077 +Mucous_P2 mucus secreting cell CL:0000319 +Mucous_P3 mucus secreting cell CL:0000319 +Myeloid Dendritic Type 1_P1 myeloid dendritic cell CL:0000782 +Myeloid Dendritic Type 1_P2 myeloid dendritic cell CL:0000782 +Myeloid Dendritic Type 1_P3 myeloid dendritic cell CL:0000782 +Myeloid Dendritic Type 2_P1 myeloid dendritic cell CL:0000782 +Myeloid Dendritic Type 2_P2 myeloid dendritic cell CL:0000782 +Myeloid Dendritic Type 2_P3 myeloid dendritic cell CL:0000782 +Myofibroblast_P1 myofibroblast cell CL:0000186 +Myofibroblast_P2 myofibroblast cell CL:0000186 +Myofibroblast_P3 myofibroblast cell CL:0000186 +Natural Killer T_P2 mature NK T cell CL:0000814 +Natural Killer T_P3 mature NK T cell CL:0000814 +Natural Killer_P1 natural killer cell CL:0000623 +Natural Killer_P2 natural killer cell CL:0000623 +Natural Killer_P3 natural killer cell CL:0000623 +Neuroendocrine_P1 neuroendocrine cell CL:0000165 
+Neuroendocrine_P3 neuroendocrine cell CL:0000165 +Neutrophil_P1 neutrophil CL:0000775 +Neutrophil_P2 neutrophil CL:0000775 +Neutrophil_P3 neutrophil CL:0000775 +Nonclassical Monocyte_P1 non-classical monocyte CL:0000875 +Nonclassical Monocyte_P2 non-classical monocyte CL:0000875 +Nonclassical Monocyte_P3 non-classical monocyte CL:0000875 +OLR1+ Classical Monocyte_P2 classical monocyte CL:0000860 +Pericyte_P1 pericyte cell CL:0000669 +Pericyte_P2 pericyte cell CL:0000669 +Pericyte_P3 pericyte cell CL:0000669 +Plasma_P1 plasma cell CL:0000786 +Plasma_P3 plasma cell CL:0000786 +Plasmacytoid Dendritic_P1 plasmacytoid dendritic cell CL:0000784 +Plasmacytoid Dendritic_P2 plasmacytoid dendritic cell CL:0000784 +Plasmacytoid Dendritic_P3 plasmacytoid dendritic cell CL:0000784 +Platelet/Megakaryocyte_P1 megakaryocyte CL:0000556 +Platelet/Megakaryocyte_P3 megakaryocyte CL:0000556 +Proliferating Basal_P1 respiratory basal cell CL:0002633 +Proliferating Basal_P3 respiratory basal cell CL:0002633 +Proliferating Macrophage_P1 macrophage CL:0000235 +Proliferating Macrophage_P2 macrophage CL:0000235 +Proliferating Macrophage_P3 macrophage CL:0000235 +Proliferating NK/T_P2 mature NK T cell CL:0000814 +Proliferating NK/T_P3 mature NK T cell CL:0000814 +Proximal Basal_P3 respiratory basal cell CL:0002633 +Proximal Ciliated_P3 lung ciliated cell CL:1000271 +Serous_P3 serous secreting cell CL:0000313 +Signaling Alveolar Epithelial Type 2_P1 type II pneumocyte CL:0002063 +Signaling Alveolar Epithelial Type 2_P3 type II pneumocyte CL:0002063 +TREM2+ Dendritic_P1 dendritic cell CL:0000451 +TREM2+ Dendritic_P3 dendritic cell CL:0000451 +Vascular Smooth Muscle_P1 vascular associated smooth muscle cell CL:0000359 +Vascular Smooth Muscle_P2 vascular associated smooth muscle cell CL:0000359 +Vascular Smooth Muscle_P3 vascular associated smooth muscle cell CL:0000359 +Vein_P1 endothelial cell CL:0000115 +Vein_P2 endothelial cell CL:0000115 +Vein_P3 endothelial cell CL:0000115 diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py similarity index 52% rename from sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index c697458b9..03efe4a2c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10x_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -17,12 +17,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_colon_2019_10x_james_001_10.1038/s41590-020-0602-z" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" self.download_url_meta = None - self.author = "Teichmann" + self.author = "James" self.doi = "10.1038/s41590-020-0602-z" self.healthy = True self.normalization = "raw" @@ -34,38 +32,9 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.obs_key_cellontology_original = "cell_type" - self.class_maps = { - "0": { - "Activated CD4 T": "Activated CD4 T", - "B cell IgA Plasma": "B cell IgA Plasma", - "B cell IgG Plasma": "B cell IgG Plasma", - "B cell cycling": "B cell cycling", - "B cell memory": "B cell memory", - "CD8 T": "CD8 T", - "Follicular B cell": "Follicular", - "ILC": "ILC", - "LYVE1 Macrophage": "LYVE1 Macrophage", - "Lymphoid DC": "Lymphoid DC", - "Macrophage": "Macrophage", - "Mast": "Mast cell", - "Monocyte": "Monocyte", - "NK": "NK", - "Tcm": "Tcm", - "Tfh": "Tfh", - "Th1": "Th1", - "Th17": "Th17", - "Treg": "Treg", - "cDC1": "DC1", - "cDC2": "DC2", - "cycling DCs": "cycling DCs", - "cycling gd T": "cycling gd T", - "gd T": "gd T", - "pDC": "pDC", - }, - } + 
self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "james20.processed.h5ad") diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.tsv new file mode 100644 index 000000000..751d8c43f --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.tsv @@ -0,0 +1,26 @@ +source target target_id +Activated CD4 T activated CD4-positive, alpha-beta T cell CL:0000896 +B cell IgA Plasma IgA plasma cell CL:0000987 +B cell IgG Plasma IgG plasma cell CL:0000985 +B cell cycling B cell CL:0000236 +B cell memory memory B cell CL:0000787 +CD8 T CD8-positive, alpha-beta T cell CL:0000625 +Follicular B cell follicular B cell CL:0000843 +ILC innate lymphoid cell CL:0001065 +LYVE1 Macrophage macrophage CL:0000235 +Lymphoid DC plasmacytoid dendritic cell CL:0000784 +Macrophage macrophage CL:0000235 +Mast mast cell CL:0000097 +Monocyte monocyte CL:0000576 +NK natural killer cell CL:0000623 +Tcm memory T cell CL:0000813 +Tfh T follicular helper cell CL:0002038 +Th1 T-helper 1 cell CL:0000545 +Th17 T-helper 17 cell CL:0000899 +Treg regulatory T cell CL:0000815 +cDC1 conventional dendritic cell CL:0000990 +cDC2 conventional dendritic cell CL:0000990 +cycling DCs dendritic cell CL:0000451 +cycling gd T gamma-delta T cell CL:0000798 +gd T gamma-delta T cell CL:0000798 +pDC plasmacytoid dendritic cell CL:0000784 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py deleted file mode 100644 index 04b0f0bba..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_10x_braga_x.py +++ /dev/null @@ -1,98 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as 
np - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ - "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad", - "vieira19_Bronchi_anonymised.processed.h5ad", -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"human_lung_2019_10x_braga_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - f"10.1038/s41591-019-0468-5" - - self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" - self.download_url_meta = None - - self.author = "Teichmann" - self.doi = "10.1038/s41591-019-0468-5" - self.healthy = True - self.organ = "bronchus" if sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - self.normalization = "norm" - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - if self.sample_fn == "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad": - self.class_maps = { - "0": { - "Ciliated 2": "Multiciliated lineage", - "Luminal_Macrophages": "Macrophages", - "Basal 1": "Basal", - "Dendritic cells": "Dendritic cells", - "Endothelial": "1_Endothelial", - "Lymphatic": "Lymphatic EC", - "Ciliated 1": "Multiciliated lineage", - "Smooth muscle": "2_Smooth Muscle", - "Type_1_alveolar": "AT1", - "Neutrophils": "Monocytes", - "Club": "Secretory", - "Basal 2": "Basal", - "B cells": "B cell lineage", - "T and NK": "2_Lymphoid", - "Mesothelium": "Mesothelium", - "Mast cells": "Mast cells", - "Fibroblasts": "2_Fibroblast lineage", - "Type 2 alveolar": "AT2", - }, - } - else: - self.class_maps = { - "0": { - "Ciliated 1": "Multiciliated lineage", - 
"Club": "Secretory", - "Ciliated 2": "Multiciliated lineage", - "Ionocytes": "Rare", - "Basal 2": "Basal", - "Goblet_1": "Secretory", - "Goblet 2": "Secretory", - "Basal 1": "Basal", - "Dendritic cells": "Dendritic cells", - "B cells": "B cell lineage", - "Luminal_Macrophages": "Macrophages", - "Neutrophils": "Monocytes", - "Endothelial": "1_Endothelial", - "Smooth muscle": "2_Smooth Muscle", - "T and NK": "2_Lymphoid", - "Fibroblasts": "2_Fibroblast lineage", - "Lymphatic": "Lymphatic EC", - "Mast cells": "Mast cells", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index 2656e52fe..b6131e392 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -16,12 +16,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2019_dropseq_braga_003_10.1038/s41591-019-0468-5" - self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" - self.author = "Teichmann" + self.author = "Braga" self.doi = "10.1038/s41591-019-0468-5" self.healthy = True self.normalization = "raw" @@ -32,26 +30,9 @@ def __init__( self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" - self.class_maps = { - "0": { - "Fibroblast": "Fibroblasts", 
- "Type 2": "AT2", - "B cell": "B cell lineage", - "Macrophages": "Macrophages", - "NK cell": "Innate lymphoid cells", - "T cell": "T cell lineage", - "Ciliated": "Multiciliated lineage", - "Lymphatic": "Lymphatic EC", - "Type 1": "AT1", - "Transformed epithelium": "1_Epithelial", - "Secretory": "Secretory", - "Endothelium": "1_Endothelial", - "Mast cell": "Mast cells", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -60,6 +41,6 @@ def _load(self): ] adata = anndata.read_csv(fn[0]).T adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.tsv new file mode 100644 index 000000000..1c02efcca --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.tsv @@ -0,0 +1,14 @@ +source target target_id +B cell B cell CL:0000236 +Ciliated lung ciliated cell CL:1000271 +Endothelium endothelial cell CL:0000115 +Fibroblast fibroblast of lung CL:0002553 +Lymphatic endothelial cell of lymphatic vessel CL:0002138 +Macrophages lung macrophage CL:1001603 +Mast cell mast cell CL:0000097 +NK cell natural killer cell CL:0000623 +Secretory secretory cell CL:0000151 +T cell T cell CL:0000084 +Transformed epithelium epithelial cell of tracheobronchial tree CL:0002202 +Type 1 type I pneumocyte CL:0002062 +Type 2 type II pneumocyte CL:0002063 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py new file mode 100644 index 000000000..94c25a3a5 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -0,0 +1,50 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad", + "vieira19_Bronchi_anonymised.processed.h5ad", +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_meta = None + + self.author = "Braga" + self.doi = "10.1038/s41591-019-0468-5" + self.healthy = True + self.organ = "bronchus" if sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + self.normalization = "norm" + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "CellType" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, self.sample_fn) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.tsv new file mode 100644 index 000000000..ba2550b04 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.tsv @@ -0,0 +1,22 @@ +source target target_id +B cells B cell CL:0000236 +Basal 1 respiratory basal 
cell CL:0002633 +Basal 2 respiratory basal cell CL:0002633 +Ciliated 1 lung ciliated cell CL:1000271 +Ciliated 2 lung ciliated cell CL:1000271 +Club club cell CL:0000158 +Dendritic cells dendritic cell CL:0000451 +Endothelial endothelial cell CL:0000115 +Fibroblasts fibroblast of lung CL:0002553 +Goblet 2 lung goblet cell CL:1000143 +Goblet_1 lung goblet cell CL:1000143 +Ionocytes ionocyte CL:0005006 +Luminal_Macrophages lung macrophage CL:1001603 +Lymphatic endothelial cell of lymphatic vessel CL:0002138 +Mast cells mast cell CL:0000097 +Mesothelium mesothelial cell CL:0000077 +Neutrophils neutrophil CL:0000775 +Smooth muscle bronchial smooth muscle cell CL:0002598 +T and NK alpha-beta T cell CL:0000789 +Type 2 alveolar type II pneumocyte CL:0002063 +Type_1_alveolar type I pneumocyte CL:0002062 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py similarity index 85% rename from sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index bc4040d6b..b542e0e20 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_brain_2019_10x_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -25,7 +25,8 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) sample_organ_dict = { "Choroid plexus": "choroid plexus", "Dura mater": "dura mater", @@ -35,9 +36,6 @@ def __init__( self.obs_key_sample = "sample" self.organ = 
sample_organ_dict[self.sample_id] - self.id = f"mouse_{''.join(self.organ.split(' '))}_2019_10x_hove_" \ - f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_10.1038/s41593-019-0393-4" - self.download_url_data = \ "https://www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" self.download_url_meta = \ @@ -54,17 +52,10 @@ def __init__( self.var_ensembl_col = "ensembl" self.var_symbol_col = "name" - self.obs_key_cellontology_original = "cluster" self.obs_key_organ = "sample_anatomy" - self.class_maps = { - "0": { - "Microglia": "microglial cell", - "T/NKT cells": "CD8-positive, alpha-beta T cell", - "Monocytes": "monocyte" - }, - } + self.set_dataset_id(idx=1) def _load_full(self): fn = [ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv new file mode 100644 index 000000000..f7f86ef30 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv @@ -0,0 +1,15 @@ +source target target_id +B cells B cell CL:0000236 +BAM macrophage CL:0000235 +ILC innate lymphoid cell CL:0001065 +Microglia brain macroglial cell CL:2000005 +Monocytes/Mdc monocyte CL:0000576 +NK cells natural killer cell CL:0000623 +Neutorphils neutrophil CL:0000775 +Neutrophils neutrophil CL:0000775 +T/NKT cells alpha-beta T cell CL:0000789 +cDC1 conventional dendritic cell CL:0000990 +cDC2 conventional dendritic cell CL:0000990 +migDC dendritic cell CL:0000451 +pDC plasmacytoid dendritic cell CL:0000784 +yd T cells gamma-delta T cell CL:0000798 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py similarity index 96% rename from 
sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index e89f8a93c..e065e25d5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10x_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -19,8 +19,6 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2020_10x_liao_001_10.1038/s41597-019-0351-8" - self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None @@ -37,6 +35,8 @@ def __init__( self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" + self.set_dataset_id(idx=1) + def _load(self): fn = os.path.join(self.data_dir, "GSE131685_RAW.tar") adatas = [] diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py similarity index 63% rename from sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py rename to sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index 0bfa8cdf0..fa08f34f5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_eye_2019_10x_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -16,12 +16,10 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_eye_2019_10x_voigt_001_10.1073/pnas.1914143116" - self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" self.download_url_meta = 
None - self.author = "Mullins" + self.author = "Voigt" self.doi = "10.1073/pnas.1914143116" self.healthy = True self.normalization = "norm" @@ -32,24 +30,9 @@ def __init__( self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "B-cell": "B-cell", - "Endothelial": "Endothelial cell", - "Fibroblast": "Fibroblast", - "Macrophage": "Macrophage", - "Mast-cell": "Mast-cell", - "Melanocyte": "Melanocyte", - "Pericyte": "Pericyte", - "RPE": "Retinal pigment epithelium", - "Schwann1": "Schwann1", - "Schwann2": "Schwann2", - "T/NK-cell": "T/NK-cell", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "voigt19.processed.h5ad") diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.tsv b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.tsv new file mode 100644 index 000000000..cfde2906d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.tsv @@ -0,0 +1,12 @@ +source target target_id +B-cell B cell CL:0000236 +Endothelial endothelial cell CL:0000115 +Fibroblast fibroblast CL:0000057 +Macrophage macrophage CL:0000235 +Mast-cell mast cell CL:0000097 +Melanocyte retinal melanocyte CL:0002485 +Pericyte pericyte cell CL:0000669 +RPE retinal pigment epithelial cell CL:0002586 +Schwann1 terminal Schwann cell CL:0000692 +Schwann2 terminal Schwann cell CL:0000692 +T/NK-cell alpha-beta T cell CL:0000789 diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py deleted file mode 100644 index 2c6c807e7..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10x_wang_001.py +++ /dev/null @@ -1,90 +0,0 @@ -import anndata -import os -from typing import Union 
-import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ - "wang20_colon.processed.h5ad", - "wang20_ileum.processed.h5ad", - "wang20_rectum.processed.h5ad" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - organ = self.sample_fn.split("_")[1].split(".")[0] - self.id = f"human_{organ}_2019_10x_wang_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1084/jem.20191130" - - self.download_url_data = f"https://covid19.cog.sanger.ac.uk/wang20_{organ}.processed.h5ad" - self.download_url_meta = None - - self.author = "Chen" - self.doi = "10.1084/jem.20191130" - self.healthy = True - self.normalization = "raw" - self.organ = "colon" if organ == "colon" else "ileum" if organ == "ileum" else "rectum" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - if organ == "colon": - self.class_maps = { - "0": { - "Progenitor": "Enterocyte Progenitors", - "Enterocyte": "Enterocytes", - "Goblet": "Goblet cells", - "TA": "TA", - "Paneth-like": "Paneth cells", - "Stem Cell": "Stem cells", - "Enteriendocrine": "Enteroendocrine cells", - }, - } - elif organ == "ileum": - self.class_maps = { - "0": { - "Progenitor": "Progenitors", - "Goblet": "Goblet cells", - "Enterocyte": "Enterocytes", - "Paneth-like": "Paneth cells", - "Stem Cell": "Stem Cell", - "TA": "TA", - "Enteriendocrine": "Enteroendocrine cells", - }, - } - else: - self.class_maps = { - "0": { - "Progenitor": "Enterocyte progenitor", - "Goblet": "Goblet", - "Enterocyte": "Enterocyte", - "Paneth-like": "Paneth-like", - "Stem Cell": 
"Stem Cell", - "TA": "TA", - "Enteriendocrine": "Enteroendocrine", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py new file mode 100644 index 000000000..d98e372eb --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -0,0 +1,54 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "wang20_colon.processed.h5ad", + "wang20_ileum.processed.h5ad", + "wang20_rectum.processed.h5ad" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_meta = None + + organ = self.sample_fn.split("_")[1].split(".")[0] + + self.author = "Wang" + self.doi = "10.1084/jem.20191130" + self.healthy = True + self.normalization = "raw" + self.organ = organ + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "CellType" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, self.sample_fn) + adata = anndata.read(fn) + adata.X = 
np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.tsv b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.tsv new file mode 100644 index 000000000..8850c721b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.tsv @@ -0,0 +1,8 @@ +source target target_id +Enteriendocrine enteroendocrine cell CL:0000164 +Enterocyte enterocyte CL:0000584 +Goblet goblet cell CL:0000160 +Paneth-like paneth cell CL:0000510 +Progenitor transit amplifying cell of large intestine CL:0009011 +Stem Cell intestinal crypt stem cell CL:0002250 +TA transit amplifying cell of large intestine CL:0009011 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py deleted file mode 100644 index 1d881ef43..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10x_lukassen_001.py +++ /dev/null @@ -1,87 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np -import scipy.sparse - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ - "lukassen20_lung_orig.processed.h5ad", - "lukassen20_airway_orig.processed.h5ad" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = f"human_lung_2020_10x_lukassen_{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_" \ - f"10.1101/2020.03.13.991455" - - 
self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" - self.download_url_meta = None - - self.author = "Eils" - self.doi = "10.1101/2020.03.13.991455" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "CellType" - - if self.sample_fn == "lukassen20_lung_orig.processed.h5ad": - self.class_maps = { - "0": { - "AT1": "AT1", - "AT2": "AT2", - "Ciliated": "Multiciliated lineage", - "Club": "Secretory", - "Endothelial": "1_Endothelial", - "Fibroblasts": "2_Fibroblast lineage", - "Immuno_TCells": "T cell lineage", - "Immuno_Monocytes": "Monocytes", - "LymphaticEndothelium": "Lymphatic EC", - } - } - else: - self.class_maps = { - "0": { - "Basal_Mitotic": "Basal", - "Basal1": "Basal", - "Basal2": "Basal", - "Basal3": "Basal", - "Ciliated1": "Multiciliated lineage", - "Ciliated2": "Multiciliated lineage", - "Club": "Secretory", - "Fibroblast": "2_Fibroblast lineage", - "FOXN4": "Rare", - "Ionocyte": "Rare", - "Goblet": "Secretory", - "Secretory3": "Secretory", - "Secretory2": "Secretory", - "Secretory1": "Secretory", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nCount_RNA"].values[:, None])).multiply(1 / 10000) - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py new file mode 100644 index 000000000..37eb7b14b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -0,0 
+1,52 @@ +import anndata +import os +from typing import Union +import numpy as np +import scipy.sparse + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "lukassen20_lung_orig.processed.h5ad", + "lukassen20_airway_orig.processed.h5ad" +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) + self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" + self.download_url_meta = None + + self.author = "Lukassen" + self.doi = "10.1101/2020.03.13.991455" + self.healthy = True + self.normalization = "raw" + self.organ = "lung" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "CellType" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, self.sample_fn) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nCount_RNA"].values[:, None])).multiply(1 / 10000) + self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.tsv b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.tsv new file mode 100644 index 000000000..fc867ca74 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.tsv @@ -0,0 +1,23 @@ +source target target_id +AT1 type I pneumocyte CL:0002062 +AT2 type II pneumocyte CL:0002063 +Basal1 respiratory basal cell CL:0002633 
+Basal2 respiratory basal cell CL:0002633 +Basal3 respiratory basal cell CL:0002633 +Basal_Mitotic respiratory basal cell CL:0002633 +Ciliated lung ciliated cell CL:1000271 +Ciliated1 lung ciliated cell CL:1000271 +Ciliated2 lung ciliated cell CL:1000271 +Club club cell CL:0000158 +Endothelial endothelial cell CL:0000115 +FOXN4 ciliated cell CL:0000064 +Fibroblast fibroblast of lung CL:0002553 +Fibroblasts fibroblast of lung CL:0002553 +Goblet lung goblet cell CL:1000143 +Immuno_Monocytes monocyte CL:0000576 +Immuno_TCells T cell CL:0000084 +Ionocyte ionocyte CL:0005006 +LymphaticEndothelium endothelial cell of lymphatic vessel CL:0002138 +Secretory1 secretory cell CL:0000151 +Secretory2 secretory cell CL:0000151 +Secretory3 secretory cell CL:0000151 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 8bb0d6b9e..d48f9ff07 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -57,8 +57,8 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - protocol = "10x" if sample_fn.split("-")[3] == "droplet" else "smartseq2" + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ "aorta" if organ in ["aorta"] else \ @@ -81,19 +81,16 @@ def __init__( "trachea" if organ in ["trachea"] else organ # ToDo: heart_and_aorta could be a distinct UBERON term, e.g. cardiovascular system? 
- self.id = f"mouse_{''.join(organ.split(' '))}_2019_{protocol}_pisco_" \ - f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1101/661728" - self.download_url_data = f"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/{sample_fn}" self.download_url_meta = None - self.obs_key_cellontology_original = "free_annotation" + self.obs_key_cellontology_original = "cell_ontology_class" self.obs_key_age = "age" self.obs_key_dev_stage = "development_stage" # not given in all data sets self.obs_key_sex = "sex" # ToDo: further anatomical information for subtissue in "subtissue"? - self.author = "Quake" + self.author = "Pisco" self.doi = "10.1101/661728" self.healthy = True self.normalization = "norm" @@ -106,6 +103,8 @@ def __init__( self.var_ensembl_col = None self.var_symbol_col = "index" + self.set_dataset_id(idx=1) + def _load(self): fn = os.path.join(self.data_dir, self.sample_fn) adata = anndata.read_h5ad(fn) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.tsv b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.tsv new file mode 100644 index 000000000..d72fca0e4 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.tsv @@ -0,0 +1,154 @@ +source target target_id +B cell B cell CL:0000236 +Bergmann glial cell Bergmann glial cell CL:0000644 +Brush cell of epithelium proper of large intestine brush cell of epithelium proper of large intestine CL:0002203 +CD4-positive, alpha-beta T cell CD4-positive, alpha-beta T cell CL:0000624 +CD8-positive, alpha-beta T cell CD8-positive, alpha-beta T cell CL:0000625 +DN3 thymocyte DN3 thymocyte CL:0000807 +DN4 thymocyte DN4 thymocyte CL:0000808 +Kupffer cell Kupffer cell CL:0000091 +Langerhans cell Langerhans cell CL:0000453 +NK cell natural killer cell CL:0000623 +Schwann cell Schwann cell CL:0002573 +T cell T cell CL:0000084 +adventitial cell adventitial cell CL:0002503 +alveolar macrophage alveolar macrophage CL:0000583 
+aortic endothelial cell aortic endothelial cell CL:0002544 +astrocyte astrocyte CL:0000127 +atrial myocyte regular atrial cardiac myocyte CL:0002129 +basal cell basal cell CL:0000646 +basal cell of epidermis basal cell of epidermis CL:0002187 +basal epithelial cell of tracheobronchial tree basal epithelial cell of tracheobronchial tree CL:0002329 +basophil basophil CL:0000767 +bladder cell bladder cell CL:1001319 +bladder urothelial cell bladder urothelial cell CL:1001428 +blood cell blood cell CL:0000081 +brain pericyte brain pericyte CL:2000043 +bronchial smooth muscle cell bronchial smooth muscle cell CL:0002598 +brush cell brush cell CL:0002204 +bulge keratinocyte keratinocyte stem cell CL:0002337 +cardiac neuron cardiac neuron CL:0010022 +cardiomyocyte cardiac muscle cell CL:0000746 +chondrocyte chondrocyte CL:0000138 +ciliated columnar cell of tracheobronchial tree ciliated columnar cell of tracheobronchial tree CL:0002145 +classical monocyte classical monocyte CL:0000860 +club cell of bronchiole club cell CL:0000158 +dendritic cell dendritic cell CL:0000451 +double negative T cell double negative thymocyte CL:0002489 +duct epithelial cell duct epithelial cell CL:0000068 +early pro-B cell early pro-B cell CL:0002046 +endocardial cell endocardial cell CL:0002350 +endothelial cell endothelial cell CL:0000115 +endothelial cell of coronary artery endothelial cell of coronary artery CL:2000018 +endothelial cell of hepatic sinusoid endothelial cell of hepatic sinusoid CL:1000398 +endothelial cell of lymphatic vessel endothelial cell of lymphatic vessel CL:0002138 +enterocyte of epithelium of large intestine enterocyte of epithelium of large intestine CL:0002071 +enteroendocrine cell enteroendocrine cell CL:0000164 +ependymal cell ependymal cell CL:0000065 +epidermal cell epidermal cell CL:0000362 +epithelial cell epithelial cell CL:0000066 +epithelial cell of large intestine epithelial cell of large intestine CL:0002253 +epithelial cell of proximal tubule 
epithelial cell of proximal tubule CL:0002306 +epithelial cell of thymus epithelial cell of thymus CL:0002293 +erythroblast erythroblast CL:0000765 +erythrocyte erythrocyte CL:0000232 +erythroid progenitor erythroid progenitor cell CL:0000038 +fenestrated cell fenestrated cell CL:0000666 +fibroblast fibroblast CL:0000057 +fibroblast of cardiac tissue fibroblast of cardiac tissue CL:0002548 +fibroblast of lung fibroblast of lung CL:0002553 +fibrocyte fibrocyte CL:0000135 +granulocyte granulocyte CL:0000094 +granulocyte monocyte progenitor cell granulocyte monocyte progenitor cell CL:0000557 +granulocytopoietic cell granulocytopoietic cell CL:0002191 +hematopoietic precursor cell hematopoietic precursor cell CL:0008001 +hematopoietic stem cell hematopoietic stem cell CL:0000037 +hepatic stellate cell hepatic stellate cell CL:0000632 +hepatocyte hepatocyte CL:0000182 +immature B cell immature B cell CL:0000816 +immature T cell immature T cell CL:0002420 +intermediate monocyte intermediate monocyte CL:0002393 +interneuron interneuron CL:0000099 +intestinal crypt stem cell intestinal crypt stem cell CL:0002250 +keratinocyte keratinocyte CL:0000312 +keratinocyte stem cell keratinocyte stem cell CL:0002337 +kidney capillary endothelial cell kidney capillary endothelial cell CL:1000892 +kidney cell kidney cell CL:1000497 +kidney collecting duct epithelial cell kidney collecting duct epithelial cell CL:1000454 +kidney collecting duct principal cell kidney collecting duct principal cell CL:1001431 +kidney cortex artery cell kidney cortex artery cell CL:1001045 +kidney distal convoluted tubule epithelial cell kidney distal convoluted tubule epithelial cell CL:1000849 +kidney interstitial fibroblast kidney interstitial fibroblast CL:1000692 +kidney loop of Henle ascending limb epithelial cell kidney loop of Henle ascending limb epithelial cell CL:1001016 +kidney loop of Henle thick ascending limb epithelial cell kidney loop of Henle thick ascending limb epithelial cell 
CL:1001106 +kidney mesangial cell mesangial cell CL:0000650 +kidney proximal convoluted tubule epithelial cell kidney proximal straight tubule epithelial cell CL:1000839 +kidney proximal straight tubule epithelial cell kidney proximal straight tubule epithelial cell CL:1000839 +large intestine goblet cell large intestine goblet cell CL:1000320 +late pro-B cell late pro-B cell CL:0002048 +leukocyte leukocyte CL:0000738 +luminal epithelial cell of mammary gland mammary alveolar cell CL:0002325 +lung macrophage macrophage CL:0000235 +lung neuroendocrine cell lung neuroendocrine cell CL:1000223 +lymphocyte lymphocyte CL:0000542 +lymphoid progenitor cell common lymphoid progenitor CL:0000051 +macrophage macrophage CL:0000235 +mast cell mast cell CL:0000097 +mature NK T cell mature NK T cell CL:0000814 +mature alpha-beta T cell mature alpha-beta T cell CL:0000791 +medium spiny neuron medium spiny neuron CL:1001474 +megakaryocyte-erythroid progenitor cell megakaryocyte-erythroid progenitor cell CL:0000050 +mesangial cell mesangial cell CL:0000650 +mesenchymal cell mesenchymal cell CL:0008019 +mesenchymal progenitor cell mesenchymal cell CL:0008019 +mesenchymal stem cell mesenchymal stem cell CL:0000134 +mesenchymal stem cell of adipose mesenchymal stem cell of adipose CL:0002570 +microglial cell microglial cell CL:0000129 +monocyte monocyte CL:0000576 +mucus secreting cell mucus secreting cell CL:0000319 +myeloid cell myeloid cell CL:0000763 +myeloid dendritic cell myeloid dendritic cell CL:0000782 +myeloid leukocyte myeloid leukocyte CL:0000766 +naive B cell naive B cell CL:0000788 +naive T cell naive T cell CL:0000898 +neuroendocrine cell neuroendocrine cell CL:0000165 +neuroepithelial cell neuroepithelial stem cell CL:0002259 +neuron neuron CL:0000540 +neuronal stem cell neuronal stem cell CL:0000047 +neutrophil neutrophil CL:0000775 +non-classical monocyte non-classical monocyte CL:0000875 +oligodendrocyte oligodendrocyte CL:0000128 +oligodendrocyte precursor cell 
oligodendrocyte precursor cell CL:0002453 +pancreatic A cell pancreatic A cell CL:0000171 +pancreatic B cell type B pancreatic cell CL:0000169 +pancreatic D cell pancreatic D cell CL:0000173 +pancreatic PP cell pancreatic PP cell CL:0002275 +pancreatic acinar cell pancreatic acinar cell CL:0002064 +pancreatic ductal cel pancreatic ductal cell CL:0002079 +pancreatic ductal cell pancreatic ductal cell CL:0002079 +pancreatic stellate cell pancreatic stellate cell CL:0002410 +pericyte cell pericyte cell CL:0000669 +plasma cell plasma cell CL:0000786 +plasmacytoid dendritic cell plasmacytoid dendritic cell CL:0000784 +podocyte glomerular visceral epithelial cell CL:0000653 +precursor B cell precursor B cell CL:0000817 +proerythroblast proerythroblast CL:0000547 +professional antigen presenting cell professional antigen presenting cell CL:0000145 +promonocyte promonocyte CL:0000559 +pulmonary interstitial fibroblast pulmonary interstitial fibroblast CL:0002241 +regulatory T cell regulatory T cell CL:0000815 +respiratory basal cell respiratory basal cell CL:0002633 +secretory cell secretory cell CL:0000151 +skeletal muscle cell cell of skeletal muscle CL:0000188 +skeletal muscle satellite cell skeletal muscle satellite cell CL:0000594 +smooth muscle cell smooth muscle cell CL:0000192 +smooth muscle cell of the pulmonary artery smooth muscle cell of the pulmonary artery CL:0002591 +smooth muscle cell of trachea smooth muscle cell of trachea CL:0002600 +stem cell of epidermis stem cell of epidermis CL:1000428 +stromal cell stromal cell CL:0000499 +thymocyte thymocyte CL:0000893 +type I pneumocyte type I pneumocyte CL:0002062 +type II pneumocyte type II pneumocyte CL:0002063 +valve cell valve cell CL:0000663 +vein endothelial cell vein endothelial cell CL:0002543 +ventricular myocyte regular ventricular cardiac myocyte CL:0002131 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py 
b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py similarity index 53% rename from sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py rename to sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 959b4bf24..016cd9744 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lung_2020_10x_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -16,8 +16,6 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_lung_2020_10x_habermann_001_10.1101/753806" - self.download_url_data = [ "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", @@ -25,7 +23,7 @@ def __init__( ] self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5FIPF%5Fmetadata%2Ecsv%2Egz" - self.author = "Kropski" + self.author = "Habermann" self.doi = "10.1101/753806" self.normalization = "raw" self.organ = "lung parenchyma" @@ -34,47 +32,12 @@ def __init__( self.year = 2020 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" self.obs_key_state_exact = "Diagnosis" self.obs_key_healthy = "Status" self.healthy_state_healthy = "Control" - self.class_maps = { - "0": { - "Proliferating Macrophages": "Macrophages", - "Myofibroblasts": "Myofibroblasts", - "Proliferating Epithelial Cells": "Proliferating Epithelial Cells", - "Mesothelial Cells": "Mesothelium", - "cDCs": "Dendritic cells", - "Mast Cells": "Mast cells", - "Ciliated": "Multiciliated lineage", - "T Cells": "T cell lineage", - "pDCs": "Dendritic cells", - "Smooth Muscle Cells": "2_Smooth Muscle", - "Transitional AT2": 
"AT2", - "AT2": "AT2", - "B Cells": "B cell lineage", - "NK Cells": "Innate lymphoid cells", - "Monocytes": "Monocytes", - "Basal": "Basal", - "Plasma Cells": "B cell lineage", - "Differentiating Ciliated": "Multiciliated lineage", - "Macrophages": "Macrophages", - "MUC5B+": "Secretory", - "SCGB3A2+": "Secretory", - "Fibroblasts": "Fibroblasts", - "Lymphatic Endothelial Cells": "Lymphatic EC", - "Endothelial Cells": "2_Blood vessels", - "SCGB3A2+ SCGB1A1+": "Secretory", - "PLIN2+ Fibroblasts": "Fibroblasts", - "KRT5-/KRT17+": "KRT5-/KRT17+", - "MUC5AC+ High": "Secretory", - "Proliferating T Cells": "T cell lineage", - "AT1": "AT1", - "HAS1 High Fibroblasts": "Fibroblasts" - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = [ @@ -89,6 +52,6 @@ def _load(self): obs = pd.read_csv(fn[3], index_col=0) adata = adata[obs.index.tolist(), :].copy() adata.obs = obs - self.set_unkown_class_id(ids=["1_Unicorns and artifacts"]) + self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.tsv b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.tsv new file mode 100644 index 000000000..da8eb96ef --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.tsv @@ -0,0 +1,32 @@ +source target target_id +AT1 type I pneumocyte CL:0002062 +AT2 type II pneumocyte CL:0002063 +B Cells B cell CL:0000236 +Basal respiratory basal cell CL:0002633 +Ciliated lung ciliated cell CL:1000271 +Differentiating Ciliated lung ciliated cell CL:1000271 +Endothelial Cells endothelial cell CL:0000115 +Fibroblasts fibroblast of lung CL:0002553 +HAS1 High Fibroblasts fibroblast of lung CL:0002553 +KRT5-/KRT17+ epithelial cell CL:0000066 +Lymphatic Endothelial Cells endothelial cell of lymphatic vessel CL:0002138 +MUC5AC+ High secretory cell CL:0000151 +MUC5B+ 
secretory cell CL:0000151 +Macrophages lung macrophage CL:1001603 +Mast Cells mast cell CL:0000097 +Mesothelial Cells mesothelial cell CL:0000077 +Monocytes monocyte CL:0000576 +Myofibroblasts myofibroblast cell CL:0000186 +NK Cells natural killer cell CL:0000623 +PLIN2+ Fibroblasts fibroblast of lung CL:0002553 +Plasma Cells plasma cell CL:0000786 +Proliferating Epithelial Cells respiratory epithelial cell CL:0002368 +Proliferating Macrophages macrophage CL:0000235 +Proliferating T Cells T cell CL:0000084 +SCGB3A2+ secretory cell CL:0000151 +SCGB3A2+ SCGB1A1+ secretory cell CL:0000151 +Smooth Muscle Cells bronchial smooth muscle cell CL:0002598 +T Cells T cell CL:0000084 +Transitional AT2 type II pneumocyte CL:0002063 +cDCs conventional dendritic cell CL:0000990 +pDCs plasmacytoid dendritic cell CL:0000784 diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py deleted file mode 100644 index 74d31f688..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10x_stewart_001.py +++ /dev/null @@ -1,129 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_kidney_2019_10x_stewart_001_10.1126/science.aat5031" - - self.download_url_data = [ - "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", - "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" - ] - self.download_url_meta = None - - self.author = "Clatworthy" - self.doi = "10.1126/science.aat5031" - self.healthy = True - self.normalization 
= "norm" - self.organ = "kidney" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - self.var_ensembl_col = "ID" - - self.obs_key_cellontology_original = "celltype" - - self.class_maps = { - "0": { - "Ascending vasa recta endothelium": "Endothelial Cells - AVR", - "B cell": "B cell", - "CD4 T cell": "CD4 T cell", - "CD8 T cell": "CD8 T cell", - "CNT/PC - proximal UB": "CNT/PC - proximal UB", - "Cap mesenchyme": "Cap mesenchyme", - "Connecting tubule": "Connecting tubule", - "Descending vasa recta endothelium": "Endothelial Cells - AEA & DVR", - "Distal S shaped body": "Distal S shaped body", - "Distal renal vesicle": "Distal renal vesicle", - "Distinct proximal tubule 1": "Distinct proximal tubule 1", - "Distinct proximal tubule 2": "Distinct proximal tubule 2", - "Endothelium": "Endothelial Cells (unassigned)", - "Epithelial progenitor cell": "Epithelial progenitor", - "Erythroid": "Erythroid", - "Fibroblast": "Fibroblast", - "Fibroblast 1": "Fibroblast", - "Fibroblast 2": "Fibroblast", - "Glomerular endothelium": "Endothelial Cells - glomerular capillaries", - "Indistinct intercalated cell": "Indistinct intercalated cell", - "Innate like lymphocyte": "Innate like lymphocyte", - "Loop of Henle": "Loop of Henle", - "MNP-a/classical monocyte derived": "MNP-a/classical monocyte derived", - "MNP-b/non-classical monocyte derived": "MNP-b/non-classical monocyte derived", - "MNP-c/dendritic cell": "MNP-c/dendritic cell", - "MNP-d/Tissue macrophage": "MNP-d/Tissue macrophage", - "Macrophage 1": "Macrophage", - "Macrophage 2": "Macrophage", - "Mast cell": "Mast cell", - "Mast cells": "Mast cell", - "Medial S shaped body": "Medial S shaped body", - "Megakaryocyte": "Megakaryocyte", - "Monocyte": "Monocyte", - "Myofibroblast": "Myofibroblast", - "Myofibroblast 1": "Myofibroblast", - "Myofibroblast 2": "Myofibroblast", - "NK cell": "NK cell", - "NKT cell": "NKT cell", - "Neuron": 
"Neuron", - "Neutrophil": "Neutrophil", - "Pelvic epithelium": "Pelvic epithelium", - "Pelvic epithelium - distal UB": "Pelvic epithelium - distal UB", - "Peritubular capillary endothelium 1": "Peritubular capillary endothelium 1", - "Peritubular capillary endothelium 2": "Peritubular capillary endothelium 2", - "Plasmacytoid dendritic cell": "Plasmacytoid dendritic cell", - "Podocyte": "Podocyte", - "Principal cell": "Principal cell", - "Proliferating B cell": "Proliferating B cell", - "Proliferating NK cell": "Proliferating NK cell", - "Proliferating Proximal Tubule": "Proliferating Proximal Tubule", - "Proliferating cDC2": "Proliferating cDC2", - "Proliferating cap mesenchyme": "Proliferating cap mesenchyme", - "Proliferating distal renal vesicle": "Proliferating distal renal vesicle", - "Proliferating fibroblast": "Proliferating fibroblast", - "Proliferating macrophage": "Proliferating macrophage", - "Proliferating monocyte": "Proliferating monocyte", - "Proliferating myofibroblast": "Proliferating myofibroblast", - "Proliferating stroma progenitor": "Proliferating stroma progenitor", - "Proximal S shaped body": "Proximal S shaped body", - "Proximal UB": "Proximal UB", - "Proximal renal vesicle": "Proximal renal vesicle", - "Proximal tubule": "Proximal tubule", - "Stroma progenitor": "Stroma progenitor", - "Thick ascending limb of Loop of Henle": "Thick ascending limb of Loop of Henle", - "Transitional urothelium": "Transitional urothelium", - "Type A intercalated cell": "Type A intercalated cell", - "Type B intercalated cell": "Collecting Duct - Intercalated Cells Type B", - "cDC1": "cDC1", - "cDC2": "cDC2", - "pDC": "pDC", - }, - } - - def _load(self): - fn = [ - os.path.join(self.data_dir, "Mature_Full_v2.1.h5ad"), - os.path.join(self.data_dir, "Fetal_full.h5ad") - ] - adult = anndata.read(fn[0]) - fetal = anndata.read(fn[1]) - adult.obs["development"] = "adult" - fetal.obs["development"] = "fetal" - adata = adult.concatenate(fetal) - adata.X = 
np.expm1(adata.X) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py new file mode 100644 index 000000000..1c9a06f17 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -0,0 +1,62 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + """ + TODO: annotate developmental cell types in set_unknown_class_id + """ + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = [ + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", + "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" + ] + self.download_url_meta = None + + self.author = "Stewart" + self.doi = "10.1126/science.aat5031" + self.healthy = True + self.normalization = "norm" + self.organ = "kidney" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.var_ensembl_col = "ID" + self.obs_key_cellontology_original = "celltype" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = [ + os.path.join(self.data_dir, "Mature_Full_v2.1.h5ad"), + os.path.join(self.data_dir, "Fetal_full.h5ad") + ] + adult = anndata.read(fn[0]) + fetal = anndata.read(fn[1]) + adult.obs["development"] = "adult" + fetal.obs["development"] = "fetal" + adata = adult.concatenate(fetal) + adata.X = np.expm1(adata.X) + + self.set_unknown_class_id(ids=[ + "CNT/PC - proximal UB", "Distal S shaped body", "Medial S shaped body", "Proliferating 
stroma progenitor", + "Proximal S shaped body", "Stroma progenitor", "Proximal UB", + ]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv new file mode 100644 index 000000000..afd222bad --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv @@ -0,0 +1,64 @@ +source target target_id +Ascending vasa recta endothelium vasa recta ascending limb cell CL:1001131 +B cell B cell CL:0000236 +CD4 T cell CD4-positive, alpha-beta T cell CL:0000624 +CD8 T cell CD8-positive, alpha-beta T cell CL:0000625 +Cap mesenchyme mesenchymal cell CL:0008019 +Connecting tubule kidney connecting tubule epithelial cell CL:1000768 +Descending vasa recta endothelium vasa recta descending limb cell CL:1001285 +Distal renal vesicle epithelial cell CL:0000066 +Distinct proximal tubule 1 epithelial cell of proximal tubule CL:0002306 +Distinct proximal tubule 2 epithelial cell of proximal tubule CL:0002306 +Endothelium endothelial cell CL:0000115 +Epithelial progenitor cell epithelial cell CL:0000066 +Erythroid erythrocyte CL:0000232 +Fibroblast fibroblast CL:0000057 +Fibroblast 1 fibroblast CL:0000057 +Fibroblast 2 fibroblast CL:0000057 +Glomerular endothelium glomerular capillary endothelial cell CL:1001005 +Indistinct intercalated cell renal intercalated cell CL:0005010 +Innate like lymphocyte innate lymphoid cell CL:0001065 +Loop of Henle kidney loop of Henle epithelial cell CL:1000909 +MNP-a/classical monocyte derived classical monocyte CL:0000860 +MNP-b/non-classical monocyte derived non-classical monocyte CL:0000875 +MNP-c/dendritic cell dendritic cell CL:0000451 +MNP-d/Tissue macrophage macrophage CL:0000235 +Macrophage 1 macrophage CL:0000235 +Macrophage 2 macrophage CL:0000235 +Mast cell mast cell CL:0000097 +Mast cells mast cell 
CL:0000097 +Megakaryocyte megakaryocyte CL:0000556 +Monocyte monocyte CL:0000576 +Myofibroblast kidney interstitial myofibroblast CL:1000691 +Myofibroblast 1 kidney interstitial myofibroblast CL:1000691 +Myofibroblast 2 kidney interstitial myofibroblast CL:1000691 +NK cell natural killer cell CL:0000623 +NKT cell mature NK T cell CL:0000814 +Neuron neuron CL:0000540 +Neutrophil neutrophil CL:0000775 +Pelvic epithelium epithelial cell CL:0000066 +Pelvic epithelium - distal UB epithelial cell CL:0000066 +Peritubular capillary endothelium 1 peritubular capillary endothelial cell CL:1001033 +Peritubular capillary endothelium 2 peritubular capillary endothelial cell CL:1001033 +Plasmacytoid dendritic cell plasmacytoid dendritic cell CL:0000784 +Podocyte glomerular visceral epithelial cell CL:0000653 +Principal cell kidney collecting duct principal cell CL:1001431 +Proliferating B cell B cell CL:0000236 +Proliferating NK cell natural killer cell CL:0000623 +Proliferating Proximal Tubule kidney proximal straight tubule epithelial cell CL:1000839 +Proliferating cDC2 conventional dendritic cell CL:0000990 +Proliferating cap mesenchyme nephrogenic mesenchyme stem cell CL:0000383 +Proliferating distal renal vesicle kidney blood vessel cell CL:1000854 +Proliferating fibroblast fibroblast CL:0000057 +Proliferating macrophage macrophage CL:0000235 +Proliferating monocyte monocyte CL:0000576 +Proliferating myofibroblast kidney interstitial myofibroblast CL:1000691 +Proximal renal vesicle kidney blood vessel cell CL:1000854 +Proximal tubule epithelial cell of proximal tubule CL:0002306 +Thick ascending limb of Loop of Henle kidney loop of Henle thick ascending limb epithelial cell CL:1001106 +Transitional urothelium urothelial cell CL:0000731 +Type A intercalated cell renal intercalated cell CL:0005010 +Type B intercalated cell renal intercalated cell CL:0005010 +cDC1 conventional dendritic cell CL:0000990 +cDC2 conventional dendritic cell CL:0000990 +pDC plasmacytoid dendritic 
cell CL:0000784 diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py deleted file mode 100644 index 31620b966..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10x_park_001.py +++ /dev/null @@ -1,92 +0,0 @@ -import anndata -import os -from typing import Union -import numpy as np - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_thymus_2020_10x_park_001_10.1126/science.aay3224" - - self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" - self.download_url_meta = None - - self.author = "Teichmann" - self.doi = "10.1126/science.aay3224" - self.healthy = True - self.normalization = "norm" - self.organ = "thymus" - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2020 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "Anno_level_fig1" - - self.class_maps = { - "0": { - "B_memory": "B_memory", - "B_naive": "B_naive", - "B_plasma": "B_plasma", - "B_pro/pre": "B_pro/pre", - "CD4+T": "CD4+T", - "CD4+Tmem": "CD4+Tmem", - "CD8+T": "CD8+T", - "CD8+Tmem": "CD8+Tmem", - "CD8αα": "CD8αα", - "DC1": "DC1", - "DC2": "DC2", - "DN": "DN", - "DP": "DP", - "ETP": "ETP", - "Endo": "Endo", - "Epi_GCM2": "Epi_GCM2", - "Ery": "Ery", - "Fb_1": "Fb_1", - "Fb_2": "Fb_2", - "Fb_cycling": "Fb_cycling", - "ILC3": "ILC3", - "Lymph": "Lymph", - "Mac": "Mac", - "Mast": "Mast", - "Mgk": "Mgk", - "Mono": "Mono", - "NK": "NK", - "NKT": "NKT", - "NMP": "NMP", - "T(agonist)": "T(agonist)", - "TEC(myo)": "TEC(myo)", - "TEC(neuro)": 
"TEC(neuro)", - "Treg": "Treg", - "VSMC": "VSMC", - "aDC": "aDC", - "cTEC": "cTEC", - "mTEC(I)": "mTEC(I)", - "mTEC(II)": "mTEC(II)", - "mTEC(III)": "mTEC(III)", - "mTEC(IV)": "mTEC(IV)", - "mcTEC": "mcTEC", - "pDC": "pDC", - "αβT(entry)": "alpha_beta_T(entry)", - "γδT": "gamma_delta_T", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, "park20.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py new file mode 100644 index 000000000..7bcb2b6cc --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -0,0 +1,42 @@ +import anndata +import os +from typing import Union +import numpy as np + +from sfaira.data import DatasetBase + + +class Dataset(DatasetBase): + + def __init__( + self, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" + self.download_url_meta = None + + self.author = "Park" + self.doi = "10.1126/science.aay3224" + self.healthy = True + self.normalization = "norm" + self.organ = "thymus" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2020 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "Anno_level_fig1" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, "park20.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.tsv b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.tsv new file mode 100644 index 000000000..a15d96522 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.tsv @@ -0,0 +1,45 @@ +source target target_id +B_memory memory B cell CL:0000787 +B_naive naive B cell CL:0000788 +B_plasma plasma cell CL:0000786 +B_pro/pre early pro-B cell CL:0002046 +CD4+T CD4-positive, alpha-beta T cell CL:0000624 +CD4+Tmem effector memory CD4-positive, alpha-beta T cell CL:0000905 +CD8+T CD8-positive, alpha-beta T cell CL:0000625 +CD8+Tmem effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8αα CD8-positive, alpha-beta thymocyte CL:0000811 +DC1 dendritic cell CL:0000451 +DC2 dendritic cell CL:0000451 +DN double negative thymocyte CL:0002489 +DP double-positive, alpha-beta thymocyte CL:0000809 +ETP early lymphoid progenitor CL:0000936 +Endo endothelial cell CL:0000115 +Epi_GCM2 epithelial cell CL:0000066 +Ery erythrocyte CL:0000232 +Fb_1 fibroblast CL:0000057 +Fb_2 fibroblast CL:0000057 +Fb_cycling fibroblast CL:0000057 +ILC3 group 3 innate lymphoid cell CL:0001071 +Lymph early lymphoid progenitor CL:0000936 +Mac thymic medullary macrophage CL:0000882 +Mast mast cell CL:0000097 +Mgk megakaryocyte CL:0000556 +Mono monocyte CL:0000576 +NK natural killer cell CL:0000623 +NKT immature NK T cell CL:0000914 +NMP common myeloid progenitor CL:0000049 +T(agonist) T cell CL:0000084 +TEC(myo) epithelial cell of thymus CL:0002293 +TEC(neuro) epithelial cell of thymus CL:0002293 +Treg regulatory T cell CL:0000815 +VSMC vascular associated smooth muscle cell CL:0000359 +aDC dendritic cell CL:0000451 +cTEC cortical thymic epithelial cell CL:0002364 +mTEC(I) medullary thymic epithelial cell CL:0002365 +mTEC(II) medullary thymic epithelial cell CL:0002365 +mTEC(III) medullary thymic 
epithelial cell CL:0002365 +mTEC(IV) medullary thymic epithelial cell CL:0002365 +mcTEC epithelial cell of thymus CL:0002293 +pDC plasmacytoid dendritic cell CL:0000784 +αβT(entry) alpha-beta T cell CL:0000789 +γδT gamma-delta T cell CL:0000798 diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py deleted file mode 100644 index 7da8ee04d..000000000 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10x_madissoon_001.py +++ /dev/null @@ -1,160 +0,0 @@ -import anndata -import os -from typing import Union -import scipy.sparse - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - -SAMPLE_FNS = [ - "madissoon19_lung.processed.h5ad", - "oesophagus.cellxgene.h5ad", - "spleen.cellxgene.h5ad", -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ - "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" - self.id = f"human_{''.join(organ.split(' '))}_2019_10x_madissoon_" \ - f"{str(SAMPLE_FNS.index(self.sample_fn)).zfill(3)}_10.1186/s13059-019-1906-x" - - if self.sample_fn == "madissoon19_lung.processed.h5ad": - self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" - self.var_ensembl_col = "gene.ids.HCATisStab7509734" - elif self.sample_fn == "oesophagus.cellxgene.h5ad": - self.download_url_data = \ - "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" - # Associated DCP: 
https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 - self.var_ensembl_col = "gene_ids-HCATisStab7413619" - else: - self.download_url_data = \ - "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" - self.var_ensembl_col = "gene_ids-HCATisStab7463846" - - self.download_url_meta = None - self.author = "Meyer" - self.doi = "10.1186/s13059-019-1906-x" - self.healthy = True - self.normalization = "raw" # ToDo "madissoon19_lung.processed.h5ad" is close to integer but not quire (~1e-4) - self.organ = organ - self.organism = "human" - self.protocol = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - - self.var_symbol_col = "index" - - self.obs_key_cellontology_original = "Celltypes" - # ToDo: patient information in .obs["patient"] and sample information in .obs["sample"] (more samples than - # patients) - - if self.sample_fn == "madissoon19_lung.processed.h5ad": - self.class_maps = { - "0": { - "T_CD4": "T cell lineage", - "Mast_cells": "Mast cells", - "Monocyte": "Monocytes", - "Blood_vessel": "2_Blood vessels", - "Ciliated": "Multiciliated lineage", - "Macrophage_MARCOneg": "Macrophages", - "DC_plasmacytoid": "Dendritic cells", - "DC_1": "Dendritic cells", - "Muscle_cells": "2_Smooth Muscle", - "Macrophage_MARCOpos": "Macrophages", - "T_cells_Dividing": "T cell lineage", - "DC_Monocyte_Dividing": "Dendritic cells", - "B_cells": "B cell lineage", - "T_CD8_CytT": "T cell lineage", - "NK_Dividing": "Innate lymphoid cells", - "T_regulatory": "T cell lineage", - "DC_2": "Dendritic cells", - "Alveolar_Type2": "AT2", - "Plasma_cells": "B cell lineage", - "NK": "Innate lymphoid cells", - "Alveolar_Type1": "AT1", - "Fibroblast": "2_Fibroblast lineage", - "DC_activated": "Dendritic cells", - "Macrophage_Dividing": "Macrophages", - "Lymph_vessel": "Lymphatic EC", - }, - } - elif self.sample_fn == "oesophagus.cellxgene.h5ad": - self.class_maps = { - "0": { - "B_CD27neg": "B_CD27neg", - 
"B_CD27pos": "B_CD27pos", - "Blood_vessel": "Blood_vessel", - "Dendritic_Cells": "Dendritic cell", - "Epi_basal": "Basal cell", - "Epi_dividing": "Epi_dividing", - "Epi_stratified": "Stratified epithelial cell", - "Epi_suprabasal": "Epi_suprabasal", - "Epi_upper": "Epi_upper", - "Glands_duct": "Glands_duct", - "Glands_mucous": "Glands_mucous", - "Lymph_vessel": "Lymph_vessel", - "Mast_cell": "Mast cell", - "Mono_macro": "Mono_macro", - "NK_T_CD8_Cytotoxic": "NK_T_CD8_Cytotoxic", - "Stroma": "Stromal cell", - "T_CD4": "T_CD4", - "T_CD8": "T_CD8", - }, - } - else: - self.class_maps = { - "0": { - "B_Hypermutation": "B_Hypermutation", - "B_T_doublet": "B_T_doublet", - "B_follicular": "B_follicular", - "B_mantle": "B_mantle", - "CD34_progenitor": "CD34_progenitor", - "DC_1": "DC_1", - "DC_2": "DC_2", - "DC_activated": "DC_activated", - "DC_plasmacytoid": "DC_plasmacytoid", - "ILC": "ILC", - "Macrophage": "Macrophage", - "Monocyte": "Monocyte", - "NK_CD160pos": "NK_CD160pos", - "NK_FCGR3Apos": "NK_FCGR3Apos", - "NK_dividing": "NK_dividing", - "Plasma_IgG": "Plasma_IgG", - "Plasma_IgM": "Plasma_IgM", - "Plasmablast": "Plasmablast", - "Platelet": "Platelet", - "T_CD4_conv": "T_CD4_conv", - "T_CD4_fh": "T_CD4_fh", - "T_CD4_naive": "T_CD4_naive", - "T_CD4_reg": "T_CD4_reg", - "T_CD8_CTL": "T_CD8_CTL", - "T_CD8_MAIT": "T_CD8_MAIT", - "T_CD8_activated": "T_CD8_activated", - "T_CD8_gd": "T_CD8_gd", - "T_cell_dividing": "Proliferating T cell", - }, - } - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - if self.sample_fn != "madissoon19_lung.processed.h5ad": - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) - # Cell type column called differently in madissoon19_lung.processed.h5ad: - if self.sample_fn == "madissoon19_lung.processed.h5ad": - adata.obs["Celltypes"] = adata.obs["CellType"] - del adata.obs["CellType"] - self.set_unkown_class_id(ids=["1_Unicorns and 
artifacts"]) - - return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py new file mode 100644 index 000000000..bcef2ba4e --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -0,0 +1,73 @@ +import anndata +import os +from typing import Union +import scipy.sparse + +from sfaira.data import DatasetBaseGroupLoadingManyFiles + +SAMPLE_FNS = [ + "madissoon19_lung.processed.h5ad", + "oesophagus.cellxgene.h5ad", + "spleen.cellxgene.h5ad", +] + + +class Dataset(DatasetBaseGroupLoadingManyFiles): + """ + ToDo: patient information in .obs["patient"] and sample information in .obs["sample"] (more samples than patients) + """ + + def __init__( + self, + sample_fn: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) + if self.sample_fn == "madissoon19_lung.processed.h5ad": + self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" + self.var_ensembl_col = "gene.ids.HCATisStab7509734" + elif self.sample_fn == "oesophagus.cellxgene.h5ad": + self.download_url_data = \ + "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" + # Associated DCP: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 + self.var_ensembl_col = "gene_ids-HCATisStab7413619" + else: + self.download_url_data = \ + "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" + self.var_ensembl_col = "gene_ids-HCATisStab7463846" + + self.download_url_meta = None + self.author = "Madissoon" + self.doi = 
"10.1186/s13059-019-1906-x" + self.healthy = True + self.normalization = "raw" # ToDo "madissoon19_lung.processed.h5ad" is close to integer but not quire (~1e-4) + self.organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ + "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" + self.organism = "human" + self.protocol = "10X sequencing" + self.state_exact = "healthy" + self.year = 2019 + + self.var_symbol_col = "index" + self.obs_key_cellontology_original = "Celltypes" + + self.set_dataset_id(idx=1) + + def _load(self): + fn = os.path.join(self.data_dir, self.sample_fn) + adata = anndata.read(fn) + if self.sample_fn != "madissoon19_lung.processed.h5ad": + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + # Cell type column called differently in madissoon19_lung.processed.h5ad: + if self.sample_fn == "madissoon19_lung.processed.h5ad": + adata.obs["Celltypes"] = adata.obs["CellType"] + del adata.obs["CellType"] + + self.set_unknown_class_id(ids=["B_T_doublet", "CD34_progenitor", "Stroma"]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv new file mode 100644 index 000000000..dda6a1e33 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv @@ -0,0 +1,61 @@ +source target target_id +Alveolar_Type1 type I pneumocyte CL:0002062 +Alveolar_Type2 type II pneumocyte CL:0002063 +B_CD27neg B cell CL:0000236 +B_CD27pos B cell CL:0000236 +B_Hypermutation B cell CL:0000236 +B_cells B cell CL:0000236 +B_follicular follicular B cell CL:0000843 +B_mantle B cell CL:0000236 +Blood_vessel blood vessel endothelial cell CL:0000071 +Ciliated ciliated cell CL:0000064 +DC_1 dendritic cell CL:0000451 +DC_2 dendritic 
cell CL:0000451 +DC_Monocyte_Dividing myeloid leukocyte CL:0000766 +DC_activated dendritic cell CL:0000451 +DC_plasmacytoid plasmacytoid dendritic cell CL:0000784 +Dendritic_Cells dendritic cell CL:0000451 +Epi_basal basal cell CL:0000646 +Epi_dividing epithelial cell CL:0000066 +Epi_stratified epithelial cell of stratum germinativum of esophagus CL:1000447 +Epi_suprabasal epithelial cell CL:0000066 +Epi_upper epithelial cell CL:0000066 +Fibroblast fibroblast CL:0000057 +Glands_duct glandular cell of esophagus CL:0002657 +Glands_mucous epithelial cell of esophagus CL:0002252 +ILC innate lymphoid cell CL:0001065 +Lymph_vessel endothelial cell of lymphatic vessel CL:0002138 +Macrophage macrophage CL:0000235 +Macrophage_Dividing macrophage CL:0000235 +Macrophage_MARCOneg macrophage CL:0000235 +Macrophage_MARCOpos macrophage CL:0000235 +Mast_cell mast cell CL:0000097 +Mast_cells mast cell CL:0000097 +Mono_macro myeloid leukocyte CL:0000766 +Monocyte monocyte CL:0000576 +Muscle_cells muscle cell CL:0000187 +NK natural killer cell CL:0000623 +NK_CD160pos CD16-positive, CD56-dim natural killer cell, human CL:0000939 +NK_Dividing natural killer cell CL:0000623 +NK_FCGR3Apos natural killer cell CL:0000623 +NK_T_CD8_Cytotoxic mature NK T cell CL:0000814 +NK_dividing natural killer cell CL:0000623 +Plasma_IgG IgG short lived plasma cell CL:0000977 +Plasma_IgM IgM plasma cell CL:0000986 +Plasma_cells plasma cell CL:0000786 +Plasmablast plasmablast CL:0000980 +Platelet platelet CL:0000233 +T_CD4 CD4-positive, alpha-beta T cell CL:0000624 +T_CD4_conv CD4-positive, alpha-beta T cell CL:0000624 +T_CD4_fh T follicular helper cell CL:0002038 +T_CD4_naive naive thymus-derived CD4-positive, alpha-beta T cell CL:0000895 +T_CD4_reg CD4-positive, CD25-positive, alpha-beta regulatory T cell CL:0000792 +T_CD8 CD8-positive, alpha-beta T cell CL:0000625 +T_CD8_CTL CD8-positive, alpha-beta cytotoxic T cell CL:0000794 +T_CD8_CytT CD8-positive, alpha-beta cytotoxic T cell CL:0000794 +T_CD8_MAIT 
mucosal invariant T cell CL:0000940 +T_CD8_activated activated CD8-positive, alpha-beta T cell, human CL:0001049 +T_CD8_gd CD8-alpha alpha positive, gamma-delta intraepithelial T cell CL:0000802 +T_cell_dividing T cell CL:0000084 +T_cells_Dividing T cell CL:0000084 +T_regulatory regulatory T cell CL:0000815 diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py similarity index 55% rename from sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py rename to sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index 439e5e720..1a8e5b7ab 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_eye_2019_10x_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -17,12 +17,11 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_eye_2019_10x_lukowski_001_10.15252/embj.2018100811" self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" self.download_url_meta = None - self.author = "Wong" + self.author = "Lukowski" self.doi = "10.15252/embj.2018100811" self.healthy = True self.normalization = "raw" @@ -34,27 +33,9 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.obs_key_cellontology_original = "CellType" - self.class_maps = { - "0": { - "Muller cell": "Muller cell", - "amacrine cell": "Amacrine cell", - "microglial cell": "Microglia", - "retinal bipolar neuron type A": "Retinal bipolar neuron type A", - "retinal bipolar neuron type B": "Retinal bipolar neuron type B", - "retinal bipolar neuron type C": "Retinal bipolar neuron type C", - "retinal bipolar neuron type D": "Retinal 
bipolar neuron type D", - "retinal cone cell": "Retinal cone cell", - "retinal ganglion cell": "Retinal ganglion cell", - "retinal rod cell type A": "Retinal rod cell type A", - "retinal rod cell type B": "Retinal rod cell type B", - "retinal rod cell type C": "Retinal rod cell type C", - "unannotated": "Unknown", - "unspecified": "Unknown", - }, - } + self.set_dataset_id(idx=1) def _load(self): fn = os.path.join(self.data_dir, "lukowski19.processed.h5ad") @@ -62,4 +43,6 @@ def _load(self): adata.X = np.expm1(adata.X) adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + self.set_unknown_class_id(ids=["unannotated", "unspecified"]) + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv new file mode 100644 index 000000000..ffa68e820 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv @@ -0,0 +1,13 @@ +source target target_id +Muller cell Mueller cell CL:0000636 +amacrine cell amacrine cell CL:0000561 +microglial cell microglial cell CL:0000129 +retinal bipolar neuron type A retinal bipolar neuron CL:0000748 +retinal bipolar neuron type B retinal bipolar neuron CL:0000748 +retinal bipolar neuron type C retinal bipolar neuron CL:0000748 +retinal bipolar neuron type D retinal bipolar neuron CL:0000748 +retinal cone cell retinal cone cell CL:0000573 +retinal ganglion cell retinal ganglion cell CL:0000740 +retinal rod cell type A retinal rod cell CL:0000604 +retinal rod cell type B retinal rod cell CL:0000604 +retinal rod cell type C retinal rod cell CL:0000604 diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py 
b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py similarity index 96% rename from sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py rename to sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index f4cbf2948..872a023ba 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10x_10xGenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -18,8 +18,6 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.id = "human_blood_2019_10x_10xGenomics_001_unknown" - self.download_url_data = \ "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.download_url_meta = None @@ -37,6 +35,8 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" + self.set_dataset_id(idx=1) + def _load(self): fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5") with tables.open_file(str(fn), 'r') as f: diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py similarity index 72% rename from sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py rename to sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 298a7f2de..970c5207e 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10x_ica_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -20,26 +20,19 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__( - sample_id=sample_id, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - 
**kwargs - ) + super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, data_path=data_path, meta_path=meta_path, + cache_path=cache_path, **kwargs) self.obs_key_sample = "derived_organ_parts_label" - self.id = f"human_{'blood' if sample_id == 'umbilical cord blood' else 'bone'}_2018_10x_ica_" \ - f"{str(SAMPLE_IDS.index(self.sample_id)).zfill(3)}_unknown" self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/" \ "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" self.download_url_meta = None self.author = "Regev" - self.doi = "no_doi_10x_genomics" + self.doi = "no_doi_regev" self.healthy = True self.normalization = "raw" - self.organ = "blood" if sample_id == "umbilical cord blood" else "bone marrow" + self.organ = sample_id self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" @@ -48,6 +41,8 @@ def __init__( self.var_symbol_col = "index" self.var_ensembl_col = "Accession" + self.set_dataset_id(idx=1) + def _load_full(self): fn = os.path.join(self.data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") adata = anndata.read_loom(fn) diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py index 3c87e58dc..6a10241ba 100644 --- a/sfaira/data/dataloaders/loaders/super_group.py +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -1,12 +1,14 @@ import pydoc import os -from typing import Union +from typing import List, Union from warnings import warn from sfaira.data import DatasetSuperGroup, DatasetGroupDirectoryOriented class DatasetSuperGroupLoaders(DatasetSuperGroup): + dataset_groups: List[DatasetGroupDirectoryOriented] + def __init__( self, data_path: Union[str, None] = None, diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py index 5c3c1cfba..e49093535 100644 --- a/sfaira/data/dataloaders/super_group.py +++ b/sfaira/data/dataloaders/super_group.py @@ -39,7 +39,7 @@ def __init__( ) 
] if sfairae is not None: - dsgs.append(sfairae.data.loaders.DatasetSuperGroupLoaders( + dsgs.append(sfairae.data.dataloaders.loaders.DatasetSuperGroupLoaders( data_path=data_path, meta_path=meta_path, cache_path=cache_path, diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index a45ba4f0b..a0b970826 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -1,5 +1,6 @@ from typing import Dict, List, Union +from sfaira.consts import OntologyContainerSfaira from sfaira.versions.metadata import CelltypeUniverse @@ -45,7 +46,13 @@ def map_celltype_to_ontology( """ if isinstance(queries, str): queries = [queries] - cu = CelltypeUniverse(organism=organism, **kwargs) + oc = OntologyContainerSfaira() + cu = CelltypeUniverse( + cl=oc.ontology_cell_types, + uberon=oc.ontology_organ, + organism=organism, + **kwargs + ) matches_to_return = {} matches = cu.prepare_celltype_map_fuzzy( source=queries, diff --git a/sfaira/data/utils_scripts/clean_celltype_maps_global.py b/sfaira/data/utils_scripts/clean_celltype_maps_global.py new file mode 100644 index 000000000..cdcdf9caf --- /dev/null +++ b/sfaira/data/utils_scripts/clean_celltype_maps_global.py @@ -0,0 +1,25 @@ +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders + +print(tf.__version__) + +# Set global variables. 
+print("sys.argv", sys.argv) + +data_path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +processes = int(str(sys.argv[4])) + +dsgl = DatasetSuperGroupLoaders( + data_path=data_path, + meta_path=path_meta, + cache_path=path_cache +) + +for x in dsgl.dataset_groups: + print(x.ids) + x.clean_ontology_class_map() diff --git a/sfaira/data/utils_scripts/create_celltype_maps.py b/sfaira/data/utils_scripts/create_celltype_maps_global.py similarity index 99% rename from sfaira/data/utils_scripts/create_celltype_maps.py rename to sfaira/data/utils_scripts/create_celltype_maps_global.py index f7aa7dd55..fc77f0c9f 100644 --- a/sfaira/data/utils_scripts/create_celltype_maps.py +++ b/sfaira/data/utils_scripts/create_celltype_maps_global.py @@ -94,7 +94,7 @@ pass else: dsg_f.write_ontology_class_map( - fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".csv"), + fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".tsv"), protected_writing=True, n_suggest=4, ) diff --git a/sfaira/data/utils_scripts/create_celltype_maps_selected.py b/sfaira/data/utils_scripts/create_celltype_maps_selected.py new file mode 100644 index 000000000..ee839cb7a --- /dev/null +++ b/sfaira/data/utils_scripts/create_celltype_maps_selected.py @@ -0,0 +1,87 @@ +import os +import pydoc +import sfaira +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021 import FILE_PATH + +print(tf.__version__) + +# Set global variables. 
+print("sys.argv", sys.argv) + +data_path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +processes = int(str(sys.argv[4])) +dir_study = str(sys.argv[5]) + +dir_sfaira_dataloaders = "/" + str(os.path.join(*str(os.path.dirname(FILE_PATH)).split("/")[:-1])) + +dir_prefix = "d" +dir_exlcude = [] + +print(dir_study) +if os.path.isdir(os.path.join(dir_sfaira_dataloaders, dir_study)): # only directories + # Narrow down to data set directories: + if dir_study[:len(dir_prefix)] == dir_prefix and dir_study not in dir_exlcude: + for f_dataset in os.listdir(os.path.join(dir_sfaira_dataloaders, dir_study)): + if os.path.isfile(os.path.join(dir_sfaira_dataloaders, dir_study, f_dataset)): # only files + print(f_dataset) + # Narrow down to data set files: + if f_dataset.split(".")[-1] == "py" and \ + f_dataset.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(str(f_dataset).split(".")[:-1]) + DatasetFound = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".Dataset") + # Check if global objects are available: + # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles + # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + sample_fns = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".SAMPLE_FNS") + sample_ids = pydoc.locate( + "sfaira.data.dataloaders.loaders." + dir_study + "." 
+ file_module + ".SAMPLE_IDS") + if sample_fns is not None and sample_ids is None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_fn=x, + data_path=data_path, + meta_path=path_meta, + cache_path=path_cache + ) + for x in sample_fns + ] + elif sample_fns is None and sample_ids is not None: + # DatasetBaseGroupLoadingManyFiles: + datasets_f = [ + DatasetFound( + sample_id=x, + data_path=data_path, + meta_path=path_meta, + cache_path=path_cache + ) + for x in sample_ids + ] + elif sample_fns is not None and sample_ids is not None: + raise ValueError(f"sample_fns and sample_ids both found for {f_dataset}") + else: + datasets_f = [DatasetFound( + data_path=data_path, + meta_path=path_meta, + cache_path=path_cache + )] + dsg_f = sfaira.data.DatasetGroup(datasets=dict([(x.id, x) for x in datasets_f])) + dsg_f.load( + load_raw=False, + allow_caching=True, + match_to_reference=False, + remove_gene_version=False, + ) + dsg_f.write_ontology_class_map( + fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".tsv"), + protected_writing=True, + n_suggest=4, + ) diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 68f540bdb..1c954dbea 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -14,7 +14,7 @@ def write_meta(args0, args1): load_raw=False, allow_caching=False, ) - args0.write_ontology_class_map(fn=args0.fn_ontology_class_map_csv) + args0.write_ontology_class_map(fn=args0.fn_ontology_class_map_tsv) return None diff --git a/sfaira/estimators/callbacks.py b/sfaira/estimators/callbacks.py index 690c3d29f..6c678976e 100644 --- a/sfaira/estimators/callbacks.py +++ b/sfaira/estimators/callbacks.py @@ -1,4 +1,7 @@ -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None import numpy as np diff --git a/sfaira/estimators/external.py b/sfaira/estimators/external.py 
deleted file mode 100644 index 70d179770..000000000 --- a/sfaira/estimators/external.py +++ /dev/null @@ -1,4 +0,0 @@ -from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.topology_versions import Topologies -from sfaira.models.base import BasicModel diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index aa21191b0..59615c2db 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -4,12 +4,18 @@ import numpy as np import pandas import scipy.sparse -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import Union import os import warnings from tqdm import tqdm -from .external import CelltypeUniverse, Topologies, BasicModel + +from sfaira.models import BasicModel +from sfaira.versions.metadata import CelltypeUniverse +from sfaira.versions.topology_versions import Topologies from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ CustomAccAgg, CustomF1Classwise, CustomFprClasswise, CustomTprClasswise, custom_cce_agg @@ -350,21 +356,22 @@ def train( } # Set callbacks. 
- cbs = [ - tf.keras.callbacks.EarlyStopping( + cbs = [] + if patience is not None and patience > 0: + cbs.append(tf.keras.callbacks.EarlyStopping( monitor='val_loss', patience=patience, restore_best_weights=True, verbose=verbose - ), - tf.keras.callbacks.ReduceLROnPlateau( + )) + if lr_schedule_factor is not None and lr_schedule_factor < 1.: + cbs.append(tf.keras.callbacks.ReduceLROnPlateau( monitor='val_loss', factor=lr_schedule_factor, patience=lr_schedule_patience, min_lr=lr_schedule_min_lr, verbose=verbose - ) - ] + )) if log_dir is not None: cbs.append(tf.keras.callbacks.TensorBoard( log_dir=log_dir, @@ -899,6 +906,7 @@ def __init__( cache_path=cache_path ) self.max_class_weight = max_class_weight + self.celltypes_version = CelltypeUniverse(organism=organism) def init_model( self, @@ -918,27 +926,22 @@ def init_model( raise ValueError('unknown topology %s for EstimatorKerasCelltype' % self.model_type) self.model = Model( - organism=self.organism, - organ=self.organ, + celltypes_version=self.celltypes_version, topology_container=self.topology_container, override_hyperpar=override_hyperpar ) @property def ids(self): - return self.model.celltypes_version.ids + return self.celltypes_version.target_universe @property def ntypes(self): - return self.model.celltypes_version.ntypes + return self.celltypes_version.ntypes @property def ontology_ids(self): - return self.model.celltypes_version.ontology_ids - - @property - def ontology(self): - return self.model.celltypes_version.ontology[self.model.celltypes_version.version] + return self.celltypes_version.target_universe def _get_celltype_out( self, diff --git a/sfaira/estimators/losses.py b/sfaira/estimators/losses.py index ad46d3ef9..38765fc80 100644 --- a/sfaira/estimators/losses.py +++ b/sfaira/estimators/losses.py @@ -1,6 +1,8 @@ -import abc -import tensorflow as tf import numpy as np +try: + import tensorflow as tf +except ImportError: + tf = None class LossLoglikelihoodNb(tf.keras.losses.Loss): diff --git 
a/sfaira/estimators/metrics.py b/sfaira/estimators/metrics.py index 864eb0134..f075b9da5 100644 --- a/sfaira/estimators/metrics.py +++ b/sfaira/estimators/metrics.py @@ -1,6 +1,8 @@ -import abc import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None def custom_mse(y_true, y_pred, sample_weight=None): diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 71c2ae34a..0b4549b7e 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -8,6 +8,7 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse +from sfaira.consts import OntologyContainerSfaira from sfaira.versions.topology_versions import Topologies @@ -34,6 +35,7 @@ def __init__( """ :param model_lookuptable: model_lookuptable. """ + self._ontology_container_sfaira = OntologyContainerSfaira() if model_lookuptable is not None: # check if models in repository self.ontology = self.load_ontology_from_model_ids(model_lookuptable['model_id'].values) self.model_id = None @@ -472,4 +474,8 @@ def set_latest( model_type=self.model_type, topology_id=self.model_topology ) - self.celltypes = CelltypeUniverse(organism=self.organism).load_target_universe(organ=self.organ) + self.celltypes = CelltypeUniverse( + cl=self._ontology_container_sfaira.ontology_cell_types, + uberon=self._ontology_container_sfaira.ontology_organ, + organism=self.organism + ).load_target_universe(organ=self.organ) diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index 318937651..2b67191e8 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -1,8 +1,11 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union -import sfaira.versions.metadata as metadata +from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.topology_versions import Topologies from 
sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -95,12 +98,10 @@ def __init__( class CellTypeMarkerVersioned(CellTypeMarker): - cell_type_version: metadata.CelltypeUniverse def __init__( self, - organism: str, - organ: str, + celltypes_version: CelltypeUniverse, topology_container: Topologies, override_hyperpar: Union[dict, None] = None ): @@ -113,14 +114,14 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. - unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) + unkown_already_included = np.any([x.lower() == "unknown" for x in celltypes_version.target_universe]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( in_dim=topology_container.ngenes, - out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, + out_dim=celltypes_version.ntypes if unkown_already_included else celltypes_version.ntypes + 1, **hyperpar ) print('passed hyperpar: \n', hyperpar) @@ -135,7 +136,6 @@ def __init__( ("genome_size", self.genome_size), ("model_class", self.model_class), ("model_type", self.model_type), - ("ntypes", self.celltypes_version.ntypes), - ("celltypes_version", self.celltypes_version.version) + ("ntypes", celltypes_version.ntypes), ] ) diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index fceb9aa55..59d998052 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -1,8 +1,11 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union -import sfaira.versions.metadata as metadata +from sfaira.versions.metadata import CelltypeUniverse from 
sfaira.versions.topology_versions import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -70,12 +73,10 @@ def __init__( class CellTypeMlpVersioned(CellTypeMlp): - cell_type_version: metadata.CelltypeUniverse def __init__( self, - organism: str, - organ: str, + celltypes_version: CelltypeUniverse, topology_container: Topologies, override_hyperpar: Union[dict, None] = None ): @@ -88,14 +89,14 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. - unkown_already_included = np.any([x.lower() == "unknown" for x in self.celltypes_version.ids]) + unkown_already_included = np.any([x.lower() == "unknown" for x in celltypes_version.target_universe]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( in_dim=topology_container.ngenes, - out_dim=self.celltypes_version.ntypes if unkown_already_included else self.celltypes_version.ntypes + 1, + out_dim=celltypes_version.ntypes if unkown_already_included else celltypes_version.ntypes + 1, **hyperpar ) print('passed hyperpar: \n', hyperpar) @@ -110,7 +111,6 @@ def __init__( ("genome_size", self.genome_size), ("model_class", self.model_class), ("model_type", self.model_type), - ("ntypes", self.celltypes_version.ntypes), - ("celltypes_version", self.celltypes_version.version) + ("ntypes", celltypes_version.ntypes), ] ) diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index 44b6a6649..b0ac37e08 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -1,5 +1,8 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union, Tuple from sfaira.models.embedding.output_layers import 
NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 60da08920..8a8d91790 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -1,5 +1,8 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ diff --git a/sfaira/models/embedding/output_layers.py b/sfaira/models/embedding/output_layers.py index 65ac4a56a..1c7e699b0 100644 --- a/sfaira/models/embedding/output_layers.py +++ b/sfaira/models/embedding/output_layers.py @@ -1,4 +1,7 @@ -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None class NegBinOutput(tf.keras.layers.Layer): diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index 433372226..21008ecc1 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -1,5 +1,8 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union, Tuple from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 3dacf965b..ece67115b 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -1,5 +1,8 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import Union, Tuple from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index 6edace92a..b92369c45 100644 --- a/sfaira/models/embedding/vaevamp.py +++ 
b/sfaira/models/embedding/vaevamp.py @@ -1,5 +1,8 @@ import numpy as np -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None from typing import List, Union, Tuple from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ diff --git a/sfaira/models/made.py b/sfaira/models/made.py index eec724905..67b6ec873 100644 --- a/sfaira/models/made.py +++ b/sfaira/models/made.py @@ -2,15 +2,14 @@ import numpy as np -import tensorflow.keras.backend as K -from tensorflow.keras.layers import Layer, BatchNormalization -from tensorflow.keras import initializers -from tensorflow.keras import activations -from tensorflow.keras import regularizers -from tensorflow.keras import constraints +try: + import tensorflow as tf +except ImportError: + tf = None +# ToDo: we are using a lot of tf.keras.backend modules below, can we use tf core instead? -class MaskingDense(Layer): +class MaskingDense(tf.keras.layers.Layer): """ Just copied code from keras Dense layer and added masking and a few other tricks: - Direct auto-regressive connections to output - Allows a second (non-autoregressive) input that is fully connected to first hidden @@ -47,25 +46,24 @@ def __init__(self, units, out_units, self.units = units self.out_units = out_units self.hidden_layers = hidden_layers - self.activation = activations.get(activation) - self.out_activation = activations.get(out_activation) # None gives linear activation - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.out_kernel_initializer = initializers.get(out_kernel_initializer) - self.out_bias_initializer = initializers.get(out_bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = 
constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) + self.activation = tf.keras.activations.get(activation) + self.out_activation = tf.keras.activations.get(out_activation) # None gives linear activation + self.kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self.bias_initializer = tf.keras.initializers.get(bias_initializer) + self.out_kernel_initializer = tf.keras.initializers.get(out_kernel_initializer) + self.out_bias_initializer = tf.keras.initializers.get(out_bias_initializer) + self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer) + self.activity_regularizer = tf.keras.regularizers.get(activity_regularizer) + self.kernel_constraint = tf.keras.constraints.get(kernel_constraint) + self.bias_constraint = tf.keras.constraints.get(bias_constraint) self.batchnorm = batchnorm def dropout_wrapper(self, inputs, training): if 0. < self.rate < 1.: def dropped_inputs(): - return K.dropout(inputs, self.rate, noise_shape=None, seed=None) - return K.in_train_phase(dropped_inputs, inputs, - training=training) + return tf.keras.backend.dropout(inputs, self.rate, noise_shape=None, seed=None) + return tf.keras.backend.in_train_phase(dropped_inputs, inputs, training=training) return inputs @@ -119,7 +117,7 @@ def vals(): else: yield 1 if prev_sel[x] <= input_sel[y] else 0 - return K.constant(list(vals()), dtype='float32', shape=shape), input_sel + return tf.keras.backend.constant(list(vals()), dtype='float32', shape=shape), input_sel def build(self, input_shape): if isinstance(input_shape, list): @@ -156,7 +154,7 @@ def build(self, input_shape): prev_sel = kernel_sel shape = (self.units, self.units) - self.batch_norms.append(BatchNormalization(center=True, scale=True)) + self.batch_norms.append(tf.keras.layers.BatchNormalization(center=True, scale=True)) # Direct connection between input/output if self.hidden_layers > 0: @@ -190,8 
+188,8 @@ def call(self, inputs, training=None): output = inputs if other_input is not None: - other = K.dot(other_input, self.other_kernel) - other = K.bias_add(other, self.other_bias) + other = tf.keras.backend.dot(other_input, self.other_kernel) + other = tf.keras.backend.bias_add(other, self.other_bias) other = self.activation(other) # Hidden layer + mask @@ -199,13 +197,13 @@ def call(self, inputs, training=None): # i=0: input_dim -> masking_dim # i>0: masking_dim -> masking_dim weight = self.kernels[i] * self.kernel_masks[i] - output = K.dot(output, weight) + output = tf.keras.backend.dot(output, weight) # "other" input if i == 0 and other_input is not None: output = output + other - output = K.bias_add(output, self.biases[i]) + output = tf.keras.backend.bias_add(output, self.biases[i]) output = self.activation(output) if self.batchnorm: output = self.batch_norms[i](output) @@ -213,15 +211,15 @@ def call(self, inputs, training=None): # out_act(bias + (V dot M_v)h(x) + (A dot M_a)x + (other dot M_other)other) # masking_dim -> input_dim - output = K.dot(output, self.out_kernel * self.out_kernel_mask) + output = tf.keras.backend.dot(output, self.out_kernel * self.out_kernel_mask) # Direct connection if self.hidden_layers > 0: # input_dim -> input_dim - direct = K.dot(inputs, self.direct_kernel * self.direct_kernel_mask) + direct = tf.keras.backend.dot(inputs, self.direct_kernel * self.direct_kernel_mask) output = output + direct - output = K.bias_add(output, self.out_bias) + output = tf.keras.backend.bias_add(output, self.out_bias) output = self.out_activation(output) output = self.dropout_wrapper(output, training) diff --git a/sfaira/models/pp_layer.py b/sfaira/models/pp_layer.py index 0e7300e23..f1dcee25f 100644 --- a/sfaira/models/pp_layer.py +++ b/sfaira/models/pp_layer.py @@ -1,4 +1,7 @@ -import tensorflow as tf +try: + import tensorflow as tf +except ImportError: + tf = None class PreprocInput(tf.keras.layers.Layer): diff --git a/sfaira/train/summaries.py 
b/sfaira/train/summaries.py index aab593f21..742be3464 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -7,6 +7,7 @@ from typing import Union, List import os +from sfaira.versions.metadata import CelltypeUniverse from sfaira.train.train_model import TargetZoos from sfaira.estimators import EstimatorKerasEmbedding @@ -821,7 +822,6 @@ def plot_best_classwise_heatmap( organ: str, organism: str, datapath: str, - celltype_version: str = "0", partition_select: str = "val", metric_select: str = "custom_cce_agg", metric_show: str = "f1", @@ -836,7 +836,6 @@ def plot_best_classwise_heatmap( :param organ: Organ to plot in heatmap. :param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository - :param celltype_version: Version in sfaira celltype universe :param partition_select: Based on which partition to select the best model - train - val @@ -876,7 +875,7 @@ def plot_best_classwise_heatmap( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - tz = TargetZoos(path=datapath) + tz = TargetZoos(data_path=datapath) if organism == "human": dataset = tz.data_human[organ] elif organism == "mouse": @@ -885,23 +884,18 @@ def plot_best_classwise_heatmap( raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'loaders')")) dataset.load() - raise NotImplementedError("deprecated metadata code here") - """ cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = ORGANISM_DICT.copy() - celltype_versions[organism][organ].set_version(celltype_version) - leafnodes = celltype_versions[organism][organ].ids - ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] - celltypelist = list(cell_counts.keys()).copy() + cu = CelltypeUniverse(organism=organism) + # TODO set target universe. 
for k in celltypelist: - if k not in leafnodes: - if k not in ontology.keys(): + if k not in cu.target_universe: + if k not in cu.ontology.node_names: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in ontology[k]: + for leaf in cu[k]: # TODO get leaves if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1 / len(ontology[k]) + cell_counts[leaf] += 1 / len(cu[k]) # TODO get leaves del cell_counts[k] # Compute class-wise metrics @@ -978,14 +972,12 @@ def plot_best_classwise_heatmap( cbar=False ) return fig, axs, sns_data_heatmap - """ def plot_best_classwise_scatter( self, organ: str, organism: str, datapath: str, - celltype_version: str = "0", partition_select: str = "val", metric_select: str = "custom_cce_agg", metric_show: str = "f1", @@ -1044,7 +1036,7 @@ def plot_best_classwise_scatter( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - tz = TargetZoos(path=datapath) + tz = TargetZoos(data_path=datapath) if organism == "human": dataset = tz.data_human[organ] elif organism == "mouse": @@ -1053,23 +1045,18 @@ def plot_best_classwise_scatter( raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'loaders')")) dataset.load() - raise NotImplementedError("deprecated metadata code here") - """ cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() - celltype_versions = ORGANISM_DICT.copy() - celltype_versions[organism][organ].set_version(celltype_version) - leafnodes = celltype_versions[organism][organ].ids - ontology = celltype_versions[organism][organ].ontology[celltype_version]["names"] - celltypelist = list(cell_counts.keys()).copy() + cu = CelltypeUniverse(organism=organism) + # TODO set target universe. 
for k in celltypelist: - if k not in leafnodes: - if k not in ontology.keys(): + if k not in cu.target_universe: + if k not in cu.ontology.node_names: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in ontology[k]: + for leaf in cu[k]: # TODO get leaves if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1 / len(ontology[k]) + cell_counts[leaf] += 1 / len(cu[k]) # TODO get leaves del cell_counts[k] # Compute class-wise metrics @@ -1115,7 +1102,7 @@ def plot_best_classwise_scatter( if c in cell_counts.keys(): n_cells.append(np.round(cell_counts[c])) else: - warnings.warn(f"Celltype {c} from cell ontology not found in {organism} {organ} dataset") + warnings.warn(f"Celltype {c} from cell cu not found in {organism} {organ} dataset") n_cells.append(np.nan) n_cells = np.array(n_cells)[:, None] sns_data_scatter = pandas.DataFrame( @@ -1153,7 +1140,6 @@ def plot_best_classwise_scatter( ) return fig, axs, sns_data_scatter - """ class SummarizeGridsearchEmbedding(GridsearchContainer): @@ -1388,7 +1374,7 @@ def get_gradients_by_celltype( else: print('Compute gradients (1/3): load data') # load data - tz = TargetZoos(path=datapath) + tz = TargetZoos(data_path=datapath) if organism == "human": dataset = tz.data_human[organ] elif organism == "mouse": diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index e95b20a55..fa845dbb0 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -16,88 +16,88 @@ class TargetZoos: Parameters ---------- - path : str + data_path : str Path to the files for this dataset on disk meta_path : str Path to the meta files for this dataset on disk """ - def __init__(self, path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None): - if path is not None: + def __init__(self, data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None): + if data_path is not None: from 
sfaira.data.dataloaders.anatomical_groups import mouse, human self.data_mouse = { - "bladder": mouse.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), - "brain": mouse.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), - "diaphragm": mouse.DatasetGroupDiaphragm(path=path, meta_path=meta_path, cache_path=cache_path), - "adipose": mouse.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), - "heart": mouse.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), - "kidney": mouse.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), - "colon": mouse.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), - "muscle": mouse.DatasetGroupMuscle(path=path, meta_path=meta_path, cache_path=cache_path), - "liver": mouse.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), - "lung": mouse.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), - "mammarygland": mouse.DatasetGroupMammaryGland(path=path, meta_path=meta_path, cache_path=cache_path), - "bone": mouse.DatasetGroupBone(path=path, meta_path=meta_path, cache_path=cache_path), - "femalegonad": mouse.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), - "pancreas": mouse.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), - "blood": mouse.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), - "placenta": mouse.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), - "prostate": mouse.DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path), - "rib": mouse.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), - "skin": mouse.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), - "ileum": mouse.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), - "spleen": 
mouse.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), - "stomach": mouse.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), - "malegonad": mouse.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), - "thymus": mouse.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), - "tongue": mouse.DatasetGroupTongue(path=path, meta_path=meta_path, cache_path=cache_path), - "trachea": mouse.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), - "uterus": mouse.DatasetGroupUterus(path=path, cache_path=cache_path), + "bladder": mouse.DatasetGroupBladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "brain": mouse.DatasetGroupBrain(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "diaphragm": mouse.DatasetGroupDiaphragm(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "adipose": mouse.DatasetGroupAdipose(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "heart": mouse.DatasetGroupHeart(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "kidney": mouse.DatasetGroupKidney(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "colon": mouse.DatasetGroupColon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "muscle": mouse.DatasetGroupMuscle(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "liver": mouse.DatasetGroupLiver(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "lung": mouse.DatasetGroupLung(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "mammarygland": mouse.DatasetGroupMammaryGland(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "bone": mouse.DatasetGroupBone(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "femalegonad": mouse.DatasetGroupFemalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "pancreas": 
mouse.DatasetGroupPancreas(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "blood": mouse.DatasetGroupBlood(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "placenta": mouse.DatasetGroupPlacenta(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "prostate": mouse.DatasetGroupProstate(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "rib": mouse.DatasetGroupRib(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "skin": mouse.DatasetGroupSkin(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "ileum": mouse.DatasetGroupIleum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "spleen": mouse.DatasetGroupSpleen(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "stomach": mouse.DatasetGroupStomach(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "malegonad": mouse.DatasetGroupMalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "thymus": mouse.DatasetGroupThymus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "tongue": mouse.DatasetGroupTongue(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "trachea": mouse.DatasetGroupTrachea(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + "uterus": mouse.DatasetGroupUterus(data_path=data_path, cache_path=cache_path), } self.data_human = { - 'adipose': human.DatasetGroupAdipose(path=path, meta_path=meta_path, cache_path=cache_path), - 'adrenalgland': human.DatasetGroupAdrenalgland(path=path, meta_path=meta_path, cache_path=cache_path), - 'artery': human.DatasetGroupArtery(path=path, meta_path=meta_path, cache_path=cache_path), - 'bladder': human.DatasetGroupBladder(path=path, meta_path=meta_path, cache_path=cache_path), - 'blood': human.DatasetGroupBlood(path=path, meta_path=meta_path, cache_path=cache_path), - 'bone': human.DatasetGroupBone(path=path, meta_path=meta_path, 
cache_path=cache_path), - 'brain': human.DatasetGroupBrain(path=path, meta_path=meta_path, cache_path=cache_path), - 'calvaria': human.DatasetGroupCalvaria(path=path, meta_path=meta_path, cache_path=cache_path), - 'cervix': human.DatasetGroupCervix(path=path, meta_path=meta_path, cache_path=cache_path), - 'chorionicvillus': human.DatasetGroupChorionicvillus(path=path, meta_path=meta_path, cache_path=cache_path), - 'colon': human.DatasetGroupColon(path=path, meta_path=meta_path, cache_path=cache_path), - 'duodenum': human.DatasetGroupDuodenum(path=path, meta_path=meta_path, cache_path=cache_path), - 'epityphlon': human.DatasetGroupEpityphlon(path=path, meta_path=meta_path, cache_path=cache_path), - 'esophagus': human.DatasetGroupEsophagus(path=path, meta_path=meta_path, cache_path=cache_path), - 'eye': human.DatasetGroupEye(path=path, meta_path=meta_path, cache_path=cache_path), - 'fallopiantube': human.DatasetGroupFallopiantube(path=path, meta_path=meta_path, cache_path=cache_path), - 'femalegonad': human.DatasetGroupFemalegonad(path=path, meta_path=meta_path, cache_path=cache_path), - 'gallbladder': human.DatasetGroupGallbladder(path=path, meta_path=meta_path, cache_path=cache_path), - 'heart': human.DatasetGroupHeart(path=path, meta_path=meta_path, cache_path=cache_path), - 'hesc': human.DatasetGroupHesc(path=path, meta_path=meta_path, cache_path=cache_path), - 'ileum': human.DatasetGroupIleum(path=path, meta_path=meta_path, cache_path=cache_path), - 'jejunum': human.DatasetGroupJejunum(path=path, meta_path=meta_path, cache_path=cache_path), - 'kidney': human.DatasetGroupKidney(path=path, meta_path=meta_path, cache_path=cache_path), - 'liver': human.DatasetGroupLiver(path=path, meta_path=meta_path, cache_path=cache_path), - 'lung': human.DatasetGroupLung(path=path, meta_path=meta_path, cache_path=cache_path), - 'malegonad': human.DatasetGroupMalegonad(path=path, meta_path=meta_path, cache_path=cache_path), - 'muscle': human.DatasetGroupMuscle(path=path, 
meta_path=meta_path, cache_path=cache_path), - 'omentum': human.DatasetGroupOmentum(path=path, meta_path=meta_path, cache_path=cache_path), - 'pancreas': human.DatasetGroupPancreas(path=path, meta_path=meta_path, cache_path=cache_path), - 'placenta': human.DatasetGroupPlacenta(path=path, meta_path=meta_path, cache_path=cache_path), - 'pleura': human.DatasetGroupPleura(path=path, meta_path=meta_path, cache_path=cache_path), - 'prostate': human.DatasetGroupProstate(path=path, meta_path=meta_path, cache_path=cache_path), - 'rectum': human.DatasetGroupRectum(path=path, meta_path=meta_path, cache_path=cache_path), - 'rib': human.DatasetGroupRib(path=path, meta_path=meta_path, cache_path=cache_path), - 'skin': human.DatasetGroupSkin(path=path, meta_path=meta_path, cache_path=cache_path), - 'spinalcord': human.DatasetGroupSpinalcord(path=path, meta_path=meta_path, cache_path=cache_path), - 'spleen': human.DatasetGroupSpleen(path=path, meta_path=meta_path, cache_path=cache_path), - 'stomach': human.DatasetGroupStomach(path=path, meta_path=meta_path, cache_path=cache_path), - 'thymus': human.DatasetGroupThymus(path=path, meta_path=meta_path, cache_path=cache_path), - 'thyroid': human.DatasetGroupThyroid(path=path, meta_path=meta_path, cache_path=cache_path), - 'trachea': human.DatasetGroupTrachea(path=path, meta_path=meta_path, cache_path=cache_path), - 'ureter': human.DatasetGroupUreter(path=path, meta_path=meta_path, cache_path=cache_path), - 'uterus': human.DatasetGroupUterus(path=path, meta_path=meta_path, cache_path=cache_path), + 'adipose': human.DatasetGroupAdipose(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'adrenalgland': human.DatasetGroupAdrenalgland(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'artery': human.DatasetGroupArtery(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'bladder': human.DatasetGroupBladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'blood': 
human.DatasetGroupBlood(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'bone': human.DatasetGroupBone(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'brain': human.DatasetGroupBrain(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'calvaria': human.DatasetGroupCalvaria(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'cervix': human.DatasetGroupCervix(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'chorionicvillus': human.DatasetGroupChorionicvillus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'colon': human.DatasetGroupColon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'duodenum': human.DatasetGroupDuodenum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'epityphlon': human.DatasetGroupEpityphlon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'esophagus': human.DatasetGroupEsophagus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'eye': human.DatasetGroupEye(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'fallopiantube': human.DatasetGroupFallopiantube(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'femalegonad': human.DatasetGroupFemalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'gallbladder': human.DatasetGroupGallbladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'heart': human.DatasetGroupHeart(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'hesc': human.DatasetGroupHesc(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'ileum': human.DatasetGroupIleum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'jejunum': human.DatasetGroupJejunum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'kidney': human.DatasetGroupKidney(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 
'liver': human.DatasetGroupLiver(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'lung': human.DatasetGroupLung(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'malegonad': human.DatasetGroupMalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'muscle': human.DatasetGroupMuscle(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'omentum': human.DatasetGroupOmentum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'pancreas': human.DatasetGroupPancreas(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'placenta': human.DatasetGroupPlacenta(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'pleura': human.DatasetGroupPleura(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'prostate': human.DatasetGroupProstate(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'rectum': human.DatasetGroupRectum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'rib': human.DatasetGroupRib(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'skin': human.DatasetGroupSkin(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'spinalcord': human.DatasetGroupSpinalcord(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'spleen': human.DatasetGroupSpleen(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'stomach': human.DatasetGroupStomach(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'thymus': human.DatasetGroupThymus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'thyroid': human.DatasetGroupThyroid(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'trachea': human.DatasetGroupTrachea(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'ureter': human.DatasetGroupUreter(data_path=data_path, meta_path=meta_path, cache_path=cache_path), + 'uterus': 
human.DatasetGroupUterus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), } else: @@ -167,7 +167,7 @@ def __init__(self, data_path: str, meta_path: str): fn_backed_obs = ".".join(data_path.split(".")[:-1]) + "_obs.csv" self.data.obs = pd.read_csv(fn_backed_obs) else: - super(TrainModel, self).__init__(path=data_path, meta_path=meta_path) + super(TrainModel, self).__init__(data_path=data_path, meta_path=meta_path) self.data = None @abc.abstractmethod @@ -375,22 +375,16 @@ def _save_specific( with open(fn + '_ontology_names.pickle', 'wb') as f: pickle.dump(obj=self.estimator.ids, file=f) - raise NotImplementedError("fix celltype versions code here, deprecated") - """ cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() - celltype_versions[self.zoo.organism][self.zoo.organ].set_version(self.zoo.model_version.split(".")[0]) - leafnodes = celltype_versions[self.zoo.organism][self.zoo.organ].ids - ontology = celltype_versions[self.zoo.organism][self.zoo.organ].ontology[self.zoo.model_version.split(".")[0]]["names"] for k in cell_counts.keys(): - if k not in leafnodes: - if k not in ontology.keys(): + if k not in self.estimator.ids: + if k not in self.estimator.celltypes_version.ontology.node_ids: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in ontology[k]: + for leaf in self.estimator.celltypes_version.ontology.node_ids: if leaf not in cell_counts_leaf.keys(): cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1 / len(ontology[k]) + cell_counts_leaf[leaf] += 1 / len(self.estimator.celltypes_version.ontology.node_ids) del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) - """ diff --git a/sfaira/versions/metadata/human/__init__.py b/sfaira/unit_tests/__init__.py similarity index 100% rename from 
sfaira/versions/metadata/human/__init__.py rename to sfaira/unit_tests/__init__.py diff --git a/sfaira/unit_tests/data/__init__.py b/sfaira/unit_tests/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/data/test_clean_celltype_maps.py b/sfaira/unit_tests/data/test_clean_celltype_maps.py new file mode 100644 index 000000000..4ce259e08 --- /dev/null +++ b/sfaira/unit_tests/data/test_clean_celltype_maps.py @@ -0,0 +1,13 @@ +from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders + + +def test_map_celltype_to_ontology(): + # Paths do not matter here as data sets are not loaded for these operations. + dsgl = DatasetSuperGroupLoaders( + data_path="~", + meta_path="~", + cache_path="~" + ) + for x in dsgl.dataset_groups: + print(x.ids) + x.clean_ontology_class_map() diff --git a/sfaira/unit_tests/test_data_utils.py b/sfaira/unit_tests/data/test_data_utils.py similarity index 100% rename from sfaira/unit_tests/test_data_utils.py rename to sfaira/unit_tests/data/test_data_utils.py diff --git a/sfaira/unit_tests/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py similarity index 93% rename from sfaira/unit_tests/test_dataset.py rename to sfaira/unit_tests/data/test_dataset.py index 207a7e7cd..3f84c595b 100644 --- a/sfaira/unit_tests/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -7,9 +7,9 @@ from sfaira.data import DatasetSuperGroupSfaira -class TestDatasetGroups(unittest.TestCase): - dir_data: str = "./test_data" - dir_meta: str = "./test_data/meta" +class TestDatasetGroupSfaira(unittest.TestCase): + dir_data: str = "../test_data" + dir_meta: str = "../test_data/meta" def test_load(self): ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) @@ -25,8 +25,8 @@ def test_adata(self): class TestDatasetSuperGroups(unittest.TestCase): - dir_data: str = "./test_data" - dir_meta: str = "./test_data/meta" + dir_data: str = "../test_data" + dir_meta: str 
= "../test_data/meta" def test_load(self): ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) diff --git a/sfaira/unit_tests/data_contribution/__init__.py b/sfaira/unit_tests/data_contribution/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/conftest.py b/sfaira/unit_tests/data_contribution/conftest.py similarity index 100% rename from sfaira/unit_tests/conftest.py rename to sfaira/unit_tests/data_contribution/conftest.py diff --git a/sfaira/unit_tests/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py similarity index 96% rename from sfaira/unit_tests/test_data_template.py rename to sfaira/unit_tests/data_contribution/test_data_template.py index 352288c27..6702d7f19 100644 --- a/sfaira/unit_tests/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -121,16 +121,18 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. # function. dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) # Write this directly into sfaira installation so that it can be committed via git. + fn = os.path.join(cwd, file_module + ".tsv") dsg_f.write_ontology_class_map( - fn=os.path.join(cwd, file_module + ".csv"), + fn=fn, protected_writing=True, n_suggest=4, ) else: for k, v in ds.datasets.items(): # Write this directly into sfaira installation so that it can be committed via git. + fn = os.path.join("/".join(file_path.split("/")[:-1]), v.fn_ontology_class_map_tsv) v.write_ontology_class_map( - fn=os.path.join("/".join(file_path.split("/")[:-1]), v.fn_ontology_class_map_csv), + fn=fn, protected_writing=True, n_suggest=10, ) @@ -152,5 +154,6 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. 
load_raw=False, allow_caching=False ) + ds.clean_ontology_class_map() # Test concatenation: _ = ds.adata diff --git a/sfaira/unit_tests/estimators/__init__.py b/sfaira/unit_tests/estimators/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py similarity index 97% rename from sfaira/unit_tests/test_estimator.py rename to sfaira/unit_tests/estimators/test_estimator.py index 9dd68a194..35f42a704 100644 --- a/sfaira/unit_tests/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -5,8 +5,8 @@ from typing import Union import unittest -from sfaira.unit_tests.external import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.unit_tests.external import metadata, SuperGenomeContainer, Topologies +from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding +from sfaira.versions.topology_versions import Topologies class _TestEstimator: diff --git a/sfaira/unit_tests/external.py b/sfaira/unit_tests/external.py deleted file mode 100644 index 179f7c4d3..000000000 --- a/sfaira/unit_tests/external.py +++ /dev/null @@ -1,6 +0,0 @@ -from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding -from sfaira.interface.user_interface import UserInterface -import sfaira.versions.metadata as celltype_versions -from sfaira.versions.genome_versions import SuperGenomeContainer -from sfaira.versions.topology_versions import Topologies diff --git a/sfaira/unit_tests/interface/__init__.py b/sfaira/unit_tests/interface/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/test_userinterface.py b/sfaira/unit_tests/interface/test_userinterface.py similarity index 91% rename from sfaira/unit_tests/test_userinterface.py rename to 
sfaira/unit_tests/interface/test_userinterface.py index aa99a8ee7..504e91984 100644 --- a/sfaira/unit_tests/test_userinterface.py +++ b/sfaira/unit_tests/interface/test_userinterface.py @@ -3,7 +3,7 @@ from typing import Union import unittest -from sfaira.unit_tests.external import UserInterface +from sfaira.interface import UserInterface class TestUi(unittest.TestCase): @@ -34,7 +34,7 @@ def test_basic(self): :return: """ - temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), 'test_data') + temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), '../test_data') self.ui = UserInterface(custom_repo=temp_fn, sfaira_repo=False) def _test_kipoi(self): @@ -44,7 +44,7 @@ def _test_kipoi(self): :return: """ - temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), 'test_data') + temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), '../test_data') self.ui = UserInterface(custom_repo=temp_fn, sfaira_repo=False) self.ui.compute_embedding_kipoi() diff --git a/sfaira/unit_tests/models/__init__.py b/sfaira/unit_tests/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/test_models.py b/sfaira/unit_tests/models/test_models.py similarity index 100% rename from sfaira/unit_tests/test_models.py rename to sfaira/unit_tests/models/test_models.py diff --git a/sfaira/unit_tests/test_celltype_universe.py b/sfaira/unit_tests/test_celltype_universe.py deleted file mode 100644 index 7838f64cd..000000000 --- a/sfaira/unit_tests/test_celltype_universe.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import pandas as pd -import unittest - -from sfaira.versions.metadata import OntologyObo, ORGANISM_DICT - - -class TestCellTypeUniverse(unittest.TestCase): - dir_debugging = "~/Desktop/temp/" - dir_debugging2 = "~/Desktop/temp2/" - dir_debugging3 = "~/Desktop/temp3/" - - def test_debugging(self, reduced=False): - import csv - onto = OntologyObo() - for k, v in 
ORGANISM_DICT.items(): - for kk, vv in v.items(): - universe = vv.celltype_universe["0"] - tab = onto.find_nodes_fuzzy(universe, match_only=True) - if not np.all(tab["matched"].values): - tab2 = onto.find_nodes_fuzzy(universe, match_only=False, include_old=True, omit_list=["unkown"]) - if not reduced: - tab2.to_csv( - self.dir_debugging + k + "_" + kk + "_universe.csv", - index=False, quoting=csv.QUOTE_NONE, sep=";" - ) - else: - tab2.loc[tab["matched"].values is False].to_csv( - self.dir_debugging + k + "_" + kk + "_universe.csv", - index=False, quoting=csv.QUOTE_NONE - ) - - def test_debugging2(self): - import csv - onto = OntologyObo() - for k, v in ORGANISM_DICT.items(): - for kk, vv in v.items(): - names = list(vv.ontology["0"]["names"].keys()) - tab = onto.find_nodes_fuzzy(names, match_only=True) - if not np.all(tab["matched"].values): - tab = onto.find_nodes_fuzzy(names, match_only=False, include_old=True, omit_list=["unkown"]) - tab.to_csv( - self.dir_debugging2 + k + "_" + kk + "_universe.csv", - index=False, quoting=csv.QUOTE_NONE, sep=";" - ) - - def test_debugging3(self): - import csv - onto = OntologyObo() - tab = pd.DataFrame({"name,id": [",".join([x, y]) for x, y in zip( - [v["name"] for k, v in onto.graph.nodes.items()], - list(onto.graph.nodes.keys()) - )]}) - tab.to_csv( - self.dir_debugging3 + "onto_full.csv", - index=False, quoting=csv.QUOTE_NONE, sep=";" - ) - - def test_only(self): - onto = OntologyObo() - for k, v in ORGANISM_DICT.items(): - for kk, vv in v.items(): - universe = vv.celltype_universe["0"] - tab = onto.find_nodes_fuzzy(universe, match_only=True) - print(tab.loc[tab["matched"].values is False]) - assert np.all(tab["matched"].values), f"{k} {kk}" - - -if __name__ == '__main__': - unittest.main() diff --git a/sfaira/unit_tests/versions/__init__.py b/sfaira/unit_tests/versions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/versions/test_ontologies.py 
b/sfaira/unit_tests/versions/test_ontologies.py new file mode 100644 index 000000000..46c4a4385 --- /dev/null +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -0,0 +1,10 @@ +from sfaira.versions.metadata import OntologyUberon, OntologyCelltypes, OntologyMmusdv, OntologyHsapdv, \ + OntologyHancestro + + +def test_cl(): + _ = OntologyCelltypes(branch="v2021-02-01") + + +def test_uberon(): + _ = OntologyUberon() diff --git a/sfaira/unit_tests/test_zoo.py b/sfaira/unit_tests/versions/test_zoo.py similarity index 92% rename from sfaira/unit_tests/test_zoo.py rename to sfaira/unit_tests/versions/test_zoo.py index f1f7db52c..9204117ae 100644 --- a/sfaira/unit_tests/test_zoo.py +++ b/sfaira/unit_tests/versions/test_zoo.py @@ -5,7 +5,7 @@ from typing import Union import unittest -from sfaira.unit_tests.external import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding class _TestZoo: @@ -65,7 +65,7 @@ class TestZooKerasEmbedding(unittest.TestCase, _TestZoo): def init_zoo(self): package_dir = str(os.path.dirname(os.path.abspath(__file__))) lookup_table = pd.read_csv( - os.path.join(package_dir, 'test_data', 'model_lookuptable.csv'), + os.path.join(package_dir, '../test_data', 'model_lookuptable.csv'), header=0, index_col=0 ) self.zoo = ModelZooEmbedding(model_lookuptable=lookup_table) @@ -83,7 +83,7 @@ class TestZooKerasCelltype(unittest.TestCase, _TestZoo): def init_zoo(self): package_dir = str(os.path.dirname(os.path.abspath(__file__))) lookup_table = pd.read_csv( - os.path.join(package_dir, 'test_data', 'model_lookuptable.csv'), + os.path.join(package_dir, '../test_data', 'model_lookuptable.csv'), header=0, index_col=0 ) self.zoo = ModelZooCelltype(model_lookuptable=lookup_table) diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index bb253513f..b221c7ed6 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py 
@@ -1,7 +1,4 @@ -from .base import Ontology, OntologyList, OntologyObo, CelltypeUniverse, \ +from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyObo, \ OntologyCelltypes, OntologyUberon, OntologyHancestro, OntologyHsapdv, OntologyMmusdv, \ OntologySinglecellLibraryConstruction -ONTOLOGY_UBERON = OntologyUberon() -ONTOLOGY_HSAPDV = OntologyHsapdv() -ONTOLOGY_MMUSDV = OntologyMmusdv() -ONTOLOGY_SLC = OntologySinglecellLibraryConstruction() +from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index a1a85008d..6571993dd 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -2,13 +2,15 @@ import networkx import numpy as np import obonet -import pandas as pd +import os import requests from typing import Dict, List, Tuple, Union import warnings from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE +FILE_PATH = __file__ + """ Ontology managament classes. 
@@ -491,14 +493,34 @@ class OntologyCelltypes(OntologyExtendedObo): def __init__( self, + branch: str, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/cl.obo") + # Identify cache: + ontology_cache_dir = os.path.join("/".join(FILE_PATH.split("/")[:-4]), "cache/ontologies/cl/") + fn = f"{branch}_cl.obo" + fn_path = os.path.join(ontology_cache_dir, fn) + # Download if necessary: + if not os.path.isfile(fn_path): + + def download_cl(): + url = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" + print(f"Downloading: {fn}") + if not os.path.exists(ontology_cache_dir): + os.makedirs(ontology_cache_dir) + r = requests.get(url, allow_redirects=True) + open(fn_path, 'wb').write(r.content) + + download_cl() + super().__init__(obo=fn_path) # Clean up nodes: nodes_to_delete = [] for k, v in self.graph.nodes.items(): - if "namespace" not in v.keys() or v["namespace"] != "cell": + # Some terms are not associated with the namespace cell but are cell types, + # we identify these based on their ID nomenclature here. + if ("namespace" in v.keys() and v["namespace"] not in ["cell", "cl"]) or \ + ("namespace" not in v.keys() and str(k)[:2] != "CL"): nodes_to_delete.append(k) elif "name" not in v.keys(): nodes_to_delete.append(k) @@ -511,12 +533,14 @@ def __init__( # All edge types (based on previous download, assert below that this is not extended): edge_types = [ 'is_a', # nomenclature DAG -> include because of annotation coarseness differences + 'derives_from', 'develops_from', # developmental DAG -> include because of developmental differences 'has_part', # ? 'develops_into', # inverse developmental DAG -> do not include + 'part_of', 'RO:0002120', # ? 'RO:0002103', # ? - 'lacks_plasma_membrane_part' # ? + 'lacks_plasma_membrane_part', # ? 
] edges_to_delete = [] for i, x in enumerate(self.graph.edges): @@ -623,397 +647,3 @@ def __init__( "microwell-seq": {"name": "microwell-seq"} } ) - - -class CelltypeUniverse: - """ - Cell type universe (list) and ontology (hierarchy) container class. - - - Basic checks on the organ specific instance are performed in the constructor. - """ - ontology: OntologyCelltypes - _target_universe: Union[List[str], None] - - def __init__(self, organism: str, **kwargs): - """ - - :param organism: Organism, defines ontology extension used. - :param kwargs: - """ - self.onto_cl = OntologyCelltypes(**kwargs) - self.onto_anatomy = OntologyUberon(**kwargs) - self._target_universe = None - self._set_extension(organism=organism) - - def _set_extension(self, organism): - """ - - :param organism: Organism, defines ontology extension used. - """ - if organism == "human": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) - elif organism == "mouse": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) - else: - raise ValueError(f"organism {organism} not found") - - @property - def target_universe(self): - """ - Ontology classes of target universe (understandable cell type names). - - :return: - """ - return self._target_universe - - @target_universe.setter - def target_universe(self, x: List[str]): - # Check that all nodes are valid: - for xx in x: - if xx not in self.onto_cl.nodes: - raise ValueError(f"cell type {xx} was not in ontology") - # Default universe is the full set of leave nodes of ontology: - self.target_universe = self.onto_cl.leaves - self.onto_cl.set_leaves(self.target_universe) - - @property - def target_universe_ids(self): - """ - Ontology IDs of target universe (codified cell type names). - - :return: - """ - return [self.onto_cl.map_class_to_id(x) for x in self._target_universe] - - @property - def ntypes(self): - """ - Number of different cell types in target universe. 
- """ - return len(self.target_universe) - - def __validate_target_universe_table(self, tab: pd.DataFrame): - assert len(tab.columns) == 2 - assert tab.columns[0] == "name" and tab.columns[1] == "id" - - def load_target_universe(self, organ): - """ - - :param organ: Anatomic structure to load target universe for. - :return: - """ - # ToDo: Use pydoc based query of universes stored in ./target_universes/.. - tab = None - self.__validate_target_universe_table(tab=tab) - self.target_universe = None # ToDo - - def read_target_universe_csv(self, fn): - """ - - :param fn: File containing target universe. - :return: - """ - tab = pd.read_csv(fn) - self.__validate_target_universe_table(tab=tab) - self.target_universe = tab["name"].values - - def map_to_target_leaves( - self, - nodes: List[str], - return_type: str = "elements" - ): - """ - Map a given list of nodes to leave nodes defined for this ontology. - :param nodes: - :param return_type: - - "elements": names of mapped leave nodes - "idx": indices in leave note list of of mapped leave nodes - :return: - """ - return [self.onto_cl.map_to_leaves(x, return_type=return_type) for x in nodes] - - def prepare_celltype_map_fuzzy( - self, - source, - match_only: bool = False, - include_synonyms: bool = True, - anatomical_constraint: Union[str, None] = None, - choices_for_perfect_match: bool = True, - omit_list: list = [], - omit_target_list: list = ["cell"], - n_suggest: int = 4, - threshold_for_partial_matching: float = 90., - ) -> Tuple[ - List[Dict[str, Union[List[str], str]]], - List[bool] - ]: - """ - Map free text node names to ontology node names via fuzzy string matching and return as list - - If this function does not yield good matches, consider querying this web interface: - https://www.ebi.ac.uk/ols/index - - Search strategies: - - - exact_match: Only exact string matches to name or synonym in ontology. This is the only strategy that is - enabled if match_only is True. 
- - lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of match errors - ((fuzz.ratio). - - very_lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of matches - characters from query (fuzz.partial_ratio) - - Search strategies with anatomical constraints: - An anatomic constraint is a name of an anatomical structure that can be mapped to UBERON. - - - anatomic_onotolgy_match: - We select cell types expected in this UBERON clade based on the link between CL and UBERON. - - anatomic_string_match: - We perform an additional fuzzy string matching with the anatomical structure added to the proposed - label. This is often beneficial because analysts do not always prefix such extension (e.g. pancreatic) - to the free text cell type labels if the entire sample consists only of cells from this anatomical - structure. Note that if the maps from 1) were perfect, this would not be necessary. In practice, we - find this to still recover some hits that are otherwise missed. - - Note that matches are shadowed in lower priorty strategies, ie a perfect match will not show up in the list - of hits of any other strategy. - - :param source: Free text node labels which are to be matched to ontology nodes. - :param match_only: Whether to include strict matches only in output. - :param include_synonyms: Whether to include synonyms of nodes in string search. - :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined - within UBERON. - :param choices_for_perfect_match: Whether to give additional matches if a perfect match was found and an - anatomical_constraint is not not defined. This is overridden by match_only. - :param omit_list: Free text node labels to omit in map. - :param omit_target_list: Ontology nodes to not match to. - :param n_suggest: Number of cell types to suggest per search strategy. 
- :param threshold_for_partial_matching: Maximum fuzzy match score below which lenient matching (ratio) is - extended through partial_ratio. - :return: Tuple - - - List with matches for each source, each entry is a dictionary, - of lists of search strategies named by strategy name. If a search strategy yields perfect matches, it - does not return a list of strings but just a single string. - - List with boolean indicator whether or not this output should be reported. - """ - from fuzzywuzzy import fuzz - matches = [] - nodes = self.onto_cl.nodes - nodes = [x for x in nodes if x[1]["name"] not in omit_target_list] - include_terms = [] - if isinstance(source, pd.DataFrame): - source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) - for x in source: - if not isinstance(x, list) and not isinstance(x, tuple): - x = [x, "nan"] - term = x[0].lower().strip("'").strip("\"").strip("'").strip("\"").strip("]").strip("[") - # Test for perfect string matching: - scores_strict = np.array([ - np.max([ - 100 if term == y[1]["name"].lower() else 0 - ] + [ - 100 if term == yy.lower() else 0 - for yy in y[1]["synonym"] - ]) if "synonym" in y[1].keys() and include_synonyms else - 100 if term == y[1]["name"].lower() else 0 - for y in nodes - ]) - # Test for partial string matching: - # fuzz ratio and partial_ratio capture different types of matches well, we use both here and decide below - # which scores are used in which scenario defined through the user input. 
- # Formatting of synonyms: These are richly annotated, we strip references following after either: - # BROAD, EXACT - # in the synonym string and characters: "' - - def synonym_string_processing(y): - return y.lower().split("broad")[0].split("exact")[0].lower().strip("'").strip("\"").split("\" ")[0] - - scores_lenient = np.array([ - np.max([fuzz.ratio(term, y[1]["name"].lower())] + [ - fuzz.ratio(term, synonym_string_processing(yy)) - for yy in y[1]["synonym"] - ]) if "synonym" in y[1].keys() and include_synonyms else - fuzz.ratio(term, y[1]["name"].lower()) - for y in nodes - ]) - scores_very_lenient = np.array([ - np.max([fuzz.partial_ratio(term, y[1]["name"].lower())] + [ - fuzz.partial_ratio(term, synonym_string_processing(yy)) - for yy in y[1]["synonym"] - ]) if "synonym" in y[1].keys() and include_synonyms else - fuzz.partial_ratio(term, y[1]["name"].lower()) - for y in nodes - ]) - include_terms.append(term not in omit_list) - if match_only and not anatomical_constraint: - # Explicitly trying to report perfect matches (match_only is True). - matches.append({"perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0]}) - else: - matches_i = {} - if np.any(scores_strict == 100) and not anatomical_constraint: - # Perfect match and not additional information through anatomical_constraint, ie no reason to assume - # that the user is not looking for this hit. 
- matches_i.update({ - "perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0] - }) - if choices_for_perfect_match: - matches_i.update({"lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_lenient)[::-1] - if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][:n_suggest]}) - if np.max(scores_lenient) < threshold_for_partial_matching: - matches_i.update({"very_lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_very_lenient)[::-1] - if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][:n_suggest]}) - else: - if anatomical_constraint is not None: - # Use anatomical constraints two fold: - # 1. Select cell types that are in the correct ontology. - # 2. Run a second string matching with the anatomical word included. - - # 1. Select cell types that are in the correct ontology. - # Check that anatomical constraint is a term in UBERON and get UBERON ID: - anatomical_constraint_id = self.onto_anatomy.id_from_name(anatomical_constraint) - # Select up to 5 nodes which match the anatomical constraint: - # The entries look as follows: - # node.value['relationship'] = ['part_of UBERON:0001885'] - # Find nodes that can be matched to UBERON: - anatomical_subselection = [ - "relationship" in y[1].keys() and - np.any(["part_of UBERON" in yy for yy in y[1]["relationship"]]) and - np.any([ - yy.split("part_of ")[-1] in self.onto_anatomy.node_ids - for yy in y[1]["relationship"] - ]) - for y in nodes - ] - uberon_ids = [ - y[1]["relationship"][ - np.where(["part_of UBERON" in yy for yy in y[1]["relationship"]])[0][0] - ].split("part_of ")[1] - if z else None - for y, z in zip(nodes, anatomical_subselection) - ] - # Check relationship in UBERON. Select for: - # a) parent -> a more general setting across anatomies from which one was sampled - # b) child -> a sub anatomy of the sampled tissue. 
- # Check this by checking if one is an ancestor of the other: - anatomical_subselection = [ - z and ( - anatomical_constraint_id in self.onto_anatomy.get_ancestors(node=y) or # noqa: E126 - y in self.onto_anatomy.get_ancestors(node=anatomical_constraint_id) - ) - for y, z in zip(uberon_ids, anatomical_subselection) - ] - # Iterate over nodes sorted by string match score and masked by constraint: - matches_i.update({ - "anatomic_onotolgy_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_lenient) - if anatomical_subselection[i] and not - np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][-n_suggest:][::-1] - }) # noqa: E122 - - # 2. Run a second string matching with the anatomical word included. - modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]"). \ - strip("[") - scores_anatomy = np.array([ - np.max([ - fuzz.partial_ratio(modified_term, y[1]["name"].lower()) - ] + [ - fuzz.partial_ratio(modified_term, synonym_string_processing(yy)) - for yy in y[1]["synonym"] - ]) if "synonym" in y[1].keys() and include_synonyms else - fuzz.partial_ratio(modified_term, y[1]["name"].lower()) - for y in nodes - ]) - matches_i.update({ - "anatomic_string_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_anatomy) - if nodes[i][1]["name"] and not - np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][-n_suggest:][::-1] - }) - - # Select best overall matches based on lenient and strict matching: - matches_i.update({"perfect_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_strict)[::-1] - ][:n_suggest]}) - matches_i.update({"lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_lenient)[::-1] - if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][:n_suggest]}) - if np.max(scores_lenient) < threshold_for_partial_matching: - matches_i.update({"very_lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_very_lenient)[::-1] - if not 
np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][:n_suggest]}) - else: - # Suggest top hits by string match: - matches_i.update({"lenient_match": [ - nodes[i][1]["name"] for i in np.argsort(scores_lenient)[::-1] - ][:n_suggest]}) - if np.max(scores_lenient) < threshold_for_partial_matching: - matches_i.update({"very_lenient_match": [ - nodes[i][1]["name"] - for i in np.argsort(scores_very_lenient)[::-1] - if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) - ][:n_suggest]}) - matches.append(matches_i) - return matches, include_terms - - def prepare_celltype_map_tab( - self, - source, - match_only: bool = False, - include_synonyms: bool = True, - anatomical_constraint: Union[str, None] = None, - omit_list: list = [], - n_suggest: int = 10, - separator_suggestions: str = ":", - separator_groups: str = ":|||:", - ) -> pd.DataFrame: - """ - Map free text node names to ontology node names via fuzzy string matching and return as matching table. - - :param source: Free text node labels which are to be matched to ontology nodes. - :param match_only: Whether to include strict matches only in output. - :param include_synonyms: Whether to include synonyms of nodes in string search. - :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within UBERON. - :param omit_list: Free text node labels to omit in map. - :param n_suggest: Number of cell types to suggest per search strategy. - :param separator_suggestions: String separator for matches of a single strategy in output target column. - :param separator_groups: String separator for search strategy grouped matches in output target column. - :return: Table with source and target node names. 
Columns: "source", "target" - """ - matches, include_terms = self.prepare_celltype_map_fuzzy( - source=source, - match_only=match_only, - include_synonyms=include_synonyms, - anatomical_constraint=anatomical_constraint, - choices_for_perfect_match=False, - omit_list=omit_list, - n_suggest=n_suggest, - ) - tab = pd.DataFrame({ - "source": source, - "target": [ - separator_groups.join([ - separator_suggestions.join(v) - if isinstance(v, list) else v - for v in x.values() - ]) - for x in matches - ] - }) - return tab.loc[include_terms] diff --git a/sfaira/versions/metadata/universe.py b/sfaira/versions/metadata/universe.py new file mode 100644 index 000000000..448095415 --- /dev/null +++ b/sfaira/versions/metadata/universe.py @@ -0,0 +1,415 @@ +import numpy as np +import pandas as pd +from typing import Dict, List, Tuple, Union + +from sfaira.versions.metadata import OntologyCelltypes, OntologyUberon +from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE + + +class CelltypeUniverse: + """ + Cell type universe (list) and ontology (hierarchy) container class. + + + Basic checks on the organ specific instance are performed in the constructor. + """ + onto_cl: OntologyCelltypes + onto_uberon: OntologyUberon + _target_universe: Union[List[str], None] + + def __init__(self, cl: OntologyCelltypes, uberon: OntologyUberon, organism: str, **kwargs): + """ + + :param organism: Organism, defines ontology extension used. + :param kwargs: + """ + self.onto_cl = cl + self.onto_uberon = uberon + self._target_universe = None + self._set_extension(organism=organism) + + def _set_extension(self, organism): + """ + + :param organism: Organism, defines ontology extension used. 
+ """ + if organism == "human": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) + elif organism == "mouse": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) + else: + raise ValueError(f"organism {organism} not found") + + @property + def target_universe(self): + """ + Ontology classes of target universe (understandable cell type names). + + :return: + """ + return self._target_universe + + @target_universe.setter + def target_universe(self, x: List[str]): + # Check that all nodes are valid: + for xx in x: + if xx not in self.onto_cl.nodes: + raise ValueError(f"cell type {xx} was not in ontology") + # Default universe is the full set of leave nodes of ontology: + self.target_universe = self.onto_cl.leaves + self.onto_cl.set_leaves(self.target_universe) + + @property + def target_universe_ids(self): + """ + Ontology IDs of target universe (codified cell type names). + + :return: + """ + return [self.onto_cl.map_class_to_id(x) for x in self._target_universe] + + @property + def ntypes(self): + """ + Number of different cell types in target universe. + """ + return len(self.target_universe) + + def __validate_target_universe_table(self, tab: pd.DataFrame): + assert len(tab.columns) == 2 + assert tab.columns[0] == "name" and tab.columns[1] == "id" + + def load_target_universe(self, organ): + """ + + :param organ: Anatomic structure to load target universe for. + :return: + """ + # ToDo: Use pydoc based query of universes stored in ./target_universes/.. + tab = None + self.__validate_target_universe_table(tab=tab) + self.target_universe = None # ToDo + + def read_target_universe_csv(self, fn): + """ + + :param fn: File containing target universe. + :return: + """ + tab = pd.read_csv(fn) + self.__validate_target_universe_table(tab=tab) + self.target_universe = tab["name"].values + + def map_to_target_leaves( + self, + nodes: List[str], + return_type: str = "elements" + ): + """ + Map a given list of nodes to leave nodes defined for this ontology. 
+ :param nodes: + :param return_type: + + "elements": names of mapped leave nodes + "idx": indices in leave note list of of mapped leave nodes + :return: + """ + return [self.onto_cl.map_to_leaves(x, return_type=return_type) for x in nodes] + + def prepare_celltype_map_fuzzy( + self, + source, + match_only: bool = False, + include_synonyms: bool = True, + anatomical_constraint: Union[str, None] = None, + choices_for_perfect_match: bool = True, + omit_list: list = [], + omit_target_list: list = ["cell"], + n_suggest: int = 4, + threshold_for_partial_matching: float = 90., + ) -> Tuple[ + List[Dict[str, Union[List[str], str]]], + List[bool] + ]: + """ + Map free text node names to ontology node names via fuzzy string matching and return as list + + If this function does not yield good matches, consider querying this web interface: + https://www.ebi.ac.uk/ols/index + + Search strategies: + + - exact_match: Only exact string matches to name or synonym in ontology. This is the only strategy that is + enabled if match_only is True. + - lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of match errors + ((fuzz.ratio). + - very_lenient_match: Fuzzy string matches to name or synonym in ontology based on ratio of matches + characters from query (fuzz.partial_ratio) + + Search strategies with anatomical constraints: + An anatomic constraint is a name of an anatomical structure that can be mapped to UBERON. + + - anatomic_onotolgy_match: + We select cell types expected in this UBERON clade based on the link between CL and UBERON. + - anatomic_string_match: + We perform an additional fuzzy string matching with the anatomical structure added to the proposed + label. This is often beneficial because analysts do not always prefix such extension (e.g. pancreatic) + to the free text cell type labels if the entire sample consists only of cells from this anatomical + structure. Note that if the maps from 1) were perfect, this would not be necessary. 
In practice, we + find this to still recover some hits that are otherwise missed. + + Note that matches are shadowed in lower priorty strategies, ie a perfect match will not show up in the list + of hits of any other strategy. + + :param source: Free text node labels which are to be matched to ontology nodes. + :param match_only: Whether to include strict matches only in output. + :param include_synonyms: Whether to include synonyms of nodes in string search. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined + within UBERON. + :param choices_for_perfect_match: Whether to give additional matches if a perfect match was found and an + anatomical_constraint is not not defined. This is overridden by match_only. + :param omit_list: Free text node labels to omit in map. + :param omit_target_list: Ontology nodes to not match to. + :param n_suggest: Number of cell types to suggest per search strategy. + :param threshold_for_partial_matching: Maximum fuzzy match score below which lenient matching (ratio) is + extended through partial_ratio. + :return: Tuple + + - List with matches for each source, each entry is a dictionary, + of lists of search strategies named by strategy name. If a search strategy yields perfect matches, it + does not return a list of strings but just a single string. + - List with boolean indicator whether or not this output should be reported. 
+ """ + from fuzzywuzzy import fuzz + matches = [] + nodes = self.onto_cl.nodes + nodes = [x for x in nodes if x[1]["name"] not in omit_target_list] + include_terms = [] + if isinstance(source, pd.DataFrame): + source = list(zip(source.iloc[:, 0].values, source.iloc[:, 1].values)) + for x in source: + if not isinstance(x, list) and not isinstance(x, tuple): + x = [x, "nan"] + term = x[0].lower().strip("'").strip("\"").strip("'").strip("\"").strip("]").strip("[") + # Test for perfect string matching: + scores_strict = np.array([ + np.max( + [ + 100 if term == y[1]["name"].lower() else 0 + ] + [ + 100 if term == yy.lower() else 0 + for yy in y[1]["synonym"] + ] + ) if "synonym" in y[1].keys() and include_synonyms else 100 if term == y[1]["name"].lower() else 0 + for y in nodes + ]) + # Test for partial string matching: + # fuzz ratio and partial_ratio capture different types of matches well, we use both here and decide below + # which scores are used in which scenario defined through the user input. 
+ # Formatting of synonyms: These are richly annotated, we strip references following after either: + # BROAD, EXACT + # in the synonym string and characters: "' + + def synonym_string_processing(y): + return y.lower().split("broad")[0].split("exact")[0].lower().strip("'").strip("\"").split("\" ")[0] + + scores_lenient = np.array([ + np.max([fuzz.ratio(term, y[1]["name"].lower())] + [ + fuzz.ratio(term, synonym_string_processing(yy)) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + fuzz.ratio(term, y[1]["name"].lower()) + for y in nodes + ]) + scores_very_lenient = np.array([ + np.max([fuzz.partial_ratio(term, y[1]["name"].lower())] + [ + fuzz.partial_ratio(term, synonym_string_processing(yy)) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + fuzz.partial_ratio(term, y[1]["name"].lower()) + for y in nodes + ]) + include_terms.append(term not in omit_list) + if match_only and not anatomical_constraint: + # Explicitly trying to report perfect matches (match_only is True). + matches.append({"perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0]}) + else: + matches_i = {} + if np.any(scores_strict == 100) and not anatomical_constraint: + # Perfect match and not additional information through anatomical_constraint, ie no reason to assume + # that the user is not looking for this hit. 
+ matches_i.update({ + "perfect_match": [nodes[i][1]["name"] for i in np.where(scores_strict == 100)[0]][0] + }) + if choices_for_perfect_match: + matches_i.update({ + "lenient_match": [ + nodes[i][1]["name"] for i in np.argsort(scores_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest] + }) + if np.max(scores_lenient) < threshold_for_partial_matching: + matches_i.update({ + "very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest] + }) + else: + if anatomical_constraint is not None: + # Use anatomical constraints two fold: + # 1. Select cell types that are in the correct ontology. + # 2. Run a second string matching with the anatomical word included. + + # 1. Select cell types that are in the correct ontology. + # Check that anatomical constraint is a term in UBERON and get UBERON ID: + anatomical_constraint_id = self.onto_uberon.id_from_name(anatomical_constraint) + # Select up to 5 nodes which match the anatomical constraint: + # The entries look as follows: + # node.value['relationship'] = ['part_of UBERON:0001885'] + # Find nodes that can be matched to UBERON: + anatomical_subselection = [ + "relationship" in y[1].keys() and + np.any(["part_of UBERON" in yy for yy in y[1]["relationship"]]) and + np.any([ + yy.split("part_of ")[-1] in self.onto_uberon.node_ids + for yy in y[1]["relationship"] + ]) + for y in nodes + ] + uberon_ids = [ + y[1]["relationship"][ + np.where(["part_of UBERON" in yy for yy in y[1]["relationship"]])[0][0] + ].split("part_of ")[1] + if z else None + for y, z in zip(nodes, anatomical_subselection) + ] + # Check relationship in UBERON. Select for: + # a) parent -> a more general setting across anatomies from which one was sampled + # b) child -> a sub anatomy of the sampled tissue. 
+ # Check this by checking if one is an ancestor of the other: + anatomical_subselection = [ + z and ( + anatomical_constraint_id in self.onto_uberon.get_ancestors(node=y) or + y in self.onto_uberon.get_ancestors(node=anatomical_constraint_id) + ) + for y, z in zip(uberon_ids, anatomical_subselection) + ] + # Iterate over nodes sorted by string match score and masked by constraint: + matches_i.update({ + "anatomic_onotolgy_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_lenient) + if anatomical_subselection[i] and not + np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][-n_suggest:][::-1] + }) + + # 2. Run a second string matching with the anatomical word included. + modified_term = anatomical_constraint + " " + x[0].lower().strip("'").strip("\"").strip("]"). \ + strip("[") + scores_anatomy = np.array([ + np.max([ + fuzz.partial_ratio(modified_term, y[1]["name"].lower()) + ] + [ + fuzz.partial_ratio(modified_term, synonym_string_processing(yy)) + for yy in y[1]["synonym"] + ]) if "synonym" in y[1].keys() and include_synonyms else + fuzz.partial_ratio(modified_term, y[1]["name"].lower()) + for y in nodes + ]) + matches_i.update({ + "anatomic_string_match": [ + nodes[i][1]["name"] for i in np.argsort(scores_anatomy) + if nodes[i][1]["name"] and not + np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][-n_suggest:][::-1] + }) + + # Select best overall matches based on lenient and strict matching: + matches_i.update({ + "perfect_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_strict)[::-1] + ][:n_suggest] + }) + matches_i.update({ + "lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest] + }) + if np.max(scores_lenient) < threshold_for_partial_matching: + matches_i.update({ + "very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not 
np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest] + }) + else: + # Suggest top hits by string match: + matches_i.update({ + "lenient_match": [ + nodes[i][1]["name"] for i in np.argsort(scores_lenient)[::-1] + ][:n_suggest] + }) + if np.max(scores_lenient) < threshold_for_partial_matching: + matches_i.update({ + "very_lenient_match": [ + nodes[i][1]["name"] + for i in np.argsort(scores_very_lenient)[::-1] + if not np.any([nodes[i][1]["name"] in v for v in matches_i.values()]) + ][:n_suggest] + }) + matches.append(matches_i) + return matches, include_terms + + def prepare_celltype_map_tab( + self, + source, + match_only: bool = False, + include_synonyms: bool = True, + anatomical_constraint: Union[str, None] = None, + omit_list: list = [], + n_suggest: int = 10, + separator_suggestions: str = ":", + separator_groups: str = ":|||:", + ) -> pd.DataFrame: + """ + Map free text node names to ontology node names via fuzzy string matching and return as matching table. + + :param source: Free text node labels which are to be matched to ontology nodes. + :param match_only: Whether to include strict matches only in output. + :param include_synonyms: Whether to include synonyms of nodes in string search. + :param anatomical_constraint: Whether to require suggestions to be within a target anatomy defined within + UBERON. + :param omit_list: Free text node labels to omit in map. + :param n_suggest: Number of cell types to suggest per search strategy. + :param separator_suggestions: String separator for matches of a single strategy in output target column. + :param separator_groups: String separator for search strategy grouped matches in output target column. + :return: Table with source and target node names. 
Columns: "source", "target" + """ + matches, include_terms = self.prepare_celltype_map_fuzzy( + source=source, + match_only=match_only, + include_synonyms=include_synonyms, + anatomical_constraint=anatomical_constraint, + choices_for_perfect_match=False, + omit_list=omit_list, + n_suggest=n_suggest, + ) + tab = pd.DataFrame({ + "source": source, + "target": [ + separator_groups.join([ + separator_suggestions.join(v) + if isinstance(v, list) else v + for v in x.values() + ]) + for x in matches + ] + }) + return tab.loc[include_terms] From 26a00e504ba37b52f1c52402ff6ff7b656f1be29 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 1 Mar 2021 15:29:56 +0100 Subject: [PATCH 076/161] Yaml-alternative to constructor (#151) * added YAML example file * added meta data reading from YAML * optimised code in a few data loaders Co-authored-by: davidsebfischer --- requirements.txt | 1 + sfaira/consts/adata_fields.py | 40 +- sfaira/consts/meta_data_files.py | 2 + sfaira/data/base.py | 448 +++++++++++------- .../databases/cellxgene/cellxgene_loader.py | 4 +- .../mouse_x_2018_microwellseq_han_x.py | 3 +- ...aofcolon_2019_10xsequencing_kinchen_001.py | 50 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 53 +++ ..._pancreas_2019_10xsequencing_thompson_x.py | 3 +- .../human_x_2019_10xsequencing_szabo_001.py | 50 +- .../human_placenta_2018_x_ventotormo_001.py | 3 +- .../human_x_2020_microwellseq_han_x.py | 3 +- .../human_lung_2020_x_travaglini_001.py | 3 +- .../human_x_2019_10xsequencing_braga_x.py | 3 +- .../mouse_x_2019_10xsequencing_hove_001.py | 3 +- .../human_x_2019_10xsequencing_wang_001.py | 3 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 3 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 3 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 3 +- .../human_x_2018_10xsequencing_regev_001.py | 3 +- sfaira/data/utils.py | 25 + sfaira/unit_tests/data/test_dataset.py | 3 + .../data_contribution/test_data_template.py | 2 + 23 files changed, 443 insertions(+), 271 
deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml diff --git a/requirements.txt b/requirements.txt index c508cf4a6..1d8773db7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ openpyxl pandas pytest>=6.2.2 python-Levenshtein +PyYAML scanpy>=1.7.0 scipy>=1.2.1 seaborn diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index c768be09f..f426ad888 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -30,13 +30,17 @@ class AdataIdsBase: _normalization: str _organ: str _organism: str - _protocol: str + _assay: str _year: str @property def annotated(self) -> str: return self._annotated + @property + def assay(self) -> str: + return self._assay + @property def author(self) -> str: return self._author @@ -113,10 +117,6 @@ def organ(self) -> str: def organism(self) -> str: # TODO refactor into organism return self._organism - @property - def protocol(self) -> str: - return self._protocol - @property def year(self) -> str: return self._year @@ -127,23 +127,34 @@ class AdataIdsExtended(AdataIdsBase): Base class with extended set of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" _age: str - _dev_stage: str + _bio_sample: str + _development_stage: str _ethnicity: str + _individual: str _sex: str _state_exact: str + _tech_sample: str @property def age(self) -> str: return self._age @property - def dev_stage(self) -> str: - return self._dev_stage + def bio_sample(self) -> str: + return self._bio_sample + + @property + def development_stage(self) -> str: + return self._development_stage @property def ethnicity(self) -> str: return self._ethnicity + @property + def individual(self) -> str: + return self._individual + @property def sex(self) -> str: return self._sex @@ -152,6 +163,10 @@ def sex(self) -> str: def state_exact(self) -> str: return self._state_exact + @property + def tech_sample(self) -> str: + return self._tech_sample + class AdataIdsSfaira(AdataIdsExtended): """ @@ -161,6 +176,7 @@ class AdataIdsSfaira(AdataIdsExtended): def __init__(self): self._annotated = "annotated" self._author = "author" + self._bio_sample = "bio_sample" self._cell_types_original = "cell_types_original" self._cell_ontology_class = "cell_ontology_class" self._cell_ontology_id = "cell_ontology_id" @@ -174,15 +190,17 @@ def __init__(self): self._gene_id_names = "names" self._healthy = "healthy" self._id = "id" + self._individual = "individual" self._ncells = "ncells" self._normalization = "normalization" self._organ = "organ" self._organism = "organism" self._protocol = "protocol" + self._tech_sample = "bio_sample" self._year = "year" self._age = "age" - self._dev_stage = "dev_stage" + self._development_stage = "development_stage" self._ethnicity = "ethnicity" self._sex = "sex" self._state_exact = "state_exact" @@ -221,6 +239,7 @@ class AdataIdsCellxgene(AdataIdsExtended): accepted_file_names: List[str] def __init__(self): + self._assay = "assay" self._cell_types_original = "free_annotation" self._cell_ontology_class = "cell_type" self._cell_ontology_id = "cell_type_ontology_term_id" @@ -239,12 +258,11 @@ def __init__(self): self._normalization = "" # is 
always "raw" self._organ = "" # TODO self._organism = "organism" - self._protocol = "assay" self._year = "" # TODO self._age = "age" self._author = "contributors" - self._dev_stage = "development_stage" + self._development_stage = "development_stage" self._ethnicity = "ethnicity" self._sex = "sex" self._state_exact = "disease" diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 0a0ecc1ef..0e203d5a5 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -5,6 +5,7 @@ META_DATA_FIELDS = { "annotated": bool, "author": str, + "bio_sample": str, "cell_ontology_class": str, "doi": str, "download_url_data": str, @@ -17,5 +18,6 @@ "protocol": str, "organism": str, "state_exact": str, + "tech_sample": str, "year": int, } diff --git a/sfaira/data/base.py b/sfaira/data/base.py index b31a9e65c..0d1fa64b4 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -22,6 +22,7 @@ from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse from sfaira.consts import AdataIdsSfaira, META_DATA_FIELDS, OCS +from sfaira.data.utils import read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -71,37 +72,42 @@ class DatasetBase(abc.ABC): genome: Union[None, str] _age: Union[None, str] + _assay: Union[None, str] _author: Union[None, str] - _dev_stage: Union[None, str] + _bio_sample: Union[None, str] + _development_stage: Union[None, str] _doi: Union[None, str] _download_url_data: Union[Tuple[List[None]], Tuple[List[str]], None] _download_url_meta: Union[Tuple[List[None]], Tuple[List[str]], None] _ethnicity: Union[None, str] _healthy: Union[None, bool] _id: Union[None, str] + _individual: Union[None, str] _ncells: Union[None, int] _normalization: Union[None, str] _organ: Union[None, str] _organism: Union[None, str] - _protocol: Union[None, str] _sex: Union[None, str] _source: Union[None, str] _state_exact: Union[None, str] + _bio_sample: Union[None, str] 
_year: Union[None, int] - _obs_key_age: Union[None, str] - _obs_key_cellontology_id: Union[None, str] - _obs_key_cellontology_original: Union[None, str] - _obs_key_dev_stage: Union[None, str] - _obs_key_ethnicity: Union[None, str] - _obs_key_healthy: Union[None, str] - _obs_key_healthy: Union[None, str] - _obs_key_organ: Union[None, str] - _obs_key_organism: Union[None, str] - _obs_key_protocol: Union[None, str] - _obs_key_sample: Union[None, str] - _obs_key_sex: Union[None, str] - _obs_key_state_exact: Union[None, str] + _age_obs_key: Union[None, str] + _assay_obs_key: Union[None, str] + _cellontology_id_obs_key: Union[None, str] + _cellontology_original_obs_key: Union[None, str] + _development_stage_obs_key: Union[None, str] + _ethnicity_obs_key: Union[None, str] + _healthy_obs_key: Union[None, str] + _healthy_obs_key: Union[None, str] + _individual: Union[None, str] + _organ_obs_key: Union[None, str] + _organism_obs_key: Union[None, str] + _bio_sample_obs_key: Union[None, str] + _sex_obs_key: Union[None, str] + _state_exact_obs_key: Union[None, str] + _tech_sample_obs_key: Union[None, str] _healthy_state_healthy: Union[None, str] @@ -116,6 +122,7 @@ def __init__( data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, + yaml_path: Union[str, None] = None, **kwargs ): self._adata_ids_sfaira = AdataIdsSfaira() @@ -130,35 +137,40 @@ def __init__( self._age = None self._author = None - self._dev_stage = None + self._bio_sample = None + self._development_stage = None self._doi = None self._download_url_data = None self._download_url_meta = None self._ethnicity = None self._healthy = None self._id = None + self._individual = None self._ncells = None self._normalization = None self._organ = None self._organism = None - self._protocol = None + self._assay = None self._sex = None self._source = None self._state_exact = None + self._tech_sample = None self._year = None - self._obs_key_age = None - 
self._obs_key_cellontology_id = None - self._obs_key_cellontology_original = None - self._obs_key_dev_stage = None - self._obs_key_ethnicity = None - self._obs_key_healthy = None - self._obs_key_organ = None - self._obs_key_organism = None - self._obs_key_protocol = None - self._obs_key_sample = None - self._obs_key_sex = None - self._obs_key_state_exact = None + self._age_obs_key = None + self._cellontology_id_obs_key = None + self._cellontology_original_obs_key = None + self._development_stage_obs_key = None + self._ethnicity_obs_key = None + self._healthy_obs_key = None + self._individual_obs_key = None + self._organ_obs_key = None + self._organism_obs_key = None + self._assay_obs_key = None + self._bio_sample_obs_key = None + self._sex_obs_key = None + self._state_exact_obs_key = None + self._tech_sample_obs_key = None self._healthy_state_healthy = None @@ -171,6 +183,16 @@ def __init__( self._celltype_universe = None self._ontology_class_map = None + # Check if YAML files exists, read meta data from there if available: + if yaml_path is not None: + assert os.path.exists(yaml_path), f"did not find yaml {yaml_path}" + yaml_vals = read_yaml(fn=yaml_path) + for k, v in yaml_vals["attr"].items(): + if v is not None and k not in ["sample_fns", "sample_ids", "dataset_index"]: + setattr(self, k, v) + # ID can be set now already because YAML was used as input instead of child class constructor. + self.set_dataset_id(idx=yaml_vals["meta"]["dataset_index"]) + @abc.abstractmethod def _load(self) -> anndata.AnnData: pass @@ -572,23 +594,26 @@ def _set_metadata_in_adata(self): # Set cell-wise or data set-wide attributes (.uns / .obs): # These are saved in .uns if they are data set wide to save memory. 
for x, y, z, v in ( - [self.age, self._adata_ids_sfaira.age, self.obs_key_age, - self._ontology_container_sfaira.ontology_age], - [self.dev_stage, self._adata_ids_sfaira.dev_stage, self.obs_key_dev_stage, - self._ontology_container_sfaira.ontology_dev_stage], - [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.obs_key_ethnicity, - self._ontology_container_sfaira.ontology_ethnicity], - [self.healthy, self._adata_ids_sfaira.healthy, self.obs_key_healthy, - self._ontology_container_sfaira.ontology_healthy], - [self.organ, self._adata_ids_sfaira.organ, self.obs_key_organ, - self._ontology_container_sfaira.ontology_organism], - [self.protocol, self._adata_ids_sfaira.protocol, self.obs_key_protocol, - self._ontology_container_sfaira.ontology_protocol], - [self.sex, self._adata_ids_sfaira.sex, self.obs_key_sex, - self._ontology_container_sfaira.ontology_sex], - [self.organism, self._adata_ids_sfaira.organism, self.obs_key_organism, - self._ontology_container_sfaira.ontology_organism], - [self.state_exact, self._adata_ids_sfaira.state_exact, self.obs_key_state_exact, None], + [self.age, self._adata_ids_sfaira.age, self.age_obs_key, + self._ontology_container_sfaira.ontology_age], + [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key, None], + [self.development_stage, self._adata_ids_sfaira.development_stage, self.development_stage_obs_key, + self._ontology_container_sfaira.ontology_dev_stage], + [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.ethnicity_obs_key, + self._ontology_container_sfaira.ontology_ethnicity], + [self.healthy, self._adata_ids_sfaira.healthy, self.healthy_obs_key, + self._ontology_container_sfaira.ontology_healthy], + [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key, None], + [self.organ, self._adata_ids_sfaira.organ, self.organ_obs_key, + self._ontology_container_sfaira.ontology_organism], + [self.assay, self._adata_ids_sfaira.assay, self.assay_obs_key, + 
self._ontology_container_sfaira.ontology_protocol], + [self.sex, self._adata_ids_sfaira.sex, self.sex_obs_key, + self._ontology_container_sfaira.ontology_sex], + [self.organism, self._adata_ids_sfaira.organism, self.organism_obs_key, + self._ontology_container_sfaira.ontology_organism], + [self.state_exact, self._adata_ids_sfaira.state_exact, self.state_exact_obs_key, None], + [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key, None], ): if x is None and z is None: self.adata.uns[y] = None @@ -617,7 +642,7 @@ def _set_metadata_in_adata(self): # None so far other than celltypes. # Set cell types: # Map cell type names from raw IDs to ontology maintained ones:: - if self.obs_key_cellontology_original is not None: + if self.cellontology_original_obs_key is not None: self.project_celltypes_to_ontology() def load_tobacked( @@ -927,14 +952,17 @@ def write_meta( # Expand table by variably cell-wise or data set-wise meta data: for x in [ self._adata_ids_sfaira.age, - self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.assay, + self._adata_ids_sfaira.bio_sample, + self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.ethnicity, self._adata_ids_sfaira.healthy, + self._adata_ids_sfaira.individual, self._adata_ids_sfaira.organ, - self._adata_ids_sfaira.protocol, - self._adata_ids_sfaira.sex, self._adata_ids_sfaira.organism, + self._adata_ids_sfaira.sex, self._adata_ids_sfaira.state_exact, + self._adata_ids_sfaira.tech_sample, ]: if self.adata.uns[x] == UNS_STRING_META_IN_OBS: meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) @@ -967,10 +995,12 @@ def clean(s): else: author = self.author - self.id = f"{clean(self.organism)}_" \ - f"{clean(self.organ)}_" \ - f"{self.year}_" \ - f"{clean(self.protocol)}_" \ + # Note: access private attributes here, e.g. _organism, to avoid loading of content via meta data, which would + # invoke call to self.id before it is set. 
+ self.id = f"{clean(self._organism)}_" \ + f"{clean(self._organ)}_" \ + f"{self._year}_" \ + f"{clean(self._assay)}_" \ f"{clean(author)}_" \ f"{idx}_" \ f"{self.doi}" @@ -997,7 +1027,7 @@ def age(self, x: str): @property def annotated(self) -> Union[bool, None]: - if self.obs_key_cellontology_id is not None or self.obs_key_cellontology_original is not None: + if self.cellontology_id_obs_key is not None or self.cellontology_original_obs_key is not None: return True else: if self.meta is None: @@ -1012,6 +1042,25 @@ def annotated(self) -> Union[bool, None]: # if also no meta data is available, we do not know the status of the data set. return None + @property + def assay(self) -> Union[None, str]: + if self._assay is not None: + return self._assay + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.assay in self.meta.columns: + return self.meta[self._adata_ids_sfaira.assay] + else: + return None + + @assay.setter + def assay(self, x: str): + self.__erasing_protection(attr="protocol", val_old=self._assay, val_new=x) + self._value_protection(attr="protocol", allowed=self._ontology_container_sfaira.ontology_protocol, + attempted=x) + self._assay = x + @property def author(self) -> str: if self._author is not None: @@ -1028,6 +1077,23 @@ def author(self, x: str): self.__erasing_protection(attr="author", val_old=self._author, val_new=x) self._author = x + @property + def bio_sample(self) -> Union[None, str]: + if self._bio_sample is not None: + return self._bio_sample + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.bio_sample in self.meta.columns: + return self.meta[self._adata_ids_sfaira.bio_sample] + else: + return None + + @bio_sample.setter + def bio_sample(self, x: str): + self.__erasing_protection(attr="bio_sample", val_old=self._bio_sample, val_new=x) + self._bio_sample = x + @property def data_dir(self): # Data is either directly in user 
supplied directory or in a sub directory if the overall directory is managed @@ -1039,23 +1105,23 @@ def data_dir(self): return self.data_dir_base @property - def dev_stage(self) -> Union[None, str]: - if self._dev_stage is not None: - return self._dev_stage + def development_stage(self) -> Union[None, str]: + if self._development_stage is not None: + return self._development_stage else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.dev_stage in self.meta.columns: - return self.meta[self._adata_ids_sfaira.dev_stage] + if self.meta is not None and self._adata_ids_sfaira.development_stage in self.meta.columns: + return self.meta[self._adata_ids_sfaira.development_stage] else: return None - @dev_stage.setter - def dev_stage(self, x: str): - self.__erasing_protection(attr="dev_stage", val_old=self._dev_stage, val_new=x) + @development_stage.setter + def development_stage(self, x: str): + self.__erasing_protection(attr="dev_stage", val_old=self._development_stage, val_new=x) self._value_protection(attr="dev_stage", allowed=self._ontology_container_sfaira.ontology_dev_stage, attempted=x) - self._dev_stage = x + self._development_stage = x @property def doi(self) -> str: @@ -1197,6 +1263,23 @@ def id(self, x: str): self.__erasing_protection(attr="id", val_old=self._id, val_new=x) self._id = x + @property + def individual(self) -> Union[None, str]: + if self._individual is not None: + return self._individual + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.individual in self.meta.columns: + return self.meta[self._adata_ids_sfaira.individual] + else: + return None + + @individual.setter + def individual(self, x: str): + self.__erasing_protection(attr="bio_sample", val_old=self._individual, val_new=x) + self._individual = x + @property def loaded(self) -> bool: """ @@ -1257,113 +1340,131 @@ def normalization(self, x: str): self._normalization = x @property - 
def obs_key_age(self) -> str: - return self._obs_key_age + def age_obs_key(self) -> str: + return self._age_obs_key - @obs_key_age.setter - def obs_key_age(self, x: str): - self.__erasing_protection(attr="obs_key_age", val_old=self._obs_key_age, val_new=x) - self._obs_key_age = x + @age_obs_key.setter + def age_obs_key(self, x: str): + self.__erasing_protection(attr="age_obs_key", val_old=self._age_obs_key, val_new=x) + self._age_obs_key = x @property - def obs_key_cellontology_id(self) -> str: - return self._obs_key_cellontology_id + def assay_obs_key(self) -> str: + return self._assay_obs_key - @obs_key_cellontology_id.setter - def obs_key_cellontology_id(self, x: str): - self.__erasing_protection(attr="obs_key_cellontology_id", val_old=self._obs_key_cellontology_id, val_new=x) - self._obs_key_cellontology_id = x + @assay_obs_key.setter + def assay_obs_key(self, x: str): + self.__erasing_protection(attr="assay_obs_key", val_old=self._assay_obs_key, val_new=x) + self._assay_obs_key = x @property - def obs_key_cellontology_original(self) -> str: - return self._obs_key_cellontology_original + def bio_sample_obs_key(self) -> str: + return self._bio_sample_obs_key - @obs_key_cellontology_original.setter - def obs_key_cellontology_original(self, x: str): - self.__erasing_protection(attr="obs_key_cellontology_original", val_old=self._obs_key_cellontology_original, + @bio_sample_obs_key.setter + def bio_sample_obs_key(self, x: str): + self.__erasing_protection(attr="bio_sample_obs_key", val_old=self._bio_sample_obs_key, val_new=x) + self._bio_sample_obs_key = x + + @property + def cellontology_id_obs_key(self) -> str: + return self._cellontology_id_obs_key + + @cellontology_id_obs_key.setter + def cellontology_id_obs_key(self, x: str): + self.__erasing_protection(attr="cellontology_id_obs_key", val_old=self._cellontology_id_obs_key, val_new=x) + self._cellontology_id_obs_key = x + + @property + def cellontology_original_obs_key(self) -> str: + return 
self._cellontology_original_obs_key + + @cellontology_original_obs_key.setter + def cellontology_original_obs_key(self, x: str): + self.__erasing_protection(attr="cellontology_original_obs_key", val_old=self._cellontology_original_obs_key, val_new=x) - self._obs_key_cellontology_original = x + self._cellontology_original_obs_key = x @property - def obs_key_dev_stage(self) -> str: - return self._obs_key_dev_stage + def development_stage_obs_key(self) -> str: + return self._development_stage_obs_key - @obs_key_dev_stage.setter - def obs_key_dev_stage(self, x: str): - self.__erasing_protection(attr="obs_key_dev_stage", val_old=self._obs_key_dev_stage, val_new=x) - self._obs_key_dev_stage = x + @development_stage_obs_key.setter + def development_stage_obs_key(self, x: str): + self.__erasing_protection(attr="dev_stage_obs_key", val_old=self._development_stage_obs_key, val_new=x) + self._development_stage_obs_key = x @property - def obs_key_ethnicity(self) -> str: - return self._obs_key_ethnicity + def ethnicity_obs_key(self) -> str: + return self._ethnicity_obs_key - @obs_key_ethnicity.setter - def obs_key_ethnicity(self, x: str): - self.__erasing_protection(attr="obs_key_ethnicity", val_old=self._obs_key_ethnicity, val_new=x) - self._obs_key_ethnicity = x + @ethnicity_obs_key.setter + def ethnicity_obs_key(self, x: str): + self.__erasing_protection(attr="ethnicity_obs_key", val_old=self._ethnicity_obs_key, val_new=x) + self._ethnicity_obs_key = x @property - def obs_key_healthy(self) -> str: - return self._obs_key_healthy + def healthy_obs_key(self) -> str: + return self._healthy_obs_key - @obs_key_healthy.setter - def obs_key_healthy(self, x: str): - self.__erasing_protection(attr="obs_key_healthy", val_old=self._obs_key_healthy, val_new=x) - self._obs_key_healthy = x + @healthy_obs_key.setter + def healthy_obs_key(self, x: str): + self.__erasing_protection(attr="healthy_obs_key", val_old=self._healthy_obs_key, val_new=x) + self._healthy_obs_key = x @property - def 
obs_key_organ(self) -> str: - return self._obs_key_organ + def individual_obs_key(self) -> str: + return self._individual_obs_key - @obs_key_organ.setter - def obs_key_organ(self, x: str): - self.__erasing_protection(attr="obs_key_organ", val_old=self._obs_key_organ, val_new=x) - self._obs_key_organ = x + @individual_obs_key.setter + def individual_obs_key(self, x: str): + self.__erasing_protection(attr="individual_obs_key", val_old=self._individual_obs_key, val_new=x) + self._individual_obs_key = x @property - def obs_key_organism(self) -> str: - return self._obs_key_organism + def organ_obs_key(self) -> str: + return self._organ_obs_key - @obs_key_organism.setter - def obs_key_organism(self, x: str): - self.__erasing_protection(attr="obs_key_organism", val_old=self._obs_key_organism, val_new=x) - self._obs_key_organism = x + @organ_obs_key.setter + def organ_obs_key(self, x: str): + self.__erasing_protection(attr="organ_obs_key", val_old=self._organ_obs_key, val_new=x) + self._organ_obs_key = x @property - def obs_key_protocol(self) -> str: - return self._obs_key_protocol + def organism_obs_key(self) -> str: + return self._organism_obs_key - @obs_key_protocol.setter - def obs_key_protocol(self, x: str): - self.__erasing_protection(attr="obs_key_protocol", val_old=self._obs_key_protocol, val_new=x) - self._obs_key_protocol = x + @organism_obs_key.setter + def organism_obs_key(self, x: str): + self.__erasing_protection(attr="organism_obs_key", val_old=self._organism_obs_key, val_new=x) + self._organism_obs_key = x @property - def obs_key_sample(self) -> str: - return self._obs_key_sample + def sex_obs_key(self) -> str: + return self._sex_obs_key - @obs_key_sample.setter - def obs_key_sample(self, x: str): - self.__erasing_protection(attr="obs_key_sample", val_old=self._obs_key_sample, val_new=x) - self._obs_key_sample = x + @sex_obs_key.setter + def sex_obs_key(self, x: str): + self.__erasing_protection(attr="sex_obs_key", val_old=self._sex_obs_key, val_new=x) + 
self._sex_obs_key = x @property - def obs_key_sex(self) -> str: - return self._obs_key_sex + def state_exact_obs_key(self) -> str: + return self._state_exact_obs_key - @obs_key_sex.setter - def obs_key_sex(self, x: str): - self.__erasing_protection(attr="obs_key_sex", val_old=self._obs_key_sex, val_new=x) - self._obs_key_sex = x + @state_exact_obs_key.setter + def state_exact_obs_key(self, x: str): + self.__erasing_protection(attr="state_exact_obs_key", val_old=self._state_exact_obs_key, val_new=x) + self._state_exact_obs_key = x @property - def obs_key_state_exact(self) -> str: - return self._obs_key_state_exact + def tech_sample_obs_key(self) -> str: + return self._tech_sample_obs_key - @obs_key_state_exact.setter - def obs_key_state_exact(self, x: str): - self.__erasing_protection(attr="obs_key_state_exact", val_old=self._obs_key_state_exact, val_new=x) - self._obs_key_state_exact = x + @tech_sample_obs_key.setter + def tech_sample_obs_key(self, x: str): + self.__erasing_protection(attr="tech_sample_obs_key", val_old=self._tech_sample_obs_key, val_new=x) + self._tech_sample_obs_key = x @property def organ(self) -> Union[None, str]: @@ -1401,25 +1502,6 @@ def organism(self, x: str): self._value_protection(attr="organism", allowed=self._ontology_container_sfaira.ontology_organism, attempted=x) self._organism = x - @property - def protocol(self) -> Union[None, str]: - if self._protocol is not None: - return self._protocol - else: - if self.meta is None: - self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.protocol in self.meta.columns: - return self.meta[self._adata_ids_sfaira.protocol] - else: - return None - - @protocol.setter - def protocol(self, x: str): - self.__erasing_protection(attr="protocol", val_old=self._protocol, val_new=x) - self._value_protection(attr="protocol", allowed=self._ontology_container_sfaira.ontology_protocol, - attempted=x) - self._protocol = x - @property def sex(self) -> Union[None, str]: if self._sex is not 
None: @@ -1464,6 +1546,23 @@ def state_exact(self, x: str): self.__erasing_protection(attr="state_exact", val_old=self._state_exact, val_new=x) self._state_exact = x + @property + def tech_sample(self) -> Union[None, str]: + if self._tech_sample is not None: + return self._tech_sample + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.tech_sample in self.meta.columns: + return self.meta[self._adata_ids_sfaira.tech_sample] + else: + return None + + @tech_sample.setter + def tech_sample(self, x: str): + self.__erasing_protection(attr="tech_sample", val_old=self._tech_sample, val_new=x) + self._tech_sample = x + @property def var_ensembl_col(self) -> str: return self._var_ensembl_col @@ -1685,8 +1784,8 @@ def _load_from_group(self): Override this method in the Dataset if this is relevant. """ - assert self.obs_key_sample is not None, "self.obs_key_sample needs to be set" - self._subset_from_group(subset_items={self.obs_key_sample: self.sample_id}) + assert self.bio_sample_obs_key is not None, "self.obs_key_sample needs to be set" + self._subset_from_group(subset_items={self.bio_sample_obs_key: self.sample_id}) def _subset_from_group( self, @@ -1939,11 +2038,11 @@ def adata(self): for adata in adata_ls: adata.obs[self._adata_ids_sfaira.author] = adata.uns[self._adata_ids_sfaira.author] adata.obs[self._adata_ids_sfaira.year] = adata.uns[self._adata_ids_sfaira.year] - adata.obs[self._adata_ids_sfaira.protocol] = adata.uns[self._adata_ids_sfaira.protocol] + adata.obs[self._adata_ids_sfaira.assay] = adata.uns[self._adata_ids_sfaira.assay] if self._adata_ids_sfaira.normalization in adata.uns.keys(): adata.obs[self._adata_ids_sfaira.normalization] = adata.uns[self._adata_ids_sfaira.normalization] - if self._adata_ids_sfaira.dev_stage in adata.obs.columns: - adata.obs[self._adata_ids_sfaira.dev_stage] = adata.uns[self._adata_ids_sfaira.dev_stage] + if self._adata_ids_sfaira.development_stage in adata.obs.columns: + 
adata.obs[self._adata_ids_sfaira.development_stage] = adata.uns[self._adata_ids_sfaira.development_stage] adata.obs[self._adata_ids_sfaira.annotated] = adata.uns[self._adata_ids_sfaira.annotated] # Workaround related to anndata bugs: # TODO remove this in future. for adata in adata_ls: @@ -1956,9 +2055,9 @@ def adata(self): 'neighbors', self._adata_ids_sfaira.author, self._adata_ids_sfaira.year, - self._adata_ids_sfaira.protocol, + self._adata_ids_sfaira.assay, self._adata_ids_sfaira.normalization, - self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.annotated, self._adata_ids_sfaira.mapped_features, ] @@ -2158,6 +2257,16 @@ def __init__( ".SAMPLE_FNS") sample_ids = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".SAMPLE_IDS") + fn_yaml = os.path.join(self._cwd, file_module + ".yaml") + fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None + # Check for sample_fns and sample_ids in yaml: + if fn_yaml is not None: + assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" + yaml_vals = read_yaml(fn=fn_yaml) + if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: + sample_fns = yaml_vals["meta"]["sample_fns"] + if sample_ids is None and yaml_vals["meta"]["sample_ids"] is not None: + sample_ids = yaml_vals["meta"]["sample_ids"] if sample_fns is not None and sample_ids is None: # DatasetBaseGroupLoadingManyFiles: datasets_f.extend([ @@ -2166,6 +2275,8 @@ def __init__( data_path=data_path, meta_path=meta_path, cache_path=cache_path, + sample_fns=sample_fns, + yaml_path=fn_yaml, ) for x in sample_fns ]) @@ -2177,6 +2288,8 @@ def __init__( data_path=data_path, meta_path=meta_path, cache_path=cache_path, + sample_ids=sample_ids, + yaml_path=fn_yaml, ) for x in sample_ids ]) @@ -2184,7 +2297,12 @@ def __init__( raise ValueError(f"sample_fns and sample_ids both found for {f}") else: datasets_f.append( - DatasetFound(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path)) + DatasetFound( + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + yaml_path=fn_yaml, + )) # Load cell type maps: for x in datasets_f: x.load_ontology_class_map(fn=os.path.join(self._cwd, file_module + ".tsv")) @@ -2422,13 +2540,13 @@ def load_all_tobacked( self.adata.X = X keys = [ self._adata_ids_sfaira.annotated, + self._adata_ids_sfaira.assay, self._adata_ids_sfaira.author, self._adata_ids_sfaira.dataset, self._adata_ids_sfaira.cell_ontology_class, - self._adata_ids_sfaira.dev_stage, + self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.normalization, self._adata_ids_sfaira.organ, - self._adata_ids_sfaira.protocol, self._adata_ids_sfaira.state_exact, self._adata_ids_sfaira.year, ] diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index 104a0d6a9..6d497c2ae 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -29,7 +29,7 @@ def __init__( self.obs_key_cellontology_class = self._adata_ids_cellxgene.cell_ontology_class self.obs_key_cellontology_id = self._adata_ids_cellxgene.cell_ontology_id self.obs_key_cellontology_original = self._adata_ids_cellxgene.cell_types_original - self.obs_key_dev_stage = self._adata_ids_cellxgene.dev_stage + self.obs_key_dev_stage = self._adata_ids_cellxgene.development_stage self.obs_key_ethnicity = self._adata_ids_cellxgene.ethnicity self.obs_key_healthy = self._adata_ids_cellxgene.healthy self.obs_key_sex = self._adata_ids_cellxgene.sex @@ -66,5 +66,5 @@ def _load(self): self.organ = str(self.fn).split("_")[3] # TODO interface this properly # self.organ = adata.obs["tissue"].values[0] self.organism = adata.obs[self._adata_ids_cellxgene.organism].values[0] - self.protocol = adata.obs[self._adata_ids_cellxgene.protocol].values[0] + self.protocol = 
adata.obs[self._adata_ids_cellxgene.assay].values[0] self.year = adata.uns[self._adata_ids_cellxgene.year] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index c776900b3..28fac712d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -113,8 +113,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) sample_organ_dict = { "Bladder_dge.txt.gz": "urinary bladder", "BoneMarrow1_dge.txt.gz": "bone marrow", diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py index ad7d805ca..2dfdeca40 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py @@ -1,53 +1,15 @@ -import os -from typing import Union -import pandas as pd -import anndata as ad -import scipy.sparse -import numpy as np - from sfaira.data import DatasetBaseGroupLoadingManyFiles -SAMPLE_FNS = [ - "HC", - "UC", -] - class Dataset(DatasetBaseGroupLoadingManyFiles): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] 
= None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) - self.download_url_data = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&" \ - f"file=GSE114374%5FHuman%5F{sample_fn}%5Fexpression%5Fmatrix%2Etxt%2Egz" - self.download_url_meta = f"private,{sample_fn.lower()}_meta_data_stromal_with_donor.txt" - - self.author = "Kinchen" - self.doi = "10.1016/j.cell.2018.08.067" - self.normalization = "norm" - self.organ = "lamina propria of mucosa of colon" - self.organism = "human" - self.protocol = "10X sequencing" - self.year = 2019 - - self.var_symbol_col = "index" - self.obs_key_state_exact = "state_exact" - self.obs_key_healthy = self.obs_key_state_exact - self.healthy_state_healthy = "healthy colon" - self.obs_key_cellontology_original = "Cluster" - self.obs_key_age = "Age" - self.obs_key_sex = "Sex" - - self.set_dataset_id(idx=1) - def _load(self): + import os + import pandas as pd + import anndata as ad + import scipy.sparse + import numpy as np + fn = [ os.path.join(self.data_dir, f"GSE114374_Human_{self.sample_fn}_expression_matrix.txt.gz"), os.path.join(self.data_dir, f"{self.sample_fn.lower()}_meta_data_stromal_with_donor.txt"), diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml new file mode 100644 index 000000000..f5530a9ea --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -0,0 +1,53 @@ +dataset_structure: + dataset_index: 1 + sample_ids: + sample_fns: + - "HC" + - "UC" +dataset_wise: + author: + - "Kinchen" + doi: + - "10.1016/j.cell.2018.08.067" + download_url_data: + - 
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FHC%5Fexpression%5Fmatrix%2Etxt%2Egz" + - "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FUC%5Fexpression%5Fmatrix%2Etxt%2Egz" + download_url_meta: + - "private,hc_meta_data_stromal_with_donor.txt" + - "private,uc_meta_data_stromal_with_donor.txt" + normalization: "norm" + year: 2019 +dataset_or_observation_wise: + age: + age_obs_key: "Age" + assay: "10X sequencing" + assay_obs_key: + bio_sample: + bio_sample_obs_key: + development_stage: + development_stage_obs_key: + ethnicity: + ethnicity_obs_key: + healthy: + healthy_obs_key: "state_exact" + individual: + individual_obs_key: + organ: "lamina propria of mucosa of colon" + organ_obs_key: + organism: "human" + organism_obs_key: + sex: + sex_obs_key: "Sex" + state_exact: + state_exact_obs_key: "state_exact" + tech_sample: + tech_sample_obs_key: +observation_wise: + cellontology_original_obs_key: "Cluster" +feature_wise: + var_ensembl_col: + var_symbol_col: "index" +misc: + healthy_state_healthy: "healthy colon" +meta: + version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index d37a9ee58..dbda9a8a0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -29,8 +29,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117770/suppl/GSE117770_RAW.tar" self.download_url_meta = f"private,{self.sample_fn}_annotation.csv" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index b70005f30..66abaeab8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -26,6 +26,25 @@ "GSM3589421_PP020swap.filtered.matrix.txt.gz", ] +SAMPLE_DICT = { + "GSM3589406_PP001swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "healthy"], + "GSM3589407_PP002swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "stimulated"], + "GSM3589408_PP003swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "healthy"], + "GSM3589409_PP004swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "stimulated"], + "GSM3589410_PP005swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "healthy"], + "GSM3589411_PP006swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "stimulated"], + "GSM3589412_PP009swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "healthy"], + "GSM3589413_PP010swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "stimulated"], + "GSM3589414_PP011swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "healthy"], + "GSM3589415_PP012swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "stimulated"], + "GSM3589416_PP013swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "healthy"], + "GSM3589417_PP014swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "stimulated"], + "GSM3589418_PP017swap.filtered.matrix.txt.gz": ["blood", "Donor A", "stimulated"], + "GSM3589419_PP018swap.filtered.matrix.txt.gz": ["blood", "Donor A", "healthy"], + "GSM3589420_PP019swap.filtered.matrix.txt.gz": ["blood", "Donor B", "stimulated"], + 
"GSM3589421_PP020swap.filtered.matrix.txt.gz": ["blood", "Donor B", "healthy"], +} + class Dataset(DatasetBaseGroupLoadingManyFiles): @@ -37,41 +56,22 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" self.download_url_meta = [ "private,donor1.annotation.txt", "private,donor2.annotation.txt" ] - self.sample_dict = { - "GSM3589406_PP001swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "healthy"], - "GSM3589407_PP002swap.filtered.matrix.txt.gz": ["lung", "Donor 1", "stimulated"], - "GSM3589408_PP003swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "healthy"], - "GSM3589409_PP004swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 1", "stimulated"], - "GSM3589410_PP005swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "healthy"], - "GSM3589411_PP006swap.filtered.matrix.txt.gz": ["lymph node", "Donor 1", "stimulated"], - "GSM3589412_PP009swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "healthy"], - "GSM3589413_PP010swap.filtered.matrix.txt.gz": ["lung", "Donor 2", "stimulated"], - "GSM3589414_PP011swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "healthy"], - "GSM3589415_PP012swap.filtered.matrix.txt.gz": ["bone marrow", "Donor 2", "stimulated"], - "GSM3589416_PP013swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "healthy"], - "GSM3589417_PP014swap.filtered.matrix.txt.gz": ["lymph node", "Donor 2", "stimulated"], - "GSM3589418_PP017swap.filtered.matrix.txt.gz": ["blood", "Donor A", "stimulated"], - "GSM3589419_PP018swap.filtered.matrix.txt.gz": ["blood", "Donor A", "healthy"], - "GSM3589420_PP019swap.filtered.matrix.txt.gz": ["blood", "Donor B", "stimulated"], - 
"GSM3589421_PP020swap.filtered.matrix.txt.gz": ["blood", "Donor B", "healthy"], - } - self.author = "Szabo" self.doi = "10.1038/s41467-019-12464-3" + self.individual = SAMPLE_DICT[self.sample_fn][1] self.normalization = "raw" - self.organ = self.sample_dict[self.sample_fn][0] + self.organ = SAMPLE_DICT[self.sample_fn][0] self.organism = "human" self.protocol = "10X sequencing" - self.state_exact = self.sample_dict[self.sample_fn][2] - self.healthy = self.sample_dict[self.sample_fn][2] == "healthy" + self.state_exact = SAMPLE_DICT[self.sample_fn][2] + self.healthy = SAMPLE_DICT[self.sample_fn][2] == "healthy" self.year = 2019 self.var_symbol_col = "Gene" @@ -95,7 +95,7 @@ def _load(self): df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) adata = anndata.AnnData(df.T) adata.var = var - adata.obs["donor"] = self.sample_dict[self.sample_fn][1] + adata.obs["donor"] = SAMPLE_DICT[self.sample_fn][1] adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." + adata.obs.index adata.obs["cell_ontology_class"] = "unknown" df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 3410fca6d..88b83c165 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -21,8 +21,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = 
f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.1.zip" self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index fbdfa14b0..46477101a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -128,8 +128,7 @@ def __init__( **kwargs ): - super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) sample_organ_dict = { 'AdultAdipose_1': 'adipose tissue of abdominal region', diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py index 32e0a11c9..3c845116c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py @@ -29,8 +29,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) synapse_id = { "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625095", "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625142" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index 94c25a3a5..d588287fc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -21,8 +21,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index b542e0e20..d52b32ac1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -25,8 +25,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) sample_organ_dict = { "Choroid plexus": "choroid plexus", "Dura mater": "dura mater", diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index d98e372eb..63445e4c7 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -23,8 +23,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 37eb7b14b..a5332f219 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -22,8 +22,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index d48f9ff07..a0f27e68c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -57,8 +57,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs 
): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ "aorta" if organ in ["aorta"] else \ diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index bcef2ba4e..6dddd955e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -25,8 +25,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) if self.sample_fn == "madissoon19_lung.processed.h5ad": self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" self.var_ensembl_col = "gene.ids.HCATisStab7509734" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 970c5207e..fe0252711 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -20,8 +20,7 @@ def __init__( cache_path: Union[str, None] = None, **kwargs ): - super().__init__(sample_id=sample_id, sample_ids=SAMPLE_IDS, 
data_path=data_path, meta_path=meta_path, - cache_path=cache_path, **kwargs) + super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) self.obs_key_sample = "derived_organ_parts_label" self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/" \ diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index a0b970826..5f4ca0eb6 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -1,3 +1,4 @@ +import yaml from typing import Dict, List, Union from sfaira.consts import OntologyContainerSfaira @@ -92,3 +93,27 @@ def map_celltype_to_ontology( return matches_to_return[queries[0]] else: return matches_to_return + + +def read_yaml(fn) -> Dict[str, Dict[str, Union[str, int, bool]]]: + """ + Read data yaml file. + + Matches format names to Dataset attribute names. + + :param fn: YAML file name. + :return: Dictionary of dictionaries of names of Dataset attributes and their values. + + - "attr": Data set attributes. + - "meta": Meta information of yaml and representation. 
+ """ + with open(fn) as f: + yaml_dict = yaml.safe_load(f) + attr_dict = {} + meta_dict = {} + for k, v in yaml_dict.items(): + if k not in ["dataset_structure", "meta"]: + attr_dict.update(v) + else: + meta_dict.update(v) + return {"attr": attr_dict, "meta": meta_dict} diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 3f84c595b..94130ab7e 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -11,6 +11,9 @@ class TestDatasetGroupSfaira(unittest.TestCase): dir_data: str = "../test_data" dir_meta: str = "../test_data/meta" + def test_instantiate(self): + _ = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) + def test_load(self): ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) ds.subset(key="organism", values=["mouse"]) diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index 6702d7f19..bda0aefcc 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -127,6 +127,7 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. protected_writing=True, n_suggest=4, ) + dsg_f.clean_ontology_class_map(fn=fn) else: for k, v in ds.datasets.items(): # Write this directly into sfaira installation so that it can be committed via git. @@ -136,6 +137,7 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. 
protected_writing=True, n_suggest=10, ) + v.clean_ontology_class_map(fn=fn) # ToDo: conflicts are not automatically resolved, please go back to # https://www.ebi.ac.uk/ols/ontologies/cl From 5b5984a4439ea00e5bf1539bfedad4ecf3469983 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Mon, 1 Mar 2021 17:26:47 +0100 Subject: [PATCH 077/161] add model_id validation; closes #75 (#153) Signed-off-by: Zethson --- sfaira/interface/model_zoo.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 0b4549b7e..77b1591c9 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -78,8 +78,9 @@ def set_model_id( Set model ID to a manually supplied ID. :param model_id: Model ID to set. Format: pipeline_genome_organ_model_organisation_topology_version - :return: """ + if len(model_id.split('_')) < 7: + raise RuntimeError(f'Model ID {model_id} is invalid! Must follow the format: pipeline_genome_organ_model_organisation_topology_version') self.model_id = model_id ixs = self.model_id.split('_') self.model_class = ixs[0] @@ -102,8 +103,6 @@ def save_weights_to_remote(self, path=None): Saves model weights to repository XY. Increments 3rd digit of version number. Adds model_id to the text file, updates model_index - - :return: """ raise NotImplementedError() @@ -112,8 +111,6 @@ def save_weights_to_public(self): Saves model weights to cloud under an organization name. Increments 2nd digit of version number. Adds model_id to the text file, updates model_index - - :return: """ raise NotImplementedError() From 23d76f1a8a0491e45f9df651bc1f3004fd208320 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Mon, 1 Mar 2021 21:44:22 +0100 Subject: [PATCH 078/161] fix loading of Rdata dataset. 
fixes #126 (#157) --- .../__init__.py | 0 ...ver_2019_10xsequencing_ramachandran_001.py | 38 ++++++------------- ...er_2019_10xsequencing_ramachandran_001.tsv | 0 3 files changed, 11 insertions(+), 27 deletions(-) rename sfaira/data/dataloaders/loaders/{_d10_1038_s41586_019_1631_3 => d10_1038_s41586_019_1631_3}/__init__.py (100%) rename sfaira/data/dataloaders/loaders/{_d10_1038_s41586_019_1631_3 => d10_1038_s41586_019_1631_3}/human_liver_2019_10xsequencing_ramachandran_001.py (56%) rename sfaira/data/dataloaders/loaders/{_d10_1038_s41586_019_1631_3 => d10_1038_s41586_019_1631_3}/human_liver_2019_10xsequencing_ramachandran_001.tsv (100%) diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/__init__.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/__init__.py diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py similarity index 56% rename from sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py rename to sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 3f396a8ba..80a5ad546 100644 --- a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -1,36 +1,13 @@ import anndata import os from typing import Union +import anndata2ri +from rpy2.robjects import r from sfaira.data import DatasetBase class Dataset(DatasetBase): - """ - This dataloader requires manual preprocessing of the 
Rdata file that can be obtained from the link in the - `download_website` attribute of this class. The preprocessing code below uses the rpy2 and anndata2ri python - packages to convert the R object to anndata (pip install anndata2ri), run it in a jupyter notebook: - - ## Notebook Cell 1 - import anndata2ri - anndata2ri.activate() - %load_ext rpy2.ipython - - ## Notebook Cell 2 - %%R -o sce - library(Seurat) - load("tissue.rdata") - new_obj = CreateSeuratObject(counts = tissue@raw.data) - new_obj@meta.data = tissue@meta.data - sce <- as.SingleCellExperiment(new_obj) - - ## Notebook cell 3 - sce.write("ramachandran.h5ad") - - :param data_path: - :param meta_path: - :param kwargs: - """ def __init__( self, @@ -61,7 +38,14 @@ def __init__( self.set_dataset_id(idx=1) def _load(self): - fn = os.path.join(self.data_dir, "ramachandran.h5ad") - adata = anndata.read(fn) + fn = os.path.join(self.data_dir, "tissue.rdata") + anndata2ri.activate() # TODO: remove global activation of anndata2ri and use localconverter once it's fixed + adata = r( + f"library(Seurat)\n" + f"load('{fn}')\n" + f"new_obj = CreateSeuratObject(counts = tissue@raw.data)\n" + f"new_obj@meta.data = tissue@meta.data\n" + f"as.SingleCellExperiment(new_obj)\n" + ) return adata diff --git a/sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv rename to sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv From c864d10b5f4651afe0120fd46955444d0766a5bb Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 3 Mar 2021 16:04:19 +0100 Subject: [PATCH 079/161] refurbished data split and load interface (#155) * moved all Dataset._load() function to local name space, class-free load() functions * depreceated DatasetBaseGroupLoadingManyFiles and DatasetBaseGroupLoadingOneFile * moved d10_1038_s41586_020_2922_4 to yaml interface * adapated yaml to index meta data by entry file * added fragementation * enabled mapping of anndata to streamlined formats * moved unkown cell types into mapping csvs --- sfaira/consts/adata_fields.py | 4 +- sfaira/data/__init__.py | 2 +- sfaira/data/base.py | 487 ++++++++--------- ...letoflangerhans_2017_smartseq2_enge_001.py | 78 ++- ...etoflangerhans_2017_smartseq2_enge_001.tsv | 1 + .../mouse_x_2018_microwellseq_han_x.py | 58 +- .../mouse_x_2018_microwellseq_han_x.tsv | 11 + ...aofcolon_2019_10xsequencing_kinchen_001.py | 41 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 8 +- ...pithelium_2019_10xsequencing_smilie_001.py | 24 +- ...man_ileum_2019_10xsequencing_martin_001.py | 27 +- ...an_ileum_2019_10xsequencing_martin_001.tsv | 1 + ...stategland_2018_10xsequencing_henry_001.py | 24 +- .../human_pancreas_2016_indrop_baron_001.py | 24 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 43 +- ...ancreas_2016_smartseq2_segerstolpe_001.tsv | 2 + ..._pancreas_2019_10xsequencing_thompson_x.py | 57 +- ...uman_lung_2020_10xsequencing_miller_001.py | 32 +- ...man_lung_2020_10xsequencing_miller_001.tsv | 4 + .../human_brain_2017_droncseq_habib_001.py | 25 +- .../human_brain_2017_droncseq_habib_001.tsv | 1 + ...human_testis_2018_10xsequencing_guo_001.py | 24 +- ...liver_2018_10xsequencing_macparland_001.py | 30 +- .../human_kidney_2019_droncseq_lake_001.py | 30 +- .../human_kidney_2019_droncseq_lake_001.tsv | 2 + .../human_x_2019_10xsequencing_szabo_001.py | 67 ++- ...man_retina_2019_10xsequencing_menon_001.py | 20 +- .../human_placenta_2018_x_ventotormo_001.py | 51 +- .../human_liver_2019_celseq2_aizarani_001.py | 32 +- 
.../human_liver_2019_celseq2_aizarani_001.tsv | 5 + ...ver_2019_10xsequencing_ramachandran_001.py | 17 +- ...an_liver_2019_10xsequencing_popescu_001.py | 20 +- .../human_x_2020_microwellseq_han_x.py | 499 +++++++----------- .../human_x_2020_microwellseq_han_x.tsv | 21 +- .../human_lung_2020_x_travaglini_001.py | 78 +-- .../human_lung_2020_x_travaglini_001.yaml | 57 ++ ...uman_colon_2020_10xsequencing_james_001.py | 24 +- .../human_lung_2019_dropseq_braga_001.py | 29 +- .../human_x_2019_10xsequencing_braga_x.py | 30 +- .../mouse_x_2019_10xsequencing_hove_001.py | 104 ++-- ...uman_kidney_2020_10xsequencing_liao_001.py | 56 +- ...man_retina_2019_10xsequencing_voigt_001.py | 22 +- .../human_x_2019_10xsequencing_wang_001.py | 29 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 30 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 44 +- ...nchyma_2020_10xsequencing_habermann_001.py | 43 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 47 +- ..._kidney_2019_10xsequencing_stewart_001.tsv | 7 + ...uman_thymus_2020_10xsequencing_park_001.py | 22 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 39 +- ...man_x_2019_10xsequencing_madissoon_001.tsv | 3 + ..._retina_2019_10xsequencing_lukowski_001.py | 24 +- ...retina_2019_10xsequencing_lukowski_001.tsv | 2 + ...lood_2019_10xsequencing_10xgenomics_001.py | 66 ++- .../human_x_2018_10xsequencing_regev_001.py | 36 +- sfaira/unit_tests/data/test_dataset.py | 154 +++--- 56 files changed, 1165 insertions(+), 1553 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index f426ad888..7195b477d 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -213,8 +213,8 @@ def __init__(self): self.classmap_target_key = "target" self.classmap_target_id_key = "target_id" - self.unknown_celltype_name = "unknown" - self.unknown_celltype_identifiers = ["nan", "none", 
"unknown", np.nan, None] + self.unknown_celltype_identifier = "UNKNOWN" + self.not_a_cell_celltype_identifier = "NOT_A_CELL" @property def load_raw(self) -> str: diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 963d64839..c60748dc5 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,4 @@ -from .base import DatasetBase, DatasetBaseGroupLoadingOneFile, DatasetBaseGroupLoadingManyFiles, \ +from .base import DatasetBase, \ DatasetGroup, DatasetGroupDirectoryOriented, \ DatasetSuperGroup from . import dataloaders diff --git a/sfaira/data/base.py b/sfaira/data/base.py index 0d1fa64b4..02522a42f 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base.py @@ -21,7 +21,7 @@ from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse -from sfaira.consts import AdataIdsSfaira, META_DATA_FIELDS, OCS +from sfaira.consts import AdataIdsExtended, AdataIdsSfaira, META_DATA_FIELDS, OCS from sfaira.data.utils import read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -117,12 +117,18 @@ class DatasetBase(abc.ABC): _celltype_universe: Union[None, CelltypeUniverse] _ontology_class_map: Union[None, dict] + sample_fn: Union[None, str] + _sample_fns: Union[None, List[str]] + def __init__( self, data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, + load_func=None, yaml_path: Union[str, None] = None, + sample_fn: Union[str, None] = None, + sample_fns: Union[List[str], None] = None, **kwargs ): self._adata_ids_sfaira = AdataIdsSfaira() @@ -178,24 +184,29 @@ def __init__( self._var_ensembl_col = None self.class_maps = {"0": {}} - self._unknown_celltype_identifiers = self._adata_ids_sfaira.unknown_celltype_identifiers + self._unknown_celltype_identifiers = self._adata_ids_sfaira.unknown_celltype_identifier self._celltype_universe = None self._ontology_class_map = None + self.sample_fn = sample_fn + self._sample_fns = sample_fns + 
# Check if YAML files exists, read meta data from there if available: if yaml_path is not None: assert os.path.exists(yaml_path), f"did not find yaml {yaml_path}" yaml_vals = read_yaml(fn=yaml_path) for k, v in yaml_vals["attr"].items(): if v is not None and k not in ["sample_fns", "sample_ids", "dataset_index"]: - setattr(self, k, v) + if isinstance(v, dict): # v is a dictionary over file-wise meta-data items + assert self.sample_fn in v.keys(), f"did not find key {self.sample_fn} in yamls keys for {k}" + setattr(self, k, v[self.sample_fn]) + else: # v is a meta-data item + setattr(self, k, v) # ID can be set now already because YAML was used as input instead of child class constructor. self.set_dataset_id(idx=yaml_vals["meta"]["dataset_index"]) - @abc.abstractmethod - def _load(self) -> anndata.AnnData: - pass + self.load_func = load_func @property def _directory_formatted_id(self) -> str: @@ -297,16 +308,6 @@ def _download_synapse(self, synapse_entity, fn, **kwargs): dataset = syn.get(entity=synapse_entity) shutil.move(dataset.data_dir_base, os.path.join(self.data_dir, fn)) - def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None) -> bool: - """ - Only relevant for DatasetBaseGroupLoading but has to be a method of this class - because it is used in DatasetGroup. - - :param adata_group: - :return: Whether group loading is used. - """ - return False - @property def cache_fn(self): if self.directory_formatted_doi is None or self._directory_formatted_id is None: @@ -341,9 +342,9 @@ def _cached_reading(filename): else: warnings.warn(f"Cached loading enabled, but cache file {filename} not found. 
" f"Loading from raw files.") - self.adata = self._load() + self.adata = self.load_func(self.data_dir, self.sample_fn) else: - self.adata = self._load() + self.adata = self.load_func(self.data_dir, self.sample_fn) def _cached_writing(filename): if filename is not None: @@ -353,10 +354,10 @@ def _cached_writing(filename): self.adata.write_h5ad(filename) if load_raw and allow_caching: - self.adata = self._load() + self.adata = self.load_func(self.data_dir, self.sample_fn) _cached_writing(self.cache_fn) elif load_raw and not allow_caching: - self.adata = self._load() + self.adata = self.load_func(self.data_dir, self.sample_fn) elif not load_raw and allow_caching: _cached_reading(self.cache_fn) _cached_writing(self.cache_fn) @@ -399,7 +400,7 @@ def load( # Run data set-specific loading script: self._load_cached(load_raw=load_raw, allow_caching=allow_caching) # Set data-specific meta data in .adata: - self._set_metadata_in_adata() + self._set_metadata_in_adata(adata_ids=self._adata_ids_sfaira) # Set loading hyper-parameter-specific meta data: self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference @@ -575,45 +576,44 @@ def _match_features_to_reference(self): uns=self.adata.uns ) - def _set_metadata_in_adata(self): + def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): """ Copy meta data from dataset class in .anndata. 
:return: """ # Set data set-wide attributes (.uns): - self.adata.uns[self._adata_ids_sfaira.annotated] = self.annotated - self.adata.uns[self._adata_ids_sfaira.author] = self.author - self.adata.uns[self._adata_ids_sfaira.doi] = self.doi - self.adata.uns[self._adata_ids_sfaira.download_url_data] = self.download_url_data - self.adata.uns[self._adata_ids_sfaira.download_url_meta] = self.download_url_meta - self.adata.uns[self._adata_ids_sfaira.id] = self.id - self.adata.uns[self._adata_ids_sfaira.normalization] = self.normalization - self.adata.uns[self._adata_ids_sfaira.year] = self.year + self.adata.uns[adata_ids.annotated] = self.annotated + self.adata.uns[adata_ids.author] = self.author + self.adata.uns[adata_ids.doi] = self.doi + self.adata.uns[adata_ids.download_url_data] = self.download_url_data + self.adata.uns[adata_ids.download_url_meta] = self.download_url_meta + self.adata.uns[adata_ids.id] = self.id + self.adata.uns[adata_ids.normalization] = self.normalization + self.adata.uns[adata_ids.year] = self.year # Set cell-wise or data set-wide attributes (.uns / .obs): # These are saved in .uns if they are data set wide to save memory. 
for x, y, z, v in ( - [self.age, self._adata_ids_sfaira.age, self.age_obs_key, + [self.age, adata_ids.age, self.age_obs_key, self._ontology_container_sfaira.ontology_age], - [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key, None], - [self.development_stage, self._adata_ids_sfaira.development_stage, self.development_stage_obs_key, + [self.assay, adata_ids.assay, self.assay_obs_key, + self._ontology_container_sfaira.ontology_protocol], + [self.bio_sample, adata_ids.bio_sample, self.bio_sample_obs_key, None], + [self.development_stage, adata_ids.development_stage, self.development_stage_obs_key, self._ontology_container_sfaira.ontology_dev_stage], - [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.ethnicity_obs_key, + [self.ethnicity, adata_ids.ethnicity, self.ethnicity_obs_key, self._ontology_container_sfaira.ontology_ethnicity], - [self.healthy, self._adata_ids_sfaira.healthy, self.healthy_obs_key, + [self.healthy, adata_ids.healthy, self.healthy_obs_key, self._ontology_container_sfaira.ontology_healthy], - [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key, None], - [self.organ, self._adata_ids_sfaira.organ, self.organ_obs_key, + [self.individual, adata_ids.individual, self.individual_obs_key, None], + [self.organ, adata_ids.organ, self.organ_obs_key, self._ontology_container_sfaira.ontology_organism], - [self.assay, self._adata_ids_sfaira.assay, self.assay_obs_key, - self._ontology_container_sfaira.ontology_protocol], - [self.sex, self._adata_ids_sfaira.sex, self.sex_obs_key, - self._ontology_container_sfaira.ontology_sex], - [self.organism, self._adata_ids_sfaira.organism, self.organism_obs_key, + [self.organism, adata_ids.organism, self.organism_obs_key, self._ontology_container_sfaira.ontology_organism], - [self.state_exact, self._adata_ids_sfaira.state_exact, self.state_exact_obs_key, None], - [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key, None], + [self.sex, 
adata_ids.sex, self.sex_obs_key, self._ontology_container_sfaira.ontology_sex], + [self.state_exact, adata_ids.state_exact, self.state_exact_obs_key, None], + [self.tech_sample, adata_ids.tech_sample, self.tech_sample_obs_key, None], ): if x is None and z is None: self.adata.uns[y] = None @@ -645,6 +645,65 @@ def _set_metadata_in_adata(self): if self.cellontology_original_obs_key is not None: self.project_celltypes_to_ontology() + def streamline(self, format: str = "sfaira", clean: bool = False): + """ + Streamline the adata instance to output format. + + Output format are saved in ADATA_FIELDS* classes. + + :param format: Export format. + + - "sfaira" + - "cellxgene" + :param clean: Whether to delete non-streamlined fields. + :return: + """ + if format == "sfaira": + adata_fields = self._adata_ids_sfaira + elif format == "sfaira": + from sfaira.consts import AdataIdsCellxgene + adata_fields = AdataIdsCellxgene() + self._set_metadata_in_adata(adata_ids=adata_fields) + if clean: + if self.adata.varm is not None: + del self.adata.varm + if self.adata.obsm is not None: + del self.adata.obsm + if self.adata.varm is not None: + del self.adata.varp + if self.adata.obsp is not None: + del self.adata.obsp + # Only retain target elements in adata.uns: + self.adata.obs = self.adata.uns[[ + adata_fields.annotated, + adata_fields.author, + adata_fields.doi, + adata_fields.download_url_data, + adata_fields.download_url_meta, + adata_fields.id, + adata_fields.normalization, + adata_fields.year, + ]] + # Only retain target elements in adata.var: + self.adata.obs = self.adata.var[[ + adata_fields.gene_id_names, + adata_fields.gene_id_ensembl, + ]] + # Only retain target columns in adata.obs: + self.adata.obs = self.adata.obs[[ + adata_fields.age, + adata_fields.bio_sample, + adata_fields.development_stage, + adata_fields.ethnicity, + adata_fields.healthy, + adata_fields.individual, + adata_fields.organ, + adata_fields.organism, + adata_fields.sex, + adata_fields.state_exact, + 
adata_fields.tech_sample, + ]] + def load_tobacked( self, adata_backed: anndata.AnnData, @@ -719,17 +778,6 @@ def load_tobacked( else: raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def set_unknown_class_id(self, ids: List[str]): - """ - Sets list of custom identifiers of unknown cell types data annotation. - - :param ids: IDs in cell type name column to replace by "unknown identifier. - :return: - """ - self._unknown_celltype_identifiers.extend( - [x for x in ids if x not in self._adata_ids_sfaira.unknown_celltype_identifiers] - ) - def _set_genome(self, genome: Union[str, None]): if genome is not None: if genome.lower().startswith("homo_sapiens"): @@ -834,7 +882,6 @@ def project_celltypes_to_ontology(self): if self.cell_ontology_map is not None: # only if this was defined labels_mapped = [ self.cell_ontology_map[x] if x in self.cell_ontology_map.keys() - else self._adata_ids_sfaira.unknown_celltype_name if x.lower() in self._unknown_celltype_identifiers else x for x in labels_original ] else: @@ -855,8 +902,10 @@ def project_celltypes_to_ontology(self): # files with and without the ID in the third column. 
ids_mapped = [ self._ontology_container_sfaira.ontology_cell_types.id_from_name(x) - if x != self._adata_ids_sfaira.unknown_celltype_name - else self._adata_ids_sfaira.unknown_celltype_name + if x not in [ + self._adata_ids_sfaira.unknown_celltype_identifier, + self._adata_ids_sfaira.not_a_cell_celltype_identifier + ] else x for x in labels_mapped ] self.adata.obs[self._adata_ids_sfaira.cell_ontology_id] = ids_mapped @@ -986,8 +1035,8 @@ def clean(s): s = s.replace(' ', '').replace('-', '').replace('_', '').lower() return s - if hasattr(self, 'sample_idx'): - idx += self.sample_idx + if self.sample_fn is not None: + idx += self._sample_fns.index(self.sample_fn) idx = str(idx).zfill(3) if isinstance(self.author, List): @@ -1724,121 +1773,6 @@ def get_subset_idx(samplewise_key, cellwise_key): self.adata = self.adata[idx_keep, :].copy() -class DatasetBaseGroupLoadingOneFile(DatasetBase): - """ - Container class specific to datasets which come in groups and in which data sets are saved in a single file. - """ - _unprocessed_full_group_object: bool - _sample_id: str - - def __init__( - self, - sample_id: str, - sample_ids: List, - data_path: Union[str, None], - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self._unprocessed_full_group_object = False - self._sample_id = sample_id - self._SAMPLE_IDS = sample_ids - - @property - def sample_id(self): - return self._sample_id - - @property - def sample_idx(self): - return self._SAMPLE_IDS.index(self.sample_id) - - @abc.abstractmethod - def _load_full(self) -> anndata.AnnData: - """ - Loads a raw anndata object that correponds to a superset of the data belonging to this Dataset. - - Overload this method in the Dataset if this is relevant. 
- :return: adata_group - """ - pass - - def set_raw_full_group_object(self, adata_group: Union[None, anndata.AnnData] = None): - if self.adata is None and adata_group is not None: - self.adata = adata_group - elif self.adata is None and adata_group is None: - self.adata = self._load_full() - elif self.adata is not None and not self._unprocessed_full_group_object: - self.adata = self._load_full() - elif self.adata is not None and self._unprocessed_full_group_object: - pass - else: - assert False, "switch error" - self._unprocessed_full_group_object = True - return True - - def _load_from_group(self): - """ - Sets .adata based on a raw anndata object that correponds to a superset of the data belonging to this Dataset, - including subsetting. - - Override this method in the Dataset if this is relevant. - """ - assert self.bio_sample_obs_key is not None, "self.obs_key_sample needs to be set" - self._subset_from_group(subset_items={self.bio_sample_obs_key: self.sample_id}) - - def _subset_from_group( - self, - subset_items: dict, - ): - """ - Subsets a raw anndata object to the data corresponding to this Dataset. - - :param subset_items: Key-value pairs for subsetting: Keys are columns in .obs, values are entries that should - be kept. If the dictionary has multiple entries, these are sequentially subsetted (AND-gate). - :return: - """ - assert self.adata is not None, "this method should only be called if .adata is not None" - for k, v in subset_items.items(): - self.adata = self.adata[[x in v for x in self.adata.obs[k].values], :] - self._unprocessed_full_group_object = False - - def _load(self) -> anndata.AnnData: - _ = self.set_raw_full_group_object(adata_group=None) - if self._unprocessed_full_group_object: - self._load_from_group() - return self.adata - - -class DatasetBaseGroupLoadingManyFiles(DatasetBase, abc.ABC): - """ - Container class specific to datasets which come in groups and in which data sets are saved in separate but - streamlined files. 
- """ - _sample_fn: str - - def __init__( - self, - sample_fn: str, - sample_fns: List, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self._sample_fn = sample_fn - self._SAMPLE_FNS = sample_fns - - @property - def sample_fn(self): - return self._sample_fn - - @property - def sample_idx(self): - return self._SAMPLE_FNS.index(self.sample_fn) - - class DatasetGroup: """ Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through @@ -1853,7 +1787,7 @@ class DatasetGroup: #dsg_humanlung[some_id] #dsg_humanlung.adata """ - datasets: Dict + datasets: Dict[str, DatasetBase] def __init__(self, datasets: dict): self._adata_ids_sfaira = AdataIdsSfaira() @@ -1863,9 +1797,6 @@ def __init__(self, datasets: dict): def _unknown_celltype_identifiers(self): return np.unqiue(np.concatenate([v._unknown_celltype_identifiers for _, v in self.datasets.items()])) - def _load_group(self, **kwargs): - return None - def load( self, annotated_only: bool = False, @@ -1918,13 +1849,9 @@ def func(dataset, **kwargs_func): print(x[1]) del self.datasets[x[0]] else: # for loop - adata_group = None datasets_to_remove = [] for k, v in self.datasets.items(): print(f"loading {k}") - group_loading = v.set_raw_full_group_object(adata_group=adata_group) - if adata_group is None and group_loading: # cache full adata object for subsequent Datasets - adata_group = v.adata.copy() x = map_fn(tuple([v] + args)) # Clear data sets that were not successfully loaded because of missing data: if x is not None: @@ -1932,10 +1859,56 @@ def func(dataset, **kwargs_func): datasets_to_remove.append(k) for k in datasets_to_remove: del self.datasets[k] - del adata_group load.__doc__ += load_doc + def streamline(self, format: str = "sfaira", clean: bool = False): + """ + Streamline the adata instance 
in each data set to output format. + + Output format are saved in ADATA_FIELDS* classes. + + :param format: Export format. + + - "sfaira" + - "cellxgene" + :param clean: Whether to delete non-streamlined fields. + :return: + """ + for x in self.ids: + self.datasets[x].streamline(format=format, clean=clean) + + def fragment(self) -> Dict[str, anndata.AnnData]: + """ + Fragment data sets into largest consistent parititions based on meta data. + + ToDo return this as a DatasetGroup again. + the streamlined Datasets are similar to anndata instances here, worth considering whether to use anndata + instead because it can be indexed. + + :return: + """ + # TODO: assert that data is streamlined. + print("make sure data is streamlined") + datasets_new = {} + for k, v in self.datasets.items(): + # Define fragments and fragment names. + # Because the data is streamlined, fragments are partitions of the .obs space, excluding the cell-wise + # annotation columns: + # - cellontology_class + # - cellontology_id + # - cellontology_original + cols_exclude = ["cellontology_class", "cellontology_id", "cellontology_original"] + tab = v.adata.obs.loc[:, [x not in cols_exclude for x in v.adata.obs.columns]] + tab_unique = tab.drop_duplicates() + idx_sets = [ + np.where([np.all(tab_unique.iloc[i, :] == tab.iloc[j, :])[0] for j in range(tab.shape[0])]) + for i in range(tab_unique.shape[0]) + ] + for i, x in enumerate(idx_sets): + datasets_new[k + "_fragment" + str(i)] = v.adata[x, :] + return datasets_new + def load_tobacked( self, adata_backed: anndata.AnnData, @@ -2033,40 +2006,9 @@ def adata_ls(self): def adata(self): if not self.adata_ls: return None + self.streamline(format="sfaira", clean=True) adata_ls = self.adata_ls - # Save uns attributes that are fixed for entire data set to .obs to retain during concatenation: - for adata in adata_ls: - adata.obs[self._adata_ids_sfaira.author] = adata.uns[self._adata_ids_sfaira.author] - adata.obs[self._adata_ids_sfaira.year] = 
adata.uns[self._adata_ids_sfaira.year] - adata.obs[self._adata_ids_sfaira.assay] = adata.uns[self._adata_ids_sfaira.assay] - if self._adata_ids_sfaira.normalization in adata.uns.keys(): - adata.obs[self._adata_ids_sfaira.normalization] = adata.uns[self._adata_ids_sfaira.normalization] - if self._adata_ids_sfaira.development_stage in adata.obs.columns: - adata.obs[self._adata_ids_sfaira.development_stage] = adata.uns[self._adata_ids_sfaira.development_stage] - adata.obs[self._adata_ids_sfaira.annotated] = adata.uns[self._adata_ids_sfaira.annotated] - # Workaround related to anndata bugs: # TODO remove this in future. - for adata in adata_ls: - # Fix 1: - if adata.raw is not None: - adata.raw._varm = None - # Fix 2: - if adata.uns is not None: - keys_to_keep = [ - 'neighbors', - self._adata_ids_sfaira.author, - self._adata_ids_sfaira.year, - self._adata_ids_sfaira.assay, - self._adata_ids_sfaira.normalization, - self._adata_ids_sfaira.development_stage, - self._adata_ids_sfaira.annotated, - self._adata_ids_sfaira.mapped_features, - ] - for k in list(adata.uns.keys()): - if k not in keys_to_keep: - del adata.uns[k] - # Fix 3: - if not isinstance(adata.X, scipy.sparse.csr_matrix): - adata.X = scipy.sparse.csr_matrix(adata.X) + # .var entries are renamed and copied upon concatenation. # To preserve gene names in .var, the target gene names are copied into var_names and are then copied # back into .var. @@ -2250,13 +2192,12 @@ def __init__( datasets_f = [] file_module = ".".join(f.split(".")[:-1]) DatasetFound = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".Dataset") - # Check if global objects are available: - # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles - # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile + # Load objects from name space: + # - load(): Loading function that return anndata instance. + # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles + load_func = pydoc.locate(loader_pydoc_path + dataset_module + "." 
+ file_module + ".load") sample_fns = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".SAMPLE_FNS") - sample_ids = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + - ".SAMPLE_IDS") fn_yaml = os.path.join(self._cwd, file_module + ".yaml") fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None # Check for sample_fns and sample_ids in yaml: @@ -2265,44 +2206,36 @@ def __init__( yaml_vals = read_yaml(fn=fn_yaml) if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: sample_fns = yaml_vals["meta"]["sample_fns"] - if sample_ids is None and yaml_vals["meta"]["sample_ids"] is not None: - sample_ids = yaml_vals["meta"]["sample_ids"] - if sample_fns is not None and sample_ids is None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f.extend([ - DatasetFound( - sample_fn=x, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - sample_fns=sample_fns, - yaml_path=fn_yaml, - ) - for x in sample_fns - ]) - elif sample_fns is None and sample_ids is not None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f.extend([ - DatasetFound( - sample_id=x, - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - sample_ids=sample_ids, - yaml_path=fn_yaml, + if sample_fns is None: + sample_fns = [None] + # Here we distinguish between class that are already defined and those that are not. + # The latter case arises if meta data are defined in YAMLs and _load is given as a function. 
+ if DatasetFound is None: + for x in sample_fns: + datasets_f.append( + DatasetBase( + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + load_func=load_func, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) ) - for x in sample_ids - ]) - elif sample_fns is not None and sample_ids is not None: - raise ValueError(f"sample_fns and sample_ids both found for {f}") else: - datasets_f.append( - DatasetFound( - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - yaml_path=fn_yaml, - )) + for x in sample_fns: + datasets_f.append( + DatasetFound( + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + load_func=load_func, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) + ) # Load cell type maps: for x in datasets_f: x.load_ontology_class_map(fn=os.path.join(self._cwd, file_module + ".tsv")) @@ -2474,15 +2407,18 @@ def load_all( allow_caching=allow_caching, processes=processes, ) - # making sure that concatenate is not used on a None adata object resulting from organ filtering - for i in range(len(self.dataset_groups)): - if self.dataset_groups[i].adata is not None: - break - self.adata = self.dataset_groups[i].adata.concatenate( - *[x.adata for x in self.dataset_groups[1:] if x is not None], - join="outer", - batch_key=self._adata_ids_sfaira.dataset_group - ) + # Make sure that concatenate is not used on a None adata object: + adatas = [x.adata for x in self.dataset_groups if x.adata is not None] + if len(adatas) > 1: + self.adata = adatas[0].adata.concatenate( + *adatas[1:], + join="outer", + batch_key=self._adata_ids_sfaira.dataset_group + ) + elif len(adatas) == 1: + self.adata = adatas[0] + else: + warnings.warn("no anndata instances to concatenate") def load_all_tobacked( self, @@ -2601,6 +2537,23 @@ def delete_backed(self): def load_cached_backed(self, fn: PathLike): self.adata = anndata.read(fn, backed='r') + 
def streamline(self, format: str = "sfaira", clean: bool = False): + """ + Streamline the adata instance in each group and each data set to output format. + + Output format are saved in ADATA_FIELDS* classes. + + :param format: Export format. + + - "sfaira" + - "cellxgene" + :param clean: Whether to delete non-streamlined fields. + :return: + """ + for x in self.dataset_groups: + for xx in x.ids: + x.datasets[xx].streamline(format=format, clean=clean) + def subset(self, key, values): """ Subset list of adata objects based on match to values in key property. diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index 8a2e9e6f1..6e56d9231 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -1,5 +1,4 @@ import os -from typing import Union import tarfile import gzip from io import StringIO @@ -12,14 +11,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/suppl/GSE81547_RAW.tar" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" @@ -37,39 +30,38 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE81547_RAW.tar"), - os.path.join(self.data_dir, "GSE81547_series_matrix.txt.gz") - ] - dfs = [] - with 
tarfile.open(fn[0]) as tar: - for member in tar.getmembers(): - d = pd.read_csv(tar.extractfile(member), compression="gzip", header=None, sep="\t", index_col=0, - names=[member.name.split("_")[0]]) - dfs.append(d) - adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) - adata.X = scipy.sparse.csc_matrix(adata.X) - with gzip.open(fn[1]) as f: - file_content = [i.decode("utf-8") for i in f.readlines()] - inputstring = "" - for line in file_content: - if "ID_REF" in line: - inputstring += line - if "!Sample_title" in line: - inputstring += line[1:] - if "!Sample_characteristics_ch1\t\"inferred_cell_type: alpha" in line: - inputstring += line[1:] - data = StringIO(inputstring) - d = pd.read_csv(data, sep="\t").T - d.columns = d.iloc[0] - d.drop("Sample_title", inplace=True) - d = d.reset_index().set_index("ID_REF") - d.columns.name = None - d.index.name = None - adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in adata.obs.index] - adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in adata.obs.index] - self.set_unknown_class_id(ids=["unsure"]) +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE81547_RAW.tar"), + os.path.join(data_dir, "GSE81547_series_matrix.txt.gz") + ] + dfs = [] + with tarfile.open(fn[0]) as tar: + for member in tar.getmembers(): + d = pd.read_csv(tar.extractfile(member), compression="gzip", header=None, sep="\t", index_col=0, + names=[member.name.split("_")[0]]) + dfs.append(d) + adata = ad.AnnData(pd.concat(dfs, axis=1).iloc[1:-6].T) + adata.X = scipy.sparse.csc_matrix(adata.X) + with gzip.open(fn[1]) as f: + file_content = [i.decode("utf-8") for i in f.readlines()] + inputstring = "" + for line in file_content: + if "ID_REF" in line: + inputstring += line + if "!Sample_title" in line: + inputstring += line[1:] + if "!Sample_characteristics_ch1\t\"inferred_cell_type: alpha" in line: + inputstring += line[1:] + data = StringIO(inputstring) + d = pd.read_csv(data, 
sep="\t").T + d.columns = d.iloc[0] + d.drop("Sample_title", inplace=True) + d = d.reset_index().set_index("ID_REF") + d.columns.name = None + d.index.name = None + adata.obs["celltype"] = [d.loc[i]["Sample_characteristics_ch1"].split(": ")[1] for i in adata.obs.index] + adata.obs["patient"] = ["_".join(d.loc[i]["index"].split("_")[:2]) for i in adata.obs.index] - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv index 74cd9094d..16ed5cdce 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.tsv @@ -5,3 +5,4 @@ beta type B pancreatic cell CL:0000169 delta pancreatic D cell CL:0000173 ductal pancreatic ductal cell CL:0002079 mesenchymal mesenchymal cell CL:0008019 +unsure UNKNOWN UNKNOWN diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 28fac712d..56e138f23 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -1,12 +1,11 @@ import anndata import numpy as np import pandas -from typing import Union import zipfile import tarfile import os -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "Bladder_dge.txt.gz", @@ -103,17 +102,10 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: 
Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) sample_organ_dict = { "Bladder_dge.txt.gz": "urinary bladder", "BoneMarrow1_dge.txt.gz": "bone marrow", @@ -330,31 +322,25 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, '5435866.zip') - with zipfile.ZipFile(fn) as archive: - celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) - celltypes = celltypes.drop(["Unnamed: 0"], axis=1) - with tarfile.open(fileobj=archive.open('MCA_500more_dge.tar.gz')) as tar: - data = pandas.read_csv(tar.extractfile(f'500more_dge/{self.sample_fn}'), - compression="gzip", - sep=" ", - header=0 - ) +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, '5435866.zip') + with zipfile.ZipFile(fn) as archive: + celltypes = pandas.read_csv(archive.open('MCA_CellAssignments.csv'), index_col=1) + celltypes = celltypes.drop(["Unnamed: 0"], axis=1) - adata = anndata.AnnData(data.T) - annotated_cells = np.array([x in celltypes.index for x in adata.obs_names]) - # Subset to annotated cells if any are annotated: - if np.sum(annotated_cells) > 0: - adata = adata[annotated_cells].copy() - adata.obs = celltypes.loc[adata.obs_names, :] + with tarfile.open(fileobj=archive.open('MCA_500more_dge.tar.gz')) as tar: + data = pandas.read_csv(tar.extractfile(f'500more_dge/{sample_fn}'), + compression="gzip", + sep=" ", + header=0 + ) - self.set_unknown_class_id(ids=[ - "Cell in cell cycle(Fetal_Kidney)", "Stomach cell_Gkn2 high(Stomach)", "Stomach cell_Mt2 high(Stomach)", - "Dividing cell(Mammary-Gland-Virgin)", "Dividing cell(Neonatal-Heart)", "Dividing cell(Neonatal-Rib)", - "Dividing cell(Neonatal-Skin)", "Dividing cell(Pancreas)", "Dividing cell(Stomach)", "Dividing cells(Lung)", - "Dividng 
cell(Neonatal-Calvaria)" - ]) + adata = anndata.AnnData(data.T) + annotated_cells = np.array([x in celltypes.index for x in adata.obs_names]) + # Subset to annotated cells if any are annotated: + if np.sum(annotated_cells) > 0: + adata = adata[annotated_cells].copy() + adata.obs = celltypes.loc[adata.obs_names, :] - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv index 66a69cacd..f0b24e36a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv @@ -53,6 +53,7 @@ Cartilage cell_Col2a1 high(Neonatal-Rib) chondrocyte CL:0000138 Cartilage cell_Cxcl14 high(Neonatal-Rib) chondrocyte CL:0000138 Cartilage cell_Ppa1 high(Neonatal-Rib) chondrocyte CL:0000138 Cartilage cell_Prg4 high(Neonatal-Rib) chondrocyte CL:0000138 +Cell in cell cycle(Fetal_Kidney) UNKNOWN UNKNOWN Chondrocyte(Neonatal-Muscle) chondrocyte CL:0000138 Ciliated cell(Lung) ciliated cell CL:0000064 Clara Cell(Lung) club cell CL:0000158 @@ -93,6 +94,14 @@ Distal collecting duct principal cell_Cldn4 high(Kidney) kidney collecting duct Distal collecting duct principal cell_Hsd11b2 high(Kidney) kidney collecting duct principal cell CL:1001431 Distal convoluted tubule_Pvalb high(Kidney) kidney distal convoluted tubule epithelial cell CL:1000849 Distal convoluted tubule_S100g high(Kidney) kidney distal convoluted tubule epithelial cell CL:1000849 +Dividing cell(Mammary-Gland-Virgin) UNKNOWN UNKNOWN +Dividing cell(Neonatal-Heart) UNKNOWN UNKNOWN +Dividing cell(Neonatal-Rib) UNKNOWN UNKNOWN +Dividing cell(Neonatal-Skin) UNKNOWN UNKNOWN +Dividing cell(Pancreas) UNKNOWN UNKNOWN +Dividing cell(Stomach) UNKNOWN UNKNOWN +Dividing cells(Lung) UNKNOWN UNKNOWN +Dividng 
cell(Neonatal-Calvaria) Dividing T cells(Lung) T cell CL:0000084 Dividing dendritic cells(Lung) dendritic cell CL:0000451 Ductal cell(Pancreas) pancreatic ductal cell CL:0002079 @@ -375,6 +384,8 @@ Spermatocyte_Slc2a3 high(Testis) spermatocyte CL:0000017 Spermatogonia_1700001P01Rik high(Testis) primary spermatocyte CL:0000656 Spermatogonia_Tbc1d23 high(Testis) primary spermatocyte CL:0000656 Stem and progenitor cell(Mammary-Gland-Virgin) stem cell CL:0000034 +Stomach cell_Gkn2 high(Stomach) UNKNOWN UNKNOWN +Stomach cell_Mt2 high(Stomach) UNKNOWN UNKNOWN Stomach cell_Muc5ac high(Stomach) mucous cell of stomach CL:0002180 Stroma cell (Ovary) stromal cell of ovary CL:0002132 Stromal cell(Liver) stromal cell CL:0000499 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py index 2dfdeca40..c78eb42c6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py @@ -1,26 +1,21 @@ -from sfaira.data import DatasetBaseGroupLoadingManyFiles +import os +import pandas as pd +import anndata as ad +import scipy.sparse +import numpy as np -class Dataset(DatasetBaseGroupLoadingManyFiles): +def load(data_dir, sample_fn, **kwargs): + fn = [ + os.path.join(data_dir, f"GSE114374_Human_{sample_fn}_expression_matrix.txt.gz"), + os.path.join(data_dir, f"{sample_fn.lower()}_meta_data_stromal_with_donor.txt"), + ] + matrix = pd.read_csv(fn[0], sep="\t") + obs = pd.read_csv(fn[1], sep="\t", index_col=3) + adata = ad.AnnData(matrix.T) + adata.X = scipy.sparse.csc_matrix(np.expm1(adata.X)) + adata.obs = obs + s_dict = {"F": "female", "M": "male"} + adata.obs['Sex'] = 
[s_dict[i] for i in adata.obs['Sex']] - def _load(self): - import os - import pandas as pd - import anndata as ad - import scipy.sparse - import numpy as np - - fn = [ - os.path.join(self.data_dir, f"GSE114374_Human_{self.sample_fn}_expression_matrix.txt.gz"), - os.path.join(self.data_dir, f"{self.sample_fn.lower()}_meta_data_stromal_with_donor.txt"), - ] - matrix = pd.read_csv(fn[0], sep="\t") - obs = pd.read_csv(fn[1], sep="\t", index_col=3) - adata = ad.AnnData(matrix.T) - adata.X = scipy.sparse.csc_matrix(np.expm1(adata.X)) - adata.obs = obs - adata.obs['state_exact'] = "healthy colon" if self.sample_fn == "HC" else "ulcerative colitis" - s_dict = {"F": "female", "M": "male"} - adata.obs['Sex'] = [s_dict[i] for i in adata.obs['Sex']] - - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index f5530a9ea..40207457e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -29,7 +29,7 @@ dataset_or_observation_wise: ethnicity: ethnicity_obs_key: healthy: - healthy_obs_key: "state_exact" + healthy_obs_key: individual: individual_obs_key: organ: "lamina propria of mucosa of colon" @@ -39,7 +39,9 @@ dataset_or_observation_wise: sex: sex_obs_key: "Sex" state_exact: - state_exact_obs_key: "state_exact" + HC: "healthy" + UC: "ulcerative colitis" + state_exact_obs_key: tech_sample: tech_sample_obs_key: observation_wise: @@ -48,6 +50,6 @@ feature_wise: var_ensembl_col: var_symbol_col: "index" misc: - healthy_state_healthy: "healthy colon" + healthy_state_healthy: "healthy" meta: version: "1.0" diff 
--git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index 027400290..0c93b5125 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" self.download_url_meta = None @@ -35,10 +28,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "smillie19_epi.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "smillie19_epi.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index b1248a41c..232e62d84 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" self.download_url_meta = None @@ -35,12 +28,12 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "martin19.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - adata = adata[adata.obs["CellType"] != "Doublets"].copy() - self.set_unknown_class_id(ids=["Cycling"]) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "martin19.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + adata = adata[adata.obs["CellType"] != "Doublets"].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv index 9ca20f7a7..9f9d59ca5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.tsv @@ -2,6 +2,7 @@ source target target_id ACKR1+ endothelium endothelial cell CL:0000115 B cells B cell CL:0000236 CD36+ endothelium endothelial cell CL:0000115 +Cycling UNKNOWN UNKNOWN Enterocytes enterocyte CL:0000584 Enteroendocrines enteroendocrine cell CL:0000164 Fibs fibroblast CL:0000057 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index ae6b33ed3..52d75a72d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -14,14 +13,8 @@ class Dataset(DatasetBase): Hillock,epithelial cell of prostate """ - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" self.download_url_meta = None @@ -40,10 +33,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "henry18_0.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "henry18_0.processed.h5ad") + adata = anndata.read(fn) + adata.X = 
np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 3fd237a09..a07679c77 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -12,14 +11,8 @@ class Dataset(DatasetBase): ToDo: revisit gamma cell missing in CO """ - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" self.download_url_meta = None @@ -57,10 +50,11 @@ def __init__( }, } - def _load(self): - fn = os.path.join(self.data_dir, "baron16.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "baron16.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index 110f13997..3863b4acc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd from sfaira.data import DatasetBase @@ -11,14 +10,8 @@ class Dataset(DatasetBase): ToDo: revisit gamma cell missing in CO """ - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.processed.1.zip" self.download_url_meta = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-5061/E-MTAB-5061.sdrf.txt" @@ -38,19 +31,19 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "E-MTAB-5061.processed.1.zip"), - os.path.join(self.data_dir, "E-MTAB-5061.sdrf.txt") - ] - df = pd.read_csv(fn[0], sep="\t") - df.index = df.index.get_level_values(0) - df = df.drop("#samples", axis=1) - df = df.T.iloc[:, :26178] - adata = anndata.AnnData(df) - adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[adata.obs.index] - # filter observations which are not cells (empty wells, low quality cells etc.) 
- adata = adata[adata.obs["Characteristics[cell type]"] != "not applicable"].copy() - self.set_unknown_class_id(ids=["unclassified cell", "MHC class II cell"]) - - return adata + +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "E-MTAB-5061.processed.1.zip"), + os.path.join(data_dir, "E-MTAB-5061.sdrf.txt") + ] + df = pd.read_csv(fn[0], sep="\t") + df.index = df.index.get_level_values(0) + df = df.drop("#samples", axis=1) + df = df.T.iloc[:, :26178] + adata = anndata.AnnData(df) + adata.obs = pd.read_csv(fn[1], sep="\t").set_index("Source Name").loc[adata.obs.index] + # filter observations which are not cells (empty wells, low quality cells etc.) + adata = adata[adata.obs["Characteristics[cell type]"] != "not applicable"].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv index 48da67f57..8d536b19b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.tsv @@ -1,4 +1,5 @@ source target target_id +MHC class II cell UNKNOWN UNKNOWN PSC cell pancreatic stellate cell CL:0002410 acinar cell pancreatic acinar cell CL:0002064 alpha cell pancreatic A cell CL:0000171 @@ -10,4 +11,5 @@ endothelial cell endothelial cell CL:0000115 epsilon cell pancreatic epsilon cell CL:0005019 gamma cell pancreatic endocrine cell CL:0008024 mast cell mast cell CL:0000097 +unclassified cell UNKNOWN UNKNOWN unclassified endocrine cell pancreatic endocrine cell CL:0008024 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py 
index dbda9a8a0..bb199f6a3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -4,8 +4,7 @@ import scipy.io import os import pandas as pd -from typing import Union -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "GSM3308545_NOD_08w_A", @@ -19,17 +18,10 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117770/suppl/GSE117770_RAW.tar" self.download_url_meta = f"private,{self.sample_fn}_annotation.csv" @@ -48,24 +40,25 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - with tarfile.open(os.path.join(self.data_dir, 'GSE117770_RAW.tar')) as tar: - for member in tar.getmembers(): - if "_matrix.mtx.gz" in member.name and self.sample_fn in member.name: - name = "_".join(member.name.split("_")[:-1]) - with gzip.open(tar.extractfile(member), "rb") as mm: - x = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, - sep="\t", index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name + "_genes.tsv.gz"), compression="gzip", header=None, - sep="\t") - var.columns = ["ensembl", "names"] - var.index = var["ensembl"].values - adata = anndata.AnnData(X=x, obs=obs, var=var) - adata.var_names_make_unique() - celltypes = pd.read_csv(os.path.join(self.data_dir, self.sample_fn + 
"_annotation.csv"), index_col=0) - adata = adata[celltypes.index] - adata.obs["celltypes"] = celltypes - return adata +def load(data_dir, sample_fn, **kwargs): + with tarfile.open(os.path.join(data_dir, 'GSE117770_RAW.tar')) as tar: + for member in tar.getmembers(): + if "_matrix.mtx.gz" in member.name and sample_fn in member.name: + name = "_".join(member.name.split("_")[:-1]) + with gzip.open(tar.extractfile(member), "rb") as mm: + x = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, + sep="\t", index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(name + "_genes.tsv.gz"), compression="gzip", header=None, + sep="\t") + var.columns = ["ensembl", "names"] + var.index = var["ensembl"].values + adata = anndata.AnnData(X=x, obs=obs, var=var) + adata.var_names_make_unique() + celltypes = pd.read_csv(os.path.join(data_dir, sample_fn + "_annotation.csv"), index_col=0) + adata = adata[celltypes.index] + adata.obs["celltypes"] = celltypes + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index c6e3fed8d..40a9e13b6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + 
super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" self.download_url_meta = None @@ -35,16 +28,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "miller20.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / 10000) - self.set_unknown_class_id(ids=[ - "Bud tip adjacent", - "Bud tip progenitor", - "Submucosal gland", - "Submucosal gland basal", - ]) - - return adata + +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "miller20.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nUMI"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv index 219d954f4..339fd1671 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv @@ -1,6 +1,8 @@ source target target_id Airway Smooth Muscle bronchial smooth muscle cell CL:0002598 Basal cell respiratory basal cell CL:0002633 +Bud tip adjacent UNKNOWN UNKNOWN +Bud tip progenitor UNKNOWN UNKNOWN Cartilage chondrocyte CL:0000138 Club-like secretory secretory cell CL:0000151 Endothelial endothelial cell CL:0000115 @@ -20,3 +22,5 @@ Neuroendocrine neuroendocrine cell CL:0000165 Pericyte pericyte cell CL:0000669 RBC erythrocyte CL:0000232 Secretory progenitor secretory cell CL:0000151 +Submucosal gland UNKNOWN UNKNOWN +Submucosal gland basal UNKNOWN UNKNOWN diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index be308522f..9fd1b1219 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" self.download_url_meta = None @@ -35,11 +28,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "habib17.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - self.set_unknown_class_id(ids=["Unclassified"]) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "habib17.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv index b15f31bdf..caafc031c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.tsv @@ -8,6 +8,7 @@ MG microglial cell CL:0000129 NSC neuronal stem cell CL:0000047 ODC1 oligodendrocyte CL:0000128 OPC oligodendrocyte precursor cell CL:0002453 +Unclassified UNKNOWN UNKNOWN exCA1 hippocampal pyramidal neuron CL:1001571 exCA3 hippocampal pyramidal neuron CL:1001571 exDG dentate gyrus of hippocampal formation granule cell CL:2000089 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 65520ea3d..692f36012 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_url_meta = None @@ -35,10 +28,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "guo18_donor.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "guo18_donor.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = 
adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index 6cace3eb6..f7d3b038b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "private,GSE115469.csv.gz" self.download_url_meta = "private,GSE115469_labels.txt" @@ -34,13 +27,14 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE115469.csv.gz"), - os.path.join(self.data_dir, "GSE115469_labels.txt") - ] - adata = anndata.read_csv(fn[0]).T - celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName") - adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in adata.obs.index] - return adata +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE115469.csv.gz"), + os.path.join(data_dir, "GSE115469_labels.txt") + ] + adata = anndata.read_csv(fn[0]).T + celltype_df = pd.read_csv(fn[1], sep="\t").set_index("CellName") + adata.obs["celltype"] = [str(celltype_df.loc[i]["Cluster#"]) for i in adata.obs.index] + + return 
adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index a364add29..7ce8a365b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotated%5FRaw%5FUMI%5FMatrix%2Etsv%2Egz" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ @@ -36,15 +29,14 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), - os.path.join(self.data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") - ] - adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) - annot = pd.read_csv(fn[1], index_col=0, dtype="category") - adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in adata.obs.index] - self.set_unknown_class_id(ids=["Unknown"]) +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotated_Raw_UMI_Matrix.tsv.gz"), + os.path.join(data_dir, 
"GSE121862_UCSD-WU_Single_Nuclei_Cluster_Annotations.csv.gz") + ] + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + annot = pd.read_csv(fn[1], index_col=0, dtype="category") + adata.obs["celltype"] = [annot.loc[i.split("_")[0][1:]]["Annotation"] for i in adata.obs.index] - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv index 86af4d6cd..b58def11d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv @@ -25,3 +25,5 @@ Proximal Tubule Epithelial Cells - Stress/Inflam epithelial cell of proximal tub Thick Ascending Limb kidney loop of Henle thick ascending limb epithelial cell CL:1001106 Thin ascending limb kidney loop of Henle thin ascending limb epithelial cell CL:1001107 Vascular Smooth Muscle Cells and pericytes kidney pelvis smooth muscle cell CL:1000702 +Unknown UNKNOWN UNKNOWN + diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index 66abaeab8..1e6bfd0c9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -1,11 +1,10 @@ import anndata import os -from typing import Union import tarfile import pandas as pd import scipy.sparse -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "GSM3589406_PP001swap.filtered.matrix.txt.gz", @@ -46,17 +45,10 @@ } -class Dataset(DatasetBaseGroupLoadingManyFiles): +class 
Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126030/suppl/GSE126030_RAW.tar" self.download_url_meta = [ "private,donor1.annotation.txt", @@ -81,29 +73,30 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE126030_RAW.tar"), - os.path.join(self.data_dir, "donor1.annotation.txt"), - os.path.join(self.data_dir, "donor2.annotation.txt") - ] - with tarfile.open(fn[0]) as tar: - df = pd.read_csv(tar.extractfile(self.sample_fn), compression="gzip", sep="\t") - df.index = [i.split(".")[0] for i in df["Accession"]] - var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) - if df.columns[-1].startswith("Un"): - df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) - adata = anndata.AnnData(df.T) - adata.var = var - adata.obs["donor"] = SAMPLE_DICT[self.sample_fn][1] - adata.obs.index = self.sample_fn.split("_")[1].split("s")[0] + "nskept." 
+ adata.obs.index - adata.obs["cell_ontology_class"] = "unknown" - df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) - df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) - for i in df1.index: - adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] - for i in df2.index: - adata.obs["cell_ontology_class"].loc[i] = df2.loc[i][1] - adata.X = scipy.sparse.csc_matrix(adata.X) - return adata +def load(data_dir, sample_fn, **kwargs): + fn = [ + os.path.join(data_dir, "GSE126030_RAW.tar"), + os.path.join(data_dir, "donor1.annotation.txt"), + os.path.join(data_dir, "donor2.annotation.txt") + ] + with tarfile.open(fn[0]) as tar: + df = pd.read_csv(tar.extractfile(sample_fn), compression="gzip", sep="\t") + df.index = [i.split(".")[0] for i in df["Accession"]] + var = pd.concat([df.pop(x) for x in ["Gene", "Accession"]], 1) + if df.columns[-1].startswith("Un"): + df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True) + adata = anndata.AnnData(df.T) + adata.var = var + adata.obs["donor"] = SAMPLE_DICT[sample_fn][1] + adata.obs.index = sample_fn.split("_")[1].split("s")[0] + "nskept." 
+ adata.obs.index + adata.obs["cell_ontology_class"] = "unknown" + df1 = pd.read_csv(fn[1], sep="\t", index_col=0, header=None) + df2 = pd.read_csv(fn[2], sep="\t", index_col=0, header=None) + for i in df1.index: + adata.obs["cell_ontology_class"].loc[i] = df1.loc[i][1] + for i in df2.index: + adata.obs["cell_ontology_class"].loc[i] = df2.loc[i][1] + adata.X = scipy.sparse.csc_matrix(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 84240f33b..194a2a5a5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -1,20 +1,13 @@ import anndata import os -from typing import Union from sfaira.data import DatasetBase class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" self.download_url_meta = None @@ -33,8 +26,9 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "menon19.processed.h5ad") - adata = anndata.read(fn) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "menon19.processed.h5ad") + adata = anndata.read(fn) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 
88b83c165..ba0af97e8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -1,9 +1,8 @@ import os -from typing import Union import pandas as pd import anndata -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "E-MTAB-6678.processed", @@ -11,17 +10,10 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.1.zip" self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ @@ -43,20 +35,21 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, f"{self.sample_fn}.1.zip"), - os.path.join(self.data_dir, f"{self.sample_fn}.2.zip"), - ] - adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) - df = pd.read_csv(fn[1], sep="\t") - for i in df.columns: - adata.obs[i] = [df.loc[j][i] for j in adata.obs.index] - - adata.var["ensembl"] = [i.split("_")[1] for i in adata.var.index] - adata.var["names"] = [i.split("_")[0] for i in adata.var.index] - adata.var = adata.var.reset_index().reset_index().drop("index", axis=1) - adata = adata[:, ~adata.var.index.isin( - ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy() - - return adata + +def load(data_dir, sample_fn, **kwargs): + fn = [ + os.path.join(data_dir, 
f"{sample_fn}.1.zip"), + os.path.join(data_dir, f"{sample_fn}.2.zip"), + ] + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t", index_col="Gene").T) + df = pd.read_csv(fn[1], sep="\t") + for i in df.columns: + adata.obs[i] = [df.loc[j][i] for j in adata.obs.index] + + adata.var["ensembl"] = [i.split("_")[1] for i in adata.var.index] + adata.var["names"] = [i.split("_")[0] for i in adata.var.index] + adata.var = adata.var.reset_index().reset_index().drop("index", axis=1) + adata = adata[:, ~adata.var.index.isin( + ["", "-1", "-10", "-11", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9", "A.2", "A.3"])].copy() + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py index dfe3473d7..3d268ae2b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" @@ -34,16 +27,15 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, 
"GSE124395_Normalhumanlivercellatlasdata.txt.gz"), - os.path.join(self.data_dir, "GSE124395_clusterpartition.txt.gz") - ] - adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) - celltype_df = pd.read_csv(fn[1], sep=" ") - adata = adata[[i in celltype_df.index for i in adata.obs.index]].copy() - adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in adata.obs.index] - self.set_unknown_class_id(ids=["16", "19", "27", "36", "37"]) +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE124395_Normalhumanlivercellatlasdata.txt.gz"), + os.path.join(data_dir, "GSE124395_clusterpartition.txt.gz") + ] + adata = anndata.AnnData(pd.read_csv(fn[0], sep="\t").T) + celltype_df = pd.read_csv(fn[1], sep=" ") + adata = adata[[i in celltype_df.index for i in adata.obs.index]].copy() + adata.obs["CellType"] = [str(celltype_df.loc[i]["sct@cpart"]) for i in adata.obs.index] - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv index 883651b69..1057481d9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.tsv @@ -6,8 +6,10 @@ source target target_id 13 endothelial cell of hepatic sinusoid CL:1000398 14 hepatocyte CL:0000182 15 endothelial cell CL:0000115 +16 UNKNOWN UNKNOWN 17 hepatocyte CL:0000182 18 alpha-beta T cell CL:0000789 +19 UNKNOWN UNKNOWN 2 Kupffer cell CL:0000091 20 endothelial cell of hepatic sinusoid CL:1000398 21 endothelial cell CL:0000115 @@ -16,6 +18,7 @@ source target target_id 24 cholangiocyte CL:1000488 25 Kupffer cell CL:0000091 26 endothelial cell CL:0000115 +27 UNKNOWN UNKNOWN 28 alpha-beta T cell CL:0000789 29 endothelial cell of vascular tree CL:0002139 3 alpha-beta T cell 
CL:0000789 @@ -25,6 +28,8 @@ source target target_id 33 hepatic stellate cell CL:0000632 34 B cell CL:0000236 35 endothelial cell CL:0000115 +36 UNKNOWN UNKNOWN +37 UNKNOWN UNKNOWN 38 B cell CL:0000236 39 cholangiocyte CL:1000488 4 cholangiocyte CL:1000488 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 80a5ad546..7b2d89963 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -1,22 +1,12 @@ -import anndata import os -from typing import Union -import anndata2ri -from rpy2.robjects import r from sfaira.data import DatasetBase class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" self.download_url_meta = None @@ -38,6 +28,9 @@ def __init__( self.set_dataset_id(idx=1) def _load(self): + import anndata2ri + from rpy2.robjects import r + fn = os.path.join(self.data_dir, "tissue.rdata") anndata2ri.activate() # TODO: remove global activation of anndata2ri and use localconverter once it's fixed adata = r( diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index 931081b0c..4c8b991fe 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -1,20 +1,13 @@ import anndata import os -from typing import Union from sfaira.data import DatasetBase class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "private,fetal_liver_alladata_.h5ad" self.download_url_meta = None @@ -33,8 +26,9 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "fetal_liver_alladata_.h5ad") - adata = anndata.read(fn) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "fetal_liver_alladata_.h5ad") + adata = anndata.read(fn) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 46477101a..0f9d135c4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -3,251 +3,21 @@ import os import pandas as pd import scipy.sparse -from typing import Union import zipfile -from sfaira.data import DatasetBaseGroupLoadingOneFile +from sfaira.data import DatasetBase -SAMPLE_IDS = [ - 'AdultAdipose_1', - 'AdultAdrenalGland_2', - 'AdultAdrenalGland_3', - 'AdultArtery_1', - 'AdultAscendingColon_1', - 'AdultBladder_1', - 'AdultBladder_2', - 'AdultCerebellum_1', - 'AdultCervix_1', - 'AdultColon_1', - 'AdultDuodenum_1', - 'AdultEpityphlon_1', - 
'AdultEsophagus_1', - 'AdultEsophagus_2', - 'AdultFallopiantube_1', - 'AdultGallbladder_1', - 'AdultGallbladder_2', - 'AdultHeart_1', - 'AdultHeart_2', - 'AdultIleum_2', - 'AdultJejunum_2', - 'AdultKidney_2', - 'AdultKidney_3', - 'AdultKidney_4', - 'AdultLiver_1', - 'AdultLiver_2', - 'AdultLiver_4', - 'AdultLung_1', - 'AdultLung_2', - 'AdultLung_3', - 'AdultMuscle_1', - 'AdultOmentum_1', - 'AdultOmentum_2', - 'AdultOmentum_3', - 'AdultPancreas_1', - 'AdultPeripheralBlood_3', - 'AdultPeripheralBlood_4', - 'AdultPleura_1', - 'AdultProstate_1', - 'AdultRectum_1', - 'AdultSigmoidColon_1', - 'AdultSpleenParenchyma_1', - 'AdultSpleen_1', - 'AdultStomach_1', - 'AdultStomach_2', - 'AdultStomach_3', - 'AdultTemporalLobe_1', - 'AdultThyroid_1', - 'AdultThyroid_2', - 'AdultTrachea_2', - 'AdultTransverseColon_2', - 'AdultUreter_1', - 'AdultUterus_1', - 'BoneMarrow_1', - 'BoneMarrow_2', - 'ChorionicVillus_1', - 'CordBloodCD34P_1', - 'CordBloodCD34P_2', - 'CordBlood_1', - 'CordBlood_2', - 'FetalAdrenalGland_2', - 'FetalAdrenalGland_3', - 'FetalAdrenalGland_4', - 'FetalBrain_3', - 'FetalBrain_4', - 'FetalBrain_5', - 'FetalBrain_6', - 'FetalCalvaria_1', - 'FetalEyes_1', - 'FetalFemaleGonad_1', - 'FetalFemaleGonad_2', - 'FetalHeart_1', - 'FetalHeart_2', - 'FetalIntestine_1', - 'FetalIntestine_2', - 'FetalIntestine_3', - 'FetalIntestine_4', - 'FetalIntestine_5', - 'FetalKidney_3', - 'FetalKidney_4', - 'FetalKidney_5', - 'FetalKidney_6', - 'FetalLung_1', - 'FetalLung_2', - 'FetalMaleGonad_1', - 'FetalMaleGonad_2', - 'FetalMuscle_1', - 'FetalPancreas_1', - 'FetalPancreas_2', - 'FetalPancreas_3', - 'FetalRib_2', - 'FetalRib_3', - 'FetalSkin_2', - 'FetalSkin_3', - 'FetalSpinalCord_1', - 'FetalStomach_1', - 'FetalStomach_2', - 'FetalThymus_1', - 'FetalThymus_2', - 'HESC_1', - 'Liver_1', - 'Liver_2', - 'NeonatalAdrenalGland_1', - 'PeripheralBlood_1', - 'Placenta_1' -] +class Dataset(DatasetBase): -class Dataset(DatasetBaseGroupLoadingOneFile): - - def __init__( - self, - sample_id: str, - 
data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - - super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - sample_organ_dict = { - 'AdultAdipose_1': 'adipose tissue of abdominal region', - 'AdultAdrenalGland_2': 'adrenal gland', - 'AdultAdrenalGland_3': 'adrenal gland', - 'AdultArtery_1': 'artery', - 'AdultAscendingColon_1': 'ascending colon', - 'AdultBladder_1': 'urinary bladder', - 'AdultBladder_2': 'urinary bladder', - 'AdultCerebellum_1': 'cerebellum', - 'AdultCervix_1': 'uterine cervix', - 'AdultColon_1': 'colon', - 'AdultDuodenum_1': 'duodenum', - 'AdultEpityphlon_1': 'caecum', - 'AdultEsophagus_1': 'esophagus', - 'AdultEsophagus_2': 'esophagus', - 'AdultFallopiantube_1': 'fallopian tube', - 'AdultGallbladder_1': 'gall bladder', - 'AdultGallbladder_2': 'gall bladder', - 'AdultHeart_1': 'heart', - 'AdultHeart_2': 'heart', - 'AdultIleum_2': 'ileum', - 'AdultJejunum_2': 'jejunum', - 'AdultKidney_2': 'kidney', - 'AdultKidney_3': 'kidney', - 'AdultKidney_4': 'kidney', - 'AdultLiver_1': 'liver', - 'AdultLiver_2': 'liver', - 'AdultLiver_4': 'liver', - 'AdultLung_1': 'lung', - 'AdultLung_2': 'lung', - 'AdultLung_3': 'lung', - 'AdultMuscle_1': 'skeletal muscle organ', - 'AdultOmentum_1': 'omentum', - 'AdultOmentum_2': 'omentum', - 'AdultOmentum_3': 'omentum', - 'AdultPancreas_1': 'pancreas', - 'AdultPeripheralBlood_3': 'blood', - 'AdultPeripheralBlood_4': 'blood', - 'AdultPleura_1': 'pleura', - 'AdultProstate_1': 'prostate gland', - 'AdultRectum_1': 'rectum', - 'AdultSigmoidColon_1': 'sigmoid colon', - 'AdultSpleenParenchyma_1': 'parenchyma of spleen', - 'AdultSpleen_1': 'spleen', - 'AdultStomach_1': 'stomach', - 'AdultStomach_2': 'stomach', - 'AdultStomach_3': 'stomach', - 'AdultTemporalLobe_1': 'temporal lobe', - 'AdultThyroid_1': 'thyroid gland', - 'AdultThyroid_2': 'thyroid gland', - 'AdultTrachea_2': 'trachea', - 
'AdultTransverseColon_2': 'transverse colon', - 'AdultUreter_1': 'ureter', - 'AdultUterus_1': 'uterus', - 'BoneMarrow_1': 'bone marrow', - 'BoneMarrow_2': 'bone marrow', - 'ChorionicVillus_1': 'chorionic villus', - 'CordBloodCD34P_1': 'umbilical cord blood', - 'CordBloodCD34P_2': 'umbilical cord blood', - 'CordBlood_1': 'umbilical cord blood', - 'CordBlood_2': 'umbilical cord blood', - 'FetalAdrenalGland_2': 'adrenal gland', - 'FetalAdrenalGland_3': 'adrenal gland', - 'FetalAdrenalGland_4': 'adrenal gland', - 'FetalBrain_3': 'brain', - 'FetalBrain_4': 'brain', - 'FetalBrain_5': 'brain', - 'FetalBrain_6': 'brain', - 'FetalCalvaria_1': 'vault of skull', - 'FetalEyes_1': 'eye', - 'FetalFemaleGonad_1': 'ovary', - 'FetalFemaleGonad_2': 'ovary', - 'FetalHeart_1': 'heart', - 'FetalHeart_2': 'heart', - 'FetalIntestine_1': 'intestine', - 'FetalIntestine_2': 'intestine', - 'FetalIntestine_3': 'intestine', - 'FetalIntestine_4': 'intestine', - 'FetalIntestine_5': 'intestine', - 'FetalKidney_3': 'kidney', - 'FetalKidney_4': 'kidney', - 'FetalKidney_5': 'kidney', - 'FetalKidney_6': 'kidney', - 'FetalLung_1': 'lung', - 'FetalLung_2': 'lung', - 'FetalMaleGonad_1': 'testis', - 'FetalMaleGonad_2': 'testis', - 'FetalMuscle_1': 'skeletal muscle organ', - 'FetalPancreas_1': 'pancreas', - 'FetalPancreas_2': 'pancreas', - 'FetalPancreas_3': 'pancreas', - 'FetalRib_2': 'rib', - 'FetalRib_3': 'rib', - 'FetalSkin_2': 'skin of body', - 'FetalSkin_3': 'skin of body', - 'FetalSpinalCord_1': 'spinal cord', - 'FetalStomach_1': 'stomach', - 'FetalStomach_2': 'stomach', - 'FetalThymus_1': 'thymus', - 'FetalThymus_2': 'thymus', - 'HESC_1': 'blastocyst', - 'Liver_1': 'liver', - 'Liver_2': 'liver', - 'NeonatalAdrenalGland_1': 'adrenal gland', - 'PeripheralBlood_1': 'blood', - 'Placenta_1': 'placenta', - } - + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ndownloader.figshare.com/files/17727365" self.download_url_meta = [ 
"https://ndownloader.figshare.com/files/21758835", "https://ndownloader.figshare.com/files/22447898", ] - self.obs_key_sample = "sample" - - self.organ = sample_organ_dict[self.sample_id] - self.author = "Han" self.doi = "10.1038/s41586-020-2157-4" self.healthy = True @@ -257,94 +27,183 @@ def __init__( self.state_exact = "healthy" self.year = 2020 - self.obs_key_cellontology_original = "celltype_specific" - self.obs_key_dev_stage = "dev_stage" - self.obs_key_sex = "gender" - self.obs_key_age = "age" + self.bio_sample_obs_key = "sample" + self.cellontology_original_obs_key = "celltype_specific" + self.development_stage_obs_key = "dev_stage" + self.organ_obs_key = "organ" + self.sex_obs_key = "gender" + self.age_obs_key = "age" + self.var_symbol_col = "index" self.set_dataset_id(idx=1) - def _load_full(self): - adata = anndata.read(os.path.join(self.data_dir, "HCL_Fig1_adata.h5ad")) - # convert to sparse matrix - adata.X = scipy.sparse.csr_matrix(adata.X).copy() - - # harmonise annotations - for col in ["batch", "tissue"]: - adata.obs[col] = adata.obs[col].astype("str") - adata.obs.index = adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( - "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( - "FetalFemaleGonald", "FetalFemaleGonad", regex=True) - adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", - "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) - adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] - - # load celltype labels and harmonise them - # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: - fig1_anno = pd.read_excel( - os.path.join(self.data_dir_base, self.directory_formatted_doi, "HCL_Fig1_cell_Info.xlsx"), - index_col="cellnames", - engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables - ) - fig1_anno.index = 
fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( - "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( - "FetalFemaleGonald", "FetalFemaleGonad", regex=True) - - # check that the order of cells and cell labels is the same - assert np.all(fig1_anno.index == adata.obs.index) - - # add annotations to adata object and rename columns - adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) - adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", - "celltype_global"] - - # add sample-wise annotations to the full adata object - df = pd.DataFrame( - columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", - "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) - archive = zipfile.ZipFile(os.path.join(self.data_dir, "annotation_rmbatch_data_revised417.zip")) - for f in archive.namelist(): - df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") - df = pd.concat([df, df1], sort=True) - df = df.set_index("Cell_id") - adata = adata[[i in df.index for i in adata.obs.index]].copy() - a_idx = adata.obs.index.copy() - adata.obs = pd.concat([adata.obs, df[ - ["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"] - ]], axis=1) - assert np.all(a_idx == adata.obs.index) - - # remove mouse cells from the object # ToDo: add this back in as mouse data sets? 
- adata = adata[adata.obs["Source"] != "MCA2.0"].copy() - - # tidy up the column names of the obs annotations - adata.obs.columns = [ - "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", - "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] - - self.set_unknown_class_id( - ids=[ - "0", - "Unknown1", - "Unknown2", - "Intermediated cell", - "MT high", - "MT-gene high cell", - "Proliferating cell", - "Proliferating cell", - "Proliferating cell_C7 high", - "Proliferating cell_CCNB1 high", - "Proliferating cell_FABP5 high", - "Proliferating cell_HMGB2 high", - "Proliferating cell_KIAA0101 high", - "Proliferating cell_KIAA0101_high", - "Proliferating cell_PTTG1 high", - "Proliferating cell_TOP2A high", - "Proliferating cell_UBE2C high", - "Proliferating cell_UBE2C high", - "Proliferating cell_UBE2C_high" - ] - ) - return adata +def load(data_dir, **kwargs): + sample_organ_dict = { + 'AdultAdipose_1': 'adipose tissue of abdominal region', + 'AdultAdrenalGland_2': 'adrenal gland', + 'AdultAdrenalGland_3': 'adrenal gland', + 'AdultArtery_1': 'artery', + 'AdultAscendingColon_1': 'ascending colon', + 'AdultBladder_1': 'urinary bladder', + 'AdultBladder_2': 'urinary bladder', + 'AdultCerebellum_1': 'cerebellum', + 'AdultCervix_1': 'uterine cervix', + 'AdultColon_1': 'colon', + 'AdultDuodenum_1': 'duodenum', + 'AdultEpityphlon_1': 'caecum', + 'AdultEsophagus_1': 'esophagus', + 'AdultEsophagus_2': 'esophagus', + 'AdultFallopiantube_1': 'fallopian tube', + 'AdultGallbladder_1': 'gall bladder', + 'AdultGallbladder_2': 'gall bladder', + 'AdultHeart_1': 'heart', + 'AdultHeart_2': 'heart', + 'AdultIleum_2': 'ileum', + 'AdultJejunum_2': 'jejunum', + 'AdultKidney_2': 'kidney', + 'AdultKidney_3': 'kidney', + 'AdultKidney_4': 'kidney', + 'AdultLiver_1': 'liver', + 'AdultLiver_2': 'liver', + 'AdultLiver_4': 'liver', + 'AdultLung_1': 'lung', + 'AdultLung_2': 'lung', + 'AdultLung_3': 'lung', + 
'AdultMuscle_1': 'skeletal muscle organ', + 'AdultOmentum_1': 'omentum', + 'AdultOmentum_2': 'omentum', + 'AdultOmentum_3': 'omentum', + 'AdultPancreas_1': 'pancreas', + 'AdultPeripheralBlood_3': 'blood', + 'AdultPeripheralBlood_4': 'blood', + 'AdultPleura_1': 'pleura', + 'AdultProstate_1': 'prostate gland', + 'AdultRectum_1': 'rectum', + 'AdultSigmoidColon_1': 'sigmoid colon', + 'AdultSpleenParenchyma_1': 'parenchyma of spleen', + 'AdultSpleen_1': 'spleen', + 'AdultStomach_1': 'stomach', + 'AdultStomach_2': 'stomach', + 'AdultStomach_3': 'stomach', + 'AdultTemporalLobe_1': 'temporal lobe', + 'AdultThyroid_1': 'thyroid gland', + 'AdultThyroid_2': 'thyroid gland', + 'AdultTrachea_2': 'trachea', + 'AdultTransverseColon_2': 'transverse colon', + 'AdultUreter_1': 'ureter', + 'AdultUterus_1': 'uterus', + 'BoneMarrow_1': 'bone marrow', + 'BoneMarrow_2': 'bone marrow', + 'ChorionicVillus_1': 'chorionic villus', + 'CordBloodCD34P_1': 'umbilical cord blood', + 'CordBloodCD34P_2': 'umbilical cord blood', + 'CordBlood_1': 'umbilical cord blood', + 'CordBlood_2': 'umbilical cord blood', + 'FetalAdrenalGland_2': 'adrenal gland', + 'FetalAdrenalGland_3': 'adrenal gland', + 'FetalAdrenalGland_4': 'adrenal gland', + 'FetalBrain_3': 'brain', + 'FetalBrain_4': 'brain', + 'FetalBrain_5': 'brain', + 'FetalBrain_6': 'brain', + 'FetalCalvaria_1': 'vault of skull', + 'FetalEyes_1': 'eye', + 'FetalFemaleGonad_1': 'ovary', + 'FetalFemaleGonad_2': 'ovary', + 'FetalHeart_1': 'heart', + 'FetalHeart_2': 'heart', + 'FetalIntestine_1': 'intestine', + 'FetalIntestine_2': 'intestine', + 'FetalIntestine_3': 'intestine', + 'FetalIntestine_4': 'intestine', + 'FetalIntestine_5': 'intestine', + 'FetalKidney_3': 'kidney', + 'FetalKidney_4': 'kidney', + 'FetalKidney_5': 'kidney', + 'FetalKidney_6': 'kidney', + 'FetalLung_1': 'lung', + 'FetalLung_2': 'lung', + 'FetalMaleGonad_1': 'testis', + 'FetalMaleGonad_2': 'testis', + 'FetalMuscle_1': 'skeletal muscle organ', + 'FetalPancreas_1': 'pancreas', + 
'FetalPancreas_2': 'pancreas', + 'FetalPancreas_3': 'pancreas', + 'FetalRib_2': 'rib', + 'FetalRib_3': 'rib', + 'FetalSkin_2': 'skin of body', + 'FetalSkin_3': 'skin of body', + 'FetalSpinalCord_1': 'spinal cord', + 'FetalStomach_1': 'stomach', + 'FetalStomach_2': 'stomach', + 'FetalThymus_1': 'thymus', + 'FetalThymus_2': 'thymus', + 'HESC_1': 'blastocyst', + 'Liver_1': 'liver', + 'Liver_2': 'liver', + 'NeonatalAdrenalGland_1': 'adrenal gland', + 'PeripheralBlood_1': 'blood', + 'Placenta_1': 'placenta', + } + + adata = anndata.read(os.path.join(data_dir, "HCL_Fig1_adata.h5ad")) + # convert to sparse matrix + adata.X = scipy.sparse.csr_matrix(adata.X).copy() + + # harmonise annotations + for col in ["batch", "tissue"]: + adata.obs[col] = adata.obs[col].astype("str") + adata.obs.index = adata.obs.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + adata.obs.replace({"AdultJeJunum": "AdultJejunum", "AdultGallBladder": "AdultGallbladder", + "FetalFemaleGonald": "FetalFemaleGonad"}, regex=True, inplace=True) + adata.obs.index = ["-".join(i.split("-")[:-1]) for i in adata.obs.index] + + # load celltype labels and harmonise them + # This pandas code should work with pandas 1.2 but it does not and yields an empty data frame: + fig1_anno = pd.read_excel( + os.path.join(data_dir, "HCL_Fig1_cell_Info.xlsx"), + index_col="cellnames", + engine="xlrd", # ToDo: Update when pandas xlsx reading with openpyxl is fixed: yields empty tables + ) + fig1_anno.index = fig1_anno.index.str.replace("AdultJeJunum", "AdultJejunum", regex=True).str.replace( + "AdultGallBladder", "AdultGallbladder", regex=True).str.replace( + "FetalFemaleGonald", "FetalFemaleGonad", regex=True) + + # check that the order of cells and cell labels is the same + assert np.all(fig1_anno.index == adata.obs.index) + + # add annotations to adata object and rename columns + 
adata.obs = pd.concat([adata.obs, fig1_anno[["cluster", "stage", "donor", "celltype"]]], axis=1) + adata.obs.columns = ["sample", "tissue", "n_genes", "n_counts", "cluster_global", "stage", "donor", + "celltype_global"] + + # add sample-wise annotations to the full adata object + df = pd.DataFrame( + columns=["Cell_barcode", "Sample", "Batch", "Cell_id", "Cluster_id", "Ages", "Development_stage", "Method", + "Gender", "Source", "Biomaterial", "Name", "ident", "Celltype"]) + archive = zipfile.ZipFile(os.path.join(data_dir, "annotation_rmbatch_data_revised417.zip")) + for f in archive.namelist(): + df1 = pd.read_csv(archive.open(f), encoding="unicode_escape") + df = pd.concat([df, df1], sort=True) + df = df.set_index("Cell_id") + adata = adata[[i in df.index for i in adata.obs.index]].copy() + a_idx = adata.obs.index.copy() + adata.obs = pd.concat([adata.obs, df[ + ["Ages", "Celltype", "Cluster_id", "Gender", "Method", "Source"] + ]], axis=1) + assert np.all(a_idx == adata.obs.index) + + # remove mouse cells from the object # ToDo: add this back in as mouse data sets? 
+ adata = adata[adata.obs["Source"] != "MCA2.0"].copy() + + # tidy up the column names of the obs annotations + adata.obs.columns = [ + "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", + "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] + adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv index 920b736b1..4d721cb61 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv @@ -1,4 +1,5 @@ source target target_id +0 UNKNOWN UNKNOWN AT1 cell type I pneumocyte CL:0002062 AT1 cell type I pneumocyte CL:0002062 AT2 cell type II pneumocyte CL:0002063 @@ -209,8 +210,6 @@ Epithelial progenitor cell epithelial cell CL:0000066 Epithelial_cell_NUPR1 high epithelial cell CL:0000066 Epithelial¨Cmesenchymal transition epithelial cell CL:0000066 Erythroid cell erythroid lineage cell CL:0000764 -"Erythroid cell -" erythroid lineage cell CL:0000764 Erythroid cell_AHSP high erythroid lineage cell CL:0000764 Erythroid cell_HBA1 high erythroid lineage cell CL:0000764 Erythroid cell_HBB high erythroid lineage cell CL:0000764 @@ -300,6 +299,7 @@ Intercalated cell renal intercalated cell CL:0005010 Intercalated cell_SLC26A4 high renal intercalated cell CL:0005010 Intercalated cell_SPINK1 high renal intercalated cell CL:0005010 Intermediate Epithelial cell intermediate epitheliocyte CL:0002209 +Intermediated cell UNKNOWN UNKNOWN Interneuron interneuron CL:0000099 Interstitial cell_POSTN high kidney interstitial cell CL:1000500 Interstitial cell_PTN high kidney interstitial cell CL:1000500 @@ -391,6 +391,8 @@ 
Monocyte_S100A9 high monocyte CL:0000576 Monocyte_TPPP3 high monocyte CL:0000576 Motile liver macrophage macrophage CL:0000235 Motor neuron motor neuron CL:0000100 +MT high UNKNOWN UNKNOWN +MT-gene high cell UNKNOWN UNKNOWN Mucosal aquamous Epithelial cell mucous cell of stomach CL:0002180 Mucous Epithelial cell epithelial cell CL:0000066 Mucous Epithelial cell_REG1A high epithelial cell CL:0000066 @@ -487,11 +489,24 @@ Primordial germ cell_GTSF1 high primordial germ cell CL:0000670 Primordial germ cell_TCL1A high primordial germ cell CL:0000670 Principle cell renal principal cell CL:0005009 Proliferating B cell B cell CL:0000236 +Proliferating cell UNKNOWN UNKNOWN Proliferating keratinocyte keratinocyte CL:0000312 Proliferating B cell B cell CL:0000236 Proliferating Intra-adrenal ganglion neuron neuron CL:0000540 Proliferating T cell T cell CL:0000084 Proliferating alveolar bipotent progenitor cell pneumocyte CL:0000322 +Proliferating cell UNKNOWN UNKNOWN +Proliferating cell_C7 high UNKNOWN UNKNOWN +Proliferating cell_CCNB1 high UNKNOWN UNKNOWN +Proliferating cell_FABP5 high UNKNOWN UNKNOWN +Proliferating cell_HMGB2 high UNKNOWN UNKNOWN +Proliferating cell_KIAA0101 high UNKNOWN UNKNOWN +Proliferating cell_KIAA0101_high UNKNOWN UNKNOWN +Proliferating cell_PTTG1 high UNKNOWN UNKNOWN +Proliferating cell_TOP2A high UNKNOWN UNKNOWN +Proliferating cell_UBE2C high UNKNOWN UNKNOWN +Proliferating cell_UBE2C high UNKNOWN UNKNOWN +Proliferating cell_UBE2C_high UNKNOWN UNKNOWN Proliferating endothelial cell endothelial cell CL:0000115 Proliferating fibroblast fibroblast CL:0000057 Proliferating lung mesenchyme cell_HIST1H4C high mesenchymal cell CL:0008019 @@ -585,6 +600,8 @@ Thyroid follicular cell thyroid follicular cell CL:0002258 Treg cell regulatory T cell CL:0000815 Unknown Epithelial cell_EFNA1 high epithelial cell CL:0000066 Unknown Epithelial cell_FOS high epithelial cell CL:0000066 +Unknown1 UNKNOWN UNKNOWN +Unknown2 UNKNOWN UNKNOWN Ureteric Epithelial cell 
epithelial cell CL:0000066 Ureteric bud cell epithelial cell CL:0000066 Ureteric epithelial cell epithelial cell CL:0000066 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py index 3c845116c..4966e2c21 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.py @@ -1,70 +1,20 @@ import anndata import os -from typing import Union import scipy.sparse import numpy as np -from sfaira.data import DatasetBaseGroupLoadingManyFiles -SAMPLE_FNS = [ - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad", - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - """ - ToDo split by sample / patient in obs columns: - bio replicates droplet file "orig.ident"+"sample"+"magnetic.selection", - bio replicates facs file "patient"+"sample" - tech replicates droplet file "channel", - tech replicates facs file "plate.barcode" - """ - - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - synapse_id = { - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625095", - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad": "syn21625142" - } - - self.download_url_data = f"{synapse_id[self.sample_fn]},{self.sample_fn}" - self.download_url_meta = None - - self.author = "Travaglini" - self.doi = "10.1038/s41586-020-2922-4" - self.healthy = True - self.normalization = "raw" - self.organ = "lung" - self.organism = "human" - self.protocol = "10X sequencing" if self.sample_fn.split("_")[0] == "droplet" else 
"Smart-seq2" - self.state_exact = "healthy" - self.year = 2020 - - self.obs_key_cellontology_original = "free_annotation" - self.var_symbol_col = "index" - - self.set_dataset_id(idx=1) - - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - if self.sample_fn.split("_")[0] == "droplet": - norm_const = 10000 - sf_key = "nUMI" - else: - norm_const = 1000000 - sf_key = "nReads" - adata = anndata.read(fn) - adata.X = scipy.sparse.csc_matrix(adata.X) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs[sf_key].values[:, None])).multiply(1 / norm_const) - self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + if sample_fn.split("_")[0] == "droplet": + norm_const = 10000 + sf_key = "nUMI" + else: + norm_const = 1000000 + sf_key = "nReads" + adata = anndata.read(fn) + adata.X = scipy.sparse.csc_matrix(adata.X) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs[sf_key].values[:, None])).multiply(1 / norm_const) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml new file mode 100644 index 000000000..5273f033d --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -0,0 +1,57 @@ +dataset_structure: + dataset_index: 1 + sample_ids: + sample_fns: + - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad" + - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" +dataset_wise: + author: + - "Travaglini" + doi: + - "10.1038/s41586-020-2922-4" + download_url_data: + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "syn21625095,droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad" + facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: 
"syn21625142,facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" + download_url_meta: + normalization: "raw" + year: 2020 +dataset_or_observation_wise: + age: + age_obs_key: + assay: + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10X sequencing" + facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "Smart-seq2" + assay_obs_key: + bio_sample: + bio_sample_obs_key: + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "orig.ident*sample*magnetic.selection" + facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "patient*sample" + development_stage: + development_stage_obs_key: + ethnicity: + ethnicity_obs_key: + healthy: True + healthy_obs_key: + individual: + individual_obs_key: "patient" + organ: "lung" + organ_obs_key: + organism: "human" + organism_obs_key: + sex: + sex_obs_key: + state_exact: "healthy" + state_exact_obs_key: "state_exact" + tech_sample: + tech_sample_obs_key: + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "channel" + facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "plate.barcode" +observation_wise: + cellontology_original_obs_key: "free_annotation" +feature_wise: + var_ensembl_col: + var_symbol_col: "index" +misc: + healthy_state_healthy: +meta: + version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 03efe4a2c..877ead07a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - 
super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" self.download_url_meta = None @@ -36,10 +29,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "james20.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "james20.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index b6131e392..e01e29822 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" self.download_url_meta = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" @@ -34,13 +27,13 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE130148_raw_counts.csv.gz"), - os.path.join(self.data_dir, "GSE130148_barcodes_cell_types.txt.gz"), - ] - adata = anndata.read_csv(fn[0]).T - adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) - self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) - return adata +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE130148_raw_counts.csv.gz"), + os.path.join(data_dir, "GSE130148_barcodes_cell_types.txt.gz"), + ] + adata = anndata.read_csv(fn[0]).T + adata.obs = pd.read_csv(fn[1], sep="\t", index_col=0) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index d588287fc..7235a6a3a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -1,9 +1,8 @@ import anndata import os -from typing import Union import numpy as np -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "vieira19_Alveoli_and_parenchyma_anonymised.processed.h5ad", @@ -11,24 +10,17 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = 
f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None self.author = "Braga" self.doi = "10.1038/s41591-019-0468-5" self.healthy = True - self.organ = "bronchus" if sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" + self.organ = "bronchus" if self.sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" @@ -40,10 +32,10 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) - return adata +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index d52b32ac1..694bbc303 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -4,37 +4,13 @@ import pandas import zipfile import scipy.io -from typing import Union -from sfaira.data import DatasetBaseGroupLoadingOneFile +from sfaira.data import DatasetBase -SAMPLE_IDS = [ - "Choroid plexus", - "Dura mater", - "Enr. 
SDM", - "Whole brain", -] +class Dataset(DatasetBase): -class Dataset(DatasetBaseGroupLoadingOneFile): - - def __init__( - self, - sample_id: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - sample_organ_dict = { - "Choroid plexus": "choroid plexus", - "Dura mater": "dura mater", - "Enr. SDM": "brain meninx", - "Whole brain": "brain", - } - self.obs_key_sample = "sample" - self.organ = sample_organ_dict[self.sample_id] - + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = \ "https://www.brainimmuneatlas.org/data_files/toDownload/filtered_gene_bc_matrices_mex_WT_fullAggr.zip" self.download_url_meta = \ @@ -49,43 +25,53 @@ def __init__( self.state_exact = "healthy" self.year = 2019 + self.bio_sample_obs_key = "sample" + self.cellontology_original_obs_key = "cluster" + self.organ_obs_key = "sample_anatomy" + self.var_ensembl_col = "ensembl" self.var_symbol_col = "name" - self.obs_key_cellontology_original = "cluster" - self.obs_key_organ = "sample_anatomy" self.set_dataset_id(idx=1) - def _load_full(self): - fn = [ - os.path.join(self.data_dir, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), - os.path.join(self.data_dir, "annot_fullAggr.csv") - ] - with zipfile.ZipFile(fn[0]) as archive: - x = scipy.io.mmread(archive.open('filtered_gene_bc_matrices_mex/mm10/matrix.mtx')).T.tocsr() - adata = anndata.AnnData(x) - var = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/genes.tsv'), sep="\t", header=None) - var.columns = ["ensembl", "name"] - obs_names = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/barcodes.tsv'), - sep="\t", - header=None - )[0].values - obs = pandas.read_csv(fn[1]) +def load(data_dir, **kwargs): + sample_organ_dict = { + "Choroid plexus": "choroid plexus", + "Dura mater": "dura 
mater", + "Enr. SDM": "brain meninx", + "Whole brain": "brain", + } + fn = [ + os.path.join(data_dir, "filtered_gene_bc_matrices_mex_WT_fullAggr.zip"), + os.path.join(data_dir, "annot_fullAggr.csv") + ] + + with zipfile.ZipFile(fn[0]) as archive: + x = scipy.io.mmread(archive.open('filtered_gene_bc_matrices_mex/mm10/matrix.mtx')).T.tocsr() + adata = anndata.AnnData(x) + var = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/genes.tsv'), sep="\t", header=None) + var.columns = ["ensembl", "name"] + obs_names = pandas.read_csv(archive.open('filtered_gene_bc_matrices_mex/mm10/barcodes.tsv'), + sep="\t", + header=None + )[0].values + obs = pandas.read_csv(fn[1]) - # Match annotation to raw data. - obs.index = obs["cell"].values - obs = obs.loc[[i in obs_names for i in obs.index], :] - idx_tokeep = np.where([i in obs.index for i in obs_names])[0] - adata = adata[idx_tokeep, :] - obs_names = obs_names[idx_tokeep] - idx_map = np.array([obs.index.tolist().index(i) for i in obs_names]) - adata = adata[idx_map, :] - obs_names = obs_names[idx_map] + # Match annotation to raw data. 
+ obs.index = obs["cell"].values + obs = obs.loc[[i in obs_names for i in obs.index], :] + idx_tokeep = np.where([i in obs.index for i in obs_names])[0] + adata = adata[idx_tokeep, :] + obs_names = obs_names[idx_tokeep] + idx_map = np.array([obs.index.tolist().index(i) for i in obs_names]) + adata = adata[idx_map, :] + obs_names = obs_names[idx_map] + obs["organ"] = [sample_organ_dict[x] for x in obs["sample"].values] - # Assign attributes - adata.obs_names = obs_names - adata.var = var - adata.obs = obs + # Assign attributes + adata.obs_names = obs_names + adata.var = var + adata.obs = obs - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index e065e25d5..c873b84ca 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd import scipy.io import gzip @@ -11,14 +10,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None @@ -37,25 +30,26 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "GSE131685_RAW.tar") - adatas = [] - with tarfile.open(fn) as tar: - for member in tar.getmembers(): - if "_matrix.mtx.gz" in member.name: - name = 
"_".join(member.name.split("_")[:-1]) - with gzip.open(tar.extractfile(member), "rb") as mm: - X = scipy.io.mmread(mm).T.tocsr() - obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, - sep="\t", index_col=0) - obs.index.name = None - var = pd.read_csv(tar.extractfile(name + "_features.tsv.gz"), compression="gzip", header=None, - sep="\t").iloc[:, :2] - var.columns = ["ensembl", "names"] - var.index = var["ensembl"].values - adata = anndata.AnnData(X=X, obs=obs, var=var) - adata.obs["sample"] = name - adatas.append(adata) - adata = adatas[0].concatenate(adatas[1:]) - - return adata + +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "GSE131685_RAW.tar") + adatas = [] + with tarfile.open(fn) as tar: + for member in tar.getmembers(): + if "_matrix.mtx.gz" in member.name: + name = "_".join(member.name.split("_")[:-1]) + with gzip.open(tar.extractfile(member), "rb") as mm: + X = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(name + "_barcodes.tsv.gz"), compression="gzip", header=None, + sep="\t", index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(name + "_features.tsv.gz"), compression="gzip", header=None, + sep="\t").iloc[:, :2] + var.columns = ["ensembl", "names"] + var.index = var["ensembl"].values + adata = anndata.AnnData(X=X, obs=obs, var=var) + adata.obs["sample"] = name + adatas.append(adata) + adata = adatas[0].concatenate(adatas[1:]) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index fa08f34f5..3e7c56103 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing 
import Union import numpy as np from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" self.download_url_meta = None @@ -34,9 +27,10 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "voigt19.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "voigt19.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 63445e4c7..30708efe8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -1,10 +1,9 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "wang20_colon.processed.h5ad", @@ -13,17 +12,10 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, 
**kwargs): + super().__init__(**kwargs) self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None @@ -44,10 +36,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index a5332f219..370d84eb5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -1,10 +1,9 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "lukassen20_lung_orig.processed.h5ad", @@ -12,17 +11,10 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" 
self.download_url_meta = None @@ -41,11 +33,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nCount_RNA"].values[:, None])).multiply(1 / 10000) - self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) - return adata +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["nCount_RNA"].values[:, None])).multiply(1 / 10000) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index a0f27e68c..902b9a861 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -1,8 +1,7 @@ import anndata import os -from typing import Union -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad", @@ -47,18 +46,11 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - organ = "-".join(sample_fn.split("-")[7:]).split(".")[0].lower() + def __init__(self, **kwargs): + super().__init__(**kwargs) + organ = "-".join(self.sample_fn.split("-")[7:]).split(".")[0].lower() organ = "adipose tissue" if organ in ["fat", "bat", "gat", "mat", "scat"] else \ "aorta" if organ in ["aorta"] else \ 
"urinary bladder" if organ in ["bladder"] else \ @@ -80,7 +72,8 @@ def __init__( "trachea" if organ in ["trachea"] else organ # ToDo: heart_and_aorta could be a distinct UBERON term, e.g. cardiovascular system? - self.download_url_data = f"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/{sample_fn}" + self.download_url_data = f"https://czb-tabula-muris-senis.s3-us-west-2.amazonaws.com/Data-objects/" \ + f"{self.sample_fn}" self.download_url_meta = None self.obs_key_cellontology_original = "cell_ontology_class" @@ -95,7 +88,7 @@ def __init__( self.normalization = "norm" self.organism = "mouse" self.organ = organ - self.protocol = "10X sequencing" if sample_fn.split("-")[3] == "droplet" else "Smart-seq2" + self.protocol = "10X sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" self.state_exact = "healthy" self.year = 2019 @@ -104,14 +97,15 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read_h5ad(fn) - adata.X = adata.raw.X - adata.var = adata.raw.var - del adata.raw - adata.obsm = {} - adata.varm = {} - adata.uns = {} - return adata +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read_h5ad(fn) + adata.X = adata.raw.X + adata.var = adata.raw.var + del adata.raw + adata.obsm = {} + adata.varm = {} + adata.uns = {} + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 016cd9744..6eea746a2 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import pandas as pd 
from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = [ "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fmatrix%2Emtx%2Egz", "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE135nnn/GSE135893/suppl/GSE135893%5Fgenes%2Etsv%2Egz", @@ -39,19 +32,19 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "GSE135893_matrix.mtx.gz"), - os.path.join(self.data_dir, "GSE135893_genes.tsv.gz"), - os.path.join(self.data_dir, "GSE135893_barcodes.tsv.gz"), - os.path.join(self.data_dir, "GSE135893_IPF_metadata.csv.gz"), - ] - adata = anndata.read_mtx(fn[0]).T - adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) - adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=["barcodes"]) - obs = pd.read_csv(fn[3], index_col=0) - adata = adata[obs.index.tolist(), :].copy() - adata.obs = obs - self.set_unknown_class_id(ids=["1_Unicorns and artifacts"]) - - return adata + +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "GSE135893_matrix.mtx.gz"), + os.path.join(data_dir, "GSE135893_genes.tsv.gz"), + os.path.join(data_dir, "GSE135893_barcodes.tsv.gz"), + os.path.join(data_dir, "GSE135893_IPF_metadata.csv.gz"), + ] + adata = anndata.read_mtx(fn[0]).T + adata.var = pd.read_csv(fn[1], index_col=0, header=None, names=["ids"]) + adata.obs = pd.read_csv(fn[2], index_col=0, header=None, names=["barcodes"]) + obs = pd.read_csv(fn[3], index_col=0) + adata = adata[obs.index.tolist(), :].copy() + adata.obs = obs + + return adata diff --git 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index 1c9a06f17..fc3e19123 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np from sfaira.data import DatasetBase @@ -8,18 +7,8 @@ class Dataset(DatasetBase): - """ - TODO: annotate developmental cell types in set_unknown_class_id - """ - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = [ "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Mature_Full_v2.1.h5ad", "https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad" @@ -42,21 +31,17 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = [ - os.path.join(self.data_dir, "Mature_Full_v2.1.h5ad"), - os.path.join(self.data_dir, "Fetal_full.h5ad") - ] - adult = anndata.read(fn[0]) - fetal = anndata.read(fn[1]) - adult.obs["development"] = "adult" - fetal.obs["development"] = "fetal" - adata = adult.concatenate(fetal) - adata.X = np.expm1(adata.X) - - self.set_unknown_class_id(ids=[ - "CNT/PC - proximal UB", "Distal S shaped body", "Medial S shaped body", "Proliferating stroma progenitor", - "Proximal S shaped body", "Stroma progenitor", "Proximal UB", - ]) - - return adata + +def load(data_dir, **kwargs): + fn = [ + os.path.join(data_dir, "Mature_Full_v2.1.h5ad"), + os.path.join(data_dir, "Fetal_full.h5ad") + ] + adult = anndata.read(fn[0]) + fetal = 
anndata.read(fn[1]) + adult.obs["development"] = "adult" + fetal.obs["development"] = "fetal" + adata = adult.concatenate(fetal) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv index afd222bad..9b4b28439 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.tsv @@ -3,9 +3,11 @@ Ascending vasa recta endothelium vasa recta ascending limb cell CL:1001131 B cell B cell CL:0000236 CD4 T cell CD4-positive, alpha-beta T cell CL:0000624 CD8 T cell CD8-positive, alpha-beta T cell CL:0000625 +CNT/PC - proximal UB UNKNOWN UNKNOWN Cap mesenchyme mesenchymal cell CL:0008019 Connecting tubule kidney connecting tubule epithelial cell CL:1000768 Descending vasa recta endothelium vasa recta descending limb cell CL:1001285 +Distal S shaped body UNKNOWN UNKNOWN Distal renal vesicle epithelial cell CL:0000066 Distinct proximal tubule 1 epithelial cell of proximal tubule CL:0002306 Distinct proximal tubule 2 epithelial cell of proximal tubule CL:0002306 @@ -27,6 +29,7 @@ Macrophage 1 macrophage CL:0000235 Macrophage 2 macrophage CL:0000235 Mast cell mast cell CL:0000097 Mast cells mast cell CL:0000097 +Medial S shaped body UNKNOWN UNKNOWN Megakaryocyte megakaryocyte CL:0000556 Monocyte monocyte CL:0000576 Myofibroblast kidney interstitial myofibroblast CL:1000691 @@ -53,8 +56,12 @@ Proliferating fibroblast fibroblast CL:0000057 Proliferating macrophage macrophage CL:0000235 Proliferating monocyte monocyte CL:0000576 Proliferating myofibroblast kidney interstitial myofibroblast CL:1000691 +Proliferating stroma progenitor UNKNOWN UNKNOWN +Proximal S shaped body UNKNOWN UNKNOWN +Proximal UB UNKNOWN 
UNKNOWN Proximal renal vesicle kidney blood vessel cell CL:1000854 Proximal tubule epithelial cell of proximal tubule CL:0002306 +Stroma progenitor UNKNOWN UNKNOWN Thick ascending limb of Loop of Henle kidney loop of Henle thick ascending limb epithelial cell CL:1001106 Transitional urothelium urothelial cell CL:0000731 Type A intercalated cell renal intercalated cell CL:0005010 diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index 7bcb2b6cc..697ea9c05 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np from sfaira.data import DatasetBase @@ -8,14 +7,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" self.download_url_meta = None @@ -34,9 +27,10 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "park20.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "park20.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py 
b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index 6dddd955e..ffb3d2933 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -1,9 +1,8 @@ import anndata import os -from typing import Union import scipy.sparse -from sfaira.data import DatasetBaseGroupLoadingManyFiles +from sfaira.data import DatasetBase SAMPLE_FNS = [ "madissoon19_lung.processed.h5ad", @@ -12,20 +11,13 @@ ] -class Dataset(DatasetBaseGroupLoadingManyFiles): +class Dataset(DatasetBase): """ ToDo: patient information in .obs["patient"] and sample information in .obs["sample"] (more samples than patients) """ - def __init__( - self, - sample_fn: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) if self.sample_fn == "madissoon19_lung.processed.h5ad": self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" self.var_ensembl_col = "gene.ids.HCATisStab7509734" @@ -56,17 +48,16 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, self.sample_fn) - adata = anndata.read(fn) - if self.sample_fn != "madissoon19_lung.processed.h5ad": - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None]))\ - .multiply(1 / 10000) - # Cell type column called differently in madissoon19_lung.processed.h5ad: - if self.sample_fn == "madissoon19_lung.processed.h5ad": - adata.obs["Celltypes"] = adata.obs["CellType"] - del adata.obs["CellType"] - self.set_unknown_class_id(ids=["B_T_doublet", "CD34_progenitor", "Stroma"]) +def 
load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + if sample_fn != "madissoon19_lung.processed.h5ad": + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None]))\ + .multiply(1 / 10000) + # Cell type column called differently in madissoon19_lung.processed.h5ad: + if sample_fn == "madissoon19_lung.processed.h5ad": + adata.obs["Celltypes"] = adata.obs["CellType"] + del adata.obs["CellType"] - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv index dda6a1e33..9544b4f32 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv @@ -4,10 +4,12 @@ Alveolar_Type2 type II pneumocyte CL:0002063 B_CD27neg B cell CL:0000236 B_CD27pos B cell CL:0000236 B_Hypermutation B cell CL:0000236 +B_T_doublet NOT_A_CELL NOT_A_CELL B_cells B cell CL:0000236 B_follicular follicular B cell CL:0000843 B_mantle B cell CL:0000236 Blood_vessel blood vessel endothelial cell CL:0000071 +CD34_progenitor UNKNOWN UNKNOWN Ciliated ciliated cell CL:0000064 DC_1 dendritic cell CL:0000451 DC_2 dendritic cell CL:0000451 @@ -45,6 +47,7 @@ Plasma_IgM IgM plasma cell CL:0000986 Plasma_cells plasma cell CL:0000786 Plasmablast plasmablast CL:0000980 Platelet platelet CL:0000233 +Stroma UNKNOWN UNKNOWN T_CD4 CD4-positive, alpha-beta T cell CL:0000624 T_CD4_conv CD4-positive, alpha-beta T cell CL:0000624 T_CD4_fh T follicular helper cell CL:0002038 diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py 
b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index 1a8e5b7ab..ffa12a670 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -1,6 +1,5 @@ import anndata import os -from typing import Union import numpy as np import scipy.sparse @@ -9,14 +8,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" self.download_url_meta = None @@ -37,12 +30,11 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "lukowski19.processed.h5ad") - adata = anndata.read(fn) - adata.X = np.expm1(adata.X) - adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - self.set_unknown_class_id(ids=["unannotated", "unspecified"]) +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "lukowski19.processed.h5ad") + adata = anndata.read(fn) + adata.X = np.expm1(adata.X) + adata.X = adata.X.multiply(scipy.sparse.csc_matrix(adata.obs["n_counts"].values[:, None])).multiply(1 / 10000) - return adata + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv index ffa68e820..2e1aed660 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv +++ 
b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.tsv @@ -11,3 +11,5 @@ retinal ganglion cell retinal ganglion cell CL:0000740 retinal rod cell type A retinal rod cell CL:0000604 retinal rod cell type B retinal rod cell CL:0000604 retinal rod cell type C retinal rod cell CL:0000604 +unannotated UNKNOWN UNKNOWN +unspecified UNKNOWN UNKNOWN diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index 872a023ba..9c13411d7 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -1,5 +1,4 @@ import os -from typing import Union import scipy.sparse import anndata as ad import numpy as np @@ -10,14 +9,8 @@ class Dataset(DatasetBase): - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = \ "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.download_url_meta = None @@ -37,31 +30,32 @@ def __init__( self.set_dataset_id(idx=1) - def _load(self): - fn = os.path.join(self.data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5") - with tables.open_file(str(fn), 'r') as f: - dsets = {} - for node in f.walk_nodes('/matrix', 'Array'): - dsets[node.name] = node.read() - M, N = dsets['shape'] - data = dsets['data'] - if dsets['data'].dtype == np.dtype('int32'): - data = dsets['data'].view('float32') - data[:] = dsets['data'] - matrix = scipy.sparse.csr_matrix( - 
(data, dsets['indices'], dsets['indptr']), - shape=(N, M), - ) - adata = ad.AnnData( - matrix, - dict(obs_names=dsets['barcodes'].astype(str)), - dict( - var_names=dsets['name'].astype(str), - gene_ids=dsets['id'].astype(str), - feature_types=dsets['feature_type'].astype(str), - genome=dsets['genome'].astype(str), - ), - ) - - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5") + with tables.open_file(str(fn), 'r') as f: + dsets = {} + for node in f.walk_nodes('/matrix', 'Array'): + dsets[node.name] = node.read() + + M, N = dsets['shape'] + data = dsets['data'] + if dsets['data'].dtype == np.dtype('int32'): + data = dsets['data'].view('float32') + data[:] = dsets['data'] + matrix = scipy.sparse.csr_matrix( + (data, dsets['indices'], dsets['indptr']), + shape=(N, M), + ) + adata = ad.AnnData( + matrix, + dict(obs_names=dsets['barcodes'].astype(str)), + dict( + var_names=dsets['name'].astype(str), + gene_ids=dsets['id'].astype(str), + feature_types=dsets['feature_type'].astype(str), + genome=dsets['genome'].astype(str), + ), + ) + + return adata diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index fe0252711..34d6a21ea 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -1,28 +1,13 @@ import anndata import os -from typing import Union -from sfaira.data import DatasetBaseGroupLoadingOneFile +from sfaira.data import DatasetBase -SAMPLE_IDS = [ - "umbilical cord blood", - "bone marrow" -] +class Dataset(DatasetBase): -class Dataset(DatasetBaseGroupLoadingOneFile): - - def __init__( - self, - sample_id: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - 
**kwargs - ): - super().__init__(sample_id=sample_id, data_path=data_path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.obs_key_sample = "derived_organ_parts_label" - + def __init__(self, **kwargs): + super().__init__(**kwargs) self.download_url_data = "https://data.humancellatlas.org/project-assets/project-matrices/" \ "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" self.download_url_meta = None @@ -31,7 +16,7 @@ def __init__( self.doi = "no_doi_regev" self.healthy = True self.normalization = "raw" - self.organ = sample_id + self.organ_obs_key = "derived_organ_parts_label" self.organism = "human" self.protocol = "10X sequencing" self.state_exact = "healthy" @@ -42,9 +27,10 @@ def __init__( self.set_dataset_id(idx=1) - def _load_full(self): - fn = os.path.join(self.data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") - adata = anndata.read_loom(fn) - adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() - return adata +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom") + adata = anndata.read_loom(fn) + adata = adata[adata.obs["emptydrops_is_cell"] == "t"].copy() + + return adata diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 94130ab7e..bd8f6d2ae 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -1,78 +1,92 @@ import numpy as np import os +import pytest import scipy.sparse -import unittest from sfaira.data import DatasetSuperGroup from sfaira.data import DatasetSuperGroupSfaira +dir_data = "../test_data" +dir_meta = "../test_data/meta" -class TestDatasetGroupSfaira(unittest.TestCase): - dir_data: str = "../test_data" - dir_meta: str = "../test_data/meta" - - def test_instantiate(self): - _ = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - - def test_load(self): - ds = 
DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds.load_all() - - def test_adata(self): - ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - _ = ds.adata - - -class TestDatasetSuperGroups(unittest.TestCase): - dir_data: str = "../test_data" - dir_meta: str = "../test_data/meta" - - def test_load(self): - ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load_all() - - def test_adata(self): - ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - _ = ds.adata - - def test_load_backed_dense(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load_all_tobacked( - fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), - genome=genome, - shuffled=True, - as_dense=True, - annotated_only=False - ) - assert isinstance(ds.adata.X[:], np.ndarray), "%s" % type(ds.adata.X) - - def test_load_backed_sparse(self, genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(data_path=self.dir_data, meta_path=self.dir_meta, cache_path=self.dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - 
ds.load_all_tobacked( - fn_backed=os.path.join(self.dir_data, 'test_backed_data.h5ad'), - genome=genome, - shuffled=False, - as_dense=False, - annotated_only=False - ) - assert isinstance(ds.adata.X[:], scipy.sparse.csr_matrix), "%s" % type(ds.adata.X) - - -if __name__ == '__main__': - unittest.main() + +def test_dsgs_instantiate(): + _ = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + + +def test_dsgs_load(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds.load_all() + + +def test_dsgs_adata(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + _ = ds.adata + + +@pytest.mark.parametrize("format", ["sfaira", "cellxgene"]) +@pytest.mark.parametrize("clean", [True, False]) +def test_dsgs_streamline(format: str, clean: bool): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds.load_all() + ds.streamline(format=format, clean=clean) + + +def test_dsgs_streamline_cellxgene(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds.load_all() + ds.streamline(format="cellxgene", clean=True) + + +def test_dsg_load(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + ds.load_all() + + +def test_dsg_adata(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", 
values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + _ = ds.adata + + +def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + ds.load_all_tobacked( + fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), + genome=genome, + shuffled=True, + as_dense=True, + annotated_only=False + ) + assert isinstance(ds.adata.X[:], np.ndarray), "%s" % type(ds.adata.X) + + +def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["bladder"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + ds.load_all_tobacked( + fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), + genome=genome, + shuffled=False, + as_dense=False, + annotated_only=False + ) + assert isinstance(ds.adata.X[:], scipy.sparse.csr_matrix), "%s" % type(ds.adata.X) From ab36d54399a6dbb192d64f68406cc3d6e2e305de Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 4 Mar 2021 17:44:50 +0100 Subject: [PATCH 080/161] Dataset configs (#159) * added config writing and loading * added util script to create ID configs per anatomical region * added ontology based subsetting of dataset groups * added cell-wise meta-data based subsetting, including relational subsetting * fix load function signature handling * fix windows builds by defining paths system agnostic addresses #103 Fixing (#160) Co-authored-by: david.seb.fischer Co-authored-by: Leander <20015434+le-ander@users.noreply.github.com> --- sfaira/consts/adata_fields.py | 5 +- sfaira/consts/ontologies.py | 37 +- sfaira/data/__init__.py | 2 +- sfaira/data/base/__init__.py | 2 + sfaira/data/{base.py => base/dataset.py} | 1068 +++-------------- sfaira/data/base/dataset_group.py | 935 +++++++++++++++ .../anatomical_groups/human/human_adipose.py | 2 +- .../human/human_adrenalgland.py | 2 +- .../anatomical_groups/human/human_artery.py | 2 +- .../anatomical_groups/human/human_bladder.py | 2 +- .../anatomical_groups/human/human_blood.py | 2 +- .../anatomical_groups/human/human_bone.py | 2 +- .../anatomical_groups/human/human_brain.py | 2 +- .../anatomical_groups/human/human_calvaria.py | 2 +- .../anatomical_groups/human/human_cervix.py | 2 +- .../human/human_chorionicvillus.py | 2 +- .../anatomical_groups/human/human_colon.py | 2 +- .../anatomical_groups/human/human_duodenum.py | 2 +- .../human/human_epityphlon.py | 2 +- .../human/human_esophagus.py | 2 +- .../anatomical_groups/human/human_eye.py | 2 +- .../human/human_fallopiantube.py | 2 +- .../human/human_femalegonad.py | 2 +- .../human/human_gallbladder.py | 2 +- .../anatomical_groups/human/human_heart.py | 2 +- .../anatomical_groups/human/human_hesc.py | 2 +- .../anatomical_groups/human/human_ileum.py | 2 +- .../anatomical_groups/human/human_jejunum.py | 2 +- .../anatomical_groups/human/human_kidney.py | 2 +- 
.../anatomical_groups/human/human_liver.py | 2 +- .../anatomical_groups/human/human_lung.py | 2 +- .../human/human_malegonad.py | 2 +- .../anatomical_groups/human/human_muscle.py | 2 +- .../anatomical_groups/human/human_omentum.py | 2 +- .../anatomical_groups/human/human_pancreas.py | 2 +- .../anatomical_groups/human/human_placenta.py | 2 +- .../anatomical_groups/human/human_pleura.py | 2 +- .../anatomical_groups/human/human_prostate.py | 2 +- .../anatomical_groups/human/human_rectum.py | 2 +- .../anatomical_groups/human/human_rib.py | 2 +- .../anatomical_groups/human/human_skin.py | 2 +- .../human/human_spinalcord.py | 2 +- .../anatomical_groups/human/human_spleen.py | 2 +- .../anatomical_groups/human/human_stomach.py | 2 +- .../anatomical_groups/human/human_thymus.py | 2 +- .../anatomical_groups/human/human_thyroid.py | 2 +- .../anatomical_groups/human/human_trachea.py | 2 +- .../anatomical_groups/human/human_ureter.py | 2 +- .../anatomical_groups/human/human_uterus.py | 2 +- .../anatomical_groups/mouse/mouse_adipose.py | 2 +- .../anatomical_groups/mouse/mouse_bladder.py | 2 +- .../anatomical_groups/mouse/mouse_blood.py | 2 +- .../anatomical_groups/mouse/mouse_bone.py | 2 +- .../anatomical_groups/mouse/mouse_brain.py | 2 +- .../anatomical_groups/mouse/mouse_colon.py | 2 +- .../mouse/mouse_diaphragm.py | 2 +- .../mouse/mouse_femalegonad.py | 2 +- .../anatomical_groups/mouse/mouse_heart.py | 2 +- .../anatomical_groups/mouse/mouse_ileum.py | 2 +- .../anatomical_groups/mouse/mouse_kidney.py | 2 +- .../anatomical_groups/mouse/mouse_liver.py | 2 +- .../anatomical_groups/mouse/mouse_lung.py | 2 +- .../mouse/mouse_malegonad.py | 2 +- .../mouse/mouse_mammarygland.py | 2 +- .../anatomical_groups/mouse/mouse_muscle.py | 2 +- .../anatomical_groups/mouse/mouse_pancreas.py | 2 +- .../anatomical_groups/mouse/mouse_placenta.py | 2 +- .../anatomical_groups/mouse/mouse_prostate.py | 2 +- .../anatomical_groups/mouse/mouse_rib.py | 2 +- .../anatomical_groups/mouse/mouse_skin.py | 2 
+- .../anatomical_groups/mouse/mouse_spleen.py | 2 +- .../anatomical_groups/mouse/mouse_stomach.py | 2 +- .../anatomical_groups/mouse/mouse_thymus.py | 2 +- .../anatomical_groups/mouse/mouse_tongue.py | 2 +- .../anatomical_groups/mouse/mouse_trachea.py | 2 +- .../anatomical_groups/mouse/mouse_uterus.py | 2 +- ...letoflangerhans_2017_smartseq2_enge_001.py | 2 +- .../mouse_x_2018_microwellseq_han_x.py | 2 +- ...pithelium_2019_10xsequencing_smilie_001.py | 2 +- ...man_ileum_2019_10xsequencing_martin_001.py | 2 +- ...stategland_2018_10xsequencing_henry_001.py | 2 +- .../human_pancreas_2016_indrop_baron_001.py | 2 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 8 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 2 +- ...uman_lung_2020_10xsequencing_miller_001.py | 2 +- .../human_brain_2017_droncseq_habib_001.py | 2 +- ...human_testis_2018_10xsequencing_guo_001.py | 2 +- ...liver_2018_10xsequencing_macparland_001.py | 2 +- .../human_kidney_2019_droncseq_lake_001.py | 2 +- .../human_x_2019_10xsequencing_szabo_001.py | 5 +- ...man_retina_2019_10xsequencing_menon_001.py | 2 +- .../human_placenta_2018_x_ventotormo_001.py | 2 +- .../human_liver_2019_celseq2_aizarani_001.py | 2 +- ...ver_2019_10xsequencing_ramachandran_001.py | 6 +- ...an_liver_2019_10xsequencing_popescu_001.py | 2 +- ...uman_colon_2020_10xsequencing_james_001.py | 2 +- .../human_lung_2019_dropseq_braga_001.py | 2 +- .../human_x_2019_10xsequencing_braga_x.py | 2 +- ...man_retina_2019_10xsequencing_voigt_001.py | 2 +- .../human_x_2019_10xsequencing_wang_001.py | 2 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 2 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 8 +- ...nchyma_2020_10xsequencing_habermann_001.py | 7 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 2 +- ...uman_thymus_2020_10xsequencing_park_001.py | 2 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 2 +- ..._retina_2019_10xsequencing_lukowski_001.py | 2 +- sfaira/data/utils.py | 4 +- .../create_anatomical_configs.py | 101 ++ 
sfaira/data/utils_scripts/create_meta.py | 3 +- .../utils_scripts/create_meta_and_cache.py | 3 +- .../data/utils_scripts/write_backed_human.py | 2 +- .../data/utils_scripts/write_backed_mouse.py | 2 +- sfaira/interface/model_zoo.py | 4 +- sfaira/unit_tests/data/test_dataset.py | 96 +- sfaira/unit_tests/versions/test_ontologies.py | 31 +- sfaira/versions/metadata/base.py | 37 +- 117 files changed, 1450 insertions(+), 1108 deletions(-) create mode 100644 sfaira/data/base/__init__.py rename sfaira/data/{base.py => base/dataset.py} (62%) create mode 100644 sfaira/data/base/dataset_group.py create mode 100644 sfaira/data/utils_scripts/create_anatomical_configs.py diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 7195b477d..8a2145f4f 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,4 +1,3 @@ -import numpy as np from typing import List """ @@ -12,6 +11,7 @@ class AdataIdsBase: Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" _annotated: str + _assay: str _author: str _cell_types_original: str _cell_ontology_class: str @@ -30,7 +30,6 @@ class AdataIdsBase: _normalization: str _organ: str _organism: str - _assay: str _year: str @property @@ -175,6 +174,7 @@ class AdataIdsSfaira(AdataIdsExtended): def __init__(self): self._annotated = "annotated" + self._assay = "assay" self._author = "author" self._bio_sample = "bio_sample" self._cell_types_original = "cell_types_original" @@ -195,7 +195,6 @@ def __init__(self): self._normalization = "normalization" self._organ = "organ" self._organism = "organism" - self._protocol = "protocol" self._tech_sample = "bio_sample" self._year = "year" diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 4639eb70e..5de6b8425 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -5,25 +5,26 @@ class OntologyContainerSfaira: + _cellontology_class: OntologyCelltypes + def __init__(self): - self.ontology_age = None - self._ontology_cell_types = None - self.ontology_cell_types = "v2021-02-01" - self.ontology_dev_stage = None - self.ontology_ethnicity = None - self.ontology_healthy = [True, False] - self.ontology_normalization = None - self.ontology_organ = OntologyUberon() - self.ontology_organism = OntologyList(terms=["mouse", "human"]) - self.ontology_protocol = OntologySinglecellLibraryConstruction() - self.ontology_sex = OntologyList(terms=["female", "male"]) - self.ontology_subtissue = None - self.ontology_year = list(range(2000, 3000)) + self.age = None + self.assay = OntologySinglecellLibraryConstruction() + self.cellontology_class = "v2021-02-01" + self.cellontology_original = None + self.developmental_stage = None + self.ethnicity = None + self.healthy = [True, False] + self.normalization = None + self.organ = OntologyUberon() + self.organism = OntologyList(terms=["mouse", "human"]) + self.sex = OntologyList(terms=["female", "male"]) + self.year = list(range(2000, 3000)) @property - def 
ontology_cell_types(self): - return self._ontology_cell_types + def cellontology_class(self): + return self._cellontology_class - @ontology_cell_types.setter - def ontology_cell_types(self, x: str): - self._ontology_cell_types = OntologyCelltypes(branch=x) + @cellontology_class.setter + def cellontology_class(self, x: str): + self._cellontology_class = OntologyCelltypes(branch=x) diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index c60748dc5..921814a8d 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,4 @@ -from .base import DatasetBase, \ +from sfaira.data.base import DatasetBase, \ DatasetGroup, DatasetGroupDirectoryOriented, \ DatasetSuperGroup from . import dataloaders diff --git a/sfaira/data/base/__init__.py b/sfaira/data/base/__init__.py new file mode 100644 index 000000000..d2e2df6fc --- /dev/null +++ b/sfaira/data/base/__init__.py @@ -0,0 +1,2 @@ +from sfaira.data.base.dataset import DatasetBase +from sfaira.data.base.dataset_group import DatasetGroup, DatasetGroupDirectoryOriented, DatasetSuperGroup diff --git a/sfaira/data/base.py b/sfaira/data/base/dataset.py similarity index 62% rename from sfaira/data/base.py rename to sfaira/data/base/dataset.py index 02522a42f..28668a4f7 100644 --- a/sfaira/data/base.py +++ b/sfaira/data/base/dataset.py @@ -3,13 +3,11 @@ import abc import anndata import h5py -import multiprocessing import numpy as np import pandas as pd import os from os import PathLike import pandas -import pydoc import scipy.sparse from typing import Dict, List, Tuple, Union import warnings @@ -27,31 +25,6 @@ UNS_STRING_META_IN_OBS = "__obs__" -def map_fn(inputs): - """ - Functional to load data set with predefined additional actions. 
- - :param inputs: - :return: None if function ran, error report otherwise - """ - ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, kwargs_func = inputs - try: - ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw, - allow_caching=allow_caching, - ) - if func is not None: - x = func(ds, **kwargs_func) - ds.clear() - return x - else: - return None - except FileNotFoundError as e: - return ds.id, e, - - load_doc = \ """ :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. @@ -61,6 +34,33 @@ def map_fn(inputs): """ +def is_term( + query, + ontology: Union[Ontology, bool, int, float, str, List[bool], List[int], List[float], List[str]], + ontology_parent=None, +) -> True: + """ + Check whether value is from set of allowed values using ontology. + + :param query: Value to attempt to set, only yield a single value and not a list. + :param ontology: Constraint for values. + Either ontology instance used to constrain entries, or list of allowed values. + :param ontology_parent: If ontology is a DAG, not only check if node is a DAG node but also whether it is a child + of this parent node. 
+ :return: Whether attempted term is sub-term of allowed term in ontology + """ + if ontology is not None: + if isinstance(ontology, Ontology): + if ontology_parent is None: + return ontology.is_node(query) + else: + return ontology.is_a(query=query, reference=ontology_parent) + else: + return query in ontology + else: + return True + + class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict @@ -95,6 +95,7 @@ class DatasetBase(abc.ABC): _age_obs_key: Union[None, str] _assay_obs_key: Union[None, str] + _cellontology_class_obs_key: Union[None, str] _cellontology_id_obs_key: Union[None, str] _cellontology_original_obs_key: Union[None, str] _development_stage_obs_key: Union[None, str] @@ -132,7 +133,7 @@ def __init__( **kwargs ): self._adata_ids_sfaira = AdataIdsSfaira() - self._ontology_container_sfaira = OCS # Using a pre-instantiated version of this yields drastic speed-ups. + self.ontology_container_sfaira = OCS # Using a pre-instantiated version of this yields drastic speed-ups. self.adata = None self.meta = None @@ -164,6 +165,7 @@ def __init__( self._year = None self._age_obs_key = None + self._cellontology_class_obs_key = None self._cellontology_id_obs_key = None self._cellontology_original_obs_key = None self._development_stage_obs_key = None @@ -340,11 +342,9 @@ def _cached_reading(filename): if os.path.exists(filename): self.adata = anndata.read_h5ad(filename) else: - warnings.warn(f"Cached loading enabled, but cache file {filename} not found. 
" - f"Loading from raw files.") - self.adata = self.load_func(self.data_dir, self.sample_fn) + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) else: - self.adata = self.load_func(self.data_dir, self.sample_fn) + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) def _cached_writing(filename): if filename is not None: @@ -354,10 +354,10 @@ def _cached_writing(filename): self.adata.write_h5ad(filename) if load_raw and allow_caching: - self.adata = self.load_func(self.data_dir, self.sample_fn) + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) _cached_writing(self.cache_fn) elif load_raw and not allow_caching: - self.adata = self.load_func(self.data_dir, self.sample_fn) + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) elif not load_raw and allow_caching: _cached_reading(self.cache_fn) _cached_writing(self.cache_fn) @@ -504,14 +504,17 @@ def _collapse_gene_versions(self, remove_gene_version): # last element of each block as block boundaries: # n_genes - 1 - idx_map_sorted_rev.index(x) # Note that the blocks are named as positive integers starting at 1, without gaps. 
- counts = np.concatenate([np.sum(x, axis=1, keepdims=True) - for x in np.split(self.adata[:, idx_map_sorted_fwd].X, # forward ordered data - indices_or_sections=[ - n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order - for x in np.arange(0, len(new_index_collapsed) - 1)], # -1: do not need end of last partition - axis=1 - ) - ][::-1], axis=1) + counts = np.concatenate([ + np.sum(x, axis=1, keepdims=True) + for x in np.split( + self.adata[:, idx_map_sorted_fwd].X, # forward ordered data + indices_or_sections=[ + n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order + for x in np.arange(0, len(new_index_collapsed) - 1) + ], # -1: do not need end of last partition + axis=1 + ) + ][::-1], axis=1) # Remove varm and populate var with first occurrence only: obs_names = self.adata.obs_names self.adata = anndata.AnnData( @@ -595,23 +598,19 @@ def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): # Set cell-wise or data set-wide attributes (.uns / .obs): # These are saved in .uns if they are data set wide to save memory. 
for x, y, z, v in ( - [self.age, adata_ids.age, self.age_obs_key, - self._ontology_container_sfaira.ontology_age], - [self.assay, adata_ids.assay, self.assay_obs_key, - self._ontology_container_sfaira.ontology_protocol], + [self.age, adata_ids.age, self.age_obs_key, self.ontology_container_sfaira.age], + [self.assay, adata_ids.assay, self.assay_obs_key, self.ontology_container_sfaira.assay], [self.bio_sample, adata_ids.bio_sample, self.bio_sample_obs_key, None], [self.development_stage, adata_ids.development_stage, self.development_stage_obs_key, - self._ontology_container_sfaira.ontology_dev_stage], + self.ontology_container_sfaira.developmental_stage], [self.ethnicity, adata_ids.ethnicity, self.ethnicity_obs_key, - self._ontology_container_sfaira.ontology_ethnicity], - [self.healthy, adata_ids.healthy, self.healthy_obs_key, - self._ontology_container_sfaira.ontology_healthy], + self.ontology_container_sfaira.ethnicity], + [self.healthy, adata_ids.healthy, self.healthy_obs_key, self.ontology_container_sfaira.healthy], [self.individual, adata_ids.individual, self.individual_obs_key, None], - [self.organ, adata_ids.organ, self.organ_obs_key, - self._ontology_container_sfaira.ontology_organism], + [self.organ, adata_ids.organ, self.organ_obs_key, self.ontology_container_sfaira.organism], [self.organism, adata_ids.organism, self.organism_obs_key, - self._ontology_container_sfaira.ontology_organism], - [self.sex, adata_ids.sex, self.sex_obs_key, self._ontology_container_sfaira.ontology_sex], + self.ontology_container_sfaira.organism], + [self.sex, adata_ids.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], [self.state_exact, adata_ids.state_exact, self.state_exact_obs_key, None], [self.tech_sample, adata_ids.tech_sample, self.tech_sample_obs_key, None], ): @@ -641,7 +640,7 @@ def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): # Set cell-wise attributes (.obs): # None so far other than celltypes. 
# Set cell types: - # Map cell type names from raw IDs to ontology maintained ones:: + # Map cell type names from raw IDs to ontology maintained ones: if self.cellontology_original_obs_key is not None: self.project_celltypes_to_ontology() @@ -660,9 +659,11 @@ def streamline(self, format: str = "sfaira", clean: bool = False): """ if format == "sfaira": adata_fields = self._adata_ids_sfaira - elif format == "sfaira": + elif format == "cellxgene": from sfaira.consts import AdataIdsCellxgene adata_fields = AdataIdsCellxgene() + else: + raise ValueError(f"did not recognize format {format}") self._set_metadata_in_adata(adata_ids=adata_fields) if clean: if self.adata.varm is not None: @@ -674,23 +675,25 @@ def streamline(self, format: str = "sfaira", clean: bool = False): if self.adata.obsp is not None: del self.adata.obsp # Only retain target elements in adata.uns: - self.adata.obs = self.adata.uns[[ - adata_fields.annotated, - adata_fields.author, - adata_fields.doi, - adata_fields.download_url_data, - adata_fields.download_url_meta, - adata_fields.id, - adata_fields.normalization, - adata_fields.year, - ]] + self.adata.uns = dict([ + (k, v) for k, v in self.adata.uns.items() if k in [ + adata_fields.annotated, + adata_fields.author, + adata_fields.doi, + adata_fields.download_url_data, + adata_fields.download_url_meta, + adata_fields.id, + adata_fields.normalization, + adata_fields.year, + ] + ]) # Only retain target elements in adata.var: self.adata.obs = self.adata.var[[ adata_fields.gene_id_names, adata_fields.gene_id_ensembl, ]] # Only retain target columns in adata.obs: - self.adata.obs = self.adata.obs[[ + self.adata.obs = self.adata.obs.loc[:, [ adata_fields.age, adata_fields.bio_sample, adata_fields.development_stage, @@ -878,7 +881,7 @@ def project_celltypes_to_ontology(self): :return: """ - labels_original = self.adata.obs[self.obs_key_cellontology_original].values + labels_original = self.adata.obs[self.cellontology_original_obs_key].values if 
self.cell_ontology_map is not None: # only if this was defined labels_mapped = [ self.cell_ontology_map[x] if x in self.cell_ontology_map.keys() @@ -895,13 +898,14 @@ def project_celltypes_to_ontology(self): attempted=np.unique(labels_mapped).tolist() ) self.adata.obs[self._adata_ids_sfaira.cell_ontology_class] = labels_mapped + self.cellontology_class_obs_key = self._adata_ids_sfaira.cell_ontology_class self.adata.obs[self._adata_ids_sfaira.cell_types_original] = labels_original # Add cell type IDs into object: # The IDs are not read from a source file but inferred based on the class name. # TODO this could be changed in the future, this allows this function to be used both on cell type name mapping # files with and without the ID in the third column. ids_mapped = [ - self._ontology_container_sfaira.ontology_cell_types.id_from_name(x) + self.ontology_container_sfaira.cellontology_class.id_from_name(x) if x not in [ self._adata_ids_sfaira.unknown_celltype_identifier, self._adata_ids_sfaira.not_a_cell_celltype_identifier @@ -1071,7 +1075,7 @@ def age(self) -> Union[None, str]: @age.setter def age(self, x: str): self.__erasing_protection(attr="age", val_old=self._age, val_new=x) - self._value_protection(attr="age", allowed=self._ontology_container_sfaira.ontology_age, attempted=x) + self._value_protection(attr="age", allowed=self.ontology_container_sfaira.age, attempted=x) self._age = x @property @@ -1106,7 +1110,7 @@ def assay(self) -> Union[None, str]: @assay.setter def assay(self, x: str): self.__erasing_protection(attr="protocol", val_old=self._assay, val_new=x) - self._value_protection(attr="protocol", allowed=self._ontology_container_sfaira.ontology_protocol, + self._value_protection(attr="protocol", allowed=self.ontology_container_sfaira.assay, attempted=x) self._assay = x @@ -1168,7 +1172,7 @@ def development_stage(self) -> Union[None, str]: @development_stage.setter def development_stage(self, x: str): self.__erasing_protection(attr="dev_stage", 
val_old=self._development_stage, val_new=x) - self._value_protection(attr="dev_stage", allowed=self._ontology_container_sfaira.ontology_dev_stage, + self._value_protection(attr="dev_stage", allowed=self.ontology_container_sfaira.developmental_stage, attempted=x) self._development_stage = x @@ -1384,7 +1388,7 @@ def normalization(self) -> Union[None, str]: @normalization.setter def normalization(self, x: str): self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) - self._value_protection(attr="normalization", allowed=self._ontology_container_sfaira.ontology_normalization, + self._value_protection(attr="normalization", allowed=self.ontology_container_sfaira.normalization, attempted=x) self._normalization = x @@ -1415,6 +1419,16 @@ def bio_sample_obs_key(self, x: str): self.__erasing_protection(attr="bio_sample_obs_key", val_old=self._bio_sample_obs_key, val_new=x) self._bio_sample_obs_key = x + @property + def cellontology_class_obs_key(self) -> str: + return self._cellontology_class_obs_key + + @cellontology_class_obs_key.setter + def cellontology_class_obs_key(self, x: str): + self.__erasing_protection(attr="cellontology_class_obs_key", val_old=self._cellontology_class_obs_key, + val_new=x) + self._cellontology_class_obs_key = x\ + @property def cellontology_id_obs_key(self) -> str: return self._cellontology_id_obs_key @@ -1530,7 +1544,7 @@ def organ(self) -> Union[None, str]: @organ.setter def organ(self, x: str): self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) - self._value_protection(attr="organ", allowed=self._ontology_container_sfaira.ontology_organ, attempted=x) + self._value_protection(attr="organ", allowed=self.ontology_container_sfaira.organ, attempted=x) self._organ = x @property @@ -1548,7 +1562,7 @@ def organism(self) -> Union[None, str]: @organism.setter def organism(self, x: str): self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) - 
self._value_protection(attr="organism", allowed=self._ontology_container_sfaira.ontology_organism, attempted=x) + self._value_protection(attr="organism", allowed=self.ontology_container_sfaira.organism, attempted=x) self._organism = x @property @@ -1566,7 +1580,7 @@ def sex(self) -> Union[None, str]: @sex.setter def sex(self, x: str): self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) - self._value_protection(attr="sex", allowed=self._ontology_container_sfaira.ontology_sex, attempted=x) + self._value_protection(attr="sex", allowed=self.ontology_container_sfaira.sex, attempted=x) self._sex = x @property @@ -1645,23 +1659,23 @@ def year(self) -> Union[None, int]: @year.setter def year(self, x: int): self.__erasing_protection(attr="year", val_old=self._year, val_new=x) - self._value_protection(attr="year", allowed=self._ontology_container_sfaira.ontology_year, attempted=x) + self._value_protection(attr="year", allowed=self.ontology_container_sfaira.year, attempted=x) self._year = x @property def ontology_celltypes(self): - return self._ontology_container_sfaira.ontology_cell_types + return self.ontology_container_sfaira.cellontology_class @property def ontology_organ(self): - return self._ontology_container_sfaira.ontology_organ + return self.ontology_container_sfaira.organ @property def celltypes_universe(self): if self._celltype_universe: self._celltype_universe = CelltypeUniverse( cl=self.ontology_celltypes, - uberon=self._ontology_container_sfaira.ontology_organ, + uberon=self.ontology_container_sfaira.organ, organism=self.organism, ) return self._celltype_universe @@ -1713,16 +1727,15 @@ def _value_protection( :param attempted: Value(s) to attempt to set in `attr`. 
:return: """ - if allowed is not None: - if not isinstance(attempted, list) and not isinstance(attempted, tuple): - attempted = [attempted] - if isinstance(allowed, Ontology): - for x in attempted: - allowed.validate_node(x) - else: - for x in attempted: - if x not in allowed: - raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + if isinstance(attempted, np.ndarray): + attempted = attempted.tolist() + if isinstance(attempted, tuple): + attempted = list(attempted) + if not isinstance(attempted, list): + attempted = [attempted] + for x in attempted: + if not is_term(query=x, ontology=allowed): + raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") def subset_cells(self, key, values): """ @@ -1733,16 +1746,16 @@ def subset_cells(self, key, values): :param key: Property to subset by. Options: - - "age" points to self.obs_key_age - - "cell_ontology_class" points to self.obs_key_cellontology_original - - "dev_stage" points to self.obs_key_dev_stage - - "ethnicity" points to self.obs_key_ethnicity - - "healthy" points to self.obs_key_healthy - - "organ" points to self.obs_key_organ - - "organism" points to self.obs_key_organism - - "protocol" points to self.obs_key_protocol - - "sex" points to self.obs_key_sex - - "state_exact" points to self.obs_key_state_exact + - "age" points to self.age_obs_key + - "assay" points to self.assay_obs_key + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "healthy" points to self.healthy_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key :param values: Classes to overlap to. 
:return: """ @@ -1750,8 +1763,11 @@ def subset_cells(self, key, values): values = [values] def get_subset_idx(samplewise_key, cellwise_key): + try: + sample_attr = getattr(self, samplewise_key) + except AttributeError: + sample_attr = None obs_key = getattr(self, cellwise_key) - sample_attr = getattr(self, samplewise_key) if sample_attr is not None and obs_key is None: if not isinstance(sample_attr, list): sample_attr = [sample_attr] @@ -1762,842 +1778,26 @@ def get_subset_idx(samplewise_key, cellwise_key): elif sample_attr is None and obs_key is not None: assert self.adata is not None, "adata was not yet loaded" values_found = self.adata.obs[obs_key].values - idx = np.where([x in values for x in values_found]) + values_found_unique = np.unique(values_found) + try: + ontology = getattr(self.ontology_container_sfaira, samplewise_key) + except AttributeError: + raise ValueError(f"{key} not a valid property of ontology_container object") + # Test only unique elements found in ontology to save time. + values_found_unique_matched = [ + x for x in values_found_unique if np.any([ + is_term(query=x, ontology=ontology, ontology_parent=y) + for y in values + ]) + ] + # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. + print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {self.id}") + idx = np.where([x in values_found_unique_matched for x in values_found])[0] elif sample_attr is not None and obs_key is not None: assert False, f"both cell-wise and sample-wise attribute {samplewise_key} given" else: assert False, "no subset chosen" return idx - idx_keep = get_subset_idx(samplewise_key="obs_key_" + key, cellwise_key=key) - self.adata = self.adata[idx_keep, :].copy() - - -class DatasetGroup: - """ - Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through - wrapping them. 
- - Example: - - #query loaders lung - #from sfaira.dev.data.loaders.lung import DatasetGroupLung as DatasetGroup - #dsg_humanlung = DatasetGroupHuman(path='path/to/data') - #dsg_humanlung.load_all(match_to_reference='Homo_sapiens_GRCh38_97') - #dsg_humanlung[some_id] - #dsg_humanlung.adata - """ - datasets: Dict[str, DatasetBase] - - def __init__(self, datasets: dict): - self._adata_ids_sfaira = AdataIdsSfaira() - self.datasets = datasets - - @property - def _unknown_celltype_identifiers(self): - return np.unqiue(np.concatenate([v._unknown_celltype_identifiers for _, v in self.datasets.items()])) - - def load( - self, - annotated_only: bool = False, - remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, - load_raw: bool = False, - allow_caching: bool = True, - processes: int = 1, - func=None, - kwargs_func: Union[None, dict] = None, - ): - """ - Load all datasets in group (option for temporary loading). - - Note: This method automatically subsets to the group to the data sets for which input files were found. - - This method also allows temporarily loading data sets to execute function on loaded data sets (supply func). - In this setting, datasets are removed from memory after the function has been executed. - - :param annotated_only: - :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. - :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset - instance. The return can be empty: - - def func(dataset, **kwargs_func): - # code manipulating dataset and generating output x. - return x - :param kwargs_func: Kwargs of func. 
- """ - args = [ - remove_gene_version, - match_to_reference, - load_raw, - allow_caching, - func, - kwargs_func - ] - - if processes > 1 and len(self.datasets.items()) > 1: # multiprocessing parallelisation - print(f"using python multiprocessing (processes={processes}), " - f"for easier debugging revert to sequential execution (processes=1)") - with multiprocessing.Pool(processes=processes) as pool: - res = pool.starmap(map_fn, [ - (tuple([v] + args),) - for k, v in self.datasets.items() if v.annotated or not annotated_only - ]) - # Clear data sets that were not successfully loaded because of missing data: - for x in res: - if x is not None: - print(x[1]) - del self.datasets[x[0]] - else: # for loop - datasets_to_remove = [] - for k, v in self.datasets.items(): - print(f"loading {k}") - x = map_fn(tuple([v] + args)) - # Clear data sets that were not successfully loaded because of missing data: - if x is not None: - warnings.warn(f"data set {k} not loaded") - datasets_to_remove.append(k) - for k in datasets_to_remove: - del self.datasets[k] - - load.__doc__ += load_doc - - def streamline(self, format: str = "sfaira", clean: bool = False): - """ - Streamline the adata instance in each data set to output format. - - Output format are saved in ADATA_FIELDS* classes. - - :param format: Export format. - - - "sfaira" - - "cellxgene" - :param clean: Whether to delete non-streamlined fields. - :return: - """ - for x in self.ids: - self.datasets[x].streamline(format=format, clean=clean) - - def fragment(self) -> Dict[str, anndata.AnnData]: - """ - Fragment data sets into largest consistent parititions based on meta data. - - ToDo return this as a DatasetGroup again. - the streamlined Datasets are similar to anndata instances here, worth considering whether to use anndata - instead because it can be indexed. - - :return: - """ - # TODO: assert that data is streamlined. 
- print("make sure data is streamlined") - datasets_new = {} - for k, v in self.datasets.items(): - # Define fragments and fragment names. - # Because the data is streamlined, fragments are partitions of the .obs space, excluding the cell-wise - # annotation columns: - # - cellontology_class - # - cellontology_id - # - cellontology_original - cols_exclude = ["cellontology_class", "cellontology_id", "cellontology_original"] - tab = v.adata.obs.loc[:, [x not in cols_exclude for x in v.adata.obs.columns]] - tab_unique = tab.drop_duplicates() - idx_sets = [ - np.where([np.all(tab_unique.iloc[i, :] == tab.iloc[j, :])[0] for j in range(tab.shape[0])]) - for i in range(tab_unique.shape[0]) - ] - for i, x in enumerate(idx_sets): - datasets_new[k + "_fragment" + str(i)] = v.adata[x, :] - return datasets_new - - def load_tobacked( - self, - adata_backed: anndata.AnnData, - genome: str, - idx: List[np.ndarray], - annotated_only: bool = False, - load_raw: bool = False, - allow_caching: bool = True, - ): - """ - Loads data set group into slice of backed anndata object. - - Subsets self.datasets to the data sets that were found. Note that feature space is automatically formatted as - this is necessary for concatenation. - - :param adata_backed: Anndata instance to load into. - :param genome: Genome container target genomes loaded. - :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a - shuffled object. This has to be a list of the length of self.data, one index array for each dataset. - :param annotated_only: - :param load_raw: See .load(). - :param allow_caching: See .load(). - :return: New row index for next element to be written into backed anndata. 
- """ - i = 0 - for x in self.ids: - # if this is for celltype prediction, only load the data with have celltype annotation - try: - if self.datasets[x].annotated or not annotated_only: - self.datasets[x].load_tobacked( - adata_backed=adata_backed, - genome=genome, - idx=idx[i], - load_raw=load_raw, - allow_caching=allow_caching - ) - i += 1 - except FileNotFoundError: - del self.datasets[x] - - def write_ontology_class_map( - self, - fn, - protected_writing: bool = True, - **kwargs - ): - """ - Write cell type maps of free text cell types to ontology classes. - - :param fn: File name of csv to load class maps from. - :param protected_writing: Only write if file was not already found. - """ - tab = [] - for k, v in self.datasets.items(): - if v.annotated: - labels_original = np.sort(np.unique(np.concatenate([ - v.adata.obs[self._adata_ids_sfaira.cell_types_original].values - ]))) - tab.append(v.celltypes_universe.prepare_celltype_map_tab( - source=labels_original, - match_only=False, - anatomical_constraint=v.organ, - include_synonyms=True, - omit_list=v._unknown_celltype_identifiers, - **kwargs - )) - if len(tab) == 0: - warnings.warn("attempted to write ontology classmaps for group without annotated data sets") - else: - tab = pandas.concat(tab, axis=0) - # Take out columns with the same source: - tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() - tab = tab.sort_values(self._adata_ids_sfaira.classmap_source_key) - if not os.path.exists(fn) or not protected_writing: - tab.to_csv(fn, index=False, sep="\t") - - def download(self, **kwargs): - for _, v in self.datasets.items(): - v.download(**kwargs) - - @property - def ids(self): - return list(self.datasets.keys()) - - @property - def adata_ls(self): - adata_ls = [] - for i in self.ids: - if self.datasets[i] is not None: - if self.datasets[i].adata is not None: - adata_ls.append(self.datasets[i].adata) - return adata_ls - - @property - def adata(self): - if not 
self.adata_ls: - return None - self.streamline(format="sfaira", clean=True) - adata_ls = self.adata_ls - - # .var entries are renamed and copied upon concatenation. - # To preserve gene names in .var, the target gene names are copied into var_names and are then copied - # back into .var. - for adata in adata_ls: - adata.var.index = adata.var[self._adata_ids_sfaira.gene_id_ensembl].tolist() - if len(adata_ls) > 1: - # TODO: need to keep this? -> yes, still catching errors here (March 2020) - # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. - try: - adata_concat = adata_ls[0].concatenate( - *adata_ls[1:], - join="outer", - batch_key=self._adata_ids_sfaira.dataset, - batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] - ) - except ValueError: - adata_concat = adata_ls[0].concatenate( - *adata_ls[1:], - join="outer", - batch_key=self._adata_ids_sfaira.dataset, - batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] - ) - - adata_concat.var[self._adata_ids_sfaira.gene_id_ensembl] = adata_concat.var.index - - if len(set([a.uns[self._adata_ids_sfaira.mapped_features] for a in adata_ls])) == 1: - adata_concat.uns[self._adata_ids_sfaira.mapped_features] = \ - adata_ls[0].uns[self._adata_ids_sfaira.mapped_features] - else: - adata_concat.uns[self._adata_ids_sfaira.mapped_features] = False - else: - adata_concat = adata_ls[0] - adata_concat.obs[self._adata_ids_sfaira.dataset] = self.ids[0] - - adata_concat.var_names_make_unique() - return adata_concat - - def obs_concat(self, keys: Union[list, None] = None): - """ - Returns concatenation of all .obs. - - Uses union of all keys if keys is not provided. 
- - :param keys: - :return: - """ - if keys is None: - keys = np.unique(np.concatenate([list(x.obs.columns) for x in self.adata_ls])) - obs_concat = pandas.concat([pandas.DataFrame(dict( - [ - (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns - else (k, ["nan" for _ in range(self.datasets[x].adata.obs.shape[0])]) - for k in keys - ] + [(self._adata_ids_sfaira.dataset, [x for _ in range(self.datasets[x].adata.obs.shape[0])])] - )) for x in self.ids if self.datasets[x].adata is not None]) - return obs_concat - - def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: - cells = [] - for x in self.ids: - # if this is for celltype prediction, only load the data with have celltype annotation - try: - if self.datasets[x].annotated or not annotated_only: - cells.append(self.datasets[x].ncells) - except FileNotFoundError: - del self.datasets[x] - return np.asarray(cells) - - def ncells(self, annotated_only: bool = False): - cells = self.ncells_bydataset(annotated_only=annotated_only) - return np.sum(cells) - - @property - def ontology_celltypes(self): - organism = np.unique([v.organism for _, v in self.datasets.items()]) - if len(organism) > 1: - # ToDo: think about whether this should be handled differently. - warnings.warn("found more than one organism in group, this could cause problems with using a joined cell " - "type ontology. Using only the ontology of the first data set in the group.") - return self.datasets[self.ids[0]].ontology_celltypes - - def project_celltypes_to_ontology(self): - """ - Project free text cell type names to ontology based on mapping table. - :return: - """ - for _, v in self.datasets.items(): - v.project_celltypes_to_ontology() - - def subset(self, key, values): - """ - Subset list of adata objects based on sample-wise properties. - - These keys are properties that are available in lazy model. - Subsetting happens on .datasets. - - :param key: Property to subset by. 
- :param values: Classes to overlap to. - :return: - """ - ids_del = [] - if not isinstance(values, list): - values = [values] - for x in self.ids: - try: - values_found = getattr(self.datasets[x], key) - if not isinstance(values_found, list): - values_found = [values_found] - if not np.any([xx in values for xx in values_found]): - ids_del.append(x) - except AttributeError: - raise ValueError(f"{key} not a valid property of data set object") - for x in ids_del: - del self.datasets[x] - - def subset_cells(self, key, values: Union[str, List[str]]): - """ - Subset list of adata objects based on cell-wise properties. - - These keys are properties that are not available in lazy model and require loading first because the - subsetting works on the cell-level: .adata are maintained but reduced to matches. - - :param key: Property to subset by. Options: - - - "age" points to self.obs_key_age - - "cell_ontology_class" points to self.obs_key_cellontology_original - - "dev_stage" points to self.obs_key_dev_stage - - "ethnicity" points to self.obs_key_ethnicity - - "healthy" points to self.obs_key_healthy - - "organ" points to self.obs_key_organ - - "organism" points to self.obs_key_organism - - "protocol" points to self.obs_key_protocol - - "sex" points to self.obs_key_sex - - "state_exact" points to self.obs_key_state_exact - :param values: Classes to overlap to. - :return: - """ - for x in self.ids: - self.datasets[x].subset_cells(key=key, values=values) - if self.datasets[x].ncells == 0: # none left - del self.datasets[x] - - -class DatasetGroupDirectoryOriented(DatasetGroup): - - _cwd: os.PathLike - - def __init__( - self, - file_base: str, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - ): - """ - Automatically collects Datasets from python files in directory. - - Uses a pre-built DatasetGroup if this is defined in a group.py file, otherwise, the DatasetGroup is initialised - here. 
- - :param file_base: - :param data_path: - :param meta_path: - :param cache_path: - """ - # Collect all data loaders from files in directory: - datasets = [] - self._cwd = os.path.dirname(file_base) - dataset_module = str(self._cwd.split("/")[-1]) - loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(self._cwd.split("/")[-5]) == "sfaira" else \ - "sfaira_extension.data.dataloaders.loaders." - if "group.py" in os.listdir(self._cwd): - DatasetGroupFound = pydoc.locate(loader_pydoc_path + dataset_module + ".group.DatasetGroup") - dsg = DatasetGroupFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - datasets.extend(list(dsg.datasets.values)) - else: - for f in os.listdir(self._cwd): - if os.path.isfile(os.path.join(self._cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - datasets_f = [] - file_module = ".".join(f.split(".")[:-1]) - DatasetFound = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".Dataset") - # Load objects from name space: - # - load(): Loading function that return anndata instance. - # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles - load_func = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".load") - sample_fns = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + - ".SAMPLE_FNS") - fn_yaml = os.path.join(self._cwd, file_module + ".yaml") - fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None - # Check for sample_fns and sample_ids in yaml: - if fn_yaml is not None: - assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" - yaml_vals = read_yaml(fn=fn_yaml) - if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: - sample_fns = yaml_vals["meta"]["sample_fns"] - if sample_fns is None: - sample_fns = [None] - # Here we distinguish between class that are already defined and those that are not. 
- # The latter case arises if meta data are defined in YAMLs and _load is given as a function. - if DatasetFound is None: - for x in sample_fns: - datasets_f.append( - DatasetBase( - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) - ) - else: - for x in sample_fns: - datasets_f.append( - DatasetFound( - data_path=data_path, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) - ) - # Load cell type maps: - for x in datasets_f: - x.load_ontology_class_map(fn=os.path.join(self._cwd, file_module + ".tsv")) - datasets.extend(datasets_f) - - keys = [x.id for x in datasets] - super().__init__(datasets=dict(zip(keys, datasets))) - - def clean_ontology_class_map(self): - """ - Finalises processed class maps of free text cell types to ontology classes. - - Checks that the assigned ontology class names appear in the ontology. - Adds a third column with the corresponding ontology IDs into the file. - - :return: - """ - for f in os.listdir(self._cwd): - if os.path.isfile(os.path.join(self._cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(f.split(".")[:-1]) - fn_map = os.path.join(self._cwd, file_module + ".tsv") - if os.path.exists(fn_map): - # Access reading and value protection mechanisms from first data set loaded in group. - tab = list(self.datasets.values())[0]._read_class_map(fn=fn_map) - # Checks that the assigned ontology class names appear in the ontology. 
- list(self.datasets.values())[0]._value_protection( - attr="celltypes", - allowed=self.ontology_celltypes, - attempted=np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() - ) - # Adds a third column with the corresponding ontology IDs into the file. - tab[self._adata_ids_sfaira.classmap_target_id_key] = [ - self.ontology_celltypes.id_from_name(x) if x != self._adata_ids_sfaira.unknown_celltype_name - else self._adata_ids_sfaira.unknown_celltype_name - for x in tab[self._adata_ids_sfaira.classmap_target_key].values - ] - list(self.datasets.values())[0]._write_class_map(fn=fn_map, tab=tab) - - -class DatasetSuperGroup: - """ - Container for multiple DatasetGroup instances. - - Used to manipulate structured dataset collections. Primarly designed for this manipulation, convert to DatasetGroup - via flatten() for more functionalities. - """ - adata: Union[None, anndata.AnnData] - fn_backed: Union[None, PathLike] - dataset_groups: Union[list, List[DatasetGroup], List[DatasetSuperGroup]] - - def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetSuperGroup]]): - self.adata = None - self.fn_backed = None - self.set_dataset_groups(dataset_groups=dataset_groups) - - self._adata_ids_sfaira = AdataIdsSfaira() - - def set_dataset_groups(self, dataset_groups: Union[DatasetGroup, DatasetSuperGroup, List[DatasetGroup], - List[DatasetSuperGroup]]): - if isinstance(dataset_groups, DatasetGroup) or isinstance(dataset_groups, DatasetSuperGroup): - dataset_groups = [dataset_groups] - if len(dataset_groups) > 0: - if isinstance(dataset_groups[0], DatasetGroup): - self.dataset_groups = dataset_groups - elif isinstance(dataset_groups[0], DatasetSuperGroup): - # Decompose super groups first - dataset_groups_proc = [] - for x in dataset_groups: - dataset_groups_proc.extend(x.dataset_groups) - self.dataset_groups = dataset_groups_proc - else: - assert False - else: - self.dataset_groups = [] - - def extend_dataset_groups(self, dataset_groups: 
Union[List[DatasetGroup], List[DatasetSuperGroup]]): - if isinstance(dataset_groups[0], DatasetGroup): - self.dataset_groups.extend(dataset_groups) - elif isinstance(dataset_groups[0], DatasetSuperGroup): - # Decompose super groups first - dataset_groups_proc = [] - for x in dataset_groups: - dataset_groups_proc.extend(x.datasets) - self.dataset_groups.extend(dataset_groups_proc) - else: - assert False - - def get_gc( - self, - genome: str = None - ): - if genome.lower().startswith("homo_sapiens"): - g = SuperGenomeContainer( - organism="human", - genome=genome - ) - elif genome.lower().startswith("mus_musculus"): - g = SuperGenomeContainer( - organism="mouse", - genome=genome - ) - else: - raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") - return g - - def ncells_bydataset(self, annotated_only: bool = False): - """ - List of list of length of all data sets by data set group. - :return: - """ - return [x.ncells_bydataset(annotated_only=annotated_only) for x in self.dataset_groups] - - def ncells_bydataset_flat(self, annotated_only: bool = False): - """ - Flattened list of length of all data sets. - :return: - """ - return [xx for x in self.ncells_bydataset(annotated_only=annotated_only) for xx in x] - - def ncells(self, annotated_only: bool = False): - return np.sum(self.ncells_bydataset_flat(annotated_only=annotated_only)) - - def flatten(self) -> DatasetGroup: - """ - Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. 
- - :return: - """ - ds = {} - for x in self.dataset_groups: - for k, v in x.datasets.items(): - assert k not in ds.keys(), f"{k} was duplicated in super group, purge duplicates before flattening" - ds[k] = v - return DatasetGroup(datasets=ds) - - def download(self, **kwargs): - for x in self.dataset_groups: - x.download(**kwargs) - - def load_all( - self, - annotated_only: bool = False, - match_to_reference: Union[str, bool, None] = None, - remove_gene_version: bool = True, - load_raw: bool = False, - allow_caching: bool = True, - processes: int = 1, - ): - """ - Loads data set human into anndata object. - - :param annotated_only: - :param match_to_reference: See .load(). - :param remove_gene_version: See .load(). - :param load_raw: See .load(). - :param allow_caching: See .load(). - :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. - Note: parallelises loading of each dataset group, but not across groups. - :return: - """ - for x in self.dataset_groups: - x.load( - annotated_only=annotated_only, - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw, - allow_caching=allow_caching, - processes=processes, - ) - # Make sure that concatenate is not used on a None adata object: - adatas = [x.adata for x in self.dataset_groups if x.adata is not None] - if len(adatas) > 1: - self.adata = adatas[0].adata.concatenate( - *adatas[1:], - join="outer", - batch_key=self._adata_ids_sfaira.dataset_group - ) - elif len(adatas) == 1: - self.adata = adatas[0] - else: - warnings.warn("no anndata instances to concatenate") - - def load_all_tobacked( - self, - fn_backed: PathLike, - genome: str, - shuffled: bool = False, - as_dense: bool = False, - annotated_only: bool = False, - load_raw: bool = False, - allow_caching: bool = True, - ): - """ - Loads data set human into backed anndata object. 
- - Example usage: - - ds = DatasetSuperGroup([...]) - ds.load_all_tobacked( - fn_backed="...", - target_genome="...", - annotated_only=False - ) - adata_backed = anndata.read(ds.fn_backed, backed='r') - adata_slice = ad_full[idx] - - :param fn_backed: File name to save backed anndata to temporarily. - :param genome: ID of target genomes. - :param shuffled: Whether to shuffle data when writing to backed. - :param as_dense: Whether to load into dense count matrix. - :param annotated_only: - :param load_raw: See .load(). - :param allow_caching: See .load(). - """ - if shuffled and not as_dense: - raise ValueError("cannot write backed shuffled and sparse") - scatter_update = shuffled or as_dense - self.fn_backed = fn_backed - n_cells = self.ncells(annotated_only=annotated_only) - gc = self.get_gc(genome=genome) - n_genes = gc.ngenes - if scatter_update: - self.adata = anndata.AnnData( - scipy.sparse.csr_matrix((n_cells, n_genes), dtype=np.float32) - ) # creates an empty anndata object with correct dimensions that can be filled with cells from data sets - else: - self.adata = anndata.AnnData( - scipy.sparse.csr_matrix((0, n_genes), dtype=np.float32) - ) - self.adata.filename = fn_backed # setting this attribute switches this anndata to a backed object - # Note that setting .filename automatically redefines .X as dense, so we have to redefine it as sparse: - if not as_dense: - X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse - X.indices = X.indices.astype(np.int64) - X.indptr = X.indptr.astype(np.int64) - self.adata.X = X - keys = [ - self._adata_ids_sfaira.annotated, - self._adata_ids_sfaira.assay, - self._adata_ids_sfaira.author, - self._adata_ids_sfaira.dataset, - self._adata_ids_sfaira.cell_ontology_class, - self._adata_ids_sfaira.development_stage, - self._adata_ids_sfaira.normalization, - self._adata_ids_sfaira.organ, - self._adata_ids_sfaira.state_exact, - self._adata_ids_sfaira.year, - ] - if scatter_update: - self.adata.obs = 
pandas.DataFrame({ - k: ["nan" for _ in range(n_cells)] for k in keys - }) - else: - for k in keys: - self.adata.obs[k] = [] - # Define index vectors to write to: - idx_vector = np.arange(0, n_cells) - if shuffled: - np.random.shuffle(idx_vector) - idx_ls = [] - row = 0 - ncells = self.ncells_bydataset(annotated_only=annotated_only) - if np.all([len(x) == 0 for x in ncells]): - raise ValueError("no datasets found") - for x in ncells: - temp_ls = [] - for y in x: - temp_ls.append(idx_vector[row:(row + y)]) - row += y - idx_ls.append(temp_ls) - print("checking expected and received data set sizes, rerun meta data generation if mismatch is found:") - print(self.ncells_bydataset(annotated_only=annotated_only)) - print([[len(x) for x in xx] for xx in idx_ls]) - for i, x in enumerate(self.dataset_groups): - x.load_tobacked( - adata_backed=self.adata, - genome=genome, - idx=idx_ls[i], - annotated_only=annotated_only, - load_raw=load_raw, - allow_caching=allow_caching, - ) - # If the sparse non-shuffled approach is used, make sure that self.adata.obs.index is unique() before saving - if not scatter_update: - self.adata.obs.index = pd.RangeIndex(0, len(self.adata.obs.index)) - # Explicitly write backed file to disk again to make sure that obs are included and that n_obs is set correctly - self.adata.write() - # Saving obs separately below is therefore no longer required (hence commented out) - # fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" - # self.adata.obs.to_csv(fn_backed_obs) - - def delete_backed(self): - del self.adata - self.adata = None - os.remove(str(self.fn_backed)) - - def load_cached_backed(self, fn: PathLike): - self.adata = anndata.read(fn, backed='r') - - def streamline(self, format: str = "sfaira", clean: bool = False): - """ - Streamline the adata instance in each group and each data set to output format. - - Output format are saved in ADATA_FIELDS* classes. - - :param format: Export format. 
- - - "sfaira" - - "cellxgene" - :param clean: Whether to delete non-streamlined fields. - :return: - """ - for x in self.dataset_groups: - for xx in x.ids: - x.datasets[xx].streamline(format=format, clean=clean) - - def subset(self, key, values): - """ - Subset list of adata objects based on match to values in key property. - - These keys are properties that are available in lazy model. - Subsetting happens on .datasets. - - :param key: Property to subset by. - :param values: Classes to overlap to. - :return: - """ - for x in self.dataset_groups: - x.subset(key=key, values=values) - self.dataset_groups = [x for x in self.dataset_groups if x.datasets] # Delete empty DatasetGroups - - def subset_cells(self, key, values: Union[str, List[str]]): - """ - Subset list of adata objects based on cell-wise properties. - - These keys are properties that are not available in lazy model and require loading first because the - subsetting works on the cell-level: .adata are maintained but reduced to matches. - - :param key: Property to subset by. Options: - - - "age" points to self.obs_key_age - - "cell_ontology_class" points to self.obs_key_cellontology_original - - "dev_stage" points to self.obs_key_dev_stage - - "ethnicity" points to self.obs_key_ethnicity - - "healthy" points to self.obs_key_healthy - - "organ" points to self.obs_key_organ - - "organism" points to self.obs_key_organism - - "protocol" points to self.obs_key_protocol - - "sex" points to self.obs_key_sex - - "state_exact" points to self.obs_key_state_exact - :param values: Classes to overlap to. - :return: - """ - for i in range(len(self.dataset_groups)): - self.dataset_groups[i].subset_cells(key=key, values=values) - - def project_celltypes_to_ontology(self): - """ - Project free text cell type names to ontology based on mapping table. 
- :return: - """ - for _, v in self.dataset_groups: - v.project_celltypes_to_ontology() + idx_keep = get_subset_idx(samplewise_key=key, cellwise_key=key + "_obs_key") + self.adata = self.adata[idx_keep, :].copy() # if len(idx_keep) > 0 else None diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py new file mode 100644 index 000000000..c100ed528 --- /dev/null +++ b/sfaira/data/base/dataset_group.py @@ -0,0 +1,935 @@ +from __future__ import annotations + +import anndata +import multiprocessing +import numpy as np +import pandas as pd +import os +from os import PathLike +import pandas +import pydoc +import scipy.sparse +from typing import Dict, List, Tuple, Union +import warnings + +from sfaira.data.base.dataset import is_term, DatasetBase +from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.consts import AdataIdsSfaira +from sfaira.data.utils import read_yaml + +UNS_STRING_META_IN_OBS = "__obs__" + + +def map_fn(inputs): + """ + Functional to load data set with predefined additional actions. + + :param inputs: + :return: None if function ran, error report otherwise + """ + ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, kwargs_func = inputs + try: + ds.load( + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=load_raw, + allow_caching=allow_caching, + ) + if func is not None: + x = func(ds, **kwargs_func) + ds.clear() + return x + else: + return None + except FileNotFoundError as e: + return ds.id, e, + + +load_doc = \ + """ + :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different data sets are superimposed. + :param match_to_reference: Reference genomes name or False to keep original feature space. + :param load_raw: Loads unprocessed version of data if available in data loader. + :param allow_caching: Whether to allow method to cache adata object for faster re-loading. 
+ """ + + +class DatasetGroup: + """ + Container class that co-manages multiple data sets, removing need to call Dataset() methods directly through + wrapping them. + + Example: + + #query loaders lung + #from sfaira.dev.data.loaders.lung import DatasetGroupLung as DatasetGroup + #dsg_humanlung = DatasetGroupHuman(path='path/to/data') + #dsg_humanlung.load_all(match_to_reference='Homo_sapiens_GRCh38_97') + #dsg_humanlung[some_id] + #dsg_humanlung.adata + """ + datasets: Dict[str, DatasetBase] + + def __init__(self, datasets: dict): + self._adata_ids_sfaira = AdataIdsSfaira() + self.datasets = datasets + + @property + def _unknown_celltype_identifiers(self): + return np.unqiue(np.concatenate([v._unknown_celltype_identifiers for _, v in self.datasets.items()])) + + def load( + self, + annotated_only: bool = False, + remove_gene_version: bool = True, + match_to_reference: Union[str, bool, None] = None, + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, + func=None, + kwargs_func: Union[None, dict] = None, + ): + """ + Load all datasets in group (option for temporary loading). + + Note: This method automatically subsets to the group to the data sets for which input files were found. + + This method also allows temporarily loading data sets to execute function on loaded data sets (supply func). + In this setting, datasets are removed from memory after the function has been executed. + + :param annotated_only: + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset + instance. The return can be empty: + + def func(dataset, **kwargs_func): + # code manipulating dataset and generating output x. + return x + :param kwargs_func: Kwargs of func. 
+ """ + args = [ + remove_gene_version, + match_to_reference, + load_raw, + allow_caching, + func, + kwargs_func + ] + + if processes > 1 and len(self.datasets.items()) > 1: # multiprocessing parallelisation + print(f"using python multiprocessing (processes={processes}), " + f"for easier debugging revert to sequential execution (processes=1)") + with multiprocessing.Pool(processes=processes) as pool: + res = pool.starmap(map_fn, [ + (tuple([v] + args),) + for k, v in self.datasets.items() if v.annotated or not annotated_only + ]) + # Clear data sets that were not successfully loaded because of missing data: + for x in res: + if x is not None: + print(x[1]) + del self.datasets[x[0]] + else: # for loop + datasets_to_remove = [] + for k, v in self.datasets.items(): + print(f"loading {k}") + x = map_fn(tuple([v] + args)) + # Clear data sets that were not successfully loaded because of missing data: + if x is not None: + warnings.warn(f"data set {k} not loaded") + datasets_to_remove.append(k) + for k in datasets_to_remove: + del self.datasets[k] + + load.__doc__ += load_doc + + def streamline(self, format: str = "sfaira", clean: bool = False): + """ + Streamline the adata instance in each data set to output format. + + Output format are saved in ADATA_FIELDS* classes. + + :param format: Export format. + + - "sfaira" + - "cellxgene" + :param clean: Whether to delete non-streamlined fields. + :return: + """ + for x in self.ids: + self.datasets[x].streamline(format=format, clean=clean) + + def fragment(self) -> Dict[str, anndata.AnnData]: + """ + Fragment data sets into largest consistent parititions based on meta data. + + ToDo return this as a DatasetGroup again. + the streamlined Datasets are similar to anndata instances here, worth considering whether to use anndata + instead because it can be indexed. + + :return: + """ + # TODO: assert that data is streamlined. 
+ print("make sure data is streamlined") + datasets_new = {} + for k, v in self.datasets.items(): + # Define fragments and fragment names. + # Because the data is streamlined, fragments are partitions of the .obs space, excluding the cell-wise + # annotation columns: + # - cellontology_class + # - cellontology_id + # - cellontology_original + cols_exclude = ["cellontology_class", "cellontology_id", "cellontology_original"] + tab = v.adata.obs.loc[:, [x not in cols_exclude for x in v.adata.obs.columns]] + tab_unique = tab.drop_duplicates() + idx_sets = [ + np.where([np.all(tab_unique.iloc[i, :] == tab.iloc[j, :])[0] for j in range(tab.shape[0])]) + for i in range(tab_unique.shape[0]) + ] + for i, x in enumerate(idx_sets): + datasets_new[k + "_fragment" + str(i)] = v.adata[x, :] + return datasets_new + + def load_tobacked( + self, + adata_backed: anndata.AnnData, + genome: str, + idx: List[np.ndarray], + annotated_only: bool = False, + load_raw: bool = False, + allow_caching: bool = True, + ): + """ + Loads data set group into slice of backed anndata object. + + Subsets self.datasets to the data sets that were found. Note that feature space is automatically formatted as + this is necessary for concatenation. + + :param adata_backed: Anndata instance to load into. + :param genome: Genome container target genomes loaded. + :param idx: Indices in adata_backed to write observations to. This can be used to immediately create a + shuffled object. This has to be a list of the length of self.data, one index array for each dataset. + :param annotated_only: + :param load_raw: See .load(). + :param allow_caching: See .load(). + :return: New row index for next element to be written into backed anndata. 
+ """ + i = 0 + for x in self.ids: + # if this is for celltype prediction, only load the data with have celltype annotation + try: + if self.datasets[x].annotated or not annotated_only: + self.datasets[x].load_tobacked( + adata_backed=adata_backed, + genome=genome, + idx=idx[i], + load_raw=load_raw, + allow_caching=allow_caching + ) + i += 1 + except FileNotFoundError: + del self.datasets[x] + + def write_ontology_class_map( + self, + fn, + protected_writing: bool = True, + **kwargs + ): + """ + Write cell type maps of free text cell types to ontology classes. + + :param fn: File name of csv to load class maps from. + :param protected_writing: Only write if file was not already found. + """ + tab = [] + for k, v in self.datasets.items(): + if v.annotated: + labels_original = np.sort(np.unique(np.concatenate([ + v.adata.obs[self._adata_ids_sfaira.cell_types_original].values + ]))) + tab.append(v.celltypes_universe.prepare_celltype_map_tab( + source=labels_original, + match_only=False, + anatomical_constraint=v.organ, + include_synonyms=True, + omit_list=v._unknown_celltype_identifiers, + **kwargs + )) + if len(tab) == 0: + warnings.warn("attempted to write ontology classmaps for group without annotated data sets") + else: + tab = pandas.concat(tab, axis=0) + # Take out columns with the same source: + tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() + tab = tab.sort_values(self._adata_ids_sfaira.classmap_source_key) + if not os.path.exists(fn) or not protected_writing: + tab.to_csv(fn, index=False, sep="\t") + + def download(self, **kwargs): + for _, v in self.datasets.items(): + v.download(**kwargs) + + @property + def ids(self): + return list(self.datasets.keys()) + + @property + def adata_ls(self): + adata_ls = [] + for i in self.ids: + if self.datasets[i] is not None: + if self.datasets[i].adata is not None: + adata_ls.append(self.datasets[i].adata) + return adata_ls + + @property + def adata(self): + if not 
self.adata_ls: + return None + self.streamline(format="sfaira", clean=True) + adata_ls = self.adata_ls + + # .var entries are renamed and copied upon concatenation. + # To preserve gene names in .var, the target gene names are copied into var_names and are then copied + # back into .var. + for adata in adata_ls: + adata.var.index = adata.var[self._adata_ids_sfaira.gene_id_ensembl].tolist() + if len(adata_ls) > 1: + # TODO: need to keep this? -> yes, still catching errors here (March 2020) + # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. + try: + adata_concat = adata_ls[0].concatenate( + *adata_ls[1:], + join="outer", + batch_key=self._adata_ids_sfaira.dataset, + batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] + ) + except ValueError: + adata_concat = adata_ls[0].concatenate( + *adata_ls[1:], + join="outer", + batch_key=self._adata_ids_sfaira.dataset, + batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] + ) + + adata_concat.var[self._adata_ids_sfaira.gene_id_ensembl] = adata_concat.var.index + + if len(set([a.uns[self._adata_ids_sfaira.mapped_features] for a in adata_ls])) == 1: + adata_concat.uns[self._adata_ids_sfaira.mapped_features] = \ + adata_ls[0].uns[self._adata_ids_sfaira.mapped_features] + else: + adata_concat.uns[self._adata_ids_sfaira.mapped_features] = False + else: + adata_concat = adata_ls[0] + adata_concat.obs[self._adata_ids_sfaira.dataset] = self.ids[0] + + adata_concat.var_names_make_unique() + return adata_concat + + def obs_concat(self, keys: Union[list, None] = None): + """ + Returns concatenation of all .obs. + + Uses union of all keys if keys is not provided. 
+ + :param keys: + :return: + """ + if keys is None: + keys = np.unique(np.concatenate([list(x.obs.columns) for x in self.adata_ls])) + obs_concat = pandas.concat([pandas.DataFrame(dict( + [ + (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns + else (k, ["nan" for _ in range(self.datasets[x].adata.obs.shape[0])]) + for k in keys + ] + [(self._adata_ids_sfaira.dataset, [x for _ in range(self.datasets[x].adata.obs.shape[0])])] + )) for x in self.ids if self.datasets[x].adata is not None]) + return obs_concat + + def ncells_bydataset(self, annotated_only: bool = False) -> np.ndarray: + cells = [] + for x in self.ids: + # if this is for celltype prediction, only load the data with have celltype annotation + try: + if self.datasets[x].annotated or not annotated_only: + cells.append(self.datasets[x].ncells) + except FileNotFoundError: + del self.datasets[x] + return np.asarray(cells) + + def ncells(self, annotated_only: bool = False): + cells = self.ncells_bydataset(annotated_only=annotated_only) + return np.sum(cells) + + @property + def ontology_celltypes(self): + organism = np.unique([v.organism for _, v in self.datasets.items()]) + if len(organism) > 1: + # ToDo: think about whether this should be handled differently. + warnings.warn("found more than one organism in group, this could cause problems with using a joined cell " + "type ontology. Using only the ontology of the first data set in the group.") + return self.datasets[self.ids[0]].ontology_celltypes + + def project_celltypes_to_ontology(self): + """ + Project free text cell type names to ontology based on mapping table. + :return: + """ + for _, v in self.datasets.items(): + v.project_celltypes_to_ontology() + + def subset(self, key, values: Union[list, tuple, np.ndarray]): + """ + Subset list of adata objects based on sample-wise properties. + + These keys are properties that are available in lazy model. + Subsetting happens on .datasets. + + :param key: Property to subset by. 
+ :param values: Classes to overlap to. Return if elements match any of these classes. + :return: + """ + ids_del = [] + if isinstance(values, np.ndarray): + values = values.tolist() + if isinstance(values, tuple): + values = list(values) + if not isinstance(values, list): + values = [values] + for x in self.ids: + try: + values_found = getattr(self.datasets[x], key) + except AttributeError: + raise ValueError(f"{key} not a valid property of data set object") + try: + ontology = getattr(self.datasets[x].ontology_container_sfaira, key) + except AttributeError: + raise ValueError(f"{key} not a valid property of ontology_container object") + if values_found is None: + # Delete entries which do not have this meta data item annotated. + ids_del.append(x) + else: + if not isinstance(values_found, list): + values_found = [values_found] + if not np.any([ + np.any([ + is_term(query=y, ontology=ontology, ontology_parent=z) + for z in values + ]) for y in values_found + ]): + # Delete entries which a non-matching meta data value associated with this item. + ids_del.append(x) + for x in ids_del: + del self.datasets[x] + + def subset_cells(self, key, values: Union[str, List[str]]): + """ + Subset list of adata objects based on cell-wise properties. + + These keys are properties that are not available in lazy model and require loading first because the + subsetting works on the cell-level: .adata are maintained but reduced to matches. + + :param key: Property to subset by. 
Options: + + - "age" points to self.age_obs_key + - "assay" points to self.assay_obs_key + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "healthy" points to self.healthy_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key + :param values: Classes to overlap to. + :return: + """ + for x in self.ids: + self.datasets[x].subset_cells(key=key, values=values) + if self.datasets[x].ncells == 0: # No observations (cells) left. + del self.datasets[x] + + +class DatasetGroupDirectoryOriented(DatasetGroup): + + _cwd: os.PathLike + + def __init__( + self, + file_base: str, + data_path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + ): + """ + Automatically collects Datasets from python files in directory. + + Uses a pre-built DatasetGroup if this is defined in a group.py file, otherwise, the DatasetGroup is initialised + here. + + :param file_base: + :param data_path: + :param meta_path: + :param cache_path: + """ + # Collect all data loaders from files in directory: + datasets = [] + self._cwd = os.path.dirname(file_base) + dataset_module = str(self._cwd.split("/")[-1]) + loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(self._cwd.split("/")[-5]) == "sfaira" else \ + "sfaira_extension.data.dataloaders.loaders." 
+ if "group.py" in os.listdir(self._cwd): + DatasetGroupFound = pydoc.locate(loader_pydoc_path + dataset_module + ".group.DatasetGroup") + dsg = DatasetGroupFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + datasets.extend(list(dsg.datasets.values)) + else: + for f in os.listdir(self._cwd): + if os.path.isfile(os.path.join(self._cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + datasets_f = [] + file_module = ".".join(f.split(".")[:-1]) + DatasetFound = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".Dataset") + # Load objects from name space: + # - load(): Loading function that return anndata instance. + # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles + load_func = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".load") + sample_fns = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + + ".SAMPLE_FNS") + fn_yaml = os.path.join(self._cwd, file_module + ".yaml") + fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None + # Check for sample_fns and sample_ids in yaml: + if fn_yaml is not None: + assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" + yaml_vals = read_yaml(fn=fn_yaml) + if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: + sample_fns = yaml_vals["meta"]["sample_fns"] + if sample_fns is None: + sample_fns = [None] + # Here we distinguish between class that are already defined and those that are not. + # The latter case arises if meta data are defined in YAMLs and _load is given as a function. 
+ if DatasetFound is None: + for x in sample_fns: + datasets_f.append( + DatasetBase( + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + load_func=load_func, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) + ) + else: + for x in sample_fns: + datasets_f.append( + DatasetFound( + data_path=data_path, + meta_path=meta_path, + cache_path=cache_path, + load_func=load_func, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) + ) + # Load cell type maps: + for x in datasets_f: + x.load_ontology_class_map(fn=os.path.join(self._cwd, file_module + ".tsv")) + datasets.extend(datasets_f) + + keys = [x.id for x in datasets] + super().__init__(datasets=dict(zip(keys, datasets))) + + def clean_ontology_class_map(self): + """ + Finalises processed class maps of free text cell types to ontology classes. + + Checks that the assigned ontology class names appear in the ontology. + Adds a third column with the corresponding ontology IDs into the file. + + :return: + """ + for f in os.listdir(self._cwd): + if os.path.isfile(os.path.join(self._cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + fn_map = os.path.join(self._cwd, file_module + ".tsv") + if os.path.exists(fn_map): + # Access reading and value protection mechanisms from first data set loaded in group. + tab = list(self.datasets.values())[0]._read_class_map(fn=fn_map) + # Checks that the assigned ontology class names appear in the ontology. + list(self.datasets.values())[0]._value_protection( + attr="celltypes", + allowed=self.ontology_celltypes, + attempted=np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() + ) + # Adds a third column with the corresponding ontology IDs into the file. 
+ tab[self._adata_ids_sfaira.classmap_target_id_key] = [ + self.ontology_celltypes.id_from_name(x) + if x != self._adata_ids_sfaira.unknown_celltype_identifier + else self._adata_ids_sfaira.unknown_celltype_identifier + for x in tab[self._adata_ids_sfaira.classmap_target_key].values + ] + list(self.datasets.values())[0]._write_class_map(fn=fn_map, tab=tab) + + +class DatasetSuperGroup: + """ + Container for multiple DatasetGroup instances. + + Used to manipulate structured dataset collections. Primarly designed for this manipulation, convert to DatasetGroup + via flatten() for more functionalities. + """ + _adata: Union[None, anndata.AnnData] + fn_backed: Union[None, PathLike] + dataset_groups: Union[list, List[DatasetGroup], List[DatasetSuperGroup]] + + def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetSuperGroup]]): + self._adata = None + self.fn_backed = None + self.set_dataset_groups(dataset_groups=dataset_groups) + + self._adata_ids_sfaira = AdataIdsSfaira() + + def set_dataset_groups(self, dataset_groups: Union[DatasetGroup, DatasetSuperGroup, List[DatasetGroup], + List[DatasetSuperGroup]]): + if isinstance(dataset_groups, DatasetGroup) or isinstance(dataset_groups, DatasetSuperGroup): + dataset_groups = [dataset_groups] + if len(dataset_groups) > 0: + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups = dataset_groups + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc = [] + for x in dataset_groups: + dataset_groups_proc.extend(x.dataset_groups) + self.dataset_groups = dataset_groups_proc + else: + assert False + else: + self.dataset_groups = [] + + def extend_dataset_groups(self, dataset_groups: Union[List[DatasetGroup], List[DatasetSuperGroup]]): + if isinstance(dataset_groups[0], DatasetGroup): + self.dataset_groups.extend(dataset_groups) + elif isinstance(dataset_groups[0], DatasetSuperGroup): + # Decompose super groups first + dataset_groups_proc 
= [] + for x in dataset_groups: + dataset_groups_proc.extend(x.datasets) + self.dataset_groups.extend(dataset_groups_proc) + else: + assert False + + @property + def ids(self): + ids = [] + for x in self.dataset_groups: + ids.extend(x.ids) + return ids + + def get_gc( + self, + genome: str = None + ): + if genome.lower().startswith("homo_sapiens"): + g = SuperGenomeContainer( + organism="human", + genome=genome + ) + elif genome.lower().startswith("mus_musculus"): + g = SuperGenomeContainer( + organism="mouse", + genome=genome + ) + else: + raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") + return g + + def ncells_bydataset(self, annotated_only: bool = False): + """ + List of list of length of all data sets by data set group. + :return: + """ + return [x.ncells_bydataset(annotated_only=annotated_only) for x in self.dataset_groups] + + def ncells_bydataset_flat(self, annotated_only: bool = False): + """ + Flattened list of length of all data sets. + :return: + """ + return [xx for x in self.ncells_bydataset(annotated_only=annotated_only) for xx in x] + + def ncells(self, annotated_only: bool = False): + return np.sum(self.ncells_bydataset_flat(annotated_only=annotated_only)) + + def flatten(self) -> DatasetGroup: + """ + Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. + + :return: + """ + ds = {} + for x in self.dataset_groups: + for k, v in x.datasets.items(): + assert k not in ds.keys(), f"{k} was duplicated in super group, purge duplicates before flattening" + ds[k] = v + return DatasetGroup(datasets=ds) + + def download(self, **kwargs): + for x in self.dataset_groups: + x.download(**kwargs) + + def load( + self, + annotated_only: bool = False, + match_to_reference: Union[str, bool, None] = None, + remove_gene_version: bool = True, + load_raw: bool = False, + allow_caching: bool = True, + processes: int = 1, + ): + """ + Loads data set human into anndata object. 
+ + :param annotated_only: + :param match_to_reference: See .load(). + :param remove_gene_version: See .load(). + :param load_raw: See .load(). + :param allow_caching: See .load(). + :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. + Note: parallelises loading of each dataset group, but not across groups. + :return: + """ + for x in self.dataset_groups: + x.load( + annotated_only=annotated_only, + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + load_raw=load_raw, + allow_caching=allow_caching, + processes=processes, + ) + + @property + def adata(self): + if self._adata is None: + # Make sure that concatenate is not used on a None adata object: + adatas = [x.adata for x in self.dataset_groups if x.adata is not None] + if len(adatas) > 1: + self._adata = adatas[0].adata.concatenate( + *adatas[1:], + join="outer", + batch_key=self._adata_ids_sfaira.dataset_group + ) + elif len(adatas) == 1: + self._adata = adatas[0] + else: + warnings.warn("no anndata instances to concatenate") + return self._adata + + def load_tobacked( + self, + fn_backed: PathLike, + genome: str, + shuffled: bool = False, + as_dense: bool = False, + annotated_only: bool = False, + load_raw: bool = False, + allow_caching: bool = True, + ): + """ + Loads data set human into backed anndata object. + + Example usage: + + ds = DatasetSuperGroup([...]) + ds.load_all_tobacked( + fn_backed="...", + target_genome="...", + annotated_only=False + ) + adata_backed = anndata.read(ds.fn_backed, backed='r') + adata_slice = ad_full[idx] + + :param fn_backed: File name to save backed anndata to temporarily. + :param genome: ID of target genomes. + :param shuffled: Whether to shuffle data when writing to backed. + :param as_dense: Whether to load into dense count matrix. + :param annotated_only: + :param load_raw: See .load(). + :param allow_caching: See .load(). 
+ """ + if shuffled and not as_dense: + raise ValueError("cannot write backed shuffled and sparse") + scatter_update = shuffled or as_dense + self.fn_backed = fn_backed + n_cells = self.ncells(annotated_only=annotated_only) + gc = self.get_gc(genome=genome) + n_genes = gc.ngenes + if scatter_update: + self.adata = anndata.AnnData( + scipy.sparse.csr_matrix((n_cells, n_genes), dtype=np.float32) + ) # creates an empty anndata object with correct dimensions that can be filled with cells from data sets + else: + self.adata = anndata.AnnData( + scipy.sparse.csr_matrix((0, n_genes), dtype=np.float32) + ) + self.adata.filename = fn_backed # setting this attribute switches this anndata to a backed object + # Note that setting .filename automatically redefines .X as dense, so we have to redefine it as sparse: + if not as_dense: + X = scipy.sparse.csr_matrix(self.adata.X) # redefines this backed anndata as sparse + X.indices = X.indices.astype(np.int64) + X.indptr = X.indptr.astype(np.int64) + self.adata.X = X + keys = [ + self._adata_ids_sfaira.annotated, + self._adata_ids_sfaira.assay, + self._adata_ids_sfaira.author, + self._adata_ids_sfaira.dataset, + self._adata_ids_sfaira.cell_ontology_class, + self._adata_ids_sfaira.development_stage, + self._adata_ids_sfaira.normalization, + self._adata_ids_sfaira.organ, + self._adata_ids_sfaira.state_exact, + self._adata_ids_sfaira.year, + ] + if scatter_update: + self.adata.obs = pandas.DataFrame({ + k: ["nan" for _ in range(n_cells)] for k in keys + }) + else: + for k in keys: + self.adata.obs[k] = [] + # Define index vectors to write to: + idx_vector = np.arange(0, n_cells) + if shuffled: + np.random.shuffle(idx_vector) + idx_ls = [] + row = 0 + ncells = self.ncells_bydataset(annotated_only=annotated_only) + if np.all([len(x) == 0 for x in ncells]): + raise ValueError("no datasets found") + for x in ncells: + temp_ls = [] + for y in x: + temp_ls.append(idx_vector[row:(row + y)]) + row += y + idx_ls.append(temp_ls) + 
print("checking expected and received data set sizes, rerun meta data generation if mismatch is found:") + print(self.ncells_bydataset(annotated_only=annotated_only)) + print([[len(x) for x in xx] for xx in idx_ls]) + for i, x in enumerate(self.dataset_groups): + x.load_tobacked( + adata_backed=self.adata, + genome=genome, + idx=idx_ls[i], + annotated_only=annotated_only, + load_raw=load_raw, + allow_caching=allow_caching, + ) + # If the sparse non-shuffled approach is used, make sure that self.adata.obs.index is unique() before saving + if not scatter_update: + self.adata.obs.index = pd.RangeIndex(0, len(self.adata.obs.index)) + # Explicitly write backed file to disk again to make sure that obs are included and that n_obs is set correctly + self.adata.write() + # Saving obs separately below is therefore no longer required (hence commented out) + # fn_backed_obs = ".".join(self.fn_backed.split(".")[:-1]) + "_obs.csv" + # self.adata.obs.to_csv(fn_backed_obs) + + def delete_backed(self): + del self.adata + self.adata = None + os.remove(str(self.fn_backed)) + + def load_cached_backed(self, fn: PathLike): + self.adata = anndata.read(fn, backed='r') + + def streamline(self, format: str = "sfaira", clean: bool = False): + """ + Streamline the adata instance in each group and each data set to output format. + + Output format are saved in ADATA_FIELDS* classes. + + :param format: Export format. + + - "sfaira" + - "cellxgene" + :param clean: Whether to delete non-streamlined fields. + :return: + """ + for x in self.dataset_groups: + for xx in x.ids: + x.datasets[xx].streamline(format=format, clean=clean) + + def subset(self, key, values): + """ + Subset list of adata objects based on match to values in key property. + + These keys are properties that are available in lazy model. + Subsetting happens on .datasets. + + :param key: Property to subset by. + :param values: Classes to overlap to. 
+ :return: + """ + for x in self.dataset_groups: + x.subset(key=key, values=values) + self.dataset_groups = [x for x in self.dataset_groups if x.datasets is not None] # Delete empty DatasetGroups + + def subset_cells(self, key, values: Union[str, List[str]]): + """ + Subset list of adata objects based on cell-wise properties. + + These keys are properties that are not available in lazy model and require loading first because the + subsetting works on the cell-level: .adata are maintained but reduced to matches. + + :param key: Property to subset by. Options: + + - "age" points to self.age_obs_key + - "assay" points to self.assay_obs_key + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "healthy" points to self.healthy_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key + :param values: Classes to overlap to. + :return: + """ + for i in range(len(self.dataset_groups)): + self.dataset_groups[i].subset_cells(key=key, values=values) + + def project_celltypes_to_ontology(self): + """ + Project free text cell type names to ontology based on mapping table. + :return: + """ + for _, v in self.dataset_groups: + v.project_celltypes_to_ontology() + + def write_config(self, fn: Union[str, os.PathLike]): + """ + Writes a config file that describes the current data sub-setting. + + This config file can be loaded later to recreate a sub-setting. + + :param fn: Output file. + """ + pd.DataFrame({"id": np.sort(self.ids)}).to_csv(fn, index=False, sep="\t") + + def load_config(self, fn: Union[str, os.PathLike]): + """ + Load a config file and recreates a data sub-setting. + + :param fn: Output file. 
+ """ + tab = pd.read_csv(fn, header=0, index_col=None, sep="\t") + ids_keep = tab["id"].values + self.subset(key="id", values=ids_keep) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py index 7dee16978..a6a152d07 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py index 0f259eaa9..2e06f3448 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py index ee0f8128b..39c91c2cc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py index fd83fc20e..5ba646162 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py index 88571e2c9..5d71f1ff5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py index 3b0b34c4a..2f0425a5b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py index 91bc43110..6720f55ac 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py index a5f4507bb..b8bf79613 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py index 100583c1f..ee3c3f7d7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py index ec0c91901..8a2af277e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py index 94d8cda78..01c6fbc26 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py index 55314e37b..9564a3f29 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py index a379e4c9c..48041903f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py index fdcd7987d..98009e77d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py index d28ff3061..2311943e9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py index eb25058c5..a2b28d3da 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py index dc75d9f4d..9a658b5be 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py index a7e5ccb2d..5914ddd2f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py index 56a81bed8..1e118813d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from 
sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py index dd3189b0a..77f282d46 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py index 5a39d70ea..5d2699c4f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py index e0dc41cbd..f098857eb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py index 7041b1361..ef7c715f8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -1,6 +1,6 @@ from typing import Union -from 
sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py index 1d5b40fac..29d4a6bbb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py index 8643327fe..f690aac3c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py index 4b666f4aa..2ab0527ab 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py index 1119b8fda..5774d3f73 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ 
-1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py index 739712ca9..882ffcde1 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py index b5c5e135c..4892cc93e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py index b9437b352..4aeac2457 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py index 04f7da782..e77a55a62 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py index 2517d62e7..00a805ca4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py index d0da3c8b9..f674b0384 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py index 18af63f1b..fbd0c0b86 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py index f38682fc9..b9b848057 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py index 1789c37e7..b6369cc1d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py index 04e4005ed..386772100 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py index 7135862de..3f68fac80 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py index f8ea9a05f..f47ce5203 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py index d85b6a021..fe5839f09 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py index cf60b31f4..fef634fa2 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py index f95e77a98..4873e3440 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py index 7abeea47b..542969abc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py index 42356fc4a..f8db234d4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py index 575964baa..bfa86db4d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py index e49858759..86baebf1a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from 
sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py index aad754e29..534445900 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py index d4a094bdc..132026f9d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py index 93b7c2963..d9e90d128 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py index c8ab2a149..933f412f1 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup 
+from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py index 4dbadb8b0..56237bbf0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py index b13e76723..f3b434758 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py index d41430f05..5f78d124e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py index bb63a1b17..b5b236f09 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -1,6 +1,6 @@ from typing 
import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py index 5dc85bd40..c67e8f82c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py index 29d064e7e..ceef059d0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py index 4d0e07b3c..52e9daaf4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py index c952ab20c..1a73b0d39 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py index 073ec9f04..c0535f364 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py index db927c906..788c7b508 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py index 4515cdb98..92c4816d3 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py index a932a1b5c..9bdf31cdc 100644 --- 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py index c847d4474..ec80bc7e6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py index 6c9f7a56d..77512772c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py index 31e7619df..dc49ce938 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py index 
4a1c988d8..310278f8a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py index 1c052942c..14eb435d8 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py index 96e087e4f..cfe9995ab 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py index d7f9a812c..30fcef445 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py index c67a0893a..85a214b96 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -1,6 +1,6 @@ from typing import Union -from sfaira.data.base import DatasetGroup +from sfaira.data import DatasetGroup from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index 6e56d9231..9b21d28b6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -26,7 +26,7 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2017 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" + self.cellontology_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 56e138f23..db3a4daba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -311,7 +311,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "index" # Only adult and neonatal samples are annotated: - self.obs_key_cellontology_original = "Annotation" \ + self.cellontology_original_obs_key = "Annotation" \ if sample_dev_stage_dict[self.sample_fn] in ["adult", "neonatal"] and \ self.sample_fn not in [ "NeontalBrain1_dge.txt.gz", diff 
--git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index 0c93b5125..a0dece529 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index 232e62d84..a732e7f8f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index 52d75a72d..328c92995 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py 
@@ -29,7 +29,7 @@ def __init__(self, **kwargs): self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index a07679c77..ba51fac79 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): self.year = 2016 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index 3863b4acc..a5ac571ae 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -24,9 +24,11 @@ def __init__(self, **kwargs): self.year = 2016 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Characteristics[cell type]" - self.obs_key_state_exact = "Characteristics[disease]" - self.obs_key_healthy = self.obs_key_state_exact + + self.cellontology_original_obs_key = "Characteristics[cell type]" + self.state_exact_obs_key = "Characteristics[disease]" + self.healthy_obs_key = "Characteristics[disease]" + self.healthy_state_healthy = "normal" self.set_dataset_id(idx=1) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index bb199f6a3..c79eeea5a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -36,7 +36,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltypes" + self.cellontology_original_obs_key = "celltypes" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index 40a9e13b6..fe1097067 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.year = 2020 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Cell_type" + self.cellontology_original_obs_key = "Cell_type" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index 9fd1b1219..cb6c7e413 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.year = 2017 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + 
self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 692f36012..58459a2fd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index f7d3b038b..a140306a4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.year = 2018 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" + self.cellontology_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 7ce8a365b..558d5e99a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ 
-25,7 +25,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" + self.cellontology_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index 1e6bfd0c9..b6fbee6cf 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -68,8 +68,9 @@ def __init__(self, **kwargs): self.var_symbol_col = "Gene" self.var_ensembl_col = "Accession" - self.obs_key_cellontology_original = "cell_ontology_class" - self.obs_key_organ = "organ" + + self.cellontology_original_obs_key = "cell_ontology_class" + self.organ_obs_key = "organ" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 194a2a5a5..4344c2d0d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -22,7 +22,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index ba0af97e8..3e9f3e8c2 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -31,7 +31,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" - self.obs_key_cellontology_original = "annotation" + self.cellontology_original_obs_key = "annotation" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py index 3d268ae2b..89087d9c1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 7b2d89963..6ec35942e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -20,9 +20,9 @@ def __init__(self, **kwargs): self.var_symbol_col = "index" - self.obs_key_cellontology_original = "annotation_lineage" - self.obs_key_state_exact = "condition" - self.obs_key_healthy = self.obs_key_state_exact + self.cellontology_original_obs_key = "annotation_lineage" + self.state_exact_obs_key = "condition" + self.key_healthy_obs_key = 
"condition" self.healthy_state_healthy = "Uninjured" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index 4c8b991fe..a652ef6fc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -22,7 +22,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "cell.labels" + self.cellontology_original_obs_key = "cell.labels" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 877ead07a..f38b1105a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -25,7 +25,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.obs_key_cellontology_original = "cell_type" + self.cellontology_original_obs_key = "cell_type" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index e01e29822..bbf6cc8c6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): 
self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" + self.cellontology_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index 7235a6a3a..406b81122 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -28,7 +28,7 @@ def __init__(self, **kwargs): self.normalization = "norm" self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index 3e7c56103..984672017 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 30708efe8..8ae0e23a0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -32,7 +32,7 @@ 
def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 370d84eb5..6414f9197 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -29,7 +29,7 @@ def __init__(self, **kwargs): self.year = 2020 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 902b9a861..9caa27439 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -76,10 +76,10 @@ def __init__(self, **kwargs): f"{self.sample_fn}" self.download_url_meta = None - self.obs_key_cellontology_original = "cell_ontology_class" - self.obs_key_age = "age" - self.obs_key_dev_stage = "development_stage" # not given in all data sets - self.obs_key_sex = "sex" + self.cellontology_original_obs_key = "cell_ontology_class" + self.age_obs_key = "age" + self.development_stage_obs_key = "development_stage" # not given in all data sets + self.sex_obs_key = "sex" # ToDo: further anatomical information for subtissue in "subtissue"? 
self.author = "Pisco" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 6eea746a2..a4aeb13c5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -25,9 +25,10 @@ def __init__(self, **kwargs): self.year = 2020 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "celltype" - self.obs_key_state_exact = "Diagnosis" - self.obs_key_healthy = "Status" + + self.cellontology_original_obs_key = "celltype" + self.state_exact_obs_key = "Diagnosis" + self.healthy_obs_key = "Status" self.healthy_state_healthy = "Control" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index fc3e19123..c030fcd9c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "index" self.var_ensembl_col = "ID" - self.obs_key_cellontology_original = "celltype" + self.cellontology_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index 697ea9c05..b0fcc5b7e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): self.year = 2020 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Anno_level_fig1" + self.cellontology_original_obs_key = "Anno_level_fig1" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index ffb3d2933..d2e8cc534 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -44,7 +44,7 @@ def __init__(self, **kwargs): self.year = 2019 self.var_symbol_col = "index" - self.obs_key_cellontology_original = "Celltypes" + self.cellontology_original_obs_key = "Celltypes" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index ffa12a670..3d61bde9f 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -26,7 +26,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" - self.obs_key_cellontology_original = "CellType" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index 5f4ca0eb6..5d272782b 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -49,8 +49,8 @@ def map_celltype_to_ontology( queries = [queries] oc = 
OntologyContainerSfaira() cu = CelltypeUniverse( - cl=oc.ontology_cell_types, - uberon=oc.ontology_organ, + cl=oc.cellontology_class, + uberon=oc.organ, organism=organism, **kwargs ) diff --git a/sfaira/data/utils_scripts/create_anatomical_configs.py b/sfaira/data/utils_scripts/create_anatomical_configs.py new file mode 100644 index 000000000..34b84bf6f --- /dev/null +++ b/sfaira/data/utils_scripts/create_anatomical_configs.py @@ -0,0 +1,101 @@ +import os +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data import DatasetSuperGroupSfaira + +print(tf.__version__) + +# Set global variables. +print("sys.argv", sys.argv) + +config_path = str(sys.argv[1]) + + +def clean(s): + if s is not None: + s = s.replace(' ', '').replace('-', '').replace('_', '').lower() + return s + + +configs_to_write = { + "human": [ + "adipose tissue", + "adrenal gland", + "artery", + "blood", + "bone marrow", + "brain", + "chorionic villus", + "diaphragm", + "esophagus", + "eye", + "gall bladder", + "heart", + "intestine", + "kidney", + "liver", + "lung", + "muscle organ", + "ovary", + "pancreas", + "placenta", + "pleura", + "prostate gland", + "rib" + "skeleton", + "skin of body", + "spinal cord", + "spleen", + "stomach", + "testis", + "tongue", + "thymus", + "thyroid gland", + "trachea", + "ureter", + "urinary bladder", + "uterine cervix", + "uterus", + "vault of skull", + ], + "mouse": [ + "adipose tissue", + "blood", + "bone marrow", + "brain", + "diaphragm", + "heart", + "intestine", + "kidney", + "liver", + "lung", + "mammary gland", + "muscle organ", + "ovary", + "pancreas", + "placenta", + "prostate gland", + "skin of body", + "spleen", + "stomach", + "testis", + "thymus", + "tongue", + "trachea", + "urinary bladder", + "uterus", + ] +} + +for organism, organs in configs_to_write.items(): + for organ in organs: + dsgs = DatasetSuperGroupSfaira( + data_path=".", + meta_path=".", + cache_path="." 
+ ) + dsgs.subset(key="organism", values=[organism]) + dsgs.subset(key="organ", values=[organ]) + dsgs.write_config(os.path.join(config_path, f"config_{clean(organism)}_{clean(organ)}.csv")) diff --git a/sfaira/data/utils_scripts/create_meta.py b/sfaira/data/utils_scripts/create_meta.py index e60981baa..a132f0bc4 100644 --- a/sfaira/data/utils_scripts/create_meta.py +++ b/sfaira/data/utils_scripts/create_meta.py @@ -20,8 +20,7 @@ def write_meta(args0, args1): ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( data_path=data_path, meta_path=path_meta, cache_path=path_meta ) -dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. -dsg.load( +ds.load( annotated_only=False, match_to_reference=None, remove_gene_version=True, diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 1c954dbea..474a0375c 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -29,9 +29,8 @@ def write_meta(args0, args1): ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( data_path=data_path, meta_path=path_meta, cache_path=path_cache ) -dsg = ds.flatten() # need to flatten in this case to parallelise across Groups and not just within. 
# Write meta data, cache and test load from cache: -dsg.load( +ds.load( annotated_only=False, match_to_reference=None, remove_gene_version=True, diff --git a/sfaira/data/utils_scripts/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py index d5f1ce497..acc12a185 100644 --- a/sfaira/data/utils_scripts/write_backed_human.py +++ b/sfaira/data/utils_scripts/write_backed_human.py @@ -17,7 +17,7 @@ data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["human"]) -ds.load_all_tobacked( +ds.load_tobacked( fn_backed=fn, genome=genome, shuffled=False, diff --git a/sfaira/data/utils_scripts/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py index e3470a73f..8f33dadd4 100644 --- a/sfaira/data/utils_scripts/write_backed_mouse.py +++ b/sfaira/data/utils_scripts/write_backed_mouse.py @@ -17,7 +17,7 @@ data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["mouse"]) -ds.load_all_tobacked( +ds.load_tobacked( fn_backed=fn, genome=genome, shuffled=False, diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 77b1591c9..e47314d41 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -472,7 +472,7 @@ def set_latest( topology_id=self.model_topology ) self.celltypes = CelltypeUniverse( - cl=self._ontology_container_sfaira.ontology_cell_types, - uberon=self._ontology_container_sfaira.ontology_organ, + cl=self._ontology_container_sfaira.cellontology_class, + uberon=self._ontology_container_sfaira.organ, organism=self.organism ).load_target_universe(organ=self.organ) diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index bd8f6d2ae..b641db637 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -14,60 +14,102 @@ def test_dsgs_instantiate(): _ = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) -def 
test_dsgs_load(): +@pytest.mark.parametrize("organ", ["intestine", "ileum"]) +def test_dsgs_subset_dataset_wise(organ: str): + """ + Tests if subsetting results only in datasets of the desired characteristics. + """ ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - ds.load_all() + ds.subset(key="organ", values=[organ]) + for x in ds.dataset_groups: + for k, v in x.datasets.items(): + assert v.organism == "mouse", v.organism + assert v.ontology_container_sfaira.organ.is_a(query=v.organ, reference=organ), v.organ -def test_dsgs_adata(): +def test_dsgs_config_write_load(): + fn = dir_data + "/config.csv" ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - _ = ds.adata + ds.subset(key="organ", values=["lung"]) + ds.write_config(fn=fn) + ds2 = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds2.load_config(fn=fn) + assert np.all(ds.ids == ds2.ids) -@pytest.mark.parametrize("format", ["sfaira", "cellxgene"]) -@pytest.mark.parametrize("clean", [True, False]) -def test_dsgs_streamline(format: str, clean: bool): +def test_dsg_load(): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - ds.load_all() - ds.streamline(format=format, clean=clean) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + ds.load() -def test_dsgs_streamline_cellxgene(): +def test_dsg_adata(): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - ds.load_all() - ds.streamline(format="cellxgene", clean=True) + 
ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + _ = ds.adata -def test_dsg_load(): +""" +TODO tests from here on down require cached data for mouse lung +""" + + +def test_dsgs_adata(): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load_all() + ds.load(remove_gene_version=True) + _ = ds.adata -def test_dsg_adata(): +def test_dsgs_load(): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - _ = ds.adata + ds.subset(key="organ", values=["lung"]) + ds.load(remove_gene_version=False) + + +@pytest.mark.parametrize("organ", ["lung"]) +@pytest.mark.parametrize("celltype", ["T cell"]) +def test_dsgs_subset_cell_wise(organ: str, celltype: str): + """ + Tests if subsetting results only in datasets of the desired characteristics. 
+ """ + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=[organ]) + ds.load(remove_gene_version=False) + ds.subset_cells(key="cellontology_class", values=celltype) + for x in ds.dataset_groups: + for k, v in x.datasets.items(): + assert v.organism == "mouse", v.id + assert v.ontology_container_sfaira.organ.is_a(query=v.organ, reference=organ), v.organ + for y in np.unique(v.adata.obs[v._adata_ids_sfaira.cell_ontology_class].values): + assert v.ontology_container_sfaira.cellontology_class.is_a(query=y, reference=celltype), y + + +@pytest.mark.parametrize("out_format", ["sfaira", "cellxgene"]) +@pytest.mark.parametrize("clean_objects", [True, False]) +def test_dsgs_streamline(out_format: str, clean_objects: bool): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds.load(remove_gene_version=True) + ds.streamline(format=out_format, clean=clean_objects) def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) + ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load_all_tobacked( + ds.load_tobacked( fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), genome=genome, shuffled=True, @@ -80,9 +122,9 @@ def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["bladder"]) + ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) - 
ds.load_all_tobacked( + ds.load_tobacked( fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), genome=genome, shuffled=False, diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index 46c4a4385..6c13dfa8b 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -1,10 +1,37 @@ from sfaira.versions.metadata import OntologyUberon, OntologyCelltypes, OntologyMmusdv, OntologyHsapdv, \ OntologyHancestro +""" +CL +""" -def test_cl(): + +def test_cl_loading(): _ = OntologyCelltypes(branch="v2021-02-01") -def test_uberon(): +def test_cl_subsetting(): + oc = OntologyCelltypes(branch="v2021-02-01") + assert oc.is_a(query="T cell", reference="lymphocyte") + assert oc.is_a(query="lymphocyte", reference="lymphocyte") + assert not oc.is_a(query="lymphocyte", reference="T cell") + + +""" +UBERON +""" + + +def test_uberon_loading(): _ = OntologyUberon() + + +def test_uberon_subsetting(): + ou = OntologyUberon() + assert ou.is_a(query="lobe of lung", reference="lung") + assert ou.is_a(query="lobe of lung", reference="lobe of lung") + assert not ou.is_a(query="lung", reference="lobe of lung") + + assert ou.is_a(query="lobar bronchus", reference="lung") + assert ou.is_a(query="lobar bronchus", reference="lobar bronchus") + assert not ou.is_a(query="lung", reference="lobar bronchus") diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 6571993dd..cc9bd8e6e 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -46,8 +46,11 @@ def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: """ pass + def is_node(self, x: str): + return x in self.node_names + def validate_node(self, x: str): - if x not in self.node_names: + if not self.is_node(x=x): suggestions = self.map_node_suggestion(x=x, include_synonyms=False) raise ValueError(f"Node label {x} not found. 
Did you mean any of {suggestions}?") @@ -89,6 +92,18 @@ def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: def synonym_node_properties(self) -> List[str]: return [] + def is_a(self, query: str, reference: str) -> bool: + """ + Checks if query node is reference node. + + Note that there is no notion of ancestors for list ontologies. + + :param query: Query node name. Node ID or name. + :param reference: Reference node name. Node ID or name. + :return: If query node is reference node or an ancestor thereof. + """ + return query == reference + class OntologyEbi(Ontology): """ @@ -238,8 +253,24 @@ def get_all_roots(self) -> List[str]: return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] def get_ancestors(self, node: str) -> List[str]: + if node not in self.node_ids: + node = self.id_from_name(node) return list(networkx.ancestors(self.graph, node)) + def is_a(self, query: str, reference: str) -> bool: + """ + Checks if query node is reference node or an ancestor thereof. + + :param query: Query node name. Node ID or name. + :param reference: Reference node name. Node ID or name. + :return: If query node is reference node or an ancestor thereof. + """ + if query not in self.node_ids: + query = self.id_from_name(query) + if reference not in self.node_ids: + reference = self.id_from_name(reference) + return query in self.get_ancestors(node=reference) or query == reference + def map_to_leaves(self, node: str, return_type: str = "elements", include_self: bool = True): """ Map a given list of nodes to leave nodes. 
@@ -497,7 +528,9 @@ def __init__( **kwargs ): # Identify cache: - ontology_cache_dir = os.path.join("/".join(FILE_PATH.split("/")[:-4]), "cache/ontologies/cl/") + folder = FILE_PATH.split(os.sep)[:-4] + folder.insert(1, os.sep) + ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cl") fn = f"{branch}_cl.obo" fn_path = os.path.join(ontology_cache_dir, fn) # Download if necessary: From 0fd6f8d1b2f1b7293d51f81e7186876b774a1353 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 4 Mar 2021 18:03:28 +0100 Subject: [PATCH 081/161] depreceated sample_ids (#162) * depreceated sample_ids --- sfaira/data/base/dataset.py | 4 ++-- sfaira/data/base/dataset_group.py | 2 +- ...iaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml | 9 ++++----- .../human_pancreas_2016_smartseq2_segerstolpe_001.py | 4 ++-- .../human_x_2019_10xsequencing_szabo_001.py | 2 +- .../human_lung_2020_x_travaglini_001.yaml | 1 - ...an_lungparenchyma_2020_10xsequencing_habermann_001.py | 2 +- 7 files changed, 11 insertions(+), 13 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 28668a4f7..477057239 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -199,7 +199,7 @@ def __init__( assert os.path.exists(yaml_path), f"did not find yaml {yaml_path}" yaml_vals = read_yaml(fn=yaml_path) for k, v in yaml_vals["attr"].items(): - if v is not None and k not in ["sample_fns", "sample_ids", "dataset_index"]: + if v is not None and k not in ["sample_fns", "dataset_index"]: if isinstance(v, dict): # v is a dictionary over file-wise meta-data items assert self.sample_fn in v.keys(), f"did not find key {self.sample_fn} in yamls keys for {k}" setattr(self, k, v[self.sample_fn]) @@ -1427,7 +1427,7 @@ def cellontology_class_obs_key(self) -> str: def cellontology_class_obs_key(self, x: str): self.__erasing_protection(attr="cellontology_class_obs_key", val_old=self._cellontology_class_obs_key, val_new=x) - self._cellontology_class_obs_key 
= x\ + self._cellontology_class_obs_key = x @property def cellontology_id_obs_key(self) -> str: diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index c100ed528..cf7ba070d 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -499,7 +499,7 @@ def __init__( ".SAMPLE_FNS") fn_yaml = os.path.join(self._cwd, file_module + ".yaml") fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None - # Check for sample_fns and sample_ids in yaml: + # Check for sample_fns in yaml: if fn_yaml is not None: assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" yaml_vals = read_yaml(fn=fn_yaml) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index 40207457e..5015bbd13 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -1,6 +1,5 @@ dataset_structure: dataset_index: 1 - sample_ids: sample_fns: - "HC" - "UC" @@ -10,11 +9,11 @@ dataset_wise: doi: - "10.1016/j.cell.2018.08.067" download_url_data: - - "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FHC%5Fexpression%5Fmatrix%2Etxt%2Egz" - - "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FUC%5Fexpression%5Fmatrix%2Etxt%2Egz" + HC: "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FHC%5Fexpression%5Fmatrix%2Etxt%2Egz" + UC: "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE114374&format=file&file=GSE114374%5FHuman%5FUC%5Fexpression%5Fmatrix%2Etxt%2Egz" download_url_meta: - - 
"private,hc_meta_data_stromal_with_donor.txt" - - "private,uc_meta_data_stromal_with_donor.txt" + HC: "private,hc_meta_data_stromal_with_donor.txt" + UC: "private,uc_meta_data_stromal_with_donor.txt" normalization: "norm" year: 2019 dataset_or_observation_wise: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index a5ac571ae..a6d85b5f6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -24,11 +24,11 @@ def __init__(self, **kwargs): self.year = 2016 self.var_symbol_col = "index" - + self.cellontology_original_obs_key = "Characteristics[cell type]" self.state_exact_obs_key = "Characteristics[disease]" self.healthy_obs_key = "Characteristics[disease]" - + self.healthy_state_healthy = "normal" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index b6fbee6cf..fb894d478 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -68,7 +68,7 @@ def __init__(self, **kwargs): self.var_symbol_col = "Gene" self.var_ensembl_col = "Accession" - + self.cellontology_original_obs_key = "cell_ontology_class" self.organ_obs_key = "organ" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index 5273f033d..7cbbab6e1 
100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -1,6 +1,5 @@ dataset_structure: dataset_index: 1 - sample_ids: sample_fns: - "droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad" - "facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index a4aeb13c5..177ddabce 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -25,7 +25,7 @@ def __init__(self, **kwargs): self.year = 2020 self.var_symbol_col = "index" - + self.cellontology_original_obs_key = "celltype" self.state_exact_obs_key = "Diagnosis" self.healthy_obs_key = "Status" From 5879b54d75280b583257135ac86843ccbf10b6d8 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 4 Mar 2021 18:06:17 +0100 Subject: [PATCH 082/161] fix windows builds (#164) --- sfaira/versions/metadata/base.py | 40 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index cc9bd8e6e..c9b97c33c 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -527,25 +527,27 @@ def __init__( branch: str, **kwargs ): - # Identify cache: - folder = FILE_PATH.split(os.sep)[:-4] - folder.insert(1, os.sep) - ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cl") - fn = f"{branch}_cl.obo" - fn_path = os.path.join(ontology_cache_dir, fn) - # Download if necessary: - if not os.path.isfile(fn_path): - - def 
download_cl(): - url = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" - print(f"Downloading: {fn}") - if not os.path.exists(ontology_cache_dir): - os.makedirs(ontology_cache_dir) - r = requests.get(url, allow_redirects=True) - open(fn_path, 'wb').write(r.content) - - download_cl() - super().__init__(obo=fn_path) + if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet + obofile = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" + else: + # Identify cache: + folder = FILE_PATH.split(os.sep)[:-4] + folder.insert(1, os.sep) + ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cl") + fn = f"{branch}_cl.obo" + obofile = os.path.join(ontology_cache_dir, fn) + # Download if necessary: + if not os.path.isfile(obofile): + def download_cl(): + url = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" + print(f"Downloading: {fn}") + if not os.path.exists(ontology_cache_dir): + os.makedirs(ontology_cache_dir) + r = requests.get(url, allow_redirects=True) + open(obofile, 'wb').write(r.content) + download_cl() + + super().__init__(obo=obofile) # Clean up nodes: nodes_to_delete = [] From 9b4c75a3be92c6f522418d34e9be5eb10893f790 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 4 Mar 2021 18:26:50 +0100 Subject: [PATCH 083/161] added entry of unconstrained ontology for doi and id into container (#168) --- sfaira/consts/ontologies.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 5de6b8425..5751bd630 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -13,8 +13,10 @@ def __init__(self): self.cellontology_class = "v2021-02-01" self.cellontology_original = None self.developmental_stage = None + self.doi = None self.ethnicity = None self.healthy = [True, False] + self.id = None self.normalization = None self.organ = OntologyUberon() self.organism = OntologyList(terms=["mouse", "human"]) From a713ab0a679e2b409ae841f3d35bebda758c413d Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Thu, 4 Mar 2021 18:32:49 +0100 Subject: [PATCH 084/161] None ontology matching (#169) * added entry of unconstrained ontology for doi and id into container * allowed mathcing of terms to None ontology --- sfaira/data/base/dataset.py | 10 ++++++---- sfaira/data/base/dataset_group.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 477057239..4ef9f7fc4 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -34,7 +34,7 @@ """ -def is_term( +def is_child( query, ontology: Union[Ontology, bool, int, float, str, List[bool], List[int], List[float], List[str]], ontology_parent=None, @@ -55,8 +55,10 @@ def is_term( return ontology.is_node(query) else: return ontology.is_a(query=query, reference=ontology_parent) + elif ontology is None: + return query == ontology_parent else: - return query in ontology + raise ValueError(f"did not recognize ontology type {type(ontology)}") else: return True @@ -1734,7 +1736,7 @@ def _value_protection( if not isinstance(attempted, list): attempted = [attempted] for x in attempted: - if not is_term(query=x, 
ontology=allowed): + if not is_child(query=x, ontology=allowed): raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") def subset_cells(self, key, values): @@ -1786,7 +1788,7 @@ def get_subset_idx(samplewise_key, cellwise_key): # Test only unique elements found in ontology to save time. values_found_unique_matched = [ x for x in values_found_unique if np.any([ - is_term(query=x, ontology=ontology, ontology_parent=y) + is_child(query=x, ontology=ontology, ontology_parent=y) for y in values ]) ] diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index cf7ba070d..204674717 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -12,7 +12,7 @@ from typing import Dict, List, Tuple, Union import warnings -from sfaira.data.base.dataset import is_term, DatasetBase +from sfaira.data.base.dataset import is_child, DatasetBase from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.consts import AdataIdsSfaira from sfaira.data.utils import read_yaml @@ -414,7 +414,7 @@ def subset(self, key, values: Union[list, tuple, np.ndarray]): values_found = [values_found] if not np.any([ np.any([ - is_term(query=y, ontology=ontology, ontology_parent=z) + is_child(query=y, ontology=ontology, ontology_parent=z) for z in values ]) for y in values_found ]): From f8e34467aaecb09a93ee9b08f79d832a63ba33b1 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Fri, 5 Mar 2021 12:19:33 +0100 Subject: [PATCH 085/161] fix year ontology (#170) --- sfaira/consts/ontologies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 5751bd630..20b071ed2 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -21,7 +21,7 @@ def __init__(self): self.organ = OntologyUberon() self.organism = OntologyList(terms=["mouse", "human"]) self.sex = 
OntologyList(terms=["female", "male"]) - self.year = list(range(2000, 3000)) + self.year = OntologyList(terms=list(range(2000, 3000))) @property def cellontology_class(self): From ab78dc6a06bef5d433a44d0849d09d6df04b9c18 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 5 Mar 2021 17:06:18 +0100 Subject: [PATCH 086/161] fixed load call with function handle (#172) --- sfaira/data/utils_scripts/create_meta_and_cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 474a0375c..e22902250 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -29,6 +29,7 @@ def write_meta(args0, args1): ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( data_path=data_path, meta_path=path_meta, cache_path=path_cache ) +ds = ds.flatten() # Write meta data, cache and test load from cache: ds.load( annotated_only=False, From dbf4fa8578a035d09ee3745adfef8f75a4cb2b4a Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Mon, 8 Mar 2021 09:36:31 +0100 Subject: [PATCH 087/161] add fetal data (#171) --- .../d10_1126_science_aba7721/__init__.py | 1 + .../human_x_2020_scirnaseq_cao_001.py | 16 +++++++ .../human_x_2020_scirnaseq_cao_001.yaml | 46 +++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py new file mode 100644 index 000000000..69999c1a6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py @@ -0,0 +1,16 @@ +import os +import gzip +import anndata +import shutil + + +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "GSE156793_S3_gene_count.loom.gz") + fn_tmp = os.path.join(data_dir, "tmp.loom") + with gzip.open(fn, 'rb') as f_in: + with open(fn_tmp, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + adata = anndata.read_loom(fn_tmp) + os.remove(fn_tmp) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml new file mode 100644 index 000000000..d4b120761 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml @@ -0,0 +1,46 @@ +dataset_structure: + dataset_index: 1 + sample_fns: +dataset_wise: + author: + - "Cao" + doi: + - "10.1126/science.aba7721" + download_url_data: "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE156793&format=file&file=GSE156793%5FS3%5Fgene%5Fcount%2Eloom%2Egz" + download_url_meta: + normalization: "raw" + year: 2020 +dataset_or_observation_wise: + age: + age_obs_key: "Age" + assay: "sci-RNA-seq" + assay_obs_key: + bio_sample: + bio_sample_obs_key: + development_stage: + development_stage_obs_key: "Development_day" + ethnicity: + ethnicity_obs_key: + healthy: True + healthy_obs_key: + individual: + individual_obs_key: "Fetus_id" + organ: + organ_obs_key: "Organ" + organism: "human" + organism_obs_key: + sex: + sex_obs_key: "Sex" + state_exact: + 
state_exact_obs_key: + tech_sample: + tech_sample_obs_key: "Experiment_batch" +observation_wise: + cellontology_original_obs_key: "Main_cluster_name" +feature_wise: + var_ensembl_col: "gene_id" + var_symbol_col: "gene_short_name" +misc: + healthy_state_healthy: "healthy" +meta: + version: "1.0" From 57e19c7d71df932918a468d66597e044f4661509 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 8 Mar 2021 17:08:03 +0100 Subject: [PATCH 088/161] rewrote collapse_matrix, exported to util and wrapped into unit test (#176) --- sfaira/data/base/dataset.py | 55 +++++------------------ sfaira/data/utils.py | 37 +++++++++++++++ sfaira/unit_tests/data/test_data_utils.py | 52 ++++++++++++++++++++- 3 files changed, 99 insertions(+), 45 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 4ef9f7fc4..631f9a52a 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -20,7 +20,7 @@ from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse from sfaira.consts import AdataIdsExtended, AdataIdsSfaira, META_DATA_FIELDS, OCS -from sfaira.data.utils import read_yaml +from sfaira.data.utils import collapse_matrix, read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -409,7 +409,7 @@ def load( self.adata.uns[self._adata_ids_sfaira.remove_gene_version] = remove_gene_version # Streamline feature space: self._convert_and_set_var_names(match_to_reference=match_to_reference) - self._collapse_gene_versions(remove_gene_version=remove_gene_version) + self._collapse_genes(remove_gene_version=remove_gene_version) if match_to_reference: self._match_features_to_reference() @@ -482,7 +482,7 @@ def _convert_and_set_var_names( raise KeyError(e) self.adata.var_names_make_unique() - def _collapse_gene_versions(self, remove_gene_version): + def _collapse_genes(self, remove_gene_version): """ Remove version tag on ensembl gene ID so that different versions are superimposed 
downstream. @@ -490,47 +490,14 @@ def _collapse_gene_versions(self, remove_gene_version): :return: """ if remove_gene_version: - new_index = [x.split(".")[0] for x in self.adata.var_names.tolist()] - # Collapse if necessary: - new_index_collapsed = list(np.unique(new_index)) - if len(new_index_collapsed) < self.adata.n_vars: - print("WARNING: duplicate features detected after removing gene versions. " - "the code to collapse these features is implemented but not tested.") - idx_map = np.array([new_index_collapsed.index(x) for x in new_index]) - # Need reverse sorting to find index of last element in sorted list to split array using list index(). - idx_map_sorted_fwd = np.argsort(idx_map) - idx_map_sorted_rev = idx_map_sorted_fwd[::-1].tolist() - n_genes = len(idx_map_sorted_rev) - # 1. Sort array in non-reversed order: idx_map_sorted_rev[::-1] - # 2. Split into chunks based on blocks of identical entries in idx_map, using the occurrence of the - # last element of each block as block boundaries: - # n_genes - 1 - idx_map_sorted_rev.index(x) - # Note that the blocks are named as positive integers starting at 1, without gaps. 
- counts = np.concatenate([ - np.sum(x, axis=1, keepdims=True) - for x in np.split( - self.adata[:, idx_map_sorted_fwd].X, # forward ordered data - indices_or_sections=[ - n_genes - 1 - idx_map_sorted_rev.index(x) # last occurrence of element in forward order - for x in np.arange(0, len(new_index_collapsed) - 1) - ], # -1: do not need end of last partition - axis=1 - ) - ][::-1], axis=1) - # Remove varm and populate var with first occurrence only: - obs_names = self.adata.obs_names - self.adata = anndata.AnnData( - X=counts, - obs=self.adata.obs, - obsm=self.adata.obsm, - var=self.adata.var.iloc[[new_index.index(x) for x in new_index_collapsed]], - uns=self.adata.uns - ) - self.adata.obs_names = obs_names - self.adata.var_names = new_index_collapsed - new_index = new_index_collapsed - self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = new_index - self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values + self.adata.var_names = [ + x.split(".")[0] for x in self.adata.var[self._adata_ids_sfaira.gene_id_index].values + ] + # Collapse if necessary: + self.adata = collapse_matrix(adata=self.adata) + + self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = self.adata.var_names + self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values def _match_features_to_reference(self): """ diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index 5d272782b..e229976c6 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -1,3 +1,6 @@ +import anndata +import numpy as np +import scipy.sparse import yaml from typing import Dict, List, Union @@ -117,3 +120,37 @@ def read_yaml(fn) -> Dict[str, Dict[str, Union[str, int, bool]]]: else: meta_dict.update(v) return {"attr": attr_dict, "meta": meta_dict} + + +def collapse_matrix(adata: anndata.AnnData) -> anndata.AnnData: + """ + Collapses (sum) features with the same var_name. + + Does not retain .varm if duplicated var_names are found. 
+ keeps .var column of first occurrence of duplicated variables. + + :param adata: Input anndata instance with potential duplicated var_names. + :return: Processed anndata instance without duplicated var_names. + """ + new_index = np.unique(adata.var_names).tolist() + if len(new_index) < adata.n_vars: + idx_map = np.array([np.where(x == adata.var_names)[0] for x in new_index]) + # Build initial matrix from first match. + data = adata.X[:, np.array([x[0] for x in idx_map])].copy() + # Add additional matched (duplicates) on top: + for i, idx in enumerate(idx_map): + if len(idx) > 1: + data[:, i] = data[:, i] + adata.X[:, idx[1:]].sum(axis=1) + + # Remove varm and populate var with first occurrence only: + obs_names = adata.obs_names + adata = anndata.AnnData( + X=data, + obs=adata.obs, + obsm=adata.obsm, + var=adata.var.iloc[[adata.var_names.tolist().index(x) for x in new_index]], + uns=adata.uns + ) + adata.obs_names = obs_names + adata.var_names = new_index + return adata diff --git a/sfaira/unit_tests/data/test_data_utils.py b/sfaira/unit_tests/data/test_data_utils.py index b67e3e191..352c59462 100644 --- a/sfaira/unit_tests/data/test_data_utils.py +++ b/sfaira/unit_tests/data/test_data_utils.py @@ -1,7 +1,11 @@ +import anndata +import numpy as np +import pandas as pd import pytest +import scipy.sparse from typing import Union -from sfaira.data.utils import map_celltype_to_ontology +from sfaira.data.utils import map_celltype_to_ontology, collapse_matrix @pytest.mark.parametrize("trial_cell_type_labels", @@ -39,3 +43,49 @@ def test_map_celltype_to_ontology( else: assert isinstance(matches, list), matches assert "type B pancreatic cell" in matches + + +@pytest.mark.parametrize("data", ["scipy.sparse.lil_matrix", "scipy.sparse.csr_matrix", "numpy"]) +@pytest.mark.parametrize("duplications", [False, True]) +def test_collapse_matrix( + data: str, + duplications: bool, +): + """ + Tests collapse_matrix. 
+ + Tests if: + + - matrix type is maintained + - row sums are maintained + - feature dimension is correct + + :param data: Data format. + :param duplications: Whether feature names are duplicated. + :return: + """ + x = np.asarray(np.random.randint(0, 100, size=(10, 10)), dtype="float32") + if data == "scipy.sparse.lil_matrix": + x = scipy.sparse.lil_matrix(x) + elif data == "scipy.sparse.csr_matrix": + x = scipy.sparse.csr_matrix(x) + elif data == "numpy": + pass + else: + assert False + if duplications: + index = ["g" + str(i) for i in range(x.shape[1])] + else: + # Create triplicate and duplicate gene names: + index = ["g" + str(i) for i in range(2)] + ["g" + str(i) for i in range(3)] + \ + ["g" + str(i) for i in range(x.shape[1] - 3 - 2)] + adata = anndata.AnnData(x, var=pd.DataFrame(index=index)) + adata.var_names = index + adata2 = collapse_matrix(adata=adata) + assert adata.X.shape[0] == adata2.X.shape[0], "observation dimension mismatch" + assert adata.X.dtype == adata2.X.dtype, "type mismatch" + assert adata2.X.shape[1] == len(np.unique(adata.var_names)), "feature dimension mismatch" + assert np.all(np.asarray(adata.X.sum()).flatten() == np.asarray(adata2.X.sum().flatten())), \ + "total count mismatch" + assert np.all(np.asarray(adata.X.sum(axis=1)).flatten() == np.asarray(adata2.X.sum(axis=1).flatten())), \ + "observation-wise count mismatch" From 2b15f0b5c4e24d41f80580eb03b10b5370903d5c Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Tue, 9 Mar 2021 15:13:15 +0100 Subject: [PATCH 089/161] Support samples other than from primary tissue (#173) * deprecate adata fields `protocol` and `assay` in favour of `assay_sc` * merge AdataIdsBase and AdataIdsExtended into single class AdataIds [skip CI] * fix AdataIds [skip CI] * add sample_source property to existing dataloaders and additional fields to yaml dataloaders * implement new organoid ontology fields * introduce cellosaurus ontology constraint for cell_line field * 
fix flake8 --- docs/api/sfaira.data.DatasetBase.rst | 12 +- docs/api/sfaira.data.DatasetInteractive.rst | 12 +- docs/data.rst | 41 +++- sfaira/commands/create_dataloader.py | 14 +- sfaira/commands/lint_dataloader.py | 8 +- .../cookiecutter.json | 5 +- .../{{ cookiecutter.id_without_doi }}.py | 12 +- .../cookiecutter.json | 5 +- .../{{ cookiecutter.id_without_doi }}.py | 12 +- .../cookiecutter.json | 5 +- .../{{ cookiecutter.id_without_doi }}.py | 12 +- .../single_dataset/cookiecutter.json | 5 +- .../{{ cookiecutter.id_without_doi }}.py | 12 +- sfaira/consts/__init__.py | 2 +- sfaira/consts/adata_fields.py | 113 +++++----- sfaira/consts/meta_data_files.py | 6 +- sfaira/consts/ontologies.py | 8 +- sfaira/data/base/dataset.py | 195 +++++++++++++++--- sfaira/data/base/dataset_group.py | 18 +- .../databases/cellxgene/cellxgene_loader.py | 2 +- ...letoflangerhans_2017_smartseq2_enge_001.py | 3 +- .../mouse_x_2018_microwellseq_han_x.py | 3 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 12 +- ...pithelium_2019_10xsequencing_smilie_001.py | 4 +- ...man_ileum_2019_10xsequencing_martin_001.py | 3 +- ...stategland_2018_10xsequencing_henry_001.py | 3 +- .../human_pancreas_2016_indrop_baron_001.py | 3 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 3 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 3 +- ...uman_lung_2020_10xsequencing_miller_001.py | 3 +- .../human_brain_2017_droncseq_habib_001.py | 3 +- ...human_testis_2018_10xsequencing_guo_001.py | 3 +- ...liver_2018_10xsequencing_macparland_001.py | 3 +- .../human_kidney_2019_droncseq_lake_001.py | 3 +- .../human_x_2019_10xsequencing_szabo_001.py | 3 +- ...man_retina_2019_10xsequencing_menon_001.py | 3 +- .../human_placenta_2018_x_ventotormo_001.py | 3 +- .../human_liver_2019_celseq2_aizarani_001.py | 3 +- ...ver_2019_10xsequencing_ramachandran_001.py | 3 +- ...an_liver_2019_10xsequencing_popescu_001.py | 3 +- .../human_x_2020_microwellseq_han_x.py | 5 +- .../human_lung_2020_x_travaglini_001.yaml | 12 +- 
...uman_colon_2020_10xsequencing_james_001.py | 3 +- .../human_lung_2019_dropseq_braga_001.py | 3 +- .../human_x_2019_10xsequencing_braga_x.py | 3 +- .../mouse_x_2019_10xsequencing_hove_001.py | 3 +- ...uman_kidney_2020_10xsequencing_liao_001.py | 3 +- ...man_retina_2019_10xsequencing_voigt_001.py | 3 +- .../human_x_2019_10xsequencing_wang_001.py | 3 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 3 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 3 +- ...nchyma_2020_10xsequencing_habermann_001.py | 3 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 3 +- ...uman_thymus_2020_10xsequencing_park_001.py | 3 +- .../human_x_2020_scirnaseq_cao_001.yaml | 12 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 3 +- ..._retina_2019_10xsequencing_lukowski_001.py | 3 +- ...lood_2019_10xsequencing_10xgenomics_001.py | 3 +- .../human_x_2018_10xsequencing_regev_001.py | 3 +- sfaira/data/interactive/loader.py | 12 +- sfaira/train/train_model.py | 6 +- sfaira/versions/metadata/__init__.py | 2 +- sfaira/versions/metadata/base.py | 42 ++++ 63 files changed, 537 insertions(+), 171 deletions(-) diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst index 7e8dd3be0..6237f0786 100644 --- a/docs/api/sfaira.data.DatasetBase.rst +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -36,10 +36,14 @@ ~DatasetBase.age ~DatasetBase.annotated + ~DatasetBase.assay_sc + ~DatasetBase.assay_differentiation + ~DatasetBase.assay_type_differentiation ~DatasetBase.author ~DatasetBase.citation ~DatasetBase.dev_stage ~DatasetBase.directory_formatted_doi + ~DatasetBase.cell_line ~DatasetBase.doi ~DatasetBase.doi_cleaned_id ~DatasetBase.download @@ -55,6 +59,10 @@ ~DatasetBase.ncells ~DatasetBase.normalization ~DatasetBase.obs_key_age + ~DatasetBase.obs_key_assay_sc + ~DatasetBase.obs_key_assay_differentiation + ~DatasetBase.obs_key_assay_type_differentiation + ~DatasetBase.obs_key_cell_line ~DatasetBase.obs_key_cellontology_id ~DatasetBase.obs_key_cellontology_original 
~DatasetBase.obs_key_dev_stage @@ -62,16 +70,16 @@ ~DatasetBase.obs_key_healthy ~DatasetBase.obs_key_organ ~DatasetBase.obs_key_organism - ~DatasetBase.obs_key_protocol ~DatasetBase.obs_key_sample + ~DatasetBase.obs_key_sample_source ~DatasetBase.obs_key_sex ~DatasetBase.obs_key_state_exact ~DatasetBase.ontology_celltypes ~DatasetBase.ontology_class_map ~DatasetBase.organ ~DatasetBase.organism - ~DatasetBase.protocol ~DatasetBase.sex + ~DatasetBase.sample_source ~DatasetBase.source ~DatasetBase.state_exact ~DatasetBase.var_ensembl_col diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst index 0bb616838..ca64fd27a 100644 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -37,6 +37,10 @@ ~DatasetInteractive.age ~DatasetInteractive.annotated ~DatasetInteractive.author + ~DatasetInteractive.assay_sc + ~DatasetInteractive.assay_differentiation + ~DatasetInteractive.assay_type_differentiation + ~DatasetInteractive.cell_line ~DatasetInteractive.citation ~DatasetInteractive.dev_stage ~DatasetInteractive.directory_formatted_doi @@ -55,6 +59,10 @@ ~DatasetInteractive.ncells ~DatasetInteractive.normalization ~DatasetInteractive.obs_key_age + ~DatasetInteractive.obs_key_assay_sc + ~DatasetInteractive.obs_key_assay_differentiation + ~DatasetInteractive.obs_key_assay_type_differentiation + ~DatasetInteractive.obs_key_cell_line ~DatasetInteractive.obs_key_cellontology_id ~DatasetInteractive.obs_key_cellontology_original ~DatasetInteractive.obs_key_dev_stage @@ -62,16 +70,16 @@ ~DatasetInteractive.obs_key_healthy ~DatasetInteractive.obs_key_organ ~DatasetInteractive.obs_key_organism - ~DatasetInteractive.obs_key_protocol ~DatasetInteractive.obs_key_sample + ~DatasetInteractive.obs_key_sample_source ~DatasetInteractive.obs_key_sex ~DatasetInteractive.obs_key_state_exact ~DatasetInteractive.ontology_celltypes ~DatasetInteractive.ontology_class_map ~DatasetInteractive.organ 
~DatasetInteractive.organism - ~DatasetInteractive.protocol ~DatasetInteractive.sex + ~DatasetInteractive.sample_source ~DatasetInteractive.source ~DatasetInteractive.state_exact ~DatasetInteractive.var_ensembl_col diff --git a/docs/data.rst b/docs/data.rst index 7bb8386cc..a3f2a1dc8 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -80,7 +80,7 @@ preprint and publication DOIs if both are available. We will also mention public The data loader python file ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Each data set (organsism, organ, protocol, optionally also batches) has its own data loader class. Each such class is +Each data set (organsism, organ, assay_sc, optionally also batches) has its own data loader class. Each such class is in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class looks very similar in parts to a cell in a juypter notebook that performs data loading. The core features that must be included are: @@ -102,7 +102,7 @@ before it is loaded into memory: # The meta data attributes labeled with (*) may als be supplied per cell, see below, # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - self.id = x # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). self.author = x # author (list) who sampled / created the data set self.doi = x # doi of data set accompanying manuscript @@ -111,13 +111,17 @@ before it is loaded into memory: self.download_url_meta = x # download website(s) of meta data files self.age = x # (*, optional) age of sample + self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) self.dev_stage = x # (*, optional) developmental stage of organism self.ethnicity = x # (*, optional) ethnicity of sample self.healthy = x # (*, optional) whether sample represents a healthy organism self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") self.organ = x # (*, optional) organ (anatomical structure) self.organism = x # (*) species / organism - self.protocol = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture self.sex = x # (*, optional) sex self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample self.year = x # year in which sample was acquired @@ -133,7 +137,7 @@ before it is loaded into memory: self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. @@ -179,8 +183,9 @@ In summary, a simply example data loader for a mouse lung data set could look li self.normalisation = "raw" # because I uploaded raw counts, which is good practice! 
self.organ = "lung" self.organism = "mouse" - self.protocol = "smart-seq2" + self.assay_sc = "smart-seq2" self.year = "2020" + self.sample_source = "primary_tissue" self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here @@ -292,10 +297,13 @@ Metadata management We constrain meta data by ontologies where possible. The current restrictions are: - - .age: unconstrained string, try using units of years for human and units of months for mice + - .age: unconstrained string, try using units of years for human, units of months for mice and units of days for + cell culture samples - .dev_stage: unconstrained string, this will constrained to an ontology in the future, try choosing from HSAPDV (http://www.obofoundry.org/ontology/hsapdv.html) for human or from MMUSDEV (http://www.obofoundry.org/ontology/mmusdv.html) for mouse + - .cell_line: unconstrained string, this will be constrained to an ontology later. try choosing from cellosaurus + cell line database (https://web.expasy.org/cellosaurus/) - .ethnicity: unconstrained string, this will constrained to an ontology in the future, try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) - .healthy: bool @@ -307,8 +315,12 @@ We constrain meta data by ontologies where possible. The current restrictions ar or from EMAPA (http://www.obofoundry.org/ontology/emapa.html) for mouse - .organism: constrained string, {"mouse", "human"}. In the future, we will use NCBITAXON (http://www.obofoundry.org/ontology/ncbitaxon.html). - - .protocol: unconstrained string, this will constrained to an anatomic ontology in the future, + - .assay_sc: unconstrained string, this will constrained to an experimental protocol ontology in the future, try choosing a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false + - .assay_differentiation: unconstrained string, try to provide a base differentiation protocol (eg. 
Lancaster, 2014) + as well as any amendments to the original protocol + - .assay_type_differentiation: constrained string, {"guided", "unguided"} + - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "cancer"} - .sex: constrained string, {"female", "male"} - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity @@ -338,7 +350,7 @@ FAQ How is the dataset’s ID structured? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi +Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi How do I assemble the data set ID if some of its element meta data are not unique? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -357,7 +369,7 @@ a Dataset attribute contains the name of the `.obs` column that contains these c (e.g. self.obs_key_organism). Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. -Which meta data objects are optional? +Which meta data objects are mandatory? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): @@ -377,6 +389,8 @@ Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): Example: self.doi = "10.1016/j.cell.2019.06.029" - .organism (or .obs_key_organism): Organism sampled. Example: self.organism = “human” + - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture + Example: self.sample_source = "primary_tissue" Highly recommended: @@ -384,8 +398,8 @@ Highly recommended: Example: self.normalization = “raw” - .organ (or .obs_key_organ): Organ sampled. 
Example: self.organ = “liver” - - .protocol (or .obs_key_protocol): Protocol with which data was collected. - Example: self.protocol = “10x” + - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. + Example: self.assay_sc = “10x” Optional (if available): @@ -405,6 +419,11 @@ Optional (if available): Example: self.obs_key_cellontology_original = 'CellType' - .year: Year of publication: Example: self.year = 2019 + - .cell_line: Which cell line was used for the experiment (for cell culture samples) + Example: self.cell_line = "409B2 (CVCL_K092)" + - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) + - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided + (for cell culture samples) How do I cache data sets? ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py index c54fcccbb..f8b3e8c82 100644 --- a/sfaira/commands/create_dataloader.py +++ b/sfaira/commands/create_dataloader.py @@ -15,7 +15,7 @@ @dataclass class TemplateAttributes: dataloader_type: str = '' # One of single_dataset, multiple_datasets_single_file, multiple_datasets_streamlined, multiple_datasets_not_streamlined - id: str = '' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). + id: str = '' # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). id_without_doi: str = '' # complete id without the doi -> usually used to name the python scripts author: Union[str, list] = '' # author (list) who sampled / created the data set @@ -27,8 +27,9 @@ class TemplateAttributes: organ: str = '' # (*, optional) organ (anatomical structure) organism: str = '' # (*) species / organism - protocol: str = '' # (*, optional) protocol used to sample data (e.g. 
smart-seq2) + assay_sc: str = '' # (*, optional) protocol used to sample data (e.g. smart-seq2) year: str = 2021 # year in which sample was acquired + sample_source: str = '' # (*) whether the sample came from primary tissue or cell culture number_of_datasets: str = 1 # Required to determine the file names @@ -105,12 +106,15 @@ def _prompt_dataloader_configuration(self): self.template_attributes.organ = sfaira_questionary(function='text', question='Organ:', default='NA') - self.template_attributes.protocol = sfaira_questionary(function='text', - question='Protocol:', + self.template_attributes.assay_sc = sfaira_questionary(function='text', + question='AssaySc:', default='NA') self.template_attributes.year = sfaira_questionary(function='text', question='Year:', default='2021') + self.template_attributes.sample_source = sfaira_questionary(function='text', + question='SampleSource:', + default='NA') first_author = author[0] if isinstance(author, list) else author try: first_author_lastname = first_author.split(',')[0] @@ -118,7 +122,7 @@ def _prompt_dataloader_configuration(self): print('[bold yellow] First author was not in the expected format. 
Using full first author for the id.') first_author_lastname = first_author self.template_attributes.id_without_doi = f'{self.template_attributes.organism}_{self.template_attributes.organ}_' \ - f'{self.template_attributes.year}_{self.template_attributes.protocol}_' \ + f'{self.template_attributes.year}_{self.template_attributes.assay_sc}_' \ f'{first_author_lastname}_001' self.template_attributes.id = self.template_attributes.id_without_doi + f'_{self.template_attributes.doi_sfaira_repr}' self.template_attributes.download_url_data = sfaira_questionary(function='text', diff --git a/sfaira/commands/lint_dataloader.py b/sfaira/commands/lint_dataloader.py index d64d870cc..02705735c 100644 --- a/sfaira/commands/lint_dataloader.py +++ b/sfaira/commands/lint_dataloader.py @@ -90,15 +90,17 @@ def _lint_required_attributes(self): 'self.download_url_data', 'self.organ', 'self.organism', - 'self.protocol', - 'self.year'] + 'self.assay_sc', + 'self.year', + 'self.sample_source'] for attribute in attributes: try: line, attribute = list(filter(lambda line_attribute: line_attribute[1].startswith(attribute), enumerate(self.content)))[0] except IndexError: passed_required_attributes = False - self.failed['-1'] = 'One of required attributes set_dataset_id, author, doi, download_url_data, organ, organism, protocol, year is missing.' + self.failed['-1'] = 'One of required attributes set_dataset_id, author, doi, download_url_data, ' \ + 'organ, organism, assay_sc, year, sample_source is missing.' if passed_required_attributes: self.passed[0] = 'Passed required dataloader attributes checks.' 
diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json index 92c82b0ec..d0ccfd5ac 100644 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json @@ -9,6 +9,7 @@ "download_url_data": "", "organ": "", "organism": "", - "protocol": "", - "year": "" + "assay_sc": "", + "year": "", + "sample_source": "" } diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 128e8d722..5a33693ac 100644 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -27,9 +27,13 @@ def __init__( self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) self.year = {{cookiecutter.year}} # year in which sample was acquired + self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture # self.age = 'x' # (*, optional) age of sample + # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) + # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + # self.cell_line = x # (*, optional) cell line used (for cell culture samples) # self.dev_stage = x # (*, optional) developmental stage of organism # self.ethnicity = x # (*, optional) ethnicity of sample # self.healthy = x # (*, optional) whether sample represents a healthy organism @@ -41,12 +45,16 @@ def __init__( # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) # self.obs_key_sex = x # (optional, see above, do not 
provide if .sex is provided) # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. diff --git a/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json index 92c82b0ec..d0ccfd5ac 100644 --- a/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json +++ b/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json @@ -9,6 +9,7 @@ "download_url_data": "", "organ": "", "organism": "", - "protocol": "", - "year": "" + "assay_sc": "", + "year": "", + "sample_source": "" } diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 9aaa6afc8..57a632eb6 100644 --- a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -36,9 +36,13 @@ def __init__( self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. 
smart-seq2) self.year = {{ cookiecutter.year }} # year in which sample was acquired + self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture # self.age = 'x' # (*, optional) age of sample + # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) + # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + # self.cell_line = x # (*, optional) cell line used (for cell culture samples) # self.dev_stage = x # (*, optional) developmental stage of organism # self.ethnicity = x # (*, optional) ethnicity of sample # self.healthy = x # (*, optional) whether sample represents a healthy organism @@ -50,12 +54,16 @@ def __init__( # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
# self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. 
diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json index 92c82b0ec..d0ccfd5ac 100644 --- a/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json +++ b/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json @@ -9,6 +9,7 @@ "download_url_data": "", "organ": "", "organism": "", - "protocol": "", - "year": "" + "assay_sc": "", + "year": "", + "sample_source": "" } diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 9e6f282bd..e8709b9bb 100644 --- a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -36,9 +36,13 @@ def __init__( self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) self.year = {{cookiecutter.year}} # year in which sample was acquired + self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture # self.age = 'x' # (*, optional) age of sample + # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) + # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + # self.cell_line = x # (*, optional) cell line used (for cell culture samples) # self.dev_stage = x # (*, optional) developmental stage of organism # self.ethnicity = x # (*, optional) ethnicity of sample # self.healthy = x # (*, optional) whether sample represents a healthy organism @@ -50,12 +54,16 @@ def __init__( # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) # self.obs_key_sex = x # (optional, see above, do not 
provide if .sex is provided) # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. diff --git a/sfaira/commands/templates/single_dataset/cookiecutter.json b/sfaira/commands/templates/single_dataset/cookiecutter.json index 92c82b0ec..d0ccfd5ac 100644 --- a/sfaira/commands/templates/single_dataset/cookiecutter.json +++ b/sfaira/commands/templates/single_dataset/cookiecutter.json @@ -9,6 +9,7 @@ "download_url_data": "", "organ": "", "organism": "", - "protocol": "", - "year": "" + "assay_sc": "", + "year": "", + "sample_source": "" } diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index b445b3319..25cb8f9d4 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -27,9 +27,13 @@ def __init__( self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.protocol = '{{ cookiecutter.protocol }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) self.year = {{cookiecutter.year}} # year in which sample was acquired + self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture # self.age = 'x' # (*, optional) age of sample + # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) + # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + # self.cell_line = x # (*, optional) cell line used (for cell culture samples) # self.dev_stage = x # (*, optional) developmental stage of organism # self.ethnicity = x # (*, optional) ethnicity of sample # self.healthy = x # (*, optional) whether sample represents a healthy organism @@ -41,12 +45,16 @@ def __init__( # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_protocol = x # (optional, see above, do not provide if .protocol is provided) + # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) # self.obs_key_sex = x # (optional, see above, do not 
provide if .sex is provided) # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py index 6a4f59ac7..c48140cbe 100644 --- a/sfaira/consts/__init__.py +++ b/sfaira/consts/__init__.py @@ -1,4 +1,4 @@ -from sfaira.consts.adata_fields import AdataIdsBase, AdataIdsExtended, AdataIdsSfaira, AdataIdsCellxgene +from sfaira.consts.adata_fields import AdataIds, AdataIdsSfaira, AdataIdsCellxgene from sfaira.consts.meta_data_files import META_DATA_FIELDS from sfaira.consts.ontologies import OntologyContainerSfaira diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 8a2145f4f..697e22dcc 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -6,44 +6,76 @@ """ -class AdataIdsBase: +class AdataIds: """ - Base class of minimal constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. + Base class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" + _age: str _annotated: str - _assay: str + _assay_sc: str + _assay_differentiation: str + _assay_type_differentiation: str _author: str + _bio_sample: str + _cell_line: str _cell_types_original: str _cell_ontology_class: str _cell_ontology_id: str + _development_stage: str _doi: str _download_url_data: str _download_url_meta: str _dataset: str _dataset_group: str + _ethnicity: str _gene_id_ensembl: str _gene_id_index: str _gene_id_names: str _healthy: str _id: str + _individual: str _ncells: str _normalization: str _organ: str _organism: str + _sample_source: str + _sex: str + _state_exact: str + _tech_sample: str _year: str + @property + def age(self) -> str: + return self._age + @property def annotated(self) -> str: return self._annotated @property - def assay(self) -> str: - return self._assay + def assay_sc(self) -> str: + return self._assay_sc + + @property + def assay_differentiation(self) -> str: + return self._assay_differentiation + + @property + def assay_type_differentiation(self) -> str: + return self._assay_type_differentiation @property def author(self) -> str: return self._author + @property + def bio_sample(self) -> str: + return self._bio_sample + + @property + def cell_line(self) -> str: + return self._cell_line + @property def cell_types_original(self) -> str: return self._cell_types_original @@ -64,6 +96,10 @@ def dataset(self) -> str: def dataset_group(self) -> str: return self._dataset_group + @property + def development_stage(self) -> str: + return self._development_stage + @property def doi(self) -> str: return self._doi @@ -76,6 +112,10 @@ def download_url_data(self) -> str: def download_url_meta(self) -> str: return self._download_url_meta + @property + def ethnicity(self) -> str: + return self._ethnicity + @property def gene_id_ensembl(self) -> str: return self._gene_id_ensembl @@ -100,6 +140,10 @@ def healthy(self) -> str: def id(self) -> str: return self._id + @property + def individual(self) -> str: + return self._individual + 
@property def ncells(self) -> str: return self._ncells @@ -113,46 +157,12 @@ def organ(self) -> str: return self._organ @property - def organism(self) -> str: # TODO refactor into organism + def organism(self) -> str: return self._organism @property - def year(self) -> str: - return self._year - - -class AdataIdsExtended(AdataIdsBase): - """ - Base class with extended set of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. - """ - _age: str - _bio_sample: str - _development_stage: str - _ethnicity: str - _individual: str - _sex: str - _state_exact: str - _tech_sample: str - - @property - def age(self) -> str: - return self._age - - @property - def bio_sample(self) -> str: - return self._bio_sample - - @property - def development_stage(self) -> str: - return self._development_stage - - @property - def ethnicity(self) -> str: - return self._ethnicity - - @property - def individual(self) -> str: - return self._individual + def sample_source(self) -> str: + return self._sample_source @property def sex(self) -> str: @@ -166,17 +176,25 @@ def state_exact(self) -> str: def tech_sample(self) -> str: return self._tech_sample + @property + def year(self) -> str: + return self._year + -class AdataIdsSfaira(AdataIdsExtended): +class AdataIdsSfaira(AdataIds): """ - Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. + Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in sfaira + dataloader objects. 
""" def __init__(self): self._annotated = "annotated" - self._assay = "assay" + self._assay_sc = "assay_sc" + self._assay_differentiation = "assay_differentiation" + self._assay_type_differentiation = "assay_type_differentiation" self._author = "author" self._bio_sample = "bio_sample" + self._cell_line = "cell_line" self._cell_types_original = "cell_types_original" self._cell_ontology_class = "cell_ontology_class" self._cell_ontology_id = "cell_ontology_id" @@ -195,6 +213,7 @@ def __init__(self): self._normalization = "normalization" self._organ = "organ" self._organism = "organism" + self._sample_source = "sample_source" self._tech_sample = "bio_sample" self._year = "year" @@ -228,7 +247,7 @@ def remove_gene_version(self) -> str: return self._remove_gene_version -class AdataIdsCellxgene(AdataIdsExtended): +class AdataIdsCellxgene(AdataIds): """ Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene objects. @@ -238,7 +257,7 @@ class AdataIdsCellxgene(AdataIdsExtended): accepted_file_names: List[str] def __init__(self): - self._assay = "assay" + self._assay_sc = "assay" self._cell_types_original = "free_annotation" self._cell_ontology_class = "cell_type" self._cell_ontology_id = "cell_type_ontology_term_id" diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index 0e203d5a5..f9073f56f 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -6,6 +6,7 @@ "annotated": bool, "author": str, "bio_sample": str, + "cell_line": str, "cell_ontology_class": str, "doi": str, "download_url_data": str, @@ -15,8 +16,11 @@ "ncells": int, "normalization": str, "organ": str, - "protocol": str, + "assay_sc": str, + "assay_differentiation": str, + "assay_type_differentiation": str, "organism": str, + "sample_source": str, "state_exact": str, "tech_sample": str, "year": int, diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 
20b071ed2..a920652b5 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -1,6 +1,6 @@ from sfaira.versions.metadata import OntologyList, OntologyCelltypes from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMmusdv, \ - OntologySinglecellLibraryConstruction + OntologySinglecellLibraryConstruction, OntologyCellosaurus class OntologyContainerSfaira: @@ -9,7 +9,10 @@ class OntologyContainerSfaira: def __init__(self): self.age = None - self.assay = OntologySinglecellLibraryConstruction() + self.assay_sc = OntologySinglecellLibraryConstruction() + self.assay_differentiation = None + self.assay_type_differentiation = OntologyList(terms=["guided", "unguided"]) + self.cell_line = OntologyCellosaurus() self.cellontology_class = "v2021-02-01" self.cellontology_original = None self.developmental_stage = None @@ -20,6 +23,7 @@ def __init__(self): self.normalization = None self.organ = OntologyUberon() self.organism = OntologyList(terms=["mouse", "human"]) + self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "cancer"]) self.sex = OntologyList(terms=["female", "male"]) self.year = OntologyList(terms=list(range(2000, 3000))) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 631f9a52a..6a2212320 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -19,7 +19,7 @@ from sfaira.versions.genome_versions import SuperGenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse -from sfaira.consts import AdataIdsExtended, AdataIdsSfaira, META_DATA_FIELDS, OCS +from sfaira.consts import AdataIds, AdataIdsSfaira, META_DATA_FIELDS, OCS from sfaira.data.utils import collapse_matrix, read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -74,9 +74,12 @@ class DatasetBase(abc.ABC): genome: Union[None, str] _age: Union[None, str] - _assay: Union[None, str] + _assay_sc: Union[None, str] + _assay_differentiation: Union[None, str] + 
_assay_type_differentiation: Union[None, str] _author: Union[None, str] _bio_sample: Union[None, str] + _cell_line: Union[None, str] _development_stage: Union[None, str] _doi: Union[None, str] _download_url_data: Union[Tuple[List[None]], Tuple[List[str]], None] @@ -91,12 +94,16 @@ class DatasetBase(abc.ABC): _organism: Union[None, str] _sex: Union[None, str] _source: Union[None, str] + _sample_source: Union[None, str] _state_exact: Union[None, str] _bio_sample: Union[None, str] _year: Union[None, int] _age_obs_key: Union[None, str] - _assay_obs_key: Union[None, str] + _assay_sc_obs_key: Union[None, str] + _assay_differentiation_obs_key: Union[None, str] + _assay_type_differentiation_obs_key: Union[None, str] + _assay_cell_line_obs_key: Union[None, str] _cellontology_class_obs_key: Union[None, str] _cellontology_id_obs_key: Union[None, str] _cellontology_original_obs_key: Union[None, str] @@ -108,6 +115,7 @@ class DatasetBase(abc.ABC): _organ_obs_key: Union[None, str] _organism_obs_key: Union[None, str] _bio_sample_obs_key: Union[None, str] + _sample_source_obs_key: Union[None, str] _sex_obs_key: Union[None, str] _state_exact_obs_key: Union[None, str] _tech_sample_obs_key: Union[None, str] @@ -146,7 +154,11 @@ def __init__( self._age = None self._author = None + self._assay_sc = None + self._assay_differentiation = None + self._assay_type_differentiation = None self._bio_sample = None + self._cell_line = None self._development_stage = None self._doi = None self._download_url_data = None @@ -159,7 +171,7 @@ def __init__( self._normalization = None self._organ = None self._organism = None - self._assay = None + self._sample_source = None self._sex = None self._source = None self._state_exact = None @@ -167,6 +179,11 @@ def __init__( self._year = None self._age_obs_key = None + self._assay_sc_obs_key = None + self._assay_differentiation_obs_key = None + self._assay_type_differentiation_obs_key = None + self._bio_sample_obs_key = None + self._cell_line_obs_key = None 
self._cellontology_class_obs_key = None self._cellontology_id_obs_key = None self._cellontology_original_obs_key = None @@ -176,8 +193,7 @@ def __init__( self._individual_obs_key = None self._organ_obs_key = None self._organism_obs_key = None - self._assay_obs_key = None - self._bio_sample_obs_key = None + self._sample_source_obs_key = None self._sex_obs_key = None self._state_exact_obs_key = None self._tech_sample_obs_key = None @@ -548,7 +564,7 @@ def _match_features_to_reference(self): uns=self.adata.uns ) - def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): + def _set_metadata_in_adata(self, adata_ids: AdataIds): """ Copy meta data from dataset class in .anndata. @@ -568,8 +584,13 @@ def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): # These are saved in .uns if they are data set wide to save memory. for x, y, z, v in ( [self.age, adata_ids.age, self.age_obs_key, self.ontology_container_sfaira.age], - [self.assay, adata_ids.assay, self.assay_obs_key, self.ontology_container_sfaira.assay], + [self.assay_sc, adata_ids.assay_sc, self.assay_sc_obs_key, self.ontology_container_sfaira.assay_sc], + [self.assay_differentiation, adata_ids.assay_differentiation, self.assay_differentiation_obs_key, + self.ontology_container_sfaira.assay_differentiation], + [self.assay_type_differentiation, adata_ids.assay_type_differentiation, + self.assay_type_differentiation_obs_key, self.ontology_container_sfaira.assay_type_differentiation], [self.bio_sample, adata_ids.bio_sample, self.bio_sample_obs_key, None], + [self.cell_line, adata_ids.cell_line, self.cell_line_obs_key, adata_ids.cell_line], [self.development_stage, adata_ids.development_stage, self.development_stage_obs_key, self.ontology_container_sfaira.developmental_stage], [self.ethnicity, adata_ids.ethnicity, self.ethnicity_obs_key, @@ -579,6 +600,8 @@ def _set_metadata_in_adata(self, adata_ids: AdataIdsExtended): [self.organ, adata_ids.organ, self.organ_obs_key, 
self.ontology_container_sfaira.organism], [self.organism, adata_ids.organism, self.organism_obs_key, self.ontology_container_sfaira.organism], + [self.sample_source, adata_ids.sample_source, self.sample_source_obs_key, + self.ontology_container_sfaira.sample_source], [self.sex, adata_ids.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], [self.state_exact, adata_ids.state_exact, self.state_exact_obs_key, None], [self.tech_sample, adata_ids.tech_sample, self.tech_sample_obs_key, None], @@ -974,14 +997,18 @@ def write_meta( # Expand table by variably cell-wise or data set-wise meta data: for x in [ self._adata_ids_sfaira.age, - self._adata_ids_sfaira.assay, + self._adata_ids_sfaira.assay_sc, + self._adata_ids_sfaira.assay_differentiation, + self._adata_ids_sfaira.assay_type_differentiation, self._adata_ids_sfaira.bio_sample, + self._adata_ids_sfaira.cell_line, self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.ethnicity, self._adata_ids_sfaira.healthy, self._adata_ids_sfaira.individual, self._adata_ids_sfaira.organ, self._adata_ids_sfaira.organism, + self._adata_ids_sfaira.sample_source, self._adata_ids_sfaira.sex, self._adata_ids_sfaira.state_exact, self._adata_ids_sfaira.tech_sample, @@ -1022,7 +1049,7 @@ def clean(s): self.id = f"{clean(self._organism)}_" \ f"{clean(self._organ)}_" \ f"{self._year}_" \ - f"{clean(self._assay)}_" \ + f"{clean(self._assay_sc)}_" \ f"{clean(author)}_" \ f"{idx}_" \ f"{self.doi}" @@ -1065,23 +1092,60 @@ def annotated(self) -> Union[bool, None]: return None @property - def assay(self) -> Union[None, str]: - if self._assay is not None: - return self._assay + def assay_sc(self) -> Union[None, str]: + if self._assay_sc is not None: + return self._assay_sc else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.assay in self.meta.columns: - return self.meta[self._adata_ids_sfaira.assay] + if self.meta is not None and self._adata_ids_sfaira.assay_sc in 
self.meta.columns: + return self.meta[self._adata_ids_sfaira.assay_sc] else: return None - @assay.setter - def assay(self, x: str): - self.__erasing_protection(attr="protocol", val_old=self._assay, val_new=x) - self._value_protection(attr="protocol", allowed=self.ontology_container_sfaira.assay, - attempted=x) - self._assay = x + @assay_sc.setter + def assay_sc(self, x: str): + self.__erasing_protection(attr="assay_sc", val_old=self._assay_sc, val_new=x) + self._value_protection(attr="assay_sc", allowed=self.ontology_container_sfaira.assay_sc, attempted=x) + self._assay_sc = x + + @property + def assay_differentiation(self) -> Union[None, str]: + if self._assay_differentiation is not None: + return self._assay_differentiation + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.assay_differentiation in self.meta.columns: + return self.meta[self._adata_ids_sfaira.assay_differentiation] + else: + return None + + @assay_differentiation.setter + def assay_differentiation(self, x: str): + self.__erasing_protection(attr="assay_differentiation", val_old=self._assay_differentiation, val_new=x) + self._value_protection(attr="assay_differentiation", + allowed=self.ontology_container_sfaira.assay_differentiation, attempted=x) + self._assay_differentiation = x + + @property + def assay_type_differentiation(self) -> Union[None, str]: + if self._assay_type_differentiation is not None: + return self._assay_type_differentiation + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.assay_type_differentiation in self.meta.columns: + return self.meta[self._adata_ids_sfaira.assay_type_differentiation] + else: + return None + + @assay_type_differentiation.setter + def assay_type_differentiation(self, x: str): + self.__erasing_protection(attr="assay_type_differentiation", val_old=self._assay_type_differentiation, val_new=x) + 
self._value_protection(attr="assay_type_differentiation", + allowed=self.ontology_container_sfaira.assay_type_differentiation, attempted=x) + self._assay_type_differentiation = x @property def author(self) -> str: @@ -1116,6 +1180,23 @@ def bio_sample(self, x: str): self.__erasing_protection(attr="bio_sample", val_old=self._bio_sample, val_new=x) self._bio_sample = x + @property + def cell_line(self) -> Union[None, str]: + if self._cell_line is not None: + return self._cell_line + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.cell_line in self.meta.columns: + return self.meta[self._adata_ids_sfaira.cell_line] + else: + return None + + @cell_line.setter + def cell_line(self, x: str): + self.__erasing_protection(attr="cell_line", val_old=self._cell_line, val_new=x) + self._cell_line = x + @property def data_dir(self): # Data is either directly in user supplied directory or in a sub directory if the overall directory is managed @@ -1371,13 +1452,31 @@ def age_obs_key(self, x: str): self._age_obs_key = x @property - def assay_obs_key(self) -> str: - return self._assay_obs_key + def assay_sc_obs_key(self) -> str: + return self._assay_sc_obs_key + + @assay_sc_obs_key.setter + def assay_sc_obs_key(self, x: str): + self.__erasing_protection(attr="assay_sc_obs_key", val_old=self._assay_sc_obs_key, val_new=x) + self._assay_sc_obs_key = x + + @property + def assay_differentiation_obs_key(self) -> str: + return self._assay_differentiation_obs_key - @assay_obs_key.setter - def assay_obs_key(self, x: str): - self.__erasing_protection(attr="assay_obs_key", val_old=self._assay_obs_key, val_new=x) - self._assay_obs_key = x + @assay_differentiation_obs_key.setter + def assay_differentiation_obs_key(self, x: str): + self.__erasing_protection(attr="assay_differentiation_obs_key", val_old=self._assay_differentiation_obs_key, val_new=x) + self._assay_differentiation_obs_key = x + + @property + def 
assay_type_differentiation_obs_key(self) -> str: + return self._assay_type_differentiation_obs_key + + @assay_type_differentiation_obs_key.setter + def assay_type_differentiation_obs_key(self, x: str): + self.__erasing_protection(attr="assay_type_differentiation_otype_bs_key", val_old=self._assay_differentiation_obs_key, val_new=x) + self._assay_type_differentiation_obs_key = x @property def bio_sample_obs_key(self) -> str: @@ -1388,6 +1487,15 @@ def bio_sample_obs_key(self, x: str): self.__erasing_protection(attr="bio_sample_obs_key", val_old=self._bio_sample_obs_key, val_new=x) self._bio_sample_obs_key = x + @property + def cell_line_obs_key(self) -> str: + return self._cell_line_obs_key + + @cell_line_obs_key.setter + def cell_line_obs_key(self, x: str): + self.__erasing_protection(attr="cell_line_obs_key", val_old=self._cell_line_obs_key, val_new=x) + self._cell_line_obs_key = x + @property def cellontology_class_obs_key(self) -> str: return self._cellontology_class_obs_key @@ -1471,6 +1579,15 @@ def organism_obs_key(self, x: str): self.__erasing_protection(attr="organism_obs_key", val_old=self._organism_obs_key, val_new=x) self._organism_obs_key = x + @property + def sample_source_obs_key(self) -> str: + return self._sample_source_obs_key + + @sample_source_obs_key.setter + def sample_source_obs_key(self, x: str): + self.__erasing_protection(attr="sample_source_obs_key", val_old=self._sample_source_obs_key, val_new=x) + self._sample_source_obs_key = x + @property def sex_obs_key(self) -> str: return self._sex_obs_key @@ -1534,6 +1651,24 @@ def organism(self, x: str): self._value_protection(attr="organism", allowed=self.ontology_container_sfaira.organism, attempted=x) self._organism = x + @property + def sample_source(self) -> Union[None, str]: + if self._sample_source is not None: + return self._sample_source + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.sample_source in self.meta.columns: + 
return self.meta[self._adata_ids_sfaira.sample_source] + else: + return None + + @sample_source.setter + def sample_source(self, x: str): + self.__erasing_protection(attr="sample_source", val_old=self._sample_source, val_new=x) + self._value_protection(attr="sample_source", allowed=self.ontology_container_sfaira.sample_source, attempted=x) + self._sample_source = x + @property def sex(self) -> Union[None, str]: if self._sex is not None: @@ -1716,13 +1851,17 @@ def subset_cells(self, key, values): :param key: Property to subset by. Options: - "age" points to self.age_obs_key - - "assay" points to self.assay_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key - "sex" points to self.sex_obs_key - "state_exact" points to self.state_exact_obs_key :param values: Classes to overlap to. diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 204674717..746ba88d1 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -433,13 +433,17 @@ def subset_cells(self, key, values: Union[str, List[str]]): :param key: Property to subset by. 
Options: - "age" points to self.age_obs_key - - "assay" points to self.assay_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key - "sex" points to self.sex_obs_key - "state_exact" points to self.state_exact_obs_key :param values: Classes to overlap to. @@ -788,13 +792,17 @@ def load_tobacked( self.adata.X = X keys = [ self._adata_ids_sfaira.annotated, - self._adata_ids_sfaira.assay, + self._adata_ids_sfaira.assay_sc, + self._adata_ids_sfaira.assay_differentiation, + self._adata_ids_sfaira.assay_type_differentiation, self._adata_ids_sfaira.author, + self._adata_ids_sfaira.cell_line, self._adata_ids_sfaira.dataset, self._adata_ids_sfaira.cell_ontology_class, self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.normalization, self._adata_ids_sfaira.organ, + self._adata_ids_sfaira.sample_type, self._adata_ids_sfaira.state_exact, self._adata_ids_sfaira.year, ] @@ -891,13 +899,17 @@ def subset_cells(self, key, values: Union[str, List[str]]): :param key: Property to subset by. 
Options: - "age" points to self.age_obs_key - - "assay" points to self.assay_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key - "sex" points to self.sex_obs_key - "state_exact" points to self.state_exact_obs_key :param values: Classes to overlap to. diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index 6d497c2ae..f4d358d29 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -66,5 +66,5 @@ def _load(self): self.organ = str(self.fn).split("_")[3] # TODO interface this properly # self.organ = adata.obs["tissue"].values[0] self.organism = adata.obs[self._adata_ids_cellxgene.organism].values[0] - self.protocol = adata.obs[self._adata_ids_cellxgene.assay].values[0] + self.assay_sc = adata.obs[self._adata_ids_cellxgene.assay_sc].values[0] self.year = adata.uns[self._adata_ids_cellxgene.year] diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index 9b21d28b6..4a6562bbe 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -20,13 +20,14 @@ def __init__(self, **kwargs): self.doi = "10.1016/j.cell.2017.09.004" self.healthy = True self.normalization = "raw" - self.protocol = "Smart-seq2" + self.assay_sc = "Smart-seq2" self.organ = "islet of Langerhans" self.organism = "human" self.state_exact = "healthy" self.year = 2017 self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" + self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index db3a4daba..3a6e604a3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -304,9 +304,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.healthy = True self.organism = "mouse" - self.protocol = "microwell-seq" + self.assay_sc = "microwell-seq" self.state_exact = "healthy" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index 5015bbd13..8c0865c7b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -19,10 +19,16 @@ dataset_wise: dataset_or_observation_wise: age: age_obs_key: "Age" - assay: "10X 
sequencing" - assay_obs_key: + assay_sc: "10X sequencing" + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: bio_sample: bio_sample_obs_key: + cell_line: + cell_line_obs_key: development_stage: development_stage_obs_key: ethnicity: @@ -35,6 +41,8 @@ dataset_or_observation_wise: organ_obs_key: organism: "human" organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: sex: sex_obs_key: "Sex" state_exact: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index a0dece529..35a859794 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -19,12 +19,12 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "colonic epithelium" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 - self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" + self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index a732e7f8f..41efd5bc5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -19,12 +19,13 @@ def __init__(self, **kwargs): self.normalization = 
"raw" self.organ = "ileum" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" self.cellontology_original_obs_key = "CellType" + self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index 328c92995..85d7b9680 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -25,8 +25,9 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.organ = "prostate gland" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index ba51fac79..9e4292cb6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -22,9 +22,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "human" - self.protocol = "inDrop" + self.assay_sc = "inDrop" self.state_exact = "healthy" self.year = 2016 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = 
"CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index a6d85b5f6..c09b1c1bd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -20,8 +20,9 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "human" - self.protocol = "Smart-seq2" + self.assay_sc = "Smart-seq2" self.year = 2016 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index c79eeea5a..42db715c5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -31,9 +31,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "diabetic" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltypes" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index fe1097067..ad68f67fd 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -19,9 +19,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2020 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "Cell_type" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index cb6c7e413..85b03be11 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -19,9 +19,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "brain" self.organism = "human" - self.protocol = "DroNc-seq" + self.assay_sc = "DroNc-seq" self.state_exact = "healthy" self.year = 2017 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 58459a2fd..5080b35b8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -19,9 +19,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "testis" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X 
sequencing" self.state_exact = "healthy" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index a140306a4..4b5c84fe6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "caudate lobe of liver" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 558d5e99a..9e6fe338d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -20,9 +20,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.protocol = "DroNc-seq" + self.assay_sc = "DroNc-seq" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index fb894d478..abb8bc000 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -61,10 +61,11 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = SAMPLE_DICT[self.sample_fn][0] self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = SAMPLE_DICT[self.sample_fn][2] self.healthy = SAMPLE_DICT[self.sample_fn][2] == "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "Gene" self.var_ensembl_col = "Accession" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 4344c2d0d..306b32d31 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -17,9 +17,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "retina" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 3e9f3e8c2..66fe750f5 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -25,9 +25,10 @@ def __init__(self, **kwargs): self.organ = "placenta" self.organism = "human" self.doi = "10.1038/s41586-018-0698-6" - self.protocol = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" + self.assay_sc = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" self.state_exact = "healthy" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py index 89087d9c1..b0720dd5d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "CEL-seq2" + self.assay_sc = "CEL-seq2" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 6ec35942e..aaa209e7f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ 
-15,8 +15,9 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index a652ef6fc..eaeb10890 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -17,9 +17,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "cell.labels" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 0f9d135c4..8fe73a14d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -23,9 +23,10 @@ def __init__(self, **kwargs): self.healthy = True self.normalization = "raw" self.organism = "human" - self.protocol = "microwell-seq" + self.assay_sc = "microwell-seq" self.state_exact = "healthy" self.year = 2020 + self.sample_source = "primary_tissue" self.bio_sample_obs_key = "sample" self.cellontology_original_obs_key = "celltype_specific" @@ -203,7 +204,7 @@ def load(data_dir, **kwargs): # tidy up the column names of the 
obs annotations adata.obs.columns = [ "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", - "age", "celltype_specific", "cluster_specific", "gender", "protocol", "source"] + "age", "celltype_specific", "cluster_specific", "gender", "assay_sc", "source"] adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index 7cbbab6e1..c2f7e9626 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -17,14 +17,20 @@ dataset_wise: dataset_or_observation_wise: age: age_obs_key: - assay: + assay_sc: droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10X sequencing" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "Smart-seq2" - assay_obs_key: + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: bio_sample: bio_sample_obs_key: droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "orig.ident*sample*magnetic.selection" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "patient*sample" + cell_line: + cell_line_obs_key: development_stage: development_stage_obs_key: ethnicity: @@ -37,6 +43,8 @@ dataset_or_observation_wise: organ_obs_key: organism: "human" organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: sex: sex_obs_key: state_exact: "healthy" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index f38b1105a..82cfefc99 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -19,9 +19,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "colon" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2020 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index bbf6cc8c6..ba992d30e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "Drop-seq" + self.assay_sc = "Drop-seq" self.state_exact = "uninvolved areas of tumour resection material" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index 406b81122..178073dad 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -22,10 +22,11 @@ def __init__(self, **kwargs): self.healthy = True self.organ = "bronchus" if self.sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung 
parenchyma" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 self.normalization = "norm" + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index 694bbc303..b5fce142e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -21,9 +21,10 @@ def __init__(self, **kwargs): self.healthy = True self.normalization = "raw" self.organism = "mouse" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.bio_sample_obs_key = "sample" self.cellontology_original_obs_key = "cluster" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index c873b84ca..38268662e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -20,10 +20,11 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2020 self.doi = "10.1038/s41597-019-0351-8" + self.sample_source = "primary_tissue" self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index 984672017..e7217c49d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organ = "retina" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 8ae0e23a0..1a71d7dbd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -27,9 +27,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = organ self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 6414f9197..08087c757 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -24,9 +24,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2020 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 9caa27439..7f926341f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -88,9 +88,10 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organism = "mouse" self.organ = organ - self.protocol = "10X sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" + self.assay_sc = "10X sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_ensembl_col = None self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 177ddabce..76c3916f9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -21,8 +21,9 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung parenchyma" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.year = 2020 + self.sample_source = 
"primary_tissue" self.var_symbol_col = "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index c030fcd9c..4d43a8cf7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -21,9 +21,10 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organ = "kidney" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "ID" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index b0fcc5b7e..68830d195 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organ = "thymus" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2020 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "Anno_level_fig1" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml index d4b120761..fa2c1945f 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml @@ -13,10 +13,16 @@ dataset_wise: dataset_or_observation_wise: age: age_obs_key: "Age" - assay: "sci-RNA-seq" - assay_obs_key: + assay_sc: "sci-RNA-seq" + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: bio_sample: bio_sample_obs_key: + cell_line: + cell_line_obs_key: development_stage: development_stage_obs_key: "Development_day" ethnicity: @@ -29,6 +35,8 @@ dataset_or_observation_wise: organ_obs_key: "Organ" organism: "human" organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: sex: sex_obs_key: "Sex" state_exact: diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index d2e8cc534..18e848698 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -39,9 +39,10 @@ def __init__(self, **kwargs): self.organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "Celltypes" diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py 
b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index 3d61bde9f..74dab7473 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -20,9 +20,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "retina" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index 9c13411d7..46040dfdf 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -21,9 +21,10 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "blood" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2019 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 34d6a21ea..7d68ca517 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -18,9 +18,10 @@ def __init__(self, **kwargs): self.normalization = 
"raw" self.organ_obs_key = "derived_organ_parts_label" self.organism = "human" - self.protocol = "10X sequencing" + self.assay_sc = "10X sequencing" self.state_exact = "healthy" self.year = 2018 + self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "Accession" diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 742e3cad3..1eecb32eb 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -45,13 +45,17 @@ def __init__( self.download_url_meta = "." # self.age # not currently supported + # self.assay_sc # not currently supported + # self.assay_differentiation # not currently supported + # self.assay_type_differentiation # not currently supported + # self.cell_line # not currently supported # self.dev_stage # not currently supported # self.ethnicity # not currently supported # self.healthy # not currently supported # self.normalisation # not currently supported self.organ = organ self.organism = organism - # self.protocol # not currently supported + # self.sample_source # not currently supported # self.sex # not currently supported # self.state_exact # not currently supported # self.year # not currently supported @@ -59,12 +63,16 @@ def __init__( self.obs_key_cellontology_original = obs_key_celltypes # self.obs_key_age # not currently supported + # self.obs_key_assay_sc # not currently supported + # self.obs_key_assay_differentiation # not currently supported + # self.obs_key_assay_type_differentiation # not currently supported + # self.obs_key_cell_line # not currently supported # self.obs_key_dev_stage # not currently supported # self.obs_key_ethnicity # not currently supported # self.obs_key_healthy # not currently supported # self.obs_key_organ # not currently supported # self.obs_key_organism # not currently supported - # self.obs_key_protocol # not currently supported + # self.obs_key_sample_source # not currently supported # self.obs_key_sex # not 
currently supported # self.obs_key_state_exact # not currently supported diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index fa845dbb0..cdfe6dd09 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -294,7 +294,8 @@ def _save_specific( """ embedding = self.estimator.predict_embedding() df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "protocol"] + ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "assay_sc", + "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] ] df_summary["ncounts"] = np.asarray( self.estimator.data.X[np.sort(self.estimator.idx_test), :].sum(axis=1)[np.argsort(self.estimator.idx_test)] @@ -366,7 +367,8 @@ def _save_specific( ytrue = self.estimator.ytrue() yhat = self.estimator.predict() df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "protocol"] + ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "assay_sc", + "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] ] df_summary["ncounts"] = np.asarray(self.estimator.data.X[self.estimator.idx_test, :].sum(axis=1)).flatten() np.save(file=fn + "_ytrue", arr=ytrue) diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index b221c7ed6..349707146 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py @@ -1,4 +1,4 @@ from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyObo, \ OntologyCelltypes, OntologyUberon, OntologyHancestro, OntologyHsapdv, OntologyMmusdv, \ - OntologySinglecellLibraryConstruction + OntologySinglecellLibraryConstruction, OntologyCellosaurus from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 
c9b97c33c..e1f50bed4 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -668,6 +668,48 @@ def synonym_node_properties(self) -> List[str]: return ["synonym"] +class OntologyCellosaurus(OntologyExtendedObo): + + def __init__( + self, + **kwargs + ): + download_link = "https://ftp.expasy.org/databases/cellosaurus/cellosaurus.obo" + + if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet + super().__init__(obo=download_link) + else: + # Identify cache: + folder = FILE_PATH.split(os.sep)[:-4] + folder.insert(1, os.sep) + ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cellosaurus") + fn = "cellosaurus.obo" + obofile = os.path.join(ontology_cache_dir, fn) + # Download if necessary: + if not os.path.isfile(obofile): + def download_cl(): + print(f"Downloading: {fn}") + if not os.path.exists(ontology_cache_dir): + os.makedirs(ontology_cache_dir) + r = requests.get(download_link, allow_redirects=True) + open(obofile, 'wb').write(r.content) + download_cl() + super().__init__(obo=obofile) + + # Clean up nodes: + # edge_types = ["derived_from", "originate_from_same_individual_as"] + nodes_to_delete = [] + for k, v in self.graph.nodes.items(): + if "name" not in v.keys(): + nodes_to_delete.append(k) + for k in nodes_to_delete: + self.graph.remove_node(k) + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonym"] + + class OntologySinglecellLibraryConstruction(OntologyEbi): def __init__( From 945f0a9f6dc3b558697cefbf19662268de7a3e87 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 11 Mar 2021 12:29:28 +0100 Subject: [PATCH 090/161] Add organoid datasets (#178) * support special character ' in assay_sc field * add kanton organoid datasets * add doi to kanton dataset * write kanton load function * fix cell line ontology reading [skip CI] * rename cancer to tumor [skip CI] * map kanton cell lines to 
ontology [skip CI] * fix dataset streamlining [skip CI] * Revert "fix dataset streamlining" This reverts commit 1608f4362eb44656b362c449107458e1ce06557a. * temporarily remove kanton celltype annotation [skip ci] --- sfaira/consts/ontologies.py | 2 +- sfaira/data/base/dataset.py | 5 +- .../d10_1038_s41586_019_1654_9/__init__.py | 1 + ..._brain_2019_10x3v2sequencing_kanton_001.py | 37 ++++++++++++ ...rain_2019_10x3v2sequencing_kanton_001.yaml | 57 +++++++++++++++++++ .../create_anatomical_configs.py | 2 +- 6 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index a920652b5..bdacbfe34 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -23,7 +23,7 @@ def __init__(self): self.normalization = None self.organ = OntologyUberon() self.organism = OntologyList(terms=["mouse", "human"]) - self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "cancer"]) + self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "tumor"]) self.sex = OntologyList(terms=["female", "male"]) self.year = OntologyList(terms=list(range(2000, 3000))) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 6a2212320..d780d2cdb 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -590,7 +590,8 @@ def _set_metadata_in_adata(self, adata_ids: AdataIds): [self.assay_type_differentiation, adata_ids.assay_type_differentiation, self.assay_type_differentiation_obs_key, self.ontology_container_sfaira.assay_type_differentiation], [self.bio_sample, adata_ids.bio_sample, 
self.bio_sample_obs_key, None], - [self.cell_line, adata_ids.cell_line, self.cell_line_obs_key, adata_ids.cell_line], + [self.cell_line, adata_ids.cell_line, self.cell_line_obs_key, + self.ontology_container_sfaira.cell_line], [self.development_stage, adata_ids.development_stage, self.development_stage_obs_key, self.ontology_container_sfaira.developmental_stage], [self.ethnicity, adata_ids.ethnicity, self.ethnicity_obs_key, @@ -1032,7 +1033,7 @@ def set_dataset_id( ): def clean(s): if s is not None: - s = s.replace(' ', '').replace('-', '').replace('_', '').lower() + s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() return s if self.sample_fn is not None: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/__init__.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py new file mode 100644 index 000000000..3e0f9756b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py @@ -0,0 +1,37 @@ +import anndata +import os +import scipy.io +import zipfile +import pandas + + +def load(data_dir, **kwargs): + + cell_line_dict = { + '409b2': '409B2', + 'H9': 'WA09', + 'Wibj2': 'HPSI0214i-wibj_2', + 'Sc102a1': 'SC102A-1', + 'Kucg2': 'HPSI0214i-kucg_2', + 'Hoik1': 'HPSI0314i-hoik_1', + 'Sojd3': 'HPSI0314i-sojd_3', + } + + fn = [ + os.path.join(data_dir, "E-MTAB-7552.processed.3.zip"), + os.path.join(data_dir, "E-MTAB-7552.processed.1.zip"), + os.path.join(data_dir, "E-MTAB-7552.processed.7.zip") + ] + with zipfile.ZipFile(fn[0]) as archive: + 
x = scipy.io.mmread(archive.open('human_cell_counts_GRCh38.mtx')).T.tocsr() + with zipfile.ZipFile(fn[1]) as archive: + var = pandas.read_csv(archive.open('genes_GRCh38.txt'), sep="\t", index_col=1, names=['ensembl', 'genetype']) + with zipfile.ZipFile(fn[2]) as archive: + obs = pandas.read_csv(archive.open('metadata_human_cells.tsv'), sep="\t", index_col=0) + adata = anndata.AnnData(X=x, var=var, obs=obs) + + adata.obs["Line"] = [cell_line_dict[x] for x in adata.obs["Line"]] + + # TODO: remove non-protein coding genes? + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml new file mode 100644 index 000000000..455de3e9c --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -0,0 +1,57 @@ +dataset_structure: + dataset_index: 1 + sample_fns: +dataset_wise: + author: + - "Kanton" + doi: + - "10.1038/s41586-019-1654-9" + download_url_data: + - "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7552/E-MTAB-7552.processed.3.zip" + download_url_meta: + - "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7552/E-MTAB-7552.processed.1.zip" + - "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7552/E-MTAB-7552.processed.7.zip" + normalization: "raw" + year: 2019 +dataset_or_observation_wise: + age: + age_obs_key: + assay_sc: "10X 3' v2 sequencing" + assay_sc_obs_key: + assay_differentiation: "Lancaster, 2014 (doi: 10.1038/nprot.2014.158)" + assay_differentiation_obs_key: + assay_type_differentiation: "unguided" + assay_type_differentiation_obs_key: + bio_sample: + bio_sample_obs_key: "Sample" + cell_line: + cell_line_obs_key: "Line" + development_stage: + development_stage_obs_key: "Stage" + ethnicity: + ethnicity_obs_key: + healthy: True + healthy_obs_key: + individual: + individual_obs_key: + organ: "brain" + 
organ_obs_key: + organism: "human" + organism_obs_key: + sample_source: "3d_culture" + sample_source_obs_key: + sex: + sex_obs_key: + state_exact: "healthy" + state_exact_obs_key: + tech_sample: + tech_sample_obs_key: +observation_wise: + cellontology_original_obs_key: None # TODO: figure out which celltype labels to add here +feature_wise: + var_ensembl_col: "ensembl" + var_symbol_col: "index" +misc: + healthy_state_healthy: +meta: + version: "1.0" diff --git a/sfaira/data/utils_scripts/create_anatomical_configs.py b/sfaira/data/utils_scripts/create_anatomical_configs.py index 34b84bf6f..7ccb94900 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs.py @@ -15,7 +15,7 @@ def clean(s): if s is not None: - s = s.replace(' ', '').replace('-', '').replace('_', '').lower() + s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() return s From 09f372c3b1b8d6992d38d04dea6c99f102b53496 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Thu, 11 Mar 2021 16:42:26 +0100 Subject: [PATCH 091/161] Moving the CLI to yaml based dataloaders (#182) * first draft of yaml creation Signed-off-by: Zethson * add extra_description.txt to template Signed-off-by: Zethson * fix template creation wf Signed-off-by: Zethson * add new clean and lint Signed-off-by: Zethson * add adding datasets documentation Signed-off-by: Zethson * fix extra_description.txt content Signed-off-by: Zethson * remove clean and test dataloader docs Signed-off-by: Zethson * add multiple datasets template Signed-off-by: Zethson * fix create template wf Signed-off-by: Zethson * fix YAML structure Signed-off-by: Zethson * add lint for multiple datasets Signed-off-by: Zethson --- .bandit.yml | 2 +- .github/workflows/create_templates.yml | 20 +- .github/workflows/run_bandit.yml | 2 +- docs/adding_datasets.rst | 305 +++++++++++++++++ docs/data.rst | 306 ------------------ docs/index.rst | 1 + requirements.txt | 5 +- setup.cfg 
| 2 +- sfaira/cli.py | 4 +- sfaira/commands/clean_dataloader.py | 45 +-- sfaira/commands/create_dataloader.py | 96 +++--- sfaira/commands/lint_dataloader.py | 101 ++---- .../cookiecutter.json | 9 +- .../hooks}/__init__.py | 0 .../hooks/post_gen_project.py | 15 + .../__init__.py | 0 .../extra_description.txt} | 0 .../{{ cookiecutter.id_without_doi }}.py | 11 + .../{{ cookiecutter.id_without_doi }}.yaml | 58 ++++ .../{{ cookiecutter.id_without_doi }}.py | 67 ---- .../cookiecutter.json | 15 - .../{{ cookiecutter.id_without_doi }}.py | 79 ----- .../cookiecutter.json | 15 - .../{{ cookiecutter.id_without_doi }}.py | 76 ----- .../single_dataset/cookiecutter.json | 9 +- .../single_dataset/hooks/__init__.py | 0 .../single_dataset/hooks/post_gen_project.py | 15 + .../extra_description.txt | 0 .../{{ cookiecutter.id_without_doi }}.py | 70 +--- .../{{ cookiecutter.id_without_doi }}.yaml | 44 +++ sfaira/commands/test_dataloader.py | 9 +- 31 files changed, 566 insertions(+), 815 deletions(-) create mode 100644 docs/adding_datasets.rst rename sfaira/commands/templates/{multiple_datasets_not_streamlined => multiple_datasets}/cookiecutter.json (58%) rename sfaira/commands/templates/{multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }} => multiple_datasets/hooks}/__init__.py (100%) create mode 100644 sfaira/commands/templates/multiple_datasets/hooks/post_gen_project.py rename sfaira/commands/templates/{multiple_datasets_single_file => multiple_datasets}/{{ cookiecutter.doi_sfaira_repr }}/__init__.py (100%) rename sfaira/commands/templates/{multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py => multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt} (100%) create mode 100644 sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml 
delete mode 100644 sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py delete mode 100644 sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json delete mode 100644 sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py delete mode 100644 sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json delete mode 100644 sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py create mode 100644 sfaira/commands/templates/single_dataset/hooks/__init__.py create mode 100644 sfaira/commands/templates/single_dataset/hooks/post_gen_project.py create mode 100644 sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt create mode 100644 sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml diff --git a/.bandit.yml b/.bandit.yml index 21892526a..21917d362 100644 --- a/.bandit.yml +++ b/.bandit.yml @@ -4,4 +4,4 @@ tests: [] # (optional) list skipped tests here: -skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310'] +skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506'] diff --git a/.github/workflows/create_templates.yml b/.github/workflows/create_templates.yml index 62868a1fd..d6b62c024 100644 --- a/.github/workflows/create_templates.yml +++ b/.github/workflows/create_templates.yml @@ -31,23 +31,5 @@ jobs: - name: Create single_dataset template run: | cd .. - echo -e "\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader - rm -rf d10_1000_j_journal_2021_01_001/ - - - name: Create multiple_datasets_single_file template - run: | - cd .. 
- echo -e "\033[B\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader - rm -rf d10_1000_j_journal_2021_01_001/ - - - name: Create multiple_datasets_streamlined template - run: | - cd .. - echo -e "\033[B\n\033[B\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader - rm -rf d10_1000_j_journal_2021_01_001/ - - - name: Create multiple_datasets_not_streamlined template - run: | - cd .. - echo -e "\033[B\n\033[B\n\033[B\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + echo -e "\n\n\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader rm -rf d10_1000_j_journal_2021_01_001/ diff --git a/.github/workflows/run_bandit.yml b/.github/workflows/run_bandit.yml index 95719c26f..ddef63684 100644 --- a/.github/workflows/run_bandit.yml +++ b/.github/workflows/run_bandit.yml @@ -9,7 +9,7 @@ on: - "**/*.py" jobs: - build: + run-bandit: runs-on: ubuntu-latest if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst new file mode 100644 index 000000000..8a2ccc1bf --- /dev/null +++ b/docs/adding_datasets.rst @@ -0,0 +1,305 @@ +Adding data sets +=================== + +Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. +This process requires a couple of steps as outlined in the following sections. + + 1. Write a dataloader as outlined below. + 2. Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as required by your data loader. + 3. You can contribute the data loader to public sfaira, we do not manage data upload though. + During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. 
+The following sections will first describe the underlying design principles of sfaira dataloaders and +then explain how to interactively create, validate and test dataloaders. + +Use data loaders on existing data repository +-------------------------------------------- + +You only want to use data sets with existing data loaders and have adapted your directory structure as above? +In that case, you can immediately start using the data loader functions, you just need to supply the root directory +of the directory structure as `path` to the constructor of the class that you are using. +Depending on the functionalities you want to use, you would often want to create a directory with cached meta data +first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to +anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. + +Writing dataloaders +--------------------- + +The study-centric data loader module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the sfaira code, data loaders are organised into directories, which correspond to publications. +All data loaders corresponding to data sets of one study are grouped into this directory. +Next, each data set is represented by one data loader python file in this directory. +See below for more complex set ups with repetitive data loader code. + +Check that the data loader was not already implemented +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We will open issues for all planned data loaders, so you can search both the code_ base and our GitHub issues_ for +matching data loaders before you start writing one. +The core data loader identifier is the directory compatible doi, +which is the doi with all special characters replaced by "_" and a "d" prefix is used: +"10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029".
+Searching for this string should yield a match if it is already implemented, take care to look for both +preprint and publication DOIs if both are available. We will also mention publication names in issues, you will however not find these in the code. + +.. _code: https://github.com/theislab/sfaira/tree/dev +.. _issues: https://github.com/theislab/sfaira/issues + + +The data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each data set (organism, organ, assay_sc, optionally also batches) has its own data loader class. Each such class is +in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class +looks very similar in parts to a cell in a Jupyter notebook that performs data loading. The core features that must be included are: + +1. A constructor of the following form that can be used to interact with the data set +before it is loaded into memory: + +.. code-block:: python + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi).
+ + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files + + self.age = x # (*, optional) age of sample + self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description of what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out.
+ self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contains streamlined cell ontology cell type classes: + self.obs_key_cellontology_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and + # directory as the python file of this data loader (see below). + + +2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. +Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e. the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. + +.. 
code-block:: python + + def _load(self, fn=None): + # assuming that i uploaded an h5ad somewhere (in self.download) + if fn is None: + fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") + self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad + # Sometimes, you need to load multiple files (e.g. counts and annotation), all of this code would be here. + + +In summary, a simple example data loader for a mouse lung data set could look like this: + +.. code-block:: python + + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.author = "me" + self.doi = "my preprint" + self.download_url_data = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! + self.organ = "lung" + self.organism = "mouse" + self.assay_sc = "smart-seq2" + self.year = 2020 + self.sample_source = "primary_tissue" + + self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here + + def _load(self, fn=None): + # assuming that i uploaded an h5ad somewhere (in self.download) + if fn is None: + fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") + self.adata = anndata.read(fn) + + +Data loaders can be added into a copy of the sfaira repository and can be used locally before they are contributed to +the public sfaira repository. +Alternatively, we also provide the optional dependency sfaira_extensions (https://github.com/theislab/sfaira_extension) +in which local data and cell type annotation can be managed separately but still be loaded as usual through sfaira. +The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily +copied over. 
+ +Handling multiple data sources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you have multiple data sets in a study which are all saved in separate files which come in similar formats: +You can subclass `DatasetBaseGroupLoadingManyFiles` instead of `DatasetBase` and proceed as usual, +only with adding `SAMPLE_FNS` in the data loader file name space, +which is a list of all file names addressed with this file. +You can then refer to an additional property of the Dataset class, `self.sample_fn` during loading +or when dynamically defining meta data in the constructor. +Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. + +If you have multiple data sets in a study which are all saved in one file: +You can subclass `DatasetBaseGroupLoadingOneFile` instead of `DatasetBase` and proceed as usual, +only with adding `SAMPLE_IDS` in the data loader file name space, +which is a list of all sample IDs addressed with this file. +You can then refer to an additional property of the Dataset class, `self.sample_id` during loading +or when dynamically defining meta data in the constructor. +Note that `self.sample_id` refers to a `self.adata.obs` column in the loaded data set, +this column has to be defined in `self.obs_key_sample`, which needs to be defined in the constructor. +Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. + +Creating dataloaders with the commandline interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +sfaira features an interactive way of creating, formatting and testing dataloaders. +The common workflow looks as follows: + +1. Create a new dataloader with ``sfaira create-dataloader`` +2. 
Validate the dataloader with ``sfaira lint-dataloader `` + +When creating a dataloader with ``sfaira create-dataloader`` common information such as +your name and email are prompted for, followed by dataloader specific attributes such as organ, organism and many more. +If the requested information is not available simply hit enter and continue until done. If you have mixed organ or organism +data you will have to resolve this manually later. Your dataloader template will be created in your current working directory +in a folder resembling your doi. + +The created files are: + +.. code-block:: + + ├── extra_description.txt <- Optional extra description file + ├── __init__.py + ├── NA_NA_2021_NA_Einstein_001.py <- Contains the load function to load the data + ├── NA_NA_2021_NA_Einstein_001.yaml <- Specifies all data loader data + +Now simply fill in all missing properties in your dataloader scripts and yaml file. +When done optionally run ``sfaira clean-dataloader `` on the just filled out dataloader yaml file. +All unused attributes will be removed. + +Next validate the integrity of your dataloader content with ``sfaira lint-dataloader ``. +All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information. + +Map cell type labels to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with +the same name and directory as the python file in which the data loader is located. +This .csv contains two columns with one row for each unique cell type label. +The free text identifiers in the first column "source", +and the corresponding ontology term in the second column "target". +You can write this file entirely from scratch. 
+Sfaira also allows you to generate a first guess of this file using fuzzy string matching +which is automatically executed when you run the template data loader unit test for the first time with your new loader. +Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which +ontology term in the case of conflicts. +Still, this first guess usually drastically speeds up this annotation harmonization. + +Cell type ontology management +----------------------------- + +Sfaira maintains a wrapper of the Cell Ontology as a class which allows additions to this ontology. +This allows us to use the core ontology used in the community as a backbone and to keep up with newly identified cell types on our own. +We require all extensions of the core ontology not to break the directed acyclic graph that is the ontology: +Usually, such extensions would be additional leaf nodes. + +Second, we maintain cell type universes for anatomic structures. +These are dedicated for cell type-dependent models which require a defined set of cell types. +Such a universe is a set of nodes in the ontology. + +Contribute cell types to ontology +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Please open an issue on the sfaira repo with a description of what type of cell type you want to add. + +Using ontologies to train cell type classifiers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cell type classifiers can be trained on data sets with different coarseness of cell type annotation using aggregate +cross-entropy as a loss and aggregate accuracy as a metric. +The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms +that correspond to intermediate nodes (rather than leaf nodes) are encountered in the label set. + +Metadata management +------------------- + +We constrain meta data by ontologies where possible. 
The current restrictions are: + + - .age: unconstrained string, try using units of years for human, units of months for mice and units of days for + cell culture samples + - .dev_stage: unconstrained string, this will be constrained to an ontology in the future, + try choosing from HSAPDV (http://www.obofoundry.org/ontology/hsapdv.html) for human + or from MMUSDEV (http://www.obofoundry.org/ontology/mmusdv.html) for mouse + - .cell_line: unconstrained string, this will be constrained to an ontology later. try choosing from cellosaurus + cell line database (https://web.expasy.org/cellosaurus/) + - .ethnicity: unconstrained string, this will be constrained to an ontology in the future, + try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) + - .healthy: bool + - .normalisation: unconstrained string, this will be constrained to an ontology in the future, + try using {"raw", "scaled"} + - .organ: unconstrained string, this will be constrained to an ontology in the future, try to choose + term from Uberon (http://www.obofoundry.org/ontology/uberon.html) + or from EHDAA2 (http://www.obofoundry.org/ontology/ehdaa2.html) for human + or from EMAPA (http://www.obofoundry.org/ontology/emapa.html) for mouse + - .organism: constrained string, {"mouse", "human"}. In the future, we will use NCBITAXON + (http://www.obofoundry.org/ontology/ncbitaxon.html). + - .assay_sc: unconstrained string, this will be constrained to an experimental protocol ontology in the future, + try choosing a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false + - .assay_differentiation: unconstrained string, try to provide a base differentiation protocol (eg. 
Lancaster, 2014) + as well as any amendments to the original protocol + - .assay_type_differentiation: constrained string, {"guided", "unguided"} + - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "cancer"} + - .sex: constrained string, {"female", "male"} + - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. + If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity + and use units: `[1g]` + - .year: must be an integer year, e.g. 2020 + +Follow this issue_ for details on upcoming ontology integrations. + +.. _issue: https://github.com/theislab/sfaira/issues/16 diff --git a/docs/data.rst b/docs/data.rst index a3f2a1dc8..0999d5c04 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -28,315 +28,9 @@ Contact us for support of any other repositories. .. _cellxgene: https://cellxgene.cziscience.com/ -Adding data sets -~~~~~~~~~~~~~~~~~ - -Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. -This process requires a couple of steps as outlined in the following sections. - - 1. Write a dataloader as outlined below. - 2. Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as required by your data loader. - 3. You can contribute the data loader to public sfaira, we do not manage data upload though. - During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. - -The following sections will first describe the underlying design principles of sfaira dataloaders and -then explain how to interactively create, validate and test dataloaders. 
- -Use data loaders on existing data repository --------------------------------------------- - -You only want to use data sets with existing data loaders and have adapted your directory structure as above? -In that case, you can immediately start using the data loader functions, you just need to supply the root directory -of the directory structure as `path to the constructor of the class that you are using. -Depending on the functionalities you want to use, you would often want to create a directory with cached meta data -first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to -anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. - -Writing dataloaders ---------------------- - -The study-centric data loader module -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In the sfaira code, data loaders are organised into directories, which correspond to publications. -All data loaders corresponding to data sets of one study are grouped into this directory. -Next, each data set is represented by one data loader python file in this directory. -See below for more complex set ups with repetitive data loader code. - -Check that the data loader was not already implemented -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We will open issues for all planned data loaders, so you can search both the code_ base and our GitHub issues_ for -matching data loaders before you start writing one. -The core data loader identified is the directory compatible doi, -which is the doi with all special characters replaced by "_" and a "d" prefix is used: -"10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029". -Searching for this string should yield a match if it is already implemented, take care to look for both -preprint and publication DOIs if both are available. We will also mention publication names in issues, you will however not find these in the code. - -.. 
_code: https://github.com/theislab/sfaira/tree/dev -.. _issues: https://github.com/theislab/sfaira/issues - - -The data loader python file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Each data set (organsism, organ, assay_sc, optionally also batches) has its own data loader class. Each such class is -in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class -looks very similar in parts to a cell in a juypter notebook that performs data loading. The core features that must be included are: - -1. A constructor of the following form that can be used to interact with the data set -before it is loaded into memory: - -.. code-block:: python - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # Data set meta data: You do not have to include all of these and can simply skip lines corresponding - # to attritbutes that you do not have access to. These are meta data on a sample level. - # The meta data attributes labeled with (*) may als be supplied per cell, see below, - # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - - self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). - - self.author = x # author (list) who sampled / created the data set - self.doi = x # doi of data set accompanying manuscript - - self.download_url_data = x # download website(s) of data files - self.download_url_meta = x # download website(s) of meta data files - - self.age = x # (*, optional) age of sample - self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - self.cell_line = x # (*, optional) cell line used (for cell culture samples) - self.dev_stage = x # (*, optional) developmental stage of organism - self.ethnicity = x # (*, optional) ethnicity of sample - self.healthy = x # (*, optional) whether sample represents a healthy organism - self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - self.organ = x # (*, optional) organ (anatomical structure) - self.organism = x # (*) species / organism - self.sample_source = x # (*) whether the sample came from primary tissue or cell culture - self.sex = x # (*, optional) sex - self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - self.year = x # year in which sample was acquired - - # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the - # anndata instance (specifically in .obs) after loading. - # You need to make sure this is loaded in the loading script)! - # See above for a description what these meta data attributes mean. - # Again, if these attributes are note available, you can simply leave this out. 
- self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cellontology_original = x # (optional) - # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and - # directory as the python file of this data loader (see below). - - -2. A function called to load the data set into memory: -It is important to set an automated path indicating the location of the raw files here. -Our recommendation for this directory set-up is that you define a directory folder in your directory structure -in which all of these raw files will be (self.path) and then add a sub-directory named as -`self.directory_formatted_doi` (ie. the doi with all special characters replaced by "_" and place the raw files -directly into this sub directory. - -.. 
code-block:: python - - def _load(self, fn=None): - # assuming that i uploaded an h5ad somewhere (in self.download) - if fn is None: - fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") - self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad - # Some times, you need to load multiple files (e.g. counts and annotation), all of this code would be here. - - -In summary, a simply example data loader for a mouse lung data set could look like this: - -.. code-block:: python - - class MyDataset(DatasetBase) - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.author = "me" - self.doi = "my preprint" - self.download_url_data = "my GEO upload" - self.normalisation = "raw" # because I uploaded raw counts, which is good practice! - self.organ = "lung" - self.organism = "mouse" - self.assay_sc = "smart-seq2" - self.year = "2020" - self.sample_source = "primary_tissue" - - self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here - - def _load(self, fn=None): - # assuming that i uploaded an h5ad somewhere (in self.download) - if fn is None: - fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") - self.adata = anndata.read(fn) - - -Data loaders can be added into a copy of the sfaira repository and can be used locally before they are contributed to -the public sfaira repository. -Alternatively, we also provide the optional dependency sfaira_extensions (https://github.com/theislab/sfaira_extension) -in which local data and cell type annotation can be managed separately but still be loaded as usual through sfaira. -The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily -copied over. 
- -Handling multiple data sources -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you have multiple data sets in a study which are all saved in separate files which come in similar formats: -You can subclass `DatasetBaseGroupLoadingManyFiles` instead of `DatasetBase` and proceed as usual, -only with adding `SAMPLE_FNS` in the data loader file name space, -which is a list of all file names addressed with this file. -You can then refer to an additional property of the Dataset class, `self.sample_fn` during loading -or when dynamically defining meta data in the constructor. -Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. - -If you have multiple data sets in a study which are all saved in one file: -You can subclass `DatasetBaseGroupLoadingOneFile` instead of `DatasetBase` and proceed as usual, -only with adding `SAMPLE_IDS` in the data loader file name space, -which is a list of all sample IDs addressed with this file. -You can then refer to an additional property of the Dataset class, `self.sample_id` during loading -or when dynamically defining meta data in the constructor. -Note that `self.sample_id` refers to a `self.adata.obs` column in the loaded data set, -this column has to be defined in `self.obs_key_sample`, which needs to be defined in the constructor. -Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. - -Creating dataloaders with the commandline interface -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -sfaira features an interactive way of creating, formatting and testing dataloaders. -The common workflow look as follows: - -1. Create a new dataloader with ``sfaira create-dataloader`` -2. Format and clean the dataloader with ``sfaira clean-dataloader `` -3. Validate the dataloader with ``sfaira lint-dataloader `` -4. 
Test the dataloader using ``sfaira test-dataloader `` - -When creating a dataloader with ``sfaira create-dataloader`` you are first asked for the dataloader type -which will be determined by the structure of your data (one vs many files etc). Next, common information such as -your name and email are prompted for followed by dataloader specific attributes such as organ, organism and many more. -If the requested information is not available simply hit enter and continue until done. If you have mixed organ or organism -data you will have to resolve this manually. Your dataloader template will be created in your current working directory -in a folder resembling your doi. - -Now simply fill in all missing properties in your dataloader script(s). Leave all unneeded properties outcommented. -When done run ``sfaira clean-dataloader `` on the just filled out dataloader script. -All unused attributes will be removed and the file is reformatted. - -Next validate the integrity of your dataloader with ``sfaira lint-dataloader ``. -All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information/function. - -Finally, test your dataloader with ``sfaira test-dataloader ``. -If all tests pass you can proceed to use your dataloader or to submit a pull request to sfaira. - -Map cell type labels to ontology -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with -the same name and directory as the python file in which the data loader is located. -This .csv contains two columns with one row for each unique cell type label. -The free text identifiers in the first column "source", -and the corresponding ontology term in the second column "target". -You can write this file entirely from scratch. 
-Sfaira also allows you to generate a first guess of this file using fuzzy string matching -which is automatically executed when you run the template data loader unit test for the first time with you new loader. -Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds to which -ontology term in the case of conflicts. -Still, this first guess usually drastically speeds up this annotation harmonization. - -Cell type ontology management ------------------------------ - -Sfaira maintains a wrapper of the Cell Ontology as a class which allows additions to this ontology. -This allows us to use the core ontology used in the community as a backbone and to keep up with newly identifed cell types on our own. -We require all extensions of the core ontology not to break the directed acyclic graph that is the ontology: -Usually, such extensions would be additional leave nodes. - -Second, we maintain cell type universes for anatomic structures. -These are dedicated for cell type-dependent models which require a defined set of cell types. -Such a universe is a set of nodes in the ontology. - -Contribute cell types to ontology -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Please open an issue on the sfaira repo with a description what type of cell type you want to add. - -Using ontologies to train cell type classifiers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Cell type classifiers can be trained on data sets with different coarsity of cell type annotation using aggregate -cross-entropy as a loss and aggregate accuracy as a metric. -The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms -that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. - -Metadata management -------------------- - -We constrain meta data by ontologies where possible. 
The current restrictions are: - - - .age: unconstrained string, try using units of years for human, units of months for mice and units of days for - cell culture samples - - .dev_stage: unconstrained string, this will constrained to an ontology in the future, - try choosing from HSAPDV (http://www.obofoundry.org/ontology/hsapdv.html) for human - or from MMUSDEV (http://www.obofoundry.org/ontology/mmusdv.html) for mouse - - .cell_line: unconstrained string, this will be constrained to an ontology later. try choosing from cellosaurus - cell line database (https://web.expasy.org/cellosaurus/) - - .ethnicity: unconstrained string, this will constrained to an ontology in the future, - try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) - - .healthy: bool - - .normalisation: unconstrained string, this will constrained to an ontology in the future, - try using {"raw", "scaled"} - - .organ: unconstrained string, this will constrained to an ontology in the future, try to choose - term from Uberon (http://www.obofoundry.org/ontology/ehdaa2.html) - or from EHDAA2 (http://www.obofoundry.org/ontology/ehdaa2.html) for human - or from EMAPA (http://www.obofoundry.org/ontology/emapa.html) for mouse - - .organism: constrained string, {"mouse", "human"}. In the future, we will use NCBITAXON - (http://www.obofoundry.org/ontology/ncbitaxon.html). - - .assay_sc: unconstrained string, this will constrained to an experimental protocol ontology in the future, - try choosing a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false - - .assay_differentiation: unconstrained string, try to provide a base differentiation protocol (eg. 
Lancaster, 2014) - as well as any amendments to the original protocol - - .assay_type_differentiation: constrained string, {"guided", "unguided"} - - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "cancer"} - - .sex: constrained string, {"female", "male"} - - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. - If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity - and use units: `[1g]` - - .year: must be an integer year, e.g. 2020 - -Follow this issue_ for details on upcoming ontology integrations. - -.. _issue: https://github.com/theislab/sfaira/issues/16 - Genome management ----------------- -You do not have to worry about this unless you are interested, -this section is not required reading for writing data loaders. - We streamline feature spaces used by models by defining standardized gene sets that are used as model input. Per default, sfaira works with the protein coding genes of a genome assembly right now. A model topology version includes the genome it was trained for, which also defines the feature of this model as genes. 
diff --git a/docs/index.rst b/docs/index.rst index ed3256547..c6c357a76 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,7 @@ Latest additions commandline_interface tutorials data + adding_datasets models ecosystem roadmap diff --git a/requirements.txt b/requirements.txt index 1d8773db7..aa79086d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,5 +26,6 @@ questionary>=1.8.1 packaging>=20.8 requests>=2.25.1 switchlang>=0.1.0 -cookiecutter==1.7.2 -black>=20.8b1 +cookiecutter>=1.7.2 +boltons +flatten-dict diff --git a/setup.cfg b/setup.cfg index 2e3eb8d69..88bb3cf20 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ tag_prefix = inplace = 1 [flake8] -ignore=F401, W504 +ignore=F401, W504, E731 exclude = docs, sfaira/commands/templates max-line-length = 160 diff --git a/sfaira/cli.py b/sfaira/cli.py index 55b0b8dbd..65025f448 100644 --- a/sfaira/cli.py +++ b/sfaira/cli.py @@ -98,8 +98,8 @@ def lint_dataloader(path) -> None: PATH to the dataloader script. """ - dataloader_linter = DataloaderLinter() - dataloader_linter.lint(path) + dataloader_linter = DataloaderLinter(path) + dataloader_linter.lint() @sfaira_cli.command() diff --git a/sfaira/commands/clean_dataloader.py b/sfaira/commands/clean_dataloader.py index 43d08c0c5..1823c203a 100644 --- a/sfaira/commands/clean_dataloader.py +++ b/sfaira/commands/clean_dataloader.py @@ -1,5 +1,7 @@ import logging -from subprocess import Popen + +import yaml +from boltons.iterutils import remap log = logging.getLogger(__name__) @@ -11,39 +13,12 @@ def __init__(self, path): def clean_dataloader(self) -> None: """ - Removes any unwanted artifacts from a dataloader Python script and formats the code. - 1. Any line that starts with # self. <- outcommented attribute - 2. Any line that starts with # SFARA: <- explicitly marked full comments - 3. Any line with # something <- comments after attributes - 4. 
Runs black + Removes unused keys from the yaml file """ - # Remove all unwanted artifacts - cleaned_content = [] - - with open(self.path, 'r') as data_loader_file: - content = data_loader_file.readlines() - for line in content: - line_stripped = line.strip() - if line_stripped.startswith('# self.'): - continue - elif line_stripped.startswith('# SFAIRA:'): - continue - else: - if '#' in line: - if len(line.split('#')) > 1: - try: - cleaned_content += f'{line.split("#")[0]}\n' - except KeyError: - cleaned_content += line - else: - cleaned_content += line - - with open(self.path, 'w') as data_loader_file: - for line in cleaned_content: - data_loader_file.write(line) - data_loader_file.write('\n') + with open(self.path) as yaml_file: + content = yaml.load(yaml_file, Loader=yaml.FullLoader) + drop_falsey = lambda path, key, value: bool(value) + clean = remap(content, visit=drop_falsey) - # run black - black = Popen(['black', self.path], - universal_newlines=True, shell=False, close_fds=True) - (black_stdout, black_stderr) = black.communicate() + with open(self.path, 'w') as file: + yaml.dump(clean, file) diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py index f8b3e8c82..6238d7172 100644 --- a/sfaira/commands/create_dataloader.py +++ b/sfaira/commands/create_dataloader.py @@ -2,8 +2,7 @@ import os import re from dataclasses import dataclass, asdict -from shutil import copyfile -from typing import Union +from typing import Union, Dict from sfaira.commands.questionary import sfaira_questionary from rich import print @@ -15,21 +14,24 @@ @dataclass class TemplateAttributes: dataloader_type: str = '' # One of single_dataset, multiple_datasets_single_file, multiple_datasets_streamlined, multiple_datasets_not_streamlined - id: str = '' # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). 
+ id: str = '' # unique identifier of data set (Organism_Organ_Year_Protocol_NumberOfDataset_FirstAuthorLastname_doi). id_without_doi: str = '' # complete id without the doi -> usually used to name the python scripts + create_extra_description: str = '' # Whether to create an optional extra description file or not author: Union[str, list] = '' # author (list) who sampled / created the data set doi: str = '' # doi of data set accompanying manuscript doi_sfaira_repr: str = '' # internal representation with any special characters replaced with underscores + sample_fns: Union[str, Dict[str, list]] = '' # file name of the first *.h5ad file download_url_data: str = '' # download website(s) of data files download_url_meta: str = '' # download website(s) of meta data files - - organ: str = '' # (*, optional) organ (anatomical structure) + organ: str = '' # (*) organ (anatomical structure) organism: str = '' # (*) species / organism - assay_sc: str = '' # (*, optional) protocol used to sample data (e.g. smart-seq2) + assay: str = '' # (*, optional) protocol used to sample data (e.g. 
smart-seq2) + normalization: str = '' # raw or the used normalization technique + ethnicity: str = '' # ethnicity of the sample + state_exact: str = '' # state of the sample year: str = 2021 # year in which sample was acquired - sample_source: str = '' # (*) whether the sample came from primary tissue or cell culture number_of_datasets: str = 1 # Required to determine the file names @@ -61,25 +63,8 @@ def _prompt_dataloader_template(self) -> None: # One dataset if number_datasets == 'One': self.template_attributes.dataloader_type = 'single_dataset' - return - # More than one dataset - dataset_counts = sfaira_questionary(function='select', - question='Are your datasets in a single file or is there one file per dataset?', - choices=['Single dataset file', 'Multiple dataset files']) - if dataset_counts == 'Single dataset file': - self.template_attributes.dataloader_type = 'multiple_datasets_single_file' - return - - # streamlined? - streamlined_datasets = sfaira_questionary(function='select', - question='Are your datasets in a similar format?', - choices=['Same format', 'Different formats']) - if streamlined_datasets == 'Same format': - self.template_attributes.dataloader_type = 'multiple_datasets_streamlined' - return else: - self.template_attributes.dataloader_type = 'multiple_datasets_not_streamlined' - return + self.template_attributes.dataloader_type = 'multiple_datasets' def _prompt_dataloader_configuration(self): """ @@ -100,21 +85,41 @@ def _prompt_dataloader_configuration(self): self.template_attributes.doi = doi self.template_attributes.doi_sfaira_repr = f'd{doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + self.template_attributes.number_of_datasets = sfaira_questionary(function='text', + question='Number of datasets:', + default='1') + + # Differentiate between a single dataset or multiple datasets to get sample file names + if self.template_attributes.dataloader_type == 'multiple_datasets': + self.template_attributes.sample_fns = 
{'fns': []} + for ds in range(int(self.template_attributes.number_of_datasets)): + fn = sfaira_questionary(function='text', + question='Sample file name:', + default=f'data_{ds}.h5ad') + self.template_attributes.sample_fns['fns'].append(fn) + else: + self.template_attributes.sample_fns = sfaira_questionary(function='text', + question='Sample file name of the first dataset:', + default='data.h5ad') + self.template_attributes.organism = sfaira_questionary(function='text', question='Organism:', default='NA') self.template_attributes.organ = sfaira_questionary(function='text', question='Organ:', default='NA') - self.template_attributes.assay_sc = sfaira_questionary(function='text', - question='AssaySc:', - default='NA') + self.template_attributes.assay = sfaira_questionary(function='text', + question='Assay:', + default='NA') + self.template_attributes.normalization = sfaira_questionary(function='text', + question='Normalization:', + default='raw') + self.template_attributes.state_exact = sfaira_questionary(function='text', + question='Sample state:', + default='healthy') self.template_attributes.year = sfaira_questionary(function='text', question='Year:', default='2021') - self.template_attributes.sample_source = sfaira_questionary(function='text', - question='SampleSource:', - default='NA') first_author = author[0] if isinstance(author, list) else author try: first_author_lastname = first_author.split(',')[0] @@ -122,15 +127,18 @@ def _prompt_dataloader_configuration(self): print('[bold yellow] First author was not in the expected format. 
Using full first author for the id.') first_author_lastname = first_author self.template_attributes.id_without_doi = f'{self.template_attributes.organism}_{self.template_attributes.organ}_' \ - f'{self.template_attributes.year}_{self.template_attributes.assay_sc}_' \ + f'{self.template_attributes.year}_{self.template_attributes.assay}_' \ f'{first_author_lastname}_001' self.template_attributes.id = self.template_attributes.id_without_doi + f'_{self.template_attributes.doi_sfaira_repr}' self.template_attributes.download_url_data = sfaira_questionary(function='text', question='URL to download the data', default='https://ftp.ncbi.nlm.nih.gov/geo/') - self.template_attributes.number_of_datasets = sfaira_questionary(function='text', - question='Number of datasets:', - default='1').zfill(3) + self.template_attributes.download_url_meta = sfaira_questionary(function='text', + question='URL to download the meta data', + default='https://ftp.ncbi.nlm.nih.gov/geo/') + self.template_attributes.create_extra_description = sfaira_questionary(function='confirm', + question='Do you want to add additional custom metadata?', + default='Yes') def _template_attributes_to_dict(self) -> dict: """ @@ -145,21 +153,3 @@ def _create_dataloader_template(self): no_input=True, overwrite_if_exists=True, extra_context=self._template_attributes_to_dict()) - - # multiple datasets not streamlined are not contained in a single file but in multiple files - # Hence, we create one copy per dataset and adapt the ID per dataloader script - if self.template_attributes.dataloader_type == 'multiple_datasets_not_streamlined': - for i in range(2, int(self.template_attributes.number_of_datasets.lstrip('0')) + 1): - copyfile(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi}.py', - f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py') - - # Replace the default ID of 1 with the file specific ID - with 
open(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py', 'r') as file: - content = file.readlines() - idx_fixed = list(map(lambda line: f' self.set_dataset_id(idx={i}) # autogenerated by sfaira' - if line.strip().startswith('self.set_dataset_id(idx=1)') - else line, - content)) - with open(f'{self.template_attributes.doi_sfaira_repr}/{self.template_attributes.id_without_doi[:-3]}{str(i).zfill(3)}.py', 'w') as file: - for line in idx_fixed: - file.write(line) diff --git a/sfaira/commands/lint_dataloader.py b/sfaira/commands/lint_dataloader.py index 02705735c..88d343eef 100644 --- a/sfaira/commands/lint_dataloader.py +++ b/sfaira/commands/lint_dataloader.py @@ -1,7 +1,10 @@ import logging import rich +import yaml from rich.panel import Panel +from flatten_dict import flatten +from flatten_dict.reducer import make_reducer from rich.progress import Progress, BarColumn log = logging.getLogger(__name__) @@ -11,25 +14,21 @@ class DataloaderLinter: def __init__(self, path='.'): self.path: str = path - self.content: list = [] + self.content: dict = {} self.passed: dict = {} self.warned: dict = {} self.failed: dict = {} self.linting_functions: list = [ - '_lint_dataloader_object', '_lint_required_attributes', - '_lint_sfaira_todos', - '_lint_load' ] - def lint(self, path) -> None: + def lint(self) -> None: """ - Statically verifies a dataloader against a predefined set of rules. + Statically verifies a yaml dataloader file against a predefined set of rules. Every rule is a function defined in this class, which must be part of this class' linting_functions. 
- :param path: Path to an existing dataloader """ - with open(path, 'r') as f: - self.content = list(map(lambda line: line.strip(), f.readlines())) + with open(self.path) as yaml_file: + self.content = yaml.load(yaml_file, Loader=yaml.FullLoader) progress = Progress("[bold green]{task.description}", BarColumn(bar_width=None), "[bold yellow]{task.completed} of {task.total}[reset] [bold green]{task.fields[func_name]}") @@ -43,82 +42,40 @@ def lint(self, path) -> None: self._print_results() - def _lint_dataloader_object(self): - """ - Verifies that the Dataloader Object itself (no the attributes) is valid - """ - # TODO Could be more strict by checking also whether the constructor is valid, but too much of a hazzle with Black formatting. - passed_lint_dataloader_object = True - - try: - line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('class Dataset(DatasetBaseGroupLoadingManyFiles):', - 'class Dataset(DatasetBase):')), enumerate(self.content)))[0] - except IndexError: - passed_lint_dataloader_object = False - self.failed['-1'] = 'Missing one of class Dataset(DatasetBase) or class Dataset(DatasetBaseGroupLoadingManyFiles)' - - if passed_lint_dataloader_object: - self.passed[line] = 'Passed dataloader object checks.' - - def _lint_load(self): - """ - Verifies that the method _load_any_object(self, fn=None) is present. - """ - passed_load = True - - try: - line, dl_object = list(filter(lambda line_dl_object: line_dl_object[1].startswith(('def _load_any_object(self, fn=None):', - 'def _load(self, fn):', - 'def _load(self)')), - enumerate(self.content)))[0] - except IndexError: - passed_load = False - self.failed['-1'] = 'Missing one of methods _load_any_object(self, fn=None) or def _load(self, fn)' - - if passed_load: - self.passed[line] = 'Passed dataloader object checks.' - def _lint_required_attributes(self): """ Verifies that all required attributes for every dataloader are present. 
""" passed_required_attributes = True - attributes = ['self.set_dataset_id', - 'self.author', - 'self.doi', - 'self.download_url_data', - 'self.organ', - 'self.organism', - 'self.assay_sc', - 'self.year', - 'self.sample_source'] - + attributes = ['dataset_structure:sample_fns', + 'dataset_wise:author', + 'dataset_wise:doi', + 'dataset_wise:download_url_data', + 'dataset_wise:download_url_meta', + 'dataset_wise:normalization', + 'dataset_wise:year', + 'dataset_or_observation_wise:assay', + 'dataset_or_observation_wise:organ', + 'dataset_or_observation_wise:organism'] + + flattened_dict = flatten(self.content, reducer=make_reducer(delimiter=':')) for attribute in attributes: try: - line, attribute = list(filter(lambda line_attribute: line_attribute[1].startswith(attribute), enumerate(self.content)))[0] - except IndexError: + detected = False + for key in flattened_dict.keys(): + if key.startswith(attribute): + detected = True + if not detected: + passed_required_attributes = False + self.failed['-1'] = f'Missing attribute: {attribute}' + except KeyError: passed_required_attributes = False - self.failed['-1'] = 'One of required attributes set_dataset_id, author, doi, download_url_data, ' \ - 'organ, organism, assay_sc, year, sample_source is missing.' + self.failed['-1'] = f'Missing attribute: {attribute}' if passed_required_attributes: self.passed[0] = 'Passed required dataloader attributes checks.' - def _lint_sfaira_todos(self): - """ - Warns if any SFAIRA TODO: statements were found - """ - passed_sfaira_todos = True - - for index, line in enumerate(self.content): - if 'SFAIRA TODO' in line: - passed_sfaira_todos = False - self.warned[f'{index}'] = f'Line {index}: {line[2:]}' - - if passed_sfaira_todos: - self.passed['0'] = 'Passed sfaira TODOs checks.' 
- def _print_results(self): console = rich.console.Console() console.print() diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets/cookiecutter.json similarity index 58% rename from sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json rename to sfaira/commands/templates/multiple_datasets/cookiecutter.json index d0ccfd5ac..4960fc7a1 100644 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/cookiecutter.json +++ b/sfaira/commands/templates/multiple_datasets/cookiecutter.json @@ -1,4 +1,5 @@ { + "sample_fns": {"fns": []}, "dataloader_author_name": "", "dataloader_author_email": "", "id": "", @@ -7,9 +8,13 @@ "doi": "", "doi_sfaira_repr": "", "download_url_data": "", + "download_url_meta": "", + "normalization": "", "organ": "", "organism": "", - "assay_sc": "", + "assay": "", "year": "", - "sample_source": "" + "individual": "", + "state_exact": "", + "create_extra_description": "" } diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets/hooks/__init__.py similarity index 100% rename from sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py rename to sfaira/commands/templates/multiple_datasets/hooks/__init__.py diff --git a/sfaira/commands/templates/multiple_datasets/hooks/post_gen_project.py b/sfaira/commands/templates/multiple_datasets/hooks/post_gen_project.py new file mode 100644 index 000000000..44c42355b --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets/hooks/post_gen_project.py @@ -0,0 +1,15 @@ +import os +import shutil + + +def remove(filepath): + if os.path.isfile(filepath): + os.remove(filepath) + elif os.path.isdir(filepath): + shutil.rmtree(filepath) + + +create_extra_description = '{{ cookiecutter.create_extra_description }}' == 'True' + +if not 
create_extra_description: + remove('extra_description.txt') diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py similarity index 100% rename from sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/__init__.py rename to sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt similarity index 100% rename from sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/__init__.py rename to sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt diff --git a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py new file mode 100644 index 000000000..50311248b --- /dev/null +++ b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -0,0 +1,11 @@ +import anndata +import os +import scipy.sparse + + +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + adata.X = scipy.sparse.csc_matrix(adata.X) + + return adata diff --git a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml new file mode 100644 index 000000000..872fa0188 --- /dev/null +++ 
b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -0,0 +1,58 @@ +dataset_structure: + dataset_index: 1 + sample_fns: +{% for fn in cookiecutter.sample_fns.fns %} - "{{ fn }}" +{% endfor %}dataset_wise: + author: "{{ cookiecutter.author }}" + doi: "{{ cookiecutter.doi }}" + download_url_data: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} download_url_meta: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} normalization: "{{ cookiecutter.normalization }}" + year: "{{ cookiecutter.year }}" +dataset_or_observation_wise: + age: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} age_obs_key: + assay: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.assay }}" +{% endfor %} assay_obs_key: + bio_sample: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} bio_sample_obs_key: + development_stage: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} development_stage_obs_key: + ethnicity: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} ethnicity_obs_key: + healthy: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} healthy_obs_key: + individual: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} individual_obs_key: + organ: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.organ }}" +{% endfor %} organ_obs_key: + organism: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.organism }}" +{% endfor %} organism_obs_key: + sex: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} sex_obs_key: + state_exact: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.state_exact }}" +{% endfor %} state_exact_obs_key: + tech_sample: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: +{% endfor %} tech_sample_obs_key: +observation_wise: + cellontology_original_obs_key: 
+feature_wise: + var_ensembl_col: + var_symbol_col: +misc: +meta: + version: "1.0" diff --git a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py deleted file mode 100644 index 5a33693ac..000000000 --- a/sfaira/commands/templates/multiple_datasets_not_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -from typing import Union -import anndata as ad - -from sfaira.data import DatasetBase - - -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - # SFAIRA TODO Add your meta data here - self.set_dataset_id(idx=1) # autogenerated by sfaira - - self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set - self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript - - self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files - # self.download_url_meta = 'x' # download website(s) of meta data files - - self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) - self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. 
smart-seq2) - self.year = {{cookiecutter.year}} # year in which sample was acquired - self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture - # self.age = 'x' # (*, optional) age of sample - # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) - # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - # self.cell_line = x # (*, optional) cell line used (for cell culture samples) - # self.dev_stage = x # (*, optional) developmental stage of organism - # self.ethnicity = x # (*, optional) ethnicity of sample - # self.healthy = x # (*, optional) whether sample represents a healthy organism - # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - # self.sex = x # (*, optional) sex - # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - - # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column - # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! - # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
- # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) - # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. 
- # SFAIRA: name of column which contain streamlined cell ontology cell type classes: - # self.obs_key_cellontology_original = x # (optional) - - def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir, ) # SFAIRA TODO: add the name of the raw file - # SFAIRA TODO: add code that loads to raw file into an AnnData object and return it - pass diff --git a/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json deleted file mode 100644 index d0ccfd5ac..000000000 --- a/sfaira/commands/templates/multiple_datasets_single_file/cookiecutter.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "dataloader_author_name": "", - "dataloader_author_email": "", - "id": "", - "id_without_doi": "", - "author": "", - "doi": "", - "doi_sfaira_repr": "", - "download_url_data": "", - "organ": "", - "organism": "", - "assay_sc": "", - "year": "", - "sample_source": "" - } diff --git a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py deleted file mode 100644 index 57a632eb6..000000000 --- a/sfaira/commands/templates/multiple_datasets_single_file/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import anndata as ad -from typing import Union - -from sfaira.data import DatasetBaseGroupLoadingOneFile - - -# SFAIRA TODO: Add correct sample IDs here. 
-SAMPLE_IDS = [ - # "your_sample_id_1", - # "your_sample_id_2" -] - - -class Dataset(DatasetBaseGroupLoadingOneFile): - - def __init__( - self, - sample_fn: str, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, sample_ids=SAMPLE_IDS, meta_path=meta_path, cache_path=cache_path, - path=path, **kwargs) - - # SFAIRA TODO: Add you meta data here. - self.set_dataset_id(idx=1) # autogenerated by sfaira # SFAIRA TODO: Increase index ID by file - - self.author = {{ cookiecutter.author }} # author (list) who sampled / created the data set - self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript - - self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files - # self.download_url_meta = 'x' # download website(s) of meta data files - - self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) - self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.year = {{ cookiecutter.year }} # year in which sample was acquired - self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture - # self.age = 'x' # (*, optional) age of sample - # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - # self.cell_line = x # (*, optional) cell line used (for cell culture samples) - # self.dev_stage = x # (*, optional) developmental stage of organism - # self.ethnicity = x # (*, optional) ethnicity of sample - # self.healthy = x # (*, optional) whether sample represents a healthy organism - # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - # self.sex = x # (*, optional) sex - # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - - # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column - # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! - # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
- # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) - # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # SFAIRA: name of column which contain streamlined cell ontology cell type classes: - # self.obs_key_cellontology_original = x # (optional) - - # SFAIRA TODO: Make sure to include this attribute which indicates the column in self.adata in which you saved the sample IDs. - self.obs_key_sample = 'x' - - def _load_full(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir,) # SFAIRA TODO: add the name of the raw file - # SFAIRA TODO: load full data into AnnData object (no subsetting!) 
and return it - pass diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json b/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json deleted file mode 100644 index d0ccfd5ac..000000000 --- a/sfaira/commands/templates/multiple_datasets_streamlined/cookiecutter.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "dataloader_author_name": "", - "dataloader_author_email": "", - "id": "", - "id_without_doi": "", - "author": "", - "doi": "", - "doi_sfaira_repr": "", - "download_url_data": "", - "organ": "", - "organism": "", - "assay_sc": "", - "year": "", - "sample_source": "" - } diff --git a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py deleted file mode 100644 index e8709b9bb..000000000 --- a/sfaira/commands/templates/multiple_datasets_streamlined/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ /dev/null @@ -1,76 +0,0 @@ -import anndata as ad -import os -from typing import Union - -from sfaira.data import DatasetBaseGroupLoadingManyFiles - - -# SFARA TODO: Add correct sample IDs here. -SAMPLE_FNS = [ - "your_sample_fn_1", - "your_sample_fn_2" -] - - -class Dataset(DatasetBaseGroupLoadingManyFiles): - - def __init__( - self, - sample_fn: str, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(sample_fn=sample_fn, sample_fns=SAMPLE_FNS, meta_path=meta_path, cache_path=cache_path, - path=path, **kwargs) - - # SFAIRA TODO: Add you meta data here. 
- self.set_dataset_id(idx=1) # autogenerated by sfaira # SFAIRA TODO: Increase index ID by file - - self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set - self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript - - self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files - # self.download_url_meta = 'x' # download website(s) of meta data files - - self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) - self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.year = {{cookiecutter.year}} # year in which sample was acquired - self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture - # self.age = 'x' # (*, optional) age of sample - # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) - # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - # self.cell_line = x # (*, optional) cell line used (for cell culture samples) - # self.dev_stage = x # (*, optional) developmental stage of organism - # self.ethnicity = x # (*, optional) ethnicity of sample - # self.healthy = x # (*, optional) whether sample represents a healthy organism - # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - # self.sex = x # (*, optional) sex - # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - - # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column - # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! 
- # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. - # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) - # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # SFAIRA: name of column which contain streamlined cell ontology cell type classes: - # self.obs_key_cellontology_original = x # (optional) - - def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir, self.sample_fn) # SFAIRA TODO: add the name of the raw file - # SFAIRA TODO: load file fn into self.adata and return it, self.sample_fn represents the current filename. 
- pass \ No newline at end of file diff --git a/sfaira/commands/templates/single_dataset/cookiecutter.json b/sfaira/commands/templates/single_dataset/cookiecutter.json index d0ccfd5ac..4f9207a62 100644 --- a/sfaira/commands/templates/single_dataset/cookiecutter.json +++ b/sfaira/commands/templates/single_dataset/cookiecutter.json @@ -1,4 +1,5 @@ { + "sample_fns": "", "dataloader_author_name": "", "dataloader_author_email": "", "id": "", @@ -7,9 +8,13 @@ "doi": "", "doi_sfaira_repr": "", "download_url_data": "", + "download_url_meta": "", + "normalization": "", "organ": "", "organism": "", - "assay_sc": "", + "assay": "", "year": "", - "sample_source": "" + "individual": "", + "state_exact": "", + "create_extra_description": "" } diff --git a/sfaira/commands/templates/single_dataset/hooks/__init__.py b/sfaira/commands/templates/single_dataset/hooks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/commands/templates/single_dataset/hooks/post_gen_project.py b/sfaira/commands/templates/single_dataset/hooks/post_gen_project.py new file mode 100644 index 000000000..44c42355b --- /dev/null +++ b/sfaira/commands/templates/single_dataset/hooks/post_gen_project.py @@ -0,0 +1,15 @@ +import os +import shutil + + +def remove(filepath): + if os.path.isfile(filepath): + os.remove(filepath) + elif os.path.isdir(filepath): + shutil.rmtree(filepath) + + +create_extra_description = '{{ cookiecutter.create_extra_description }}' == 'True' + +if not create_extra_description: + remove('extra_description.txt') diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/extra_description.txt new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py b/sfaira/commands/templates/single_dataset/{{ 
cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py index 25cb8f9d4..50311248b 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.py @@ -1,67 +1,11 @@ +import anndata import os -from typing import Union -import anndata as ad +import scipy.sparse -from sfaira.data import DatasetBase +def load(data_dir, sample_fn, **kwargs): + fn = os.path.join(data_dir, sample_fn) + adata = anndata.read(fn) + adata.X = scipy.sparse.csc_matrix(adata.X) -class Dataset(DatasetBase): - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - - # SFAIRA TODO Add your meta data here - self.set_dataset_id(idx=1) # autogenerated by sfaira - - self.author = {{cookiecutter.author}} # author (list) who sampled / created the data set - self.doi = '{{ cookiecutter.doi }}' # doi of data set accompanying manuscript - - self.download_url_data = '{{ cookiecutter.download_url_data }}' # download website(s) of data files - # self.download_url_meta = 'x' # download website(s) of meta data files - - self.organ = '{{ cookiecutter.organ }}' # organ (anatomical structure) - self.organism = '{{ cookiecutter.organism }}' # (*) species / organism - self.assay_sc = '{{ cookiecutter.assay_sc }}' # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.year = {{cookiecutter.year}} # year in which sample was acquired - self.sample_source = '{{ cookiecutter.sample_source }}' # (*) whether the sample came from primary tissue or cell culture - # self.age = 'x' # (*, optional) age of sample - # self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - # self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - # self.cell_line = x # (*, optional) cell line used (for cell culture samples) - # self.dev_stage = x # (*, optional) developmental stage of organism - # self.ethnicity = x # (*, optional) ethnicity of sample - # self.healthy = x # (*, optional) whether sample represents a healthy organism - # self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - # self.sex = x # (*, optional) sex - # self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - - # SFAIRA: The following meta data may instead also be supplied on a cell level if an appropriate column - # SFAIRA: is present in the anndata instance (specifically in .obs) after loading. You need to make sure this is loaded in the loading script)! - # SFAIRA: See above for a description what these meta data attributes mean. If these attributes are note available, you can simply leave this out. 
- # self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - # self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - # self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) - # self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - # self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - # self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - # self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - # self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - # self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - # self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - # self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - # self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - # self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # SFAIRA: Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. 
- # SFAIRA: name of column which contain streamlined cell ontology cell type classes: - # self.obs_key_cellontology_original = x # (optional) - - def _load(self) -> ad.AnnData: - # fn = os.path.join(self.data_dir,) # SFAIRA TODO: add the name of the raw file - # SFAIRA TODO: add code that loads to raw file into an AnnData object and return it - pass + return adata diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml new file mode 100644 index 000000000..32f9c38a1 --- /dev/null +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -0,0 +1,44 @@ +dataset_structure: + dataset_index: 1 + sample_fns: + - "{{ cookiecutter.sample_fns }}" +dataset_wise: + author: "{{ cookiecutter.author }}" + doi: "{{ cookiecutter.doi }}" + download_url_data: "{{ cookiecutter.download_url_data }}" + download_url_meta: "{{ cookiecutter.download_url_meta }}" + normalization: "{{ cookiecutter.normalization }}" + year: "{{ cookiecutter.year }}" +dataset_or_observation_wise: + age: + age_obs_key: + assay: "{{ cookiecutter.assay }}" + assay_obs_key: + bio_sample: + bio_sample_obs_key: + development_stage: + development_stage_obs_key: + ethnicity: + ethnicity_obs_key: + healthy: + healthy_obs_key: + individual: + individual_obs_key: + organ: "{{ cookiecutter.organ }}" + organ_obs_key: + organism: "{{ cookiecutter.organism }}" + organism_obs_key: + sex: + sex_obs_key: + state_exact: "{{ cookiecutter.state_exact }}" + state_exact_obs_key: + tech_sample: + tech_sample_obs_key: +observation_wise: + cellontology_original_obs_key: +feature_wise: + var_ensembl_col: + var_symbol_col: +misc: +meta: + version: "1.0" diff --git a/sfaira/commands/test_dataloader.py b/sfaira/commands/test_dataloader.py index 9b2a0157c..4dde41710 100644 --- 
a/sfaira/commands/test_dataloader.py +++ b/sfaira/commands/test_dataloader.py @@ -20,10 +20,11 @@ def test_dataloader(self): """ Runs a predefined unit test on a given dataloader. """ - print('[bold blue]Please ensure that your dataloader is in sfaira/dataloaders/loaders/.') - print('[bold blue]Please ensure that your test data is in sfaira/unit_tests/template_data/.') - self._prompt_doi() - self._run_unittest() + print('[bold red]This command is currently disabled.') + # print('[bold blue]Please ensure that your dataloader is in sfaira/dataloaders/loaders/.') + # print('[bold blue]Please ensure that your test data is in sfaira/unit_tests/template_data/.') + # self._prompt_doi() + # self._run_unittest() def _prompt_doi(self): self.doi = sfaira_questionary(function='text', From b6d1b50adc97d222b83986a43d1be9d9f3d1a81e Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 12 Mar 2021 11:18:06 +0100 Subject: [PATCH 092/161] fix data loading (#177) * fixed matchc to reference * fixed caching scirpts * removed erasing protection from cellontology obs keys * fixed building of directory_formatted_doi in cases where there are multiple DOIs * fixed ID generation with multiple DOIs present via doi_main * added nan cell type into tsv for 10.1038/s41593-019-0393-4 * fixed d10_1038_s41593_019_0393_4 * fixed set meta data * changed nan labelling * added excpetion cases into cell type value protection * fixed tsv in /loaders/d10_1016_j_devcel_2020_01_03fixed tsv in /loaders/d10_1016_j_devcel_2020_01_0333 * fixed healthy ontology * fix subsetting with unconstrained metadata terms * fix determination of allowed ontology values * fix HCL loading [skip CI] * improve error message when an illegal label is used for a constrained metadata field [skip CI] * improve error message when an illegal label is used for a constrained metadata field [skip CI] * add sex dict in HCL loading code to adhere to ontology [skip CI] * fix dataset streamlining * add author field to ontology 
container to enable subsetting [skip ci] * fixed usage of healthly_state_healthy * fixed _set_metadata_in_adata * remove kanton celltype annotation alltogeher (temporarily) [skip ci] * Third party annotation (#174) * added optional loading of further cell-wise annotation into Dataset.load() and included this in DatasetGroupDirectoryOriented also linked to sfaira_extensions. * allowed getting and setting of additional_annotation_keys in DatasetGroups and SuperGroups * added documentation of load_annotation into docs * only showing load_ontology_class_map missing file warning if obs key is set addresses parts of #156 * fixed sfaira streamlining * fixed streamlining * reoreder unit tests * moved load to function * add more obs keys to adata [skip ci] * fix bug in adata_fields [skip ci] * updated rst * improved dat rst * add cell type adata_fields [skip ci] * updating dataloading rst * move class-based dataloading to a separate file * fix cleaning of empty dataset groups * fix docstring [skip CI] * fix _celltype_universe property [skip CI] * improved unit test * fixed unit test for sfaira contributed Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> --- docs/adding_dataset_classes.rst | 114 +++++ docs/adding_datasets.rst | 359 +++++++++------ docs/index.rst | 3 +- docs/{data.rst => using_data.rst} | 4 +- sfaira/consts/adata_fields.py | 417 +++++++----------- sfaira/consts/ontologies.py | 6 +- sfaira/data/base/dataset.py | 270 ++++++++---- sfaira/data/base/dataset_group.py | 88 +++- ...man_lung_2020_10xsequencing_miller_001.tsv | 8 +- ...ver_2019_10xsequencing_ramachandran_001.py | 31 +- ...rain_2019_10x3v2sequencing_kanton_001.yaml | 2 +- .../human_x_2020_microwellseq_han_x.py | 9 +- .../mouse_x_2019_10xsequencing_hove_001.py | 3 +- .../mouse_x_2019_10xsequencing_hove_001.tsv | 1 + sfaira/data/utils_scripts/create_meta.py | 2 +- .../utils_scripts/create_meta_and_cache.py | 3 +- sfaira/unit_tests/data/test_dataset.py | 32 +- 
.../unit_tests/data_contribution/conftest.py | 15 - .../data_contribution/test_data_template.py | 156 +++---- sfaira/versions/metadata/base.py | 4 +- 20 files changed, 894 insertions(+), 633 deletions(-) create mode 100644 docs/adding_dataset_classes.rst rename docs/{data.rst => using_data.rst} (99%) delete mode 100644 sfaira/unit_tests/data_contribution/conftest.py diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst new file mode 100644 index 000000000..07e65c654 --- /dev/null +++ b/docs/adding_dataset_classes.rst @@ -0,0 +1,114 @@ +The class-based data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. +In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. + +1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. + +.. code-block:: python + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). 
+ + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files + + self.age = x # (*, optional) age of sample + self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description of what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out. 
+ self.obs_key_age = x # (optional, see above, do not provide if .age is provided) + self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contains streamlined cell ontology cell type classes: + self.obs_key_cellontology_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and + # directory as the python file of this data loader (see below). + + +2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. 
+Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e. the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. + +.. code-block:: python + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad + return adata + +In summary, a python file for a mouse lung data set could look like this: + +.. code-block:: python + + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.author = "me" + self.doi = ["my preprint", "my peer-reviewed publication"] + self.download_url_data = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! + self.organ = "lung" + self.organism = "mouse" + self.assay_sc = "smart-seq2" + self.year = "2020" + self.sample_source = "primary_tissue" + + self.obs_key_cellontology_original = "louvain_named" # I save my cell type names in here + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) + return adata diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index 8a2ccc1bf..53bf24ee4 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -6,13 +6,15 @@ This process requires a couple of steps as outlined in the following sections. 1. Write a dataloader as outlined below. 2. Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as required by your data loader. 
+ If the raw file your dataloader uses as input is publicly available, sfaira will be able to automatically download the raw file, so no manual copying is required. + For the purpose of testing the data loader with a unit test, you can also copy the data into `sfaira/unit_tests/template_data/` as a DOI structured folder if you do not want to maintain a data collection on the machine that you are testing on. 3. You can contribute the data loader to public sfaira, we do not manage data upload though. During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. The following sections will first describe the underlying design principles of sfaira dataloaders and then explain how to interactively create, validate and test dataloaders. -Use data loaders on existing data repository +Use data loaders with an existing data repository -------------------------------------------- You only want to use data sets with existing data loaders and have adapted your directory structure as above? @@ -50,71 +52,69 @@ preprint and publication DOIs if both are available. We will also mention public The data loader python file ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Each data set (organsism, organ, assay_sc, optionally also batches) has its own data loader class. Each such class is -in a separate file and inherits from a base class that contains most functionalities. Accordingly, the data loader class -looks very similar in parts to a cell in a juypter notebook that performs data loading. The core features that must be included are: +Each data set, i.e. a single file or a set of files with similar structures, has its own data loader function and a yaml +file that describes its meta data. +Alternatively to the (preferred) yaml file, meta data can also be described in a constructor of a class in the same python file +as the loading function. 
For a documentation on writing a python class-based dataloader, please see here: https://github.com/theislab/sfaira/blob/dev/docs/adding_dataset_classes.rst +A detailed description of all meta data is given at the bottom of this page. -1. A constructor of the following form that can be used to interact with the data set +1. A yaml file or constructor of the following form that can be used to interact with the data set before it is loaded into memory: -.. code-block:: python - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # Data set meta data: You do not have to include all of these and can simply skip lines corresponding - # to attritbutes that you do not have access to. These are meta data on a sample level. - # The meta data attributes labeled with (*) may als be supplied per cell, see below, - # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - - self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). - - self.author = x # author (list) who sampled / created the data set - self.doi = x # doi of data set accompanying manuscript - - self.download_url_data = x # download website(s) of data files - self.download_url_meta = x # download website(s) of meta data files - - self.age = x # (*, optional) age of sample - self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - self.cell_line = x # (*, optional) cell line used (for cell culture samples) - self.dev_stage = x # (*, optional) developmental stage of organism - self.ethnicity = x # (*, optional) ethnicity of sample - self.healthy = x # (*, optional) whether sample represents a healthy organism - self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - self.organ = x # (*, optional) organ (anatomical structure) - self.organism = x # (*) species / organism - self.sample_source = x # (*) whether the sample came from primary tissue or cell culture - self.sex = x # (*, optional) sex - self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - self.year = x # year in which sample was acquired - - # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the - # anndata instance (specifically in .obs) after loading. - # You need to make sure this is loaded in the loading script)! - # See above for a description what these meta data attributes mean. - # Again, if these attributes are note available, you can simply leave this out. 
- self.obs_key_age = x # (optional, see above, do not provide if .age is provided) - self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cellontology_original = x # (optional) - # This cell type annotation is free text but is mapped to an ontology via a .csv file with the same name and - # directory as the python file of this data loader (see below). +.. 
code-block:: yaml + + dataset_structure: + dataset_index: 1 + sample_fns: + dataset_wise: + author: + doi: + download_url_data: + download_url_meta: + normalization: + year: + dataset_or_observation_wise: + age: + age_obs_key: + assay_sc: + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: + bio_sample: + bio_sample_obs_key: + cell_line: + cell_line_obs_key: + development_stage: + development_stage_obs_key: + ethnicity: + ethnicity_obs_key: + healthy: + healthy_obs_key: + individual: + individual_obs_key: + organ: + organ_obs_key: + organism: + organism_obs_key: + sample_source: + sample_source_obs_key: + sex: + sex_obs_key: + state_exact: + state_exact_obs_key: + tech_sample: + tech_sample_obs_key: + observation_wise: + cellontology_original_obs_key: + feature_wise: + var_ensembl_col: + var_symbol_col: + misc: + healthy_state_healthy: + meta: + version: "1.0" 2. A function called to load the data set into memory: @@ -126,44 +126,76 @@ directly into this sub directory. .. code-block:: python - def _load(self, fn=None): - # assuming that i uploaded an h5ad somewhere (in self.download) - if fn is None: - fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") - self.adata = anndata.read(fn) # loading instruction into .adata, use other ones if the data is not h5ad - # Some times, you need to load multiple files (e.g. counts and annotation), all of this code would be here. - - -In summary, a simply example data loader for a mouse lung data set could look like this: + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad + return adata + +In summary, the dataloader for a mouse lung data set could look like this: + +.. 
code-block:: yaml + + dataset_structure: + dataset_index: 1 + sample_fns: + dataset_wise: + author: "me" + doi: + - "my preprint" + - "my peer-reviewed publication" + download_url_data: "my GEO upload" + download_url_meta: + normalization: "raw" + year: + dataset_or_observation_wise: + age: + age_obs_key: + assay_sc: "smart-seq2" + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: + bio_sample: + bio_sample_obs_key: + cell_line: + cell_line_obs_key: + development_stage: + development_stage_obs_key: + ethnicity: + ethnicity_obs_key: + healthy: + healthy_obs_key: + individual: + individual_obs_key: + organ: "lung" + organ_obs_key: + organism: "mouse" + organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: + sex: + sex_obs_key: + state_exact: + state_exact_obs_key: + tech_sample: + tech_sample_obs_key: + observation_wise: + cellontology_original_obs_key: "louvain_named" + feature_wise: + var_ensembl_col: + var_symbol_col: + misc: + healthy_state_healthy: + meta: + version: "1.0" .. code-block:: python - class MyDataset(DatasetBase) - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.author = "me" - self.doi = "my preprint" - self.download_url_data = "my GEO upload" - self.normalisation = "raw" # because I uploaded raw counts, which is good practice! 
- self.organ = "lung" - self.organism = "mouse" - self.assay_sc = "smart-seq2" - self.year = "2020" - self.sample_source = "primary_tissue" - - self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here - - def _load(self, fn=None): - # assuming that i uploaded an h5ad somewhere (in self.download) - if fn is None: - fn = os.path.join(self.path, self.directory_formatted_doi, "my.h5ad") - self.adata = anndata.read(fn) + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) + return adata Data loaders can be added into a copy of the sfaira repository and can be used locally before they are contributed to @@ -173,26 +205,57 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. -Handling multiple data sources -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you have multiple data sets in a study which are all saved in separate files which come in similar formats: -You can subclass `DatasetBaseGroupLoadingManyFiles` instead of `DatasetBase` and proceed as usual, -only with adding `SAMPLE_FNS` in the data loader file name space, -which is a list of all file names addressed with this file. -You can then refer to an additional property of the Dataset class, `self.sample_fn` during loading -or when dynamically defining meta data in the constructor. -Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. - -If you have multiple data sets in a study which are all saved in one file: -You can subclass `DatasetBaseGroupLoadingOneFile` instead of `DatasetBase` and proceed as usual, -only with adding `SAMPLE_IDS` in the data loader file name space, -which is a list of all sample IDs addressed with this file. 
-You can then refer to an additional property of the Dataset class, `self.sample_id` during loading -or when dynamically defining meta data in the constructor. -Note that `self.sample_id` refers to a `self.adata.obs` column in the loaded data set, -this column has to be defined in `self.obs_key_sample`, which needs to be defined in the constructor. -Note that you can always add additional data loaders for further, less streamlined, data sets to such a study. +Loading third party annotation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some cases, the data set in question is already in the sfaira zoo but there is alternative (third party), cell-wise +annotation of the data. +This could be different cell type annotation for example. +The underlying data (count matrix and variable names) stay the same in these cases, and often, even some cell-wise +meta data are kept and only some are added or replaced. +Therefore, these cases do not require an additional `load()` function. +Instead, you can contribute `load_annotation_*()` functions into the `.py` file of the corresponding study. +You can chose an arbitrary suffix for the function but ideally one that identifies the source of this additional +annotation in a human readable manner at least to someone who is familiar with this data set. +Second you need to add this function into the dictionary `LOAD_ANNOTATION` in the `.py` file, with the suffix as a key. +If this dictionary does not exist yet, you need to add it into the `.py` file with this function as its sole entry. +Here an example of a `.py` file with additional annotation: + +.. code-block:: python + + def load(data_dir, sample_fn, **kwargs): + pass + + def load_annotation_meta_study_x(data_dir, sample_fn, **kwargs): + # Read a tabular file indexed with the observation names used in the adata used in load(). 
+ pass + + def load_annotation_meta_study_y(data_dir, sample_fn, **kwargs): + # Read a tabular file indexed with the observation names used in the adata used in load(). + pass + + LOAD_ANNOTATION = { + "meta_study_x": load_annotation_meta_study_x, + "meta_study_y": load_annotation_meta_study_y, + } + + +The table returned by `load_annotation_meta_study_x` needs to be indexed with the observation names used in `.adata`, +the object generated in `load()`. +If `load_annotation_meta_study_x` contains a subset of the observations defined in `load()`, +and this alternative annotation is chosen, +`.adata` is subsetted to these observations during loading. + +You can also add functions in the `.py` file in the same DOI-based module in sfaira_extensions if you want to keep this +additional annotation private. +For this to work with a public data loader, you need nothing more than the `.py` file with this `load_annotation_*()` +function and the `LOAD_ANNOTATION` of these private functions in sfaira_extensions. + +To access additional annotation during loading, use the setter functions `additional_annotation_key` on an instance of +either `Dataset`, `DatasetGroup` or `DatasetSuperGroup` to define data sets +for which you want to load additional annotation and which additional you want to load for these. +See also the docstrings of these functions for further details on how these can be set. + Creating dataloaders with the commandline interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -228,9 +291,9 @@ All tests must pass! If any of the tests fail please revisit your dataloader and Map cell type labels to ontology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The entries in `self.obs_key_cellontology_original` are free text but are mapped to an ontology via a .csv file with +The entries in `self.cellontology_original_obs_key` are free text but are mapped to an ontology via a .tsv file with the same name and directory as the python file in which the data loader is located. 
-This .csv contains two columns with one row for each unique cell type label. +This .tsv contains two columns with one row for each unique cell type label. The free text identifiers in the first column "source", and the corresponding ontology term in the second column "target". You can write this file entirely from scratch. @@ -270,35 +333,45 @@ Metadata management We constrain meta data by ontologies where possible. The current restrictions are: - - .age: unconstrained string, try using units of years for human, units of months for mice and units of days for - cell culture samples - - .dev_stage: unconstrained string, this will constrained to an ontology in the future, - try choosing from HSAPDV (http://www.obofoundry.org/ontology/hsapdv.html) for human - or from MMUSDEV (http://www.obofoundry.org/ontology/mmusdv.html) for mouse - - .cell_line: unconstrained string, this will be constrained to an ontology later. try choosing from cellosaurus - cell line database (https://web.expasy.org/cellosaurus/) - - .ethnicity: unconstrained string, this will constrained to an ontology in the future, - try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) + - .age: unconstrained string + Use + - units of years for humans, + - the E{day} nomenclature for mouse embryos + - the P{day} nomenclature for young post-natal mice + - units of weeks for mice older than one week and + - units of days for cell culture samples. + - .assay_sc: EFO-constrained string + Choose a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false + - .assay_differentiation: unconstrained string + Try to provide a base differentiation protocol (eg. "Lancaster, 2014") as well as any amendments to the original protocol. + - .assay_type_differentiation: constrained string, {"guided", "unguided"} + For cell-culture samples: Whether a guided (patterned) differentiation protocol was used in the experiment. 
+ - .developmental_stage: unconstrained string + This will constrained to an ontology in the future, + try choosing from HSAPDV (https://www.ebi.ac.uk/ols/ontologies/hsapdv) for human + or from MMUSDEV (https://www.ebi.ac.uk/ols/ontologies/mmusdv) for mouse. + - .cell_line: cellosaurus-constrained string + Cell line name from the cellosaurus cell line database (https://web.expasy.org/cellosaurus/) + - .ethnicity: unconstrained string, this will constrained to an ontology in the future. + Try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) - .healthy: bool + Whether the sample is from healthy tissue ({True, False}). - .normalisation: unconstrained string, this will constrained to an ontology in the future, - try using {"raw", "scaled"} - - .organ: unconstrained string, this will constrained to an ontology in the future, try to choose - term from Uberon (http://www.obofoundry.org/ontology/ehdaa2.html) - or from EHDAA2 (http://www.obofoundry.org/ontology/ehdaa2.html) for human - or from EMAPA (http://www.obofoundry.org/ontology/emapa.html) for mouse - - .organism: constrained string, {"mouse", "human"}. In the future, we will use NCBITAXON - (http://www.obofoundry.org/ontology/ncbitaxon.html). - - .assay_sc: unconstrained string, this will constrained to an experimental protocol ontology in the future, - try choosing a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false - - .assay_differentiation: unconstrained string, try to provide a base differentiation protocol (eg. Lancaster, 2014) - as well as any amendments to the original protocol - - .assay_type_differentiation: constrained string, {"guided", "unguided"} - - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "cancer"} - - .sex: constrained string, {"female", "male"} + Try to use {"raw", "scaled"}. 
+ - .organ: UBERON-constrained string + The anatomic location of the sample (https://www.ebi.ac.uk/ols/ontologies/uberon). + - .organism: constrained string, {"mouse", "human"}. + The organism from which the sample originates. + In the future, we will use NCBITAXON (https://www.ebi.ac.uk/ols/ontologies/ncbitaxon). + - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "tumor"} + Which cellular system the sample was derived from. + - .sex: constrained string, {"female", "male", None} + Sex of the individual sampled. - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity and use units: `[1g]` - .year: must be an integer year, e.g. 2020 + Year in which sample was first described (e.g. pre-print publication). Follow this issue_ for details on upcoming ontology integrations. diff --git a/docs/index.rst b/docs/index.rst index c6c357a76..157321b12 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,8 +38,9 @@ Latest additions api/index commandline_interface tutorials - data + using_data adding_datasets + adding_dataset_classes models ecosystem roadmap diff --git a/docs/data.rst b/docs/using_data.rst similarity index 99% rename from docs/data.rst rename to docs/using_data.rst index 0999d5c04..1d8bbb944 100644 --- a/docs/data.rst +++ b/docs/using_data.rst @@ -1,5 +1,5 @@ -Data -====== +Using Data +========== .. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png :width: 600px diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 697e22dcc..a696cbc3a 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -10,175 +10,40 @@ class AdataIds: """ Base class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns. 
""" - _age: str - _annotated: str - _assay_sc: str - _assay_differentiation: str - _assay_type_differentiation: str - _author: str - _bio_sample: str - _cell_line: str - _cell_types_original: str - _cell_ontology_class: str - _cell_ontology_id: str - _development_stage: str - _doi: str - _download_url_data: str - _download_url_meta: str - _dataset: str - _dataset_group: str - _ethnicity: str - _gene_id_ensembl: str - _gene_id_index: str - _gene_id_names: str - _healthy: str - _id: str - _individual: str - _ncells: str - _normalization: str - _organ: str - _organism: str - _sample_source: str - _sex: str - _state_exact: str - _tech_sample: str - _year: str - - @property - def age(self) -> str: - return self._age - - @property - def annotated(self) -> str: - return self._annotated - - @property - def assay_sc(self) -> str: - return self._assay_sc - - @property - def assay_differentiation(self) -> str: - return self._assay_differentiation - - @property - def assay_type_differentiation(self) -> str: - return self._assay_type_differentiation - - @property - def author(self) -> str: - return self._author - - @property - def bio_sample(self) -> str: - return self._bio_sample - - @property - def cell_line(self) -> str: - return self._cell_line - - @property - def cell_types_original(self) -> str: - return self._cell_types_original - - @property - def cell_ontology_class(self) -> str: - return self._cell_ontology_class - - @property - def cell_ontology_id(self) -> str: - return self._cell_ontology_id - - @property - def dataset(self) -> str: - return self._dataset - - @property - def dataset_group(self) -> str: - return self._dataset_group - - @property - def development_stage(self) -> str: - return self._development_stage - - @property - def doi(self) -> str: - return self._doi - - @property - def download_url_data(self) -> str: - return self._download_url_data - - @property - def download_url_meta(self) -> str: - return self._download_url_meta - - @property - def 
ethnicity(self) -> str: - return self._ethnicity - - @property - def gene_id_ensembl(self) -> str: - return self._gene_id_ensembl - - @property - def gene_id_index(self) -> str: - return self._gene_id_index - - @gene_id_index.setter - def gene_id_index(self, x: str): - self._gene_id_index = x - - @property - def gene_id_names(self) -> str: - return self._gene_id_names - - @property - def healthy(self) -> str: - return self._healthy - - @property - def id(self) -> str: - return self._id - - @property - def individual(self) -> str: - return self._individual - - @property - def ncells(self) -> str: - return self._ncells - - @property - def normalization(self) -> str: - return self._normalization - - @property - def organ(self) -> str: - return self._organ - - @property - def organism(self) -> str: - return self._organism - - @property - def sample_source(self) -> str: - return self._sample_source - - @property - def sex(self) -> str: - return self._sex - - @property - def state_exact(self) -> str: - return self._state_exact - - @property - def tech_sample(self) -> str: - return self._tech_sample - - @property - def year(self) -> str: - return self._year + age: str + annotated: str + assay_sc: str + author: str + cell_types_original: str + cell_ontology_class: str + cell_ontology_id: str + development_stage: str + disease: str + doi: str + download_url_data: str + download_url_meta: str + dataset: str + dataset_group: str + ethnicity: str + gene_id_ensembl: str + gene_id_index: str + gene_id_names: str + healthy: str + id: str + individual: str + ncells: str + normalization: str + organ: str + organism: str + sample_source: str + sex: str + state_exact: str + tech_sample: str + year: str + + obs_keys: List[str] + var_keys: List[str] + uns_keys: List[str] class AdataIdsSfaira(AdataIds): @@ -187,45 +52,51 @@ class AdataIdsSfaira(AdataIds): dataloader objects. 
""" - def __init__(self): - self._annotated = "annotated" - self._assay_sc = "assay_sc" - self._assay_differentiation = "assay_differentiation" - self._assay_type_differentiation = "assay_type_differentiation" - self._author = "author" - self._bio_sample = "bio_sample" - self._cell_line = "cell_line" - self._cell_types_original = "cell_types_original" - self._cell_ontology_class = "cell_ontology_class" - self._cell_ontology_id = "cell_ontology_id" - self._doi = "doi" - self._dataset = "dataset" - self._dataset_group = "dataset_group" - self._download_url_data = "download_url_data" - self._download_url_meta = "download_url_meta" - self._gene_id_ensembl = "ensembl" - self._gene_id_index = "ensembl" - self._gene_id_names = "names" - self._healthy = "healthy" - self._id = "id" - self._individual = "individual" - self._ncells = "ncells" - self._normalization = "normalization" - self._organ = "organ" - self._organism = "organism" - self._sample_source = "sample_source" - self._tech_sample = "bio_sample" - self._year = "year" - - self._age = "age" - self._development_stage = "development_stage" - self._ethnicity = "ethnicity" - self._sex = "sex" - self._state_exact = "state_exact" + assay_differentiation: str + assay_type_differentiation: str + bio_sample: str + cell_line: str - self._load_raw = "load_raw" - self._mapped_features = "mapped_features" - self._remove_gene_version = "remove_gene_version" + def __init__(self): + self.annotated = "annotated" + self.assay_sc = "assay_sc" + self.assay_differentiation = "assay_differentiation" + self.assay_type_differentiation = "assay_type_differentiation" + self.author = "author" + self.bio_sample = "bio_sample" + self.cell_line = "cell_line" + self.cell_types_original = "cell_types_original" + self.cell_ontology_class = "cell_ontology_class" + self.cell_ontology_id = "cell_ontology_id" + self.disease = "disease" + self.doi = "doi" + self.dataset = "dataset" + self.dataset_group = "dataset_group" + self.download_url_data = 
"download_url_data" + self.download_url_meta = "download_url_meta" + self.gene_id_ensembl = "ensembl" + self.gene_id_index = "ensembl" + self.gene_id_names = "names" + self.healthy = "healthy" + self.id = "id" + self.individual = "individual" + self.ncells = "ncells" + self.normalization = "normalization" + self.organ = "organ" + self.organism = "organism" + self.sample_source = "sample_source" + self.tech_sample = "tech_sample" + self.year = "year" + + self.age = "age" + self.development_stage = "development_stage" + self.ethnicity = "ethnicity" + self.sex = "sex" + self.state_exact = "state_exact" + + self.load_raw = "load_raw" + self.mapped_features = "mapped_features" + self.remove_gene_version = "remove_gene_version" self.classmap_source_key = "source" self.classmap_target_key = "target" @@ -234,70 +105,102 @@ def __init__(self): self.unknown_celltype_identifier = "UNKNOWN" self.not_a_cell_celltype_identifier = "NOT_A_CELL" - @property - def load_raw(self) -> str: - return self._load_raw - - @property - def mapped_features(self) -> str: - return self._mapped_features - - @property - def remove_gene_version(self) -> str: - return self._remove_gene_version + self.obs_keys = [ + "age", + "assay_sc", + "assay_differentiation", + "assay_type_differentiation", + "bio_sample", + "cell_line", + "cell_types_original", + "cell_ontology_class", + "cell_ontology_id", + "development_stage", + "ethnicity", + "healthy", + "individual", + "organ", + "organism", + "sex", + "state_exact", + "sample_source", + "tech_sample", + ] + self.var_keys = [ + "gene_id_ensembl", + "gene_id_names", + ] + self.uns_keys = [ + "annotated", + "author", + "doi", + "download_url_data", + "download_url_meta", + "id", + "mapped_features", + "normalization", + "year", + ] class AdataIdsCellxgene(AdataIds): """ - Class of constant field names of anndata.AnnData object entries, such as .uns keys and .obs columns in cellxgene + Class of constant field names of anndata.AnnData object entries", such as 
.uns keys and .obs columns in cellxgene objects. """ - _author_names: str - _disease_state_healthy: str + disease_state_healthy: str accepted_file_names: List[str] def __init__(self): - self._assay_sc = "assay" - self._cell_types_original = "free_annotation" - self._cell_ontology_class = "cell_type" - self._cell_ontology_id = "cell_type_ontology_term_id" - self._doi = "" # TODO - self._dataset = "dataset" - self._dataset_group = "dataset_group" - self._download_url_data = "" # TODO - self._download_url_meta = "" # never necessary as we interface via anndata objects - self._gene_id_ensembl = "" # TODO - self._gene_id_index = "ensembl" - self._gene_id_names = "" # TODO - self._has_celltypes = "" # TODO - self._healthy = None # is inferred from _disease - self._id = "" # TODO - self._ncells = "ncells" - self._normalization = "" # is always "raw" - self._organ = "" # TODO - self._organism = "organism" - self._year = "" # TODO - - self._age = "age" - self._author = "contributors" - self._development_stage = "development_stage" - self._ethnicity = "ethnicity" - self._sex = "sex" - self._state_exact = "disease" + self.assay_sc = "assay" + self.cell_types_original = "free_annotation" + self.cell_ontology_class = "cell_type" + self.cell_ontology_id = "cell_type_ontology_term_id" + self.doi = "doi" + self.disease = "disease" + self.gene_id_names = "names" + self.id = "id" + self.ncells = "ncells" + self.normalization = "" # is always "raw" + self.organ = "organ" + self.organism = "organism" + self.year = "year" + + self.age = "age" + self.author = "contributors" + self.development_stage = "development_stage" + self.ethnicity = "ethnicity" + self.sex = "sex" + self.state_exact = "disease" # selected element entries used for parsing: - self._disease_state_healthy = "normal" - self._author_names = "names" + self.disease_state_healthy = "normal" + self.author_names = "names" # accepted file names self.accepted_file_names = [ 
"krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad", ] - @property - def author_names(self) -> str: - return self._author_names - - @property - def disease_state_healthy(self) -> str: - return self._disease_state_healthy + self.obs_keys = [ + "age", + "development_stage", + "disease", + "ethnicity", + "healthy", + "individual", + "organ", + "organism", + "sex", + "tech_sample", + ] + self.var_keys = [ + "gene_id_names", + ] + self.uns_keys = [ + "author", + "doi", + "id", + "normalization", + "year", + ] diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index bdacbfe34..740a2c468 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -9,8 +9,10 @@ class OntologyContainerSfaira: def __init__(self): self.age = None - self.assay_sc = OntologySinglecellLibraryConstruction() + self.annotated = OntologyList(terms=[True, False]) + self.author = None self.assay_differentiation = None + self.assay_sc = OntologySinglecellLibraryConstruction() self.assay_type_differentiation = OntologyList(terms=["guided", "unguided"]) self.cell_line = OntologyCellosaurus() self.cellontology_class = "v2021-02-01" @@ -18,7 +20,7 @@ def __init__(self): self.developmental_stage = None self.doi = None self.ethnicity = None - self.healthy = [True, False] + self.healthy = OntologyList(terms=[True, False]) self.id = None self.normalization = None self.organ = OntologyUberon() diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index d780d2cdb..9a9e77dea 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -49,7 +49,9 @@ def is_child( of this parent node. 
:return: Whether attempted term is sub-term of allowed term in ontology """ - if ontology is not None: + if ontology_parent is None and ontology is None: + return True + else: if isinstance(ontology, Ontology): if ontology_parent is None: return ontology.is_node(query) @@ -59,8 +61,6 @@ def is_child( return query == ontology_parent else: raise ValueError(f"did not recognize ontology type {type(ontology)}") - else: - return True class DatasetBase(abc.ABC): @@ -131,17 +131,41 @@ class DatasetBase(abc.ABC): sample_fn: Union[None, str] _sample_fns: Union[None, List[str]] + _additional_annotation_key: Union[None, str] + def __init__( self, data_path: Union[str, None] = None, meta_path: Union[str, None] = None, cache_path: Union[str, None] = None, load_func=None, + dict_load_func_annotation=None, yaml_path: Union[str, None] = None, sample_fn: Union[str, None] = None, sample_fns: Union[List[str], None] = None, + additional_annotation_key: Union[str, None] = None, **kwargs ): + """ + + :param data_path: + :param meta_path: + :param cache_path: + :param load_func: Function to load data from disk into memory. + + Signature: load(data_dir, sample_fn, **kwargs) + :param dict_load_func_annotation: Dictionary of functions to load additional observatino-wise annotation. The + functions in the values of the dictionary can be selected via self.additional_annotation_key which needs + to correspond to a key of the dictionary. + + Signature: Dict[str, load_annotation(data_dir, sample_fn, additional_annotation_key, **kwargs)] + :param yaml_path: + :param sample_fn: + :param sample_fns: + :param additional_annotation_key: Key used by dict_load_func_annotation to identify which additional annotation + is to be loaded. + :param kwargs: + """ self._adata_ids_sfaira = AdataIdsSfaira() self.ontology_container_sfaira = OCS # Using a pre-instantiated version of this yields drastic speed-ups. 
@@ -227,6 +251,8 @@ def __init__( self.set_dataset_id(idx=yaml_vals["meta"]["dataset_index"]) self.load_func = load_func + self.dict_load_func_annotation = dict_load_func_annotation + self._additional_annotation_key = additional_annotation_key @property def _directory_formatted_id(self) -> str: @@ -355,6 +381,20 @@ def _load_cached( :return: """ + def _assembly_wrapper(): + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + # Enable loading of additional annotation, e.g. secondary cell type annotation + # The additional annotation `obs2 needs to be on a subset of the original annotation `self.adata.obs`. + if self.dict_load_func_annotation is not None: + obs2 = self.dict_load_func_annotation[self.additional_annotation_key]( + data_dir=self.data_dir, sample_fn=self.sample_fn) + assert np.all([x in self.adata.obs.index for x in obs2.index]), \ + "index mismatch between additional annotation and original" + self.adata = self.adata[obs2.index, :] + # Overwrite annotation + for k, v in obs2.items(): + self.adata.obs[k] = v + def _cached_reading(filename): if filename is not None: if os.path.exists(filename): @@ -372,10 +412,10 @@ def _cached_writing(filename): self.adata.write_h5ad(filename) if load_raw and allow_caching: - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + _assembly_wrapper() _cached_writing(self.cache_fn) elif load_raw and not allow_caching: - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + _assembly_wrapper() elif not load_raw and allow_caching: _cached_reading(self.cache_fn) _cached_writing(self.cache_fn) @@ -388,6 +428,7 @@ def load( match_to_reference: Union[str, bool, None] = None, load_raw: bool = False, allow_caching: bool = True, + set_metadata: bool = True, ): if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" @@ -417,8 +458,9 @@ def load( # Run data 
set-specific loading script: self._load_cached(load_raw=load_raw, allow_caching=allow_caching) - # Set data-specific meta data in .adata: - self._set_metadata_in_adata(adata_ids=self._adata_ids_sfaira) + if set_metadata: + # Set data-specific meta data in .adata: + self._set_metadata_in_adata() # Set loading hyper-parameter-specific meta data: self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference @@ -467,7 +509,7 @@ def _convert_and_set_var_names( axis='columns' ) # If only symbol or ensembl was supplied, the other one is inferred from a genome mapping dictionary. - if not ensembl_col and match_to_reference: + if not ensembl_col and not (isinstance(match_to_reference, bool) and not match_to_reference): id_dict = self.genome_container.names_to_id_dict id_strip_dict = self.genome_container.strippednames_to_id_dict # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, @@ -483,7 +525,7 @@ def _convert_and_set_var_names( ensids.append('n/a') self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = ensids - if not symbol_col and match_to_reference: + if not symbol_col and not (isinstance(match_to_reference, bool) and not match_to_reference): id_dict = self.genome_container.id_to_names_dict self.adata.var[self._adata_ids_sfaira.gene_id_names] = [ id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' @@ -512,7 +554,7 @@ def _collapse_genes(self, remove_gene_version): # Collapse if necessary: self.adata = collapse_matrix(adata=self.adata) - self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = self.adata.var_names + self.adata.var[self._adata_ids_sfaira.gene_id_index] = self.adata.var_names self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values def _match_features_to_reference(self): @@ -564,48 +606,47 @@ def _match_features_to_reference(self): uns=self.adata.uns ) - def 
_set_metadata_in_adata(self, adata_ids: AdataIds): + def _set_metadata_in_adata(self): """ Copy meta data from dataset class in .anndata. :return: """ # Set data set-wide attributes (.uns): - self.adata.uns[adata_ids.annotated] = self.annotated - self.adata.uns[adata_ids.author] = self.author - self.adata.uns[adata_ids.doi] = self.doi - self.adata.uns[adata_ids.download_url_data] = self.download_url_data - self.adata.uns[adata_ids.download_url_meta] = self.download_url_meta - self.adata.uns[adata_ids.id] = self.id - self.adata.uns[adata_ids.normalization] = self.normalization - self.adata.uns[adata_ids.year] = self.year + self.adata.uns[self._adata_ids_sfaira.annotated] = self.annotated + self.adata.uns[self._adata_ids_sfaira.author] = self.author + self.adata.uns[self._adata_ids_sfaira.doi] = self.doi + self.adata.uns[self._adata_ids_sfaira.download_url_data] = self.download_url_data + self.adata.uns[self._adata_ids_sfaira.download_url_meta] = self.download_url_meta + self.adata.uns[self._adata_ids_sfaira.id] = self.id + self.adata.uns[self._adata_ids_sfaira.normalization] = self.normalization + self.adata.uns[self._adata_ids_sfaira.year] = self.year # Set cell-wise or data set-wide attributes (.uns / .obs): # These are saved in .uns if they are data set wide to save memory. 
for x, y, z, v in ( - [self.age, adata_ids.age, self.age_obs_key, self.ontology_container_sfaira.age], - [self.assay_sc, adata_ids.assay_sc, self.assay_sc_obs_key, self.ontology_container_sfaira.assay_sc], - [self.assay_differentiation, adata_ids.assay_differentiation, self.assay_differentiation_obs_key, + [self.age, self._adata_ids_sfaira.age, self.age_obs_key, self.ontology_container_sfaira.age], + [self.assay_sc, self._adata_ids_sfaira.assay_sc, self.assay_sc_obs_key, self.ontology_container_sfaira.assay_sc], + [self.assay_differentiation, self._adata_ids_sfaira.assay_differentiation, self.assay_differentiation_obs_key, self.ontology_container_sfaira.assay_differentiation], - [self.assay_type_differentiation, adata_ids.assay_type_differentiation, + [self.assay_type_differentiation, self._adata_ids_sfaira.assay_type_differentiation, self.assay_type_differentiation_obs_key, self.ontology_container_sfaira.assay_type_differentiation], - [self.bio_sample, adata_ids.bio_sample, self.bio_sample_obs_key, None], - [self.cell_line, adata_ids.cell_line, self.cell_line_obs_key, + [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key, None], + [self.cell_line, self._adata_ids_sfaira.cell_line, self.cell_line_obs_key, self.ontology_container_sfaira.cell_line], - [self.development_stage, adata_ids.development_stage, self.development_stage_obs_key, + [self.development_stage, self._adata_ids_sfaira.development_stage, self.development_stage_obs_key, self.ontology_container_sfaira.developmental_stage], - [self.ethnicity, adata_ids.ethnicity, self.ethnicity_obs_key, + [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.ethnicity_obs_key, self.ontology_container_sfaira.ethnicity], - [self.healthy, adata_ids.healthy, self.healthy_obs_key, self.ontology_container_sfaira.healthy], - [self.individual, adata_ids.individual, self.individual_obs_key, None], - [self.organ, adata_ids.organ, self.organ_obs_key, self.ontology_container_sfaira.organism], - 
[self.organism, adata_ids.organism, self.organism_obs_key, + [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key, None], + [self.organ, self._adata_ids_sfaira.organ, self.organ_obs_key, self.ontology_container_sfaira.organ], + [self.organism, self._adata_ids_sfaira.organism, self.organism_obs_key, self.ontology_container_sfaira.organism], - [self.sample_source, adata_ids.sample_source, self.sample_source_obs_key, + [self.sample_source, self._adata_ids_sfaira.sample_source, self.sample_source_obs_key, self.ontology_container_sfaira.sample_source], - [self.sex, adata_ids.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], - [self.state_exact, adata_ids.state_exact, self.state_exact_obs_key, None], - [self.tech_sample, adata_ids.tech_sample, self.tech_sample_obs_key, None], + [self.sex, self._adata_ids_sfaira.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], + [self.state_exact, self._adata_ids_sfaira.state_exact, self.state_exact_obs_key, None], + [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key, None], ): if x is None and z is None: self.adata.uns[y] = None @@ -626,10 +667,45 @@ def _set_metadata_in_adata(self, adata_ids: AdataIds): self.adata.uns[y] = UNS_STRING_META_IN_OBS # Remove potential pd.Categorical formatting: self._value_protection( - attr="obs", allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) + attr=y, allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) self.adata.obs[y] = self.adata.obs[z].values.tolist() else: assert False, "switch option should not occur" + # Load boolean labels: + for x, y, z, v, w in ( + [self.healthy, self._adata_ids_sfaira.healthy, self.healthy_obs_key, self.ontology_container_sfaira.healthy, + self.healthy_state_healthy], + ): + if x is None and z is None: + self.adata.uns[y] = None + elif x is not None and z is None: + # Attribute supplied per data set: Write into .uns. 
+ if w is None: + self.adata.uns[y] = x + else: + self.adata.uns[y] = x == w + elif z is not None: + # Attribute supplied per cell: Write into .obs. + # Search for direct match of the sought-after column name or for attribute specific obs key. + if z not in self.adata.obs.keys(): + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. + self.adata.uns[y] = None + print(f"WARNING: attribute {y} of data set {self.id} was not found in column {z}") # debugging + else: + # Include flag in .uns that this attribute is in .obs: + self.adata.uns[y] = UNS_STRING_META_IN_OBS + # Remove potential pd.Categorical formatting: + label_y = self.adata.obs[z].values + # Use reference string to establish equality if available: + if w is not None: + label_y = label_y == w + self._value_protection( + attr=y, allowed=v, attempted=np.unique(label_y).tolist()) + self.adata.obs[y] = label_y.tolist() + else: + assert False, "switch option should not occur" # Set cell-wise attributes (.obs): # None so far other than celltypes. 
# Set cell types: @@ -657,7 +733,6 @@ def streamline(self, format: str = "sfaira", clean: bool = False): adata_fields = AdataIdsCellxgene() else: raise ValueError(f"did not recognize format {format}") - self._set_metadata_in_adata(adata_ids=adata_fields) if clean: if self.adata.varm is not None: del self.adata.varm @@ -669,36 +744,22 @@ def streamline(self, format: str = "sfaira", clean: bool = False): del self.adata.obsp # Only retain target elements in adata.uns: self.adata.uns = dict([ - (k, v) for k, v in self.adata.uns.items() if k in [ - adata_fields.annotated, - adata_fields.author, - adata_fields.doi, - adata_fields.download_url_data, - adata_fields.download_url_meta, - adata_fields.id, - adata_fields.normalization, - adata_fields.year, - ] + (getattr(adata_fields, k), self.adata.uns[getattr(self._adata_ids_sfaira, k)]) + if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() else None + for k in adata_fields.uns_keys ]) # Only retain target elements in adata.var: - self.adata.obs = self.adata.var[[ - adata_fields.gene_id_names, - adata_fields.gene_id_ensembl, - ]] + self.adata.var = pd.DataFrame(dict([ + (getattr(adata_fields, k), self.adata.var[getattr(self._adata_ids_sfaira, k)]) + for k in adata_fields.var_keys + if getattr(self._adata_ids_sfaira, k) in self.adata.var.keys() + ])) # Only retain target columns in adata.obs: - self.adata.obs = self.adata.obs.loc[:, [ - adata_fields.age, - adata_fields.bio_sample, - adata_fields.development_stage, - adata_fields.ethnicity, - adata_fields.healthy, - adata_fields.individual, - adata_fields.organ, - adata_fields.organism, - adata_fields.sex, - adata_fields.state_exact, - adata_fields.tech_sample, - ]] + self.adata.obs = pd.DataFrame(dict([ + (getattr(adata_fields, k), self.adata.obs[getattr(self._adata_ids_sfaira, k)]) + for k in adata_fields.obs_keys + if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() + ])) def load_tobacked( self, @@ -811,7 +872,7 @@ def write_ontology_class_map( """ 
Load class maps of free text cell types to ontology classes. - :param fn: File name of csv to load class maps from. + :param fn: File name of tsv to write class maps to. :param protected_writing: Only write if file was not already found. :return: """ @@ -864,7 +925,8 @@ def load_ontology_class_map(self, fn): if os.path.exists(fn): self.cell_ontology_map = self._read_class_map(fn=fn) else: - warnings.warn(f"file {fn} does not exist") + if self.cellontology_original_obs_key is not None: + warnings.warn(f"file {fn} does not exist but cellontology_original_obs_key is given") def project_celltypes_to_ontology(self): """ @@ -885,11 +947,17 @@ def project_celltypes_to_ontology(self): # Validate mapped IDs based on ontology: # This aborts with a readable error if there was a target in the mapping file that does not match the # ontology. - self._value_protection( - attr="celltypes", - allowed=self.ontology_celltypes, - attempted=np.unique(labels_mapped).tolist() - ) + if self.cell_ontology_map is not None: + # This protection blocks progression in the unit test if not deactivated. + self._value_protection( + attr="celltypes", + allowed=self.ontology_celltypes, + attempted=[ + x for x in np.unique(labels_mapped).tolist() + if x != self._adata_ids_sfaira.unknown_celltype_identifier and + x != self._adata_ids_sfaira.not_a_cell_celltype_identifier + ] + ) self.adata.obs[self._adata_ids_sfaira.cell_ontology_class] = labels_mapped self.cellontology_class_obs_key = self._adata_ids_sfaira.cell_ontology_class self.adata.obs[self._adata_ids_sfaira.cell_types_original] = labels_original @@ -897,15 +965,17 @@ def project_celltypes_to_ontology(self): # The IDs are not read from a source file but inferred based on the class name. # TODO this could be changed in the future, this allows this function to be used both on cell type name mapping # files with and without the ID in the third column. 
- ids_mapped = [ - self.ontology_container_sfaira.cellontology_class.id_from_name(x) - if x not in [ - self._adata_ids_sfaira.unknown_celltype_identifier, - self._adata_ids_sfaira.not_a_cell_celltype_identifier - ] else x - for x in labels_mapped - ] - self.adata.obs[self._adata_ids_sfaira.cell_ontology_id] = ids_mapped + if self.cell_ontology_map is not None: + # This mapping blocks progression in the unit test if not deactivated. + ids_mapped = [ + self.ontology_container_sfaira.cellontology_class.id_from_name(x) + if x not in [ + self._adata_ids_sfaira.unknown_celltype_identifier, + self._adata_ids_sfaira.not_a_cell_celltype_identifier + ] else x + for x in labels_mapped + ] + self.adata.obs[self._adata_ids_sfaira.cell_ontology_id] = ids_mapped @property def citation(self): @@ -1053,10 +1123,18 @@ def clean(s): f"{clean(self._assay_sc)}_" \ f"{clean(author)}_" \ f"{idx}_" \ - f"{self.doi}" + f"{self.doi_main}" # Properties: + @property + def additional_annotation_key(self) -> Union[None, str]: + return self._additional_annotation_key + + @additional_annotation_key.setter + def additional_annotation_key(self, x: str): + self._additional_annotation_key = x + @property def age(self) -> Union[None, str]: if self._age is not None: @@ -1228,7 +1306,7 @@ def development_stage(self, x: str): self._development_stage = x @property - def doi(self) -> str: + def doi(self) -> Union[str, List[str]]: if self._doi is not None: return self._doi else: @@ -1239,13 +1317,21 @@ def doi(self) -> str: return self.meta[self._adata_ids_sfaira.doi] @doi.setter - def doi(self, x: str): + def doi(self, x: Union[str, List[str]]): self.__erasing_protection(attr="doi", val_old=self._doi, val_new=x) self._doi = x + @property + def doi_main(self) -> str: + """ + Yields the main DOI associated with the study, defined as the DOI that comes first in alphabetical order. 
+ """ + return self.doi if isinstance(self.doi, str) else np.sort(self.doi)[0] + @property def directory_formatted_doi(self) -> str: - return "d" + "_".join("_".join("_".join(self.doi.split("/")).split(".")).split("-")) + # Chose first doi in list. + return "d" + "_".join("_".join("_".join(self.doi_main.split("/")).split(".")).split("-")) @property def download_url_data(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: @@ -1359,8 +1445,8 @@ def id(self) -> str: if self._id is not None: return self._id else: - raise AttributeError(f"Dataset ID was not set in dataloader in {self.doi}, please ensure the dataloader " - f"constructor of this dataset contains a call to self.set_dataset_id()") + raise AttributeError(f"Dataset ID was not set in dataloader in {self.doi_main}, please ensure the " + f"dataloader constructor of this dataset contains a call to self.set_dataset_id()") @id.setter def id(self, x: str): @@ -1503,8 +1589,6 @@ def cellontology_class_obs_key(self) -> str: @cellontology_class_obs_key.setter def cellontology_class_obs_key(self, x: str): - self.__erasing_protection(attr="cellontology_class_obs_key", val_old=self._cellontology_class_obs_key, - val_new=x) self._cellontology_class_obs_key = x @property @@ -1513,7 +1597,6 @@ def cellontology_id_obs_key(self) -> str: @cellontology_id_obs_key.setter def cellontology_id_obs_key(self, x: str): - self.__erasing_protection(attr="cellontology_id_obs_key", val_old=self._cellontology_id_obs_key, val_new=x) self._cellontology_id_obs_key = x @property @@ -1777,7 +1860,7 @@ def ontology_organ(self): @property def celltypes_universe(self): - if self._celltype_universe: + if self._celltype_universe is None: self._celltype_universe = CelltypeUniverse( cl=self.ontology_celltypes, uberon=self.ontology_container_sfaira.organ, @@ -1826,7 +1909,7 @@ def _value_protection( Does not check if allowed is None. - :param attr: Attribut to set. + :param attr: Attribute to set. :param allowed: Constraint for values of `attr`. 
Either ontology instance used to constrain entries, or list of allowed values. :param attempted: Value(s) to attempt to set in `attr`. @@ -1840,7 +1923,10 @@ def _value_protection( attempted = [attempted] for x in attempted: if not is_child(query=x, ontology=allowed): - raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {str(allowed)}") + if isinstance(allowed, Ontology): + # use node names instead of ontology object to produce a readable error message + allowed = allowed.node_names + raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {allowed}") def subset_cells(self, key, values): """ diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 746ba88d1..5a43ad1f5 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -27,13 +27,14 @@ def map_fn(inputs): :param inputs: :return: None if function ran, error report otherwise """ - ds, remove_gene_version, match_to_reference, load_raw, allow_caching, func, kwargs_func = inputs + ds, remove_gene_version, match_to_reference, load_raw, allow_caching, set_metadata, func, kwargs_func = inputs try: ds.load( remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, load_raw=load_raw, allow_caching=allow_caching, + set_metadata=set_metadata, ) if func is not None: x = func(ds, **kwargs_func) @@ -85,6 +86,7 @@ def load( match_to_reference: Union[str, bool, None] = None, load_raw: bool = False, allow_caching: bool = True, + set_metadata: bool = True, processes: int = 1, func=None, kwargs_func: Union[None, dict] = None, @@ -112,6 +114,7 @@ def func(dataset, **kwargs_func): match_to_reference, load_raw, allow_caching, + set_metadata, func, kwargs_func ] @@ -239,7 +242,7 @@ def write_ontology_class_map( """ Write cell type maps of free text cell types to ontology classes. - :param fn: File name of csv to load class maps from. + :param fn: File name of tsv to write class maps to. 
:param protected_writing: Only write if file was not already found. """ tab = [] @@ -454,6 +457,29 @@ def subset_cells(self, key, values: Union[str, List[str]]): if self.datasets[x].ncells == 0: # No observations (cells) left. del self.datasets[x] + @property + def additional_annotation_key(self) -> Dict[str, Union[None, str]]: + """" + Return dictionary of additional_annotation_key for each data set with ids as keys. + """ + return dict([ + (k, self.datasets[k].additional_annotation_key) + for k, v in self.datasets.items() + ]) + + @additional_annotation_key.setter + def additional_annotation_key(self, x: Dict[str, Union[None, str]]): + """ + Allows setting of additional_annotation_key in a subset of datasets identifed by keys in x. + + :param x: Dictionary with data set ids in keys and new _additional_annotation_key values to be setted in values. + Note that you can either add or change secondary annotation by setting a value to a string or remove it + by setting a value to None. + :return: + """ + for k, v in x.items(): + self.datasets[k].additional_annotation_key = v + class DatasetGroupDirectoryOriented(DatasetGroup): @@ -481,8 +507,10 @@ def __init__( datasets = [] self._cwd = os.path.dirname(file_base) dataset_module = str(self._cwd.split("/")[-1]) - loader_pydoc_path = "sfaira.data.dataloaders.loaders." if str(self._cwd.split("/")[-5]) == "sfaira" else \ - "sfaira_extension.data.dataloaders.loaders." + package_source = "sfaira" if str(self._cwd.split("/")[-5]) == "sfaira" else "sfairae" + loader_pydoc_path_sfaira = "sfaira.data.dataloaders.loaders." + loader_pydoc_path_sfairae = "sfaira_extension.data.dataloaders.loaders." 
+ loader_pydoc_path = loader_pydoc_path_sfaira if package_source == "sfaira" else loader_pydoc_path_sfairae if "group.py" in os.listdir(self._cwd): DatasetGroupFound = pydoc.locate(loader_pydoc_path + dataset_module + ".group.DatasetGroup") dsg = DatasetGroupFound(data_path=data_path, meta_path=meta_path, cache_path=cache_path) @@ -499,6 +527,17 @@ def __init__( # - load(): Loading function that return anndata instance. # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles load_func = pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".load") + load_func_annotation = \ + pydoc.locate(loader_pydoc_path + dataset_module + "." + file_module + ".LOAD_ANNOTATION") + # Also check sfaira_extension for additional load_func_annotation: + if package_source != "sfairae": + load_func_annotation_sfairae = pydoc.locate(loader_pydoc_path_sfairae + dataset_module + + "." + file_module + ".LOAD_ANNOTATION") + # LOAD_ANNOTATION is a dictionary so we can use update to extend it. + if load_func_annotation_sfairae is not None and load_func_annotation is not None: + load_func_annotation.update(load_func_annotation_sfairae) + elif load_func_annotation_sfairae is not None and load_func_annotation is None: + load_func_annotation = load_func_annotation_sfairae sample_fns = pydoc.locate(loader_pydoc_path + dataset_module + "." 
+ file_module + ".SAMPLE_FNS") fn_yaml = os.path.join(self._cwd, file_module + ".yaml") @@ -521,6 +560,7 @@ def __init__( meta_path=meta_path, cache_path=cache_path, load_func=load_func, + dict_load_func_annotation=load_func_annotation, sample_fn=x, sample_fns=sample_fns if sample_fns != [None] else None, yaml_path=fn_yaml, @@ -534,6 +574,7 @@ def __init__( meta_path=meta_path, cache_path=cache_path, load_func=load_func, + load_func_annotation=load_func_annotation, sample_fn=x, sample_fns=sample_fns if sample_fns != [None] else None, yaml_path=fn_yaml, @@ -694,6 +735,7 @@ def load( match_to_reference: Union[str, bool, None] = None, remove_gene_version: bool = True, load_raw: bool = False, + set_metadata: bool = True, allow_caching: bool = True, processes: int = 1, ): @@ -716,6 +758,7 @@ def load( match_to_reference=match_to_reference, load_raw=load_raw, allow_caching=allow_caching, + set_metadata=set_metadata, processes=processes, ) @@ -887,7 +930,7 @@ def subset(self, key, values): """ for x in self.dataset_groups: x.subset(key=key, values=values) - self.dataset_groups = [x for x in self.dataset_groups if x.datasets is not None] # Delete empty DatasetGroups + self.dataset_groups = [x for x in self.dataset_groups if x.datasets] # Delete empty DatasetGroups def subset_cells(self, key, values: Union[str, List[str]]): """ @@ -945,3 +988,38 @@ def load_config(self, fn: Union[str, os.PathLike]): tab = pd.read_csv(fn, header=0, index_col=None, sep="\t") ids_keep = tab["id"].values self.subset(key="id", values=ids_keep) + + @property + def additional_annotation_key(self) -> List[Dict[str, Union[None, str]]]: + """" + Return list (by data set group) of dictionaries of additional_annotation_key for each data set with ids as keys. 
+ """ + return [ + dict([ + (k, x.datasets[k].additional_annotation_key) + for k, v in x.datasets.items() + ]) for x in self.dataset_groups + ] + + @additional_annotation_key.setter + def additional_annotation_key(self, x: Dict[str, Union[None, str]]): + """ + Allows setting of additional_annotation_key in a subset of datasets identifed by keys in x. + + The input is not structured by DatasetGroups but only by ID, all groups are checked for matching IDs. + + :param x: Dictionary with data set ids in keys and new _additional_annotation_key values to be setted in values. + Note that you can either add or change secondary annotation by setting a value to a string or remove it + by setting a value to None. + :return: + """ + for k, v in x.items(): + counter = 0 + for x in self.dataset_groups: + if k in x.ids: + x.datasets[k].additional_annotation_key = v + counter += 1 + if counter == 0: + warnings.warn(f"did not data set matching ID {k}") + elif counter > 1: + warnings.warn(f"found more than one ({counter}) data set matching ID {k}") diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv index 339fd1671..35c66097c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.tsv @@ -8,10 +8,10 @@ Club-like secretory secretory cell CL:0000151 Endothelial endothelial cell CL:0000115 Epithelial epithelial cell of lung CL:0000082 Goblet-like secretory secretory cell CL:0000151 -Hematopoietic B Cells B cell CL:0000236 -Hematopoietic Macrophage macrophage CL:0000235 -Hematopoietic Natural Killer Cell natural killer cell CL:0000623 -Hematopoietic T Cells T cell CL:0000084 +Hematopoietic, B Cells B cell CL:0000236 +Hematopoietic, Macrophage 
macrophage CL:0000235 +Hematopoietic, Natural Killer Cell natural killer cell CL:0000623 +Hematopoietic, T Cells T cell CL:0000084 Immune leukocyte CL:0000738 Intermediate ciliated ciliated cell CL:0000064 Mesenchyme RSPO2+ mesenchymal cell CL:0008019 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index aaa209e7f..cccb6c596 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -28,18 +28,19 @@ def __init__(self, **kwargs): self.set_dataset_id(idx=1) - def _load(self): - import anndata2ri - from rpy2.robjects import r - - fn = os.path.join(self.data_dir, "tissue.rdata") - anndata2ri.activate() # TODO: remove global activation of anndata2ri and use localconverter once it's fixed - adata = r( - f"library(Seurat)\n" - f"load('{fn}')\n" - f"new_obj = CreateSeuratObject(counts = tissue@raw.data)\n" - f"new_obj@meta.data = tissue@meta.data\n" - f"as.SingleCellExperiment(new_obj)\n" - ) - - return adata + +def load(data_dir, **kwargs): + import anndata2ri + from rpy2.robjects import r + + fn = os.path.join(data_dir, "tissue.rdata") + anndata2ri.activate() # TODO: remove global activation of anndata2ri and use localconverter once it's fixed + adata = r( + f"library(Seurat)\n" + f"load('{fn}')\n" + f"new_obj = CreateSeuratObject(counts = tissue@raw.data)\n" + f"new_obj@meta.data = tissue@meta.data\n" + f"as.SingleCellExperiment(new_obj)\n" + ) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index 
455de3e9c..0dbe7c026 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -47,7 +47,7 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: None # TODO: figure out which celltype labels to add here + cellontology_original_obs_key: feature_wise: var_ensembl_col: "ensembl" var_symbol_col: "index" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 8fe73a14d..8a8725436 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -117,7 +117,7 @@ def load(data_dir, **kwargs): 'FetalHeart_2': 'heart', 'FetalIntestine_1': 'intestine', 'FetalIntestine_2': 'intestine', - 'FetalIntestine_3': 'intestine', + 'FetalIntetsine_3': 'intestine', 'FetalIntestine_4': 'intestine', 'FetalIntestine_5': 'intestine', 'FetalKidney_3': 'kidney', @@ -148,6 +148,12 @@ def load(data_dir, **kwargs): 'PeripheralBlood_1': 'blood', 'Placenta_1': 'placenta', } + sex_dict = { + 'Male': "male", + 'Female': "female", + 'nan': "nan", + 'FeM=male': "nan", + } adata = anndata.read(os.path.join(data_dir, "HCL_Fig1_adata.h5ad")) # convert to sparse matrix @@ -206,5 +212,6 @@ def load(data_dir, **kwargs): "sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", "age", "celltype_specific", "cluster_specific", "gender", "assay_sc", "source"] adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] + adata.obs["gender"] = [sex_dict[x] for x in adata.obs["gender"].values] return adata diff 
--git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index b5fce142e..6707eab05 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -28,7 +28,7 @@ def __init__(self, **kwargs): self.bio_sample_obs_key = "sample" self.cellontology_original_obs_key = "cluster" - self.organ_obs_key = "sample_anatomy" + self.organ_obs_key = "organ" self.var_ensembl_col = "ensembl" self.var_symbol_col = "name" @@ -58,6 +58,7 @@ def load(data_dir, **kwargs): header=None )[0].values obs = pandas.read_csv(fn[1]) + obs.fillna("isnan", inplace=True) # Match annotation to raw data. obs.index = obs["cell"].values diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv index f7f86ef30..b5ed7859b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.tsv @@ -11,5 +11,6 @@ T/NKT cells alpha-beta T cell CL:0000789 cDC1 conventional dendritic cell CL:0000990 cDC2 conventional dendritic cell CL:0000990 migDC dendritic cell CL:0000451 +isnan UNKNOWN UNKNOWN pDC plasmacytoid dendritic cell CL:0000784 yd T cells gamma-delta T cell CL:0000798 diff --git a/sfaira/data/utils_scripts/create_meta.py b/sfaira/data/utils_scripts/create_meta.py index a132f0bc4..f14c714ca 100644 --- a/sfaira/data/utils_scripts/create_meta.py +++ b/sfaira/data/utils_scripts/create_meta.py @@ -6,7 +6,7 @@ def write_meta(args0, args1): - args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) + 
args0.write_meta(fn_meta=None, dir_out=args1) return None diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index e22902250..3699bc3b4 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -7,14 +7,13 @@ def write_meta(args0, args1): # Write meta data, cache. - args0.write_meta(fn_meta=None, dir_out=args1, fn_data=None) + args0.write_meta(fn_meta=None, dir_out=args1) # Test load from cache. args0.load( remove_gene_version=True, load_raw=False, allow_caching=False, ) - args0.write_ontology_class_map(fn=args0.fn_ontology_class_map_tsv) return None diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index b641db637..ad1974ab0 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -39,22 +39,6 @@ def test_dsgs_config_write_load(): assert np.all(ds.ids == ds2.ids) -def test_dsg_load(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load() - - -def test_dsg_adata(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - _ = ds.adata - - """ TODO tests from here on down require cached data for mouse lung """ @@ -132,3 +116,19 @@ def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): annotated_only=False ) assert isinstance(ds.adata.X[:], scipy.sparse.csr_matrix), "%s" % type(ds.adata.X) + + +def test_dsg_load(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + 
ds = DatasetSuperGroup(dataset_groups=[ds]) + ds.load() + + +def test_dsg_adata(): + ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds = DatasetSuperGroup(dataset_groups=[ds]) + _ = ds.adata diff --git a/sfaira/unit_tests/data_contribution/conftest.py b/sfaira/unit_tests/data_contribution/conftest.py deleted file mode 100644 index 94f90450e..000000000 --- a/sfaira/unit_tests/data_contribution/conftest.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Parameterizing test according to https://stackoverflow.com/questions/40880259/how-to-pass-arguments-in-pytest-by-command-line -""" - - -def pytest_addoption(parser): - parser.addoption("--doi_sfaira_repr", action="store", default="d10_1016_j_cmet_2019_01_021") - - -def pytest_generate_tests(metafunc): - # This is called for every test. Only get/set command line arguments - # if the argument is specified in the list of test "fixturenames". 
- option_value = metafunc.config.option.name - if "doi_sfaira_repr" in metafunc.fixturenames and option_value is not None: - metafunc.parametrize("doi_sfaira_repr", [option_value]) diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index bda0aefcc..9e2d98e92 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -1,14 +1,15 @@ import os import pydoc -from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup +from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup, DatasetBase +from sfaira.data.utils import read_yaml try: import sfaira_extension as sfairae except ImportError: sfairae = None -def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j.journal.2021.01.001"): +def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_j_cmet_2019_01_021"): """ Unit test to assist with data set contribution. @@ -34,10 +35,8 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. """ remove_gene_version = True match_to_reference = None - classmap_by_file = True - # ToDo build one class map per file or per data loader (potentially many per file) - flattened_doi = doi_sfaira_repr # ToDo: add correct module here as "YOUR_STUDY" + flattened_doi = doi_sfaira_repr # Define file names and loader paths in sfaira or sfaira_extension: # Define base paths of loader collections in sfaira and sfaira_extension: dir_loader_sfaira = "sfaira.data.dataloaders.loaders." @@ -50,9 +49,10 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. # Check if loader name is a directory either in sfaira or sfaira_extension loader collections: if flattened_doi in os.listdir(file_path_sfaira): dir_loader = dir_loader_sfaira + "." 
+ flattened_doi - file_path = pydoc.locate(dir_loader + ".FILE_PATH") + package_source = "sfaira" elif flattened_doi in os.listdir(file_path_sfairae): dir_loader = dir_loader_sfairae + "." + flattened_doi + package_source = "sfairae" else: raise ValueError("data loader not found in sfaira and also not in sfaira_extension") file_path = pydoc.locate(dir_loader + ".FILE_PATH") @@ -67,81 +67,89 @@ def test_load(dir_template: str = "./template_data", doi_sfaira_repr="10.1000/j. # You can set load_raw to True while debugging when caching works already to speed the test up, # but be sure to set load_raw to True for final tests. ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, + remove_gene_version=False, + match_to_reference=False, load_raw=True, # tests raw loading - allow_caching=True # tests caching + allow_caching=True, # tests caching ) + assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {dir_template}" # Create cell type conversion table: cwd = os.path.dirname(file_path) dataset_module = str(cwd.split("/")[-1]) - if classmap_by_file: - for f in os.listdir(cwd): - if os.path.isfile(os.path.join(cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(f.split(".")[:-1]) - DatasetFound = pydoc.locate(dir_loader + "." + file_module + ".Dataset") - # Check if global objects are available: - # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles - # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile - sample_fns = pydoc.locate(dir_loader + "." + file_module + ".SAMPLE_FNS") - sample_ids = pydoc.locate(dir_loader + dataset_module + "." 
+ file_module + ".SAMPLE_IDS") - if sample_fns is not None and sample_ids is None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_fn=x, - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template - ) - for x in sample_fns - ] - elif sample_fns is None and sample_ids is not None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_id=x, - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template - ) - for x in sample_ids - ] - elif sample_fns is not None and sample_ids is not None: - raise ValueError(f"sample_fns and sample_ids both found for {f}") - else: - datasets_f = [DatasetFound( + # Group data sets by file module: + # Note that if we were not grouping the cell type map .tsv files by file module, we could directly call + # write_ontology_class_map on the ds. + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + + # I) Instantiate Data set group to get all IDs of data sets associated with this .py file. + # Note that all data sets in this directory are already loaded in ds, so we just need the IDs. + DatasetFound = pydoc.locate(dir_loader + "." + file_module + ".Dataset") + # Load objects from name space: + # - load(): Loading function that return anndata instance. + # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles + load_func = pydoc.locate(dir_loader + "." + file_module + ".load") + load_func_annotation = pydoc.locate(dir_loader + "." + file_module + ".LOAD_ANNOTATION") + # Also check sfaira_extension for additional load_func_annotation: + if package_source != "sfairae": + load_func_annotation_sfairae = pydoc.locate(dir_loader_sfairae + "." + dataset_module + + "." 
+ file_module + ".LOAD_ANNOTATION") + # LOAD_ANNOTATION is a dictionary so we can use update to extend it. + if load_func_annotation_sfairae is not None and load_func_annotation is not None: + load_func_annotation.update(load_func_annotation_sfairae) + elif load_func_annotation_sfairae is not None and load_func_annotation is None: + load_func_annotation = load_func_annotation_sfairae + sample_fns = pydoc.locate(dir_loader + "." + file_module + ".SAMPLE_FNS") + fn_yaml = os.path.join(cwd, file_module + ".yaml") + fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None + # Check for sample_fns in yaml: + if fn_yaml is not None: + assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" + yaml_vals = read_yaml(fn=fn_yaml) + if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: + sample_fns = yaml_vals["meta"]["sample_fns"] + if sample_fns is None: + sample_fns = [None] + # Here we distinguish between class that are already defined and those that are not. + # The latter case arises if meta data are defined in YAMLs and _load is given as a function. + if DatasetFound is None: + datasets_f = [ + DatasetBase( data_path=dir_template, meta_path=dir_template, - cache_path=dir_template - )] - # Build a data set group from the already loaded data sets and use the group ontology writing - # function. - dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) - # Write this directly into sfaira installation so that it can be committed via git. - fn = os.path.join(cwd, file_module + ".tsv") - dsg_f.write_ontology_class_map( - fn=fn, - protected_writing=True, - n_suggest=4, - ) - dsg_f.clean_ontology_class_map(fn=fn) - else: - for k, v in ds.datasets.items(): - # Write this directly into sfaira installation so that it can be committed via git. 
- fn = os.path.join("/".join(file_path.split("/")[:-1]), v.fn_ontology_class_map_tsv) - v.write_ontology_class_map( - fn=fn, - protected_writing=True, - n_suggest=10, - ) - v.clean_ontology_class_map(fn=fn) - - # ToDo: conflicts are not automatically resolved, please go back to - # https://www.ebi.ac.uk/ols/ontologies/cl - # for every mismatch or conflict and add the correct cell ontology class name into the .csv "target" column. + cache_path=dir_template, + load_func=load_func, + dict_load_func_annotation=load_func_annotation, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) for x in sample_fns + ] + else: + datasets_f = [ + DatasetFound( + data_path=dir_template, + meta_path=dir_template, + cache_path=dir_template, + load_func=load_func, + load_func_annotation=load_func_annotation, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) for x in sample_fns + ] + # II) Build a data set group from the already loaded data sets and use the group ontology writing + # function. + dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) + # III) Write this directly into sfaira installation so that it can be committed via git. 
+ dsg_f.write_ontology_class_map( + fn=os.path.join(cwd, file_module + ".tsv"), + protected_writing=True, + n_suggest=4, + ) # Test loading from cache: ds = DatasetGroupDirectoryOriented( diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index e1f50bed4..3a0e73d35 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -7,6 +7,7 @@ from typing import Dict, List, Tuple, Union import warnings +from sfaira.consts.adata_fields import AdataIdsSfaira from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE FILE_PATH = __file__ @@ -62,7 +63,7 @@ class OntologyList(Ontology): def __init__( self, - terms: List[str], + terms: Union[List[Union[str, bool, int]]], **kwargs ): self.nodes = terms @@ -77,6 +78,7 @@ def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: :param x: Free text node label which is to be matched to ontology nodes. :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :param n_suggest: number of suggestions returned :return List of proposed matches in ontology. 
""" from fuzzywuzzy import fuzz From 492464c83996120b82a6d8b0cb5d1ecf1311c5af Mon Sep 17 00:00:00 2001 From: Abdul Moeed Date: Fri, 12 Mar 2021 15:45:54 +0100 Subject: [PATCH 093/161] Change hard-coded path to os.path.join (#191) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 09c36c2f8..fd08e61db 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ def package_files(directory): WD = os.path.dirname(__file__) -templates = package_files(f'{WD}/sfaira/commands/templates') +templates = package_files(os.path.join(WD, "sfaira", "commands", "templates")) setup( name='sfaira', From 63c1b06b8350c591e4884a124d48336db6c245d2 Mon Sep 17 00:00:00 2001 From: Zethson Date: Mon, 15 Mar 2021 16:24:26 +0100 Subject: [PATCH 094/161] add FILE_PATH = __file__ to __init__.py in templates Signed-off-by: Zethson --- .../{{ cookiecutter.doi_sfaira_repr }}/__init__.py | 1 + .../{{ cookiecutter.doi_sfaira_repr }}/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py index e69de29bb..b1d5b2c2b 100644 --- a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py +++ b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py index e69de29bb..b1d5b2c2b 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ From 72a25e9d90dd93361d14ee8935adfe93fef19546 Mon Sep 17 00:00:00 2001 From: 
Zethson Date: Mon, 15 Mar 2021 16:35:06 +0100 Subject: [PATCH 095/161] add " strip for questionary answers Signed-off-by: Zethson --- sfaira/commands/questionary.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sfaira/commands/questionary.py b/sfaira/commands/questionary.py index ad5db11c1..34ee8e1b1 100644 --- a/sfaira/commands/questionary.py +++ b/sfaira/commands/questionary.py @@ -64,4 +64,6 @@ def sfaira_questionary(function: str, log.debug(f'User was asked the question: ||{question}|| as: {function}') log.debug(f'User selected {answer}') + if isinstance(answer, str): + answer = answer.strip('\"') return answer # type: ignore From 33f14d32d4dfe0cb7023064448886e44a8f82c0a Mon Sep 17 00:00:00 2001 From: Zethson Date: Mon, 15 Mar 2021 16:40:11 +0100 Subject: [PATCH 096/161] fix asking for DL urls for multiple ds Signed-off-by: Zethson --- sfaira/commands/create_dataloader.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py index 6238d7172..710f63db9 100644 --- a/sfaira/commands/create_dataloader.py +++ b/sfaira/commands/create_dataloader.py @@ -130,12 +130,13 @@ def _prompt_dataloader_configuration(self): f'{self.template_attributes.year}_{self.template_attributes.assay}_' \ f'{first_author_lastname}_001' self.template_attributes.id = self.template_attributes.id_without_doi + f'_{self.template_attributes.doi_sfaira_repr}' - self.template_attributes.download_url_data = sfaira_questionary(function='text', - question='URL to download the data', - default='https://ftp.ncbi.nlm.nih.gov/geo/') - self.template_attributes.download_url_meta = sfaira_questionary(function='text', - question='URL to download the meta data', - default='https://ftp.ncbi.nlm.nih.gov/geo/') + if self.template_attributes.dataloader_type == 'single_dataset': + self.template_attributes.download_url_data = sfaira_questionary(function='text', + question='URL to download the data', + 
default='https://ftp.ncbi.nlm.nih.gov/geo/') + self.template_attributes.download_url_meta = sfaira_questionary(function='text', + question='URL to download the meta data', + default='https://ftp.ncbi.nlm.nih.gov/geo/') self.template_attributes.create_extra_description = sfaira_questionary(function='confirm', question='Do you want to add additional custom metadata?', default='Yes') From 95df92e7b0635e2bf9e222607df8a79dd7ef68b6 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 22 Mar 2021 11:01:14 +0100 Subject: [PATCH 097/161] fixed numeric label reading from mapping tsv --- sfaira/data/base/dataset.py | 3 ++- sfaira/data/utils_scripts/create_meta_and_cache.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 9a9e77dea..cb364f7ef 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -909,7 +909,8 @@ def _read_class_map(self, fn) -> pd.DataFrame: :return: """ try: - tab = pd.read_csv(fn, header=0, index_col=None, sep="\t") + # Need dtype="str" to force numeric cell type identifiers, e.g. cluster numbers to be in string format. 
+ tab = pd.read_csv(fn, header=0, index_col=None, sep="\t", dtype="str") except pandas.errors.ParserError as e: print(f"{self.id}") raise pandas.errors.ParserError(e) diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 3699bc3b4..41c81575d 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -34,7 +34,7 @@ def write_meta(args0, args1): annotated_only=False, match_to_reference=None, remove_gene_version=True, - load_raw=True, + load_raw=False, allow_caching=True, processes=processes, func=write_meta, From 9c1c4c448ff8199bc734e754c25b08bc62b93ae9 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 7 Apr 2021 11:26:03 +0200 Subject: [PATCH 098/161] Implementation of test-dataloader (#211) * add test-dataloader implementation Signed-off-by: zethson * add dcs Signed-off-by: zethson * Remove outdated comments * add test_data parameter Signed-off-by: zethson * add documentation Signed-off-by: zethson --- docs/adding_datasets.rst | 24 +++++++++++++- sfaira/cli.py | 11 ++++--- sfaira/commands/test_dataloader.py | 32 +++++++++++++------ sfaira/unit_tests/conftest.py | 22 +++++++++++++ .../data_contribution/test_data_template.py | 30 ++++++++--------- 5 files changed, 88 insertions(+), 31 deletions(-) create mode 100644 sfaira/unit_tests/conftest.py diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index 53bf24ee4..a9441279f 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -54,7 +54,7 @@ The data loader python file Each data set, ie a single file or a set of files with similar structures, has its own data loader function and a yaml files that describes its meta data. 
-Alternatively to the (preffered) yaml file, meta data can be also be described in a constructor of a class in the same python file +Alternatively to the (preferred) yaml file, meta data can be also be described in a constructor of a class in the same python file as the loading function. For a documentation on writing a python class-based dataloader, please see here: https://github.com/theislab/sfaira/blob/dev/docs/adding_dataset_classes.rst A detailed description of all meta data is given at the bottom of this page. @@ -265,6 +265,7 @@ The common workflow look as follows: 1. Create a new dataloader with ``sfaira create-dataloader`` 2. Validate the dataloader with ``sfaira lint-dataloader `` +3. Test the dataloader with ``sfaira test-dataloader . --doi --test-data `` When creating a dataloader with ``sfaira create-dataloader`` common information such as your name and email are prompted for, followed by dataloader specific attributes such as organ, organism and many more. @@ -288,6 +289,27 @@ All unused attributes will be removed. Next validate the integrity of your dataloader content with ``sfaira lint-dataloader ``. All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information. +Finally, copy your dataloader into the ``sfaira/dataloaders/loaders/`` folder. +Now you can test your dataloader with ``sfaira test-dataloader --doi --test-data ``. +Note that sfaira expects a folder structure for the test data such as: + +.. 
code-block:: + + ├── template_data + │   └── d10_1016_j_cmet_2019_01_021 + │   ├── GSE117770_RAW.tar + │   ├── GSM3308545_NOD_08w_A_annotation.csv + │   ├── GSM3308547_NOD_08w_C_annotation.csv + │   ├── GSM3308548_NOD_14w_A_annotation.csv + │   ├── GSM3308549_NOD_14w_B_annotation.csv + │   ├── GSM3308550_NOD_14w_C_annotation.csv + │   ├── GSM3308551_NOD_16w_A_annotation.csv + │   ├── GSM3308552_NOD_16w_B_annotation.csv + │   └── GSM3308553_NOD_16w_C_annotation.csv + +Pass the path to the template_data folder, not the doi. Sfaira will use this path to cache further data for speedups. +All tests must pass! If any of the tests fail please revisit your dataloader and fix the error. + Map cell type labels to ontology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/sfaira/cli.py b/sfaira/cli.py index 65025f448..596b61d0f 100644 --- a/sfaira/cli.py +++ b/sfaira/cli.py @@ -104,11 +104,14 @@ def lint_dataloader(path) -> None: @sfaira_cli.command() @click.argument('path', type=click.Path(exists=True)) -def test_dataloader(path) -> None: - """ - Runs a dataloader unit test. +@click.option('--test-data', type=click.Path(exists=True)) +@click.option('--doi', type=str, default=None) +def test_dataloader(path, test_data, doi) -> None: + """Runs a dataloader integration test. + + PATH is the absolute path of the root of your sfaira clone. 
""" - dataloader_tester = DataloaderTester(path) + dataloader_tester = DataloaderTester(path, test_data, doi) dataloader_tester.test_dataloader() diff --git a/sfaira/commands/test_dataloader.py b/sfaira/commands/test_dataloader.py index 4dde41710..6fa796102 100644 --- a/sfaira/commands/test_dataloader.py +++ b/sfaira/commands/test_dataloader.py @@ -10,33 +10,45 @@ class DataloaderTester: - def __init__(self, path): + def __init__(self, path, test_data, doi): self.WD = os.path.dirname(__file__) self.path = path - self.doi = '' + self.test_data = test_data + self.cwd = os.getcwd() + self.doi = doi self.doi_sfaira_repr = '' def test_dataloader(self): """ Runs a predefined unit test on a given dataloader. """ - print('[bold red]This command is currently disabled.') - # print('[bold blue]Please ensure that your dataloader is in sfaira/dataloaders/loaders/.') - # print('[bold blue]Please ensure that your test data is in sfaira/unit_tests/template_data/.') - # self._prompt_doi() - # self._run_unittest() + print('[bold blue]Please ensure that your dataloader is in sfaira/dataloaders/loaders/.') + if not self.doi: + self._prompt_doi() + self.doi_sfaira_repr = f'd{self.doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + self._run_unittest() def _prompt_doi(self): self.doi = sfaira_questionary(function='text', question='Enter your DOI', default='10.1000/j.journal.2021.01.001') - self.doi_sfaira_repr = f'd{self.doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' def _run_unittest(self): + """ + Runs the actual integration test by invoking pytest on it. 
+ """ print('[bold blue]Conflicts are not automatically resolved.') print('[bold blue]Please go back to [bold]https://www.ebi.ac.uk/ols/ontologies/cl[blue] for every mismatch or conflicts ' 'and add the correct cell ontology class name into the .csv "target" column.') - pytest = Popen(['pytest', '-s', self.path, '--doi_sfaira_repr', self.doi_sfaira_repr], + + os.chdir(f'{self.path}/sfaira/unit_tests/data_contribution') + + pytest = Popen(['pytest', 'test_data_template.py', '--doi_sfaira_repr', self.doi_sfaira_repr, '--test_data', self.test_data], universal_newlines=True, shell=False, close_fds=True) (pytest_stdout, pytest_stderr) = pytest.communicate() - print(pytest_stderr) + if pytest_stdout: + print(pytest_stdout) + if pytest_stderr: + print(pytest_stderr) + + os.chdir(self.cwd) diff --git a/sfaira/unit_tests/conftest.py b/sfaira/unit_tests/conftest.py new file mode 100644 index 000000000..795385d37 --- /dev/null +++ b/sfaira/unit_tests/conftest.py @@ -0,0 +1,22 @@ +from pytest import fixture + + +def pytest_addoption(parser): + parser.addoption( + "--doi_sfaira_repr", + action="store" + ) + parser.addoption( + "--test_data", + action="store" + ) + + +@fixture() +def doi_sfaira_repr(request): + return request.config.getoption("--doi_sfaira_repr") + + +@fixture() +def test_data(request): + return request.config.getoption("--test_data") diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index 9e2d98e92..75525e6c9 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -9,7 +9,7 @@ sfairae = None -def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_j_cmet_2019_01_021"): +def test_load(doi_sfaira_repr: str, test_data: str): """ Unit test to assist with data set contribution. 
@@ -30,8 +30,6 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ (Note that columns are separated by ",") You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl 5. Run this unit test for a last time to check the cell type maps. - - :return: """ remove_gene_version = True match_to_reference = None @@ -59,9 +57,9 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ ds = DatasetGroupDirectoryOriented( file_base=file_path, - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template + data_path=test_data, + meta_path=test_data, + cache_path=test_data ) # Test raw loading and caching: # You can set load_raw to True while debugging when caching works already to speed the test up, @@ -72,7 +70,7 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ load_raw=True, # tests raw loading allow_caching=True, # tests caching ) - assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {dir_template}" + assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {test_data}" # Create cell type conversion table: cwd = os.path.dirname(file_path) dataset_module = str(cwd.split("/")[-1]) @@ -118,9 +116,9 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ if DatasetFound is None: datasets_f = [ DatasetBase( - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template, + data_path=test_data, + meta_path=test_data, + cache_path=test_data, load_func=load_func, dict_load_func_annotation=load_func_annotation, sample_fn=x, @@ -131,9 +129,9 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ else: datasets_f = [ DatasetFound( - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template, + data_path=test_data, + meta_path=test_data, + cache_path=test_data, load_func=load_func, load_func_annotation=load_func_annotation, sample_fn=x, @@ 
-154,9 +152,9 @@ def test_load(dir_template: str = "../template_data", doi_sfaira_repr="d10_1016_ # Test loading from cache: ds = DatasetGroupDirectoryOriented( file_base=file_path, - data_path=dir_template, - meta_path=dir_template, - cache_path=dir_template + data_path=test_data, + meta_path=test_data, + cache_path=test_data ) ds.load( remove_gene_version=remove_gene_version, From 051dbed502b4a533c2b2f0045b1a654778ded0e8 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 12 Apr 2021 10:13:38 +0200 Subject: [PATCH 099/161] Fix loading (#197) * fixed tsv files * removed erasing protection from cellontology obs keys * fixed building of directory_formatted_doi in cases where there are multiple DOIs * fixed ID generation with multiple DOIs present via doi_main * improve error message when an illegal label is used for a constrained metadata field * improve error message when an illegal label is used for a constrained metadata field * add sex dict in HCL loading code to adhere to ontology * fix dataset streamlining * add author field to ontology container to enable subsetting * added optional loading of further cell-wise annotation into Dataset.load() and included this in DatasetGroupDirectoryOriented * moved load to function * enabled batch translation * fix anatomical config writing * depreceated age addresses #203 * depreceated attribute healthy in favour of disease - transferred healthy annotation to disease anntotation - removed healthy_state_healthy attribute - left todos in data loaders where disease was not mapped to mondo yet - expanded mondo by healthy node * Remove age and healthy from templates & add disease (#206) * removed healthy and age from templates Signed-off-by: zethson * add disease to templates Signed-off-by: zethson * added crossref interface addresses #186 * fix streamlining call * make sure uns is copied into obs before concatenating adata objects. 
closes #212 * make sure uns attributes are written toobs correctly when streamlining * introduced new genome management - replace old genome containers by new ones that can query ensemble ftp - updated default genome assembly to newest available - removed genome based subsetting from load and put into separate function - allowed output of streamlined objects in full genome feature space addresses points from #215 Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> Co-authored-by: Lukas Heumos --- .github/workflows/create_templates.yml | 2 +- docs/adding_dataset_classes.rst | 2 - docs/adding_datasets.rst | 442 +- requirements.txt | 1 + sfaira/commands/create_dataloader.py | 12 + .../multiple_datasets/cookiecutter.json | 3 + .../{{ cookiecutter.id_without_doi }}.yaml | 16 +- .../single_dataset/cookiecutter.json | 3 + .../{{ cookiecutter.id_without_doi }}.yaml | 8 +- sfaira/consts/adata_fields.py | 42 +- sfaira/consts/meta_data_files.py | 10 +- sfaira/consts/ontologies.py | 19 +- sfaira/data/base/dataset.py | 644 +- sfaira/data/base/dataset_group.py | 115 +- .../databases/cellxgene/__init__.py | 2 +- .../databases/cellxgene/cellxgene_group.py | 2 +- .../databases/cellxgene/cellxgene_loader.py | 26 +- .../data/dataloaders/databases/super_group.py | 2 +- ...letoflangerhans_2017_smartseq2_enge_001.py | 3 +- .../mouse_x_2018_microwellseq_han_x.py | 11 +- .../mouse_x_2018_microwellseq_han_x.tsv | 3 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 12 +- ...pithelium_2019_10xsequencing_smilie_001.py | 9 +- ...man_ileum_2019_10xsequencing_martin_001.py | 9 +- ...stategland_2018_10xsequencing_henry_001.py | 7 +- .../human_pancreas_2016_indrop_baron_001.py | 6 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 4 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 4 +- ...uman_lung_2020_10xsequencing_miller_001.py | 6 +- .../human_brain_2017_droncseq_habib_001.py | 7 +- ...human_testis_2018_10xsequencing_guo_001.py | 7 +- 
...liver_2018_10xsequencing_macparland_001.py | 7 +- .../human_kidney_2019_droncseq_lake_001.py | 7 +- .../human_kidney_2019_droncseq_lake_001.tsv | 2 +- .../human_x_2019_10xsequencing_szabo_001.py | 9 +- .../human_x_2019_10xsequencing_szabo_001.tsv | 1 + ...man_retina_2019_10xsequencing_menon_001.py | 7 +- .../human_placenta_2018_x_ventotormo_001.py | 9 +- .../human_liver_2019_celseq2_aizarani_001.py | 7 +- ...ver_2019_10xsequencing_ramachandran_001.py | 11 +- ...er_2019_10xsequencing_ramachandran_001.tsv | 26 +- ...an_liver_2019_10xsequencing_popescu_001.py | 6 +- ...rain_2019_10x3v2sequencing_kanton_001.yaml | 8 +- .../human_x_2020_microwellseq_han_x.py | 15 +- .../human_x_2020_microwellseq_han_x.tsv | 24 +- .../human_lung_2020_x_travaglini_001.yaml | 14 +- ...uman_colon_2020_10xsequencing_james_001.py | 6 +- .../human_lung_2019_dropseq_braga_001.py | 6 +- .../human_x_2019_10xsequencing_braga_x.py | 9 +- .../mouse_x_2019_10xsequencing_hove_001.py | 6 +- ...uman_kidney_2020_10xsequencing_liao_001.py | 7 +- ...man_retina_2019_10xsequencing_voigt_001.py | 7 +- .../human_x_2019_10xsequencing_wang_001.py | 7 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 7 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 6 +- ...nchyma_2020_10xsequencing_habermann_001.py | 5 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 6 +- ...uman_thymus_2020_10xsequencing_park_001.py | 7 +- .../human_x_2020_scirnaseq_cao_001.py | 51 +- .../human_x_2020_scirnaseq_cao_001.tsv | 78 + .../human_x_2020_scirnaseq_cao_001.yaml | 12 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 6 +- ...man_x_2019_10xsequencing_madissoon_001.tsv | 1 + ..._retina_2019_10xsequencing_lukowski_001.py | 7 +- ...lood_2019_10xsequencing_10xgenomics_001.py | 7 +- .../human_x_2018_10xsequencing_regev_001.py | 7 +- .../data/dataloaders/loaders/super_group.py | 4 +- .../clean_celltype_maps_global.py | 25 - .../create_anatomical_configs.py | 3 +- .../create_celltype_maps_global.py | 100 - 
.../utils_scripts/create_meta_and_cache.py | 52 +- .../data/utils_scripts/streamline_selected.py | 36 + sfaira/estimators/keras.py | 22 +- sfaira/interface/model_zoo.py | 2 +- sfaira/models/celltype/marker.py | 2 +- sfaira/models/celltype/mlp.py | 2 +- sfaira/models/embedding/ae.py | 2 +- sfaira/models/embedding/linear.py | 2 +- sfaira/models/embedding/vae.py | 2 +- sfaira/models/embedding/vaeiaf.py | 2 +- sfaira/models/embedding/vaevamp.py | 2 +- sfaira/train/__init__.py | 2 +- sfaira/train/summaries.py | 44 +- sfaira/train/train_model.py | 217 +- sfaira/unit_tests/data/test_dataset.py | 8 +- .../data_contribution/test_data_template.py | 17 +- .../unit_tests/estimators/test_estimator.py | 2 +- sfaira/unit_tests/versions/test_ontologies.py | 37 +- sfaira/versions/__init__.py | 4 +- sfaira/versions/genome_versions/__init__.py | 3 - .../genome_versions/class_interface.py | 82 - .../human/Homo_sapiens_GRCh38_97.csv | 19987 -------------- .../genome_versions/human/__init__.py | 2 - .../genome_versions/human/genome_container.py | 19 - .../genome_versions/human/genome_sizes.py | 3 - .../mouse/Mus_musculus_GRCm38_97.csv | 21901 ---------------- .../genome_versions/mouse/__init__.py | 2 - .../genome_versions/mouse/genome_container.py | 19 - .../genome_versions/mouse/genome_sizes.py | 3 - sfaira/versions/genomes.py | 139 + sfaira/versions/metadata/__init__.py | 2 +- sfaira/versions/metadata/base.py | 65 +- sfaira/versions/metadata/universe.py | 17 +- .../__init__.py | 0 .../class_interface.py | 4 +- sfaira/versions/topologies/human/__init__.py | 2 + .../topologies/human/celltype/__init__.py | 2 + .../human/celltype/celltypemarker.py | 3 +- .../human/celltype/celltypemlp.py | 12 +- .../topologies/human/embedding/__init__.py | 6 + .../human/embedding/ae.py | 12 +- .../human/embedding/linear.py | 9 +- .../human/embedding/nmf.py | 9 +- .../human/embedding/vae.py | 12 +- .../human/embedding/vaeiaf.py | 6 +- .../human/embedding/vaevamp.py | 6 +- 
sfaira/versions/topologies/mouse/__init__.py | 2 + .../topologies/mouse/celltype/__init__.py | 2 + .../mouse/celltype/celltypemarker.py | 3 +- .../mouse/celltype/celltypemlp.py | 12 +- .../topologies/mouse/embedding/__init__.py | 6 + .../mouse/embedding/ae.py | 12 +- .../mouse/embedding/linear.py | 9 +- .../mouse/embedding/nmf.py | 9 +- .../mouse/embedding/vae.py | 12 +- .../mouse/embedding/vaeiaf.py | 6 +- .../mouse/embedding/vaevamp.py | 6 +- sfaira/versions/topology_versions/external.py | 1 - .../topology_versions/human/__init__.py | 2 - .../human/celltype/__init__.py | 2 - .../human/embedding/__init__.py | 6 - .../topology_versions/mouse/__init__.py | 2 - .../mouse/celltype/__init__.py | 2 - .../mouse/embedding/__init__.py | 6 - 134 files changed, 1709 insertions(+), 43147 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv delete mode 100644 sfaira/data/utils_scripts/clean_celltype_maps_global.py delete mode 100644 sfaira/data/utils_scripts/create_celltype_maps_global.py create mode 100644 sfaira/data/utils_scripts/streamline_selected.py delete mode 100644 sfaira/versions/genome_versions/__init__.py delete mode 100644 sfaira/versions/genome_versions/class_interface.py delete mode 100644 sfaira/versions/genome_versions/human/Homo_sapiens_GRCh38_97.csv delete mode 100644 sfaira/versions/genome_versions/human/__init__.py delete mode 100644 sfaira/versions/genome_versions/human/genome_container.py delete mode 100644 sfaira/versions/genome_versions/human/genome_sizes.py delete mode 100644 sfaira/versions/genome_versions/mouse/Mus_musculus_GRCm38_97.csv delete mode 100644 sfaira/versions/genome_versions/mouse/__init__.py delete mode 100644 sfaira/versions/genome_versions/mouse/genome_container.py delete mode 100644 sfaira/versions/genome_versions/mouse/genome_sizes.py create mode 100644 sfaira/versions/genomes.py rename sfaira/versions/{topology_versions => topologies}/__init__.py (100%) rename 
sfaira/versions/{topology_versions => topologies}/class_interface.py (94%) create mode 100644 sfaira/versions/topologies/human/__init__.py create mode 100644 sfaira/versions/topologies/human/celltype/__init__.py rename sfaira/versions/{topology_versions => topologies}/human/celltype/celltypemarker.py (89%) rename sfaira/versions/{topology_versions => topologies}/human/celltype/celltypemlp.py (86%) create mode 100644 sfaira/versions/topologies/human/embedding/__init__.py rename sfaira/versions/{topology_versions => topologies}/human/embedding/ae.py (84%) rename sfaira/versions/{topology_versions => topologies}/human/embedding/linear.py (80%) rename sfaira/versions/{topology_versions => topologies}/human/embedding/nmf.py (80%) rename sfaira/versions/{topology_versions => topologies}/human/embedding/vae.py (83%) rename sfaira/versions/{topology_versions => topologies}/human/embedding/vaeiaf.py (86%) rename sfaira/versions/{topology_versions => topologies}/human/embedding/vaevamp.py (86%) create mode 100644 sfaira/versions/topologies/mouse/__init__.py create mode 100644 sfaira/versions/topologies/mouse/celltype/__init__.py rename sfaira/versions/{topology_versions => topologies}/mouse/celltype/celltypemarker.py (89%) rename sfaira/versions/{topology_versions => topologies}/mouse/celltype/celltypemlp.py (86%) create mode 100644 sfaira/versions/topologies/mouse/embedding/__init__.py rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/ae.py (84%) rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/linear.py (80%) rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/nmf.py (80%) rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/vae.py (83%) rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/vaeiaf.py (86%) rename sfaira/versions/{topology_versions => topologies}/mouse/embedding/vaevamp.py (86%) delete mode 100644 sfaira/versions/topology_versions/external.py delete 
mode 100644 sfaira/versions/topology_versions/human/__init__.py delete mode 100644 sfaira/versions/topology_versions/human/celltype/__init__.py delete mode 100644 sfaira/versions/topology_versions/human/embedding/__init__.py delete mode 100644 sfaira/versions/topology_versions/mouse/__init__.py delete mode 100644 sfaira/versions/topology_versions/mouse/celltype/__init__.py delete mode 100644 sfaira/versions/topology_versions/mouse/embedding/__init__.py diff --git a/.github/workflows/create_templates.yml b/.github/workflows/create_templates.yml index d6b62c024..4e894113f 100644 --- a/.github/workflows/create_templates.yml +++ b/.github/workflows/create_templates.yml @@ -31,5 +31,5 @@ jobs: - name: Create single_dataset template run: | cd .. - echo -e "\n\n\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader + echo -e "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" | sfaira create-dataloader rm -rf d10_1000_j_journal_2021_01_001/ diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst index 07e65c654..8924fd8e1 100644 --- a/docs/adding_dataset_classes.rst +++ b/docs/adding_dataset_classes.rst @@ -28,7 +28,6 @@ In this scenario, meta data is described in a constructor of a class in the same self.download_url_data = x # download website(s) of data files self.download_url_meta = x # download website(s) of meta data files - self.age = x # (*, optional) age of sample self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) @@ -49,7 +48,6 @@ In this scenario, meta data is described in a constructor of a class in the same # You need to make sure this is loaded in the loading script)! # See above for a description what these meta data attributes mean. 
# Again, if these attributes are note available, you can simply leave this out. - self.obs_key_age = x # (optional, see above, do not provide if .age is provided) self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .age is assay_differentiation) self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index a9441279f..31ad2652f 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -3,26 +3,157 @@ Adding data sets Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. This process requires a couple of steps as outlined in the following sections. +sfaira features an interactive way of creating, formatting and testing dataloaders through a command line interface (CLI). +The common workflow using the CLI looks as follows: + +1. Check that the data loader was not already implemented. + We will open issues for all planned data loaders, so you can search both the code_ base and our GitHub issues_ for + matching data loaders before you start writing one. + The core data loader identified is the directory compatible doi, + which is the doi with all special characters replaced by "_" and a "d" prefix is used: + "10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029". + Searching for this string should yield a match if it is already implemented, take care to look for both + preprint and publication DOIs if both are available. + We will also mention publication names in issues, you will however not find these in the code. - 1. Write a dataloader as outlined below. - 2. Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as required by your data loader. 
- If the raw file your dataloader uses as input is publically available, sfaira will be able to automatically download the raw file, so no manual copying is required. - For the purpose of testing the data loader with a unit test, you can also copy the data into `sfaira/unit_tests/template_data/` as a DOI structured folder if you do not want to maintain a data collection on the machine that you are testing on. - 3. You can contribute the data loader to public sfaira, we do not manage data upload though. - During publication, you would upload this data set to a server like GEO and the data loader contributed to sfaira would use this download link. +.. _code: https://github.com/theislab/sfaira/tree/dev +.. _issues: https://github.com/theislab/sfaira/issues + +2. Install sfaira. + Clone sfaira into a local repository from `dev` branch and install via pip. + +.. code-block:: + + cd target_directory + git clone https://github.com/theislab/sfaira.git + cd sfaira # go into sfaira directory + git checkout dev + # git pull # use this to update your installation + pip install -e . # install +.. + +3. Create a new dataloader. + When creating a dataloader with ``sfaira create-dataloader`` dataloader specific attributes such as organ, organism + and many more are prompted for. + We provide a description of all meta data items at the bottom of this file. + If the requested information is not available simply hit enter and continue until done. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 2 + git checkout -b YOUR_BRANCH_NAME # create a new branch for your data loader. + sfaira create-dataloader + + +The files are created in the sfaira installation under `sfaira/data/dataloaders/loaders/--DOI-folder--`, +where the DOI-specific folder starts with `d` and is followed by the DOI in which all special characters are replaced +by `_`, below referred to as `--DOI-folder--`: + +.. 
code-block:: + + ├──sfaira/data/dataloaders/loaders/--DOI-folder-- + ├── extra_description.txt <- Optional extra description file + ├── __init__.py + ├── NA_NA_2021_NA_Einstein_001.py <- Contains the load function to load the data + ├── NA_NA_2021_NA_Einstein_001.yaml <- Specifies all data loader data +.. + +4. Correct yaml file. + Correct errors in `sfaira/data/dataloaders/loaders/--DOI-folder--/NA_NA_2021_NA_Einstein_001.yaml` file and add + further attributes you may have forgotten in step 3. + This step is optional. + +5. Make downloaded data available to sfaira data loader testing. + Identify the raw files as indicated in the dataloader classes and copy them into your directory structure as + required by your data loader. + Note that these should be the exact files that are uploaded to cloud servers such as GEO: + Do not decompress these files if these files are archives such as zip, tar or gz. + Instead, navigate the archives directly in the load function (step 6). + Copy the data into `sfaira/unit_tests/template_data/--DOI-folder--/`. + This folder is masked from git and only serves for temporarily using this data for loader testing. + After finishing loader contribution, you can delete this data again without any consequences for your loader. + +6. Write load function. + Fill load function in `sfaira/data/dataloaders/loaders/--DOI-folder--/NA_NA_2021_NA_Einstein_001.py`. + +7. Clean the dataloader with a superficial check (lint). + This step is optional. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 2 + sfaira clean-dataloader +.. + +8. Validate the dataloader with the CLI. + Next validate the integrity of your dataloader content with ``sfaira lint-dataloader ``. + All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 2 + sfaira lint-dataloader `` +.. + +9. 
Create cell type annotation if your data set is annotated. + Note that this will abort with error if there are bugs in your data loader. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 2 + # sfaira annotate `` TODO +.. + +10. Mitigate automated cell type maps. + Sfaira creates a cell type mapping `.tsv` file in the directory in which your data loader is located if you + indicated that annotation is present by filling `cellontology_original_obs_key`. + This file is: `NA_NA_2021_NA_Einstein_001.tsv`. + This file contains two columns with one row for each unique cell type label. + The free text identifiers are in the first column "source", + and the corresponding ontology terms are in the second column "target". + You can write this file entirely from scratch. + Sfaira also allows you to generate a first guess of this file using fuzzy string matching + which is automatically executed when you run the template data loader unit test for the first time with your new + loader. + Conflicts are not resolved in this first guess and you have to manually decide which free text field corresponds + to which ontology term in the case of conflicts. + Still, this first guess usually drastically speeds up this annotation harmonization. + Note that you do not have to include the non-human-readable IDs here as they are added later in a fully + automated fashion. + +11. Test data loader. + Note that this will abort with error if there are bugs in your data loader. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 2 + # sfaira test `` TODO +.. + +12. Make loader public. + You can contribute the data loader to public sfaira as code through a pull request. + Note that you can also just keep the data loader in your local installation or keep it in sfaira_extensions + if you do not want to make it public. + Note that we do not manage data upload! 
+ During publication, you would upload this data set to a server like GEO and the data loader contributed to + sfaira would use this download link. + +.. code-block:: + + # make sure you are in the top-level sfaira directory from step 1 + git add * + git commit # enter your commit description + # Next make sure you are up to date with dev + git checkout dev + git pull + git checkout YOUR_BRANCH_NAME + git merge dev + git push # this starts the pull request. +.. The following sections will first describe the underlying design principles of sfaira dataloaders and then explain how to interactively create, validate and test dataloaders. -Use data loaders with an existing data repository --------------------------------------------- - -You only want to use data sets with existing data loaders and have adapted your directory structure as above? -In that case, you can immediately start using the data loader functions, you just need to supply the root directory -of the directory structure as `path to the constructor of the class that you are using. -Depending on the functionalities you want to use, you would often want to create a directory with cached meta data -first. This can be easily done via the script sfaira.data.utils.create_meta.py. This meta information is necessary to -anticipate file sizes for backing merged adata objects, for example, and is used for lazy loading. Writing dataloaders --------------------- @@ -35,19 +166,6 @@ All data loaders corresponding to data sets of one study are grouped into this d Next, each data set is represented by one data loader python file in this directory. See below for more complex set ups with repetitive data loader code. -Check that the data loader was not already implemented -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We will open issues for all planned data loaders, so you can search both the code_ base and our GitHub issues_ for -matching data loaders before you start writing one. 
-The core data loader identified is the directory compatible doi, -which is the doi with all special characters replaced by "_" and a "d" prefix is used: -"10.1016/j.cell.2019.06.029" becomes "d10_1016_j_cell_2019_06_029". -Searching for this string should yield a match if it is already implemented, take care to look for both -preprint and publication DOIs if both are available. We will also mention publication names in issues, you will however not find these in the code. - -.. _code: https://github.com/theislab/sfaira/tree/dev -.. _issues: https://github.com/theislab/sfaira/issues - The data loader python file ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -72,10 +190,9 @@ before it is loaded into memory: download_url_data: download_url_meta: normalization: + primary_data: year: dataset_or_observation_wise: - age: - age_obs_key: assay_sc: assay_sc_obs_key: assay_differentiation: @@ -88,10 +205,10 @@ before it is loaded into memory: cell_line_obs_key: development_stage: development_stage_obs_key: + disease_stage: + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: - healthy_obs_key: individual: individual_obs_key: organ: @@ -111,8 +228,6 @@ before it is loaded into memory: feature_wise: var_ensembl_col: var_symbol_col: - misc: - healthy_state_healthy: meta: version: "1.0" @@ -146,10 +261,9 @@ In summary, a the dataloader for a mouse lung data set could look like this: download_url_data: "my GEO upload" download_url_meta: normalization: "raw" + primary_data: year: dataset_or_observation_wise: - age: - age_obs_key: assay_sc: "smart-seq2" assay_sc_obs_key: assay_differentiation: @@ -162,10 +276,10 @@ In summary, a the dataloader for a mouse lung data set could look like this: cell_line_obs_key: development_stage: development_stage_obs_key: + disease_stage: + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: - healthy_obs_key: individual: individual_obs_key: organ: "lung" @@ -185,8 +299,6 @@ In summary, a the dataloader for a mouse lung data set could look like this: 
feature_wise: var_ensembl_col: var_symbol_col: - misc: - healthy_state_healthy: meta: version: "1.0" @@ -205,6 +317,56 @@ in which local data and cell type annotation can be managed separately but still The data loaders and cell type annotation formats between sfaira and sfaira_extensions are identical and can be easily copied over. +Loading multiple files of similar structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Only one loader has to be written for each set of files that are similarly structured which belong to one DOI. +`sample_fns` in `dataset_structure` in the `.yaml` indicates the presence of these files. +The identifiers listed there do not have to be the full file names. +They are received by `load()` as the argument `sample_fn` and can then be used in custom code in `load()` to load +the correct file. +This allows sharing code across these files in `load()`. +If these files share all meta data in the `.yaml`, you do not have to change anything else here. +If some meta data items are file specific, you can further subdefine them under the keys in this `.yaml` via their +identifiers stated here. +In the following example, we show how this formalism can be used to identify one file declared as "A" as a healthy +lung sample and another file "B" as a healthy pancreas sample. + +.. code-block:: python + + dataset_structure: + dataset_index: 1 + sample_fns: + - "A" + - "B" + dataset_wise: + # ... part of yaml omitted ... + dataset_or_observation_wise: + # ... part of yaml omitted + healthy: True + healthy_obs_key: + individual: + individual_obs_key: + organ: + A: "lung" + B: "pancreas" + organ_obs_key: + # part of yaml omitted ... +.. + +Note that not all meta data items have to be subdefined into "A" and "B" but only the ones with differing values! +The corresponding `load` function would be: + +.. 
code-block:: python + + def load(data_dir, sample_fn, fn=None) -> anndata.AnnData: + # The following reads either my_file_A.h5ad or my_file_B.h5ad which correspond to A and B in the yaml. + fn = os.path.join(data_dir, f"my_file_{sample_fn}.h5ad") + adata = anndata.read(fn) + return adata +.. + + Loading third party annotation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -342,59 +504,147 @@ Contribute cell types to ontology Please open an issue on the sfaira repo with a description what type of cell type you want to add. -Using ontologies to train cell type classifiers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Cell type classifiers can be trained on data sets with different coarsity of cell type annotation using aggregate -cross-entropy as a loss and aggregate accuracy as a metric. -The one-hot encoded cell type label matrix is accordingly modified in the estimator class in data loading if terms -that correspond to intermediate nodes (rather than leave nodes) are encountered in the label set. - -Metadata management -------------------- - -We constrain meta data by ontologies where possible. The current restrictions are: - - - .age: unconstrained string - Use - - units of years for humans, - - the E{day} nomenclature for mouse embryos - - the P{day} nomenclature for young post-natal mice - - units of weeks for mice older than one week and - - units of days for cell culture samples. - - .assay_sc: EFO-constrained string - Choose a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false - - .assay_differentiation: unconstrained string - Try to provide a base differentiation protocol (eg. "Lancaster, 2014") as well as any amendments to the original protocol. - - .assay_type_differentiation: constrained string, {"guided", "unguided"} - For cell-culture samples: Whether a guided (patterned) differentiation protocol was used in the experiment. 
- - .developmental_stage: unconstrained string - This will constrained to an ontology in the future, - try choosing from HSAPDV (https://www.ebi.ac.uk/ols/ontologies/hsapdv) for human - or from MMUSDEV (https://www.ebi.ac.uk/ols/ontologies/mmusdv) for mouse. - - .cell_line: cellosaurus-constrained string - Cell line name from the cellosaurus cell line database (https://web.expasy.org/cellosaurus/) - - .ethnicity: unconstrained string, this will constrained to an ontology in the future. - Try choosing from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) - - .healthy: bool - Whether the sample is from healthy tissue ({True, False}). - - .normalisation: unconstrained string, this will constrained to an ontology in the future, - Try to use {"raw", "scaled"}. - - .organ: UBERON-constrained string - The anatomic location of the sample (https://www.ebi.ac.uk/ols/ontologies/uberon). - - .organism: constrained string, {"mouse", "human"}. - The organism from which the sample originates. - In the future, we will use NCBITAXON (https://www.ebi.ac.uk/ols/ontologies/ncbitaxon). - - .sample_source: constrained string, {"primary_tissue", "2d_culture", "3d_culture", "tumor"} - Which cellular system the sample was derived from. - - .sex: constrained string, {"female", "male", None} - Sex of the individual sampled. - - .state_exact: unconstrained string, try to be concise and anticipate that this field is queried by automatised searches. - If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity - and use units: `[1g]` - - .year: must be an integer year, e.g. 2020 - Year in which sample was first described (e.g. pre-print publication). - -Follow this issue_ for details on upcoming ontology integrations. - -.. _issue: https://github.com/theislab/sfaira/issues/16 + +Metadata +-------- + +Required fields +~~~~~~~~~~~~~~~ + +Most meta data fields are optional in sfaira. 
+Required are: + +- dataset_structure: dataset_index is required. +- dataset_wise: author, doi, download_url_data, normalization and year are required. +- dataset_or_observation_wise: organism is required. +- observation_wise: None are required. +- feature_wise: var_ensembl_col or var_symbol_col is required. +- misc: None are required. + +Field descriptions +~~~~~~~~~~~~~~~~~~ + +We constrain meta data by ontologies where possible. +Meta data can either be dataset-wise, observation-wise or feature-wise. + +Dataset structure meta data are in the section `dataset_structure` in the `.yaml` file. + +- dataset_index [int] + Numeric identifier of the first loader defined by this python file. + Only relevant if multiple python files for one DOI generate loaders of the same name. + In these cases, this numeric index can be used to distinguish them. +- sample_fns [list of strings] + If there are multiple data files which can be covered by one `load()` function and `.yaml` file because they are + structured similarly, these can be identified here. + See also section `Loading multiple files of similar structure`. + +Dataset-wise meta data are in the section `dataset_wise` in the `.yaml` file. + +- author [list of strings] + List of author names of dataset (not of loader). +- doi [list of strings] + DOIs associated with dataset. + These can be preprints and journal publication DOIs. +- download_url_data [list of strings] + Download links for data. + Full URLs of all data files such as count matrices. Note that distinct observation-wise annotation files can be + supplied in download_url_meta. +- download_url_meta [list of strings] + Download links for observation-wise data. + Full URLs of all observation-wise meta data files such as cell annotation tables. + This attribute is optional and not necessary if observation-wise meta data is already in the files defined in + `download_url_data`, e.g. often the case for `.h5ad`. 
+- normalization: Data normalisation {"raw", "scaled"} + Type of normalisation of data stored in `adata.X` emitted by the `load()` function. +- year: Year in which sample was first described [integer] + Pre-print publication year. + +Meta-data which can either be dataset- or observation-wise are in the section `dataset_or_observation_wise` in the +`.yaml` file. +They can all be supplied as `NAME` or as `NAME_obs_key`: +The former indicates that the entire data set has the value stated in the yaml. +The latter, `NAME_obs_key`, indicates that there is a column in `adata.obs` emitted by the `load()` function whose name +is the value given for `NAME_obs_key` and which contains the annotation per observation for this meta data item. +Note that in both cases the value, or the column values, have to fulfill constraints imposed on the meta data item as +outlined below. + +- assay_sc and assay_sc_obs_key [ontology term] + Choose a term from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false +- assay_differentiation and assay_differentiation_obs_key [string] + Try to provide a base differentiation protocol (eg. "Lancaster, 2014") as well as any amendments to the original + protocol. +- assay_type_differentiation and assay_type_differentiation_obs_key {"guided", "unguided"} + For cell-culture samples: Whether a guided (patterned) differentiation protocol was used in the experiment. +- bio_sample and bio_sample_obs_key [string] + Column name in `adata.obs` emitted by the `load()` function which reflects biologically distinct samples, either + different in condition or biological replicates, as a categorical variable. + The values of this column are not constrained and can be arbitrary identifiers of observation groups. + You can concatenate multiple columns to build more fine grained observation groupings by concatenating the column + keys with `*` in this string, e.g. 
`patient*treatment` to get one `bio_sample` for each patient and treatment. + Note that the notion of biologically distinct sample is slightly subjective, we provide this element to allow + researchers to distinguish technical and biological replicates within one study for example. + See also the meta data items `individual` and `tech_sample`. +- cell_line and cell_line_obs_key [ontology term] + Cell line name from the cellosaurus cell line database (https://web.expasy.org/cellosaurus/) +- development_stage and development_stage_obs_key [ontology term] + Developmental stage (age) of individual sampled. + Choose from HSAPDV (https://www.ebi.ac.uk/ols/ontologies/hsapdv) for human + or from MMUSDV (https://www.ebi.ac.uk/ols/ontologies/mmusdv) for mouse. +- disease and disease_obs_key [ontology term] + Choose from MONDO (https://www.ebi.ac.uk/ols/ontologies/mondo) for human +- ethnicity and ethnicity_obs_key [ontology term] + Choose from HANCESTRO (https://www.ebi.ac.uk/ols/ontologies/hancestro) +- individual and individual_obs_key [string] + Column name in `adata.obs` emitted by the `load()` function which reflects the individual sampled as a categorical + variable. + The values of this column are not constrained and can be arbitrary identifiers of observation groups. + You can concatenate multiple columns to build more fine grained observation groupings by concatenating the column + keys with `*` in this string, e.g. `group1*group2` to get one `individual` for each group1 and group2 entry. + Note that the notion of individuals is slightly ill-defined in some cases, we provide this element to allow + researchers to distinguish sample groups that originate from biological material with distinct genotypes. + See also the meta data items `bio_sample` and `tech_sample`. +- organ and organ_obs_key [ontology term] + The UBERON anatomic location of the sample (https://www.ebi.ac.uk/ols/ontologies/uberon). +- organism and organism_obs_key. {"mouse", "human"}. 
+ The organism from which the sample originates. + In the future, we will use NCBITAXON (https://www.ebi.ac.uk/ols/ontologies/ncbitaxon). +- primary_data [bool] + Whether the dataset contains cells that were measured in this study (i.e. this is not a meta study on published data). +- sample_source and sample_source_obs_key. {"primary_tissue", "2d_culture", "3d_culture", "tumor"} + Which cellular system the sample was derived from. +- sex and sex_obs_key. Sex of individual sampled. {"female", "male", None} + Sex of the individual sampled. +- state_exact and state_exact_obs_key [string] + Free text description of condition. + If you give treatment concentrations, intervals or similar measurements use square brackets around the quantity + and use units: `[1g]` +- tech_sample and tech_sample_obs_key [string] + Column name in `adata.obs` emitted by the `load()` function which reflects technically distinct samples, either + different in condition or technical replicates, as a categorical variable. + Any data batch is a `tech_sample`. + The values of this column are not constrained and can be arbitrary identifiers of observation groups. + You can concatenate multiple columns to build more fine grained observation groupings by concatenating the column + keys with `*` in this string, e.g. `patient*treatment*protocol` to get one `tech_sample` for each patient, treatment + and measurement protocol. + See also the meta data items `individual` and `bio_sample`. + +Meta-data which are strictly observation-wise are in the section `observation_wise` in the `.yaml` file: + +- cellontology_original_obs_key [string] + Column name in `adata.obs` emitted by the `load()` function which contains free text cell type labels. + +Meta-data which are feature-wise are in the section `feature_wise` in the `.yaml` file: + +- var_ensembl_col [string] + Name of the column in `adata.var` emitted by the `load()` which contains ENSEMBL gene IDs. 
+ This can also be "index" if the ENSEMBL gene IDs are in the index of the `adata.var` data frame. +- var_symbol_col [string] + Name of the column in `adata.var` emitted by the `load()` which contains gene symbols: + HGNC for human and MGI for mouse. + This can also be "index" if the gene symbols are in the index of the `adata.var` data frame. + +The meta data on the meta data file itself do not have to be modified by you and are automatically controlled; they are in the section +`meta` in the `.yaml` file: + +- version: [string] + Version identifier of meta data scheme. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index aa79086d5..131f41f98 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ anndata>=0.7 +crossref_commons docutils fuzzywuzzy loompy diff --git a/sfaira/commands/create_dataloader.py b/sfaira/commands/create_dataloader.py index 710f63db9..286a88511 100644 --- a/sfaira/commands/create_dataloader.py +++ b/sfaira/commands/create_dataloader.py @@ -29,6 +29,9 @@ class TemplateAttributes: organism: str = '' # (*) species / organism assay: str = '' # (*, optional) protocol used to sample data (e.g. smart-seq2) normalization: str = '' # raw or the used normalization technique + default_embedding: str = '' # Default embedding of the data + primary_data: str = '' # Is this a primary dataset? 
+ disease: str = '' # name of the disease of the condition ethnicity: str = '' # ethnicity of the sample state_exact: str = '' # state of the sample year: str = 2021 # year in which sample was acquired @@ -102,6 +105,12 @@ def _prompt_dataloader_configuration(self): question='Sample file name of the first dataset:', default='data.h5ad') + self.template_attributes.primary_data = str(sfaira_questionary(function='confirm', + question='Primary data:', + default='Yes')) + self.template_attributes.default_embedding = sfaira_questionary(function='text', + question='Default embedding:', + default='NA') self.template_attributes.organism = sfaira_questionary(function='text', question='Organism:', default='NA') @@ -114,6 +123,9 @@ def _prompt_dataloader_configuration(self): self.template_attributes.normalization = sfaira_questionary(function='text', question='Normalization:', default='raw') + self.template_attributes.disease = sfaira_questionary(function='text', + question='Disease:', + default='NA') self.template_attributes.state_exact = sfaira_questionary(function='text', question='Sample state:', default='healthy') diff --git a/sfaira/commands/templates/multiple_datasets/cookiecutter.json b/sfaira/commands/templates/multiple_datasets/cookiecutter.json index 4960fc7a1..ddd7f38ba 100644 --- a/sfaira/commands/templates/multiple_datasets/cookiecutter.json +++ b/sfaira/commands/templates/multiple_datasets/cookiecutter.json @@ -5,6 +5,7 @@ "id": "", "id_without_doi": "", "author": "", + "disease": "", "doi": "", "doi_sfaira_repr": "", "download_url_data": "", @@ -16,5 +17,7 @@ "year": "", "individual": "", "state_exact": "", + "primary_data": "", + "default_embedding": "", "create_extra_description": "" } diff --git a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml index 872fa0188..5f73dc4ce 
100644 --- a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml +++ b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -4,17 +4,17 @@ dataset_structure: {% for fn in cookiecutter.sample_fns.fns %} - "{{ fn }}" {% endfor %}dataset_wise: author: "{{ cookiecutter.author }}" - doi: "{{ cookiecutter.doi }}" + default_embedding: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.default_embedding }}" +{% endfor %}doi: "{{ cookiecutter.doi }}" download_url_data: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: {% endfor %} download_url_meta: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: -{% endfor %} normalization: "{{ cookiecutter.normalization }}" +{% endfor %} primary_data: {{ cookiecutter.primary_data }} + normalization: "{{ cookiecutter.normalization }}" year: "{{ cookiecutter.year }}" dataset_or_observation_wise: - age: -{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: -{% endfor %} age_obs_key: assay: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.assay }}" {% endfor %} assay_obs_key: @@ -24,12 +24,12 @@ dataset_or_observation_wise: development_stage: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: {% endfor %} development_stage_obs_key: + disease: +{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: "{{ cookiecutter.disease }}" +{% endfor %} disease_obs_key: ethnicity: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: {% endfor %} ethnicity_obs_key: - healthy: -{% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: -{% endfor %} healthy_obs_key: individual: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: {% endfor %} individual_obs_key: diff --git a/sfaira/commands/templates/single_dataset/cookiecutter.json b/sfaira/commands/templates/single_dataset/cookiecutter.json index 4f9207a62..faafc16cd 100644 --- 
a/sfaira/commands/templates/single_dataset/cookiecutter.json +++ b/sfaira/commands/templates/single_dataset/cookiecutter.json @@ -5,6 +5,7 @@ "id": "", "id_without_doi": "", "author": "", + "disease": "", "doi": "", "doi_sfaira_repr": "", "download_url_data": "", @@ -16,5 +17,7 @@ "year": "", "individual": "", "state_exact": "", + "primary_data": "", + "default_embedding": "", "create_extra_description": "" } diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml index 32f9c38a1..5a505dccd 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -4,24 +4,24 @@ dataset_structure: - "{{ cookiecutter.sample_fns }}" dataset_wise: author: "{{ cookiecutter.author }}" + default_embedding: "{{ cookiecutter.default_embedding }}" doi: "{{ cookiecutter.doi }}" download_url_data: "{{ cookiecutter.download_url_data }}" download_url_meta: "{{ cookiecutter.download_url_meta }}" + primary_data: {{ cookiecutter.primary_data }} normalization: "{{ cookiecutter.normalization }}" year: "{{ cookiecutter.year }}" dataset_or_observation_wise: - age: - age_obs_key: assay: "{{ cookiecutter.assay }}" assay_obs_key: bio_sample: bio_sample_obs_key: development_stage: development_stage_obs_key: + disease: "{{ cookiecutter.disease }}" + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: - healthy_obs_key: individual: individual_obs_key: organ: "{{ cookiecutter.organ }}" diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index a696cbc3a..afa095805 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -10,7 +10,6 @@ class AdataIds: """ Base class of constant 
field names of anndata.AnnData object entries, such as .uns keys and .obs columns. """ - age: str annotated: str assay_sc: str author: str @@ -28,7 +27,6 @@ class AdataIds: gene_id_ensembl: str gene_id_index: str gene_id_names: str - healthy: str id: str individual: str ncells: str @@ -68,6 +66,7 @@ def __init__(self): self.cell_types_original = "cell_types_original" self.cell_ontology_class = "cell_ontology_class" self.cell_ontology_id = "cell_ontology_id" + self.default_embedding = "default_embedding" self.disease = "disease" self.doi = "doi" self.dataset = "dataset" @@ -77,18 +76,18 @@ def __init__(self): self.gene_id_ensembl = "ensembl" self.gene_id_index = "ensembl" self.gene_id_names = "names" - self.healthy = "healthy" self.id = "id" self.individual = "individual" self.ncells = "ncells" self.normalization = "normalization" self.organ = "organ" self.organism = "organism" + self.primary_data = "primary_data" self.sample_source = "sample_source" self.tech_sample = "tech_sample" + self.title = "title" self.year = "year" - self.age = "age" self.development_stage = "development_stage" self.ethnicity = "ethnicity" self.sex = "sex" @@ -104,9 +103,9 @@ def __init__(self): self.unknown_celltype_identifier = "UNKNOWN" self.not_a_cell_celltype_identifier = "NOT_A_CELL" + self.unknown_metadata_identifier = None self.obs_keys = [ - "age", "assay_sc", "assay_differentiation", "assay_type_differentiation", @@ -116,8 +115,8 @@ def __init__(self): "cell_ontology_class", "cell_ontology_id", "development_stage", + "disease", "ethnicity", - "healthy", "individual", "organ", "organism", @@ -133,12 +132,15 @@ def __init__(self): self.uns_keys = [ "annotated", "author", + "default_embedding", "doi", "download_url_data", "download_url_meta", "id", "mapped_features", "normalization", + "primary_data", + "title", "year", ] @@ -148,7 +150,6 @@ class AdataIdsCellxgene(AdataIds): Class of constant field names of anndata.AnnData object entries", such as .uns keys and .obs columns in 
cellxgene objects. """ - disease_state_healthy: str accepted_file_names: List[str] def __init__(self): @@ -156,39 +157,42 @@ def __init__(self): self.cell_types_original = "free_annotation" self.cell_ontology_class = "cell_type" self.cell_ontology_id = "cell_type_ontology_term_id" - self.doi = "doi" + self.default_embedding = "default_embedding" + self.doi = "preprint_doi" self.disease = "disease" - self.gene_id_names = "names" + self.gene_id_names = "gene_symbol" self.id = "id" self.ncells = "ncells" - self.normalization = "" # is always "raw" - self.organ = "organ" + self.organ = "tissue" self.organism = "organism" + self.title = "title" self.year = "year" - self.age = "age" self.author = "contributors" self.development_stage = "development_stage" self.ethnicity = "ethnicity" self.sex = "sex" self.state_exact = "disease" + self.tech_sample = "batch" # selected element entries used for parsing: - self.disease_state_healthy = "normal" self.author_names = "names" + self.unknown_metadata_identifier = "unknown" + # accepted file names self.accepted_file_names = [ "krasnow_lab_human_lung_cell_atlas_smartseq2-2-remixed.h5ad", ] self.obs_keys = [ - "age", + "assay_sc", + "cell_types_original", + "cell_ontology_class", + "cell_ontology_id", "development_stage", "disease", "ethnicity", - "healthy", - "individual", "organ", "organism", "sex", @@ -198,9 +202,7 @@ def __init__(self): "gene_id_names", ] self.uns_keys = [ - "author", - "doi", + "default_embedding", "id", - "normalization", - "year", + "title", ] diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index f9073f56f..c5a61e56e 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -5,20 +5,22 @@ META_DATA_FIELDS = { "annotated": bool, "author": str, + "assay_sc": str, + "assay_differentiation": str, + "assay_type_differentiation": str, "bio_sample": str, "cell_line": str, "cell_ontology_class": str, + "cell_ontology_id": str, + "development_stage": str, 
+ "disease": str, "doi": str, "download_url_data": str, "download_url_meta": str, - "healthy": str, "id": str, "ncells": int, "normalization": str, "organ": str, - "assay_sc": str, - "assay_differentiation": str, - "assay_type_differentiation": str, "organism": str, "sample_source": str, "state_exact": str, diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 740a2c468..6c7623245 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -1,5 +1,5 @@ from sfaira.versions.metadata import OntologyList, OntologyCelltypes -from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMmusdv, \ +from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus @@ -8,25 +8,30 @@ class OntologyContainerSfaira: _cellontology_class: OntologyCelltypes def __init__(self): - self.age = None self.annotated = OntologyList(terms=[True, False]) self.author = None self.assay_differentiation = None self.assay_sc = OntologySinglecellLibraryConstruction() self.assay_type_differentiation = OntologyList(terms=["guided", "unguided"]) + self.bio_sample = None self.cell_line = OntologyCellosaurus() self.cellontology_class = "v2021-02-01" self.cellontology_original = None - self.developmental_stage = None + self.default_embedding = None + self.development_stage = None # OntologyHsapdv() # TODO allow for other organisms here too. 
+ self.disease = OntologyMondo() self.doi = None - self.ethnicity = None - self.healthy = OntologyList(terms=[True, False]) + self.ethnicity = None # OntologyHancestro() self.id = None + self.individual = None self.normalization = None self.organ = OntologyUberon() - self.organism = OntologyList(terms=["mouse", "human"]) + self.organism = OntologyList(terms=["mouse", "human"]) # TODO introduce NCBItaxon here + self.primary_data = OntologyList(terms=[True, False]) self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "tumor"]) - self.sex = OntologyList(terms=["female", "male"]) + self.sex = OntologyList(terms=["female", "male", "mixed", "unknown", "other"]) + self.tech_sample = None + self.title = None self.year = OntologyList(terms=list(range(2000, 3000))) @property diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index cb364f7ef..845d886c2 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -17,7 +17,7 @@ import cgi import ssl -from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.versions.genomes import GenomeContainer from sfaira.versions.metadata import Ontology, CelltypeUniverse from sfaira.consts import AdataIds, AdataIdsSfaira, META_DATA_FIELDS, OCS from sfaira.data.utils import collapse_matrix, read_yaml @@ -73,33 +73,34 @@ class DatasetBase(abc.ABC): id: Union[None, str] genome: Union[None, str] - _age: Union[None, str] _assay_sc: Union[None, str] _assay_differentiation: Union[None, str] _assay_type_differentiation: Union[None, str] _author: Union[None, str] _bio_sample: Union[None, str] _cell_line: Union[None, str] + _default_embedding: Union[None, str] _development_stage: Union[None, str] + _disease: Union[None, str] _doi: Union[None, str] _download_url_data: Union[Tuple[List[None]], Tuple[List[str]], None] _download_url_meta: Union[Tuple[List[None]], Tuple[List[str]], None] _ethnicity: Union[None, str] - _healthy: Union[None, bool] _id: Union[None, 
str] _individual: Union[None, str] _ncells: Union[None, int] _normalization: Union[None, str] _organ: Union[None, str] _organism: Union[None, str] + _primary_data: Union[None, bool] _sex: Union[None, str] _source: Union[None, str] _sample_source: Union[None, str] _state_exact: Union[None, str] + _title: Union[None, str] _bio_sample: Union[None, str] _year: Union[None, int] - _age_obs_key: Union[None, str] _assay_sc_obs_key: Union[None, str] _assay_differentiation_obs_key: Union[None, str] _assay_type_differentiation_obs_key: Union[None, str] @@ -108,9 +109,8 @@ class DatasetBase(abc.ABC): _cellontology_id_obs_key: Union[None, str] _cellontology_original_obs_key: Union[None, str] _development_stage_obs_key: Union[None, str] + _disease_obs_key: Union[None, str] _ethnicity_obs_key: Union[None, str] - _healthy_obs_key: Union[None, str] - _healthy_obs_key: Union[None, str] _individual: Union[None, str] _organ_obs_key: Union[None, str] _organism_obs_key: Union[None, str] @@ -120,8 +120,6 @@ class DatasetBase(abc.ABC): _state_exact_obs_key: Union[None, str] _tech_sample_obs_key: Union[None, str] - _healthy_state_healthy: Union[None, str] - _var_symbol_col: Union[None, str] _var_ensembl_col: Union[None, str] @@ -176,33 +174,34 @@ def __init__( self.meta_path = meta_path self.cache_path = cache_path - self._age = None self._author = None self._assay_sc = None self._assay_differentiation = None self._assay_type_differentiation = None self._bio_sample = None self._cell_line = None + self._default_embedding = None self._development_stage = None + self._disease = None self._doi = None self._download_url_data = None self._download_url_meta = None self._ethnicity = None - self._healthy = None self._id = None self._individual = None self._ncells = None self._normalization = None self._organ = None self._organism = None + self._primary_data = None self._sample_source = None self._sex = None self._source = None self._state_exact = None self._tech_sample = None + self._title = None 
self._year = None - self._age_obs_key = None self._assay_sc_obs_key = None self._assay_differentiation_obs_key = None self._assay_type_differentiation_obs_key = None @@ -212,8 +211,9 @@ def __init__( self._cellontology_id_obs_key = None self._cellontology_original_obs_key = None self._development_stage_obs_key = None + self._disease_obs_key = None self._ethnicity_obs_key = None - self._healthy_obs_key = None + self._individual_obs_key = None self._organ_obs_key = None self._organism_obs_key = None @@ -222,8 +222,6 @@ def __init__( self._state_exact_obs_key = None self._tech_sample_obs_key = None - self._healthy_state_healthy = None - self._var_symbol_col = None self._var_ensembl_col = None @@ -375,6 +373,7 @@ def _load_cached( Wraps data set specific load and allows for caching. Cache is written into director named after doi and h5ad named after data set id. + Cache is not over-written. :param load_raw: Loads unprocessed version of data if available in data loader. :param allow_caching: Whether to allow method to cache adata object for faster re-loading. @@ -409,7 +408,8 @@ def _cached_writing(filename): dir_cache = os.path.dirname(filename) if not os.path.exists(dir_cache): os.makedirs(dir_cache) - self.adata.write_h5ad(filename) + if not os.path.exists(filename): + self.adata.write_h5ad(filename) if load_raw and allow_caching: _assembly_wrapper() @@ -433,24 +433,8 @@ def load( if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" "while not removing gene versions. 
this can lead to very poor matching results") - - # Set default genomes per organism if none provided: - if isinstance(match_to_reference, str): - genome = match_to_reference - elif match_to_reference is None or (isinstance(match_to_reference, bool) and match_to_reference): - if self.organism == "human": - genome = "Homo_sapiens_GRCh38_97" - warnings.warn(f"using default genome {genome}") - elif self.organism == "mouse": - genome = "Mus_musculus_GRCm38_97" - warnings.warn(f"using default genome {genome}") - else: - raise ValueError(f"genome was not supplied and no default genome found for organism {self.organism}") - elif not match_to_reference: - genome = None - else: - raise ValueError(f"invalid choice for match_to_reference={match_to_reference}") - self._set_genome(genome=genome) + if not (isinstance(match_to_reference, bool) and not match_to_reference): + self._set_genome(organism=self.organism, assembly=match_to_reference) # Set path to dataset directory if self.data_dir is None: @@ -458,18 +442,16 @@ def load( # Run data set-specific loading script: self._load_cached(load_raw=load_raw, allow_caching=allow_caching) - if set_metadata: - # Set data-specific meta data in .adata: - self._set_metadata_in_adata() # Set loading hyper-parameter-specific meta data: self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference self.adata.uns[self._adata_ids_sfaira.remove_gene_version] = remove_gene_version + if set_metadata: + # Set data-specific meta data in .adata: + self._set_metadata_in_adata(allow_uns=True) # Streamline feature space: self._convert_and_set_var_names(match_to_reference=match_to_reference) self._collapse_genes(remove_gene_version=remove_gene_version) - if match_to_reference: - self._match_features_to_reference() load.__doc__ = load_doc @@ -557,9 +539,14 @@ def _collapse_genes(self, remove_gene_version): self.adata.var[self._adata_ids_sfaira.gene_id_index] = self.adata.var_names 
self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values - def _match_features_to_reference(self): + def subset_genes(self, subset_type: Union[None, str, List[str]] = None): """ - Match feature space to a genomes provided with sfaira + Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. + + :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: + + - None: All genes in assembly. + - "protein_coding": All protein coding genes in assembly. """ # Convert data matrix to csc matrix if isinstance(self.adata.X, np.ndarray): @@ -574,9 +561,21 @@ def _match_features_to_reference(self): # Compute indices of genes to keep data_ids = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values - idx_feature_kept = np.where([x in self.genome_container.ensembl for x in data_ids])[0] - idx_feature_map = np.array([self.genome_container.ensembl.index(x) - for x in data_ids[idx_feature_kept]]) + if subset_type is None: + subset_ids = self.genome_container.ensembl + else: + if isinstance(subset_type, str): + subset_type = [subset_type] + keys = np.unique(self.genome_container.type) + if subset_type not in keys: + raise ValueError(f"subset type {subset_type} not available in list {keys}") + subset_ids = [ + x for x, y in zip(self.genome_container.ensembl, self.genome_container.type) + if y in subset_type + ] + + idx_feature_kept = np.where([x in subset_ids for x in data_ids])[0] + idx_feature_map = np.array([subset_ids.index(x) for x in data_ids[idx_feature_kept]]) # Remove unmapped genes x = x[:, idx_feature_kept] @@ -606,39 +605,44 @@ def _match_features_to_reference(self): uns=self.adata.uns ) - def _set_metadata_in_adata(self): + def _set_metadata_in_adata(self, allow_uns: bool): """ Copy meta data from dataset class in .anndata. + :param allow_uns: Allow writing of constant meta data into uns rather than .obs. 
:return: """ - # Set data set-wide attributes (.uns): - self.adata.uns[self._adata_ids_sfaira.annotated] = self.annotated - self.adata.uns[self._adata_ids_sfaira.author] = self.author - self.adata.uns[self._adata_ids_sfaira.doi] = self.doi - self.adata.uns[self._adata_ids_sfaira.download_url_data] = self.download_url_data - self.adata.uns[self._adata_ids_sfaira.download_url_meta] = self.download_url_meta - self.adata.uns[self._adata_ids_sfaira.id] = self.id - self.adata.uns[self._adata_ids_sfaira.normalization] = self.normalization - self.adata.uns[self._adata_ids_sfaira.year] = self.year + # Set data set-wide attributes (.uns) (write to .obs if .uns is not allowed): + if allow_uns: + for k in self._adata_ids_sfaira.uns_keys: + if k not in self.adata.uns.keys(): + self.adata.uns[getattr(self._adata_ids_sfaira, k)] = getattr(self, k) + else: + for k in self._adata_ids_sfaira.uns_keys: + if k in self.adata.uns.keys(): + val = self.adata.uns[k] + else: + val = getattr(self, k) + while hasattr(val, '__len__') and not isinstance(val, str) and len(val) == 1: # unpack nested lists + val = val[0] + self.adata.obs[getattr(self._adata_ids_sfaira, k)] = [val for i in range(len(self.adata.obs))] # Set cell-wise or data set-wide attributes (.uns / .obs): - # These are saved in .uns if they are data set wide to save memory. + # These are saved in .uns if they are data set wide to save memory if allow_uns is True. 
for x, y, z, v in ( - [self.age, self._adata_ids_sfaira.age, self.age_obs_key, self.ontology_container_sfaira.age], [self.assay_sc, self._adata_ids_sfaira.assay_sc, self.assay_sc_obs_key, self.ontology_container_sfaira.assay_sc], [self.assay_differentiation, self._adata_ids_sfaira.assay_differentiation, self.assay_differentiation_obs_key, self.ontology_container_sfaira.assay_differentiation], [self.assay_type_differentiation, self._adata_ids_sfaira.assay_type_differentiation, self.assay_type_differentiation_obs_key, self.ontology_container_sfaira.assay_type_differentiation], - [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key, None], [self.cell_line, self._adata_ids_sfaira.cell_line, self.cell_line_obs_key, self.ontology_container_sfaira.cell_line], [self.development_stage, self._adata_ids_sfaira.development_stage, self.development_stage_obs_key, - self.ontology_container_sfaira.developmental_stage], + self.ontology_container_sfaira.development_stage], + [self.disease, self._adata_ids_sfaira.disease, self.disease_obs_key, + self.ontology_container_sfaira.disease], [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.ethnicity_obs_key, self.ontology_container_sfaira.ethnicity], - [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key, None], [self.organ, self._adata_ids_sfaira.organ, self.organ_obs_key, self.ontology_container_sfaira.organ], [self.organism, self._adata_ids_sfaira.organism, self.organism_obs_key, self.ontology_container_sfaira.organism], @@ -646,13 +650,11 @@ def _set_metadata_in_adata(self): self.ontology_container_sfaira.sample_source], [self.sex, self._adata_ids_sfaira.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], [self.state_exact, self._adata_ids_sfaira.state_exact, self.state_exact_obs_key, None], - [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key, None], ): - if x is None and z is None: + if z is None and allow_uns: self.adata.uns[y] = None - elif 
x is not None and z is None: - # Attribute supplied per data set: Write into .uns. - self.adata.uns[y] = x + elif z is None and not allow_uns: + self.adata.obs[y] = x elif z is not None: # Attribute supplied per cell: Write into .obs. # Search for direct match of the sought-after column name or for attribute specific obs key. @@ -671,39 +673,35 @@ def _set_metadata_in_adata(self): self.adata.obs[y] = self.adata.obs[z].values.tolist() else: assert False, "switch option should not occur" - # Load boolean labels: - for x, y, z, v, w in ( - [self.healthy, self._adata_ids_sfaira.healthy, self.healthy_obs_key, self.ontology_container_sfaira.healthy, - self.healthy_state_healthy], + # Add batch annotation which can be rule-based + for x, y, z in ( + [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key], + [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key], + [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key], ): - if x is None and z is None: - self.adata.uns[y] = None - elif x is not None and z is None: - # Attribute supplied per data set: Write into .uns. - if w is None: - self.adata.uns[y] = x - else: - self.adata.uns[y] = x == w + if z is None and allow_uns: + self.adata.uns[y] = x + elif z is None and not allow_uns: + self.adata.uns[y] = UNS_STRING_META_IN_OBS + self.adata.obs[y] = x elif z is not None: - # Attribute supplied per cell: Write into .obs. - # Search for direct match of the sought-after column name or for attribute specific obs key. - if z not in self.adata.obs.keys(): - # This should not occur in single data set loaders (see warning below) but can occur in - # streamlined data loaders if not all instances of the streamlined data sets have all columns - # in .obs set. 
- self.adata.uns[y] = None - print(f"WARNING: attribute {y} of data set {self.id} was not found in column {z}") # debugging - else: - # Include flag in .uns that this attribute is in .obs: - self.adata.uns[y] = UNS_STRING_META_IN_OBS - # Remove potential pd.Categorical formatting: - label_y = self.adata.obs[z].values - # Use reference string to establish equality if available: - if w is not None: - label_y = label_y == w - self._value_protection( - attr=y, allowed=v, attempted=np.unique(label_y).tolist()) - self.adata.obs[y] = label_y.tolist() + self.adata.uns[y] = UNS_STRING_META_IN_OBS + zs = z.split("*") # Separator for indicate multiple columns. + keys_to_use = [] + for zz in zs: + if zz not in self.adata.obs.keys(): + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. + print(f"WARNING: attribute {y} of data set {self.id} was not found in column {zz}") # debugging + else: + keys_to_use.append(zz) + if len(keys_to_use) > 0: + # Build a combination label out of all columns used to describe this group. + self.adata.obs[y] = [ + "_".join([str(xxx) for xxx in xx]) + for xx in zip(*[self.adata.obs[k].values.tolist() for k in keys_to_use]) + ] else: assert False, "switch option should not occur" # Set cell-wise attributes (.obs): @@ -713,7 +711,14 @@ def _set_metadata_in_adata(self): if self.cellontology_original_obs_key is not None: self.project_celltypes_to_ontology() - def streamline(self, format: str = "sfaira", clean: bool = False): + def streamline( + self, + format: str = "sfaira", + allow_uns_sfaira: bool = True, + clean_obs: bool = True, + clean_var: bool = True, + clean_uns: bool = True + ): """ Streamline the adata instance to output format. 
@@ -723,43 +728,161 @@ def streamline(self, format: str = "sfaira", clean: bool = False): - "sfaira" - "cellxgene" - :param clean: Whether to delete non-streamlined fields. + :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. + :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. + :param clean_uns: Whether to delete non-streamlined fields in .uns. :return: """ if format == "sfaira": adata_fields = self._adata_ids_sfaira + self._set_metadata_in_adata(allow_uns=allow_uns_sfaira) elif format == "cellxgene": from sfaira.consts import AdataIdsCellxgene adata_fields = AdataIdsCellxgene() + self._set_metadata_in_adata(allow_uns=False) else: raise ValueError(f"did not recognize format {format}") - if clean: + if clean_var: if self.adata.varm is not None: del self.adata.varm + if self.adata.varp is not None: + del self.adata.varp + if clean_obs: if self.adata.obsm is not None: del self.adata.obsm - if self.adata.varm is not None: - del self.adata.varp if self.adata.obsp is not None: del self.adata.obsp - # Only retain target elements in adata.uns: - self.adata.uns = dict([ - (getattr(adata_fields, k), self.adata.uns[getattr(self._adata_ids_sfaira, k)]) - if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() else None - for k in adata_fields.uns_keys - ]) - # Only retain target elements in adata.var: - self.adata.var = pd.DataFrame(dict([ - (getattr(adata_fields, k), self.adata.var[getattr(self._adata_ids_sfaira, k)]) - for k in adata_fields.var_keys - if getattr(self._adata_ids_sfaira, k) in self.adata.var.keys() - ])) - # Only retain target columns in adata.obs: - self.adata.obs = pd.DataFrame(dict([ + # Only retain target elements in adata.uns: + uns_new = dict([ + (getattr(adata_fields, k), self.adata.uns[getattr(self._adata_ids_sfaira, k)]) + if getattr(self._adata_ids_sfaira, k) in 
self.adata.uns.keys() + else (getattr(adata_fields, k), None) + for k in adata_fields.uns_keys + ]) + if clean_uns: + del self.adata.uns + # Remove old keys in sfaira scheme: + for k in adata_fields.uns_keys: + if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys(): + del self.adata.uns[getattr(self._adata_ids_sfaira, k)] + # Add new keys in new scheme: + for k, v in uns_new.items(): + self.adata.uns[k] = v + # Catch issues with data structures in uns that cannot be written to h5ad: + for k, v in self.adata.uns.items(): + replace = False + if isinstance(v, tuple) and len(v) == 1 and (isinstance(v[0], tuple) or isinstance(v[0], list)): + v = v[0] + replace = True + if isinstance(v, tuple) and len(v) == 1 and (isinstance(v[0], tuple) or isinstance(v[0], list)): + v = v[0] + replace = True + if replace: + if v == self._adata_ids_sfaira.unknown_metadata_identifier: + self.adata.uns[k] = adata_fields.unknown_metadata_identifier + else: + self.adata.uns[k] = v + # Only retain target elements in adata.var: + var_old = self.adata.var.copy() + self.adata.var = pd.DataFrame(dict([ + (getattr(adata_fields, k), self.adata.var[getattr(self._adata_ids_sfaira, k)]) + for k in adata_fields.var_keys + if getattr(self._adata_ids_sfaira, k) in self.adata.var.keys() + ])) + # Add old columns in if they are not overwritten and object is not cleaned: + if not clean_var: + for k, v in var_old.items(): + if k not in self.adata.var.keys(): + self.adata.var[k] = v + # Only retain target columns in adata.obs: + obs_old = self.adata.obs.copy() + self.adata.obs = pd.DataFrame( + data=dict([ (getattr(adata_fields, k), self.adata.obs[getattr(self._adata_ids_sfaira, k)]) for k in adata_fields.obs_keys if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() - ])) + ]), + index=self.adata.obs.index + ) + # Add old columns in if they are not overwritten and object is not cleaned: + if not clean_obs: + for k, v in obs_old.items(): + if k not in self.adata.obs.keys() and \ + k not in 
[getattr(self._adata_ids_sfaira, k) for k in adata_fields.obs_keys] and \ + k not in self._adata_ids_sfaira.obs_keys: + self.adata.obs[k] = v + # Add additional constant description changes based on output format: + if format == "cellxgene": + self.adata.uns["layer_descriptions"] = {"X": "raw"} + self.adata.uns["version"] = { + "corpora_encoding_version": "0.1.0", + "corpora_schema_version": "1.1.0", + } + for k in ["author", "doi", "download_url_data", "download_url_meta", "id", "year"]: + if k in self.adata.uns.keys(): + del self.adata.uns[k] + # TODO port this into organism ontology handling. + if self.organism == "mouse": + self.adata.uns["organism"] = "Mus musculus" + self.adata.uns["organism_ontology_term_id"] = "NCBITaxon:10090" + elif self.organism == "human": + self.adata.uns["organism"] = "Homo sapiens" + self.adata.uns["organism_ontology_term_id"] = "NCBITaxon:9606" + else: + assert False, self.organism + # Add ontology IDs where necessary (note that human readable terms are also kept): + for k in [ + "organ", + "assay_sc", + "disease", + "ethnicity", + "development_stage", + ]: + if getattr(adata_fields, k) in self.adata.obs.columns: + self.__project_name_to_id_obs( + ontology=getattr(self._adata_ids_sfaira, k), + key_in=getattr(adata_fields, k), + key_out=getattr(adata_fields, k) + "_ontology_term_id", + map_exceptions=[], + map_exceptions_value="", + ) + else: + self.adata.obs[getattr(adata_fields, k)] = adata_fields.unknown_metadata_identifier + self.adata.obs[getattr(adata_fields, k) + "_ontology_term_id"] = "" + # Clean up readable fields. + for k in [ + "organ", + "assay_sc", + "disease", + "ethnicity", + "development_stage", + "sex", + ]: + self.adata.obs[getattr(adata_fields, k)] = [ + x if x is not None else adata_fields.unknown_metadata_identifier + for x in self.adata.obs[getattr(adata_fields, k)].values + ] + # Adapt var columns naming. 
+ if self.organism == "mouse": + gene_id_new = "hgnc_gene_symbol" + elif self.organism == "human": + gene_id_new = "mgi_gene_symbol" + else: + assert False, self.organism + self.adata.var[gene_id_new] = self.adata.var[getattr(adata_fields, "gene_id_names")] + self.adata.var.index = self.adata.var[gene_id_new].values + if gene_id_new != getattr(adata_fields, "gene_id_names"): + del self.adata.var[getattr(adata_fields, "gene_id_names")] + if format != "sfaira": + # Remove sfaira intrinsic .uns fields: + keys_to_delete = ["load_raw", "mapped_features", "remove_gene_version", "annotated"] + for k, v in self.adata.uns.items(): + if isinstance(v, str) and v == UNS_STRING_META_IN_OBS: + keys_to_delete.append(k) + for k in np.unique(keys_to_delete): + if k in self.adata.uns.keys(): + del self.adata.uns[k] def load_tobacked( self, @@ -835,24 +958,11 @@ def load_tobacked( else: raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def _set_genome(self, genome: Union[str, None]): - if genome is not None: - if genome.lower().startswith("homo_sapiens"): - g = SuperGenomeContainer( - organism="human", - genome=genome - ) - elif genome.lower().startswith("mus_musculus"): - g = SuperGenomeContainer( - organism="mouse", - genome=genome - ) - else: - raise ValueError(f"Genome {genome} not recognised. 
Needs to start with 'Mus_Musculus' or " - f"'Homo_Sapiens'.") - else: - g = None - self.genome_container = g + def _set_genome(self, organism: str, assembly: Union[str, None]): + self.genome_container = GenomeContainer( + organism=organism, + assembly=assembly, + ) @property def doi_cleaned_id(self): @@ -955,8 +1065,10 @@ def project_celltypes_to_ontology(self): allowed=self.ontology_celltypes, attempted=[ x for x in np.unique(labels_mapped).tolist() - if x != self._adata_ids_sfaira.unknown_celltype_identifier and - x != self._adata_ids_sfaira.not_a_cell_celltype_identifier + if x not in [ + self._adata_ids_sfaira.unknown_celltype_identifier, + self._adata_ids_sfaira.not_a_cell_celltype_identifier + ] ] ) self.adata.obs[self._adata_ids_sfaira.cell_ontology_class] = labels_mapped @@ -968,15 +1080,46 @@ def project_celltypes_to_ontology(self): # files with and without the ID in the third column. if self.cell_ontology_map is not None: # This mapping blocks progression in the unit test if not deactivated. - ids_mapped = [ - self.ontology_container_sfaira.cellontology_class.id_from_name(x) - if x not in [ + self.__project_name_to_id_obs( + ontology="cellontology_class", + key_in=self._adata_ids_sfaira.cell_ontology_class, + key_out=self._adata_ids_sfaira.cell_ontology_id, + map_exceptions=[ self._adata_ids_sfaira.unknown_celltype_identifier, self._adata_ids_sfaira.not_a_cell_celltype_identifier - ] else x - for x in labels_mapped - ] - self.adata.obs[self._adata_ids_sfaira.cell_ontology_id] = ids_mapped + ], + ) + + def __project_name_to_id_obs( + self, + ontology: str, + key_in: str, + key_out: str, + map_exceptions: list, + map_exceptions_value=None, + ): + """ + Project ontology names to IDs for a given ontology in .obs entries. 
+ + :param ontology: + :param key_in: + :param key_out: + :param map_exceptions: + :param map_exceptions_value: + :return: + """ + ontology = getattr(self.ontology_container_sfaira, ontology) + map_vals = dict([ + (x, ontology.id_from_name(x)) + for x in np.unique([ + xx for xx in self.adata.obs[key_in].values + if (xx not in map_exceptions and xx is not None) + ]) + ]) + self.adata.obs[key_out] = [ + map_vals[x] if x in map_vals.keys() else map_exceptions_value + for x in self.adata.obs[key_in].values + ] @property def citation(self): @@ -1068,7 +1211,6 @@ def write_meta( }, index=range(1)) # Expand table by variably cell-wise or data set-wise meta data: for x in [ - self._adata_ids_sfaira.age, self._adata_ids_sfaira.assay_sc, self._adata_ids_sfaira.assay_differentiation, self._adata_ids_sfaira.assay_type_differentiation, @@ -1076,7 +1218,6 @@ def write_meta( self._adata_ids_sfaira.cell_line, self._adata_ids_sfaira.development_stage, self._adata_ids_sfaira.ethnicity, - self._adata_ids_sfaira.healthy, self._adata_ids_sfaira.individual, self._adata_ids_sfaira.organ, self._adata_ids_sfaira.organism, @@ -1136,24 +1277,6 @@ def additional_annotation_key(self) -> Union[None, str]: def additional_annotation_key(self, x: str): self._additional_annotation_key = x - @property - def age(self) -> Union[None, str]: - if self._age is not None: - return self._age - else: - if self.meta is None: - self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.age in self.meta.columns: - return self.meta[self._adata_ids_sfaira.age] - else: - return None - - @age.setter - def age(self, x: str): - self.__erasing_protection(attr="age", val_old=self._age, val_new=x) - self._value_protection(attr="age", allowed=self.ontology_container_sfaira.age, attempted=x) - self._age = x - @property def annotated(self) -> Union[bool, None]: if self.cellontology_id_obs_key is not None or self.cellontology_original_obs_key is not None: @@ -1227,22 +1350,6 @@ def 
assay_type_differentiation(self, x: str): allowed=self.ontology_container_sfaira.assay_type_differentiation, attempted=x) self._assay_type_differentiation = x - @property - def author(self) -> str: - if self._author is not None: - return self._author - else: - if self.meta is None: - self.load_meta(fn=None) - if self.meta is None or self._adata_ids_sfaira.author not in self.meta.columns: - raise ValueError("author must be set but was neither set in constructor nor in meta data") - return self.meta[self._adata_ids_sfaira.author] - - @author.setter - def author(self, x: str): - self.__erasing_protection(attr="author", val_old=self._author, val_new=x) - self._author = x - @property def bio_sample(self) -> Union[None, str]: if self._bio_sample is not None: @@ -1287,6 +1394,25 @@ def data_dir(self): else: return self.data_dir_base + @property + def default_embedding(self) -> Union[None, str]: + if self._default_embedding is not None: + return self._default_embedding + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.default_embedding in self.meta.columns: + return self.meta[self._adata_ids_sfaira.default_embedding] + else: + return None + + @default_embedding.setter + def default_embedding(self, x: str): + self.__erasing_protection(attr="default_embedding", val_old=self._development_stage, val_new=x) + self._value_protection(attr="default_embedding", allowed=self.ontology_container_sfaira.default_embedding, + attempted=x) + self._default_embedding = x + @property def development_stage(self) -> Union[None, str]: if self._development_stage is not None: @@ -1301,11 +1427,30 @@ def development_stage(self) -> Union[None, str]: @development_stage.setter def development_stage(self, x: str): - self.__erasing_protection(attr="dev_stage", val_old=self._development_stage, val_new=x) - self._value_protection(attr="dev_stage", allowed=self.ontology_container_sfaira.developmental_stage, + 
self.__erasing_protection(attr="development_stage", val_old=self._development_stage, val_new=x) + self._value_protection(attr="development_stage", allowed=self.ontology_container_sfaira.development_stage, attempted=x) self._development_stage = x + @property + def disease(self) -> Union[None, str]: + if self._disease is not None: + return self._disease + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.disease in self.meta.columns: + return self.meta[self._adata_ids_sfaira.disease] + else: + return None + + @disease.setter + def disease(self, x: str): + self.__erasing_protection(attr="disease", val_old=self._disease, val_new=x) + self._value_protection(attr="disease", allowed=self.ontology_container_sfaira.disease, + attempted=x) + self._disease = x + @property def doi(self) -> Union[str, List[str]]: if self._doi is not None: @@ -1415,32 +1560,6 @@ def ethnicity(self, x: str): self._value_protection(attr="ethnicity", allowed=self._adata_ids_sfaira.ontology_ethnicity, attempted=x) self._ethnicity = x - @property - def healthy(self) -> Union[None, bool]: - if self._healthy is not None: - return self._healthy - else: - if self.meta is None: - self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.healthy in self.meta.columns: - return self.meta[self._adata_ids_sfaira.healthy] - else: - return None - - @healthy.setter - def healthy(self, x: bool): - self.__erasing_protection(attr="healthy", val_old=self._healthy, val_new=x) - self._healthy = x - - @property - def healthy_state_healthy(self) -> str: - return self._healthy_state_healthy - - @healthy_state_healthy.setter - def healthy_state_healthy(self, x: str): - self.__erasing_protection(attr="healthy_state_healthy", val_old=self._healthy_state_healthy, val_new=x) - self._healthy_state_healthy = x - @property def id(self) -> str: if self._id is not None: @@ -1531,13 +1650,23 @@ def normalization(self, x: str): self._normalization = x 
@property - def age_obs_key(self) -> str: - return self._age_obs_key + def primary_data(self) -> Union[None, bool]: + if self._primary_data is not None: + return self._primary_data + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.primary_data in self.meta.columns: + return self.meta[self._adata_ids_sfaira.primary_data] + else: + return None - @age_obs_key.setter - def age_obs_key(self, x: str): - self.__erasing_protection(attr="age_obs_key", val_old=self._age_obs_key, val_new=x) - self._age_obs_key = x + @primary_data.setter + def primary_data(self, x: bool): + self.__erasing_protection(attr="primary_data", val_old=self._primary_data, val_new=x) + self._value_protection(attr="primary_data", allowed=self.ontology_container_sfaira.primary_data, + attempted=x) + self._primary_data = x @property def assay_sc_obs_key(self) -> str: @@ -1616,9 +1745,18 @@ def development_stage_obs_key(self) -> str: @development_stage_obs_key.setter def development_stage_obs_key(self, x: str): - self.__erasing_protection(attr="dev_stage_obs_key", val_old=self._development_stage_obs_key, val_new=x) + self.__erasing_protection(attr="development_stage_obs_key", val_old=self._development_stage_obs_key, val_new=x) self._development_stage_obs_key = x + @property + def disease_obs_key(self) -> str: + return self._disease_obs_key + + @disease_obs_key.setter + def disease_obs_key(self, x: str): + self.__erasing_protection(attr="disease_obs_key", val_old=self._disease_obs_key, val_new=x) + self._disease_obs_key = x + @property def ethnicity_obs_key(self) -> str: return self._ethnicity_obs_key @@ -1628,15 +1766,6 @@ def ethnicity_obs_key(self, x: str): self.__erasing_protection(attr="ethnicity_obs_key", val_old=self._ethnicity_obs_key, val_new=x) self._ethnicity_obs_key = x - @property - def healthy_obs_key(self) -> str: - return self._healthy_obs_key - - @healthy_obs_key.setter - def healthy_obs_key(self, x: str): - 
self.__erasing_protection(attr="healthy_obs_key", val_old=self._healthy_obs_key, val_new=x) - self._healthy_obs_key = x - @property def individual_obs_key(self) -> str: return self._individual_obs_key @@ -1879,12 +2008,78 @@ def cell_ontology_map(self, x: pd.DataFrame): assert x.shape[1] in [2, 3], f"{x.shape} in {self.id}" assert x.columns[0] == self._adata_ids_sfaira.classmap_source_key assert x.columns[1] == self._adata_ids_sfaira.classmap_target_key + # Check for weird entries: + # nan arises if columns was empty in that row. + nan_vals = np.where([ + False if isinstance(x, str) else (np.isnan(x) or x is None) + for x in x[self._adata_ids_sfaira.classmap_target_key].values.tolist() + ])[0] + assert len(nan_vals) == 0, \ + f"Found nan target values in {self.id} for {x[self._adata_ids_sfaira.classmap_target_key].values[nan_vals]}" # Transform data frame into a mapping dictionary: self._ontology_class_map = dict(list(zip( x[self._adata_ids_sfaira.classmap_source_key].values.tolist(), x[self._adata_ids_sfaira.classmap_target_key].values.tolist() ))) + def __crossref_query(self, k): + """ + Queries cross REST API via package crossref_commons. + + :param k: Key to extract from crossref query container. 
+ :return: + """ + from crossref_commons.retrieval import get_entity + from crossref_commons.types import EntityType, OutputType + try: + attempt_counter = 0 + while True: + x = None + try: + attempt_counter += 1 + x = get_entity(self.doi_main, EntityType.PUBLICATION, OutputType.JSON)[k] + except ConnectionError as e: + # Terminate trial after 5 attempts with ConnectionError: + if attempt_counter > 5: + raise ConnectionError(e) + finally: + if k == "author": + pass + return x + except ValueError: + return None + except ConnectionError as e: + print(f"ConnectionError: {e}") + return None + + @property + def author(self) -> str: + if self._author is not None: + return self._author + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is None or self._adata_ids_sfaira.author not in self.meta.columns: + raise ValueError("author must be set but was neither set in constructor nor in meta data") + return self.meta[self._adata_ids_sfaira.author] + + @author.setter + def author(self, x: str): + self.__erasing_protection(attr="author", val_old=self._author, val_new=x) + self._author = x + + @property + def title(self): + if self._title is not None: + return self._title + else: + if self.meta is None: + self.load_meta(fn=None) + if self.meta is not None and self._adata_ids_sfaira.title in self.meta.columns: + return self.meta[self._adata_ids_sfaira.title] + else: + return self.__crossref_query(k="title") + # Private methods: def __erasing_protection(self, attr, val_old, val_new): @@ -1924,10 +2119,7 @@ def _value_protection( attempted = [attempted] for x in attempted: if not is_child(query=x, ontology=allowed): - if isinstance(allowed, Ontology): - # use node names instead of ontology object to produce a readable error message - allowed = allowed.node_names - raise ValueError(f"{x} is not a valid entry for {attr}, choose from: {allowed}") + raise ValueError(f"'{x}' is not a valid entry for {attr}.") def subset_cells(self, key, values): """ @@ -1938,7 
+2130,6 @@ def subset_cells(self, key, values): :param key: Property to subset by. Options: - - "age" points to self.age_obs_key - "assay_sc" points to self.assay_sc_obs_key - "assay_differentiation" points to self.assay_differentiation_obs_key - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key @@ -1946,7 +2137,6 @@ def subset_cells(self, key, values): - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key - "sample_source" points to self.sample_source_obs_key diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 5a43ad1f5..39273065e 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -9,11 +9,11 @@ import pandas import pydoc import scipy.sparse -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Union import warnings from sfaira.data.base.dataset import is_child, DatasetBase -from sfaira.versions.genome_versions import SuperGenomeContainer +from sfaira.versions.genomes import GenomeContainer from sfaira.consts import AdataIdsSfaira from sfaira.data.utils import read_yaml @@ -146,7 +146,14 @@ def func(dataset, **kwargs_func): load.__doc__ += load_doc - def streamline(self, format: str = "sfaira", clean: bool = False): + def streamline( + self, + format: str = "sfaira", + allow_uns_sfaira: bool = False, + clean_obs: bool = True, + clean_var: bool = True, + clean_uns: bool = True + ): """ Streamline the adata instance in each data set to output format. @@ -156,42 +163,27 @@ def streamline(self, format: str = "sfaira", clean: bool = False): - "sfaira" - "cellxgene" - :param clean: Whether to delete non-streamlined fields. 
+ :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. + :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. + :param clean_uns: Whether to delete non-streamlined fields in .uns. :return: """ for x in self.ids: - self.datasets[x].streamline(format=format, clean=clean) + self.datasets[x].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, + clean_var=clean_var, clean_uns=clean_uns) - def fragment(self) -> Dict[str, anndata.AnnData]: + def subset_genes(self, subset_type: Union[None, str, List[str]] = None): """ - Fragment data sets into largest consistent parititions based on meta data. + Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - ToDo return this as a DatasetGroup again. - the streamlined Datasets are similar to anndata instances here, worth considering whether to use anndata - instead because it can be indexed. + :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - :return: + - None: All genes in assembly. + - "protein_coding": All protein coding genes in assembly. """ - # TODO: assert that data is streamlined. - print("make sure data is streamlined") - datasets_new = {} - for k, v in self.datasets.items(): - # Define fragments and fragment names. 
- # Because the data is streamlined, fragments are partitions of the .obs space, excluding the cell-wise - # annotation columns: - # - cellontology_class - # - cellontology_id - # - cellontology_original - cols_exclude = ["cellontology_class", "cellontology_id", "cellontology_original"] - tab = v.adata.obs.loc[:, [x not in cols_exclude for x in v.adata.obs.columns]] - tab_unique = tab.drop_duplicates() - idx_sets = [ - np.where([np.all(tab_unique.iloc[i, :] == tab.iloc[j, :])[0] for j in range(tab.shape[0])]) - for i in range(tab_unique.shape[0]) - ] - for i, x in enumerate(idx_sets): - datasets_new[k + "_fragment" + str(i)] = v.adata[x, :] - return datasets_new + for x in self.ids: + self.datasets[x].subset_genes(subset_type=subset_type) def load_tobacked( self, @@ -288,10 +280,10 @@ def adata_ls(self): @property def adata(self): - if not self.adata_ls: - return None - self.streamline(format="sfaira", clean=True) adata_ls = self.adata_ls + if not adata_ls: + return None + self.streamline(format="sfaira", allow_uns_sfaira=False, clean_obs=True, clean_var=True, clean_uns=True) # .var entries are renamed and copied upon concatenation. # To preserve gene names in .var, the target gene names are copied into var_names and are then copied @@ -435,15 +427,13 @@ def subset_cells(self, key, values: Union[str, List[str]]): :param key: Property to subset by. 
Options: - - "age" points to self.age_obs_key - - "assay_sc" points to self.assay_sc_obs_key - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_sc" points to self.assay_sc_obs_key - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key - "cell_line" points to self.cell_line - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key - "sample_source" points to self.sample_source_obs_key @@ -610,7 +600,13 @@ def clean_ontology_class_map(self): list(self.datasets.values())[0]._value_protection( attr="celltypes", allowed=self.ontology_celltypes, - attempted=np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() + attempted=[ + x for x in np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() + if x not in [ + self._adata_ids_sfaira.unknown_celltype_identifier, + self._adata_ids_sfaira.not_a_cell_celltype_identifier + ] + ] ) # Adds a third column with the corresponding ontology IDs into the file. tab[self._adata_ids_sfaira.classmap_target_id_key] = [ @@ -682,14 +678,14 @@ def get_gc( genome: str = None ): if genome.lower().startswith("homo_sapiens"): - g = SuperGenomeContainer( + g = GenomeContainer( organism="human", - genome=genome + assembly=genome ) elif genome.lower().startswith("mus_musculus"): - g = SuperGenomeContainer( + g = GenomeContainer( organism="mouse", - genome=genome + assembly=genome ) else: raise ValueError(f"Genome {genome} not recognised. Needs to start with 'Mus_Musculus' or 'Homo_Sapiens'.") @@ -746,6 +742,7 @@ def load( :param match_to_reference: See .load(). :param remove_gene_version: See .load(). :param load_raw: See .load(). + :param set_metadata: See .load(). :param allow_caching: See .load(). 
:param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. Note: parallelises loading of each dataset group, but not across groups. @@ -762,13 +759,25 @@ def load( processes=processes, ) + def subset_genes(self, subset_type: Union[None, str, List[str]] = None): + """ + Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. + + :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: + + - None: All genes in assembly. + - "protein_coding": All protein coding genes in assembly. + """ + for x in self.dataset_groups: + x.subset_genes(subset_type=subset_type) + @property def adata(self): if self._adata is None: # Make sure that concatenate is not used on a None adata object: - adatas = [x.adata for x in self.dataset_groups if x.adata is not None] + adatas = [x.adata for x in self.dataset_groups if x.adata_ls] if len(adatas) > 1: - self._adata = adatas[0].adata.concatenate( + self._adata = adatas[0].concatenate( *adatas[1:], join="outer", batch_key=self._adata_ids_sfaira.dataset_group @@ -900,7 +909,14 @@ def delete_backed(self): def load_cached_backed(self, fn: PathLike): self.adata = anndata.read(fn, backed='r') - def streamline(self, format: str = "sfaira", clean: bool = False): + def streamline( + self, + format: str = "sfaira", + allow_uns_sfaira: bool = False, + clean_obs: bool = True, + clean_var: bool = True, + clean_uns: bool = True + ): """ Streamline the adata instance in each group and each data set to output format. @@ -910,12 +926,15 @@ def streamline(self, format: str = "sfaira", clean: bool = False): - "sfaira" - "cellxgene" - :param clean: Whether to delete non-streamlined fields. + :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. 
+ :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. + :param clean_uns: Whether to delete non-streamlined fields in .uns. :return: """ for x in self.dataset_groups: for xx in x.ids: - x.datasets[xx].streamline(format=format, clean=clean) + x.datasets[xx].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns) def subset(self, key, values): """ @@ -941,7 +960,6 @@ def subset_cells(self, key, values: Union[str, List[str]]): :param key: Property to subset by. Options: - - "age" points to self.age_obs_key - "assay_sc" points to self.assay_sc_obs_key - "assay_differentiation" points to self.assay_differentiation_obs_key - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key @@ -949,7 +967,6 @@ def subset_cells(self, key, values: Union[str, List[str]]): - "cellontology_class" points to self.cellontology_class_obs_key - "developmental_stage" points to self.developmental_stage_obs_key - "ethnicity" points to self.ethnicity_obs_key - - "healthy" points to self.healthy_obs_key - "organ" points to self.organ_obs_key - "organism" points to self.organism_obs_key - "sample_source" points to self.sample_source_obs_key diff --git a/sfaira/data/dataloaders/databases/cellxgene/__init__.py b/sfaira/data/dataloaders/databases/cellxgene/__init__.py index 2d6a4a900..472d880aa 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/__init__.py +++ b/sfaira/data/dataloaders/databases/cellxgene/__init__.py @@ -1,2 +1,2 @@ -from sfaira.data.dataloaders.databases.cellxgene.cellxgene_group import DatasetGroup +from sfaira.data.dataloaders.databases.cellxgene.cellxgene_group import DatasetGroupCellxgene from sfaira.data.dataloaders.databases.cellxgene.cellxgene_loader import Dataset diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py index 8224aaff1..4091e5765 100644 --- 
a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_group.py @@ -7,7 +7,7 @@ from .cellxgene_loader import Dataset -class DatasetGroup(DatasetGroup): +class DatasetGroupCellxgene(DatasetGroup): def __init__( self, diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index f4d358d29..5e8857f47 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -23,28 +23,22 @@ def __init__( **kwargs ): super().__init__(data_path=data_path, meta_path=meta_path, **kwargs) - self._self._adata_ids_cellxgene = AdataIdsCellxgene() + self._adata_ids_cellxgene = AdataIdsCellxgene() self.fn = fn - self.obs_key_cellontology_class = self._adata_ids_cellxgene.cell_ontology_class - self.obs_key_cellontology_id = self._adata_ids_cellxgene.cell_ontology_id - self.obs_key_cellontology_original = self._adata_ids_cellxgene.cell_types_original - self.obs_key_dev_stage = self._adata_ids_cellxgene.development_stage - self.obs_key_ethnicity = self._adata_ids_cellxgene.ethnicity - self.obs_key_healthy = self._adata_ids_cellxgene.healthy - self.obs_key_sex = self._adata_ids_cellxgene.sex - self.obs_key_organism = self._adata_ids_cellxgene.organism - self.obs_key_state_exact = self._adata_ids_cellxgene.state_exact - - self.healthy_state_healthy = self._adata_ids_cellxgene.disease_state_healthy + self.cellontology_class_obs_key = self._adata_ids_cellxgene.cell_ontology_class + self.cellontology_id_obs_key = self._adata_ids_cellxgene.cell_ontology_id + self.cellontology_original_obs_key = self._adata_ids_cellxgene.cell_types_original + self.development_stage_obs_key = self._adata_ids_cellxgene.development_stage + self.disease_obs_key = self._adata_ids_cellxgene.disease + self.ethnicity_obs_key = self._adata_ids_cellxgene.ethnicity + self.sex_obs_key = 
self._adata_ids_cellxgene.sex + self.organ_obs_key = self._adata_ids_cellxgene.organism + self.state_exact_obs_key = self._adata_ids_cellxgene.state_exact self.var_ensembl_col = self._adata_ids_cellxgene.gene_id_ensembl self.var_symbol_col = self._adata_ids_cellxgene.gene_id_names - self.class_maps = { - "0": {}, - } - def _load(self): """ Note that in contrast to data set specific data loaders, here, the core attributes are only identified from diff --git a/sfaira/data/dataloaders/databases/super_group.py b/sfaira/data/dataloaders/databases/super_group.py index cf748c851..96405ed62 100644 --- a/sfaira/data/dataloaders/databases/super_group.py +++ b/sfaira/data/dataloaders/databases/super_group.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetSuperGroup -from sfaira.data.dataloaders.databases.cellxgene import DatasetGroup as DatasetGroupCellxgene +from sfaira.data.dataloaders.databases.cellxgene import DatasetGroupCellxgene class DatasetSuperGroupDatabases(DatasetSuperGroup): diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index 4a6562bbe..e0c4f6e65 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -17,13 +17,12 @@ def __init__(self, **kwargs): self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE81nnn/GSE81547/matrix/GSE81547_series_matrix.txt.gz" self.author = "Enge" + self.disease = "healthy" self.doi = "10.1016/j.cell.2017.09.004" - self.healthy = True self.normalization = "raw" self.assay_sc = "Smart-seq2" self.organ = "islet of Langerhans" self.organism = "human" - self.state_exact = "healthy" self.year = 2017 self.var_symbol_col = "index" 
self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index 3a6e604a3..c338040d9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -300,12 +300,11 @@ def __init__(self, **kwargs): self.author = "Han" self.dev_stage = sample_dev_stage_dict[self.sample_fn] + self.disease = "healthy" self.doi = "10.1016/j.cell.2018.02.001" self.normalization = "raw" - self.healthy = True self.organism = "mouse" self.assay_sc = "microwell-seq" - self.state_exact = "healthy" self.year = 2018 self.sample_source = "primary_tissue" @@ -339,9 +338,15 @@ def load(data_dir, sample_fn, **kwargs): adata = anndata.AnnData(data.T) annotated_cells = np.array([x in celltypes.index for x in adata.obs_names]) - # Subset to annotated cells if any are annotated: + # Add annotation if available for this data set: if np.sum(annotated_cells) > 0: + # Subset to annotated cells if any are annotated: adata = adata[annotated_cells].copy() + # Clean nans in data frame to avoid issues with cell type annotation: + celltypes["Annotation"] = [ + x if x not in [np.nan, "nan"] else "unknown" + for x in celltypes["Annotation"].values + ] adata.obs = celltypes.loc[adata.obs_names, :] return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv index f0b24e36a..46d9825e1 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.tsv @@ -101,7 +101,7 @@ Dividing 
cell(Neonatal-Skin) UNKNOWN UNKNOWN Dividing cell(Pancreas) UNKNOWN UNKNOWN Dividing cell(Stomach) UNKNOWN UNKNOWN Dividing cells(Lung) UNKNOWN UNKNOWN -Dividng cell(Neonatal-Calvaria) +Dividng cell(Neonatal-Calvaria) UNKNOWN UNKNOWN Dividing T cells(Lung) T cell CL:0000084 Dividing dendritic cells(Lung) dendritic cell CL:0000451 Ductal cell(Pancreas) pancreatic ductal cell CL:0002079 @@ -459,4 +459,5 @@ Ventricle cardiomyocyte_Kcnj8 high(Neonatal-Heart) ventricular cardiac muscle ce abT cell(Thymus) immature alpha-beta T cell CL:0000790 gdT cell (Thymus) gamma-delta thymocyte CL:0002405 luteal cells(Ovary) luteal cell CL:0000175 +unknown UNKNOWN UNKNOWN β-cell(Pancreas) type B pancreatic cell CL:0000169 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index 8c0865c7b..a3276d4c8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -6,6 +6,7 @@ dataset_structure: dataset_wise: author: - "Kinchen" + default_embedding: doi: - "10.1016/j.cell.2018.08.067" download_url_data: @@ -15,10 +16,9 @@ dataset_wise: HC: "private,hc_meta_data_stromal_with_donor.txt" UC: "private,uc_meta_data_stromal_with_donor.txt" normalization: "norm" + primary_data: year: 2019 dataset_or_observation_wise: - age: - age_obs_key: "Age" assay_sc: "10X sequencing" assay_sc_obs_key: assay_differentiation: @@ -30,11 +30,11 @@ dataset_or_observation_wise: cell_line: cell_line_obs_key: development_stage: - development_stage_obs_key: + development_stage_obs_key: "Age" + disease: + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: - 
healthy_obs_key: individual: individual_obs_key: organ: "lamina propria of mucosa of colon" @@ -56,7 +56,5 @@ observation_wise: feature_wise: var_ensembl_col: var_symbol_col: "index" -misc: - healthy_state_healthy: "healthy" meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index 35a859794..d865d427f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -13,18 +13,19 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Smilie" + self.disease = "healthy" self.doi = "10.1016/j.cell.2019.06.029" - self.healthy = True self.normalization = "raw" self.organ = "colonic epithelium" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" + self.sample_source = "primary_tissue" self.year = 2019 + self.var_symbol_col = "index" + self.cellontology_original_obs_key = "CellType" - self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index 41efd5bc5..2e81a828d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -13,19 +13,20 @@ def __init__(self, **kwargs): 
self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Martin" + self.disease = "healthy" self.doi = "10.1016/j.cell.2019.08.008" - self.healthy = True self.normalization = "raw" self.organ = "ileum" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" + self.sample_source = "primary_tissue" self.year = 2019 + self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" + self.cellontology_original_obs_key = "CellType" - self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index 85d7b9680..fbd5b242a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -18,18 +18,19 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Henry" + self.disease = "healthy" self.doi = "10.1016/j.celrep.2018.11.086" - self.healthy = True self.normalization = "raw" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.organ = "prostate gland" self.organism = "human" - self.assay_sc = "10X sequencing" self.year = 2018 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" + self.cellontology_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py 
b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 9e4292cb6..5e4ee0daf 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -16,16 +16,16 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/baron16.processed.h5ad" self.download_url_meta = None + self.assay_sc = "inDrop" self.author = "Baron" + self.disease = "healthy" self.doi = "10.1016/j.cels.2016.08.011" - self.healthy = True self.normalization = "raw" self.organ = "pancreas" self.organism = "human" - self.assay_sc = "inDrop" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2016 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index c09b1c1bd..dcee6e1e3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -8,6 +8,7 @@ class Dataset(DatasetBase): """ ToDo: revisit gamma cell missing in CO + TODO: move state exact to diesase """ def __init__(self, **kwargs): @@ -28,9 +29,6 @@ def __init__(self, **kwargs): self.cellontology_original_obs_key = "Characteristics[cell type]" self.state_exact_obs_key = "Characteristics[disease]" - self.healthy_obs_key = "Characteristics[disease]" - - self.healthy_state_healthy = "normal" self.set_dataset_id(idx=1) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 42db715c5..94b53b1f7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -19,6 +19,9 @@ class Dataset(DatasetBase): + """ + TODO add disease + """ def __init__(self, **kwargs): super().__init__(**kwargs) @@ -27,7 +30,6 @@ def __init__(self, **kwargs): self.author = "Thompson" self.doi = "10.1016/j.cmet.2019.01.021" - self.healthy = False self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index ad68f67fd..e3ff49bef 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -13,16 +13,16 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Miller" + self.disease = "healthy" self.doi = "10.1016/j.devcel.2020.01.033" - self.healthy = True self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2020 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "Cell_type" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index 85b03be11..ae6097663 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -13,16 +13,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad" self.download_url_meta = None + self.assay_sc = "DroNc-seq" self.author = "Habib" + self.disease = "healthy" self.doi = "10.1038/nmeth.4407" - self.healthy = True self.normalization = "raw" self.organ = "brain" self.organism = "human" - self.assay_sc = "DroNc-seq" - self.state_exact = "healthy" - self.year = 2017 self.sample_source = "primary_tissue" + self.year = 2017 self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 5080b35b8..586c78391 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -13,16 +13,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Guo" + self.disease = "healthy" self.doi = "10.1038/s41422-018-0099-2" - self.healthy = True self.normalization = "raw" self.organ = "testis" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2018 self.sample_source = "primary_tissue" + self.year = 2018 
self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index 4b5c84fe6..6d394eef6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -12,16 +12,15 @@ def __init__(self, **kwargs): self.download_url_data = "private,GSE115469.csv.gz" self.download_url_meta = "private,GSE115469_labels.txt" + self.assay_sc = "10X sequencing" self.author = "MacParland" + self.disease = "healthy" self.doi = "10.1038/s41467-018-06318-7" - self.healthy = True self.normalization = "raw" self.organ = "caudate lobe of liver" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2018 self.sample_source = "primary_tissue" + self.year = 2018 self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 9e6fe338d..8fdb37558 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -14,16 +14,15 @@ def __init__(self, **kwargs): self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE121nnn/GSE121862/suppl/" \ "GSE121862%5FUCSD%2DWU%5FSingle%5FNuclei%5FCluster%5FAnnotations%2Ecsv%2Egz" + self.assay_sc = "DroNc-seq" self.author = "Lake" + self.disease = 
"healthy" self.doi = "10.1038/s41467-019-10861-2" - self.healthy = True self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.assay_sc = "DroNc-seq" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv index b58def11d..3fdf8c073 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.tsv @@ -25,5 +25,5 @@ Proximal Tubule Epithelial Cells - Stress/Inflam epithelial cell of proximal tub Thick Ascending Limb kidney loop of Henle thick ascending limb epithelial cell CL:1001106 Thin ascending limb kidney loop of Henle thin ascending limb epithelial cell CL:1001107 Vascular Smooth Muscle Cells and pericytes kidney pelvis smooth muscle cell CL:1000702 +Unknown - Novel PT CFH+ Subpopulation (S2) UNKNOWN UNKNOWN Unknown UNKNOWN UNKNOWN - diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index abb8bc000..7fced312b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -46,6 +46,9 @@ class Dataset(DatasetBase): + """ + TODO: move state exact to disease + """ def __init__(self, **kwargs): super().__init__(**kwargs) @@ -55,23 +58,21 @@ def __init__(self, **kwargs): "private,donor2.annotation.txt" ] + self.assay_sc = "10X 
sequencing" self.author = "Szabo" self.doi = "10.1038/s41467-019-12464-3" self.individual = SAMPLE_DICT[self.sample_fn][1] self.normalization = "raw" self.organ = SAMPLE_DICT[self.sample_fn][0] self.organism = "human" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = SAMPLE_DICT[self.sample_fn][2] - self.healthy = SAMPLE_DICT[self.sample_fn][2] == "healthy" self.year = 2019 - self.sample_source = "primary_tissue" self.var_symbol_col = "Gene" self.var_ensembl_col = "Accession" self.cellontology_original_obs_key = "cell_ontology_class" - self.organ_obs_key = "organ" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv index 908ddf806..632538543 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.tsv @@ -19,3 +19,4 @@ source target target_id 8.CD8EM/TRMrest effector memory CD8-positive, alpha-beta T cell CL:0000913 9.CD8TEMRArest effector memory CD8-positive, alpha-beta T cell CL:0000913 9.CD8TRMrest effector memory CD8-positive, alpha-beta T cell CL:0000913 +unknown UNKNOWN UNKNOWN diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 306b32d31..6d912ca2d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -11,16 +11,15 @@ def __init__(self, **kwargs): self.download_url_data = 
"https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Menon" + self.disease = "healthy" self.doi = "10.1038/s41467-019-12780-8" - self.healthy = True self.normalization = "raw" self.organ = "retina" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 66fe750f5..be7fb5d14 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -19,16 +19,15 @@ def __init__(self, **kwargs): self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.2.zip" + self.assay_sc = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" self.author = "Ventotormo" - self.healthy = True + self.disease = "healthy" + self.doi = "10.1038/s41586-018-0698-6" self.normalization = "raw" self.organ = "placenta" self.organism = "human" - self.doi = "10.1038/s41586-018-0698-6" - self.assay_sc = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" - self.state_exact = "healthy" - self.year = 2018 self.sample_source = "primary_tissue" + self.year = 2018 self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py index 
b0720dd5d..a9ed3eb01 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -12,16 +12,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5FNormalhumanlivercellatlasdata%2Etxt%2Egz" self.download_url_meta = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE124nnn/GSE124395/suppl/GSE124395%5Fclusterpartition%2Etxt%2Egz" + self.assay_sc = "CEL-seq2" self.author = "Aizarani" + self.disease = "healthy" self.doi = "10.1038/s41586-019-1373-2" - self.healthy = True self.normalization = "raw" + self.sample_source = "primary_tissue" self.organ = "liver" self.organism = "human" - self.assay_sc = "CEL-seq2" - self.state_exact = "healthy" self.year = 2019 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index cccb6c596..18561b807 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -1,30 +1,32 @@ import os +import numpy as np from sfaira.data import DatasetBase class Dataset(DatasetBase): + """ + TODO move state exact to disease, condition Uninjured is healthy + """ def __init__(self, **kwargs): super().__init__(**kwargs) self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Ramachandran" self.doi = "10.1038/s41586-019-1631-3" 
self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.assay_sc = "10X sequencing" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.cellontology_original_obs_key = "annotation_lineage" self.state_exact_obs_key = "condition" - self.key_healthy_obs_key = "condition" - self.healthy_state_healthy = "Uninjured" self.set_dataset_id(idx=1) @@ -42,5 +44,6 @@ def load(data_dir, **kwargs): f"new_obj@meta.data = tissue@meta.data\n" f"as.SingleCellExperiment(new_obj)\n" ) + adata.obs["nGene"] = adata.obs["nGene"].astype(np.int32) return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv index b9bd8d703..a1bd07ccd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.tsv @@ -1,13 +1,13 @@ -source target -Bcells B cell -Cholangiocytes intrahepatic cholangiocyte -Endotheliaendothelial cell -Hepatocytes hepatocyte -ILCs innate lymphoid cell -MPs mononuclear phagocytes # ToDo this are subclustered in the manuscript, is this annotated in the object? 
-Mast cells mast cell -Mesenchyme mesenchymal cell -Mesothelia mesothelial cell -Plasma Bcells plasma cell -Tcells T cell -pDCs plasmacytoid dendritic cell +source target target_id +Bcells B cell CL:0000236 +Cholangiocytes intrahepatic cholangiocyte CL:0002538 +Endothelia endothelial cell CL:0000115 +Hepatocytes hepatocyte CL:0000182 +ILCs innate lymphoid cell CL:0001065 +MPs mononuclear phagocyte CL:0000113 +Mast cells mast cell CL:0000097 +Mesenchyme mesenchymal cell CL:0008019 +Mesothelia mesothelial cell CL:0000077 +Plasma Bcells plasma cell CL:0000786 +Tcells T cell CL:0000084 +pDCs plasmacytoid dendritic cell CL:0000784 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index eaeb10890..735b25a32 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -11,16 +11,16 @@ def __init__(self, **kwargs): self.download_url_data = "private,fetal_liver_alladata_.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Popescu" + self.disease = "healthy" self.doi = "10.1038/s41586-019-1652-y" - self.healthy = True self.normalization = "raw" self.organ = "liver" self.organism = "human" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2019 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "cell.labels" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index 0dbe7c026..06a779d14 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -14,8 +14,6 @@ dataset_wise: normalization: "raw" year: 2019 dataset_or_observation_wise: - age: - age_obs_key: assay_sc: "10X 3' v2 sequencing" assay_sc_obs_key: assay_differentiation: "Lancaster, 2014 (doi: 10.1038/nprot.2014.158)" @@ -28,10 +26,10 @@ dataset_or_observation_wise: cell_line_obs_key: "Line" development_stage: development_stage_obs_key: "Stage" + disease: "healthy" + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: True - healthy_obs_key: individual: individual_obs_key: organ: "brain" @@ -51,7 +49,5 @@ observation_wise: feature_wise: var_ensembl_col: "ensembl" var_symbol_col: "index" -misc: - healthy_state_healthy: meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 8a8725436..2aa629412 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -32,7 +32,7 @@ def __init__(self, **kwargs): self.cellontology_original_obs_key = "celltype_specific" self.development_stage_obs_key = "dev_stage" self.organ_obs_key = "organ" - self.sex_obs_key = "gender" + self.sex_obs_key = "sex" self.age_obs_key = "age" self.var_symbol_col = "index" @@ -151,8 +151,8 @@ def load(data_dir, **kwargs): sex_dict = { 'Male': "male", 'Female': "female", - 'nan': "nan", - 'FeM=male': "nan", + 'nan': "unknown", + 'FeM=male': "unknown", } adata = anndata.read(os.path.join(data_dir, "HCL_Fig1_adata.h5ad")) @@ -210,8 +210,13 @@ def load(data_dir, **kwargs): # tidy up the column names of the obs annotations adata.obs.columns = [ 
"sample", "sub_tissue", "n_genes", "n_counts", "cluster_global", "dev_stage", "donor", "celltype_global", - "age", "celltype_specific", "cluster_specific", "gender", "assay_sc", "source"] + "age", "celltype_specific", "cluster_specific", "sex", "assay_sc", "source"] + # Remove new line characters from cell type: + adata.obs["celltype_specific"] = [ + x.replace("\n", "").rstrip() + for x in adata.obs["celltype_specific"].values + ] adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] - adata.obs["gender"] = [sex_dict[x] for x in adata.obs["gender"].values] + adata.obs["sex"] = [sex_dict[str(x)] for x in adata.obs["sex"].values] return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv index 4d721cb61..6765cf7ee 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.tsv @@ -1,7 +1,6 @@ source target target_id 0 UNKNOWN UNKNOWN AT1 cell type I pneumocyte CL:0002062 -AT1 cell type I pneumocyte CL:0002062 AT2 cell type II pneumocyte CL:0002063 Absorptive cell absorptive cell CL:0000212 Acinar cell_CLPS high acinar cell CL:0000622 @@ -101,7 +100,6 @@ Cytotrophoblast_PAGE4 high mononuclear cytotrophoblast cell CL:0000523 Cytotrophoblast_PEG10 high mononuclear cytotrophoblast cell CL:0000523 D cell/ X/A cell type D cell of stomach CL:0002267 Dendritic cell dendritic cell CL:0000451 -Dendritic cell dendritic cell CL:0000451 Dendritic cell_CPVL high dendritic cell CL:0000451 Dendritic cell_FCER1A high dendritic cell CL:0000451 Dendritic cell_HLA-DRA high dendritic cell CL:0000451 @@ -158,17 +156,21 @@ Endothelial progenitor cell endothelial cell CL:0000115 Enteric glial cell glial cell CL:0000125 Enteric nerval cell neuron CL:0000540 Enterocyte 
enterocyte CL:0000584 -Enterocyte enterocyte CL:0000584 Enterocyte progenitor enterocyte CL:0000584 Enterocyte progenitor_APOA4 high enterocyte CL:0000584 +Enterocyte progenitor_APOC3 high enterocyte CL:0000584 +Enterocyte progenitor_GUCA2B high enterocyte CL:0000584 Enterocyte progenitor_OLFM4 high enterocyte CL:0000584 Enterocyte progenitor_REG1A high enterocyte CL:0000584 +Enterocyte progenitor_TSPAN8 high enterocyte CL:0000584 Enterocyte_AGR2 high enterocyte CL:0000584 Enterocyte_APOA1 high enterocyte CL:0000584 Enterocyte_APOA4 high enterocyte CL:0000584 Enterocyte_BEST4 high enterocyte CL:0000584 Enterocyte_CA1 high enterocyte CL:0000584 Enterocyte_CA7 high enterocyte CL:0000584 +Enterocyte_FABP1 low enterocyte CL:0000584 +Enterocyte_GUCA2A high enterocyte CL:0000584 Enterocyte_MT gene high enterocyte CL:0000584 Enterocyte_OTOP2 high enterocyte CL:0000584 Enterocyte_PHGR1 high enterocyte CL:0000584 @@ -245,7 +247,6 @@ Fibroblast_A2M high fibroblast CL:0000057 Fibroblast_APOD high fibroblast CL:0000057 Fibroblast_COL1A1 high fibroblast of choroid plexus CL:0002549 Fibroblast_COL3A1 high fibroblast CL:0000057 -Fibroblast_COL3A1 high fibroblast CL:0000057 Fibroblast_COL5A2 high fibroblast of dermis CL:0002551 Fibroblast_DCN high fibroblast CL:0000057 Fibroblast_EFEMP1 high fibroblast of dermis CL:0002551 @@ -313,6 +314,7 @@ Kupffer cell Kupffer cell CL:0000091 Kuppfer Cell Kupffer cell CL:0000091 Kuppfer cell Kupffer cell CL:0000091 Lens epithelial cell lens epithelial cell CL:0002224 +Loop of Henle(Thick ascending limb) kidney loop of Henle thick ascending limb epithelial cell CL:1001106 Loop of Henle (Thick ascending limb) kidney loop of Henle thick ascending limb epithelial cell CL:1001106 Loop of henle _ANXA1 high kidney loop of Henle epithelial cell CL:1000909 Loop of henle _KNG1 high kidney loop of Henle epithelial cell CL:1000909 @@ -323,7 +325,7 @@ Loop of henle_SLPI high kidney loop of Henle epithelial cell CL:1000909 Loop of henle_SOD3 high kidney loop 
of Henle epithelial cell CL:1000909 Loop of henle_SPP1 high kidney loop of Henle epithelial cell CL:1000909 Luminal cell luminal epithelial cell of prostatic duct CL:0002237 -Luminal epithelium placental epithelial cell CL:0002577 +Luminal epithelium placental epithelial cell CL:0002577 Lung mesenchyme cell (cardiopulmonary progenitor) mesenchymal cell CL:0008019 Lymphatic endothelial cell endothelial cell CL:0000115 Lymphatic endothelial cell_CCL21 high endothelial cell CL:0000115 @@ -334,6 +336,7 @@ M1 Macrophage inflammatory macrophage CL:0000863 M1 Macrophage_CCL2_high inflammatory macrophage CL:0000863 M1 Macrophage_CXCL8_high inflammatory macrophage CL:0000863 M2 Macrophage alternatively activated macrophage CL:0000890 +M2 macrophage alternatively activated macrophage CL:0000890 M2 macrophage_CXCL8 high alternatively activated macrophage CL:0000890 M2 macrophage_MALAT1 high alternatively activated macrophage CL:0000890 Macrophage macrophage CL:0000235 @@ -361,7 +364,6 @@ Mast mast cell CL:0000097 Mast cell mast cell CL:0000097 Mast progenitor cell mast cell CL:0000097 Megakaryocyte megakaryocyte CL:0000556 -Megakaryocyte megakaryocyte CL:0000556 Megakaryocyte/Erythroid Progenitor megakaryocyte-erythroid progenitor cell CL:0000050 Megakaryocyte/Erythtoid progenitor cell megakaryocyte-erythroid progenitor cell CL:0000050 Melanocyte melanocyte CL:0000148 @@ -429,9 +431,8 @@ Neuron_RELN high neuron CL:0000540 Neuron_S100B high neuron CL:0000540 Neuron_TMEM233 high neuron CL:0000540 Neuron_XPR1 high neuron CL:0000540 -Neutriophil neutrophil CL:0000775 +Neutriophil neutrophil CL:0000775 Neutrophil neutrophil CL:0000775 -Neutrophil neutrophil CL:0000775 Neutrophil _S100A8 high neutrophil CL:0000775 Neutrophil_CAMP high neutrophil CL:0000775 Neutrophil_CAMP_high neutrophil CL:0000775 @@ -484,7 +485,6 @@ Plasmacytoid dendritic cell plasmacytoid dendritic cell, human CL:0001058 Podocyte glomerular visceral epithelial cell CL:0000653 Pre B cell precursor B cell 
CL:0000817 Primordial germ cell primordial germ cell CL:0000670 -Primordial germ cell primordial germ cell CL:0000670 Primordial germ cell_GTSF1 high primordial germ cell CL:0000670 Primordial germ cell_TCL1A high primordial germ cell CL:0000670 Principle cell renal principal cell CL:0005009 @@ -549,6 +549,7 @@ Smooth muscle cell_CCL19 high smooth muscle cell CL:0000192 Smooth muscle cell_CCL4L2 high smooth muscle cell CL:0000192 Smooth muscle cell_CYCS high smooth muscle cell of the pulmonary artery CL:0002591 Smooth muscle cell_MYL9 high smooth muscle cell CL:0000192 +Smooth muscle cell_MYLK high smooth muscle cell CL:0000192 Smooth muscle cell_PDK4 high smooth muscle cell CL:0000192 Smooth muscle cell_TAGLN high smooth muscle cell CL:0000192 Soomth muscle cell smooth muscle cell CL:0000192 @@ -556,7 +557,6 @@ Spermatocyte spermatocyte CL:0000017 Stomach fundus cell mucous cell of stomach CL:0002180 Striated muscle cell striated muscle cell CL:0000737 Stromal cell stromal cell CL:0000499 -Stromal cell stromal cell CL:0000499 Stromal cell_APOD high stromal cell CL:0000499 Stromal cell_ASPN high stromal cell CL:0000499 Stromal cell_CLEC3B high stromal cell CL:0000499 @@ -583,7 +583,7 @@ Stromal cell_SULT1E1 high stromal cell CL:0000499 Stromal cell_TSLP high stromal cell CL:0000499 Superficial cell ureteral cell CL:1000601 Sympathetic neuron sympathetic neuron CL:0011103 -Syncytiotrophoblast syncytiotrophoblast cell CL:0000525 +Syncytiotrophoblast syncytiotrophoblast cell CL:0000525 Syncytiotrophoblast cell syncytiotrophoblast cell CL:0000525 T cell T cell CL:0000084 T cell_CCL4 high T cell CL:0000084 @@ -598,6 +598,7 @@ Tendon cell_GCG high tendon cell CL:0000388 Theca cell theca cell CL:0000503 Thyroid follicular cell thyroid follicular cell CL:0002258 Treg cell regulatory T cell CL:0000815 +Unknown UNKNOWN UNKNOWN Unknown Epithelial cell_EFNA1 high epithelial cell CL:0000066 Unknown Epithelial cell_FOS high epithelial cell CL:0000066 Unknown1 UNKNOWN UNKNOWN @@ 
-607,7 +608,6 @@ Ureteric bud cell epithelial cell CL:0000066 Ureteric epithelial cell epithelial cell CL:0000066 Ureteric smooth muscle cell ureter smooth muscle cell CL:1000979 Urothelial cell bladder urothelial cell CL:1001428 -Urothelial cell bladder urothelial cell CL:1001428 Vascular endothelial cell endothelial cell of vascular tree CL:0002139 Vascular endothelial cell_A2M high endothelial cell of vascular tree CL:0002139 Vascular endothelial cell_AQP1 high endothelial cell of vascular tree CL:0002139 diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index c2f7e9626..c34567f29 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -6,6 +6,7 @@ dataset_structure: dataset_wise: author: - "Travaglini" + default_embedding: "X_tSNE" doi: - "10.1038/s41586-020-2922-4" download_url_data: @@ -13,10 +14,9 @@ dataset_wise: facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "syn21625142,facs_normal_lung_blood_scanpy.20200205.RC4.h5ad" download_url_meta: normalization: "raw" + primary_data: year: 2020 dataset_or_observation_wise: - age: - age_obs_key: assay_sc: droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10X sequencing" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "Smart-seq2" @@ -33,10 +33,10 @@ dataset_or_observation_wise: cell_line_obs_key: development_stage: development_stage_obs_key: + disease: "healthy" + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: True - healthy_obs_key: individual: individual_obs_key: "patient" organ: "lung" @@ -47,8 +47,8 @@ dataset_or_observation_wise: sample_source_obs_key: sex: sex_obs_key: - state_exact: "healthy" - state_exact_obs_key: "state_exact" + state_exact: + state_exact_obs_key: tech_sample: 
tech_sample_obs_key: droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "channel" @@ -58,7 +58,5 @@ observation_wise: feature_wise: var_ensembl_col: var_symbol_col: "index" -misc: - healthy_state_healthy: meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 82cfefc99..8b1828f6a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -13,16 +13,16 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "James" + self.disease = "healthy" self.doi = "10.1038/s41590-020-0602-z" - self.healthy = True self.normalization = "raw" self.organ = "colon" self.organism = "human" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2020 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index ba992d30e..aacfdd85b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -12,16 +12,16 @@ def __init__(self, **kwargs): self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2Ecsv%2Egz" self.download_url_meta = 
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz" + self.assay_sc = "Drop-seq" self.author = "Braga" + self.disease = "healthy" self.doi = "10.1038/s41591-019-0468-5" - self.healthy = True self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.assay_sc = "Drop-seq" + self.sample_source = "primary_tissue" self.state_exact = "uninvolved areas of tumour resection material" self.year = 2019 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.cellontology_original_obs_key = "celltype" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index 178073dad..d3fd7db16 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -17,16 +17,15 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Braga" + self.disease = "healthy" self.doi = "10.1038/s41591-019-0468-5" - self.healthy = True + self.normalization = "scaled" self.organ = "bronchus" if self.sample_fn == "vieira19_Bronchi_anonymised.processed.h5ad" else "lung parenchyma" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 - self.normalization = "norm" self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index 
6707eab05..485a13c88 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -16,15 +16,15 @@ def __init__(self, **kwargs): self.download_url_meta = \ "https://www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" + self.assay_sc = "10X sequencing" self.author = "Hove" + self.disease = "healthy" self.doi = "10.1038/s41593-019-0393-4" - self.healthy = True self.normalization = "raw" self.organism = "mouse" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2019 - self.sample_source = "primary_tissue" self.bio_sample_obs_key = "sample" self.cellontology_original_obs_key = "cluster" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index 38268662e..372e0e2b7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -15,16 +15,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Liao" - self.healthy = True + self.disease = "healthy" self.normalization = "raw" self.organ = "kidney" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" + self.sample_source = "primary_tissue" self.year = 2020 self.doi = "10.1038/s41597-019-0351-8" - self.sample_source = "primary_tissue" self.var_symbol_col = "names" self.var_ensembl_col = "ensembl" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index e7217c49d..e6dce3ace 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -12,16 +12,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Voigt" + self.disease = "healthy" self.doi = "10.1073/pnas.1914143116" - self.healthy = True self.normalization = "norm" self.organ = "retina" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 1a71d7dbd..7adab9a37 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -21,16 +21,15 @@ def __init__(self, **kwargs): organ = self.sample_fn.split("_")[1].split(".")[0] + self.assay_sc = "10X sequencing" self.author = "Wang" + self.disease = "healthy" self.doi = "10.1084/jem.20191130" - self.healthy = True self.normalization = "raw" self.organ = organ self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" 
self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 08087c757..2fd7f759e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -18,16 +18,15 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Lukassen" + self.disease = "healthy" self.doi = "10.1101/2020.03.13.991455" - self.healthy = True self.normalization = "raw" self.organ = "lung" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2020 self.sample_source = "primary_tissue" + self.year = 2020 self.var_symbol_col = "index" self.cellontology_original_obs_key = "CellType" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 7f926341f..9d33ec9a7 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -77,19 +77,17 @@ def __init__(self, **kwargs): self.download_url_meta = None self.cellontology_original_obs_key = "cell_ontology_class" - self.age_obs_key = "age" - self.development_stage_obs_key = "development_stage" # not given in all data sets + self.development_stage_obs_key = "development_stage" # not given in all data sets, TODO maybe infer as age? self.sex_obs_key = "sex" # ToDo: further anatomical information for subtissue in "subtissue"? 
self.author = "Pisco" + self.disease = "healthy" self.doi = "10.1101/661728" - self.healthy = True self.normalization = "norm" self.organism = "mouse" self.organ = organ self.assay_sc = "10X sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" - self.state_exact = "healthy" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 76c3916f9..ed4ed0511 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -6,6 +6,9 @@ class Dataset(DatasetBase): + """ + TODO: add disease from status and diagnosis fields, healthy is "control" + """ def __init__(self, **kwargs): super().__init__(**kwargs) @@ -29,8 +32,6 @@ def __init__(self, **kwargs): self.cellontology_original_obs_key = "celltype" self.state_exact_obs_key = "Diagnosis" - self.healthy_obs_key = "Status" - self.healthy_state_healthy = "Control" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index 4d43a8cf7..97b9527f0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -15,16 +15,16 @@ def __init__(self, **kwargs): ] self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Stewart" + self.disease = "healthy" self.doi = "10.1126/science.aat5031" - self.healthy = True self.normalization = "norm" 
self.organ = "kidney" self.organism = "human" - self.assay_sc = "10X sequencing" + self.sample_source = "primary_tissue" self.state_exact = "healthy" self.year = 2019 - self.sample_source = "primary_tissue" self.var_symbol_col = "index" self.var_ensembl_col = "ID" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index 68830d195..d1a625acd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -12,16 +12,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Park" + self.disease = "healthy" self.doi = "10.1126/science.aay3224" - self.healthy = True self.normalization = "norm" self.organ = "thymus" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2020 self.sample_source = "primary_tissue" + self.year = 2020 self.var_symbol_col = "index" self.cellontology_original_obs_key = "Anno_level_fig1" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py index 69999c1a6..a8ee1d628 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py @@ -5,12 +5,61 @@ def load(data_dir, **kwargs): + + sex_dict = { + "F": "female", + "M": "male" + } + + dev_stage_dict = { + 72: "11th week post-fertilization human stage", + 74: "11th week post-fertilization human stage", + 85: "13th week 
post-fertilization human stage", + 89: "13th week post-fertilization human stage", + 90: "13th week post-fertilization human stage", + 94: "14th week post-fertilization human stage", + 96: "14th week post-fertilization human stage", + 100: "15th week post-fertilization human stage", + 110: "16th week post-fertilization human stage", + 112: "17th week post-fertilization human stage", + 113: "17th week post-fertilization human stage", + 115: "17th week post-fertilization human stage", + 117: "17th week post-fertilization human stage", + 119: "18th week post-fertilization human stage", + 120: "18th week post-fertilization human stage", + 122: "18th week post-fertilization human stage", + 125: "18th week post-fertilization human stage", + 129: "19th week post-fertilization human stage", + } + + organ_dict = { + "Adrenal": "adrenal gland", + "Cerebellum": "cerebellum", + "Cerebrum": "telencephalon", + "Eye": "eye", + "Heart": "heart", + "Intestine": "intestine", + "Kidney": "kidney", + "Liver": "liver", + "Lung": "lung", + "Muscle": "muscle organ", + "Pancreas": "pancreas", + "Placenta": "placenta", + "Spleen": "spleen", + "Stomach": "stomach", + "Thymus": "thymus", + } + fn = os.path.join(data_dir, "GSE156793_S3_gene_count.loom.gz") - fn_tmp = os.path.join(data_dir, "tmp.loom") + fn_tmp = os.path.join(os.path.expanduser("~"), "tmp.loom") with gzip.open(fn, 'rb') as f_in: with open(fn_tmp, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) adata = anndata.read_loom(fn_tmp) os.remove(fn_tmp) + adata.obs["Sex"] = [sex_dict[x] for x in adata.obs["Sex"]] + adata.obs["Organ"] = [organ_dict[x] for x in adata.obs["Organ"]] + adata.obs["Developmental_stage"] = [dev_stage_dict[x] for x in adata.obs["Development_day"]] + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv new file mode 100644 index 000000000..37e9e316e --- 
/dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv @@ -0,0 +1,78 @@ +source target target_id +AFP_ALB positive cells UNKNOWN UNKNOWN +Acinar cells acinar cell CL:0000622 +Adrenocortical cells cortical cell of adrenal gland CL:0002097 +Amacrine cells amacrine cell CL:0000561 +Antigen presenting cells professional antigen presenting cell CL:0000145 +Astrocytes astrocyte CL:0000127 +Bipolar cells bipolar neuron CL:0000103 +Bronchiolar and alveolar epithelial cells epithelial cell CL:0000066 +CCL19_CCL21 positive cells UNKNOWN UNKNOWN +CLC_IL5RA positive cells UNKNOWN UNKNOWN +CSH1_CSH2 positive cells UNKNOWN UNKNOWN +Cardiomyocytes cardiac muscle cell CL:0000746 +Chromaffin cells chromaffin cell CL:0000166 +Ciliated epithelial cells ciliated epithelial cell CL:0000067 +Corneal and conjunctival epithelial cells epithelial cell CL:0000066 +Ductal cells pancreatic ductal cell CL:0002079 +ELF3_AGBL2 positive cells UNKNOWN UNKNOWN +ENS glia glial cell CL:0000125 +ENS neurons enteric neuron CL:0007011 +Endocardial cells endocardial cell CL:0002350 +Epicardial fat cells epicardial adipocyte CL:1000309 +Erythroblasts erythroblast CL:0000765 +Excitatory neurons excitatory neuron CL:0008030 +Extravillous trophoblasts extravillous trophoblast CL:0008036 +Ganglion cells CNS neuron (sensu Vertebrata) CL:0000117 +Goblet cells goblet cell CL:0000160 +Granule neurons granule cell CL:0000120 +Hematopoietic stem cells hematopoietic stem cell CL:0000037 +Hepatoblasts hepatoblast CL:0005026 +Horizontal cells retina horizontal cell CL:0000745 +IGFBP1_DKK1 positive cells UNKNOWN UNKNOWN +Inhibitory interneurons inhibitory interneuron CL:0000498 +Inhibitory neurons inhibitory neuron CL:0008029 +Intestinal epithelial cells intestinal epithelial cell CL:0002563 +Islet endocrine cells endocrine cell CL:0000163 +Lens fibre cells lens fiber cell CL:0011004 +Limbic system neurons neuron of cerebral cortex CL:0002609 +Lymphatic endothelial 
cells endothelial cell of lymphatic vessel CL:0002138 +Lymphoid cells innate lymphoid cell CL:0001065 +MUC13_DMBT1 positive cells UNKNOWN UNKNOWN +Megakaryocytes megakaryocyte CL:0000556 +Mesangial cells mesangial cell CL:0000650 +Mesothelial cells mesothelial cell CL:0000077 +Metanephric cells kidney cell CL:1000497 +Microglia microglial cell CL:0000129 +Myeloid cells myeloid cell CL:0000763 +Neuroendocrine cells neuroendocrine cell CL:0000165 +Oligodendrocytes oligodendrocyte CL:0000128 +PAEP_MECOM positive cells UNKNOWN UNKNOWN +PDE11A_FAM19A2 positive cells UNKNOWN UNKNOWN +PDE1C_ACSM3 positive cells UNKNOWN UNKNOWN +Parietal and chief cells parietal cell CL:0000162 +Photoreceptor cells photoreceptor cell CL:0000210 +Purkinje neurons Purkinje cell CL:0000121 +Retinal pigment cells visual pigment cell CL:0000149 +Retinal progenitors and Muller glia retinal progenitor cell CL:0002672 +SATB2_LRRC7 positive cells UNKNOWN UNKNOWN +SKOR2_NPSR1 positive cells UNKNOWN UNKNOWN +SLC24A4_PEX5L positive cells UNKNOWN UNKNOWN +SLC26A4_PAEP positive cells UNKNOWN UNKNOWN +STC2_TLX1 positive cells UNKNOWN UNKNOWN +Satellite cells skeletal muscle satellite cell CL:0000594 +Schwann cells Schwann cell CL:0002573 +Skeletal muscle cells cell of skeletal muscle CL:0000188 +Smooth muscle cells smooth muscle cell CL:0000192 +Squamous epithelial cells squamous epithelial cell CL:0000076 +Stellate cells hepatic stellate cell CL:0000632 +Stromal cells stromal cell CL:0000499 +Sympathoblasts neural progenitor cell CL:0011020 +Syncytiotrophoblasts and villous cytotrophoblasts trophoblast cell CL:0000351 +Thymic epithelial cells epithelial cell of thymus CL:0002293 +Thymocytes thymocyte CL:0000893 +Trophoblast giant cells trophoblast giant cell CL:0002488 +Unipolar brush cells brush cell CL:0002204 +Ureteric bud cells ureteral cell CL:1000601 +Vascular endothelial cells endothelial cell of vascular tree CL:0002139 +Visceral neurons visceromotor neuron CL:0005025 diff --git 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml index fa2c1945f..3db231a7f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml @@ -4,15 +4,15 @@ dataset_structure: dataset_wise: author: - "Cao" + default_embedding: doi: - "10.1126/science.aba7721" download_url_data: "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE156793&format=file&file=GSE156793%5FS3%5Fgene%5Fcount%2Eloom%2Egz" download_url_meta: normalization: "raw" + primary_data: year: 2020 dataset_or_observation_wise: - age: - age_obs_key: "Age" assay_sc: "sci-RNA-seq" assay_sc_obs_key: assay_differentiation: @@ -24,11 +24,11 @@ dataset_or_observation_wise: cell_line: cell_line_obs_key: development_stage: - development_stage_obs_key: "Development_day" + development_stage_obs_key: "Developmental_stage" + disease: "healthy" + disease_obs_key: ethnicity: ethnicity_obs_key: - healthy: True - healthy_obs_key: individual: individual_obs_key: "Fetus_id" organ: @@ -48,7 +48,5 @@ observation_wise: feature_wise: var_ensembl_col: "gene_id" var_symbol_col: "gene_short_name" -misc: - healthy_state_healthy: "healthy" meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index 18e848698..e911a971b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -32,15 +32,15 @@ def __init__(self, **kwargs): self.var_ensembl_col = "gene_ids-HCATisStab7463846" self.download_url_meta = None 
+ + self.assay_sc = "10X sequencing" self.author = "Madissoon" + self.disease = "healthy" self.doi = "10.1186/s13059-019-1906-x" - self.healthy = True self.normalization = "raw" # ToDo "madissoon19_lung.processed.h5ad" is close to integer but not quire (~1e-4) self.organ = "lung parenchyma" if self.sample_fn == "madissoon19_lung.processed.h5ad" else \ "esophagus" if self.sample_fn == "oesophagus.cellxgene.h5ad" else "spleen" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv index 9544b4f32..d6c319350 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv @@ -62,3 +62,4 @@ T_CD8_gd CD8-alpha alpha positive, gamma-delta intraepithelial T cell CL:0000802 T_cell_dividing T cell CL:0000084 T_cells_Dividing T cell CL:0000084 T_regulatory regulatory T cell CL:0000815 +Unknown UNKNOWN UNKNOWN diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index 74dab7473..b905467fe 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -14,16 +14,15 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "Lukowski" + 
self.disease = "healthy" self.doi = "10.15252/embj.2018100811" - self.healthy = True self.normalization = "raw" self.organ = "retina" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index 46040dfdf..79765a904 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -15,16 +15,15 @@ def __init__(self, **kwargs): "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.download_url_meta = None + self.assay_sc = "10X sequencing" self.author = "10x Genomics" + self.disease = "healthy" self.doi = "no_doi_10x_genomics" - self.healthy = True self.normalization = "raw" self.organ = "blood" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2019 self.sample_source = "primary_tissue" + self.year = 2019 self.var_symbol_col = "index" self.var_ensembl_col = "gene_ids" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 7d68ca517..b2be24f84 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -12,16 +12,15 @@ def __init__(self, **kwargs): "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" self.download_url_meta = None + 
self.assay_sc = "10X sequencing" self.author = "Regev" + self.disease = "healthy" self.doi = "no_doi_regev" - self.healthy = True self.normalization = "raw" self.organ_obs_key = "derived_organ_parts_label" self.organism = "human" - self.assay_sc = "10X sequencing" - self.state_exact = "healthy" - self.year = 2018 self.sample_source = "primary_tissue" + self.year = 2018 self.var_symbol_col = "index" self.var_ensembl_col = "Accession" diff --git a/sfaira/data/dataloaders/loaders/super_group.py b/sfaira/data/dataloaders/loaders/super_group.py index 6a10241ba..456787694 100644 --- a/sfaira/data/dataloaders/loaders/super_group.py +++ b/sfaira/data/dataloaders/loaders/super_group.py @@ -26,13 +26,13 @@ def __init__( """ # Directory choice hyperparamters: dir_prefix = "d" - dir_exlcude = [] + dir_exclude = [] # Collect all data loaders from files in directory: dataset_groups = [] cwd = os.path.dirname(__file__) for f in os.listdir(cwd): if os.path.isdir(os.path.join(cwd, f)): # only directories - if f[:len(dir_prefix)] == dir_prefix and f not in dir_exlcude: # Narrow down to data set directories + if f[:len(dir_prefix)] == dir_prefix and f not in dir_exclude: # Narrow down to data set directories path_dsg = pydoc.locate(f"sfaira.data.dataloaders.loaders.{f}.FILE_PATH") if path_dsg is not None: dataset_groups.append(DatasetGroupDirectoryOriented( diff --git a/sfaira/data/utils_scripts/clean_celltype_maps_global.py b/sfaira/data/utils_scripts/clean_celltype_maps_global.py deleted file mode 100644 index cdcdf9caf..000000000 --- a/sfaira/data/utils_scripts/clean_celltype_maps_global.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys -import tensorflow as tf - -# Any data loader here to extract path: -from sfaira.data.dataloaders.loaders import DatasetSuperGroupLoaders - -print(tf.__version__) - -# Set global variables. 
-print("sys.argv", sys.argv) - -data_path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) -path_cache = str(sys.argv[3]) -processes = int(str(sys.argv[4])) - -dsgl = DatasetSuperGroupLoaders( - data_path=data_path, - meta_path=path_meta, - cache_path=path_cache -) - -for x in dsgl.dataset_groups: - print(x.ids) - x.clean_ontology_class_map() diff --git a/sfaira/data/utils_scripts/create_anatomical_configs.py b/sfaira/data/utils_scripts/create_anatomical_configs.py index 7ccb94900..4663f8ab6 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs.py @@ -43,7 +43,7 @@ def clean(s): "placenta", "pleura", "prostate gland", - "rib" + "rib", "skeleton", "skin of body", "spinal cord", @@ -91,6 +91,7 @@ def clean(s): for organism, organs in configs_to_write.items(): for organ in organs: + print(f"Writing {organism} {organ}") dsgs = DatasetSuperGroupSfaira( data_path=".", meta_path=".", diff --git a/sfaira/data/utils_scripts/create_celltype_maps_global.py b/sfaira/data/utils_scripts/create_celltype_maps_global.py deleted file mode 100644 index fc77f0c9f..000000000 --- a/sfaira/data/utils_scripts/create_celltype_maps_global.py +++ /dev/null @@ -1,100 +0,0 @@ -import os -import pydoc -import sfaira -import sys -import tensorflow as tf - -# Any data loader here to extract path: -from sfaira.data.dataloaders.loaders.d10_1016_j_cmet_2019_01_021 import FILE_PATH - -print(tf.__version__) - -# Set global variables. -print("sys.argv", sys.argv) - -data_path = str(sys.argv[1]) -path_meta = str(sys.argv[2]) -path_cache = str(sys.argv[3]) -processes = int(str(sys.argv[4])) - -# Data loaders with one file per data set rather than one summary file: -# ToDo: not supported yet. 
-# TMS d10_1101_661728 -# MCA d10_1016_j_cell_2018_02_001 -# HCL d10_1038_s41586_020_2157_4 -studys_separate_csvs = [] -# "d10_1101_661728", -# "d10_1016_j_cell_2018_02_001", -# "d10_1038_s41586_020_2157_4" -# ] - -dir_sfaira_dataloaders = "/" + str(os.path.join(*str(os.path.dirname(FILE_PATH)).split("/")[:-1])) - -dir_prefix = "d" -dir_exlcude = [] -for dir_study in os.listdir(dir_sfaira_dataloaders): - print(dir_study) - if os.path.isdir(os.path.join(dir_sfaira_dataloaders, dir_study)): # only directories - # Narrow down to data set directories: - if dir_study[:len(dir_prefix)] == dir_prefix and dir_study not in dir_exlcude: - for f_dataset in os.listdir(os.path.join(dir_sfaira_dataloaders, dir_study)): - if os.path.isfile(os.path.join(dir_sfaira_dataloaders, dir_study, f_dataset)): # only files - print(f_dataset) - # Narrow down to data set files: - if f_dataset.split(".")[-1] == "py" and \ - f_dataset.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(str(f_dataset).split(".")[:-1]) - DatasetFound = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".Dataset") - # Check if global objects are available: - # - SAMPLE_FNS: for DatasetBaseGroupLoadingManyFiles - # - SAMPLE_IDS: for DatasetBaseGroupLoadingOneFile - sample_fns = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dir_study + "." + file_module + ".SAMPLE_FNS") - sample_ids = pydoc.locate( - "sfaira.data.dataloaders.loaders." + dir_study + "." 
+ file_module + ".SAMPLE_IDS") - if sample_fns is not None and sample_ids is None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_fn=x, - data_path=data_path, - meta_path=path_meta, - cache_path=path_cache - ) - for x in sample_fns - ] - elif sample_fns is None and sample_ids is not None: - # DatasetBaseGroupLoadingManyFiles: - datasets_f = [ - DatasetFound( - sample_id=x, - data_path=data_path, - meta_path=path_meta, - cache_path=path_cache - ) - for x in sample_ids - ] - elif sample_fns is not None and sample_ids is not None: - raise ValueError(f"sample_fns and sample_ids both found for {f_dataset}") - else: - datasets_f = [DatasetFound( - data_path=data_path, - meta_path=path_meta, - cache_path=path_cache - )] - dsg_f = sfaira.data.DatasetGroup(datasets=dict([(x.id, x) for x in datasets_f])) - dsg_f.load( - load_raw=False, - allow_caching=True, - match_to_reference=False, - remove_gene_version=False, - ) - if str(dir_study) in studys_separate_csvs: - pass - else: - dsg_f.write_ontology_class_map( - fn=os.path.join(dir_sfaira_dataloaders, dir_study, file_module + ".tsv"), - protected_writing=True, - n_suggest=4, - ) diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 41c81575d..4ad33c888 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -1,21 +1,6 @@ +import os import sfaira import sys -import tensorflow as tf - -print(tf.__version__) - - -def write_meta(args0, args1): - # Write meta data, cache. - args0.write_meta(fn_meta=None, dir_out=args1) - # Test load from cache. - args0.load( - remove_gene_version=True, - load_raw=False, - allow_caching=False, - ) - return None - # Set global variables. 
print("sys.argv", sys.argv) @@ -28,15 +13,28 @@ def write_meta(args0, args1): ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( data_path=data_path, meta_path=path_meta, cache_path=path_cache ) -ds = ds.flatten() # Write meta data, cache and test load from cache: -ds.load( - annotated_only=False, - match_to_reference=None, - remove_gene_version=True, - load_raw=False, - allow_caching=True, - processes=processes, - func=write_meta, - kwargs_func={"args1": path_meta}, -) +for x in ds.dataset_groups: + for k, v in x.datasets.items(): + try: + # Initial load and cache writing: + # Only run this if data set was not already cached to speed up resumed jobs. + if not os.path.exists(v.cache_fn): + v.load( + match_to_reference=None, + remove_gene_version=True, + load_raw=False, + allow_caching=True, + ) + # Write meta data, cache. + v.write_meta(fn_meta=None, dir_out=path_meta) + # Test load from cache. + v.load( + remove_gene_version=False, # speed this up + load_raw=False, + allow_caching=False, + ) + v.clear() + except ValueError as e: + # Do not abort upon ValueErrors, such as from cell type map bugs. + print(f"WARNING: TO-FIX: ValueError in {k}: {e}") diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py new file mode 100644 index 000000000..fdbed37d9 --- /dev/null +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -0,0 +1,36 @@ +import os +import sfaira +import sys + +# Set global variables. 
+print("sys.argv", sys.argv) + +data_path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +path_out = str(sys.argv[4]) +schema = str(sys.argv[5]) +dois = str(sys.argv[6]) + +path_cache = path_cache if path_cache != "None" else None + +for x in dois.split(","): + ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + data_path=data_path, meta_path=path_meta, cache_path=path_cache + ) + ds.subset(key="doi", values=[x]) + ds.load( + match_to_reference=None, + remove_gene_version=True, + load_raw=False, + allow_caching=True, + set_metadata=False, + ) + if schema == "cellxgene": + ds.subset_genes(subset_type=None) + ds.streamline(format=schema.lower(), allow_uns_sfaira=True, clean_obs=False, clean_var=True, clean_uns=False) + assert len(ds.dataset_groups) == 1, len(ds.dataset_groups) + dsg = ds.dataset_groups[0] + for k, v in dsg.datasets.items(): + fn = v.doi_cleaned_id + ".h5ad" + v.adata.write_h5ad(os.path.join(path_out, fn)) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 59615c2db..1b81eebc7 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -13,9 +13,10 @@ import warnings from tqdm import tqdm +from sfaira.consts import AdataIdsSfaira from sfaira.models import BasicModel from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ CustomAccAgg, CustomF1Classwise, CustomFprClasswise, CustomTprClasswise, custom_cce_agg @@ -79,6 +80,7 @@ def __init__( self.idx_test = None self.md5 = weights_md5 self.cache_path = cache_path + self._adata_ids_sfaira = AdataIdsSfaira() def load_pretrained_weights(self): """ @@ -180,7 +182,7 @@ def _get_dataset( def _get_class_dict( self, - obs_key: str = 
'cell_ontology_class' + obs_key: str ): y = self.data.obs[obs_key] for i, val in enumerate(y): @@ -215,12 +217,12 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): # If the feature space is already mapped to the right reference, return the data matrix immediately if 'mapped_features' in self.data.uns_keys(): - if self.data.uns['mapped_features'] == self.topology_container.genome_container.genome: + if self.data.uns[self._adata_ids_sfaira.mapped_features] == self.topology_container.genome_container.assembly: print(f"found {x.shape[0]} observations") return x # Compute indices of genes to keep - data_ids = self.data.var["ensembl"].values + data_ids = self.data.var[self._adata_ids_sfaira.gene_id_ensembl].values idx_feature_kept = np.where([x in self.topology_container.genome_container.ensembl for x in data_ids])[0] idx_feature_map = np.array([self.topology_container.genome_container.ensembl.index(x) for x in data_ids[idx_feature_kept]]) @@ -607,7 +609,7 @@ def generator(): # Prepare data reading according to whether anndata is backed or not: if self.data.isbacked: n_features = self.data.X.shape[1] - cell_to_class = self._get_class_dict() + cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) output_types, output_shapes = self._get_output_dim(n_features, 'vae') def generator(): @@ -615,13 +617,13 @@ def generator(): for i in idx: x = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() sf = self._prepare_sf(x=x)[0] - y = self.data.obs['cell_ontology_class'][i] + y = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][i] yield (x, sf), (x, cell_to_class[y]) else: x = self._prepare_data_matrix(idx=idx) sf = self._prepare_sf(x=x) - cell_to_class = self._get_class_dict() - y = self.data.obs['cell_ontology_class'][idx] # for gradients per celltype in compute_gradients_input() + cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) + y = 
self.data.obs[self._adata_ids_sfaira.cell_ontology_class][idx] # for gradients per celltype in compute_gradients_input() n_features = x.shape[1] output_types, output_shapes = self._get_output_dim(n_features, 'vae') @@ -813,7 +815,7 @@ def compute_gradients_input( ) if per_celltype: - cell_to_id = self._get_class_dict(obs_key="cell_ontology_class") + cell_to_id = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) cell_names = cell_to_id.keys() cell_id = cell_to_id.values() id_to_cell = dict([(key, value) for (key, value) in zip(cell_id, cell_names)]) @@ -963,7 +965,7 @@ def _get_celltype_out( type_classes = self.ntypes + 1 y = np.zeros((len(idx), type_classes), dtype="float32") celltype_idx = self.model.celltypes_version.map_to_target_leaves( - nodes=self.data.obs["cell_ontology_class"].values[idx].tolist(), + nodes=self.data.obs[self._adata_ids_sfaira.cell_ontology_class].values[idx].tolist(), ontology="custom", ontology_id=lookup_ontology, return_type="idx" diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index e47314d41..3f047416a 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -9,7 +9,7 @@ from sfaira.versions.metadata import CelltypeUniverse from sfaira.consts import OntologyContainerSfaira -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies class ModelZoo(abc.ABC): diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index 2b67191e8..523f37c3c 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -6,7 +6,7 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py 
index 59d998052..f8a38f8e1 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -6,7 +6,7 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index b0ac37e08..099a385ec 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 8a8d91790..423cf915b 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index 21008ecc1..1c36084ec 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topology_versions import 
Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index ece67115b..55530e132 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput from sfaira.models.made import MaskingDense diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index b92369c45..fbd0fc579 100644 --- a/sfaira/models/embedding/vaevamp.py +++ b/sfaira/models/embedding/vaevamp.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput diff --git a/sfaira/train/__init__.py b/sfaira/train/__init__.py index f1c9b5b6f..f0a188c17 100644 --- a/sfaira/train/__init__.py +++ b/sfaira/train/__init__.py @@ -1,2 +1,2 @@ from sfaira.train.summaries import GridsearchContainer, SummarizeGridsearchEmbedding, SummarizeGridsearchCelltype -from sfaira.train.train_model import TrainModelEmbedding, TrainModelCelltype, TargetZoos +from sfaira.train.train_model import TrainModelEmbedding, TrainModelCelltype diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 742be3464..ac9e10f0e 100644 --- a/sfaira/train/summaries.py +++ 
b/sfaira/train/summaries.py @@ -8,8 +8,8 @@ import os from sfaira.versions.metadata import CelltypeUniverse -from sfaira.train.train_model import TargetZoos from sfaira.estimators import EstimatorKerasEmbedding +from sfaira.data import DatasetSuperGroupSfaira def _tp(yhat, ytrue): @@ -875,14 +875,13 @@ def plot_best_classwise_heatmap( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - tz = TargetZoos(data_path=datapath) - if organism == "human": - dataset = tz.data_human[organ] - elif organism == "mouse": - dataset = tz.data_mouse[organ] - else: - raise(ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'loaders')")) + dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset.subset(key="organism", values=[organism]) + dataset.subset(key="organ", values=[organ]) + if not dataset.flatten().datasets: + raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") dataset.load() + dataset = dataset.flatten() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() celltypelist = list(cell_counts.keys()).copy() @@ -1036,14 +1035,13 @@ def plot_best_classwise_scatter( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - tz = TargetZoos(data_path=datapath) - if organism == "human": - dataset = tz.data_human[organ] - elif organism == "mouse": - dataset = tz.data_mouse[organ] - else: - raise(ValueError(f"Supplied organism {organism} not recognised. 
Should be one of ('mouse', 'loaders')")) + dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset.subset(key="organism", values=[organism]) + dataset.subset(key="organ", values=[organ]) + if not dataset.flatten().datasets: + raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") dataset.load() + dataset = dataset.flatten() cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() celltypelist = list(cell_counts.keys()).copy() @@ -1374,14 +1372,14 @@ def get_gradients_by_celltype( else: print('Compute gradients (1/3): load data') # load data - tz = TargetZoos(data_path=datapath) - if organism == "human": - dataset = tz.data_human[organ] - elif organism == "mouse": - dataset = tz.data_mouse[organ] - else: - raise (ValueError(f"Supplied organism {organism} not recognised. Should be one of ('mouse', 'loaders')")) - dataset.load(annotated_only=True) + dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset.subset(key="organism", values=[organism]) + dataset.subset(key="organ", values=[organ]) + dataset.subset(key="annotated", values=[True]) + if not dataset.flatten().datasets: + raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") + dataset.load() + dataset = dataset.flatten() print('Compute gradients (2/3): load embedding') # load embedding diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index cdfe6dd09..97020d5d6 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,161 +5,20 @@ import pickle from typing import Union -from sfaira.data import DatasetGroup, DatasetSuperGroup -from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.data import DatasetSuperGroupSfaira +from sfaira.estimators import EstimatorKerasCelltype, EstimatorKerasEmbedding +from 
sfaira.interface import ModelZooEmbedding, ModelZooCelltype -class TargetZoos: - """ - Class that provides access to all available dataset human in sfaira. +class TrainModel: - Parameters - ---------- - data_path : str - Path to the files for this dataset on disk - meta_path : str - Path to the meta files for this dataset on disk - """ - - def __init__(self, data_path: Union[str, None], meta_path: Union[str, None] = None, cache_path: Union[str, None] = None): - if data_path is not None: - from sfaira.data.dataloaders.anatomical_groups import mouse, human - self.data_mouse = { - "bladder": mouse.DatasetGroupBladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "brain": mouse.DatasetGroupBrain(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "diaphragm": mouse.DatasetGroupDiaphragm(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "adipose": mouse.DatasetGroupAdipose(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "heart": mouse.DatasetGroupHeart(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "kidney": mouse.DatasetGroupKidney(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "colon": mouse.DatasetGroupColon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "muscle": mouse.DatasetGroupMuscle(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "liver": mouse.DatasetGroupLiver(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "lung": mouse.DatasetGroupLung(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "mammarygland": mouse.DatasetGroupMammaryGland(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "bone": mouse.DatasetGroupBone(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "femalegonad": mouse.DatasetGroupFemalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "pancreas": mouse.DatasetGroupPancreas(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path), - "blood": mouse.DatasetGroupBlood(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "placenta": mouse.DatasetGroupPlacenta(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "prostate": mouse.DatasetGroupProstate(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "rib": mouse.DatasetGroupRib(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "skin": mouse.DatasetGroupSkin(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "ileum": mouse.DatasetGroupIleum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "spleen": mouse.DatasetGroupSpleen(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "stomach": mouse.DatasetGroupStomach(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "malegonad": mouse.DatasetGroupMalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "thymus": mouse.DatasetGroupThymus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "tongue": mouse.DatasetGroupTongue(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "trachea": mouse.DatasetGroupTrachea(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - "uterus": mouse.DatasetGroupUterus(data_path=data_path, cache_path=cache_path), - } - self.data_human = { - 'adipose': human.DatasetGroupAdipose(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'adrenalgland': human.DatasetGroupAdrenalgland(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'artery': human.DatasetGroupArtery(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'bladder': human.DatasetGroupBladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'blood': human.DatasetGroupBlood(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'bone': human.DatasetGroupBone(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path), - 'brain': human.DatasetGroupBrain(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'calvaria': human.DatasetGroupCalvaria(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'cervix': human.DatasetGroupCervix(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'chorionicvillus': human.DatasetGroupChorionicvillus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'colon': human.DatasetGroupColon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'duodenum': human.DatasetGroupDuodenum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'epityphlon': human.DatasetGroupEpityphlon(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'esophagus': human.DatasetGroupEsophagus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'eye': human.DatasetGroupEye(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'fallopiantube': human.DatasetGroupFallopiantube(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'femalegonad': human.DatasetGroupFemalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'gallbladder': human.DatasetGroupGallbladder(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'heart': human.DatasetGroupHeart(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'hesc': human.DatasetGroupHesc(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'ileum': human.DatasetGroupIleum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'jejunum': human.DatasetGroupJejunum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'kidney': human.DatasetGroupKidney(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'liver': human.DatasetGroupLiver(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'lung': human.DatasetGroupLung(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path), - 'malegonad': human.DatasetGroupMalegonad(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'muscle': human.DatasetGroupMuscle(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'omentum': human.DatasetGroupOmentum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'pancreas': human.DatasetGroupPancreas(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'placenta': human.DatasetGroupPlacenta(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'pleura': human.DatasetGroupPleura(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'prostate': human.DatasetGroupProstate(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'rectum': human.DatasetGroupRectum(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'rib': human.DatasetGroupRib(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'skin': human.DatasetGroupSkin(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'spinalcord': human.DatasetGroupSpinalcord(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'spleen': human.DatasetGroupSpleen(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'stomach': human.DatasetGroupStomach(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'thymus': human.DatasetGroupThymus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'thyroid': human.DatasetGroupThyroid(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'trachea': human.DatasetGroupTrachea(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'ureter': human.DatasetGroupUreter(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - 'uterus': human.DatasetGroupUterus(data_path=data_path, meta_path=meta_path, cache_path=cache_path), - } - - else: - self.data_human = None - self.data_mouse = None - - def 
write_celltypes_tocsv_mouse(self, fn: str): - for x in self.data_mouse.keys(): - ds = self.data_mouse[x] - self._write_celltypes_tocsv(fn, x, ds) - - def write_celltypes_tocsv_human(self, fn: str): - for x in self.data_human.keys(): - ds = self.data_human[x] - self._write_celltypes_tocsv(fn, x, ds) - - def _write_celltypes_tocsv(self, fn: str, x: str, ds: DatasetGroup): - ds.load(annotated_only=True, remove_gene_version=False, match_to_reference=None) - if len(ds.adata_ls) > 0: - obs = ds.obs_concat(keys=["cell_ontology_class", "cell_ontology_id"]) - obs.index = range(0, obs.shape[0]) - strids = [] - listids = [] - for i in obs.index: - if type(obs.loc[i]['cell_ontology_class']) != list: - strids.append(i) - else: - listids.append(i) - remaining = [] - for _, l in obs.iloc[listids].iterrows(): - if type(l['cell_ontology_id']) == list: - if not len(l['cell_ontology_class']) == len(l['cell_ontology_id']): - raise ValueError( - "Number of cell type labels and cell type ontologies for this cell do not match") - for i in range(len(l['cell_ontology_class'])): - remaining.append({ - "cell_ontology_class": l['cell_ontology_class'][i], - "cell_ontology_id": l['cell_ontology_id'][i] - }) - else: - for i in range(len(l['cell_ontology_class'])): - remaining.append({ - "cell_ontology_class": l['cell_ontology_class'][i], - "cell_ontology_id": None - }) - obs = obs.loc[strids] - for i in remaining: - obs = obs.append(i, ignore_index=True) - obs = obs.drop_duplicates() - obs = obs.sort_values(by="cell_ontology_class") - obs.index = range(0, obs.shape[0]) - obs.to_csv(fn + x + ".csv") - - -class TrainModel(TargetZoos): - - estimator: Union[None, EstimatorKeras] - zoo: Union[None, ModelZoo] - model_dir: str - data: Union[DatasetGroup, DatasetSuperGroup, anndata.AnnData, str, None] - - def __init__(self, data_path: str, meta_path: str): + def __init__( + self, + config_path: str, + data_path: str, + meta_path: str, + cache_path: str, + ): # Check if handling backed anndata or base 
path to directory of raw files: if data_path.split(".")[-1] == "h5ad": self.data = anndata.read(data_path, backed='r') @@ -167,8 +26,13 @@ def __init__(self, data_path: str, meta_path: str): fn_backed_obs = ".".join(data_path.split(".")[:-1]) + "_obs.csv" self.data.obs = pd.read_csv(fn_backed_obs) else: - super(TrainModel, self).__init__(data_path=data_path, meta_path=meta_path) - self.data = None + dataset = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dataset.load_config(config_path) + self.set_data(dataset) + + @abc.abstractmethod + def set_data(self, dataset): + pass @abc.abstractmethod def init_estim(self): @@ -185,26 +49,8 @@ def adata(self): raise ValueError("self.data not set yet") elif isinstance(self.data, anndata.AnnData): return self.data - elif isinstance(self.data, DatasetGroup) or isinstance(self.data, DatasetSuperGroup): - return self.data.adata else: - raise ValueError("self.data type not recognized: %s " % type(self.data)) - - def mouse_target(self, organ: str): - self.set_data(data_group=self.data_mouse[organ]) - - def human_target(self, organ: str): - self.set_data(data_group=self.data_human[organ]) - - def set_data( - self, - data_group: Union[DatasetGroup, DatasetSuperGroup] - ): - """ - Set input data group. 
- :return: - """ - self.data = data_group + raise ValueError(f"self.data type not recognized: {type(self.data)}") @abc.abstractmethod def _save_specific( @@ -240,15 +86,21 @@ class TrainModelEmbedding(TrainModel): def __init__( self, + config_path: str, data_path: str, meta_path: str, - model_path: str + cache_path: str, + model_path: str, ): - super(TrainModelEmbedding, self).__init__(data_path=data_path, meta_path=meta_path) + super(TrainModelEmbedding, self).__init__(config_path=config_path, data_path=data_path, meta_path=meta_path, cache_path=cache_path) self.zoo = ModelZooEmbedding(model_lookuptable=None) self.estimator = None self.model_dir = model_path + def set_data(self, dataset): + dataset.load(match_to_reference=True) + self.data = dataset.adata + def init_estim( self, override_hyperpar: Union[dict, None] = None @@ -294,7 +146,7 @@ def _save_specific( """ embedding = self.estimator.predict_embedding() df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "assay_sc", + ["dataset", "cell_ontology_class", "state_exact", "author", "year", "assay_sc", "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] ] df_summary["ncounts"] = np.asarray( @@ -308,15 +160,22 @@ class TrainModelCelltype(TrainModel): def __init__( self, + config_path: str, data_path: str, meta_path: str, - model_path: str + cache_path: str, + model_path: str, ): - super(TrainModelCelltype, self).__init__(data_path=data_path, meta_path=meta_path) + super(TrainModelCelltype, self).__init__(config_path=config_path, data_path=data_path, meta_path=meta_path, cache_path=cache_path) self.zoo = ModelZooCelltype(model_lookuptable=None) self.estimator = None self.model_dir = model_path + def set_data(self, dataset): + dataset.subset("annotated", True) + dataset.load(match_to_reference=True) + self.data = dataset.adata + def init_estim( self, override_hyperpar: Union[dict, None] = None @@ -367,7 +226,7 @@ def 
_save_specific( ytrue = self.estimator.ytrue() yhat = self.estimator.predict() df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "lab", "year", "subtissue", "assay_sc", + ["dataset", "cell_ontology_class", "state_exact", "author", "year", "assay_sc", "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] ] df_summary["ncounts"] = np.asarray(self.estimator.data.X[self.estimator.idx_test, :].sum(axis=1)).flatten() diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index ad1974ab0..e8e1b0b3d 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -79,13 +79,15 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): @pytest.mark.parametrize("out_format", ["sfaira", "cellxgene"]) -@pytest.mark.parametrize("clean_objects", [True, False]) -def test_dsgs_streamline(out_format: str, clean_objects: bool): +@pytest.mark.parametrize("clean_obs", [True, False]) +@pytest.mark.parametrize("clean_var", [True, False]) +@pytest.mark.parametrize("clean_uns", [True, False]) +def test_dsgs_streamline(out_format: str, clean_obs: bool, clean_var: bool, clean_uns: bool): ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load(remove_gene_version=True) - ds.streamline(format=out_format, clean=clean_objects) + ds.streamline(format=out_format, allow_uns_sfaira=False, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns) def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index 75525e6c9..ad87bc788 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -1,5 +1,6 @@ import os 
import pydoc +import shutil from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup, DatasetBase from sfaira.data.utils import read_yaml @@ -54,12 +55,15 @@ def test_load(doi_sfaira_repr: str, test_data: str): else: raise ValueError("data loader not found in sfaira and also not in sfaira_extension") file_path = pydoc.locate(dir_loader + ".FILE_PATH") + cache_path = os.path.join(test_data, "cache") + # Clear dataset cache + shutil.rmtree(cache_path, ignore_errors=True) ds = DatasetGroupDirectoryOriented( file_base=file_path, data_path=test_data, meta_path=test_data, - cache_path=test_data + cache_path=cache_path ) # Test raw loading and caching: # You can set load_raw to True while debugging when caching works already to speed the test up, @@ -70,6 +74,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): load_raw=True, # tests raw loading allow_caching=True, # tests caching ) + assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {test_data}" # Create cell type conversion table: cwd = os.path.dirname(file_path) @@ -92,7 +97,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): load_func = pydoc.locate(dir_loader + "." + file_module + ".load") load_func_annotation = pydoc.locate(dir_loader + "." + file_module + ".LOAD_ANNOTATION") # Also check sfaira_extension for additional load_func_annotation: - if package_source != "sfairae": + if package_source != "sfairae" and sfairae is not None: load_func_annotation_sfairae = pydoc.locate(dir_loader_sfairae + "." + dataset_module + "." + file_module + ".LOAD_ANNOTATION") # LOAD_ANNOTATION is a dictionary so we can use update to extend it. 
@@ -118,7 +123,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): DatasetBase( data_path=test_data, meta_path=test_data, - cache_path=test_data, + cache_path=cache_path, load_func=load_func, dict_load_func_annotation=load_func_annotation, sample_fn=x, @@ -131,7 +136,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): DatasetFound( data_path=test_data, meta_path=test_data, - cache_path=test_data, + cache_path=cache_path, load_func=load_func, load_func_annotation=load_func_annotation, sample_fn=x, @@ -154,7 +159,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): file_base=file_path, data_path=test_data, meta_path=test_data, - cache_path=test_data + cache_path=cache_path ) ds.load( remove_gene_version=remove_gene_version, @@ -165,3 +170,5 @@ def test_load(doi_sfaira_repr: str, test_data: str): ds.clean_ontology_class_map() # Test concatenation: _ = ds.adata + # Clear dataset cache + shutil.rmtree(cache_path, ignore_errors=True) diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index 35f42a704..d3e59d778 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -6,7 +6,7 @@ import unittest from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.versions.topology_versions import Topologies +from sfaira.versions.topologies import Topologies class _TestEstimator: diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index 6c13dfa8b..02f3d7dbd 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -1,5 +1,4 @@ -from sfaira.versions.metadata import OntologyUberon, OntologyCelltypes, OntologyMmusdv, OntologyHsapdv, \ - OntologyHancestro +from sfaira.versions.metadata import OntologyUberon, OntologyCelltypes, OntologyMondo, OntologyMmusdv, OntologyHsapdv """ CL @@ -17,6 +16,40 @@ 
def test_cl_subsetting(): assert not oc.is_a(query="lymphocyte", reference="T cell") +""" +Hancestro +""" + +# def test_hancestro_loading(): +# _ = OntologyHancestro() + +""" +Hsapdv +""" + + +def test_hsapdv_loading(): + _ = OntologyHsapdv() + + +""" +MONDO +""" + + +def test_mondo_loading(): + _ = OntologyMondo() + + +""" +Mmusdv +""" + + +def test_mmusdv_loading(): + _ = OntologyMmusdv() + + """ UBERON """ diff --git a/sfaira/versions/__init__.py b/sfaira/versions/__init__.py index e46a788f9..df806c4cd 100644 --- a/sfaira/versions/__init__.py +++ b/sfaira/versions/__init__.py @@ -1,3 +1,3 @@ from . import metadata -from . import genome_versions -from . import topology_versions +from . import genomes +from . import topologies diff --git a/sfaira/versions/genome_versions/__init__.py b/sfaira/versions/genome_versions/__init__.py deleted file mode 100644 index 22dd2d697..000000000 --- a/sfaira/versions/genome_versions/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import human -from . 
import mouse -from .class_interface import SuperGenomeContainer diff --git a/sfaira/versions/genome_versions/class_interface.py b/sfaira/versions/genome_versions/class_interface.py deleted file mode 100644 index ffc44de77..000000000 --- a/sfaira/versions/genome_versions/class_interface.py +++ /dev/null @@ -1,82 +0,0 @@ -import abc -import pandas - - -class SuperGenomeContainer: - _cache_tab: pandas.DataFrame - genome: str - organism: str - - def __init__( - self, - organism: str, - genome: str - ): - self.organism = organism - if self.organism == "human": - try: - from sfaira_extension.versions.genome_versions.human import GenomeContainer - if genome not in GenomeContainer.available_genomes: - from .human import GenomeContainer - if genome not in GenomeContainer.available_genomes: - raise ValueError(f"Genome {genome} not recognised.") - except ImportError: - from .human import GenomeContainer - if genome not in GenomeContainer.available_genomes: - raise ValueError(f"Genome {genome} not recognised.") - elif self.organism == "mouse": - try: - from sfaira_extension.versions.genome_versions.mouse import GenomeContainer - if genome not in GenomeContainer.available_genomes: - from .mouse import GenomeContainer - if genome not in GenomeContainer.available_genomes: - raise ValueError(f"Genome {genome} not recognised.") - except ImportError: - from .mouse import GenomeContainer - if genome not in GenomeContainer.available_genomes: - raise ValueError(f"Genome {genome} not recognised.") - else: - raise ValueError(f"Organism {organism} not recognised.") - - self.gc = GenomeContainer() - self.set_genome(genome=genome) - - @property - def cache_tab(self): - return self._cache_tab - - def set_genome(self, genome): - self.genome = genome - self._cache_tab = self.gc.read_local_csv(genome=genome) - assert self.gc.genome_sizes[self.genome][0] == self.cache_tab.shape[0] - - def show_genomes(self): - return list(self.gc.genomes.keys()) - - @property - def names(self): - return 
self.cache_tab["name"].values.tolist() - - @property - def ensembl(self): - return self.cache_tab["ensg"].values.tolist() - - @property - def type(self): - return self.cache_tab["type"].values.tolist() - - @property - def ngenes(self): - return self.gc.genome_sizes[self.genome][0] - - @property - def names_to_id_dict(self): - return dict(zip(self.cache_tab["name"].values.tolist(), self.cache_tab["ensg"].values.tolist())) - - @property - def id_to_names_dict(self): - return dict(zip(self.cache_tab["ensg"].values.tolist(), self.cache_tab["name"].values.tolist())) - - @property - def strippednames_to_id_dict(self): - return dict(zip([i.split(".")[0] for i in self.cache_tab["name"]], self.cache_tab["ensg"].values.tolist())) diff --git a/sfaira/versions/genome_versions/human/Homo_sapiens_GRCh38_97.csv b/sfaira/versions/genome_versions/human/Homo_sapiens_GRCh38_97.csv deleted file mode 100644 index 09502f313..000000000 --- a/sfaira/versions/genome_versions/human/Homo_sapiens_GRCh38_97.csv +++ /dev/null @@ -1,19987 +0,0 @@ -,name,ensg,type -25937,TSPAN6,ENSG00000000003,protein_coding -25936,TNMD,ENSG00000000005,protein_coding -54434,DPM1,ENSG00000000419,protein_coding -3879,SCYL3,ENSG00000000457,protein_coding -3874,C1orf112,ENSG00000000460,protein_coding -920,FGR,ENSG00000000938,protein_coding -4285,CFH,ENSG00000000971,protein_coding -21077,FUCA2,ENSG00000001036,protein_coding -19946,GCLC,ENSG00000001084,protein_coding -19663,NFYA,ENSG00000001167,protein_coding -790,STPG1,ENSG00000001460,protein_coding -791,NIPAL3,ENSG00000001461,protein_coding -25516,LAS1L,ENSG00000001497,protein_coding -19819,ENPP4,ENSG00000001561,protein_coding -10539,SEMA3F,ENSG00000001617,protein_coding -23690,CFTR,ENSG00000001626,protein_coding -23174,ANKIB1,ENSG00000001629,protein_coding -23165,CYP51A1,ENSG00000001630,protein_coding -23170,KRIT1,ENSG00000001631,protein_coding -37508,RAD52,ENSG00000002016,protein_coding -33460,BAD,ENSG00000002330,protein_coding 
-13237,LAP3,ENSG00000002549,protein_coding -24592,CD99,ENSG00000002586,protein_coding -13162,HS3ST1,ENSG00000002587,protein_coding -24404,AOC1,ENSG00000002726,protein_coding -23719,WNT16,ENSG00000002745,protein_coding -22290,HECW1,ENSG00000002746,protein_coding -21603,MAD1L1,ENSG00000002822,protein_coding -50453,LASP1,ENSG00000002834,protein_coding -50957,SNX11,ENSG00000002919,protein_coding -24403,TMEM176A,ENSG00000002933,protein_coding -37800,M6PR,ENSG00000003056,protein_coding -26243,KLHL13,ENSG00000003096,protein_coding -6676,CYP26B1,ENSG00000003137,protein_coding -21744,ICA1,ENSG00000003147,protein_coding -48980,DBNDD1,ENSG00000003249,protein_coding -8846,ALS2,ENSG00000003393,protein_coding -8823,CASP10,ENSG00000003400,protein_coding -8817,CFLAR,ENSG00000003402,protein_coding -8631,TFPI,ENSG00000003436,protein_coding -6083,NDUFAF7,ENSG00000003509,protein_coding -10536,RBM5,ENSG00000003756,protein_coding -27389,MTMR7,ENSG00000003987,protein_coding -27391,SLC7A2,ENSG00000003989,protein_coding -23797,ARF5,ENSG00000004059,protein_coding -49990,SARM1,ENSG00000004139,protein_coding -49983,POLDIP2,ENSG00000004142,protein_coding -11655,PLXND1,ENSG00000004399,protein_coding -1100,AK2,ENSG00000004455,protein_coding -13212,CD38,ENSG00000004468,protein_coding -37548,FKBP4,ENSG00000004478,protein_coding -740,KDM1A,ENSG00000004487,protein_coding -10535,RBM6,ENSG00000004534,protein_coding -49153,CAMKK1,ENSG00000004660,protein_coding -38086,RECQL,ENSG00000004700,protein_coding -23193,VPS50,ENSG00000004766,protein_coding -56293,HSPB6,ENSG00000004776,protein_coding -56296,ARHGAP33,ENSG00000004777,protein_coding -47281,NDUFAB1,ENSG00000004779,protein_coding -23239,PDK4,ENSG00000004799,protein_coding -20597,SLC22A16,ENSG00000004809,protein_coding -10560,ZMYND10,ENSG00000004838,protein_coding -21873,ABCB5,ENSG00000004846,protein_coding -24868,ARX,ENSG00000004848,protein_coding -23242,SLC25A13,ENSG00000004864,protein_coding -23666,ST7,ENSG00000004866,protein_coding 
-50912,CDC27,ENSG00000004897,protein_coding -50790,SLC4A1,ENSG00000004939,protein_coding -23194,CALCR,ENSG00000004948,protein_coding -24678,HCCS,ENSG00000004961,protein_coding -49298,DVL2,ENSG00000004975,protein_coding -46702,PRSS22,ENSG00000005001,protein_coding -55799,UPF1,ENSG00000005007,protein_coding -21975,SKAP2,ENSG00000005020,protein_coding -26277,SLC25A5,ENSG00000005022,protein_coding -14420,MCUB,ENSG00000005059,protein_coding -22003,HOXA11,ENSG00000005073,protein_coding -23467,POLR2J,ENSG00000005075,protein_coding -49235,DHX33,ENSG00000005100,protein_coding -50755,MEOX1,ENSG00000005102,protein_coding -21774,THSD7A,ENSG00000005108,protein_coding -50274,LIG3,ENSG00000005156,protein_coding -38478,RPAP3,ENSG00000005175,protein_coding -47188,ACSM3,ENSG00000005187,protein_coding -47198,REXO5,ENSG00000005189,protein_coding -48137,CIAPIN1,ENSG00000005194,protein_coding -54910,SPPL2B,ENSG00000005206,protein_coding -29995,FAM214B,ENSG00000005238,protein_coding -50949,COPZ2,ENSG00000005243,protein_coding -23552,PRKAR2B,ENSG00000005249,protein_coding -24688,MSL3,ENSG00000005302,protein_coding -46774,CREBBP,ENSG00000005339,protein_coding -51222,TSPOAP1,ENSG00000005379,protein_coding -51221,MPO,ENSG00000005381,protein_coding -23229,PON1,ENSG00000005421,protein_coding -6785,GCFC2,ENSG00000005436,protein_coding -6740,WDR54,ENSG00000005448,protein_coding -23112,CROT,ENSG00000005469,protein_coding -23113,ABCB4,ENSG00000005471,protein_coding -23519,KMT2E,ENSG00000005483,protein_coding -22970,RHBDD2,ENSG00000005486,protein_coding -46523,SOX8,ENSG00000005513,protein_coding -20246,IBTK,ENSG00000005700,protein_coding -31974,ZNF195,ENSG00000005801,protein_coding -41458,MYCBP2,ENSG00000005810,protein_coding -41455,FBXL3,ENSG00000005812,protein_coding -47525,ITGAL,ENSG00000005844,protein_coding -51058,PDK2,ENSG00000005882,protein_coding -51056,ITGA3,ENSG00000005884,protein_coding -24855,ZFX,ENSG00000005889,protein_coding -26308,LAMP2,ENSG00000005893,protein_coding 
-50802,ITGA2B,ENSG00000005961,protein_coding -23236,ASB4,ENSG00000005981,protein_coding -47162,GDE1,ENSG00000006007,protein_coding -55793,REX1BD,ENSG00000006015,protein_coding -55792,CRLF1,ENSG00000006016,protein_coding -50932,AC003665.1,ENSG00000006025,protein_coding -50232,TMEM98,ENSG00000006042,protein_coding -49310,YBX2,ENSG00000006047,protein_coding -50599,KRT33A,ENSG00000006059,protein_coding -50849,MAP3K14,ENSG00000006062,protein_coding -32409,ABCC8,ENSG00000006071,protein_coding -47299,CACNG3,ENSG00000006116,protein_coding -33279,TMEM132A,ENSG00000006118,protein_coding -50321,AP2B1,ENSG00000006125,protein_coding -23263,TAC1,ENSG00000006128,protein_coding -46743,ZNF263,ENSG00000006194,protein_coding -48135,CX3CL1,ENSG00000006210,protein_coding -51090,SPATA20,ENSG00000006282,protein_coding -51092,CACNA1G,ENSG00000006283,protein_coding -46718,TNFRSF12A,ENSG00000006327,protein_coding -23255,DLX6,ENSG00000006377,protein_coding -43181,MAP3K9,ENSG00000006432,protein_coding -22252,RALA,ENSG00000006451,protein_coding -23285,BAIAP2L1,ENSG00000006453,protein_coding -24061,KDM7A,ENSG00000006459,protein_coding -21796,ETV1,ENSG00000006468,protein_coding -24095,AGK,ENSG00000006530,protein_coding -33719,ALDH3B1,ENSG00000006534,protein_coding -1738,TTC22,ENSG00000006555,protein_coding -23032,PHTF2,ENSG00000006576,protein_coding -22967,CCL26,ENSG00000006606,protein_coding -9631,FARP2,ENSG00000006607,protein_coding -32412,USH1C,ENSG00000006611,protein_coding -22082,GGCT,ENSG00000006625,protein_coding -23118,DBF4,ENSG00000006634,protein_coding -54974,TBXA2R,ENSG00000006638,protein_coding -23612,IFRD1,ENSG00000006652,protein_coding -56499,LGALS14,ENSG00000006659,protein_coding -49516,COX10,ENSG00000006695,protein_coding -22923,GTF2IRD1,ENSG00000006704,protein_coding -56471,PAF1,ENSG00000006712,protein_coding -22241,VPS41,ENSG00000006715,protein_coding -49501,ARHGAP44,ENSG00000006740,protein_coding -49506,ELAC2,ENSG00000006744,protein_coding 
-21785,SCIN,ENSG00000006747,protein_coding -24596,ARSD,ENSG00000006756,protein_coding -24635,PNPLA4,ENSG00000006757,protein_coding -49453,MYH13,ENSG00000006788,protein_coding -37518,ADIPOR2,ENSG00000006831,protein_coding -17442,CDKL3,ENSG00000006837,protein_coding -8195,UPP2,ENSG00000007001,protein_coding -46698,PRSS21,ENSG00000007038,protein_coding -56779,MARK4,ENSG00000007047,protein_coding -13216,PROM1,ENSG00000007062,protein_coding -55749,CCDC124,ENSG00000007080,protein_coding -56599,CEACAM21,ENSG00000007129,protein_coding -49097,PAFAH1B1,ENSG00000007168,protein_coding -49956,NOS2,ENSG00000007171,protein_coding -49485,DNAH9,ENSG00000007174,protein_coding -50011,KIAA0100,ENSG00000007202,protein_coding -50000,SLC13A2,ENSG00000007216,protein_coding -49450,AC005747.1,ENSG00000007237,protein_coding -56784,TRAPPC6A,ENSG00000007255,protein_coding -54986,MATK,ENSG00000007264,protein_coding -56607,CEACAM7,ENSG00000007306,protein_coding -51421,CD79B,ENSG00000007312,protein_coding -51422,SCN4A,ENSG00000007314,protein_coding -2657,ST7L,ENSG00000007341,protein_coding -26896,TKTL1,ENSG00000007350,protein_coding -32608,PAX6,ENSG00000007372,protein_coding -46512,RPUSD1,ENSG00000007376,protein_coding -46444,RHBDF1,ENSG00000007384,protein_coding -46459,LUC7L,ENSG00000007392,protein_coding -10565,CACNA2D2,ENSG00000007402,protein_coding -46551,BAIAP3,ENSG00000007516,protein_coding -46552,TSR3,ENSG00000007520,protein_coding -46482,PIGQ,ENSG00000007541,protein_coding -46576,CRAMP1,ENSG00000007545,protein_coding -19545,TEAD3,ENSG00000007866,protein_coding -3876,SELE,ENSG00000007908,protein_coding -259,DNAJC11,ENSG00000007923,protein_coding -3902,FMO3,ENSG00000007933,protein_coding -18775,MYLIP,ENSG00000007944,protein_coding -25945,NOX1,ENSG00000007952,protein_coding -760,E2F2,ENSG00000007968,protein_coding -21533,PSMB1,ENSG00000008018,protein_coding -25160,SYN1,ENSG00000008056,protein_coding -18764,JARID2,ENSG00000008083,protein_coding -24787,CDKL5,ENSG00000008086,protein_coding 
-4609,CAMK1G,ENSG00000008118,protein_coding -134,CDK11A,ENSG00000008128,protein_coding -138,NADK,ENSG00000008130,protein_coding -19885,TFAP2B,ENSG00000008196,protein_coding -19884,TFAP2D,ENSG00000008197,protein_coding -10175,DLEC1,ENSG00000008226,protein_coding -21690,CYTH3,ENSG00000008256,protein_coding -23119,ADAM22,ENSG00000008277,protein_coding -23538,SYPL1,ENSG00000008282,protein_coding -51390,CYB561,ENSG00000008283,protein_coding -51113,SPAG9,ENSG00000008294,protein_coding -10462,CELSR3,ENSG00000008300,protein_coding -23728,AASS,ENSG00000008311,protein_coding -37629,PLEKHG6,ENSG00000008323,protein_coding -10273,SS18L2,ENSG00000008324,protein_coding -55012,MPND,ENSG00000008382,protein_coding -38023,MGST1,ENSG00000008394,protein_coding -39814,CRY1,ENSG00000008405,protein_coding -56829,PGLYRP1,ENSG00000008438,protein_coding -55496,NFIX,ENSG00000008441,protein_coding -29201,ST3GAL1,ENSG00000008513,protein_coding -46724,MMP25,ENSG00000008516,protein_coding -46725,IL32,ENSG00000008517,protein_coding -46622,PKD1,ENSG00000008710,protein_coding -59637,MAPK8IP2,ENSG00000008735,protein_coding -50504,MED24,ENSG00000008838,protein_coding -27506,RHOBTB2,ENSG00000008853,protein_coding -6074,HEATR5B,ENSG00000008869,protein_coding -12283,SEC62,ENSG00000008952,protein_coding -28079,RPS20,ENSG00000008988,protein_coding -2712,CSDE1,ENSG00000009307,protein_coding -24527,UBE3C,ENSG00000009335,protein_coding -20622,REV3L,ENSG00000009413,protein_coding -26381,TENM1,ENSG00000009694,protein_coding -618,PAX7,ENSG00000009709,protein_coding -374,MASP2,ENSG00000009724,protein_coding -21194,IYD,ENSG00000009765,protein_coding -929,FAM76A,ENSG00000009780,protein_coding -4616,TRAF3IP3,ENSG00000009790,protein_coding -43371,POMT2,ENSG00000009830,protein_coding -21054,VTA1,ENSG00000009844,protein_coding -22899,MLXIPL,ENSG00000009950,protein_coding -22895,BAZ1B,ENSG00000009954,protein_coding -18742,RANBP9,ENSG00000010017,protein_coding -19571,ETV7,ENSG00000010030,protein_coding 
-5068,SPRTN,ENSG00000010072,protein_coding -3926,EEF1AKNMT,ENSG00000010165,protein_coding -37594,DYRK4,ENSG00000010219,protein_coding -50210,ZNF207,ENSG00000010244,protein_coding -10458,UQCRC1,ENSG00000010256,protein_coding -22212,STARD3NL,ENSG00000010270,protein_coding -37626,CD9,ENSG00000010278,protein_coding -10281,HHATL,ENSG00000010282,protein_coding -37644,NCAPD2,ENSG00000010292,protein_coding -37650,IFFO1,ENSG00000010295,protein_coding -56805,GIPR,ENSG00000010310,protein_coding -10630,PHF7,ENSG00000010318,protein_coding -10631,SEMA3G,ENSG00000010319,protein_coding -10633,NISCH,ENSG00000010322,protein_coding -10634,STAB1,ENSG00000010327,protein_coding -57060,FUZ,ENSG00000010361,protein_coding -37493,SLC6A13,ENSG00000010379,protein_coding -26745,IDS,ENSG00000010404,protein_coding -29925,PRSS3,ENSG00000010438,protein_coding -46740,ZNF200,ENSG00000010539,protein_coding -37671,CD4,ENSG00000010610,protein_coding -37680,LRRC23,ENSG00000010626,protein_coding -25963,BTK,ENSG00000010671,protein_coding -18937,HFE,ENSG00000010704,protein_coding -1334,SCMH1,ENSG00000010803,protein_coding -20633,FYN,ENSG00000010810,protein_coding -21060,HIVEP2,ENSG00000010818,protein_coding -3908,FMO1,ENSG00000010932,protein_coding -768,ELOA,ENSG00000011007,protein_coding -770,LYPLA2,ENSG00000011009,protein_coding -403,CLCN6,ENSG00000011021,protein_coding -51365,AC080038.1,ENSG00000011028,protein_coding -51116,NME1-NME2,ENSG00000011052,protein_coding -17890,SLC6A7,ENSG00000011083,protein_coding -37562,TSPAN9,ENSG00000011105,protein_coding -43580,BTBD7,ENSG00000011114,protein_coding -54980,AC005954.1,ENSG00000011132,protein_coding -51217,MKS1,ENSG00000011143,protein_coding -10309,ABHD5,ENSG00000011198,protein_coding -24641,ANOS1,ENSG00000011201,protein_coding -55602,AKAP8L,ENSG00000011243,protein_coding -51118,MBTD1,ENSG00000011258,protein_coding -51121,UTP18,ENSG00000011260,protein_coding -21672,RNF216,ENSG00000011275,protein_coding -49577,TTC19,ENSG00000011295,protein_coding 
-54802,PTBP1,ENSG00000011304,protein_coding -56404,DPF1,ENSG00000011332,protein_coding -33307,SYT7,ENSG00000011347,protein_coding -10352,LARS2,ENSG00000011376,protein_coding -32399,PIK3C2A,ENSG00000011405,protein_coding -56696,PLAUR,ENSG00000011422,protein_coding -22188,ANLN,ENSG00000011426,protein_coding -55605,WIZ,ENSG00000011451,protein_coding -31291,RABGAP1,ENSG00000011454,protein_coding -39510,DCN,ENSG00000011465,protein_coding -56809,QPCTL,ENSG00000011478,protein_coding -56849,PPP5C,ENSG00000011485,protein_coding -6536,CEP68,ENSG00000011523,protein_coding -6135,MAP4K3,ENSG00000011566,protein_coding -56283,ZBTB32,ENSG00000011590,protein_coding -56306,TYROBP,ENSG00000011600,protein_coding -47204,TMEM159,ENSG00000011638,protein_coding -26809,GABRA3,ENSG00000011677,protein_coding -50717,BRCA1,ENSG00000012048,protein_coding -56793,ERCC1,ENSG00000012061,protein_coding -56252,CD22,ENSG00000012124,protein_coding -10548,SEMA3B,ENSG00000012171,protein_coding -24823,MBTPS2,ENSG00000012174,protein_coding -25257,PRICKLE3,ENSG00000012211,protein_coding -10378,LTF,ENSG00000012223,protein_coding -27628,EXTL3,ENSG00000012232,protein_coding -39689,NR1H4,ENSG00000012504,protein_coding -19936,ELOVL5,ENSG00000012660,protein_coding -35928,ALOX5,ENSG00000012779,protein_coding -58046,KDM5D,ENSG00000012817,protein_coding -38786,CALCOCO1,ENSG00000012822,protein_coding -43578,UBR7,ENSG00000012963,protein_coding -42755,MAP4K5,ENSG00000012983,protein_coding -6003,EHD3,ENSG00000013016,protein_coding -56509,PSMC4,ENSG00000013275,protein_coding -13023,MAN2B2,ENSG00000013288,protein_coding -12299,SLC7A14,ENSG00000013293,protein_coding -12295,CLDN11,ENSG00000013297,protein_coding -50797,SLC25A39,ENSG00000013306,protein_coding -47471,MVP,ENSG00000013364,protein_coding -24428,NUB1,ENSG00000013374,protein_coding -20255,PGM3,ENSG00000013375,protein_coding -20256,RWDD2A,ENSG00000013392,protein_coding -8801,CLK1,ENSG00000013441,protein_coding -39802,POLR3B,ENSG00000013503,protein_coding 
-43346,ANGEL1,ENSG00000013523,protein_coding -17746,RNF14,ENSG00000013561,protein_coding -26904,DNASE1L1,ENSG00000013563,protein_coding -38250,DDX11,ENSG00000013573,protein_coding -37965,HEBP1,ENSG00000013583,protein_coding -37959,GPRC5A,ENSG00000013588,protein_coding -26774,MAMLD1,ENSG00000013619,protein_coding -33281,CD6,ENSG00000013725,protein_coding -12919,TACC3,ENSG00000013810,protein_coding -20422,UFL1,ENSG00000014123,protein_coding -33532,POLA2,ENSG00000014138,protein_coding -29348,ZC3H3,ENSG00000014164,protein_coding -33529,CAPN1,ENSG00000014216,protein_coding -11709,ACPP,ENSG00000014257,protein_coding -6495,MDH1,ENSG00000014641,protein_coding -13495,SLC30A9,ENSG00000014824,protein_coding -3104,MTMR11,ENSG00000014914,protein_coding -36886,COX15,ENSG00000014919,protein_coding -43542,CCDC88C,ENSG00000015133,protein_coding -38388,YAF2,ENSG00000015153,protein_coding -35154,ZMYND11,ENSG00000015171,protein_coding -25225,WAS,ENSG00000015285,protein_coding -48959,DPEP1,ENSG00000015413,protein_coding -58373,BID,ENSG00000015475,protein_coding -17571,MATR3,ENSG00000015479,protein_coding -22336,NPC1L1,ENSG00000015520,protein_coding -51074,XYLT2,ENSG00000015532,protein_coding -7462,RGPD5,ENSG00000015568,protein_coding -27586,STMN4,ENSG00000015592,protein_coding -22332,NUDCD3,ENSG00000015676,protein_coding -16228,ISL1,ENSG00000016082,protein_coding -10679,CHDH,ENSG00000016391,protein_coding -20989,IL20RA,ENSG00000016402,protein_coding -2185,CLCA1,ENSG00000016490,protein_coding -2186,CLCA4,ENSG00000016602,protein_coding -10647,GLT8D1,ENSG00000016864,protein_coding -11686,ATP2C1,ENSG00000017260,protein_coding -39737,IGF1,ENSG00000017427,protein_coding -25210,SLC38A5,ENSG00000017483,protein_coding -52240,RALBP1,ENSG00000017797,protein_coding -13898,RUFY3,ENSG00000018189,protein_coding -38374,CNTN1,ENSG00000018236,protein_coding -9144,SLC11A1,ENSG00000018280,protein_coding -11981,WWTR1,ENSG00000018408,protein_coding -8509,AGPS,ENSG00000018510,protein_coding 
-26278,CXorf56,ENSG00000018610,protein_coding -3623,ATP1A2,ENSG00000018625,protein_coding -6032,TTC27,ENSG00000018699,protein_coding -57578,ZNF582,ENSG00000018869,protein_coding -34962,VSIG2,ENSG00000019102,protein_coding -34761,PHLDB1,ENSG00000019144,protein_coding -7640,MARCO,ENSG00000019169,protein_coding -54480,CYP24A1,ENSG00000019186,protein_coding -32791,PRDM11,ENSG00000019485,protein_coding -32793,SYT13,ENSG00000019505,protein_coding -27989,SNAI2,ENSG00000019549,protein_coding -17894,CD74,ENSG00000019582,protein_coding -23073,HGF,ENSG00000019991,protein_coding -37322,ZRANB1,ENSG00000019995,protein_coding -1154,NCDN,ENSG00000020129,protein_coding -27776,ADGRA2,ENSG00000020181,protein_coding -54448,ZFP64,ENSG00000020256,protein_coding -42970,MNAT1,ENSG00000020426,protein_coding -42845,SAMD4A,ENSG00000020577,protein_coding -802,RUNX3,ENSG00000020633,protein_coding -34337,MRE11,ENSG00000020922,protein_coding -33903,PLEKHB1,ENSG00000021300,protein_coding -18533,SERPINB1,ENSG00000021355,protein_coding -23328,CYP3A43,ENSG00000021461,protein_coding -56153,SLC7A9,ENSG00000021488,protein_coding -6016,SPAST,ENSG00000021574,protein_coding -43393,NRXN3,ENSG00000021645,protein_coding -31963,OSBPL5,ENSG00000021762,protein_coding -44736,AQR,ENSG00000021776,protein_coding -9033,CPS1,ENSG00000021826,protein_coding -1776,C8B,ENSG00000021852,protein_coding -26577,FHL1,ENSG00000022267,protein_coding -54502,RTF2,ENSG00000022277,protein_coding -18078,GABRA1,ENSG00000022355,protein_coding -57476,NLRP2,ENSG00000022556,protein_coding -29274,SLC45A4,ENSG00000022567,protein_coding -40133,RNF10,ENSG00000022840,protein_coding -43889,ZNF839,ENSG00000022976,protein_coding -37112,ZDHHC6,ENSG00000023041,protein_coding -34899,GRAMD1B,ENSG00000023171,protein_coding -31828,RNH1,ENSG00000023191,protein_coding -8932,NDUFS1,ENSG00000023228,protein_coding -28024,RB1CC1,ENSG00000023287,protein_coding -30945,ERP44,ENSG00000023318,protein_coding -10615,ALAS1,ENSG00000023330,protein_coding 
-34423,BIRC3,ENSG00000023445,protein_coding -41018,AKAP11,ENSG00000023516,protein_coding -4265,GLRX2,ENSG00000023572,protein_coding -42989,SNAPC1,ENSG00000023608,protein_coding -38020,DERA,ENSG00000023697,protein_coding -38019,STRAP,ENSG00000023734,protein_coding -36887,ABCC2,ENSG00000023839,protein_coding -19539,DEF6,ENSG00000023892,protein_coding -3109,PLEKHO1,ENSG00000023902,protein_coding -2326,GCLM,ENSG00000023909,protein_coding -19716,UBR2,ENSG00000024048,protein_coding -56910,EHD2,ENSG00000024422,protein_coding -1956,DEPDC1,ENSG00000024526,protein_coding -21018,CCDC28A,ENSG00000024862,protein_coding -20360,RRAGD,ENSG00000025039,protein_coding -20765,HSF2,ENSG00000025156,protein_coding -54045,PHF20,ENSG00000025293,protein_coding -38971,HSD17B6,ENSG00000025423,protein_coding -32858,NR1H3,ENSG00000025434,protein_coding -59626,TYMP,ENSG00000025708,protein_coding -59623,NCAPH2,ENSG00000025770,protein_coding -54241,TOMM34,ENSG00000025772,protein_coding -20533,SEC63,ENSG00000025796,protein_coding -1056,KPNA6,ENSG00000025800,protein_coding -35448,VIM,ENSG00000026025,protein_coding -54705,RTEL1-TNFRSF6B,ENSG00000026036,protein_coding -36662,FAS,ENSG00000026103,protein_coding -21447,RNASET2,ENSG00000026297,protein_coding -32681,CD44,ENSG00000026508,protein_coding -54437,KCNG1,ENSG00000026559,protein_coding -21384,AGPAT4,ENSG00000026652,protein_coding -3647,SLAMF7,ENSG00000026751,protein_coding -18973,BTN3A1,ENSG00000026950,protein_coding -40688,MIPEP,ENSG00000027001,protein_coding -42978,PRKCH,ENSG00000027075,protein_coding -3502,INSRR,ENSG00000027644,protein_coding -20991,IFNGR1,ENSG00000027697,protein_coding -18333,B4GALT7,ENSG00000027847,protein_coding -3500,SH2D2A,ENSG00000027869,protein_coding -6396,VRK2,ENSG00000028116,protein_coding -427,TNFRSF1B,ENSG00000028137,protein_coding -39590,VEZT,ENSG00000028203,protein_coding -56627,POU2F2,ENSG00000028277,protein_coding -15536,BRD9,ENSG00000028310,protein_coding -45429,SNX1,ENSG00000028528,protein_coding 
-20921,TBPL1,ENSG00000028839,protein_coding -38184,ARNTL2,ENSG00000029153,protein_coding -20968,BCLAF1,ENSG00000029363,protein_coding -43145,SLC39A9,ENSG00000029364,protein_coding -27866,ANK1,ENSG00000029534,protein_coding -14164,IBSP,ENSG00000029559,protein_coding -21274,TFB1M,ENSG00000029639,protein_coding -49229,RABEP1,ENSG00000029725,protein_coding -26781,HMGB3,ENSG00000029993,protein_coding -32883,NUP160,ENSG00000030066,protein_coding -19490,BAK1,ENSG00000030110,protein_coding -31093,MUSK,ENSG00000030304,protein_coding -9049,IKZF2,ENSG00000030419,protein_coding -50799,GRN,ENSG00000030582,protein_coding -17531,FAM13B,ENSG00000031003,protein_coding -11388,ARHGAP31,ENSG00000031081,protein_coding -19863,CENPQ,ENSG00000031691,protein_coding -2540,SARS,ENSG00000031698,protein_coding -55080,RANBP3,ENSG00000031823,protein_coding -42923,ARID4A,ENSG00000032219,protein_coding -5520,EIPR1,ENSG00000032389,protein_coding -55153,PNPLA6,ENSG00000032444,protein_coding -40617,IFT88,ENSG00000032742,protein_coding -46827,ALG1,ENSG00000033011,protein_coding -40205,ZCCHC8,ENSG00000033030,protein_coding -24422,ABCF2,ENSG00000033050,protein_coding -24423,CHPF2,ENSG00000033100,protein_coding -1967,LRRC7,ENSG00000033122,protein_coding -43065,FUT8,ENSG00000033170,protein_coding -13793,UBA6,ENSG00000033178,protein_coding -34055,GAB2,ENSG00000033327,protein_coding -50662,ATP6V0A1,ENSG00000033627,protein_coding -45546,PIAS1,ENSG00000033800,protein_coding -10034,SLC4A7,ENSG00000033867,protein_coding -44574,APBA2,ENSG00000034053,protein_coding -49874,MAP2K3,ENSG00000034152,protein_coding -27986,EFCAB1,ENSG00000034239,protein_coding -6870,TMSB10,ENSG00000034510,protein_coding -11692,ASTE1,ENSG00000034533,protein_coding -28762,RNF19A,ENSG00000034677,protein_coding -21074,PEX3,ENSG00000034693,protein_coding -48611,GABARAPL2,ENSG00000034713,protein_coding -3918,MYOC,ENSG00000034971,protein_coding -5473,SH3YL1,ENSG00000035115,protein_coding -6635,FAM136A,ENSG00000035141,protein_coding 
-36393,VCL,ENSG00000035403,protein_coding -16372,DEPDC1B,ENSG00000035499,protein_coding -45426,DAPK2,ENSG00000035664,protein_coding -28127,NSMAF,ENSG00000035681,protein_coding -5310,ADSS,ENSG00000035687,protein_coding -13791,STAP1,ENSG00000035720,protein_coding -51848,TIMP2,ENSG00000035862,protein_coding -13424,RFC1,ENSG00000035928,protein_coding -11143,TBC1D23,ENSG00000036054,protein_coding -9286,CUL3,ENSG00000036257,protein_coding -27028,MYOM2,ENSG00000036448,protein_coding -25003,OTC,ENSG00000036473,protein_coding -43711,CYP46A1,ENSG00000036530,protein_coding -2064,AC118549.1,ENSG00000036549,protein_coding -27442,SLC18A1,ENSG00000036565,protein_coding -34816,USP2,ENSG00000036672,protein_coding -11458,CASR,ENSG00000036828,protein_coding -50682,TUBG2,ENSG00000037042,protein_coding -18204,RPL26L1,ENSG00000037241,protein_coding -18434,FLT4,ENSG00000037280,protein_coding -15631,NSUN2,ENSG00000037474,protein_coding -552,FBXO42,ENSG00000037637,protein_coding -17954,MFAP3,ENSG00000037749,protein_coding -55512,MRI1,ENSG00000037757,protein_coding -39030,METTL1,ENSG00000037897,protein_coding -38808,HOXC8,ENSG00000037965,protein_coding -15300,AGA,ENSG00000038002,protein_coding -13294,PI4K2B,ENSG00000038210,protein_coding -13182,BOD1L1,ENSG00000038219,protein_coding -18090,MAT2B,ENSG00000038274,protein_coding -15172,TLL1,ENSG00000038295,protein_coding -48341,EDC4,ENSG00000038358,protein_coding -15757,TRIO,ENSG00000038382,protein_coding -16782,VCAN,ENSG00000038427,protein_coding -46928,CLEC16A,ENSG00000038532,protein_coding -27376,MSR1,ENSG00000038945,protein_coding -48395,CDH1,ENSG00000039068,protein_coding -16292,MTREX,ENSG00000039123,protein_coding -15755,DNAH5,ENSG00000039139,protein_coding -16728,ZFYVE16,ENSG00000039319,protein_coding -48319,RIPOR1,ENSG00000039523,protein_coding -16138,C6,ENSG00000039537,protein_coding -16028,RAI14,ENSG00000039560,protein_coding -18012,SOX30,ENSG00000039600,protein_coding -57069,PNKP,ENSG00000039650,protein_coding 
-55468,BEST2,ENSG00000039987,protein_coding -48503,PHLPP2,ENSG00000040199,protein_coding -18144,SPDL1,ENSG00000040275,protein_coding -28343,STAU2,ENSG00000040341,protein_coding -635,PQLC2,ENSG00000040487,protein_coding -49141,CTNS,ENSG00000040531,protein_coding -58498,RTN4R,ENSG00000040608,protein_coding -49299,PHF23,ENSG00000040633,protein_coding -15895,CDH10,ENSG00000040731,protein_coding -7257,INPP4A,ENSG00000040933,protein_coding -52907,RAB27B,ENSG00000041353,protein_coding -45844,PSMA4,ENSG00000041357,protein_coding -41811,MYO16,ENSG00000041515,protein_coding -12716,LSG1,ENSG00000041802,protein_coding -10600,PARP3,ENSG00000041880,protein_coding -31191,TNC,ENSG00000041982,protein_coding -258,THAP3,ENSG00000041988,protein_coding -54424,RIPOR3,ENSG00000042062,protein_coding -43512,TDP1,ENSG00000042088,protein_coding -36271,AIFM2,ENSG00000042286,protein_coding -9314,C2orf83,ENSG00000042304,protein_coding -43482,AL049834.1,ENSG00000042317,protein_coding -34325,MED17,ENSG00000042429,protein_coding -6885,RETSAT,ENSG00000042445,protein_coding -6891,CAPG,ENSG00000042493,protein_coding -56880,AP2S1,ENSG00000042753,protein_coding -4725,USH2A,ENSG00000042781,protein_coding -22419,ZPBP,ENSG00000042813,protein_coding -29191,TG,ENSG00000042832,protein_coding -27540,ADAM28,ENSG00000042980,protein_coding -35061,BARX2,ENSG00000043039,protein_coding -12481,DCUN1D1,ENSG00000043093,protein_coding -17454,JADE2,ENSG00000043143,protein_coding -41716,ZIC2,ENSG00000043355,protein_coding -18153,LCP2,ENSG00000043462,protein_coding -1284,TRIT1,ENSG00000043514,protein_coding -37137,ADRB1,ENSG00000043591,protein_coding -1351,GUCA2B,ENSG00000044012,protein_coding -19738,CUL7,ENSG00000044090,protein_coding -17556,CTNNA1,ENSG00000044115,protein_coding -24795,PHKA2,ENSG00000044446,protein_coding -29689,CNTLN,ENSG00000044459,protein_coding -11042,EPHA3,ENSG00000044524,protein_coding -31337,HSPA5,ENSG00000044574,protein_coding -52595,DSG2,ENSG00000046604,protein_coding 
-24722,GEMIN8,ENSG00000046647,protein_coding -24718,OFD1,ENSG00000046651,protein_coding -24719,GPM6B,ENSG00000046653,protein_coding -26662,MAGEC2,ENSG00000046774,protein_coding -28259,PREX2,ENSG00000046889,protein_coding -35172,WDR37,ENSG00000047056,protein_coding -17135,YTHDC2,ENSG00000047188,protein_coding -24756,CTPS2,ENSG00000047230,protein_coding -28039,ATP6V1H,ENSG00000047249,protein_coding -13706,POLR2B,ENSG00000047315,protein_coding -45210,FAM214A,ENSG00000047346,protein_coding -13377,ARAP2,ENSG00000047365,protein_coding -4197,TPR,ENSG00000047410,protein_coding -11966,CP,ENSG00000047457,protein_coding -47351,KIAA0556,ENSG00000047578,protein_coding -18769,DTNBP1,ENSG00000047579,protein_coding -24988,XK,ENSG00000047597,protein_coding -37621,ANO2,ENSG00000047617,protein_coding -37592,C12orf4,ENSG00000047621,protein_coding -24777,SCML1,ENSG00000047634,protein_coding -24670,WWC3,ENSG00000047644,protein_coding -24680,ARHGAP6,ENSG00000047648,protein_coding -13240,FAM184B,ENSG00000047662,protein_coding -10425,MAP4,ENSG00000047849,protein_coding -20720,GOPC,ENSG00000047932,protein_coding -20712,ROS1,ENSG00000047936,protein_coding -34646,USP28,ENSG00000048028,protein_coding -21845,HDAC9,ENSG00000048052,protein_coding -18298,TSPAN17,ENSG00000048140,protein_coding -18285,NOP16,ENSG00000048162,protein_coding -13204,CC2D2A,ENSG00000048342,protein_coding -28814,RRM2B,ENSG00000048392,protein_coding -23791,ZNF800,ENSG00000048405,protein_coding -46966,TNFRSF17,ENSG00000048462,protein_coding -46968,SNX29,ENSG00000048471,protein_coding -38027,LMO3,ENSG00000048540,protein_coding -19713,MRPS10,ENSG00000048544,protein_coding -19709,GUCA1A,ENSG00000048545,protein_coding -34031,RSF1,ENSG00000048649,protein_coding -431,VPS13D,ENSG00000048707,protein_coding -35336,CELF2,ENSG00000048740,protein_coding -30784,FAM120A,ENSG00000048828,protein_coding -7963,R3HDM1,ENSG00000048991,protein_coding -1300,COL9A2,ENSG00000049089,protein_coding -39471,KITLG,ENSG00000049130,protein_coding 
-16378,ERCC8,ENSG00000049167,protein_coding -16424,ADAMTS6,ENSG00000049192,protein_coding -320,H6PD,ENSG00000049239,protein_coding -273,VAMP3,ENSG00000049245,protein_coding -275,PER3,ENSG00000049246,protein_coding -277,UTS2,ENSG00000049247,protein_coding -278,TNFRSF9,ENSG00000049249,protein_coding -51089,EPN3,ENSG00000049283,protein_coding -6037,LTBP1,ENSG00000049323,protein_coding -32616,RCN1,ENSG00000049449,protein_coding -22914,ELN,ENSG00000049540,protein_coding -22920,RFC2,ENSG00000049541,protein_coding -21286,ARID1B,ENSG00000049618,protein_coding -15552,CLPTM1L,ENSG00000049656,protein_coding -52958,NEDD4L,ENSG00000049759,protein_coding -25263,FOXP3,ENSG00000049768,protein_coding -25265,PPP1R3F,ENSG00000049769,protein_coding -16601,HEXB,ENSG00000049860,protein_coding -16553,PTCD2,ENSG00000049883,protein_coding -25727,NEXMIF,ENSG00000050030,protein_coding -42943,JKAMP,ENSG00000050130,protein_coding -32322,DKK3,ENSG00000050165,protein_coding -24290,ARHGEF5,ENSG00000050327,protein_coding -21962,NFE2L3,ENSG00000050344,protein_coding -18744,MCUR1,ENSG00000050393,protein_coding -38623,LIMA1,ENSG00000050405,protein_coding -38652,LETMD1,ENSG00000050426,protein_coding -38666,SLC4A8,ENSG00000050438,protein_coding -31520,LAMC3,ENSG00000050555,protein_coding -1987,PTGER3,ENSG00000050628,protein_coding -14588,TNIP3,ENSG00000050730,protein_coding -18425,MAPK9,ENSG00000050748,protein_coding -18357,COL23A1,ENSG00000050767,protein_coding -48591,BCAR1,ENSG00000050820,protein_coding -32158,FAM160A2,ENSG00000051009,protein_coding -48114,HERPUD1,ENSG00000051108,protein_coding -55806,HOMER3,ENSG00000051128,protein_coding -44856,RAD51,ENSG00000051180,protein_coding -11443,POLQ,ENSG00000051341,protein_coding -11807,PIK3CB,ENSG00000051382,protein_coding -48912,CYBA,ENSG00000051523,protein_coding -18264,THOC3,ENSG00000051596,protein_coding -21011,HEBP2,ENSG00000051620,protein_coding -40232,MPHOSPH9,ENSG00000051825,protein_coding -38054,PLEKHA5,ENSG00000052126,protein_coding 
-47595,PRSS8,ENSG00000052344,protein_coding -2714,SIKE1,ENSG00000052723,protein_coding -36838,RRP12,ENSG00000052749,protein_coding -15095,FNIP2,ENSG00000052795,protein_coding -15163,MSMO1,ENSG00000052802,protein_coding -32749,TTC17,ENSG00000052841,protein_coding -32777,ALX4,ENSG00000052850,protein_coding -17421,FSTL4,ENSG00000053108,protein_coding -43496,FOXN3,ENSG00000053254,protein_coding -20594,METTL24,ENSG00000053328,protein_coding -633,AKR7A2,ENSG00000053371,protein_coding -628,MRTO4,ENSG00000053372,protein_coding -54091,NNAT,ENSG00000053438,protein_coding -55702,USE1,ENSG00000053501,protein_coding -12486,MCF2L2,ENSG00000053524,protein_coding -37552,NRIP2,ENSG00000053702,protein_coding -52493,LAMA3,ENSG00000053747,protein_coding -42901,AP5M1,ENSG00000053770,protein_coding -13298,ANAPC4,ENSG00000053900,protein_coding -31941,KCNQ1,ENSG00000053918,protein_coding -1172,TRAPPC3,ENSG00000054116,protein_coding -1175,THRAP3,ENSG00000054118,protein_coding -31707,PHPT1,ENSG00000054148,protein_coding -31728,ENTPD2,ENSG00000054179,protein_coding -8236,LY75,ENSG00000054219,protein_coding -5147,ARID4B,ENSG00000054267,protein_coding -5258,OPN3,ENSG00000054277,protein_coding -5291,SDCCAG8,ENSG00000054282,protein_coding -9200,PTPRN,ENSG00000054356,protein_coding -4626,HHAT,ENSG00000054392,protein_coding -351,KIF1B,ENSG00000054523,protein_coding -18520,FOXC1,ENSG00000054598,protein_coding -59545,TBC1D22A,ENSG00000054611,protein_coding -43025,SYNE2,ENSG00000054654,protein_coding -43093,PLEKHH1,ENSG00000054690,protein_coding -54442,ATP9A,ENSG00000054793,protein_coding -54526,SPO11,ENSG00000054796,protein_coding -54492,CBLN4,ENSG00000054803,protein_coding -33942,CHRDL2,ENSG00000054938,protein_coding -33897,FAM168A,ENSG00000054965,protein_coding -33895,RELT,ENSG00000054967,protein_coding -43471,GALC,ENSG00000054983,protein_coding -8862,NOP58,ENSG00000055044,protein_coding -553,SZRD1,ENSG00000055070,protein_coding -24405,KCNH2,ENSG00000055118,protein_coding 
-24327,CUL1,ENSG00000055130,protein_coding -17953,FAM114A2,ENSG00000055147,protein_coding -18001,CYFIP2,ENSG00000055163,protein_coding -21145,TAB2,ENSG00000055208,protein_coding -21156,GINM1,ENSG00000055211,protein_coding -6076,EIF2AK2,ENSG00000055332,protein_coding -51845,AC022966.1,ENSG00000055483,protein_coding -24448,KMT2C,ENSG00000055609,protein_coding -2160,MCOLN3,ENSG00000055732,protein_coding -6388,CCDC85A,ENSG00000055813,protein_coding -5772,PUM2,ENSG00000055917,protein_coding -36920,MRPL43,ENSG00000055950,protein_coding -10655,ITIH4,ENSG00000055955,protein_coding -10653,ITIH1,ENSG00000055957,protein_coding -15206,HPF1,ENSG00000056050,protein_coding -15992,ZFR,ENSG00000056097,protein_coding -26432,ZNF280C,ENSG00000056277,protein_coding -13910,NPFFR2,ENSG00000056291,protein_coding -59492,PHF21B,ENSG00000056487,protein_coding -31237,TRAF1,ENSG00000056558,protein_coding -31285,RC3H2,ENSG00000056586,protein_coding -10680,IL17RB,ENSG00000056736,protein_coding -20631,TRAF3IP2,ENSG00000056972,protein_coding -24594,GYG2,ENSG00000056998,protein_coding -11123,DCBLD2,ENSG00000057019,protein_coding -53064,SERPINB3,ENSG00000057149,protein_coding -4054,SOAT1,ENSG00000057252,protein_coding -38313,PKP2,ENSG00000057294,protein_coding -2043,MSH4,ENSG00000057468,protein_coding -41884,F7,ENSG00000057593,protein_coding -35261,GDI2,ENSG00000057608,protein_coding -20502,PRDM1,ENSG00000057657,protein_coding -20503,ATG5,ENSG00000057663,protein_coding -39580,TMCC3,ENSG00000057704,protein_coding -769,PITHD1,ENSG00000057757,protein_coding -6166,MTA3,ENSG00000057935,protein_coding -12420,USP13,ENSG00000058056,protein_coding -12478,ATP11B,ENSG00000058063,protein_coding -4139,LAMC2,ENSG00000058085,protein_coding -23151,CDK14,ENSG00000058091,protein_coding -11595,SEC61A1,ENSG00000058262,protein_coding -39392,PPP1R12A,ENSG00000058272,protein_coding -45861,RASGRF1,ENSG00000058335,protein_coding -22330,CAMK2B,ENSG00000058404,protein_coding -578,CROCC,ENSG00000058453,protein_coding 
-47247,POLR3E,ENSG00000058600,protein_coding -4447,ATP2B4,ENSG00000058668,protein_coding -4451,ZC3H11A,ENSG00000058673,protein_coding -16955,RIOK2,ENSG00000058729,protein_coding -1706,YIPF1,ENSG00000058799,protein_coding -1703,NDC1,ENSG00000058804,protein_coding -12570,DGKG,ENSG00000058866,protein_coding -46706,FLYWCH1,ENSG00000059122,protein_coding -46555,UNKL,ENSG00000059145,protein_coding -24059,TBXAS1,ENSG00000059377,protein_coding -24060,PARP12,ENSG00000059378,protein_coding -36798,ALDH18A1,ENSG00000059573,protein_coding -5119,TARBP1,ENSG00000059588,protein_coding -14955,GATB,ENSG00000059691,protein_coding -6621,MXD1,ENSG00000059728,protein_coding -39622,CDK17,ENSG00000059758,protein_coding -31108,DNAJC25,ENSG00000059769,protein_coding -37738,SLC2A3,ENSG00000059804,protein_coding -36968,PSD,ENSG00000059915,protein_coding -53275,CTDP1,ENSG00000060069,protein_coding -37881,YBX3,ENSG00000060138,protein_coding -37880,STYK1,ENSG00000060140,protein_coding -37505,WNK1,ENSG00000060237,protein_coding -36230,CCAR1,ENSG00000060339,protein_coding -54657,OGFR,ENSG00000060491,protein_coding -54952,GNA15,ENSG00000060558,protein_coding -55001,CREB3L3,ENSG00000060566,protein_coding -882,PIGV,ENSG00000060642,protein_coding -996,PTPRU,ENSG00000060656,protein_coding -1024,SNRNP40,ENSG00000060688,protein_coding -40408,RIMBP2,ENSG00000060709,protein_coding -2464,COL11A1,ENSG00000060718,protein_coding -32633,QSER1,ENSG00000060749,protein_coding -21436,MPC1,ENSG00000060762,protein_coding -10177,ACAA1,ENSG00000060971,protein_coding -38128,BCAT1,ENSG00000060982,protein_coding -38487,HDAC7,ENSG00000061273,protein_coding -27445,LZTS1,ENSG00000061337,protein_coding -17260,PRDM6,ENSG00000061455,protein_coding -17535,WNT8A,ENSG00000061492,protein_coding -54034,SPAG4,ENSG00000061656,protein_coding -8586,NCKAP1,ENSG00000061676,protein_coding -38197,MRPS35,ENSG00000061794,protein_coding -15047,GUCY1B1,ENSG00000061918,protein_coding -40455,SFSWAP,ENSG00000061936,protein_coding 
-12754,TNK2,ENSG00000061938,protein_coding -39088,MON2,ENSG00000061987,protein_coding -48391,CDH3,ENSG00000062038,protein_coding -24600,ARSF,ENSG00000062096,protein_coding -16338,GPBP1,ENSG00000062194,protein_coding -33986,DGAT2,ENSG00000062282,protein_coding -56735,ZNF112,ENSG00000062370,protein_coding -38941,CS,ENSG00000062485,protein_coding -44893,LTK,ENSG00000062524,protein_coding -22303,MRPS24,ENSG00000062582,protein_coding -54318,ELMO2,ENSG00000062598,protein_coding -36588,WAPL,ENSG00000062650,protein_coding -51276,VMP1,ENSG00000062716,protein_coding -51312,APPBP2,ENSG00000062725,protein_coding -57098,POLD1,ENSG00000062822,protein_coding -50050,SEZ6,ENSG00000063015,protein_coding -38743,EIF4B,ENSG00000063046,protein_coding -57013,SLC6A16,ENSG00000063127,protein_coding -56907,BICRA,ENSG00000063169,protein_coding -56956,SPHK2,ENSG00000063176,protein_coding -56954,RPL18,ENSG00000063177,protein_coding -56958,CA11,ENSG00000063180,protein_coding -57520,ISOC2,ENSG00000063241,protein_coding -57538,U2AF2,ENSG00000063244,protein_coding -57541,EPN1,ENSG00000063245,protein_coding -56472,MED29,ENSG00000063322,protein_coding -15516,AHRR,ENSG00000063438,protein_coding -58446,GSC2,ENSG00000063515,protein_coding -26846,ZNF275,ENSG00000063587,protein_coding -26776,MTMR1,ENSG00000063601,protein_coding -9598,GPC1,ENSG00000063660,protein_coding -43390,ADCK1,ENSG00000063761,protein_coding -46592,HAGH,ENSG00000063854,protein_coding -12941,RNF4,ENSG00000063978,protein_coding -8827,CASP8,ENSG00000064012,protein_coding -13479,LIMCH1,ENSG00000064042,protein_coding -38173,INTS13,ENSG00000064102,protein_coding -38175,TM7SF3,ENSG00000064115,protein_coding -51053,DLX3,ENSG00000064195,protein_coding -34959,SPA17,ENSG00000064199,protein_coding -31932,TSPAN32,ENSG00000064201,protein_coding -54233,CCN5,ENSG00000064205,protein_coding -29487,DMRT3,ENSG00000064218,protein_coding -11121,ST3GAL6,ENSG00000064225,protein_coding -48784,ATP2C2,ENSG00000064270,protein_coding 
-51034,NGFR,ENSG00000064300,protein_coding -35006,CDON,ENSG00000064309,protein_coding -28998,TAF2,ENSG00000064313,protein_coding -24058,HIPK2,ENSG00000064393,protein_coding -23853,TNPO3,ENSG00000064419,protein_coding -55814,BORCS8-MEF2B,ENSG00000064489,protein_coding -55816,RFXANK,ENSG00000064490,protein_coding -55812,TMEM161A,ENSG00000064545,protein_coding -55837,LPAR2,ENSG00000064547,protein_coding -54301,CTSA,ENSG00000064601,protein_coding -55809,SUGP2,ENSG00000064607,protein_coding -17327,SLC12A2,ENSG00000064651,protein_coding -17254,SNX24,ENSG00000064652,protein_coding -54332,EYA2,ENSG00000064655,protein_coding -54823,CNN2,ENSG00000064666,protein_coding -54825,ABCA7,ENSG00000064687,protein_coding -17245,SNCAIP,ENSG00000064692,protein_coding -2645,DDX20,ENSG00000064703,protein_coding -45974,BTBD1,ENSG00000064726,protein_coding -38219,FAR2,ENSG00000064763,protein_coding -54477,BCAS1,ENSG00000064787,protein_coding -11024,POU1F1,ENSG00000064835,protein_coding -2617,CHI3L2,ENSG00000064886,protein_coding -54829,SBNO2,ENSG00000064932,protein_coding -8661,PMS1,ENSG00000064933,protein_coding -54972,HMG20B,ENSG00000064961,protein_coding -8629,CALCRL,ENSG00000064989,protein_coding -19529,TAF11,ENSG00000064995,protein_coding -19530,ANKS1A,ENSG00000064999,protein_coding -54894,AP3D1,ENSG00000065000,protein_coding -19537,ZNF76,ENSG00000065029,protein_coding -46619,SLC9A3R2,ENSG00000065054,protein_coding -46620,NTHL1,ENSG00000065057,protein_coding -19526,UHRF1BP1,ENSG00000065060,protein_coding -2552,GNAI3,ENSG00000065135,protein_coding -41666,IPO5,ENSG00000065150,protein_coding -37307,OAT,ENSG00000065154,protein_coding -2784,WDR3,ENSG00000065183,protein_coding -2213,PKN2,ENSG00000065243,protein_coding -54817,WDR18,ENSG00000065268,protein_coding -19905,TRAM2,ENSG00000065308,protein_coding -49428,NTN1,ENSG00000065320,protein_coding -49447,GLP2R,ENSG00000065325,protein_coding -35378,MCM10,ENSG00000065328,protein_coding -38909,DGKA,ENSG00000065357,protein_coding 
-38921,ERBB3,ENSG00000065361,protein_coding -11491,ROPN1,ENSG00000065371,protein_coding -8742,ANKRD44,ENSG00000065413,protein_coding -48614,KARS,ENSG00000065427,protein_coding -48613,ADAT1,ENSG00000065457,protein_coding -11478,PDIA5,ENSG00000065485,protein_coding -19602,TBC1D22B,ENSG00000065491,protein_coding -11427,NDUFB4,ENSG00000065518,protein_coding -530,SPEN,ENSG00000065526,protein_coding -11486,MYLK,ENSG00000065534,protein_coding -8619,ZC3H15,ENSG00000065548,protein_coding -49490,MAP2K4,ENSG00000065559,protein_coding -4675,TMEM206,ENSG00000065600,protein_coding -20260,SNAP91,ENSG00000065609,protein_coding -37026,SLK,ENSG00000065613,protein_coding -20264,CYB5R4,ENSG00000065615,protein_coding -37028,COL17A1,ENSG00000065618,protein_coding -37035,GSTO2,ENSG00000065621,protein_coding -35355,SEC61A2,ENSG00000065665,protein_coding -35283,PRKCQ,ENSG00000065675,protein_coding -54945,TLE2,ENSG00000065717,protein_coding -9560,ASB1,ENSG00000065802,protein_coding -35405,FAM107B,ENSG00000065809,protein_coding -20257,ME1,ENSG00000065833,protein_coding -13399,TBC1D1,ENSG00000065882,protein_coding -22264,CDK13,ENSG00000065883,protein_coding -6726,MTHFD2,ENSG00000065911,protein_coding -25131,SLC9A7,ENSG00000065923,protein_coding -37745,FOXJ2,ENSG00000065970,protein_coding -1367,YBX1,ENSG00000065978,protein_coding -55318,PDE4A,ENSG00000065989,protein_coding -4669,PPP2R5A,ENSG00000066027,protein_coding -6824,CTNNA2,ENSG00000066032,protein_coding -55191,ELAVL1,ENSG00000066044,protein_coding -1395,TIE1,ENSG00000066056,protein_coding -38633,DIP2B,ENSG00000066084,protein_coding -38618,SMARCD1,ENSG00000066117,protein_coding -1410,KDM4A,ENSG00000066135,protein_coding -1321,NFYC,ENSG00000066136,protein_coding -1357,ZMYND12,ENSG00000066185,protein_coding -15522,SLC9A3,ENSG00000066230,protein_coding -9448,NGEF,ENSG00000066248,protein_coding -4294,ASPM,ENSG00000066279,protein_coding -3642,CD84,ENSG00000066294,protein_coding -1400,ELOVL1,ENSG00000066322,protein_coding 
-32863,SPI1,ENSG00000066336,protein_coding -19199,ZNRD1,ENSG00000066379,protein_coding -32597,MPPED2,ENSG00000066382,protein_coding -11792,CLDN18,ENSG00000066405,protein_coding -11170,ZBTB11,ENSG00000066422,protein_coding -43558,ATXN3,ENSG00000066427,protein_coding -43567,GOLGA5,ENSG00000066455,protein_coding -37255,FGFR2,ENSG00000066468,protein_coding -1973,LRRC40,ENSG00000066557,protein_coding -17333,ISOC1,ENSG00000066583,protein_coding -43713,EML1,ENSG00000066629,protein_coding -20801,TRMT11,ENSG00000066651,protein_coding -47192,THUMPD1,ENSG00000066654,protein_coding -30953,MSANTD3,ENSG00000066697,protein_coding -43960,KIF26A,ENSG00000066735,protein_coding -43661,ATG2B,ENSG00000066739,protein_coding -28251,ARFGEF1,ENSG00000066777,protein_coding -47185,ACSM2B,ENSG00000066813,protein_coding -29217,ZFAT,ENSG00000066827,protein_coding -28218,MTFR1,ENSG00000066855,protein_coding -23362,STAG3,ENSG00000066923,protein_coding -52948,FECH,ENSG00000066926,protein_coding -45617,MYO9A,ENSG00000066933,protein_coding -57930,DDX3Y,ENSG00000067048,protein_coding -35196,PFKP,ENSG00000067057,protein_coding -35171,IDI1,ENSG00000067064,protein_coding -9358,SP100,ENSG00000067066,protein_coding -35209,KLF6,ENSG00000067082,protein_coding -16294,PLPP1,ENSG00000067113,protein_coding -45656,NEO1,ENSG00000067141,protein_coding -28300,TRAM1,ENSG00000067167,protein_coding -25652,PHKA1,ENSG00000067177,protein_coding -37630,TNFRSF1A,ENSG00000067182,protein_coding -50471,CACNB1,ENSG00000067191,protein_coding -2290,EVI5,ENSG00000067208,protein_coding -45674,STOML1,ENSG00000067221,protein_coding -45626,PKM,ENSG00000067225,protein_coding -16291,DHX29,ENSG00000067248,protein_coding -2325,DNTTIP2,ENSG00000067334,protein_coding -46867,METTL22,ENSG00000067365,protein_coding -44966,TP53BP1,ENSG00000067369,protein_coding -25426,TRO,ENSG00000067445,protein_coding -4741,RRP15,ENSG00000067533,protein_coding -10503,RHOA,ENSG00000067560,protein_coding -50752,DHX8,ENSG00000067596,protein_coding 
-147,PRKCZ,ENSG00000067606,protein_coding -57752,ZFY,ENSG00000067646,protein_coding -4765,IARS2,ENSG00000067704,protein_coding -39383,SYT1,ENSG00000067715,protein_coding -39372,NAV3,ENSG00000067798,protein_coding -26869,IDH3G,ENSG00000067829,protein_coding -46819,ROGDI,ENSG00000067836,protein_coding -26871,PDZD4,ENSG00000067840,protein_coding -26855,ATP2B3,ENSG00000067842,protein_coding -52432,ROCK1,ENSG00000067900,protein_coding -48287,CBFB,ENSG00000067955,protein_coding -24859,PDK3,ENSG00000067992,protein_coding -10555,HYAL2,ENSG00000068001,protein_coding -9571,HDAC4,ENSG00000068024,protein_coding -10557,RASSF1,ENSG00000068028,protein_coding -12921,FGFR3,ENSG00000068078,protein_coding -50714,IFI35,ENSG00000068079,protein_coding -51294,HEATR6,ENSG00000068097,protein_coding -50673,COASY,ENSG00000068120,protein_coding -50683,PLEKHH3,ENSG00000068137,protein_coding -46360,MEF2A,ENSG00000068305,protein_coding -25240,OTUD5,ENSG00000068308,protein_coding -25245,TFE3,ENSG00000068323,protein_coding -25217,TBC1D25,ENSG00000068354,protein_coding -26139,ACSL4,ENSG00000068366,protein_coding -37420,INPP5A,ENSG00000068383,protein_coding -25251,GPKOW,ENSG00000068394,protein_coding -25244,GRIPAP1,ENSG00000068400,protein_coding -25211,FTSJ1,ENSG00000068438,protein_coding -51258,PRR11,ENSG00000068489,protein_coding -6928,REEP1,ENSG00000068615,protein_coding -41876,ATP11A,ENSG00000068650,protein_coding -6920,POLR1A,ENSG00000068654,protein_coding -5763,LAPTM4A,ENSG00000068697,protein_coding -6256,TTC7A,ENSG00000068724,protein_coding -10467,IP6K2,ENSG00000068745,protein_coding -6292,STON1-GTF2A1L,ENSG00000068781,protein_coding -6228,SRBD1,ENSG00000068784,protein_coding -16398,KIF2A,ENSG00000068796,protein_coding -33484,RASGRP2,ENSG00000068831,protein_coding -6340,PSME4,ENSG00000068878,protein_coding -12176,IFT80,ENSG00000068885,protein_coding -56442,SIRT2,ENSG00000068903,protein_coding -6337,ERLEC1,ENSG00000068912,protein_coding -33499,PPP2R5B,ENSG00000068971,protein_coding 
-33485,PYGM,ENSG00000068976,protein_coding -25283,PAGE1,ENSG00000068985,protein_coding -17476,PITX1,ENSG00000069011,protein_coding -17511,TRPC7,ENSG00000069018,protein_coding -16450,MAST4,ENSG00000069020,protein_coding -19833,ADGRF5,ENSG00000069122,protein_coding -51616,SDK2,ENSG00000069188,protein_coding -27543,ADAM7,ENSG00000069206,protein_coding -5024,NUP133,ENSG00000069248,protein_coding -4515,NUCKS1,ENSG00000069275,protein_coding -47859,VPS35,ENSG00000069329,protein_coding -47869,DNAJA2,ENSG00000069345,protein_coding -56754,BCL3,ENSG00000069399,protein_coding -233,KCNAB2,ENSG00000069424,protein_coding -38095,ABCC9,ENSG00000069431,protein_coding -33736,GAL,ENSG00000069482,protein_coding -37837,CLEC2D,ENSG00000069493,protein_coding -25093,FUNDC1,ENSG00000069509,protein_coding -25083,MAOB,ENSG00000069535,protein_coding -45362,RORA,ENSG00000069667,protein_coding -31842,DRD4,ENSG00000069696,protein_coding -2273,TGFBR3,ENSG00000069702,protein_coding -47013,PLA2G10,ENSG00000069764,protein_coding -245,HES2,ENSG00000069812,protein_coding -11870,ATP1B3,ENSG00000069849,protein_coding -45248,NEDD4,ENSG00000069869,protein_coding -45234,PIGB,ENSG00000069943,protein_coding -45191,MAPK6,ENSG00000069956,protein_coding -45199,GNB5,ENSG00000069966,protein_coding -45231,RAB27A,ENSG00000069974,protein_coding -58356,HDHD5,ENSG00000069998,protein_coding -58460,UFD1,ENSG00000070010,protein_coding -37931,LRP6,ENSG00000070018,protein_coding -37993,GUCY2C,ENSG00000070019,protein_coding -31841,SCT,ENSG00000070031,protein_coding -31838,PHRF1,ENSG00000070047,protein_coding -31068,ELP1,ENSG00000070061,protein_coding -32403,NUCB2,ENSG00000070081,protein_coding -11991,PFN2,ENSG00000070087,protein_coding -31079,PTPN3,ENSG00000070159,protein_coding -43045,SPTB,ENSG00000070182,protein_coding -14295,DAPP1,ENSG00000070190,protein_coding -16197,FGF10,ENSG00000070193,protein_coding -31017,SLC44A1,ENSG00000070214,protein_coding -42884,TMEM260,ENSG00000070269,protein_coding 
-49069,SMG6,ENSG00000070366,protein_coding -42900,EXOC5,ENSG00000070367,protein_coding -58449,CLTCL1,ENSG00000070371,protein_coding -54792,FGF22,ENSG00000070388,protein_coding -54795,FSTL3,ENSG00000070404,protein_coding -58436,DGCR2,ENSG00000070413,protein_coding -54793,AC004156.1,ENSG00000070423,protein_coding -49090,MNT,ENSG00000070444,protein_coding -11559,ZXDC,ENSG00000070476,protein_coding -51771,JMJD6,ENSG00000070495,protein_coding -27883,POLB,ENSG00000070501,protein_coding -51765,ST6GALNAC1,ENSG00000070526,protein_coding -51547,WIPI1,ENSG00000070540,protein_coding -30087,FRMPD1,ENSG00000070601,protein_coding -30023,GBA2,ENSG00000070610,protein_coding -17898,NDST1,ENSG00000070614,protein_coding -23264,ASNS,ENSG00000070669,protein_coding -27879,AP3M2,ENSG00000070718,protein_coding -48157,CNGB1,ENSG00000070729,protein_coding -51761,ST6GALNAC2,ENSG00000070731,protein_coding -36032,CHAT,ENSG00000070748,protein_coding -28774,PABPC1,ENSG00000070756,protein_coding -1479,TESK2,ENSG00000070759,protein_coding -48164,CFAP20,ENSG00000070761,protein_coding -48167,CSNK2A2,ENSG00000070770,protein_coding -43483,AL162171.1,ENSG00000070778,protein_coding -1466,EIF2B3,ENSG00000070785,protein_coding -17891,CAMK2A,ENSG00000070808,protein_coding -17893,TCOF1,ENSG00000070814,protein_coding -717,CDC42,ENSG00000070831,protein_coding -21942,OSBPL3,ENSG00000070882,protein_coding -729,EPHA8,ENSG00000070886,protein_coding -48111,SLC12A3,ENSG00000070915,protein_coding -9747,RAD18,ENSG00000070950,protein_coding -39491,ATP2B1,ENSG00000070961,protein_coding -31939,TRPM5,ENSG00000070985,protein_coding -7385,NCK2,ENSG00000071051,protein_coding -7318,MAP4K4,ENSG00000071054,protein_coding -7260,MGAT4A,ENSG00000071073,protein_coding -7302,RPL31,ENSG00000071082,protein_coding -13140,WDR1,ENSG00000071127,protein_coding -21840,SNX13,ENSG00000071189,protein_coding -33260,MS4A12,ENSG00000071203,protein_coding -14907,ARHGAP10,ENSG00000071205,protein_coding -21439,RPS6KA2,ENSG00000071242,protein_coding 
-23712,ING3,ENSG00000071243,protein_coding -43342,VASH1,ENSG00000071246,protein_coding -9741,LMCD1,ENSG00000071282,protein_coding -22903,BUD23,ENSG00000071462,protein_coding -43427,SEL1L,ENSG00000071537,protein_coding -15537,TRIP13,ENSG00000071539,protein_coding -26907,ATP6AP1,ENSG00000071553,protein_coding -54866,TCF3,ENSG00000071564,protein_coding -5687,TRIB2,ENSG00000071575,protein_coding -54849,DAZAP1,ENSG00000071626,protein_coding -54862,MBD3,ENSG00000071655,protein_coding -9534,PRLH,ENSG00000071677,protein_coding -11961,HLTF,ENSG00000071794,protein_coding -26909,FAM50A,ENSG00000071859,protein_coding -26916,FAM3A,ENSG00000071889,protein_coding -29416,CPSF1,ENSG00000071894,protein_coding -8353,MYO3B,ENSG00000071909,protein_coding -8377,CYBRD1,ENSG00000071967,protein_coding -53085,CDH19,ENSG00000071991,protein_coding -21535,PDCD2,ENSG00000071994,protein_coding -39440,SLC6A15,ENSG00000072041,protein_coding -43105,RDH11,ENSG00000072042,protein_coding -55540,PRKACA,ENSG00000072062,protein_coding -55544,ADGRL1,ENSG00000072071,protein_coding -9488,SPP2,ENSG00000072080,protein_coding -43130,ACTN1,ENSG00000072110,protein_coding -43109,ZFYVE26,ENSG00000072121,protein_coding -25815,RPS6KA6,ENSG00000072133,protein_coding -49758,EPN2,ENSG00000072134,protein_coding -7823,PTPN18,ENSG00000072135,protein_coding -7744,LIMS2,ENSG00000072163,protein_coding -9216,ASIC4,ENSG00000072182,protein_coding -9210,SPEG,ENSG00000072195,protein_coding -13624,LNX1,ENSG00000072201,protein_coding -49787,ALDH3A2,ENSG00000072210,protein_coding -12764,TFRC,ENSG00000072274,protein_coding -49666,SREBF1,ENSG00000072310,protein_coding -26169,TRPC5,ENSG00000072315,protein_coding -17405,AFF4,ENSG00000072364,protein_coding -36115,UBE2D1,ENSG00000072401,protein_coding -43085,MPP5,ENSG00000072415,protein_coding -36140,RHOBTB1,ENSG00000072422,protein_coding -25397,SMC1A,ENSG00000072501,protein_coding -25400,HSD17B10,ENSG00000072506,protein_coding -33434,MARK2,ENSG00000072518,protein_coding 
-18088,HMMR,ENSG00000072571,protein_coding -40509,CHFR,ENSG00000072609,protein_coding -39300,TRHDE,ENSG00000072657,protein_coding -17375,P4HA2,ENSG00000072682,protein_coding -3706,FCGR2B,ENSG00000072694,protein_coding -48358,NFATC3,ENSG00000072736,protein_coding -9696,TRNT1,ENSG00000072756,protein_coding -49296,ACADVL,ENSG00000072778,protein_coding -18181,STK10,ENSG00000072786,protein_coding -18180,FBXW11,ENSG00000072803,protein_coding -49316,ACAP1,ENSG00000072818,protein_coding -13012,CRMP1,ENSG00000072832,protein_coding -13011,EVC,ENSG00000072840,protein_coding -49237,DERL2,ENSG00000072849,protein_coding -11319,SIDT1,ENSG00000072858,protein_coding -47056,NDE1,ENSG00000072864,protein_coding -32297,MRVI1,ENSG00000072952,protein_coding -55684,TMEM38A,ENSG00000072954,protein_coding -55656,AP1M1,ENSG00000072958,protein_coding -56748,PVR,ENSG00000073008,protein_coding -56686,XRCC1,ENSG00000073050,protein_coding -40293,SCARB1,ENSG00000073060,protein_coding -21575,CYP2W1,ENSG00000073067,protein_coding -11587,MCM2,ENSG00000073111,protein_coding -59601,MOV10L1,ENSG00000073146,protein_coding -59603,PANX2,ENSG00000073150,protein_coding -59607,SELENOO,ENSG00000073169,protein_coding -12636,TP63,ENSG00000073282,protein_coding -14467,ALPK1,ENSG00000073331,protein_coding -51700,LLGL2,ENSG00000073350,protein_coding -46038,PDE8A,ENSG00000073417,protein_coding -24673,CLCN4,ENSG00000073464,protein_coding -50284,NLE1,ENSG00000073536,protein_coding -15510,SDHA,ENSG00000073578,protein_coding -50535,SMARCE1,ENSG00000073584,protein_coding -50283,FNDC8,ENSG00000073598,protein_coding -50496,GSDMB,ENSG00000073605,protein_coding -37496,KDM5A,ENSG00000073614,protein_coding -50817,ADAM11,ENSG00000073670,protein_coding -11763,PPP2R3A,ENSG00000073711,protein_coding -42822,FERMT2,ENSG00000073712,protein_coding -8330,ABCB11,ENSG00000073734,protein_coding -8331,DHRS9,ENSG00000073737,protein_coding -3530,CD5L,ENSG00000073754,protein_coding -4206,PTGS2,ENSG00000073756,protein_coding 
-12562,IGF2BP2,ENSG00000073792,protein_coding -12555,MAP3K13,ENSG00000073803,protein_coding -12606,ST6GAL1,ENSG00000073849,protein_coding -50931,TBX21,ENSG00000073861,protein_coding -40861,FRY,ENSG00000073910,protein_coding -34140,PICALM,ENSG00000073921,protein_coding -50898,NSF,ENSG00000073969,protein_coding -7674,GLI2,ENSG00000074047,protein_coding -7681,CLASP1,ENSG00000074054,protein_coding -46587,MRPS34,ENSG00000074071,protein_coding -55593,NOTCH3,ENSG00000074181,protein_coding -34028,CLNS1A,ENSG00000074201,protein_coding -13022,PPP2R2C,ENSG00000074211,protein_coding -57017,TEAD2,ENSG00000074219,protein_coding -34145,EED,ENSG00000074266,protein_coding -18291,CDHR2,ENSG00000074276,protein_coding -18295,SNCB,ENSG00000074317,protein_coding -32447,TSG101,ENSG00000074319,protein_coding -49152,NCBP3,ENSG00000074356,protein_coding -49155,ATP2A3,ENSG00000074370,protein_coding -45415,CA12,ENSG00000074410,protein_coding -11591,MGLL,ENSG00000074416,protein_coding -39604,NTN4,ENSG00000074527,protein_coding -9156,BCS1L,ENSG00000074582,protein_coding -39794,NUAK1,ENSG00000074590,protein_coding -45481,DPP8,ENSG00000074603,protein_coding -45488,SLC24A1,ENSG00000074621,protein_coding -52983,ZNF532,ENSG00000074657,protein_coding -49043,SCARF1,ENSG00000074660,protein_coding -52991,LMAN1,ENSG00000074695,protein_coding -45485,HACD3,ENSG00000074696,protein_coding -21256,IPCEF1,ENSG00000074706,protein_coding -49157,ZZEF1,ENSG00000074755,protein_coding -21277,NOX3,ENSG00000074771,protein_coding -304,ENO1,ENSG00000074800,protein_coding -45092,SLC12A1,ENSG00000074803,protein_coding -55034,MYDGF,ENSG00000074842,protein_coding -55714,ANO8,ENSG00000074855,protein_coding -20636,TUBE1,ENSG00000074935,protein_coding -606,ARHGEF10L,ENSG00000074964,protein_coding -13559,TXK,ENSG00000074966,protein_coding -39834,WSCD2,ENSG00000075035,protein_coding -54690,KCNQ2,ENSG00000075043,protein_coding -36253,TACR2,ENSG00000075073,protein_coding -39684,ACTR6,ENSG00000075089,protein_coding 
-45503,TIPIN,ENSG00000075131,protein_coding -23120,SRI,ENSG00000075142,protein_coding -681,EIF4G3,ENSG00000075151,protein_coding -39730,NUP37,ENSG00000075188,protein_coding -23085,SEMA3A,ENSG00000075213,protein_coding -59537,GTSE1,ENSG00000075218,protein_coding -23064,SEMA3C,ENSG00000075223,protein_coding -59535,TTC38,ENSG00000075234,protein_coding -34517,ACAT1,ENSG00000075239,protein_coding -59542,GRAMD4,ENSG00000075240,protein_coding -59539,CELSR1,ENSG00000075275,protein_coding -36909,WNT8B,ENSG00000075290,protein_coding -6671,ZNF638,ENSG00000075292,protein_coding -23117,SLC25A40,ENSG00000075303,protein_coding -53165,TIMM21,ENSG00000075336,protein_coding -6641,ADD2,ENSG00000075340,protein_coding -33767,FGF4,ENSG00000075388,protein_coding -4026,RASAL2,ENSG00000075391,protein_coding -48966,VPS9D1,ENSG00000075399,protein_coding -35807,ZNF37A,ENSG00000075407,protein_coding -43926,MARK3,ENSG00000075413,protein_coding -39664,SLC25A3,ENSG00000075415,protein_coding -12323,FNDC3B,ENSG00000075420,protein_coding -5957,FOSL2,ENSG00000075426,protein_coding -51494,CACNG5,ENSG00000075429,protein_coding -51496,CACNG4,ENSG00000075461,protein_coding -13567,FRYL,ENSG00000075539,protein_coding -7249,TMEM131,ENSG00000075568,protein_coding -21671,FSCN1,ENSG00000075618,protein_coding -21668,ACTB,ENSG00000075624,protein_coding -52682,MOCOS,ENSG00000075643,protein_coding -12318,PLD1,ENSG00000075651,protein_coding -40713,ATP12A,ENSG00000075673,protein_coding -56318,WDR62,ENSG00000075702,protein_coding -12813,DLG1,ENSG00000075711,protein_coding -11613,RAB7A,ENSG00000075785,protein_coding -23561,BCAP29,ENSG00000075790,protein_coding -36910,SEC31B,ENSG00000075826,protein_coding -39839,SART3,ENSG00000075856,protein_coding -8026,ARHGAP15,ENSG00000075884,protein_coding -7891,TUBA3D,ENSG00000075886,protein_coding -36915,PAX2,ENSG00000075891,protein_coding -10345,EXOSC7,ENSG00000075914,protein_coding -3881,KIFAP3,ENSG00000075945,protein_coding -9845,MKRN2,ENSG00000075975,protein_coding 
-7970,MCM6,ENSG00000076003,protein_coding -34660,REXO2,ENSG00000076043,protein_coding -34658,RBM7,ENSG00000076053,protein_coding -38961,RBMS2,ENSG00000076067,protein_coding -38963,BAZ2A,ENSG00000076108,protein_coding -10412,PTPN23,ENSG00000076201,protein_coding -10150,MLH1,ENSG00000076242,protein_coding -39862,UNG,ENSG00000076248,protein_coding -3911,FMO4,ENSG00000076258,protein_coding -3960,KLHL20,ENSG00000076321,protein_coding -46462,RGS11,ENSG00000076344,protein_coding -49994,SLC46A1,ENSG00000076351,protein_coding -4588,PLXNA2,ENSG00000076356,protein_coding -50006,SPAG5,ENSG00000076382,protein_coding -39887,ANKRD13A,ENSG00000076513,protein_coding -28427,TPD52,ENSG00000076554,protein_coding -39864,ACACB,ENSG00000076555,protein_coding -50029,TRAF4,ENSG00000076604,protein_coding -28454,PAG1,ENSG00000076641,protein_coding -56162,GPATCH1,ENSG00000076650,protein_coding -55314,ICAM3,ENSG00000076662,protein_coding -36999,NT5C2,ENSG00000076685,protein_coding -34809,AP002956.1,ENSG00000076706,protein_coding -26483,GPC4,ENSG00000076716,protein_coding -26473,MBNL3,ENSG00000076770,protein_coding -55154,CAMSAP3,ENSG00000076826,protein_coding -701,RAP1GAP,ENSG00000076864,protein_coding -55156,XAB2,ENSG00000076924,protein_coding -56620,ARHGEF1,ENSG00000076928,protein_coding -55161,STXBP2,ENSG00000076944,protein_coding -55182,MAP2K7,ENSG00000076984,protein_coding -54991,NMRK2,ENSG00000077009,protein_coding -9458,DGKD,ENSG00000077044,protein_coding -23695,CTTNBP2,ENSG00000077063,protein_coding -23396,ACTL6B,ENSG00000077080,protein_coding -10011,RARB,ENSG00000077092,protein_coding -10016,TOP2B,ENSG00000077097,protein_coding -36817,TM9SF3,ENSG00000077147,protein_coding -36967,NFKB2,ENSG00000077150,protein_coding -4402,UBE2T,ENSG00000077152,protein_coding -4403,PPP1R12B,ENSG00000077157,protein_coding -8581,DNAJC10,ENSG00000077232,protein_coding -47350,GTF3C1,ENSG00000077235,protein_coding -47347,IL4R,ENSG00000077238,protein_coding -2067,USP33,ENSG00000077254,protein_coding 
-26155,PAK3,ENSG00000077264,protein_coding -26158,CAPN6,ENSG00000077274,protein_coding -26159,DCX,ENSG00000077279,protein_coding -56546,SNRPA,ENSG00000077312,protein_coding -35534,SPAG6,ENSG00000077327,protein_coding -56585,EXOSC5,ENSG00000077348,protein_coding -8379,DYNC1I2,ENSG00000077380,protein_coding -35585,APBB1IP,ENSG00000077420,protein_coding -23390,LRCH4,ENSG00000077454,protein_coding -34371,FAM76B,ENSG00000077458,protein_coding -55002,AC016586.1,ENSG00000077463,protein_coding -34200,TYR,ENSG00000077498,protein_coding -33939,POLD3,ENSG00000077514,protein_coding -5190,ACTN2,ENSG00000077522,protein_coding -636,CAPZB,ENSG00000077549,protein_coding -5179,GPR137B,ENSG00000077585,protein_coding -34257,NAALAD2,ENSG00000077616,protein_coding -14671,JADE1,ENSG00000077684,protein_coding -26271,SLC25A43,ENSG00000077713,protein_coding -26279,UBE2A,ENSG00000077721,protein_coding -27806,FGFR1,ENSG00000077782,protein_coding -22891,FKBP6,ENSG00000077800,protein_coding -59505,SMC1B,ENSG00000077935,protein_coding -59508,FBLN1,ENSG00000077942,protein_coding -35430,ITGA8,ENSG00000077943,protein_coding -53784,CST7,ENSG00000077984,protein_coding -9018,MAP2,ENSG00000078018,protein_coding -52773,PIAS2,ENSG00000078043,protein_coding -22237,AMPH,ENSG00000078053,protein_coding -25159,ARAF,ENSG00000078061,protein_coding -12483,MCCC1,ENSG00000078070,protein_coding -12485,LAMP3,ENSG00000078081,protein_coding -8273,FAP,ENSG00000078098,protein_coding -35492,NEBL,ENSG00000078114,protein_coding -34018,ACER3,ENSG00000078124,protein_coding -13440,UBE2K,ENSG00000078140,protein_coding -52722,PIK3C3,ENSG00000078142,protein_coding -13452,N4BP2,ENSG00000078177,protein_coding -37588,TIGAR,ENSG00000078237,protein_coding -37556,TULP3,ENSG00000078246,protein_coding -21306,SYNJ2,ENSG00000078269,protein_coding -15654,ADCY2,ENSG00000078295,protein_coding -43875,PPP2R5C,ENSG00000078304,protein_coding -46836,RBFOX1,ENSG00000078328,protein_coding -139,GNB1,ENSG00000078369,protein_coding 
-21998,HOXA9,ENSG00000078399,protein_coding -18725,EDN1,ENSG00000078401,protein_coding -35510,MLLT10,ENSG00000078403,protein_coding -23375,ZCWPW1,ENSG00000078487,protein_coding -22099,ADCYAP1R1,ENSG00000078549,protein_coding -27382,FGF20,ENSG00000078579,protein_coding -25778,P2RY10,ENSG00000078589,protein_coding -25783,ITM2A,ENSG00000078596,protein_coding -1626,NRDC,ENSG00000078618,protein_coding -27886,VDAC3,ENSG00000078668,protein_coding -27408,PCM1,ENSG00000078674,protein_coding -51812,TNRC6C,ENSG00000078687,protein_coding -53955,CBFA2T2,ENSG00000078699,protein_coding -31223,BRINP1,ENSG00000078725,protein_coding -53984,ITCH,ENSG00000078747,protein_coding -17530,PKD2L2,ENSG00000078795,protein_coding -53998,TP53INP2,ENSG00000078804,protein_coding -86,SDF4,ENSG00000078808,protein_coding -54004,MYH7B,ENSG00000078814,protein_coding -53938,BPIFB2,ENSG00000078898,protein_coding -196,TP73,ENSG00000078900,protein_coding -31877,TOLLIP,ENSG00000078902,protein_coding -22306,UBE2D4,ENSG00000078967,protein_coding -52070,CLUL1,ENSG00000079101,protein_coding -28601,RUNX1T1,ENSG00000079102,protein_coding -28641,CDH17,ENSG00000079112,protein_coding -52062,THOC1,ENSG00000079134,protein_coding -8529,FKBP7,ENSG00000079150,protein_coding -8524,OSBPL6,ENSG00000079156,protein_coding -16062,SLC1A3,ENSG00000079215,protein_coding -9085,XRCC5,ENSG00000079246,protein_coding -12150,LXN,ENSG00000079257,protein_coding -9355,SP140,ENSG00000079263,protein_coding -1522,MKNK1,ENSG00000079277,protein_coding -9124,TNS1,ENSG00000079308,protein_coding -54872,REXO1,ENSG00000079313,protein_coding -36273,SAR1A,ENSG00000079332,protein_coding -2432,CDC14A,ENSG00000079335,protein_coding -38484,RAPGEF3,ENSG00000079337,protein_coding -56650,CEACAM1,ENSG00000079385,protein_coding -38502,SENP1,ENSG00000079387,protein_coding -36414,DUSP13,ENSG00000079393,protein_coding -56637,CIC,ENSG00000079432,protein_coding -56645,LIPE,ENSG00000079435,protein_coding -27277,FDFT1,ENSG00000079459,protein_coding 
-56638,PAFAH1B3,ENSG00000079462,protein_coding -25541,OPHN1,ENSG00000079482,protein_coding -13924,AFM,ENSG00000079557,protein_coding -47461,KIF22,ENSG00000079616,protein_coding -18910,SCGN,ENSG00000079689,protein_coding -18906,CARMIL1,ENSG00000079691,protein_coding -1876,PGM1,ENSG00000079739,protein_coding -5703,DDX1,ENSG00000079785,protein_coding -55333,DNM2,ENSG00000079805,protein_coding -20856,EPB41L2,ENSG00000079819,protein_coding -20109,RIMS1,ENSG00000079841,protein_coding -20881,MOXD1,ENSG00000079931,protein_coding -20882,STX7,ENSG00000079950,protein_coding -59648,RABL2B,ENSG00000079974,protein_coding -55319,KEAP1,ENSG00000079999,protein_coding -20136,DDX43,ENSG00000080007,protein_coding -57494,AC010327.1,ENSG00000080031,protein_coding -41616,DCT,ENSG00000080166,protein_coding -54316,SLC35C2,ENSG00000080189,protein_coding -11084,CRYBG3,ENSG00000080200,protein_coding -11079,EPHA6,ENSG00000080224,protein_coding -7649,SCTR,ENSG00000080293,protein_coding -29513,RFX3,ENSG00000080298,protein_coding -8127,RIF1,ENSG00000080345,protein_coding -39293,RAB21,ENSG00000080371,protein_coding -13904,SLC4A4,ENSG00000080493,protein_coding -29499,SMARCA2,ENSG00000080503,protein_coding -55286,RDH8,ENSG00000080511,protein_coding -20571,SESN1,ENSG00000080546,protein_coding -26114,MID2,ENSG00000080561,protein_coding -26105,PIH1D3,ENSG00000080572,protein_coding -55284,COL5A3,ENSG00000080573,protein_coding -47551,SRCAP,ENSG00000080603,protein_coding -29508,PUM3,ENSG00000080608,protein_coding -41094,CPB2,ENSG00000080618,protein_coding -45847,CHRNA3,ENSG00000080644,protein_coding -17137,KCNN2,ENSG00000080709,protein_coding -23981,CNOT4,ENSG00000080802,protein_coding -43221,PSEN1,ENSG00000080815,protein_coding -11117,CPOX,ENSG00000080819,protein_coding -11111,CLDND1,ENSG00000080822,protein_coding -43888,MOK,ENSG00000080823,protein_coding -43886,HSP90AA1,ENSG00000080824,protein_coding -54078,RBL1,ENSG00000080839,protein_coding -54062,DLGAP4,ENSG00000080845,protein_coding 
-35122,IGSF9B,ENSG00000080854,protein_coding -4290,CFHR2,ENSG00000080910,protein_coding -52107,NDC80,ENSG00000080986,protein_coding -45158,AP4E1,ENSG00000081014,protein_coding -2686,RSBN1,ENSG00000081019,protein_coding -2681,MAGI3,ENSG00000081026,protein_coding -13946,CXCL2,ENSG00000081041,protein_coding -13923,AFP,ENSG00000081051,protein_coding -9303,COL4A4,ENSG00000081052,protein_coding -17434,TCF7,ENSG00000081059,protein_coding -20542,OSTM1,ENSG00000081087,protein_coding -53081,CDH7,ENSG00000081138,protein_coding -11156,IMPG2,ENSG00000081148,protein_coding -11164,PCNP,ENSG00000081154,protein_coding -43140,EXD2,ENSG00000081177,protein_coding -43099,ARG2,ENSG00000081181,protein_coding -16848,MEF2C,ENSG00000081189,protein_coding -4314,PTPRC,ENSG00000081237,protein_coding -4356,CACNA1S,ENSG00000081248,protein_coding -4361,PKP1,ENSG00000081277,protein_coding -11717,UBA5,ENSG00000081307,protein_coding -8730,STK17B,ENSG00000081320,protein_coding -30862,CDC14B,ENSG00000081377,protein_coding -30866,ZNF510,ENSG00000081386,protein_coding -8334,LRP2,ENSG00000081479,protein_coding -55852,ZNF506,ENSG00000081665,protein_coding -4941,JMJD4,ENSG00000081692,protein_coding -3713,DUSP12,ENSG00000081721,protein_coding -40303,AACS,ENSG00000081760,protein_coding -17743,DELE1,ENSG00000081791,protein_coding -23744,SLC13A1,ENSG00000081800,protein_coding -23735,CADPS2,ENSG00000081803,protein_coding -17673,PCDHB4,ENSG00000081818,protein_coding -17650,PCDHA6,ENSG00000081842,protein_coding -17702,PCDHGA2,ENSG00000081853,protein_coding -1708,HSPB11,ENSG00000081870,protein_coding -53048,PHLPP1,ENSG00000081913,protein_coding -52953,ATP8B1,ENSG00000081923,protein_coding -1933,IL12RB2,ENSG00000081985,protein_coding -24425,SMARCD3,ENSG00000082014,protein_coding -16080,WDR70,ENSG00000082068,protein_coding -16110,FYB1,ENSG00000082074,protein_coding -8844,MPP4,ENSG00000082126,protein_coding -8830,STRADB,ENSG00000082146,protein_coding -8797,BZW1,ENSG00000082153,protein_coding 
-34404,PGR,ENSG00000082175,protein_coding -16019,C1QTNF3,ENSG00000082196,protein_coding -52871,ME2,ENSG00000082212,protein_coding -15965,C5orf22,ENSG00000082213,protein_coding -7957,CCNT2,ENSG00000082258,protein_coding -20079,FAM135A,ENSG00000082269,protein_coding -20072,COL19A1,ENSG00000082293,protein_coding -52168,EPB41L3,ENSG00000082397,protein_coding -8287,COBLL1,ENSG00000082438,protein_coding -25588,DLG3,ENSG00000082458,protein_coding -4721,KCNK2,ENSG00000082482,protein_coding -4623,SERTAD4,ENSG00000082497,protein_coding -4641,TRAF5,ENSG00000082512,protein_coding -17977,MRPL22,ENSG00000082515,protein_coding -17976,GEMIN5,ENSG00000082516,protein_coding -28030,OPRK1,ENSG00000082556,protein_coding -50952,NFE2L1,ENSG00000082641,protein_coding -11477,SEMA5B,ENSG00000082684,protein_coding -11409,GSK3B,ENSG00000082701,protein_coding -11501,ITGB5,ENSG00000082781,protein_coding -37510,ERC1,ENSG00000082805,protein_coding -6449,XPO1,ENSG00000082898,protein_coding -11988,RNF13,ENSG00000082996,protein_coding -30449,TRPM3,ENSG00000083067,protein_coding -47282,PALB2,ENSG00000083093,protein_coding -20254,DOP1A,ENSG00000083097,protein_coding -20365,LYRM2,ENSG00000083099,protein_coding -20227,BCKDHB,ENSG00000083123,protein_coding -27873,KAT6A,ENSG00000083168,protein_coding -30633,TUT7,ENSG00000083223,protein_coding -49796,ULK2,ENSG00000083290,protein_coding -28805,GRHL2,ENSG00000083307,protein_coding -16564,TNPO1,ENSG00000083312,protein_coding -414,PLOD1,ENSG00000083444,protein_coding -49147,P2RX5,ENSG00000083454,protein_coding -49148,ITGAE,ENSG00000083457,protein_coding -41401,DIS3,ENSG00000083520,protein_coding -41402,PIBF1,ENSG00000083535,protein_coding -41307,TDRD3,ENSG00000083544,protein_coding -41062,NUFIP1,ENSG00000083635,protein_coding -40875,PDS5B,ENSG00000083642,protein_coding -16141,OXCT1,ENSG00000083720,protein_coding -25456,RRAGB,ENSG00000083750,protein_coding -39507,EPYC,ENSG00000083782,protein_coding -47965,CYLD,ENSG00000083799,protein_coding 
-57727,SLC27A5,ENSG00000083807,protein_coding -57724,ZNF324,ENSG00000083812,protein_coding -57659,ZNF671,ENSG00000083814,protein_coding -57648,ZNF416,ENSG00000083817,protein_coding -57661,ZNF586,ENSG00000083828,protein_coding -57725,ZNF446,ENSG00000083838,protein_coding -57617,ZNF264,ENSG00000083844,protein_coding -57713,RPS5,ENSG00000083845,protein_coding -15433,FAT1,ENSG00000083857,protein_coding -13817,YTHDC1,ENSG00000083896,protein_coding -11023,CHMP2B,ENSG00000083937,protein_coding -1302,SMAP2,ENSG00000084070,protein_coding -1274,PPIE,ENSG00000084072,protein_coding -1298,ZMPSTE24,ENSG00000084073,protein_coding -7189,STARD7,ENSG00000084090,protein_coding -13705,NOA1,ENSG00000084092,protein_coding -13703,REST,ENSG00000084093,protein_coding -39612,HAL,ENSG00000084110,protein_coding -39850,SSH1,ENSG00000084112,protein_coding -33690,GSTP1,ENSG00000084207,protein_coding -35074,APLP2,ENSG00000084234,protein_coding -37969,FAM234B,ENSG00000084444,protein_coding -38082,SLCO1A2,ENSG00000084453,protein_coding -37998,WBP11,ENSG00000084463,protein_coding -1064,EIF3I,ENSG00000084623,protein_coding -1023,NKAIN1,ENSG00000084628,protein_coding -1041,COL16A1,ENSG00000084636,protein_coding -1058,TXLNA,ENSG00000084652,protein_coding -5789,APOB,ENSG00000084674,protein_coding -5841,NCOA1,ENSG00000084676,protein_coding -5901,AGBL5,ENSG00000084693,protein_coding -5854,EFR3B,ENSG00000084710,protein_coding -5870,KIF3C,ENSG00000084731,protein_coding -5873,RAB10,ENSG00000084733,protein_coding -5937,GCKR,ENSG00000084734,protein_coding -5881,HADHA,ENSG00000084754,protein_coding -5898,MAPRE3,ENSG00000084764,protein_coding -5917,CAD,ENSG00000084774,protein_coding -32652,CD59,ENSG00000085063,protein_coding -32780,CD82,ENSG00000085117,protein_coding -26427,BCORL1,ENSG00000085185,protein_coding -25758,ATRX,ENSG00000085224,protein_coding -16497,AK6,ENSG00000085231,protein_coding -31627,FCN1,ENSG00000085265,protein_coding -12271,MYNN,ENSG00000085274,protein_coding 
-12261,MECOM,ENSG00000085276,protein_coding -16685,SCAMP1,ENSG00000085365,protein_coding -20493,PREP,ENSG00000085377,protein_coding -20483,HACE1,ENSG00000085382,protein_coding -52348,SEH1L,ENSG00000085415,protein_coding -2527,WDR47,ENSG00000085433,protein_coding -9278,WDFY1,ENSG00000085449,protein_coding -2627,OVGP1,ENSG00000085465,protein_coding -2502,SLC25A24,ENSG00000085491,protein_coding -21383,MAP3K4,ENSG00000085511,protein_coding -23373,PILRA,ENSG00000085514,protein_coding -3613,IGSF9,ENSG00000085552,protein_coding -23114,ABCB1,ENSG00000085563,protein_coding -46733,ZNF213,ENSG00000085644,protein_coding -23962,AKR1B1,ENSG00000085662,protein_coding -28545,CPNE3,ENSG00000085719,protein_coding -47039,RRN3,ENSG00000085721,protein_coding -33789,CTTN,ENSG00000085733,protein_coding -33998,WNT11,ENSG00000085741,protein_coding -6364,MTIF2,ENSG00000085760,protein_coding -27799,DDHD2,ENSG00000085788,protein_coding -1613,TTC39A,ENSG00000085831,protein_coding -1615,EPS15,ENSG00000085832,protein_coding -1653,ORC1,ENSG00000085840,protein_coding -14787,MGST2,ENSG00000085871,protein_coding -55667,CHERP,ENSG00000085872,protein_coding -9453,ATG16L1,ENSG00000085978,protein_coding -9460,USP40,ENSG00000085982,protein_coding -1508,POMGNT1,ENSG00000085998,protein_coding -1510,RAD54L,ENSG00000085999,protein_coding -1499,MAST2,ENSG00000086015,protein_coding -29887,DNAJA1,ENSG00000086061,protein_coding -29889,B4GALT1,ENSG00000086062,protein_coding -29894,CHMP5,ENSG00000086065,protein_coding -29895,NFX1,ENSG00000086102,protein_coding -38615,AQP6,ENSG00000086159,protein_coding -16399,DIMT1,ENSG00000086189,protein_coding -16400,IPO11,ENSG00000086200,protein_coding -32932,FOLH1,ENSG00000086205,protein_coding -21684,EIF2AK1,ENSG00000086232,protein_coding -22210,NME8,ENSG00000086288,protein_coding -22207,EPDR1,ENSG00000086289,protein_coding -21967,SNX10,ENSG00000086300,protein_coding -35383,SEPHS1,ENSG00000086475,protein_coding -46467,MRPL28,ENSG00000086504,protein_coding 
-46457,HBQ1,ENSG00000086506,protein_coding -56544,ITPKC,ENSG00000086544,protein_coding -56610,CEACAM6,ENSG00000086548,protein_coding -17927,FAT2,ENSG00000086570,protein_coding -17904,RBM22,ENSG00000086589,protein_coding -40251,TMED2,ENSG00000086598,protein_coding -5180,ERO1B,ENSG00000086619,protein_coding -45891,ZFAND6,ENSG00000086666,protein_coding -48739,HSD17B2,ENSG00000086696,protein_coding -24761,TXLNG,ENSG00000086712,protein_coding -24790,PPEF1,ENSG00000086717,protein_coding -22919,LAT2,ENSG00000086730,protein_coding -25404,HUWE1,ENSG00000086758,protein_coding -34638,ZW10,ENSG00000086827,protein_coding -34579,ALG9,ENSG00000086848,protein_coding -57101,MYBPC2,ENSG00000086967,protein_coding -34204,NOX4,ENSG00000086991,protein_coding -13063,ACOX3,ENSG00000087008,protein_coding -34373,MTMR2,ENSG00000087053,protein_coding -56972,PPP1R15A,ENSG00000087074,protein_coding -56970,HSD17B14,ENSG00000087076,protein_coding -23406,TRIP6,ENSG00000087077,protein_coding -23410,ACHE,ENSG00000087085,protein_coding -56980,FTL,ENSG00000087086,protein_coding -23408,SRRT,ENSG00000087087,protein_coding -56978,BAX,ENSG00000087088,protein_coding -49965,NLK,ENSG00000087095,protein_coding -50003,PIGS,ENSG00000087111,protein_coding -18385,ADAMTS2,ENSG00000087116,protein_coding -13819,TMPRSS11E,ENSG00000087128,protein_coding -50784,ATXN7L3,ENSG00000087152,protein_coding -51835,PGS1,ENSG00000087157,protein_coding -51410,PSMC5,ENSG00000087191,protein_coding -18304,UIMC1,ENSG00000087206,protein_coding -48117,CETP,ENSG00000087237,protein_coding -48059,MMP2,ENSG00000087245,protein_coding -48089,MT3,ENSG00000087250,protein_coding -48063,LPCAT2,ENSG00000087253,protein_coding -48075,GNAO1,ENSG00000087258,protein_coding -48085,OGFOD1,ENSG00000087263,protein_coding -12948,SH3BP2,ENSG00000087266,protein_coding -12953,NOP14,ENSG00000087269,protein_coding -12949,ADD1,ENSG00000087274,protein_coding -42749,L2HGDH,ENSG00000087299,protein_coding -42811,TXNDC16,ENSG00000087301,protein_coding 
-42803,RTRAF,ENSG00000087302,protein_coding -42804,NID2,ENSG00000087303,protein_coding -6617,GMCL1,ENSG00000087338,protein_coding -33598,SF3B2,ENSG00000087365,protein_coding -38201,KLHL42,ENSG00000087448,protein_coding -54565,GNAS,ENSG00000087460,protein_coding -38304,DNM1L,ENSG00000087470,protein_coding -38206,PTHLH,ENSG00000087494,protein_coding -54588,PHACTR3,ENSG00000087495,protein_coding -38225,ERGIC2,ENSG00000087502,protein_coding -54510,TFAP2C,ENSG00000087510,protein_coding -54498,AURKA,ENSG00000087586,protein_coding -54500,CASS4,ENSG00000087589,protein_coding -24734,PIR,ENSG00000087842,protein_coding -34037,AAMDC,ENSG00000087884,protein_coding -55082,RFX2,ENSG00000087903,protein_coding -51359,METTL2A,ENSG00000087995,protein_coding -56951,SULT2B1,ENSG00000088002,protein_coding -1863,ALG6,ENSG00000088035,protein_coding -57403,CNOT3,ENSG00000088038,protein_coding -57479,GP6,ENSG00000088053,protein_coding -7654,PTPN4,ENSG00000088179,protein_coding -7627,DDX18,ENSG00000088205,protein_coding -55102,KHSRP,ENSG00000088247,protein_coding -54948,GNA11,ENSG00000088256,protein_coding -759,ASAP3,ENSG00000088280,protein_coding -54009,EDEM2,ENSG00000088298,protein_coding -53932,DNMT3B,ENSG00000088305,protein_coding -53880,REM1,ENSG00000088320,protein_coding -53895,TPX2,ENSG00000088325,protein_coding -53902,PDRG1,ENSG00000088356,protein_coding -54058,EPB41L1,ENSG00000088367,protein_coding -41682,SLC15A1,ENSG00000088386,protein_coding -41683,DOCK9,ENSG00000088387,protein_coding -41845,ANKRD10,ENSG00000088448,protein_coding -41617,TGDS,ENSG00000088451,protein_coding -10574,DOCK3,ENSG00000088538,protein_coding -10568,C3orf18,ENSG00000088543,protein_coding -48138,COQ9,ENSG00000088682,protein_coding -9849,TMEM40,ENSG00000088726,protein_coding -10407,KIF9,ENSG00000088727,protein_coding -52191,ARHGAP28,ENSG00000088756,protein_coding -53481,CRLS1,ENSG00000088766,protein_coding -53296,DEFB127,ENSG00000088782,protein_coding -43946,PPP1R13B,ENSG00000088808,protein_coding 
-53407,ATRN,ENSG00000088812,protein_coding -53430,SMOX,ENSG00000088826,protein_coding -53411,SIGLEC1,ENSG00000088827,protein_coding -53334,FKBP1A,ENSG00000088832,protein_coding -53336,NSFL1C,ENSG00000088833,protein_coding -53400,SLC4A11,ENSG00000088836,protein_coding -53402,C20orf194,ENSG00000088854,protein_coding -53369,ZNF343,ENSG00000088876,protein_coding -53380,EBF4,ENSG00000088881,protein_coding -53382,CPXM1,ENSG00000088882,protein_coding -53420,MAVS,ENSG00000088888,protein_coding -53397,LZTS3,ENSG00000088899,protein_coding -15426,F11,ENSG00000088926,protein_coding -53698,XRN2,ENSG00000088930,protein_coding -53691,KIZ,ENSG00000088970,protein_coding -40127,DYNLL1,ENSG00000088986,protein_coding -40051,TESC,ENSG00000088992,protein_coding -53622,SNX5,ENSG00000089006,protein_coding -39957,AC004086.1,ENSG00000089009,protein_coding -53344,SIRPG,ENSG00000089012,protein_coding -39936,MAPKAPK5,ENSG00000089022,protein_coding -40161,P2RX7,ENSG00000089041,protein_coding -53570,ESF1,ENSG00000089048,protein_coding -53648,RBBP9,ENSG00000089050,protein_coding -40166,ANAPC5,ENSG00000089053,protein_coding -53443,SLC23A2,ENSG00000089057,protein_coding -39980,SLC8B1,ENSG00000089060,protein_coding -53450,TMEM230,ENSG00000089063,protein_coding -53642,DZANK1,ENSG00000089091,protein_coding -40169,KDM2B,ENSG00000089094,protein_coding -53674,CFAP61,ENSG00000089101,protein_coding -39984,LHX5,ENSG00000089116,protein_coding -53568,TASP1,ENSG00000089123,protein_coding -39961,AC004551.1,ENSG00000089127,protein_coding -40104,GCN1,ENSG00000089154,protein_coding -40106,RPLP0,ENSG00000089157,protein_coding -40108,PXN,ENSG00000089159,protein_coding -40115,SIRT4,ENSG00000089163,protein_coding -39959,RPH3A,ENSG00000089169,protein_coding -53594,KIF16B,ENSG00000089177,protein_coding -53473,TRMT6,ENSG00000089195,protein_coding -53471,CHGB,ENSG00000089199,protein_coding -40065,PEBP1,ENSG00000089220,protein_coding -39996,TBX5,ENSG00000089225,protein_coding -39929,BRAP,ENSG00000089234,protein_coding 
-39943,ERP29,ENSG00000089248,protein_coding -40054,NOS1,ENSG00000089250,protein_coding -47600,FUS,ENSG00000089280,protein_coding -25572,IGBP1,ENSG00000089289,protein_coding -56243,FXYD5,ENSG00000089327,protein_coding -56211,ZNF302,ENSG00000089335,protein_coding -56228,GRAMD1A,ENSG00000089351,protein_coding -56236,FXYD3,ENSG00000089356,protein_coding -25530,HEPH,ENSG00000089472,protein_coding -46798,CDIP1,ENSG00000089486,protein_coding -48262,CMTM1,ENSG00000089505,protein_coding -50650,KCNH4,ENSG00000089558,protein_coding -33360,GANAB,ENSG00000089597,protein_coding -55838,GMIP,ENSG00000089639,protein_coding -26101,RBM41,ENSG00000089682,protein_coding -51825,BIRC5,ENSG00000089685,protein_coding -37669,LAG3,ENSG00000089692,protein_coding -37667,MLF2,ENSG00000089693,protein_coding -43596,OTUB2,ENSG00000089723,protein_coding -43597,DDX24,ENSG00000089737,protein_coding -43035,ZBTB25,ENSG00000089775,protein_coding -37747,NECAP1,ENSG00000089818,protein_coding -26877,ARHGAP4,ENSG00000089820,protein_coding -55003,ANKRD24,ENSG00000089847,protein_coding -37343,DHX32,ENSG00000089876,protein_coding -43897,RCOR1,ENSG00000089902,protein_coding -43332,GPATCH2L,ENSG00000089916,protein_coding -56540,LTBP4,ENSG00000090006,protein_coding -56536,BLVRB,ENSG00000090013,protein_coding -898,SLC9A1,ENSG00000090020,protein_coding -30732,SPTLC1,ENSG00000090054,protein_coding -43668,PAPOLA,ENSG00000090060,protein_coding -43705,CCNK,ENSG00000090061,protein_coding -10603,PCBP4,ENSG00000090097,protein_coding -4253,RGS1,ENSG00000090104,protein_coding -47492,YPEL3,ENSG00000090238,protein_coding -24085,MRPS33,ENSG00000090263,protein_coding -24079,NDUFB2,ENSG00000090266,protein_coding -890,NUDC,ENSG00000090273,protein_coding -12908,MAEA,ENSG00000090316,protein_coding -55304,ICAM1,ENSG00000090339,protein_coding -56871,STRN4,ENSG00000090372,protein_coding -39183,IRAK3,ENSG00000090376,protein_coding -39248,LYZ,ENSG00000090382,protein_coding -12215,SI,ENSG00000090402,protein_coding 
-668,MUL1,ENSG00000090432,protein_coding -46783,TFAP4,ENSG00000090447,protein_coding -45469,PDCD7,ENSG00000090470,protein_coding -45459,SPG21,ENSG00000090487,protein_coding -12582,FETUB,ENSG00000090512,protein_coding -12576,DNAJB11,ENSG00000090520,protein_coding -12639,P3H2,ENSG00000090530,protein_coding -12537,THPO,ENSG00000090534,protein_coding -12538,CHRD,ENSG00000090539,protein_coding -57028,FLT3LG,ENSG00000090554,protein_coding -46473,RAB11FIP3,ENSG00000090565,protein_coding -46553,GNPTG,ENSG00000090581,protein_coding -40531,ZNF268,ENSG00000090612,protein_coding -40508,GOLGA3,ENSG00000090615,protein_coding -1267,PABPC4,ENSG00000090621,protein_coding -55169,CD209,ENSG00000090659,protein_coding -55197,CERS4,ENSG00000090661,protein_coding -55151,MCOLN1,ENSG00000090674,protein_coding -702,USP48,ENSG00000090686,protein_coding -25554,EFNB1,ENSG00000090776,protein_coding -48447,PDPR,ENSG00000090857,protein_coding -48453,AARS,ENSG00000090861,protein_coding -48571,GLG1,ENSG00000090863,protein_coding -25583,KIF4A,ENSG00000090889,protein_coding -47304,TNRC6A,ENSG00000090905,protein_coding -56475,PLEKHG2,ENSG00000090924,protein_coding -56479,DLL3,ENSG00000090932,protein_coding -57525,NAT14,ENSG00000090971,protein_coding -40228,PITPNM2,ENSG00000090975,protein_coding -13673,EXOC1,ENSG00000090989,protein_coding -17800,RBM27,ENSG00000091009,protein_coding -17803,POU4F3,ENSG00000091010,protein_coding -39357,OSBPL8,ENSG00000091039,protein_coding -22991,DTX2,ENSG00000091073,protein_coding -6021,NLRC4,ENSG00000091106,protein_coding -23528,PUS7,ENSG00000091127,protein_coding -23575,LAMB4,ENSG00000091128,protein_coding -23576,NRCAM,ENSG00000091129,protein_coding -23572,LAMB1,ENSG00000091136,protein_coding -23566,SLC26A4,ENSG00000091137,protein_coding -23569,SLC26A3,ENSG00000091138,protein_coding -23571,DLD,ENSG00000091140,protein_coding -52934,WDR7,ENSG00000091157,protein_coding -52933,TXNL1,ENSG00000091164,protein_coding -9694,IL5RA,ENSG00000091181,protein_coding 
-47070,ABCC6,ENSG00000091262,protein_coding -10092,CMTM6,ENSG00000091317,protein_coding -8391,ITGA6,ENSG00000091409,protein_coding -8400,RAPGEF4,ENSG00000091428,protein_coding -8402,MAP3K20,ENSG00000091436,protein_coding -24822,SMPX,ENSG00000091482,protein_coding -5256,FH,ENSG00000091483,protein_coding -13311,SEL1L3,ENSG00000091490,protein_coding -11735,TF,ENSG00000091513,protein_coding -11730,CDV3,ENSG00000091527,protein_coding -49679,MYO15A,ENSG00000091536,protein_coding -49681,ALKBH5,ENSG00000091542,protein_coding -51484,APOH,ENSG00000091583,protein_coding -49239,NLRP1,ENSG00000091592,protein_coding -49254,PITPNM3,ENSG00000091622,protein_coding -49206,SPAG7,ENSG00000091640,protein_coding -47862,ORC6,ENSG00000091651,protein_coding -28394,ZFHX4,ENSG00000091656,protein_coding -32512,SLC17A6,ENSG00000091664,protein_coding -23895,CPA1,ENSG00000091704,protein_coding -23884,ZC3HC1,ENSG00000091732,protein_coding -21222,ESR1,ENSG00000091831,protein_coding -21240,RGS17,ENSG00000091844,protein_coding -27065,ANGPT2,ENSG00000091879,protein_coding -50773,TMEM101,ENSG00000091947,protein_coding -11291,CD200,ENSG00000091972,protein_coding -11298,CCDC80,ENSG00000091986,protein_coding -42401,CMA1,ENSG00000092009,protein_coding -42357,PSME1,ENSG00000092010,protein_coding -42540,PPP2R3C,ENSG00000092020,protein_coding -42291,HAUS4,ENSG00000092036,protein_coding -42334,JPH4,ENSG00000092051,protein_coding -42324,MYH7,ENSG00000092054,protein_coding -42305,CEBPE,ENSG00000092067,protein_coding -42306,SLC7A8,ENSG00000092068,protein_coding -42051,OSGEP,ENSG00000092094,protein_coding -42317,SLC22A17,ENSG00000092096,protein_coding -42362,RNF31,ENSG00000092098,protein_coding -42463,SCFD1,ENSG00000092108,protein_coding -42462,G2E3,ENSG00000092140,protein_coding -42476,HECTD1,ENSG00000092148,protein_coding -42109,HNRNPC,ENSG00000092199,protein_coding -42110,RPGRIP1,ENSG00000092200,protein_coding -42111,SUPT16H,ENSG00000092201,protein_coding -42120,TOX4,ENSG00000092203,protein_coding 
-42621,GEMIN2,ENSG00000092208,protein_coding -42382,TGM1,ENSG00000092295,protein_coding -42381,AL096870.1,ENSG00000092330,protein_coding -9937,DAZL,ENSG00000092345,protein_coding -57798,TBL1Y,ENSG00000092377,protein_coding -17179,SEMA6A,ENSG00000092421,protein_coding -45150,TRPM7,ENSG00000092439,protein_coding -44896,TYRO3,ENSG00000092445,protein_coding -44991,WDR76,ENSG00000092470,protein_coding -44926,CAPN3,ENSG00000092529,protein_coding -44929,SNAP23,ENSG00000092531,protein_coding -2790,TBX15,ENSG00000092607,protein_coding -2819,PHGDH,ENSG00000092621,protein_coding -54658,COL9A3,ENSG00000092758,protein_coding -21325,EZR,ENSG00000092820,protein_coding -38932,MYL6,ENSG00000092841,protein_coding -1162,AGO1,ENSG00000092847,protein_coding -1169,TEKT2,ENSG00000092850,protein_coding -1159,CLSPN,ENSG00000092853,protein_coding -50275,RFFL,ENSG00000092871,protein_coding -51714,UNC13D,ENSG00000092929,protein_coding -51776,MFSD11,ENSG00000092931,protein_coding -27575,DPYSL2,ENSG00000092964,protein_coding -4745,TGFB2,ENSG00000092969,protein_coding -4732,GPATCH2,ENSG00000092978,protein_coding -59497,NUP50,ENSG00000093000,protein_coding -58463,CDC45,ENSG00000093009,protein_coding -58478,COMT,ENSG00000093010,protein_coding -58358,ADA2,ENSG00000093072,protein_coding -20897,VNN3,ENSG00000093134,protein_coding -20813,ECHDC1,ENSG00000093144,protein_coding -10152,LRRFIP2,ENSG00000093167,protein_coding -10272,SEC22C,ENSG00000093183,protein_coding -10186,XYLB,ENSG00000093217,protein_coding -25233,HDAC6,ENSG00000094631,protein_coding -55587,OR1I1,ENSG00000094661,protein_coding -18162,GABRP,ENSG00000094755,protein_coding -50602,KRT31,ENSG00000094796,protein_coding -50516,CDC6,ENSG00000094804,protein_coding -25730,UPRT,ENSG00000094841,protein_coding -17541,CDC23,ENSG00000094880,protein_coding -38766,AAAS,ENSG00000094914,protein_coding -38824,CBX5,ENSG00000094916,protein_coding -3906,FMO2,ENSG00000094963,protein_coding -3939,SUCO,ENSG00000094975,protein_coding 
-6270,MSH2,ENSG00000095002,protein_coding -16327,MAP3K1,ENSG00000095015,protein_coding -55458,DHPS,ENSG00000095059,protein_coding -55469,HOOK2,ENSG00000095066,protein_coding -34664,NXPE1,ENSG00000095110,protein_coding -34759,ARCN1,ENSG00000095139,protein_coding -31075,EPB41L4B,ENSG00000095203,protein_coding -31024,TMEM38B,ENSG00000095209,protein_coding -31234,PSMD5,ENSG00000095261,protein_coding -31262,PTGS1,ENSG00000095303,protein_coding -31459,NUP188,ENSG00000095319,protein_coding -31465,CRAT,ENSG00000095321,protein_coding -31385,SH2D3C,ENSG00000095370,protein_coding -30911,NANS,ENSG00000095380,protein_coding -30915,TBC1D2,ENSG00000095383,protein_coding -31177,WHRN,ENSG00000095397,protein_coding -36757,PDE6C,ENSG00000095464,protein_coding -36899,CWF19L1,ENSG00000095485,protein_coding -36921,SEMA4G,ENSG00000095539,protein_coding -36723,BTAF1,ENSG00000095564,protein_coding -37288,IKZF5,ENSG00000095574,protein_coding -36812,BLNK,ENSG00000095585,protein_coding -36816,TLL2,ENSG00000095587,protein_coding -36748,CYP26A1,ENSG00000095596,protein_coding -37144,TDRD1,ENSG00000095627,protein_coding -36796,SORBS1,ENSG00000095637,protein_coding -36859,CRTAC1,ENSG00000095713,protein_coding -35641,BAMBI,ENSG00000095739,protein_coding -57512,IL11,ENSG00000095752,protein_coding -35582,MYO3A,ENSG00000095777,protein_coding -35637,WAC,ENSG00000095787,protein_coding -35752,CREM,ENSG00000095794,protein_coding -46590,NUBP2,ENSG00000095906,protein_coding -46540,TPSD1,ENSG00000095917,protein_coding -54961,SMIM24,ENSG00000095932,protein_coding -18724,HIVEP1,ENSG00000095951,protein_coding -19667,TREM2,ENSG00000095970,protein_coding -19639,KCNK16,ENSG00000095981,protein_coding -19871,CRISP3,ENSG00000096006,protein_coding -19549,FKBP5,ENSG00000096060,protein_coding -19561,SRPK1,ENSG00000096063,protein_coding -19568,BRPF3,ENSG00000096070,protein_coding -19774,MRPS18A,ENSG00000096080,protein_coding -19690,PGC,ENSG00000096088,protein_coding -19909,TMEM14A,ENSG00000096092,protein_coding 
-19904,EFHC1,ENSG00000096093,protein_coding -19679,NCR2,ENSG00000096264,protein_coding -19793,HSP90AB1,ENSG00000096384,protein_coding -19497,MLN,ENSG00000096395,protein_coding -19802,CDC5L,ENSG00000096401,protein_coding -19492,ITPR3,ENSG00000096433,protein_coding -19014,ZNF184,ENSG00000096654,protein_coding -18650,DSP,ENSG00000096696,protein_coding -36205,SIRT1,ENSG00000096717,protein_coding -36216,HNRNPH3,ENSG00000096746,protein_coding -29821,IFT74,ENSG00000096872,protein_coding -29539,JAK2,ENSG00000096968,protein_coding -55757,IL12RB1,ENSG00000096996,protein_coding -31515,ABL1,ENSG00000097007,protein_coding -243,ACOT7,ENSG00000097021,protein_coding -2191,SH3GLB1,ENSG00000097033,protein_coding -2269,CDC7,ENSG00000097046,protein_coding -2163,SYDE2,ENSG00000097096,protein_coding -30507,PCSK5,ENSG00000099139,protein_coding -36906,SCD,ENSG00000099194,protein_coding -55338,TMED1,ENSG00000099203,protein_coding -37151,ABLIM1,ENSG00000099204,protein_coding -29568,ERMP1,ENSG00000099219,protein_coding -35618,RAB18,ENSG00000099246,protein_coding -35728,NRP1,ENSG00000099250,protein_coding -35564,PRTFDC1,ENSG00000099256,protein_coding -2404,PALMD,ENSG00000099260,protein_coding -36255,TSPAN15,ENSG00000099282,protein_coding -36270,H2AFY2,ENSG00000099284,protein_coding -36048,WASHC2A,ENSG00000099290,protein_coding -55758,MAST3,ENSG00000099308,protein_coding -57738,MZF1,ENSG00000099326,protein_coding -55703,OCEL1,ENSG00000099330,protein_coding -55697,MYO9B,ENSG00000099331,protein_coding -56412,KCNK6,ENSG00000099337,protein_coding -56413,CATSPERG,ENSG00000099338,protein_coding -56415,PSMD8,ENSG00000099341,protein_coding -47573,FBXL19,ENSG00000099364,protein_coding -47580,STX1B,ENSG00000099365,protein_coding -47579,HSD3B7,ENSG00000099377,protein_coding -47577,SETD1A,ENSG00000099381,protein_coding -47565,BCL7C,ENSG00000099385,protein_coding -24919,MAGEB2,ENSG00000099399,protein_coding -54839,EFNA2,ENSG00000099617,protein_coding -54836,CIRBP,ENSG00000099622,protein_coding 
-54834,ATP5F1D,ENSG00000099624,protein_coding -54832,CBARP,ENSG00000099625,protein_coding -57768,PCDH11Y,ENSG00000099715,protein_coding -57796,AMELY,ENSG00000099721,protein_coding -57801,PRKY,ENSG00000099725,protein_coding -46591,IGFALS,ENSG00000099769,protein_coding -55212,HNRNPM,ENSG00000099783,protein_coding -55209,MARCH2,ENSG00000099785,protein_coding -55562,NDUFB7,ENSG00000099795,protein_coding -55560,TECR,ENSG00000099797,protein_coding -54912,TIMM13,ENSG00000099800,protein_coding -54784,CDC34,ENSG00000099804,protein_coding -29774,MTAP,ENSG00000099810,protein_coding -54799,MISP,ENSG00000099812,protein_coding -43985,CEP170B,ENSG00000099814,protein_coding -54827,POLR2E,ENSG00000099817,protein_coding -54790,POLRMT,ENSG00000099821,protein_coding -54788,HCN2,ENSG00000099822,protein_coding -31840,CDHR5,ENSG00000099834,protein_coding -54893,IZUMO4,ENSG00000099840,protein_coding -31835,RASSF7,ENSG00000099849,protein_coding -54916,GADD45B,ENSG00000099860,protein_coding -54798,PALM,ENSG00000099864,protein_coding -54781,MADCAM1,ENSG00000099866,protein_coding -54891,MKNK2,ENSG00000099875,protein_coding -58480,ARVCF,ENSG00000099889,protein_coding -58490,TRMT2A,ENSG00000099899,protein_coding -58492,RANBP1,ENSG00000099901,protein_coding -58494,ZDHHC8,ENSG00000099904,protein_coding -58512,KLHL22,ENSG00000099910,protein_coding -58518,MED15,ENSG00000099917,protein_coding -58529,SERPIND1,ENSG00000099937,protein_coding -58530,SNAP29,ENSG00000099940,protein_coding -58532,CRKL,ENSG00000099942,protein_coding -58537,LZTR1,ENSG00000099949,protein_coding -58777,MMP11,ENSG00000099953,protein_coding -58364,CECR2,ENSG00000099954,protein_coding -58778,SMARCB1,ENSG00000099956,protein_coding -58542,P2RX6,ENSG00000099957,protein_coding -58779,DERL3,ENSG00000099958,protein_coding -58544,SLC7A4,ENSG00000099960,protein_coding -58372,BCL2L13,ENSG00000099968,protein_coding -58793,DDTL,ENSG00000099974,protein_coding -58795,DDT,ENSG00000099977,protein_coding -58992,OSM,ENSG00000099985,protein_coding 
-58802,CABIN1,ENSG00000099991,protein_coding -58996,TBC1D10A,ENSG00000099992,protein_coding -58804,SUSD2,ENSG00000099994,protein_coding -58997,SF3A1,ENSG00000099995,protein_coding -58805,GGT5,ENSG00000099998,protein_coding -58999,RNF215,ENSG00000099999,protein_coding -59000,SEC14L2,ENSG00000100003,protein_coding -59008,SEC14L3,ENSG00000100012,protein_coding -58810,SPECC1L,ENSG00000100014,protein_coding -58595,PPIL2,ENSG00000100023,protein_coding -58814,UPB1,ENSG00000100024,protein_coding -58596,YPEL1,ENSG00000100027,protein_coding -58817,SNRPD3,ENSG00000100028,protein_coding -59015,PES1,ENSG00000100029,protein_coding -58600,MAPK1,ENSG00000100030,protein_coding -58820,GGT1,ENSG00000100031,protein_coding -58424,PRODH,ENSG00000100033,protein_coding -58603,PPM1F,ENSG00000100034,protein_coding -59018,SLC35E4,ENSG00000100036,protein_coding -58605,TOP3B,ENSG00000100038,protein_coding -58843,CRYBB3,ENSG00000100053,protein_coding -59210,CYTH4,ENSG00000100055,protein_coding -58444,ESS2,ENSG00000100056,protein_coding -59215,MFNG,ENSG00000100060,protein_coding -59216,CARD10,ENSG00000100065,protein_coding -58853,LRP5L,ENSG00000100068,protein_coding -58448,SLC25A1,ENSG00000100075,protein_coding -58861,GRK3,ENSG00000100077,protein_coding -59040,PLA2G3,ENSG00000100078,protein_coding -59219,LGALS2,ENSG00000100079,protein_coding -59221,GGA1,ENSG00000100083,protein_coding -58455,HIRA,ENSG00000100084,protein_coding -59222,SH3BP1,ENSG00000100092,protein_coding -58873,SEZ6L,ENSG00000100095,protein_coding -59227,LGALS1,ENSG00000100097,protein_coding -58878,HPS4,ENSG00000100099,protein_coding -59048,PIK3IP1,ENSG00000100100,protein_coding -59229,Z83844.1,ENSG00000100101,protein_coding -58879,SRRD,ENSG00000100104,protein_coding -59051,PATZ1,ENSG00000100105,protein_coding -59230,TRIOBP,ENSG00000100106,protein_coding -58880,TFIP11,ENSG00000100109,protein_coding -59232,GCAT,ENSG00000100116,protein_coding -58682,GGTLC2,ENSG00000100121,protein_coding -58886,CRYBB1,ENSG00000100122,protein_coding 
-59234,ANKRD54,ENSG00000100124,protein_coding -59237,EIF3L,ENSG00000100129,protein_coding -59388,SNU13,ENSG00000100138,protein_coding -59240,MICALL1,ENSG00000100139,protein_coding -59244,POLR2F,ENSG00000100142,protein_coding -59246,SOX10,ENSG00000100146,protein_coding -59396,CCDC134,ENSG00000100147,protein_coding -59066,DEPDC5,ENSG00000100150,protein_coding -59250,PICK1,ENSG00000100151,protein_coding -58926,TTC28,ENSG00000100154,protein_coding -59252,SLC16A8,ENSG00000100156,protein_coding -59403,CENPM,ENSG00000100162,protein_coding -59405,SEPT3,ENSG00000100167,protein_coding -59078,SLC5A1,ENSG00000100170,protein_coding -59089,SLC5A4,ENSG00000100191,protein_coding -59270,KDELR3,ENSG00000100196,protein_coding -59418,CYP2D6,ENSG00000100197,protein_coding -59271,DDX17,ENSG00000100201,protein_coding -59272,DMC1,ENSG00000100206,protein_coding -59424,TCF20,ENSG00000100207,protein_coding -58935,HSCB,ENSG00000100209,protein_coding -59275,CBY1,ENSG00000100211,protein_coding -59279,TOMM22,ENSG00000100216,protein_coding -58741,RSPH14,ENSG00000100218,protein_coding -58937,XBP1,ENSG00000100219,protein_coding -59102,RTCB,ENSG00000100220,protein_coding -59280,JOSD1,ENSG00000100221,protein_coding -59104,FBXO7,ENSG00000100225,protein_coding -59281,GTPBP1,ENSG00000100226,protein_coding -59435,POLDIP3,ENSG00000100227,protein_coding -58745,RAB36,ENSG00000100228,protein_coding -59109,TIMP3,ENSG00000100234,protein_coding -59618,PPP6R2,ENSG00000100239,protein_coding -59619,SBF1,ENSG00000100241,protein_coding -59283,SUN2,ENSG00000100242,protein_coding -59438,CYB5R3,ENSG00000100243,protein_coding -59290,DNAL4,ENSG00000100246,protein_coding -58944,C22orf31,ENSG00000100249,protein_coding -59621,MIOX,ENSG00000100253,protein_coding -59622,LMF2,ENSG00000100258,protein_coding -58950,RHBDD3,ENSG00000100263,protein_coding -59446,PACSIN2,ENSG00000100266,protein_coding -59449,TTLL1,ENSG00000100271,protein_coding -58954,RASL10A,ENSG00000100276,protein_coding -58956,AP1B1,ENSG00000100280,protein_coding 
-59135,HMGXB4,ENSG00000100281,protein_coding -59137,TOM1,ENSG00000100284,protein_coding -58965,NEFH,ENSG00000100285,protein_coding -59634,CHKB,ENSG00000100288,protein_coding -59451,BIK,ENSG00000100290,protein_coding -59142,HMOX1,ENSG00000100292,protein_coding -59452,MCAT,ENSG00000100294,protein_coding -58966,THOC5,ENSG00000100296,protein_coding -59143,MCM5,ENSG00000100297,protein_coding -59305,APOBEC3H,ENSG00000100298,protein_coding -59638,ARSA,ENSG00000100299,protein_coding -59453,TSPO,ENSG00000100300,protein_coding -59146,RASD2,ENSG00000100302,protein_coding -59454,TTLL12,ENSG00000100304,protein_coding -59306,CBX7,ENSG00000100307,protein_coding -59310,PDGFB,ENSG00000100311,protein_coding -59644,ACR,ENSG00000100312,protein_coding -58973,CABP7,ENSG00000100314,protein_coding -59313,RPL3,ENSG00000100316,protein_coding -58974,ZMAT5,ENSG00000100319,protein_coding -59153,RBFOX2,ENSG00000100320,protein_coding -59317,SYNGR1,ENSG00000100321,protein_coding -59319,TAB1,ENSG00000100324,protein_coding -58978,ASCC2,ENSG00000100325,protein_coding -58979,MTMR3,ENSG00000100330,protein_coding -59322,MIEF1,ENSG00000100335,protein_coding -59167,APOL4,ENSG00000100336,protein_coding -59468,PNPLA5,ENSG00000100341,protein_coding -59169,APOL1,ENSG00000100342,protein_coding -59469,PNPLA3,ENSG00000100344,protein_coding -59170,MYH9,ENSG00000100345,protein_coding -59326,CACNA1I,ENSG00000100346,protein_coding -59470,SAMM50,ENSG00000100347,protein_coding -59177,TXN2,ENSG00000100348,protein_coding -59178,FOXRED2,ENSG00000100350,protein_coding -59331,GRAP2,ENSG00000100351,protein_coding -59179,EIF3D,ENSG00000100353,protein_coding -59335,TNRC6B,ENSG00000100354,protein_coding -59339,SGSM3,ENSG00000100359,protein_coding -59186,IFT27,ENSG00000100360,protein_coding -59188,PVALB,ENSG00000100362,protein_coding -59499,KIAA0930,ENSG00000100364,protein_coding -59191,NCF4,ENSG00000100365,protein_coding -59192,CSF2RB,ENSG00000100368,protein_coding -59349,SLC25A17,ENSG00000100372,protein_coding 
-59503,UPK3A,ENSG00000100373,protein_coding -59504,FAM118A,ENSG00000100376,protein_coding -59200,KCTD17,ENSG00000100379,protein_coding -59353,ST13,ENSG00000100380,protein_coding -59204,IL2RB,ENSG00000100385,protein_coding -59358,RBX1,ENSG00000100387,protein_coding -59364,EP300,ENSG00000100393,protein_coding -59369,L3MBTL2,ENSG00000100395,protein_coding -59371,CHADL,ENSG00000100399,protein_coding -59372,RANGAP1,ENSG00000100401,protein_coding -59374,ZC3H7B,ENSG00000100403,protein_coding -59379,PHF5A,ENSG00000100410,protein_coding -59380,ACO2,ENSG00000100412,protein_coding -59381,POLR3H,ENSG00000100413,protein_coding -59538,TRMU,ENSG00000100416,protein_coding -59384,PMM1,ENSG00000100417,protein_coding -59385,DESI1,ENSG00000100418,protein_coding -59543,CERK,ENSG00000100422,protein_coding -59584,BRD1,ENSG00000100425,protein_coding -59591,ZBED4,ENSG00000100426,protein_coding -59600,MLC1,ENSG00000100427,protein_coding -59611,HDAC10,ENSG00000100429,protein_coding -43481,KCNK10,ENSG00000100433,protein_coding -42272,ABHD4,ENSG00000100439,protein_coding -42398,KHNYN,ENSG00000100441,protein_coding -42680,FKBP3,ENSG00000100442,protein_coding -42399,SDR39U1,ENSG00000100445,protein_coding -42402,CTSG,ENSG00000100448,protein_coding -42404,GZMH,ENSG00000100450,protein_coding -42405,GZMB,ENSG00000100453,protein_coding -42287,RBM23,ENSG00000100461,protein_coding -42289,PRMT5,ENSG00000100462,protein_coding -42469,COCH,ENSG00000100473,protein_coding -42475,AP4S1,ENSG00000100478,protein_coding -42724,POLE2,ENSG00000100479,protein_coding -42747,VCPKMT,ENSG00000100483,protein_coding -42748,SOS2,ENSG00000100485,protein_coding -42753,CDKL1,ENSG00000100490,protein_coding -42765,NIN,ENSG00000100503,protein_coding -42771,PYGL,ENSG00000100504,protein_coding -42775,TRIM9,ENSG00000100505,protein_coding -42815,PSMC6,ENSG00000100519,protein_coding -42817,GNPNAT1,ENSG00000100522,protein_coding -42827,DDHD1,ENSG00000100523,protein_coding -42840,CDKN3,ENSG00000100526,protein_coding 
-42841,CNIH1,ENSG00000100528,protein_coding -42844,CGRRF1,ENSG00000100532,protein_coding -43087,ATP6V1D,ENSG00000100554,protein_coding -42905,CCDC198,ENSG00000100557,protein_coding -43090,PLEK2,ENSG00000100558,protein_coding -43094,PIGH,ENSG00000100564,protein_coding -43348,LRRC74A,ENSG00000100565,protein_coding -42917,PSMA3,ENSG00000100567,protein_coding -43101,VTI1B,ENSG00000100568,protein_coding -42927,TIMM9,ENSG00000100575,protein_coding -43372,GSTZ1,ENSG00000100577,protein_coding -42928,KIAA0586,ENSG00000100578,protein_coding -43373,TMED8,ENSG00000100580,protein_coding -43375,SAMD15,ENSG00000100583,protein_coding -43378,AHSA1,ENSG00000100591,protein_coding -42939,DAAM1,ENSG00000100592,protein_coding -43380,ISM2,ENSG00000100593,protein_coding -43381,SPTLC2,ENSG00000100596,protein_coding -43565,RIN3,ENSG00000100599,protein_coding -43566,LGMN,ENSG00000100600,protein_coding -43384,ALKBH1,ENSG00000100601,protein_coding -43388,SNW1,ENSG00000100603,protein_coding -43570,CHGA,ENSG00000100604,protein_coding -43571,ITPK1,ENSG00000100605,protein_coding -42951,DHRS7,ENSG00000100612,protein_coding -42956,PPM1A,ENSG00000100614,protein_coding -42969,SIX4,ENSG00000100625,protein_coding -43142,GALNT16,ENSG00000100626,protein_coding -43592,ASB2,ENSG00000100628,protein_coding -43409,CEP128,ENSG00000100629,protein_coding -43144,ERH,ENSG00000100632,protein_coding -42986,HIF1A,ENSG00000100644,protein_coding -43151,SUSD6,ENSG00000100647,protein_coding -43153,SRSF5,ENSG00000100650,protein_coding -43154,SLC10A1,ENSG00000100652,protein_coding -43922,EIF5,ENSG00000100664,protein_coding -43611,SERPINA4,ENSG00000100665,protein_coding -43158,SLC8A3,ENSG00000100678,protein_coding -43626,DICER1,ENSG00000100697,protein_coding -43944,ZFYVE21,ENSG00000100711,protein_coding -43032,MTHFD1,ENSG00000100714,protein_coding -43647,TCL1A,ENSG00000100721,protein_coding -43487,ZC3H14,ENSG00000100722,protein_coding -46567,TELO2,ENSG00000100726,protein_coding -43186,PCNX1,ENSG00000100731,protein_coding 
-43658,BDKRB1,ENSG00000100739,protein_coding -43662,GSKIP,ENSG00000100744,protein_coding -43674,VRK1,ENSG00000100749,protein_coding -43516,PSMC1,ENSG00000100764,protein_coding -43223,PAPLN,ENSG00000100767,protein_coding -43534,RPS6KA5,ENSG00000100784,protein_coding -43545,PPP4R3A,ENSG00000100796,protein_coding -42296,C14orf93,ENSG00000100802,protein_coding -42298,PSMB5,ENSG00000100804,protein_coding -43724,YY1,ENSG00000100811,protein_coding -42302,ACIN1,ENSG00000100813,protein_coding -42039,CCNB1IP1,ENSG00000100814,protein_coding -43556,TRIP11,ENSG00000100815,protein_coding -42053,APEX1,ENSG00000100823,protein_coding -42316,PABPN1,ENSG00000100836,protein_coding -42318,EFS,ENSG00000100842,protein_coding -42499,ARHGAP5,ENSG00000100852,protein_coding -43890,CINP,ENSG00000100865,protein_coding -42336,DHRS2,ENSG00000100867,protein_coding -42538,SRP54,ENSG00000100883,protein_coding -42351,CPNE6,ENSG00000100884,protein_coding -42113,CHD8,ENSG00000100888,protein_coding -42353,PCK2,ENSG00000100889,protein_coding -42542,KIAA0391,ENSG00000100890,protein_coding -42354,DCAF11,ENSG00000100897,protein_coding -42550,PSMA6,ENSG00000100902,protein_coding -42553,NFKBIA,ENSG00000100906,protein_coding -42358,EMC9,ENSG00000100908,protein_coding -42360,PSME2,ENSG00000100911,protein_coding -42564,BRMS1L,ENSG00000100916,protein_coding -42366,REC8,ENSG00000100918,protein_coding -42370,TM9SF1,ENSG00000100926,protein_coding -42617,SEC23A,ENSG00000100934,protein_coding -42380,GMPR2,ENSG00000100938,protein_coding -42626,PNN,ENSG00000100941,protein_coding -42383,RABGGTA,ENSG00000100949,protein_coding -42395,NFATC4,ENSG00000100968,protein_coding -54303,PLTP,ENSG00000100979,protein_coding -54305,PCIF1,ENSG00000100982,protein_coding -54003,GSS,ENSG00000100983,protein_coding -54308,MMP9,ENSG00000100985,protein_coding -53790,VSX1,ENSG00000100987,protein_coding -54007,TRPC4AP,ENSG00000100991,protein_coding -53800,PYGB,ENSG00000100994,protein_coding -53803,ABHD12,ENSG00000100997,protein_coding 
-54011,PROCR,ENSG00000101000,protein_coding -53805,GINS1,ENSG00000101003,protein_coding -53806,NINL,ENSG00000101004,protein_coding -54313,CD40,ENSG00000101017,protein_coding -54022,UQCC1,ENSG00000101019,protein_coding -54338,ZMYND8,ENSG00000101040,protein_coding -54201,SGK2,ENSG00000101049,protein_coding -54203,IFT52,ENSG00000101052,protein_coding -54205,MYBL2,ENSG00000101057,protein_coding -54217,R3HDML,ENSG00000101074,protein_coding -54220,HNF4A,ENSG00000101076,protein_coding -54071,NDRG3,ENSG00000101079,protein_coding -54069,SLA2,ENSG00000101082,protein_coding -54068,RAB5IF,ENSG00000101084,protein_coding -54440,NFATC2,ENSG00000101096,protein_coding -54236,RIMS4,ENSG00000101098,protein_coding -54240,PABPC1L,ENSG00000101104,protein_coding -54243,STK4,ENSG00000101109,protein_coding -54444,SALL4,ENSG00000101115,protein_coding -54431,ADNP,ENSG00000101126,protein_coding -54482,PFDN4,ENSG00000101132,protein_coding -54484,DOK5,ENSG00000101134,protein_coding -54499,CSTF1,ENSG00000101138,protein_coding -54520,BMP7,ENSG00000101144,protein_coding -54527,RAE1,ENSG00000101146,protein_coding -54720,TPD52L2,ENSG00000101150,protein_coding -54721,DNAJC5,ENSG00000101152,protein_coding -54574,NELFCD,ENSG00000101158,protein_coding -54575,CTSZ,ENSG00000101160,protein_coding -54734,PRPF6,ENSG00000101161,protein_coding -54576,TUBB1,ENSG00000101162,protein_coding -54578,PRELID3B,ENSG00000101166,protein_coding -54621,HRH3,ENSG00000101180,protein_coding -54620,MTG2,ENSG00000101181,protein_coding -54618,PSMA7,ENSG00000101182,protein_coding -54647,SLCO4A1,ENSG00000101187,protein_coding -54652,NTSR1,ENSG00000101188,protein_coding -54655,MRGBP,ENSG00000101189,protein_coding -54659,TCFL5,ENSG00000101190,protein_coding -54662,DIDO1,ENSG00000101191,protein_coding -54664,GID8,ENSG00000101193,protein_coding -54665,SLC17A9,ENSG00000101194,protein_coding -54680,BIRC7,ENSG00000101197,protein_coding -54682,NKAIN4,ENSG00000101198,protein_coding -54684,ARFGAP1,ENSG00000101199,protein_coding 
-53392,AVP,ENSG00000101200,protein_coding -54686,COL20A1,ENSG00000101203,protein_coding -54688,CHRNA4,ENSG00000101204,protein_coding -54693,EEF1A2,ENSG00000101210,protein_coding -54696,PTK6,ENSG00000101213,protein_coding -54701,GMEB2,ENSG00000101216,protein_coding -53413,C20orf27,ENSG00000101220,protein_coding -53414,SPEF1,ENSG00000101222,protein_coding -53416,CDC25B,ENSG00000101224,protein_coding -53564,ISM1,ENSG00000101230,protein_coding -53426,RNF24,ENSG00000101236,protein_coding -54707,ARFRP1,ENSG00000101246,protein_coding -53571,NDUFAF5,ENSG00000101247,protein_coding -53572,SEL1L2,ENSG00000101251,protein_coding -53307,TRIB3,ENSG00000101255,protein_coding -53442,RASSF2,ENSG00000101265,protein_coding -53311,CSNK2A1,ENSG00000101266,protein_coding -53316,SLC52A3,ENSG00000101276,protein_coding -53319,ANGPT4,ENSG00000101280,protein_coding -53320,RSPO4,ENSG00000101282,protein_coding -53455,CDS2,ENSG00000101290,protein_coding -53457,PROKR2,ENSG00000101292,protein_coding -53882,HM13,ENSG00000101294,protein_coding -53328,SNPH,ENSG00000101298,protein_coding -53897,MYLK2,ENSG00000101306,protein_coding -53342,SIRPB1,ENSG00000101307,protein_coding -53650,SEC23B,ENSG00000101310,protein_coding -53484,FERMT1,ENSG00000101311,protein_coding -53501,HAO1,ENSG00000101323,protein_coding -53356,PDYN,ENSG00000101327,protein_coding -53907,CCM2L,ENSG00000101331,protein_coding -53511,PLCB4,ENSG00000101333,protein_coding -54064,MYL9,ENSG00000101335,protein_coding -53909,HCK,ENSG00000101336,protein_coding -53910,TM9SF4,ENSG00000101337,protein_coding -54076,TLDC2,ENSG00000101342,protein_coding -53673,CRNKL1,ENSG00000101343,protein_coding -53915,POFUT1,ENSG00000101346,protein_coding -54077,SAMHD1,ENSG00000101347,protein_coding -53514,PAK5,ENSG00000101349,protein_coding -53918,KIF3B,ENSG00000101350,protein_coding -54082,MROH8,ENSG00000101353,protein_coding -53371,NOP56,ENSG00000101361,protein_coding -54086,MANBAL,ENSG00000101363,protein_coding -53378,IDH3B,ENSG00000101365,protein_coding 
-53933,MAPRE1,ENSG00000101367,protein_coding -53534,JAG1,ENSG00000101384,protein_coding -53953,CDK5RAP1,ENSG00000101391,protein_coding -53954,SNTA1,ENSG00000101400,protein_coding -53391,OXT,ENSG00000101405,protein_coding -54101,TTI1,ENSG00000101407,protein_coding -53961,E2F1,ENSG00000101412,protein_coding -54102,RPRD1B,ENSG00000101413,protein_coding -53962,PXMP4,ENSG00000101417,protein_coding -53967,CHMP4B,ENSG00000101421,protein_coding -54112,BPI,ENSG00000101425,protein_coding -53756,CST9L,ENSG00000101435,protein_coding -54134,SLC32A1,ENSG00000101438,protein_coding -53759,CST3,ENSG00000101439,protein_coding -53977,ASIP,ENSG00000101440,protein_coding -53763,CST4,ENSG00000101441,protein_coding -54135,ACTR5,ENSG00000101442,protein_coding -54267,WFDC2,ENSG00000101443,protein_coding -53981,AHCY,ENSG00000101444,protein_coding -54137,PPP1R16B,ENSG00000101445,protein_coding -54270,SPINT3,ENSG00000101446,protein_coding -54139,FAM83D,ENSG00000101447,protein_coding -54275,EPPIN,ENSG00000101448,protein_coding -54142,DHX35,ENSG00000101452,protein_coding -54291,DNTTIP1,ENSG00000101457,protein_coding -53994,MAP1LC3A,ENSG00000101460,protein_coding -53780,SYNDIG1,ENSG00000101463,protein_coding -53995,PIGU,ENSG00000101464,protein_coding -54293,TNNC2,ENSG00000101470,protein_coding -54295,ACOT8,ENSG00000101473,protein_coding -53785,APMAP,ENSG00000101474,protein_coding -52692,CELF4,ENSG00000101489,protein_coding -53208,ZNF516,ENSG00000101493,protein_coding -53024,CDH20,ENSG00000101542,protein_coding -53289,ADNP2,ENSG00000101544,protein_coding -53286,RBFA,ENSG00000101546,protein_coding -52061,USP14,ENSG00000101557,protein_coding -52258,VAPA,ENSG00000101558,protein_coding -52103,METTL4,ENSG00000101574,protein_coding -52116,LPIN2,ENSG00000101577,protein_coding -52112,SMCHD1,ENSG00000101596,protein_coding -52124,MYOM1,ENSG00000101605,protein_coding -52128,MYL12A,ENSG00000101608,protein_coding -52340,CEP76,ENSG00000101624,protein_coding -52769,ST8SIA5,ENSG00000101638,protein_coding 
-52350,CEP192,ENSG00000101639,protein_coding -52375,RNMT,ENSG00000101654,protein_coding -52818,SMAD7,ENSG00000101665,protein_coding -52840,LIPG,ENSG00000101670,protein_coding -52197,LAMA1,ENSG00000101680,protein_coding -52613,RNF125,ENSG00000101695,protein_coding -52232,ANKRD12,ENSG00000101745,protein_coding -52641,NOL4,ENSG00000101746,protein_coding -52899,POLI,ENSG00000101751,protein_coding -52447,MIB1,ENSG00000101752,protein_coding -52476,RBBP8,ENSG00000101773,protein_coding -52485,RIOK3,ENSG00000101782,protein_coding -25943,CSTF2,ENSG00000101811,protein_coding -26071,H2BFM,ENSG00000101812,protein_coding -24604,MXRA5,ENSG00000101825,protein_coding -26120,VSIG1,ENSG00000101842,protein_coding -26121,PSMD10,ENSG00000101843,protein_coding -26122,ATG4A,ENSG00000101844,protein_coding -24630,STS,ENSG00000101846,protein_coding -24663,TBL1X,ENSG00000101849,protein_coding -24664,GPR143,ENSG00000101850,protein_coding -26266,PGRMC1,ENSG00000101856,protein_coding -24865,POLA1,ENSG00000101868,protein_coding -24675,MID1,ENSG00000101871,protein_coding -26292,NKAP,ENSG00000101882,protein_coding -26298,RHOXF1,ENSG00000101883,protein_coding -26136,NXT2,ENSG00000101888,protein_coding -26135,GUCY2F,ENSG00000101890,protein_coding -26307,ATP1B4,ENSG00000101892,protein_coding -53884,MCTS2P,ENSG00000101898,protein_coding -26166,ALG13,ENSG00000101901,protein_coding -24696,PRPS2,ENSG00000101911,protein_coding -24701,TLR8,ENSG00000101916,protein_coding -26521,MOSPD1,ENSG00000101928,protein_coding -26146,AMMECR1,ENSG00000101935,protein_coding -26154,CHRDL1,ENSG00000101938,protein_coding -25223,WDR13,ENSG00000101940,protein_coding -25226,SUV39H1,ENSG00000101945,protein_coding -25284,PAGE4,ENSG00000101951,protein_coding -24999,SRPX,ENSG00000101955,protein_coding -24724,GLRA2,ENSG00000101958,protein_coding -26368,XIAP,ENSG00000101966,protein_coding -26373,STAG2,ENSG00000101972,protein_coding -26625,ATP11C,ENSG00000101974,protein_coding -26623,MCF2,ENSG00000101977,protein_coding 
-26622,F9,ENSG00000101981,protein_coding -26865,ABCD1,ENSG00000101986,protein_coding -25262,CCDC22,ENSG00000101997,protein_coding -25260,CACNA1F,ENSG00000102001,protein_coding -25258,SYP,ENSG00000102003,protein_coding -25256,PLP2,ENSG00000102007,protein_coding -24735,BMX,ENSG00000102010,protein_coding -26215,LUZP4,ENSG00000102021,protein_coding -26220,PLS3,ENSG00000102024,protein_coding -26878,NAA10,ENSG00000102030,protein_coding -26879,RENBP,ENSG00000102032,protein_coding -26428,ELF4,ENSG00000102034,protein_coding -26414,SMARCA1,ENSG00000102038,protein_coding -25500,MTMR8,ENSG00000102043,protein_coding -24730,ASB9,ENSG00000102048,protein_coding -25513,ZC3H12B,ENSG00000102053,protein_coding -24763,RBBP7,ENSG00000102054,protein_coding -25076,PPP1R2C,ENSG00000102055,protein_coding -25243,KCND1,ENSG00000102057,protein_coding -26888,OPN1LW,ENSG00000102076,protein_coding -26435,SLC25A14,ENSG00000102078,protein_coding -26732,FMR1,ENSG00000102081,protein_coding -25239,PIM2,ENSG00000102096,protein_coding -24784,SCML2,ENSG00000102098,protein_coding -25238,SLC35A2,ENSG00000102100,protein_coding -25236,PQBP1,ENSG00000102103,protein_coding -24789,RS1,ENSG00000102104,protein_coding -25235,PCSK1N,ENSG00000102109,protein_coding -26900,EMD,ENSG00000102119,protein_coding -26905,TAZ,ENSG00000102125,protein_coding -26029,RAB40AL,ENSG00000102128,protein_coding -25765,PGK1,ENSG00000102144,protein_coding -25232,GATA1,ENSG00000102145,protein_coding -25760,MAGT1,ENSG00000102158,protein_coding -24828,SMS,ENSG00000102172,protein_coding -24829,PHEX,ENSG00000102174,protein_coding -26913,UBL4A,ENSG00000102178,protein_coding -26777,CD99L2,ENSG00000102181,protein_coding -39534,EEA1,ENSG00000102189,protein_coding -26787,GPR50,ENSG00000102195,protein_coding -25135,RP2,ENSG00000102218,protein_coding -25137,JADE3,ENSG00000102221,protein_coding -25146,CDK16,ENSG00000102225,protein_coding -25147,USP11,ENSG00000102226,protein_coding -24861,PCYT1B,ENSG00000102230,protein_coding 
-26581,BRS3,ENSG00000102239,protein_coding -26582,HTATSF1,ENSG00000102241,protein_coding -26583,VGLL1,ENSG00000102243,protein_coding -26588,CD40LG,ENSG00000102245,protein_coding -25162,TIMP1,ENSG00000102265,protein_coding -25848,KLHL4,ENSG00000102271,protein_coding -26801,GABRE,ENSG00000102287,protein_coding -25876,PCDH11X,ENSG00000102290,protein_coding -25419,FGD1,ENSG00000102302,protein_coding -25643,PIN4,ENSG00000102309,protein_coding -25213,PORCN,ENSG00000102312,protein_coding -25422,ITIH6,ENSG00000102313,protein_coding -25423,MAGED2,ENSG00000102316,protein_coding -25219,RBM3,ENSG00000102317,protein_coding -25459,KLF8,ENSG00000102349,protein_coding -25938,SRPX2,ENSG00000102359,protein_coding -25939,SYTL4,ENSG00000102362,protein_coding -25734,ZDHHC15,ENSG00000102383,protein_coding -25955,CENPI,ENSG00000102384,protein_coding -25957,DRP2,ENSG00000102385,protein_coding -25958,TAF7L,ENSG00000102387,protein_coding -25744,PBDC1,ENSG00000102390,protein_coding -25966,GLA,ENSG00000102393,protein_coding -25974,ARMCX3,ENSG00000102401,protein_coding -26037,BEX4,ENSG00000102409,protein_coding -41102,RUBCNL,ENSG00000102445,protein_coding -41735,NALCN,ENSG00000102452,protein_coding -41737,FGF14,ENSG00000102466,protein_coding -41115,HTR2A,ENSG00000102468,protein_coding -41494,NDFIP2,ENSG00000102471,protein_coding -41806,TNFSF13B,ENSG00000102524,protein_coding -41147,FNDC3A,ENSG00000102531,protein_coding -41153,MLNR,ENSG00000102539,protein_coding -41155,CDADC1,ENSG00000102543,protein_coding -41156,CAB39L,ENSG00000102547,protein_coding -41405,KLF5,ENSG00000102554,protein_coding -41675,STK24,ENSG00000102572,protein_coding -55382,ACP5,ENSG00000102575,protein_coding -41637,DNAJC3,ENSG00000102580,protein_coding -41642,UGGT2,ENSG00000102595,protein_coding -41854,ARHGEF7,ENSG00000102606,protein_coding -40657,FGF9,ENSG00000102678,protein_coding -40678,SGCG,ENSG00000102683,protein_coding -40707,PARP4,ENSG00000102699,protein_coding -40933,SUPT20H,ENSG00000102710,protein_coding 
-40978,MRPS31,ENSG00000102738,protein_coding -40980,SLC25A15,ENSG00000102743,protein_coding -41164,KPNA3,ENSG00000102753,protein_coding -40801,FLT1,ENSG00000102755,protein_coding -41005,RGCC,ENSG00000102760,protein_coding -41006,VWA8,ENSG00000102763,protein_coding -41013,DGKH,ENSG00000102780,protein_coding -40828,KATNAL1,ENSG00000102781,protein_coding -41203,INTS6,ENSG00000102786,protein_coding -41450,ACOD1,ENSG00000102794,protein_coding -41215,DHRS12,ENSG00000102796,protein_coding -40848,MEDAG,ENSG00000102802,protein_coding -41054,TSC22D1,ENSG00000102804,protein_coding -41453,CLN5,ENSG00000102805,protein_coding -41251,OLFM4,ENSG00000102837,protein_coding -46508,MSLN,ENSG00000102854,protein_coding -46804,MGRN1,ENSG00000102858,protein_coding -47561,ZNF629,ENSG00000102870,protein_coding -48290,TRADD,ENSG00000102871,protein_coding -48293,HSF4,ENSG00000102878,protein_coding -47498,CORO1A,ENSG00000102879,protein_coding -47497,MAPK3,ENSG00000102882,protein_coding -47494,GDPD3,ENSG00000102886,protein_coding -48298,ELMO3,ENSG00000102890,protein_coding -48088,MT4,ENSG00000102891,protein_coding -47882,PHKB,ENSG00000102893,protein_coding -47201,LYRM1,ENSG00000102897,protein_coding -48339,NUTF2,ENSG00000102898,protein_coding -48109,NUP93,ENSG00000102900,protein_coding -48337,CENPT,ENSG00000102901,protein_coding -48336,TSNAXIP1,ENSG00000102904,protein_coding -48428,NFAT5,ENSG00000102908,protein_coding -47897,LONP2,ENSG00000102910,protein_coding -47905,AC026470.1,ENSG00000102921,protein_coding -47920,CBLN1,ENSG00000102924,protein_coding -48128,ARL2BP,ENSG00000102931,protein_coding -48131,PLLP,ENSG00000102934,protein_coding -47927,ZNF423,ENSG00000102935,protein_coding -48134,CCL22,ENSG00000102962,protein_coding -48522,DHODH,ENSG00000102967,protein_coding -48136,CCL17,ENSG00000102970,protein_coding -48326,CTCF,ENSG00000102974,protein_coding -48329,ACD,ENSG00000102977,protein_coding -48140,POLR2C,ENSG00000102978,protein_coding -48330,PARD6A,ENSG00000102981,protein_coding 
-48516,ZNF821,ENSG00000102984,protein_coding -48161,MMP15,ENSG00000102996,protein_coding -48160,USB1,ENSG00000103005,protein_coding -48425,CYB5B,ENSG00000103018,protein_coding -48170,CCDC113,ENSG00000103021,protein_coding -48171,PRSS54,ENSG00000103023,protein_coding -46586,NME3,ENSG00000103024,protein_coding -48178,NDRG4,ENSG00000103034,protein_coding -48561,PSMD7,ENSG00000103035,protein_coding -48180,SETD6,ENSG00000103037,protein_coding -48187,SLC38A7,ENSG00000103042,protein_coding -48472,VAC14,ENSG00000103043,protein_coding -48407,HAS3,ENSG00000103044,protein_coding -48399,TANGO6,ENSG00000103047,protein_coding -48465,COG4,ENSG00000103051,protein_coding -48379,SMPD3,ENSG00000103056,protein_coding -48374,SLC7A6OS,ENSG00000103061,protein_coding -48372,SLC7A6,ENSG00000103064,protein_coding -48370,PLA2G15,ENSG00000103066,protein_coding -48367,ESRP2,ENSG00000103067,protein_coding -48579,FA2H,ENSG00000103089,protein_coding -48581,WDR59,ENSG00000103091,protein_coding -48638,MON1B,ENSG00000103111,protein_coding -48703,CMC2,ENSG00000103121,protein_coding -46466,AXIN1,ENSG00000103126,protein_coding -46719,HCFC1R1,ENSG00000103145,protein_coding -46447,NPRL3,ENSG00000103148,protein_coding -48761,MLYCD,ENSG00000103150,protein_coding -46446,MPG,ENSG00000103152,protein_coding -48765,NECAB2,ENSG00000103154,protein_coding -48773,HSDL1,ENSG00000103160,protein_coding -48775,TAF1C,ENSG00000103168,protein_coding -46826,NAGPA,ENSG00000103174,protein_coding -48781,WFDC1,ENSG00000103175,protein_coding -46824,SEC14L5,ENSG00000103184,protein_coding -48788,COTL1,ENSG00000103187,protein_coding -48791,USP10,ENSG00000103194,protein_coding -48793,CRISPLD2,ENSG00000103196,protein_coding -46621,TSC2,ENSG00000103197,protein_coding -46813,ZNF500,ENSG00000103199,protein_coding -46470,NME4,ENSG00000103202,protein_coding -47068,ABCC1,ENSG00000103222,protein_coding -47072,NOMO3,ENSG00000103226,protein_coding -46517,LMF1,ENSG00000103227,protein_coding -48846,FOXF1,ENSG00000103241,protein_coding 
-46507,CIAO3,ENSG00000103245,protein_coding -48849,MTHFSD,ENSG00000103248,protein_coding -46561,CLCN7,ENSG00000103249,protein_coding -46506,HAGHL,ENSG00000103253,protein_coding -46504,FAM173A,ENSG00000103254,protein_coding -48891,SLC7A5,ENSG00000103257,protein_coding -46503,METRN,ENSG00000103260,protein_coding -48875,FBXO31,ENSG00000103264,protein_coding -46496,STUB1,ENSG00000103266,protein_coding -46493,RHBDL1,ENSG00000103269,protein_coding -46921,NUBP1,ENSG00000103274,protein_coding -46548,UBE2I,ENSG00000103275,protein_coding -47206,ZP2,ENSG00000103310,protein_coding -46741,MEFV,ENSG00000103313,protein_coding -47208,CRYM,ENSG00000103316,protein_coding -47245,EEF2K,ENSG00000103319,protein_coding -46477,CAPN15,ENSG00000103326,protein_coding -48920,PIEZO1,ENSG00000103335,protein_coding -46960,GSPT1,ENSG00000103342,protein_coding -46757,ZNF174,ENSG00000103343,protein_coding -46764,CLUAP1,ENSG00000103351,protein_coding -47279,UBFD1,ENSG00000103353,protein_coding -46695,PRSS33,ENSG00000103355,protein_coding -47277,EARS2,ENSG00000103356,protein_coding -46693,ELOB,ENSG00000103363,protein_coding -47275,GGA2,ENSG00000103365,protein_coding -47320,AQP8,ENSG00000103375,protein_coding -46981,CPPED1,ENSG00000103381,protein_coding -47263,USP31,ENSG00000103404,protein_coding -46797,HMOX2,ENSG00000103415,protein_coding -46792,DNAJA3,ENSG00000103423,protein_coding -46788,CORO7-PAM16,ENSG00000103426,protein_coding -47011,BFAR,ENSG00000103429,protein_coding -47977,SALL1,ENSG00000103449,protein_coding -47996,TOX3,ENSG00000103460,protein_coding -48021,RBL2,ENSG00000103479,protein_coding -47456,QPRT,ENSG00000103485,protein_coding -47097,XYLT1,ENSG00000103489,protein_coding -47603,PYCARD,ENSG00000103490,protein_coding -48028,RPGRIP1L,ENSG00000103494,protein_coding -47463,MAZ,ENSG00000103495,protein_coding -47581,STX4,ENSG00000103496,protein_coding -47472,CDIPT,ENSG00000103502,protein_coding -47591,BCKDK,ENSG00000103507,protein_coding -47592,KAT8,ENSG00000103510,protein_coding 
-47021,NOMO1,ENSG00000103512,protein_coding -47348,IL21R,ENSG00000103522,protein_coding -47150,SYT17,ENSG00000103528,protein_coding -47157,TMC5,ENSG00000103534,protein_coding -47164,CCP110,ENSG00000103540,protein_coding -47165,VPS35L,ENSG00000103544,protein_coding -48066,SLC6A2,ENSG00000103546,protein_coding -47559,RNF40,ENSG00000103549,protein_coding -47167,KNOP1,ENSG00000103550,protein_coding -45292,AQP9,ENSG00000103569,protein_coding -45532,AAGAB,ENSG00000103591,protein_coding -45534,IQCH,ENSG00000103599,protein_coding -45411,LACTB,ENSG00000103642,protein_coding -45555,CORO2B,ENSG00000103647,protein_coding -45711,CSK,ENSG00000103653,protein_coding -45422,HERC1,ENSG00000103657,protein_coding -45437,TRIP4,ENSG00000103671,protein_coding -45460,MTFMT,ENSG00000103707,protein_coding -45464,RASL12,ENSG00000103710,protein_coding -45957,AP3B2,ENSG00000103723,protein_coding -45829,ACSBG1,ENSG00000103740,protein_coding -45480,IGDCC4,ENSG00000103742,protein_coding -45493,RAB11A,ENSG00000103769,protein_coding -45860,CTSH,ENSG00000103811,protein_coding -46346,TTC23,ENSG00000103852,protein_coding -45666,CD276,ENSG00000103855,protein_coding -45892,FAH,ENSG00000103876,protein_coding -45907,CEMIP,ENSG00000103888,protein_coding -44894,RPAP1,ENSG00000103932,protein_coding -45969,HOMER2,ENSG00000103942,protein_coding -44911,EHD4,ENSG00000103966,protein_coding -44922,TMEM87A,ENSG00000103978,protein_coding -44927,ZNF106,ENSG00000103994,protein_coding -45103,CEP152,ENSG00000103995,protein_coding -45130,ATP8B4,ENSG00000104043,protein_coding -44533,OCA2,ENSG00000104044,protein_coding -45128,DTWD1,ENSG00000104047,protein_coding -44956,TGM5,ENSG00000104055,protein_coding -44576,FAM189A1,ENSG00000104059,protein_coding -45137,GABPB1,ENSG00000104064,protein_coding -44585,TJP1,ENSG00000104067,protein_coding -44817,BMF,ENSG00000104081,protein_coding -45173,DMXL2,ENSG00000104093,protein_coding -45175,SCG3,ENSG00000104112,protein_coding -44860,DNAJC17,ENSG00000104129,protein_coding 
-45005,EIF3J,ENSG00000104131,protein_coding -45007,SPG11,ENSG00000104133,protein_coding -44866,RHOV,ENSG00000104140,protein_coding -44868,VPS18,ENSG00000104142,protein_coding -44886,OIP5,ENSG00000104147,protein_coding -45054,SLC30A4,ENSG00000104154,protein_coding -45060,BLOC1S6,ENSG00000104164,protein_coding -45090,MYEF2,ENSG00000104177,protein_coding -28239,SGK3,ENSG00000104205,protein_coding -27396,PDGFRL,ENSG00000104213,protein_coding -28248,CSPP1,ENSG00000104218,protein_coding -27386,ZDHHC2,ENSG00000104219,protein_coding -27777,BRF2,ENSG00000104221,protein_coding -27587,TRIM35,ENSG00000104228,protein_coding -28477,ZFAND1,ENSG00000104231,protein_coding -28061,RP1,ENSG00000104237,protein_coding -28515,CA2,ENSG00000104267,protein_coding -27625,FZD3,ENSG00000104290,protein_coding -27630,INTS9,ENSG00000104299,protein_coding -28570,RIPK2,ENSG00000104312,protein_coding -28308,EYA1,ENSG00000104313,protein_coding -28575,NBN,ENSG00000104320,protein_coding -28316,TRPA1,ENSG00000104321,protein_coding -28696,CPQ,ENSG00000104324,protein_coding -28576,DECR1,ENSG00000104325,protein_coding -28577,CALB1,ENSG00000104327,protein_coding -28100,IMPAD1,ENSG00000104331,protein_coding -27850,SFRP1,ENSG00000104332,protein_coding -28706,LAPTM4B,ENSG00000104341,protein_coding -28349,UBE2W,ENSG00000104343,protein_coding -28718,POP1,ENSG00000104356,protein_coding -28719,NIPAL2,ENSG00000104361,protein_coding -27882,IKBKB,ENSG00000104365,protein_coding -27880,PLAT,ENSG00000104368,protein_coding -28365,JPH1,ENSG00000104369,protein_coding -27885,DKK4,ENSG00000104371,protein_coding -28721,STK3,ENSG00000104375,protein_coding -28366,GDAP1,ENSG00000104381,protein_coding -28146,RAB2A,ENSG00000104388,protein_coding -28905,EIF3E,ENSG00000104408,protein_coding -28908,EMC2,ENSG00000104412,protein_coding -28654,ESRP1,ENSG00000104413,protein_coding -29196,CCN4,ENSG00000104415,protein_coding -29197,NDRG1,ENSG00000104419,protein_coding -28407,ZC2HC1A,ENSG00000104427,protein_coding 
-28408,IL7,ENSG00000104432,protein_coding -28416,STMN2,ENSG00000104435,protein_coding -28217,ARMC1,ENSG00000104442,protein_coding -28956,TRPS1,ENSG00000104447,protein_coding -28758,SPAG1,ENSG00000104450,protein_coding -29262,CHRAC1,ENSG00000104472,protein_coding -28808,NCALD,ENSG00000104490,protein_coding -28480,SNX16,ENSG00000104497,protein_coding -29322,GML,ENSG00000104499,protein_coding -28817,UBR5,ENSG00000104517,protein_coding -29352,GSDMD,ENSG00000104518,protein_coding -29361,TSTA3,ENSG00000104522,protein_coding -29360,PYCR3,ENSG00000104524,protein_coding -29358,EEF1D,ENSG00000104529,protein_coding -29063,ANXA13,ENSG00000104537,protein_coding -29094,SQLE,ENSG00000104549,protein_coding -27434,SH2D4A,ENSG00000104611,protein_coding -27438,INTS10,ENSG00000104613,protein_coding -27199,ERI1,ENSG00000104626,protein_coding -27482,SLC39A14,ENSG00000104635,protein_coding -27259,MTMR9,ENSG00000104643,protein_coding -27660,LEPROTL1,ENSG00000104660,protein_coding -27664,DCTN6,ENSG00000104671,protein_coding -27520,R3HCC1,ENSG00000104679,protein_coding -27681,GSR,ENSG00000104687,protein_coding -27515,TNFRSF10A,ENSG00000104689,protein_coding -27682,UBXN8,ENSG00000104691,protein_coding -27684,PPP2CB,ENSG00000104695,protein_coding -27000,ERICH1,ENSG00000104714,protein_coding -27546,NEFM,ENSG00000104722,protein_coding -27370,TUSC3,ENSG00000104723,protein_coding -27021,ARHGEF10,ENSG00000104728,protein_coding -48887,KLHDC4,ENSG00000104731,protein_coding -27970,MCM4,ENSG00000104738,protein_coding -27831,ADAM2,ENSG00000104755,protein_coding -27559,KCTD9,ENSG00000104756,protein_coding -27405,FGL1,ENSG00000104760,protein_coding -27409,ASAH1,ENSG00000104763,protein_coding -27570,BNIP3L,ENSG00000104765,protein_coding -55452,MAN2B1,ENSG00000104774,protein_coding -56701,KCNN4,ENSG00000104783,protein_coding -56973,TULP2,ENSG00000104804,protein_coding -56974,NUCB1,ENSG00000104805,protein_coding -56977,DHDH,ENSG00000104808,protein_coding -56981,GYS1,ENSG00000104812,protein_coding 
-56424,MAP4K1,ENSG00000104814,protein_coding -56990,CGB2,ENSG00000104818,protein_coding -56435,ECH1,ENSG00000104823,protein_coding -56438,HNRNPL,ENSG00000104824,protein_coding -56443,NFKBIB,ENSG00000104825,protein_coding -56984,LHB,ENSG00000104826,protein_coding -56986,CGB3,ENSG00000104827,protein_coding -55109,TUBB4A,ENSG00000104833,protein_coding -56445,SARS2,ENSG00000104835,protein_coding -57003,KCNA7,ENSG00000104848,protein_coding -57005,SNRNP70,ENSG00000104852,protein_coding -56772,CLPTM1,ENSG00000104853,protein_coding -56773,RELB,ENSG00000104856,protein_coding -56774,CLASRP,ENSG00000104859,protein_coding -57006,LIN7B,ENSG00000104863,protein_coding -56780,PPP1R37,ENSG00000104866,protein_coding -57039,FCGRT,ENSG00000104870,protein_coding -57025,PIH1D1,ENSG00000104872,protein_coding -56787,CKM,ENSG00000104879,protein_coding -55145,ARHGEF18,ENSG00000104880,protein_coding -56791,PPP1R13L,ENSG00000104881,protein_coding -55146,PEX11G,ENSG00000104883,protein_coding -56790,ERCC2,ENSG00000104884,protein_coding -54895,DOT1L,ENSG00000104885,protein_coding -54897,PLEKHJ1,ENSG00000104886,protein_coding -57024,SLC17A7,ENSG00000104888,protein_coding -55476,RNASEH2A,ENSG00000104889,protein_coding -56789,KLC3,ENSG00000104892,protein_coding -57016,CD37,ENSG00000104894,protein_coding -54900,SF3A2,ENSG00000104897,protein_coding -54901,AMH,ENSG00000104899,protein_coding -57019,DKKL1,ENSG00000104901,protein_coding -55500,LYL1,ENSG00000104903,protein_coding -54904,OAZ1,ENSG00000104904,protein_coding -55501,TRMT1,ENSG00000104907,protein_coding -55505,STX10,ENSG00000104915,protein_coding -55163,RETN,ENSG00000104918,protein_coding -55167,FCER2,ENSG00000104921,protein_coding -56816,DMPK,ENSG00000104936,protein_coding -55171,CLEC4M,ENSG00000104938,protein_coding -56819,RSPH6A,ENSG00000104941,protein_coding -57071,TBC1D17,ENSG00000104946,protein_coding -57073,IL4I1,ENSG00000104951,protein_coding -54944,TLE6,ENSG00000104953,protein_coding -55511,CCDC130,ENSG00000104957,protein_coding 
-57065,PTOV1,ENSG00000104960,protein_coding -54946,TLE5,ENSG00000104964,protein_coding -56826,NOVA2,ENSG00000104967,protein_coding -54931,SGTA,ENSG00000104969,protein_coding -57450,LILRB1,ENSG00000104972,protein_coding -57062,MED25,ENSG00000104973,protein_coding -57448,LILRA1,ENSG00000104974,protein_coding -55186,SNAPC2,ENSG00000104976,protein_coding -55514,C19orf53,ENSG00000104979,protein_coding -55189,TIMM44,ENSG00000104980,protein_coding -56827,CCDC61,ENSG00000104983,protein_coding -55533,IL27RA,ENSG00000104998,protein_coding -55542,ASF1B,ENSG00000105011,protein_coding -57487,TNNT1,ENSG00000105048,protein_coding -57081,VRK3,ENSG00000105053,protein_coding -55652,FAM32A,ENSG00000105058,protein_coding -57499,PPP6R1,ENSG00000105063,protein_coding -55666,C19orf44,ENSG00000105072,protein_coding -55674,MED26,ENSG00000105085,protein_coding -55283,OLFM2,ENSG00000105088,protein_coding -55607,RASAL3,ENSG00000105122,protein_coding -55600,AKAP8,ENSG00000105127,protein_coding -55596,EPHX3,ENSG00000105131,protein_coding -55589,ILVBL,ENSG00000105135,protein_coding -57643,ZNF419,ENSG00000105136,protein_coding -55588,SYDE1,ENSG00000105137,protein_coding -55586,CASP14,ENSG00000105141,protein_coding -55584,SLC1A6,ENSG00000105143,protein_coding -57618,AURKC,ENSG00000105146,protein_coding -56097,POP4,ENSG00000105171,protein_coding -56101,CCNE1,ENSG00000105173,protein_coding -56106,URI1,ENSG00000105176,protein_coding -56141,PDCD5,ENSG00000105185,protein_coding -56142,ANKRD27,ENSG00000105186,protein_coding -56476,RPS16,ENSG00000105193,protein_coding -56478,TIMM50,ENSG00000105197,protein_coding -56490,LGALS13,ENSG00000105198,protein_coding -56506,FBL,ENSG00000105202,protein_coding -56504,DYRK1B,ENSG00000105204,protein_coding -56501,CLC,ENSG00000105205,protein_coding -56519,CNTD2,ENSG00000105219,protein_coding -56194,GPI,ENSG00000105220,protein_coding -56520,AKT2,ENSG00000105221,protein_coding -56527,PLD3,ENSG00000105223,protein_coding -56531,PRX,ENSG00000105227,protein_coding 
-54996,PIAS4,ENSG00000105229,protein_coding -56541,NUMBL,ENSG00000105245,protein_coding -55004,EBI3,ENSG00000105246,protein_coding -55006,YJU2,ENSG00000105248,protein_coding -55007,SHD,ENSG00000105251,protein_coding -56322,TBCB,ENSG00000105254,protein_coding -55009,FSD1,ENSG00000105255,protein_coding -56321,POLR2I,ENSG00000105258,protein_coding -56320,OVOL3,ENSG00000105261,protein_coding -56316,CLIP3,ENSG00000105270,protein_coding -54987,ZFR2,ENSG00000105278,protein_coding -56875,SLC1A5,ENSG00000105281,protein_coding -56868,PRKD2,ENSG00000105287,protein_coding -54979,TJP3,ENSG00000105289,protein_coding -56301,APLP1,ENSG00000105290,protein_coding -54976,CACTIN,ENSG00000105298,protein_coding -56891,CCDC9,ENSG00000105321,protein_coding -56576,HNRNPUL1,ENSG00000105323,protein_coding -54964,FZR1,ENSG00000105325,protein_coding -56887,BBC3,ENSG00000105327,protein_coding -56578,TGFB1,ENSG00000105329,protein_coding -29272,DENND3,ENSG00000105339,protein_coding -56590,DMAC2,ENSG00000105341,protein_coding -56603,CEACAM4,ENSG00000105352,protein_coding -55046,PLIN3,ENSG00000105355,protein_coding -57090,MYH14,ENSG00000105357,protein_coding -55301,MRPL4,ENSG00000105364,protein_coding -57187,SIGLEC8,ENSG00000105366,protein_coding -56619,CD79A,ENSG00000105369,protein_coding -57178,LIM2,ENSG00000105370,protein_coding -55306,ICAM4,ENSG00000105371,protein_coding -56617,RPS19,ENSG00000105372,protein_coding -56911,NOP53,ENSG00000105373,protein_coding -57177,NKG7,ENSG00000105374,protein_coding -55307,ICAM5,ENSG00000105376,protein_coding -57172,ETFB,ENSG00000105379,protein_coding -57164,CD33,ENSG00000105383,protein_coding -56608,CEACAM5,ENSG00000105388,protein_coding -56918,CRX,ENSG00000105392,protein_coding -55707,BABAM1,ENSG00000105393,protein_coding -55315,TYK2,ENSG00000105397,protein_coding -56921,SULT2A1,ENSG00000105398,protein_coding -55316,CDC37,ENSG00000105401,protein_coding -56901,NAPA,ENSG00000105402,protein_coding -56622,RABAC1,ENSG00000105404,protein_coding 
-56624,ATP1A3,ENSG00000105409,protein_coding -56896,MEIS3,ENSG00000105419,protein_coding -55051,PTPRS,ENSG00000105426,protein_coding -56643,CNFN,ENSG00000105427,protein_coding -55055,ZNRF4,ENSG00000105428,protein_coding -56641,MEGF8,ENSG00000105429,protein_coding -56941,KDELR1,ENSG00000105438,protein_coding -56947,CYTH2,ENSG00000105443,protein_coding -56943,GRWD1,ENSG00000105447,protein_coding -56942,GRIN2D,ENSG00000105464,protein_coding -56940,SYNGR4,ENSG00000105467,protein_coding -57113,CLEC11A,ENSG00000105472,protein_coding -56937,CCDC114,ENSG00000105479,protein_coding -56931,CARD8,ENSG00000105483,protein_coding -56927,LIG1,ENSG00000105486,protein_coding -57193,SIGLEC6,ENSG00000105492,protein_coding -57197,ZNF175,ENSG00000105497,protein_coding -56925,PLA2G4C,ENSG00000105499,protein_coding -57200,SIGLEC5,ENSG00000105501,protein_coding -56924,CABP5,ENSG00000105507,protein_coding -57211,HAS1,ENSG00000105509,protein_coding -55359,RAB3D,ENSG00000105514,protein_coding -56957,DBP,ENSG00000105516,protein_coding -55361,TMEM205,ENSG00000105518,protein_coding -55078,CAPS,ENSG00000105519,protein_coding -55363,PLPPR2,ENSG00000105520,protein_coding -56952,FAM83E,ENSG00000105523,protein_coding -56963,RASIP1,ENSG00000105538,protein_coding -54774,THEG,ENSG00000105549,protein_coding -56966,FGF21,ENSG00000105550,protein_coding -56968,BCAT2,ENSG00000105552,protein_coding -54772,MIER2,ENSG00000105556,protein_coding -56971,PLEKHA4,ENSG00000105559,protein_coding -57239,PPP2R1A,ENSG00000105568,protein_coding -55463,TNPO2,ENSG00000105576,protein_coding -55456,WDR83OS,ENSG00000105583,protein_coding -57391,CACNG7,ENSG00000105605,protein_coding -55484,GCDH,ENSG00000105607,protein_coding -57416,LILRB5,ENSG00000105609,protein_coding -55483,KLF1,ENSG00000105610,protein_coding -55481,DNASE2,ENSG00000105612,protein_coding -55479,MAST1,ENSG00000105613,protein_coding -57404,LENG1,ENSG00000105617,protein_coding -57401,PRPF31,ENSG00000105618,protein_coding -57400,TFPT,ENSG00000105619,protein_coding 
-55744,JAK3,ENSG00000105639,protein_coding -55745,RPL18A,ENSG00000105640,protein_coding -55748,SLC5A5,ENSG00000105641,protein_coding -55750,KCNN1,ENSG00000105642,protein_coding -55752,ARRDC2,ENSG00000105643,protein_coding -55761,PIK3R2,ENSG00000105647,protein_coding -55764,RAB3A,ENSG00000105649,protein_coding -55767,AC005759.1,ENSG00000105650,protein_coding -55780,ISYNA1,ENSG00000105655,protein_coding -55782,ELL,ENSG00000105656,protein_coding -55797,CRTC1,ENSG00000105662,protein_coding -55798,COMP,ENSG00000105664,protein_coding -56280,UPK1A,ENSG00000105668,protein_coding -55803,COPE,ENSG00000105669,protein_coding -55805,DDX49,ENSG00000105671,protein_coding -56278,ETV2,ENSG00000105672,protein_coding -56271,ATP4A,ENSG00000105675,protein_coding -55810,ARMC6,ENSG00000105676,protein_coding -56270,TMEM147,ENSG00000105677,protein_coding -56268,GAPDHS,ENSG00000105679,protein_coding -56251,MAG,ENSG00000105695,protein_coding -55794,AC003112.1,ENSG00000105696,protein_coding -56250,HAMP,ENSG00000105697,protein_coding -56249,USF2,ENSG00000105698,protein_coding -56246,LSR,ENSG00000105699,protein_coding -55788,KXD1,ENSG00000105700,protein_coding -55786,FKBP8,ENSG00000105701,protein_coding -55824,AC004475.1,ENSG00000105705,protein_coding -56232,HPN,ENSG00000105707,protein_coding -55843,ZNF14,ENSG00000105708,protein_coding -56231,SCN1B,ENSG00000105711,protein_coding -55834,PBX4,ENSG00000105717,protein_coding -56636,ERF,ENSG00000105722,protein_coding -56633,GSK3A,ENSG00000105723,protein_coding -55839,ATP13A1,ENSG00000105726,protein_coding -56626,ZNF574,ENSG00000105732,protein_coding -56625,GRIK5,ENSG00000105737,protein_coding -56398,SIPA1L3,ENSG00000105738,protein_coding -55903,ZNF85,ENSG00000105750,protein_coding -56684,ETHE1,ENSG00000105755,protein_coding -56695,CADM4,ENSG00000105767,protein_coding -56700,SMG9,ENSG00000105771,protein_coding -22113,AVL9,ENSG00000105778,protein_coding -23116,RUNDC3B,ENSG00000105784,protein_coding -23142,CFAP69,ENSG00000105792,protein_coding 
-23147,GTPBP10,ENSG00000105793,protein_coding -23474,RASA4,ENSG00000105808,protein_coding -23184,CDK6,ENSG00000105810,protein_coding -23499,PMPCB,ENSG00000105819,protein_coding -23500,DNAJC2,ENSG00000105821,protein_coding -23202,TFPI2,ENSG00000105825,protein_coding -23207,BET1,ENSG00000105829,protein_coding -23541,NAMPT,ENSG00000105835,protein_coding -21857,TWISTNB,ENSG00000105849,protein_coding -23551,PIK3CG,ENSG00000105851,protein_coding -23232,PON3,ENSG00000105852,protein_coding -23233,PON2,ENSG00000105854,protein_coding -21869,ITGB8,ENSG00000105855,protein_coding -23554,HBP1,ENSG00000105856,protein_coding -23559,DUS4L,ENSG00000105865,protein_coding -21884,SP4,ENSG00000105866,protein_coding -23976,WDR91,ENSG00000105875,protein_coding -21886,DNAH11,ENSG00000105877,protein_coding -23568,CBLL1,ENSG00000105879,protein_coding -23257,DLX5,ENSG00000105880,protein_coding -23992,MTPN,ENSG00000105887,protein_coding -21890,STEAP1B,ENSG00000105889,protein_coding -24006,PTN,ENSG00000105894,protein_coding -21937,MPP6,ENSG00000105926,protein_coding -21940,GSDME,ENSG00000105928,protein_coding -24035,ATP6V0A4,ENSG00000105929,protein_coding -24040,ZC3HAV1,ENSG00000105939,protein_coding -24042,TTC26,ENSG00000105948,protein_coding -22341,OGDH,ENSG00000105953,protein_coding -21948,NPVF,ENSG00000105954,protein_coding -21573,ADAP1,ENSG00000105963,protein_coding -23646,TFEC,ENSG00000105967,protein_coding -22345,H2AFV,ENSG00000105968,protein_coding -23652,CAV2,ENSG00000105971,protein_coding -23654,CAV1,ENSG00000105974,protein_coding -23658,MET,ENSG00000105976,protein_coding -24515,RNF32,ENSG00000105982,protein_coding -24517,LMBR1,ENSG00000105983,protein_coding -23685,WNT2,ENSG00000105989,protein_coding -21980,HOXA1,ENSG00000105991,protein_coding -24532,DNAJB6,ENSG00000105993,protein_coding -21987,HOXA2,ENSG00000105996,protein_coding -21988,HOXA3,ENSG00000105997,protein_coding -21617,LFNG,ENSG00000106003,protein_coding -21993,HOXA5,ENSG00000106004,protein_coding 
-21994,HOXA6,ENSG00000106006,protein_coding -21619,BRAT1,ENSG00000106009,protein_coding -21620,IQCE,ENSG00000106012,protein_coding -23699,ANKRD7,ENSG00000106013,protein_coding -24556,VIPR2,ENSG00000106018,protein_coding -23711,TSPAN12,ENSG00000106025,protein_coding -24102,SSBP1,ENSG00000106028,protein_coding -22012,HOXA13,ENSG00000106031,protein_coding -23714,CPED1,ENSG00000106034,protein_coding -22021,EVX1,ENSG00000106038,protein_coding -22030,HIBADH,ENSG00000106049,protein_coding -22033,TAX1BP1,ENSG00000106052,protein_coding -22045,CPVL,ENSG00000106066,protein_coding -22049,CHN2,ENSG00000106069,protein_coding -22432,GRB10,ENSG00000106070,protein_coding -22908,ABHD11,ENSG00000106077,protein_coding -22438,COBL,ENSG00000106078,protein_coding -22064,FKBP14,ENSG00000106080,protein_coding -22065,PLEKHA8,ENSG00000106086,protein_coding -22904,STX1A,ENSG00000106089,protein_coding -22080,NOD1,ENSG00000106100,protein_coding -22089,GARS,ENSG00000106105,protein_coding -22090,CRHR2,ENSG00000106113,protein_coding -24210,EPHB6,ENSG00000106123,protein_coding -22094,MINDY4,ENSG00000106125,protein_coding -22098,GHRHR,ENSG00000106128,protein_coding -24229,CASP2,ENSG00000106144,protein_coding -22519,CHCHD2,ENSG00000106153,protein_coding -22968,CCL24,ENSG00000106178,protein_coding -22986,HSPB1,ENSG00000106211,protein_coding -23290,NPTX2,ENSG00000106236,protein_coding -23305,PDAP1,ENSG00000106244,protein_coding -23306,BUD31,ENSG00000106245,protein_coding -23307,PTCD1,ENSG00000106246,protein_coding -23321,CYP3A5,ENSG00000106258,protein_coding -23342,ZKSCAN1,ENSG00000106261,protein_coding -21612,EIF3B,ENSG00000106263,protein_coding -21609,SNX8,ENSG00000106266,protein_coding -21608,NUDT1,ENSG00000106268,protein_coding -23727,PTPRZ1,ENSG00000106278,protein_coding -23351,TAF6,ENSG00000106290,protein_coding -23753,WASL,ENSG00000106299,protein_coding -23757,HYAL4,ENSG00000106302,protein_coding -23758,SPAM1,ENSG00000106304,protein_coding -21682,AIMP2,ENSG00000106305,protein_coding 
-23395,TFR2,ENSG00000106327,protein_coding -23798,FSCN3,ENSG00000106328,protein_coding -23394,MOSPD3,ENSG00000106330,protein_coding -23799,PAX4,ENSG00000106331,protein_coding -23393,PCOLCE,ENSG00000106333,protein_coding -23391,FBXO24,ENSG00000106336,protein_coding -22106,PPP1R17,ENSG00000106341,protein_coding -23809,RBM28,ENSG00000106344,protein_coding -21689,USP42,ENSG00000106346,protein_coding -23813,IMPDH1,ENSG00000106348,protein_coding -23386,AGFG2,ENSG00000106351,protein_coding -22112,LSM5,ENSG00000106355,protein_coding -23420,SERPINE1,ENSG00000106366,protein_coding -23421,AP1S1,ENSG00000106367,protein_coding -23425,MOGAT3,ENSG00000106384,protein_coding -21724,C1GALT1,ENSG00000106392,protein_coding -23428,PLOD3,ENSG00000106397,protein_coding -21733,RPA3,ENSG00000106399,protein_coding -23429,ZNHIT1,ENSG00000106400,protein_coding -23430,CLDN15,ENSG00000106404,protein_coding -24291,NOBOX,ENSG00000106410,protein_coding -21742,GLCCI1,ENSG00000106415,protein_coding -23441,MYL10,ENSG00000106436,protein_coding -21770,PHF14,ENSG00000106443,protein_coding -23874,NRF1,ENSG00000106459,protein_coding -21777,TMEM106B,ENSG00000106460,protein_coding -24328,EZH2,ENSG00000106462,protein_coding -23896,CEP41,ENSG00000106477,protein_coding -24361,ZNF862,ENSG00000106479,protein_coding -22211,SFRP4,ENSG00000106483,protein_coding -23899,MEST,ENSG00000106484,protein_coding -21805,MEOX2,ENSG00000106511,protein_coding -21820,ANKMY2,ENSG00000106524,protein_coding -24374,ACTR3C,ENSG00000106526,protein_coding -22242,POU6F2,ENSG00000106536,protein_coding -21824,TSPAN13,ENSG00000106537,protein_coding -24378,RARRES2,ENSG00000106538,protein_coding -21825,AGR2,ENSG00000106541,protein_coding -21830,AHR,ENSG00000106546,protein_coding -23946,CHCHD3,ENSG00000106554,protein_coding -24396,GIMAP2,ENSG00000106560,protein_coding -24402,TMEM176B,ENSG00000106565,protein_coding -22277,GLI3,ENSG00000106571,protein_coding -22285,PSMA2,ENSG00000106588,protein_coding 
-22287,MRPL32,ENSG00000106591,protein_coding -22299,COA1,ENSG00000106603,protein_coding -22300,BLVRA,ENSG00000106605,protein_coding -22304,URGCP,ENSG00000106608,protein_coding -22809,TMEM248,ENSG00000106609,protein_coding -24435,RHEB,ENSG00000106615,protein_coding -24437,PRKAG2,ENSG00000106617,protein_coding -22323,AEBP1,ENSG00000106624,protein_coding -22325,POLD2,ENSG00000106628,protein_coding -22327,MYL7,ENSG00000106631,protein_coding -22328,GCK,ENSG00000106633,protein_coding -22897,BCL7B,ENSG00000106635,protein_coding -22329,YKT6,ENSG00000106636,protein_coding -22898,TBL2,ENSG00000106638,protein_coding -24443,GALNTL5,ENSG00000106648,protein_coding -22922,CLIP2,ENSG00000106665,protein_coding -22917,EIF4H,ENSG00000106682,protein_coding -22916,LIMK1,ENSG00000106683,protein_coding -29525,SPATA6L,ENSG00000106686,protein_coding -29524,SLC1A1,ENSG00000106688,protein_coding -31307,LHX2,ENSG00000106689,protein_coding -31021,FKTN,ENSG00000106692,protein_coding -31018,FSD1L,ENSG00000106701,protein_coding -30123,CNTNAP3,ENSG00000106714,protein_coding -30678,SPIN1,ENSG00000106723,protein_coding -30500,NMRK1,ENSG00000106733,protein_coding -31071,TMEM245,ENSG00000106771,protein_coding -30514,PRUNE2,ENSG00000106772,protein_coding -31229,MEGF9,ENSG00000106780,protein_coding -30912,TRIM14,ENSG00000106785,protein_coding -30914,CORO2A,ENSG00000106789,protein_coding -30927,TGFBR1,ENSG00000106799,protein_coding -30932,SEC61B,ENSG00000106803,protein_coding -31238,C5,ENSG00000106804,protein_coding -30753,OGN,ENSG00000106809,protein_coding -30755,ASPN,ENSG00000106819,protein_coding -30756,ECM2,ENSG00000106823,protein_coding -30545,TLE4,ENSG00000106829,protein_coding -31259,LHX6,ENSG00000106852,protein_coding -31105,PTGR1,ENSG00000106853,protein_coding -31118,SUSD1,ENSG00000106868,protein_coding -31169,AMBP,ENSG00000106927,protein_coding -31175,AKNA,ENSG00000106948,protein_coding -31190,TNFSF8,ENSG00000106952,protein_coding -31414,DNM1,ENSG00000106976,protein_coding 
-31391,ENG,ENSG00000106991,protein_coding -31395,AK1,ENSG00000106992,protein_coding -29529,CDC37L1,ENSG00000106993,protein_coding -29555,RLN2,ENSG00000107014,protein_coding -29557,RLN1,ENSG00000107018,protein_coding -29560,PLGRKT,ENSG00000107020,protein_coding -31450,TBC1D13,ENSG00000107021,protein_coding -29566,RIC1,ENSG00000107036,protein_coding -29593,KDM4C,ENSG00000107077,protein_coding -29476,DOCK8,ENSG00000107099,protein_coding -29479,KANK1,ENSG00000107104,protein_coding -29798,ELAVL2,ENSG00000107105,protein_coding -31506,NCS1,ENSG00000107130,protein_coding -30006,TESK1,ENSG00000107140,protein_coding -31650,KCNT1,ENSG00000107147,protein_coding -30017,CA9,ENSG00000107159,protein_coding -31510,FUBP3,ENSG00000107164,protein_coding -29629,TYRP1,ENSG00000107165,protein_coding -30021,CREB3,ENSG00000107175,protein_coding -30024,RGP1,ENSG00000107185,protein_coding -29637,MPDZ,ENSG00000107186,protein_coding -31658,LHX3,ENSG00000107187,protein_coding -29868,DDX58,ENSG00000107201,protein_coding -31709,EDF1,ENSG00000107223,protein_coding -30420,PIP5K1B,ENSG00000107242,protein_coding -29518,GLIS3,ENSG00000107249,protein_coding -29893,BAG1,ENSG00000107262,protein_coding -31540,RAPGEF1,ENSG00000107263,protein_coding -31727,NPDC1,ENSG00000107281,protein_coding -30432,APBA1,ENSG00000107282,protein_coding -31547,SETX,ENSG00000107290,protein_coding -29691,SH3GL2,ENSG00000107295,protein_coding -31718,PTGDS,ENSG00000107317,protein_coding -31723,ABCA2,ENSG00000107331,protein_coding -30096,SHB,ENSG00000107338,protein_coding -29928,UBE2R2,ENSG00000107341,protein_coding -30458,ABHD17B,ENSG00000107362,protein_coding -30090,EXOSC3,ENSG00000107371,protein_coding -30470,ZFAND5,ENSG00000107372,protein_coding -101,DVL1,ENSG00000107404,protein_coding -36795,PDLIM1,ENSG00000107438,protein_coding -36806,CCNJ,ENSG00000107443,protein_coding -36814,DNTT,ENSG00000107447,protein_coding -35307,GATA3,ENSG00000107485,protein_coding -37161,ATRNL1,ENSG00000107518,protein_coding 
-36866,HPS1,ENSG00000107521,protein_coding -35381,PHYH,ENSG00000107537,protein_coding -35907,RASSF4,ENSG00000107551,protein_coding -36889,DNMBP,ENSG00000107554,protein_coding -37198,RAB11FIP2,ENSG00000107560,protein_coding -35896,CXCL12,ENSG00000107562,protein_coding -36895,ERLIN1,ENSG00000107566,protein_coding -37213,EIF3A,ENSG00000107581,protein_coding -36904,PKD2L1,ENSG00000107593,protein_coding -35444,CUBN,ENSG00000107611,protein_coding -35446,TRDMT1,ENSG00000107614,protein_coding -36237,DDX50,ENSG00000107625,protein_coding -36006,MAPK8,ENSG00000107643,protein_coding -37237,SEC23IP,ENSG00000107651,protein_coding -37259,ATE1,ENSG00000107669,protein_coding -37264,NSMCE4A,ENSG00000107672,protein_coding -37271,PLEKHA1,ENSG00000107679,protein_coding -36283,PALD1,ENSG00000107719,protein_coding -36294,UNC5B,ENSG00000107731,protein_coding -36298,CDH23,ENSG00000107736,protein_coding -36301,VSIR,ENSG00000107738,protein_coding -36308,SPOCK2,ENSG00000107742,protein_coding -36317,MICU1,ENSG00000107745,protein_coding -36359,PPP3CB,ENSG00000107758,protein_coding -36565,CCSER2,ENSG00000107771,protein_coding -36595,BMPR1A,ENSG00000107779,protein_coding -36629,MINPP1,ENSG00000107789,protein_coding -36660,ACTA2,ENSG00000107796,protein_coding -36669,LIPA,ENSG00000107798,protein_coding -36931,TLX1,ENSG00000107807,protein_coding -36924,TWNK,ENSG00000107815,protein_coding -36925,LZTS2,ENSG00000107816,protein_coding -36927,SFXN3,ENSG00000107819,protein_coding -36929,KAZALD1,ENSG00000107821,protein_coding -36946,FBXW4,ENSG00000107829,protein_coding -36950,FGF8,ENSG00000107831,protein_coding -36951,NPM3,ENSG00000107833,protein_coding -36718,TNKS2,ENSG00000107854,protein_coding -36965,PITX3,ENSG00000107859,protein_coding -36966,GBF1,ENSG00000107862,protein_coding -35561,ARHGAP21,ENSG00000107863,protein_coding -36724,CPEB3,ENSG00000107864,protein_coding -36969,FBXL15,ENSG00000107872,protein_coding -36970,CUEDC2,ENSG00000107874,protein_coding -36977,SUFU,ENSG00000107882,protein_coding 
-35600,ANKRD26,ENSG00000107890,protein_coding -35605,ACBD5,ENSG00000107897,protein_coding -37310,LHPP,ENSG00000107902,protein_coding -35165,LARP4B,ENSG00000107929,protein_coding -35168,GTPBP4,ENSG00000107937,protein_coding -37337,EDRF1,ENSG00000107938,protein_coding -37342,BCCIP,ENSG00000107949,protein_coding -35664,MTPAP,ENSG00000107951,protein_coding -37017,NEURL1,ENSG00000107954,protein_coding -37020,SH3PXD2A,ENSG00000107957,protein_coding -35198,PITRM1,ENSG00000107959,protein_coding -37025,STN1,ENSG00000107960,protein_coding -35673,MAP3K8,ENSG00000107968,protein_coding -36078,DKK1,ENSG00000107984,protein_coding -37383,EBF3,ENSG00000108001,protein_coding -37392,GLRX3,ENSG00000108010,protein_coding -37054,SORCS1,ENSG00000108018,protein_coding -35259,TASOR2,ENSG00000108021,protein_coding -37071,XPNPEP1,ENSG00000108039,protein_coding -37084,SMC3,ENSG00000108055,protein_coding -37096,SHOC2,ENSG00000108061,protein_coding -36116,TFAM,ENSG00000108064,protein_coding -36131,CCDC6,ENSG00000108091,protein_coding -35749,CUL2,ENSG00000108094,protein_coding -35758,CCNY,ENSG00000108100,protein_coding -57518,UBE2S,ENSG00000108106,protein_coding -57516,RPL28,ENSG00000108107,protein_coding -36475,ZMIZ1,ENSG00000108175,protein_coding -36198,DNAJC12,ENSG00000108176,protein_coding -36476,PPIF,ENSG00000108179,protein_coding -36215,PBLD,ENSG00000108187,protein_coding -36532,TSPAN14,ENSG00000108219,protein_coding -36759,LGI1,ENSG00000108231,protein_coding -36774,TBC1D12,ENSG00000108239,protein_coding -36778,CYP2C18,ENSG00000108242,protein_coding -50553,KRT23,ENSG00000108244,protein_coding -50059,CRYBA1,ENSG00000108255,protein_coding -50060,NUFIP2,ENSG00000108256,protein_coding -50073,GIT1,ENSG00000108262,protein_coding -50472,RPL19,ENSG00000108298,protein_coding -50475,FBXL20,ENSG00000108306,protein_coding -50796,RUNDC3A,ENSG00000108309,protein_coding -50786,UBTF,ENSG00000108312,protein_coding -50503,CSF3,ENSG00000108342,protein_coding -50501,PSMD3,ENSG00000108344,protein_coding 
-50511,CASC3,ENSG00000108349,protein_coding -50513,RAPGEFL1,ENSG00000108352,protein_coding -51475,RGS9,ENSG00000108370,protein_coding -51229,RNF43,ENSG00000108375,protein_coding -50900,WNT3,ENSG00000108379,protein_coding -49133,ASPA,ENSG00000108381,protein_coding -51243,RAD51C,ENSG00000108384,protein_coding -51234,SEPT4,ENSG00000108387,protein_coding -51232,MTMR4,ENSG00000108389,protein_coding -51251,TRIM37,ENSG00000108395,protein_coding -49154,P2RX1,ENSG00000108405,protein_coding -51272,DHX40,ENSG00000108406,protein_coding -50605,KRT37,ENSG00000108417,protein_coding -51280,TUBD1,ENSG00000108423,protein_coding -50928,KPNB1,ENSG00000108424,protein_coding -50905,GOSR2,ENSG00000108433,protein_coding -50943,PNPO,ENSG00000108439,protein_coding -51282,RPS6KB1,ENSG00000108443,protein_coding -49721,TRIM16L,ENSG00000108448,protein_coding -50947,CDK5RAP3,ENSG00000108465,protein_coding -50955,CBX1,ENSG00000108468,protein_coding -51702,RECQL5,ENSG00000108469,protein_coding -49586,PIGL,ENSG00000108474,protein_coding -51709,GALK1,ENSG00000108479,protein_coding -51341,INTS2,ENSG00000108506,protein_coding -49207,CAMTA2,ENSG00000108509,protein_coding -51343,MED13,ENSG00000108510,protein_coding -50977,HOXB6,ENSG00000108511,protein_coding -49205,ENO3,ENSG00000108515,protein_coding -49204,PFN1,ENSG00000108518,protein_coding -49203,RNF167,ENSG00000108523,protein_coding -49202,SLC25A11,ENSG00000108528,protein_coding -49652,RASD1,ENSG00000108551,protein_coding -49199,CHRNE,ENSG00000108556,protein_coding -49662,RAI1,ENSG00000108557,protein_coding -49230,NUP88,ENSG00000108559,protein_coding -49234,C1QBP,ENSG00000108561,protein_coding -50102,SLC6A4,ENSG00000108576,protein_coding -50106,BLMH,ENSG00000108578,protein_coding -50111,CPD,ENSG00000108582,protein_coding -50112,GOSR1,ENSG00000108587,protein_coding -51407,CCDC47,ENSG00000108588,protein_coding -49260,MED31,ENSG00000108590,protein_coding -49677,DRG2,ENSG00000108591,protein_coding -51409,FTSJ3,ENSG00000108592,protein_coding 
-49801,AKAP10,ENSG00000108599,protein_coding -49794,ALDH3A1,ENSG00000108602,protein_coding -51411,SMARCD2,ENSG00000108604,protein_coding -51426,ICAM2,ENSG00000108622,protein_coding -51821,SYNGR2,ENSG00000108639,protein_coding -49761,B9D1,ENSG00000108641,protein_coding -50182,UTP6,ENSG00000108651,protein_coding -51441,DDX5,ENSG00000108654,protein_coding -50206,C17orf75,ENSG00000108666,protein_coding -51842,CYTH1,ENSG00000108669,protein_coding -50217,PSMD11,ENSG00000108671,protein_coding -51853,LGALS3BP,ENSG00000108679,protein_coding -50234,ASIC2,ENSG00000108684,protein_coding -50259,CCL7,ENSG00000108688,protein_coding -50258,CCL2,ENSG00000108691,protein_coding -50261,CCL8,ENSG00000108700,protein_coding -50264,CCL1,ENSG00000108702,protein_coding -50320,PEX12,ENSG00000108733,protein_coding -50608,KRT32,ENSG00000108759,protein_coding -50644,DHX58,ENSG00000108771,protein_coding -50645,KAT2A,ENSG00000108773,protein_coding -50648,RAB5C,ENSG00000108774,protein_coding -50668,NAGLU,ENSG00000108784,protein_coding -50671,HSD17B1,ENSG00000108786,protein_coding -50674,MLX,ENSG00000108788,protein_coding -50686,CNTNAP1,ENSG00000108797,protein_coding -51019,ABI3,ENSG00000108798,protein_coding -50688,EZH1,ENSG00000108799,protein_coding -51052,DLX4,ENSG00000108813,protein_coding -51062,PPP1R9B,ENSG00000108819,protein_coding -51068,COL1A1,ENSG00000108821,protein_coding -51066,SGCA,ENSG00000108823,protein_coding -50710,PTGES3L-AARSD1,ENSG00000108825,protein_coding -51076,MRPL27,ENSG00000108826,protein_coding -50715,VAT1,ENSG00000108828,protein_coding -51078,LRRC59,ENSG00000108829,protein_coding -50716,RND2,ENSG00000108830,protein_coding -49279,ALOX12,ENSG00000108839,protein_coding -50777,HDAC5,ENSG00000108840,protein_coding -51095,ABCC3,ENSG00000108846,protein_coding -51099,LUC7L3,ENSG00000108848,protein_coding -50770,PPY,ENSG00000108849,protein_coding -50764,MPP2,ENSG00000108852,protein_coding -51445,SMURF2,ENSG00000108854,protein_coding -50759,DUSP3,ENSG00000108861,protein_coding 
-51499,CACNG1,ENSG00000108878,protein_coding -50822,EFTUD2,ENSG00000108883,protein_coding -51151,HLF,ENSG00000108924,protein_coding -51545,SLC16A6,ENSG00000108932,protein_coding -51549,PRKAR1A,ENSG00000108946,protein_coding -49361,EFNB3,ENSG00000108947,protein_coding -51550,FAM20A,ENSG00000108950,protein_coding -49033,YWHAE,ENSG00000108953,protein_coding -51154,MMD,ENSG00000108960,protein_coding -49404,RANGRF,ENSG00000108961,protein_coding -49061,DPH1,ENSG00000108963,protein_coding -51568,MAP2K6,ENSG00000108984,protein_coding -49868,DHRS7B,ENSG00000109016,protein_coding -49935,WSB1,ENSG00000109046,protein_coding -49449,RCVRN,ENSG00000109047,protein_coding -49459,MYH1,ENSG00000109061,protein_coding -51653,SLC9A3R1,ENSG00000109062,protein_coding -49462,MYH3,ENSG00000109063,protein_coding -51654,NAT9,ENSG00000109065,protein_coding -51655,TMEM104,ENSG00000109066,protein_coding -49991,VTN,ENSG00000109072,protein_coding -49982,TNFAIP1,ENSG00000109079,protein_coding -49981,IFT20,ENSG00000109083,protein_coding -49980,TMEM97,ENSG00000109084,protein_coding -51664,CDR2L,ENSG00000109089,protein_coding -49533,PMP22,ENSG00000109099,protein_coding -50001,FOXN1,ENSG00000109101,protein_coding -50002,UNC119,ENSG00000109103,protein_coding -50004,ALDOC,ENSG00000109107,protein_coding -50015,SUPT6H,ENSG00000109111,protein_coding -50018,RAB34,ENSG00000109113,protein_coding -50046,PHF12,ENSG00000109118,protein_coding -13484,PHOX2B,ENSG00000109132,protein_coding -13492,TMEM33,ENSG00000109133,protein_coding -13542,GABRA4,ENSG00000109158,protein_coding -13799,GNRHR,ENSG00000109163,protein_coding -13564,SLAIN2,ENSG00000109171,protein_coding -13569,OCIAD1,ENSG00000109180,protein_coding -13829,UGT2B10,ENSG00000109181,protein_coding -13574,CWH43,ENSG00000109182,protein_coding -13595,DCUN1D4,ENSG00000109184,protein_coding -13606,USP46,ENSG00000109189,protein_coding -13872,SULT1E1,ENSG00000109193,protein_coding -13881,ODAM,ENSG00000109205,protein_coding -13885,SMR3A,ENSG00000109208,protein_coding 
-13633,CHIC2,ENSG00000109220,protein_coding -13670,NMU,ENSG00000109255,protein_coding -13678,KIAA1211,ENSG00000109265,protein_coding -14297,LAMTOR3,ENSG00000109270,protein_coding -13933,PF4V1,ENSG00000109272,protein_coding -14325,NFKB1,ENSG00000109320,protein_coding -13952,AREG,ENSG00000109321,protein_coding -14326,MANBA,ENSG00000109323,protein_coding -14332,UBE2D3,ENSG00000109332,protein_coding -14132,MAPK10,ENSG00000109339,protein_coding -14765,ELF2,ENSG00000109381,protein_coding -14775,NDUFC1,ENSG00000109390,protein_coding -14802,UCP1,ENSG00000109424,protein_coding -14806,TBC1D9,ENSG00000109436,protein_coding -14813,ZNF330,ENSG00000109445,protein_coding -14820,INPP4B,ENSG00000109452,protein_coding -14830,GAB1,ENSG00000109458,protein_coding -15160,KLHL2,ENSG00000109466,protein_coding -14606,IL2,ENSG00000109471,protein_coding -15164,CPE,ENSG00000109472,protein_coding -14404,RPL34,ENSG00000109475,protein_coding -13020,WFS1,ENSG00000109501,protein_coding -15184,ANXA10,ENSG00000109511,protein_coding -13043,GRPEL1,ENSG00000109519,protein_coding -14428,GAR1,ENSG00000109534,protein_coding -15479,FRG1,ENSG00000109536,protein_coding -15205,CLCN3,ENSG00000109572,protein_coding -15216,AADAT,ENSG00000109576,protein_coding -15239,GALNT7,ENSG00000109586,protein_coding -13282,DHX15,ENSG00000109606,protein_coding -13289,SOD3,ENSG00000109610,protein_coding -13292,SEPSECS,ENSG00000109618,protein_coding -13069,CPZ,ENSG00000109625,protein_coding -14990,TRIM2,ENSG00000109654,protein_coding -13134,SLC2A9,ENSG00000109667,protein_coding -14973,FBXW7,ENSG00000109670,protein_coding -15298,NEIL3,ENSG00000109674,protein_coding -13319,TBC1D19,ENSG00000109680,protein_coding -13157,CLNK,ENSG00000109684,protein_coding -12923,NSD2,ENSG00000109685,protein_coding -14944,SH3D19,ENSG00000109686,protein_coding -13321,STIM2,ENSG00000109689,protein_coding -13180,NKX3-2,ENSG00000109705,protein_coding -12951,MFSD10,ENSG00000109736,protein_coding -15063,GLRB,ENSG00000109738,protein_coding 
-13209,BST1,ENSG00000109743,protein_coding -15102,RAPGEF2,ENSG00000109756,protein_coding -12964,HGFAC,ENSG00000109758,protein_coding -15401,SNX25,ENSG00000109762,protein_coding -15402,LRP2BP,ENSG00000109771,protein_coding -15405,UFSP2,ENSG00000109775,protein_coding -13412,KLF3,ENSG00000109787,protein_coding -13420,KLHL5,ENSG00000109790,protein_coding -15419,FAM149A,ENSG00000109794,protein_coding -13242,NCAPG,ENSG00000109805,protein_coding -13433,UGDH,ENSG00000109814,protein_coding -13280,PPARGC1A,ENSG00000109819,protein_coding -35003,DDX25,ENSG00000109832,protein_coding -34586,CRYAB,ENSG00000109846,protein_coding -32493,DBX1,ENSG00000109851,protein_coding -32494,HTATIP2,ENSG00000109854,protein_coding -34194,CTSC,ENSG00000109861,protein_coding -32553,CCDC34,ENSG00000109881,protein_coding -34652,ZBTB16,ENSG00000109906,protein_coding -32606,ELP4,ENSG00000109911,protein_coding -34695,ZPR1,ENSG00000109917,protein_coding -32879,MTCH2,ENSG00000109919,protein_coding -32881,FNBP4,ENSG00000109920,protein_coding -34851,TECTA,ENSG00000109927,protein_coding -34854,SC5D,ENSG00000109929,protein_coding -34879,CRTAM,ENSG00000109943,protein_coding -34880,JHY,ENSG00000109944,protein_coding -35137,B3GAT1,ENSG00000109956,protein_coding -34888,HSPA8,ENSG00000109971,protein_coding -33113,P2RX3,ENSG00000109991,protein_coding -34924,VWA5A,ENSG00000110002,protein_coding -33452,DNAJC4,ENSG00000110011,protein_coding -34957,SIAE,ENSG00000110013,protein_coding -33507,SNX15,ENSG00000110025,protein_coding -33178,LPXN,ENSG00000110031,protein_coding -33202,DTX4,ENSG00000110042,protein_coding -33496,ATG2A,ENSG00000110046,protein_coding -33491,EHD1,ENSG00000110047,protein_coding -33225,OSBP,ENSG00000110048,protein_coding -33718,UNC93B1,ENSG00000110057,protein_coding -35001,PUS3,ENSG00000110060,protein_coding -35023,DCPS,ENSG00000110063,protein_coding -33729,KMT5B,ENSG00000110066,protein_coding -35018,FOXRED1,ENSG00000110074,protein_coding -33733,PPP6R3,ENSG00000110075,protein_coding 
-33482,NRXN2,ENSG00000110076,protein_coding -33251,MS4A6A,ENSG00000110077,protein_coding -33252,MS4A4A,ENSG00000110079,protein_coding -35025,ST3GAL4,ENSG00000110080,protein_coding -33738,CPT1A,ENSG00000110090,protein_coding -33763,CCND1,ENSG00000110092,protein_coding -33271,CCDC86,ENSG00000110104,protein_coding -33274,PRPF19,ENSG00000110107,protein_coding -33277,TMEM109,ENSG00000110108,protein_coding -32160,CCKBR,ENSG00000110148,protein_coding -32167,HPX,ENSG00000110169,protein_coding -32168,TRIM3,ENSG00000110171,protein_coding -34259,CHORDC1,ENSG00000110172,protein_coding -33852,FOLR1,ENSG00000110195,protein_coding -33846,ANAPC15,ENSG00000110200,protein_coding -33847,FOLR3,ENSG00000110203,protein_coding -34332,PANX1,ENSG00000110218,protein_coding -33894,ARHGEF17,ENSG00000110237,protein_coding -34696,APOA5,ENSG00000110243,protein_coding -34699,APOA4,ENSG00000110244,protein_coding -34700,APOC3,ENSG00000110245,protein_coding -34720,CEP164,ENSG00000110274,protein_coding -32294,RNF141,ENSG00000110315,protein_coding -34412,CEP126,ENSG00000110318,protein_coding -32300,EIF4G2,ENSG00000110321,protein_coding -34732,IL10RA,ENSG00000110324,protein_coding -32311,GALNT18,ENSG00000110328,protein_coding -34424,BIRC2,ENSG00000110330,protein_coding -34747,UBE4A,ENSG00000110344,protein_coding -34769,DDX6,ENSG00000110367,protein_coding -34778,UPK2,ENSG00000110375,protein_coding -34807,CBL,ENSG00000110395,protein_coding -34826,NECTIN1,ENSG00000110400,protein_coding -32645,HIPK3,ENSG00000110422,protein_coding -32646,KIAA1549L,ENSG00000110427,protein_coding -32653,FBXO3,ENSG00000110429,protein_coding -32678,PDHX,ENSG00000110435,protein_coding -32684,SLC1A2,ENSG00000110436,protein_coding -32703,COMMD9,ENSG00000110442,protein_coding -33280,SLC15A3,ENSG00000110446,protein_coding -33284,CD5,ENSG00000110448,protein_coding -32774,ACCS,ENSG00000110455,protein_coding -33339,SCGB2A2,ENSG00000110484,protein_coding -32828,MDK,ENSG00000110492,protein_coding 
-32830,AMBRA1,ENSG00000110497,protein_coding -32859,MADD,ENSG00000110514,protein_coding -32873,PTPMT1,ENSG00000110536,protein_coding -33437,NAA40,ENSG00000110583,protein_coding -31958,CARS,ENSG00000110619,protein_coding -31953,SLC22A18,ENSG00000110628,protein_coding -31937,CD81,ENSG00000110651,protein_coding -34510,SLC35F2,ENSG00000110660,protein_coding -31931,C11orf21,ENSG00000110665,protein_coding -34505,ELMOD1,ENSG00000110675,protein_coding -32376,CALCA,ENSG00000110680,protein_coding -32386,SOX6,ENSG00000110693,protein_coding -32391,C11orf58,ENSG00000110696,protein_coding -33686,PITPNM1,ENSG00000110697,protein_coding -32396,RPS13,ENSG00000110700,protein_coding -33684,AIP,ENSG00000110711,protein_coding -31995,NUP98,ENSG00000110713,protein_coding -33721,NDUFS8,ENSG00000110717,protein_coding -33724,TCIRG1,ENSG00000110719,protein_coding -33727,CHKA,ENSG00000110721,protein_coding -34524,EXPH5,ENSG00000110723,protein_coding -32439,HPS5,ENSG00000110756,protein_coding -32440,GTF2H1,ENSG00000110768,protein_coding -34563,POU2AF1,ENSG00000110777,protein_coding -32461,PTPN5,ENSG00000110786,protein_coding -37623,VWF,ENSG00000110799,protein_coding -40186,PSMD9,ENSG00000110801,protein_coding -37673,P3H3,ENSG00000110811,protein_coding -38188,PPFIBP1,ENSG00000110841,protein_coding -38592,PRPF40B,ENSG00000110844,protein_coding -37841,CD69,ENSG00000110848,protein_coding -39825,PRDM4,ENSG00000110851,protein_coding -37844,CLEC2B,ENSG00000110852,protein_coding -40130,COQ5,ENSG00000110871,protein_coding -39843,SELPLG,ENSG00000110876,protein_coding -39846,CORO1C,ENSG00000110880,protein_coding -38617,ASIC1,ENSG00000110881,protein_coding -39854,DAO,ENSG00000110887,protein_coding -38241,CAPRIN2,ENSG00000110888,protein_coding -38246,TSPAN11,ENSG00000110900,protein_coding -39869,KCTD10,ENSG00000110906,protein_coding -38649,SLC11A2,ENSG00000110911,protein_coding -40138,MLEC,ENSG00000110917,protein_coding -39873,MVK,ENSG00000110921,protein_coding -38653,CSRNP2,ENSG00000110925,protein_coding 
-40165,CAMKK2,ENSG00000110931,protein_coding -38663,BIN2,ENSG00000110934,protein_coding -38947,IL23A,ENSG00000110944,protein_coding -38964,ATP5F1B,ENSG00000110955,protein_coding -38966,PTGES3,ENSG00000110958,protein_coding -38318,SYT10,ENSG00000110975,protein_coding -40192,BCL7A,ENSG00000110987,protein_coding -40208,RSRC2,ENSG00000111011,protein_coding -39029,CYP27B1,ENSG00000111012,protein_coding -39406,MYF6,ENSG00000111046,protein_coding -39407,MYF5,ENSG00000111049,protein_coding -39409,LIN7A,ENSG00000111052,protein_coding -38742,KRT18,ENSG00000111057,protein_coding -39413,ACSS3,ENSG00000111058,protein_coding -38746,TNS2,ENSG00000111077,protein_coding -39003,GLI1,ENSG00000111087,protein_coding -39095,PPM1H,ENSG00000111110,protein_coding -39600,METAP2,ENSG00000111142,protein_coding -39614,LTA4H,ENSG00000111144,protein_coding -39619,ELK3,ENSG00000111145,protein_coding -37490,SLC6A12,ENSG00000111181,protein_coding -37515,WNT5B,ENSG00000111186,protein_coding -37879,MAGOHB,ENSG00000111196,protein_coding -39877,TRPV4,ENSG00000111199,protein_coding -37551,ITFG2,ENSG00000111203,protein_coding -37554,FOXM1,ENSG00000111206,protein_coding -37890,PRR4,ENSG00000111215,protein_coding -37571,PRMT8,ENSG00000111218,protein_coding -37579,PARP11,ENSG00000111224,protein_coding -39898,ARPC3,ENSG00000111229,protein_coding -39899,GPN3,ENSG00000111231,protein_coding -39902,VPS29,ENSG00000111237,protein_coding -37591,FGF6,ENSG00000111241,protein_coding -39915,MYL2,ENSG00000111245,protein_coding -37593,RAD51AP1,ENSG00000111247,protein_coding -39918,CUX2,ENSG00000111249,protein_coding -39924,SH2B3,ENSG00000111252,protein_coding -37597,AKAP3,ENSG00000111254,protein_coding -37936,MANSC1,ENSG00000111261,protein_coding -37610,KCNA1,ENSG00000111262,protein_coding -37941,DUSP16,ENSG00000111266,protein_coding -37944,CREBL2,ENSG00000111269,protein_coding -39931,ACAD10,ENSG00000111271,protein_coding -39933,ALDH2,ENSG00000111275,protein_coding -37949,CDKN1B,ENSG00000111276,protein_coding 
-37962,GPRC5D,ENSG00000111291,protein_coding -39945,NAA25,ENSG00000111300,protein_coding -37970,GSG1,ENSG00000111305,protein_coding -37632,SCNN1A,ENSG00000111319,protein_coding -37633,AC005840.1,ENSG00000111321,protein_coding -40225,OGFOD2,ENSG00000111325,protein_coding -40235,CDK2AP1,ENSG00000111328,protein_coding -39963,OAS3,ENSG00000111331,protein_coding -39964,OAS2,ENSG00000111335,protein_coding -38002,ART4,ENSG00000111339,protein_coding -38004,MGP,ENSG00000111341,protein_coding -39968,RASAL1,ENSG00000111344,protein_coding -38006,ARHGDIB,ENSG00000111348,protein_coding -40255,GTF2H3,ENSG00000111358,protein_coding -40254,EIF2B1,ENSG00000111361,protein_coding -40252,DDX55,ENSG00000111364,protein_coding -38449,SLC38A1,ENSG00000111371,protein_coding -38043,RERGL,ENSG00000111404,protein_coding -38481,ENDOU,ENSG00000111405,protein_coding -40040,C12orf49,ENSG00000111412,protein_coding -38492,VDR,ENSG00000111424,protein_coding -40402,FZD10,ENSG00000111432,protein_coding -40059,RFC5,ENSG00000111445,protein_coding -40414,STX2,ENSG00000111450,protein_coding -40420,ADGRD1,ENSG00000111452,protein_coding -38831,COPZ1,ENSG00000111481,protein_coding -39150,TBC1D30,ENSG00000111490,protein_coding -39197,CAND1,ENSG00000111530,protein_coding -39215,IL26,ENSG00000111536,protein_coding -39214,IFNG,ENSG00000111537,protein_coding -38914,AC034102.1,ENSG00000111540,protein_coding -39217,MDM1,ENSG00000111554,protein_coding -39231,NUP107,ENSG00000111581,protein_coding -39270,AC092881.1,ENSG00000111596,protein_coding -38955,TIMELESS,ENSG00000111602,protein_coding -39244,CPSF6,ENSG00000111605,protein_coding -39335,KRR1,ENSG00000111615,protein_coding -37643,MRPL51,ENSG00000111639,protein_coding -37648,GAPDH,ENSG00000111640,protein_coding -37652,NOP2,ENSG00000111641,protein_coding -37654,CHD4,ENSG00000111642,protein_coding -37658,ACRBP,ENSG00000111644,protein_coding -39677,UHRF1BP1L,ENSG00000111647,protein_coding -37665,COPS7A,ENSG00000111652,protein_coding 
-37659,ING4,ENSG00000111653,protein_coding -37674,GNB3,ENSG00000111664,protein_coding -37675,CDCA3,ENSG00000111665,protein_coding -39712,CHPT1,ENSG00000111666,protein_coding -37676,USP5,ENSG00000111667,protein_coding -37677,TPI1,ENSG00000111669,protein_coding -39715,GNPTAB,ENSG00000111670,protein_coding -37678,SPSB2,ENSG00000111671,protein_coding -37683,ENO2,ENSG00000111674,protein_coding -37684,ATN1,ENSG00000111676,protein_coding -37685,C12orf57,ENSG00000111678,protein_coding -37688,PTPN6,ENSG00000111679,protein_coding -37696,LPCAT3,ENSG00000111684,protein_coding -39756,NT5DC3,ENSG00000111696,protein_coding -38076,SLCO1B3,ENSG00000111700,protein_coding -37722,APOBEC1,ENSG00000111701,protein_coding -37728,NANOG,ENSG00000111704,protein_coding -40069,SUDS3,ENSG00000111707,protein_coding -38087,GOLT1B,ENSG00000111711,protein_coding -38089,GYS2,ENSG00000111713,protein_coding -38091,LDHB,ENSG00000111716,protein_coding -40094,PRKAB1,ENSG00000111725,protein_coding -38099,CMAS,ENSG00000111726,protein_coding -39766,HCFC2,ENSG00000111727,protein_coding -38100,ST8SIA1,ENSG00000111728,protein_coding -37750,CLEC4A,ENSG00000111729,protein_coding -38106,C2CD5,ENSG00000111731,protein_coding -37786,AICDA,ENSG00000111732,protein_coding -40102,RAB35,ENSG00000111737,protein_coding -37799,PHC1,ENSG00000111752,protein_coding -40121,COX6A1,ENSG00000111775,protein_coding -40122,AL021546.1,ENSG00000111780,protein_coding -39804,RFX4,ENSG00000111783,protein_coding -39807,RIC8B,ENSG00000111785,protein_coding -40126,SRSF9,ENSG00000111786,protein_coding -38174,FGFR1OP2,ENSG00000111790,protein_coding -37830,KLRB1,ENSG00000111796,protein_coding -20157,COL12A1,ENSG00000111799,protein_coding -18975,BTN3A3,ENSG00000111801,protein_coding -18879,TDP2,ENSG00000111802,protein_coding -20681,FRK,ENSG00000111816,protein_coding -20689,DSE,ENSG00000111817,protein_coding -20701,RWDD1,ENSG00000111832,protein_coding -20702,RSPH4A,ENSG00000111834,protein_coding -18697,MAK,ENSG00000111837,protein_coding 
-18692,TMEM14C,ENSG00000111843,protein_coding -18691,PAK1IP1,ENSG00000111845,protein_coding -18683,GCNT2,ENSG00000111846,protein_coding -20319,SMIM8,ENSG00000111850,protein_coding -18708,NEDD9,ENSG00000111859,protein_coding -20726,CEP85L,ENSG00000111860,protein_coding -18718,ADTRP,ENSG00000111863,protein_coding -20733,ASF1A,ENSG00000111875,protein_coding -20732,MCM9,ENSG00000111877,protein_coding -20735,FAM184A,ENSG00000111879,protein_coding -20344,RNGTT,ENSG00000111880,protein_coding -20739,MAN1A1,ENSG00000111885,protein_coding -20357,GABRR2,ENSG00000111886,protein_coding -20767,SERINC1,ENSG00000111897,protein_coding -20790,HDDC2,ENSG00000111906,protein_coding -20789,TPD52L1,ENSG00000111907,protein_coding -20799,HINT3,ENSG00000111911,protein_coding -20796,NCOA7,ENSG00000111912,protein_coding -18889,RIPOR2,ENSG00000111913,protein_coding -21135,SASH1,ENSG00000111961,protein_coding -21140,UST,ENSG00000111962,protein_coding -21182,ULBP1,ENSG00000111981,protein_coding -21237,FBXO5,ENSG00000112029,protein_coding -21239,MTRF1L,ENSG00000112031,protein_coding -19540,PPARD,ENSG00000112033,protein_coding -21255,OPRM1,ENSG00000112038,protein_coding -19542,FANCE,ENSG00000112039,protein_coding -19546,TULP1,ENSG00000112041,protein_coding -19562,SLC26A8,ENSG00000112053,protein_coding -19564,MAPK14,ENSG00000112062,protein_coding -19867,RHAG,ENSG00000112077,protein_coding -19574,KCTD20,ENSG00000112078,protein_coding -19576,STK38,ENSG00000112079,protein_coding -19578,SRSF3,ENSG00000112081,protein_coding -21347,SOD2,ENSG00000112096,protein_coding -21357,MRPL18,ENSG00000112110,protein_coding -19899,IL17A,ENSG00000112115,protein_coding -19900,IL17F,ENSG00000112116,protein_coding -19902,MCM3,ENSG00000112118,protein_coding -19605,RNF8,ENSG00000112130,protein_coding -18730,PHACTR1,ENSG00000112137,protein_coding -19614,MDGA1,ENSG00000112139,protein_coding -19926,ICK,ENSG00000112144,protein_coding -19927,FBXO9,ENSG00000112146,protein_coding -18749,CD83,ENSG00000112149,protein_coding 
-20366,MDN1,ENSG00000112159,protein_coding -19634,GLP1R,ENSG00000112164,protein_coding -19635,SAYSD1,ENSG00000112167,protein_coding -19974,BMP5,ENSG00000112175,protein_coding -20376,BACH2,ENSG00000112182,protein_coding -18792,RBM24,ENSG00000112183,protein_coding -18794,CAP2,ENSG00000112186,protein_coding -19668,TREML2,ENSG00000112195,protein_coding -19989,ZNF451,ENSG00000112200,protein_coding -19991,BAG2,ENSG00000112208,protein_coding -19992,RAB23,ENSG00000112210,protein_coding -19661,TSPO2,ENSG00000112212,protein_coding -20423,FHL5,ENSG00000112214,protein_coding -20430,GPR63,ENSG00000112218,protein_coding -20018,KHDRBS2,ENSG00000112232,protein_coding -20443,FBXL4,ENSG00000112234,protein_coding -20454,CCNC,ENSG00000112237,protein_coding -20456,PRDM13,ENSG00000112238,protein_coding -18834,E2F3,ENSG00000112242,protein_coding -20032,PTP4A1,ENSG00000112245,protein_coding -20464,SIM1,ENSG00000112246,protein_coding -20466,ASCC3,ENSG00000112249,protein_coding -18857,HDGFL1,ENSG00000112273,protein_coding -20490,BVES,ENSG00000112276,protein_coding -20074,COL9A1,ENSG00000112280,protein_coding -20861,MED23,ENSG00000112282,protein_coding -20592,WASF1,ENSG00000112290,protein_coding -18875,GPLD1,ENSG00000112293,protein_coding -18876,ALDH5A1,ENSG00000112294,protein_coding -20509,CRYBG1,ENSG00000112297,protein_coding -20895,VNN1,ENSG00000112299,protein_coding -20898,VNN2,ENSG00000112303,protein_coding -18880,ACOT13,ENSG00000112304,protein_coding -20085,SMAP1,ENSG00000112305,protein_coding -20902,RPS12,ENSG00000112306,protein_coding -18882,C6orf62,ENSG00000112308,protein_coding -20088,B3GAT2,ENSG00000112309,protein_coding -18886,GMNN,ENSG00000112312,protein_coding -20912,EYA4,ENSG00000112319,protein_coding -20529,SOBP,ENSG00000112320,protein_coding -20545,NR2E1,ENSG00000112333,protein_coding -20547,SNX3,ENSG00000112335,protein_coding -18921,SLC17A2,ENSG00000112337,protein_coding -20945,HBS1L,ENSG00000112339,protein_coding -18922,TRIM38,ENSG00000112343,protein_coding 
-20981,PEX7,ENSG00000112357,protein_coding -20585,ZBTB24,ENSG00000112365,protein_coding -20588,FIG4,ENSG00000112367,protein_coding -21005,PERP,ENSG00000112378,protein_coding -21006,ARFGEF3,ENSG00000112379,protein_coding -20616,SLC16A10,ENSG00000112394,protein_coding -21024,HECA,ENSG00000112406,protein_coding -21056,ADGRG6,ENSG00000112414,protein_coding -21078,PHACTR2,ENSG00000112419,protein_coding -21100,EPM2A,ENSG00000112425,protein_coding -19132,OR12D3,ENSG00000112462,protein_coding -19455,SLC39A7,ENSG00000112473,protein_coding -21453,CCR6,ENSG00000112486,protein_coding -21463,UNC93A,ENSG00000112494,protein_coding -21366,SLC22A2,ENSG00000112499,protein_coding -19481,PHF1,ENSG00000112511,protein_coding -19482,CUTA,ENSG00000112514,protein_coding -21390,PACRG,ENSG00000112530,protein_coding -21401,QKI,ENSG00000112531,protein_coding -21414,C6orf118,ENSG00000112539,protein_coding -21415,PDE10A,ENSG00000112541,protein_coding -19686,MDFI,ENSG00000112559,protein_coding -19688,TFEB,ENSG00000112561,protein_coding -21490,SMOC2,ENSG00000112562,protein_coding -19703,CCND3,ENSG00000112576,protein_coding -19702,BYSL,ENSG00000112578,protein_coding -21529,FAM120B,ENSG00000112584,protein_coding -21534,TBP,ENSG00000112592,protein_coding -19712,GUCA1B,ENSG00000112599,protein_coding -19718,PRPH2,ENSG00000112619,protein_coding -19721,BICRAL,ENSG00000112624,protein_coding -19732,PPP2R5D,ENSG00000112640,protein_coding -19740,MRPL2,ENSG00000112651,protein_coding -19742,PTK7,ENSG00000112655,protein_coding -19743,SRF,ENSG00000112658,protein_coding -19744,CUL9,ENSG00000112659,protein_coding -19746,DNPH1,ENSG00000112667,protein_coding -18494,DUSP22,ENSG00000112679,protein_coding -18497,EXOC2,ENSG00000112685,protein_coding -20158,COX7A2,ENSG00000112695,protein_coding -20159,TMEM30A,ENSG00000112697,protein_coding -18521,GMDS,ENSG00000112699,protein_coding -20172,SENP6,ENSG00000112701,protein_coding -20180,IMPG1,ENSG00000112706,protein_coding -19777,VEGFA,ENSG00000112715,protein_coding 
-18576,PRPF4B,ENSG00000112739,protein_coding -20223,TTK,ENSG00000112742,protein_coding -19792,SLC29A1,ENSG00000112759,protein_coding -20635,CCN6,ENSG00000112761,protein_coding -18976,BTN2A1,ENSG00000112763,protein_coding -20638,LAMA4,ENSG00000112769,protein_coding -20236,TENT5A,ENSG00000112773,protein_coding -19816,CLIC5,ENSG00000112782,protein_coding -40493,FBRSL1,ENSG00000112787,protein_coding -19820,ENPP5,ENSG00000112796,protein_coding -18623,LY86,ENSG00000112799,protein_coding -19006,PRSS16,ENSG00000112812,protein_coding -19832,MEP1A,ENSG00000112818,protein_coding -20271,TBX18,ENSG00000112837,protein_coding -16439,ERBIN,ENSG00000112851,protein_coding -17669,PCDHB2,ENSG00000112852,protein_coding -17637,HARS2,ENSG00000112855,protein_coding -17040,NUDT12,ENSG00000112874,protein_coding -15527,CEP72,ENSG00000112877,protein_coding -17082,MAN2A1,ENSG00000112893,protein_coding -15689,SEMA5A,ENSG00000112902,protein_coding -16134,C7,ENSG00000112936,protein_coding -15634,TENT4A,ENSG00000112941,protein_coding -16151,GHR,ENSG00000112964,protein_coding -16179,HMGCS1,ENSG00000112972,protein_coding -15740,DAP,ENSG00000112977,protein_coding -17536,NME5,ENSG00000112981,protein_coding -17539,BRD8,ENSG00000112983,protein_coding -17540,KIF20A,ENSG00000112984,protein_coding -16193,NNT,ENSG00000112992,protein_coding -16204,MRPS30,ENSG00000112996,protein_coding -17553,HSPA9,ENSG00000113013,protein_coding -16551,MRPS27,ENSG00000113048,protein_coding -17610,PFDN1,ENSG00000113068,protein_coding -17611,HBEGF,ENSG00000113070,protein_coding -17613,SLC4A9,ENSG00000113073,protein_coding -17239,LOX,ENSG00000113083,protein_coding -16277,GZMK,ENSG00000113088,protein_coding -15926,CDH9,ENSG00000113100,protein_coding -17621,APBB3,ENSG00000113108,protein_coding -17631,TMCO6,ENSG00000113119,protein_coding -17932,SPARC,ENSG00000113140,protein_coding -17632,IK,ENSG00000113141,protein_coding -16620,HMGCR,ENSG00000113161,protein_coding -16621,COL4A3BP,ENSG00000113163,protein_coding 
-18289,FAF2,ENSG00000113194,protein_coding -17962,HAND1,ENSG00000113196,protein_coding -17671,PCDHB3,ENSG00000113205,protein_coding -17674,PCDHB5,ENSG00000113209,protein_coding -17676,PCDHB6,ENSG00000113211,protein_coding -17679,PCDHB7,ENSG00000113212,protein_coding -16668,PDE8B,ENSG00000113231,protein_coding -18362,CLK4,ENSG00000113240,protein_coding -17695,PCDHB15,ENSG00000113248,protein_coding -17993,HAVCR1,ENSG00000113249,protein_coding -18380,GRM6,ENSG00000113262,protein_coding -17997,ITK,ENSG00000113263,protein_coding -18415,RNF130,ENSG00000113269,protein_coding -18015,THG1L,ENSG00000113272,protein_coding -16690,ARSB,ENSG00000113273,protein_coding -18018,CLINT1,ENSG00000113282,protein_coding -16713,THBS4,ENSG00000113296,protein_coding -18431,CNOT6,ENSG00000113300,protein_coding -18046,IL12B,ENSG00000113302,protein_coding -18446,BTNL8,ENSG00000113303,protein_coding -18054,TTC1,ENSG00000113312,protein_coding -16739,MSH3,ENSG00000113318,protein_coding -16743,RASGRF2,ENSG00000113319,protein_coding -18080,GABRG2,ENSG00000113327,protein_coding -18086,CCNG1,ENSG00000113328,protein_coding -16860,POLR3G,ENSG00000113356,protein_coding -15963,DROSHA,ENSG00000113360,protein_coding -15961,CDH6,ENSG00000113361,protein_coding -17305,LMNB1,ENSG00000113368,protein_coding -16874,ARRDC3,ENSG00000113369,protein_coding -15985,GOLPH3,ENSG00000113384,protein_coding -15995,SUB1,ENSG00000113387,protein_coding -15998,NPR3,ENSG00000113389,protein_coding -16894,FAM172A,ENSG00000113391,protein_coding -17331,SLC27A6,ENSG00000113396,protein_coding -16008,TARS,ENSG00000113407,protein_coding -15573,IRX4,ENSG00000113430,protein_coding -16949,LNPEP,ENSG00000113441,protein_coding -16359,PDE4D,ENSG00000113448,protein_coding -16034,RAD1,ENSG00000113456,protein_coding -16035,BRIX1,ENSG00000113460,protein_coding -16037,AGXT2,ENSG00000113492,protein_coding -16038,PRLR,ENSG00000113494,protein_coding -15544,SLC12A7,ENSG00000113504,protein_coding -17393,IL4,ENSG00000113520,protein_coding 
-17390,RAD50,ENSG00000113522,protein_coding -17389,IL5,ENSG00000113525,protein_coding -17016,ST8SIA4,ENSG00000113532,protein_coding -17748,GNPDA1,ENSG00000113552,protein_coding -17744,PCDH12,ENSG00000113555,protein_coding -17435,SKP1,ENSG00000113558,protein_coding -16077,NUP155,ENSG00000113569,protein_coding -17437,PPP2CA,ENSG00000113575,protein_coding -17757,FGF1,ENSG00000113578,protein_coding -17765,NR3C1,ENSG00000113580,protein_coding -17430,C5orf15,ENSG00000113583,protein_coding -16430,PPWD1,ENSG00000113593,protein_coding -16099,LIFR,ENSG00000113594,protein_coding -16431,TRIM23,ENSG00000113595,protein_coding -16434,TRAPPC13,ENSG00000113597,protein_coding -16112,C9,ENSG00000113600,protein_coding -17457,SEC24A,ENSG00000113615,protein_coding -17465,TXNDC15,ENSG00000113621,protein_coding -16126,TTC33,ENSG00000113638,protein_coding -18127,RARS,ENSG00000113643,protein_coding -18126,WWC1,ENSG00000113645,protein_coding -17483,H2AFY,ENSG00000113648,protein_coding -17806,TCERG1,ENSG00000113649,protein_coding -17818,DPYSL3,ENSG00000113657,protein_coding -17506,SMAD5,ENSG00000113658,protein_coding -17867,CSNK1A1,ENSG00000113712,protein_coding -17882,HMGXB3,ENSG00000113716,protein_coding -18201,ERGIC1,ENSG00000113719,protein_coding -17888,PDGFRB,ENSG00000113721,protein_coding -17889,CDX1,ENSG00000113722,protein_coding -18206,ATP6V0E1,ENSG00000113732,protein_coding -18213,BNIP1,ENSG00000113734,protein_coding -18218,STC2,ENSG00000113739,protein_coding -18232,CPEB4,ENSG00000113742,protein_coding -18259,HRH2,ENSG00000113749,protein_coding -18324,DBN1,ENSG00000113758,protein_coding -18305,ZNF346,ENSG00000113761,protein_coding -18302,UNC5A,ENSG00000113763,protein_coding -12552,EHHADH,ENSG00000113790,protein_coding -10927,CNTN3,ENSG00000113805,protein_coding -12178,SMC4,ENSG00000113810,protein_coding -10683,SELENOK,ENSG00000113811,protein_coding -10682,ACTR8,ENSG00000113812,protein_coding -12575,TBCCD1,ENSG00000113838,protein_coding -11395,TIMMDC1,ENSG00000113845,protein_coding 
-9697,CRBN,ENSG00000113851,protein_coding -12586,KNG1,ENSG00000113889,protein_coding -12583,HRG,ENSG00000113905,protein_coding -12619,BCL6,ENSG00000113916,protein_coding -11428,HGD,ENSG00000113924,protein_coding -12645,CLDN16,ENSG00000113946,protein_coding -11082,ARL6,ENSG00000113966,protein_coding -11718,NPHP3,ENSG00000113971,protein_coding -11456,CD86,ENSG00000114013,protein_coding -11747,AMOTL2,ENSG00000114019,protein_coding -11144,NIT2,ENSG00000114021,protein_coding -11462,FAM162A,ENSG00000114023,protein_coding -9767,OGG1,ENSG00000114026,protein_coding -11466,KPNA1,ENSG00000114030,protein_coding -11769,PCCB,ENSG00000114054,protein_coding -44498,UBE3A,ENSG00000114062,protein_coding -11799,ARMC8,ENSG00000114098,protein_coding -11804,CEP70,ENSG00000114107,protein_coding -11828,RBP2,ENSG00000114113,protein_coding -11831,RBP1,ENSG00000114115,protein_coding -11848,SLC25A36,ENSG00000114120,protein_coding -11867,GRK7,ENSG00000114124,protein_coding -11865,RNF7,ENSG00000114125,protein_coding -11874,TFDP2,ENSG00000114126,protein_coding -11878,XRN1,ENSG00000114127,protein_coding -9969,KAT2B,ENSG00000114166,protein_coding -12220,BCHE,ENSG00000114200,protein_coding -12238,SERPINI2,ENSG00000114204,protein_coding -12240,PDCD10,ENSG00000114209,protein_coding -12277,LRRC31,ENSG00000114248,protein_coding -10697,WNT5A,ENSG00000114251,protein_coding -10453,PFKFB4,ENSG00000114268,protein_coding -10456,COL7A1,ENSG00000114270,protein_coding -12666,FGF12,ENSG00000114279,protein_coding -10468,PRKAR2A,ENSG00000114302,protein_coding -12696,HES1,ENSG00000114315,protein_coding -10501,USP4,ENSG00000114316,protein_coding -12731,ACAP2,ENSG00000114331,protein_coding -12337,ECT2,ENSG00000114346,protein_coding -10541,GNAT1,ENSG00000114349,protein_coding -10543,GNAI2,ENSG00000114353,protein_coding -11151,TFG,ENSG00000114354,protein_coding -57925,USP9Y,ENSG00000114374,protein_coding -10554,HYAL1,ENSG00000114378,protein_coding -10556,TUSC2,ENSG00000114383,protein_coding 
-10561,NPRL2,ENSG00000114388,protein_coding -11173,RPL24,ENSG00000114391,protein_coding -10562,CYB561D2,ENSG00000114395,protein_coding -10778,C3orf14,ENSG00000114405,protein_coding -12444,FXR1,ENSG00000114416,protein_coding -11195,CBLB,ENSG00000114423,protein_coding -11222,BBX,ENSG00000114439,protein_coding -11228,IFT57,ENSG00000114446,protein_coding -12408,GNB4,ENSG00000114450,protein_coding -11229,HHLA2,ENSG00000114455,protein_coding -12836,IQCG,ENSG00000114473,protein_coding -10991,GBE1,ENSG00000114480,protein_coding -11241,MORC1,ENSG00000114487,protein_coding -11498,UMPS,ENSG00000114491,protein_coding -12805,NCBP2,ENSG00000114503,protein_coding -11517,SNX4,ENSG00000114520,protein_coding -11283,C3orf52,ENSG00000114529,protein_coding -10857,FRMD4B,ENSG00000114541,protein_coding -11544,SLC41A3,ENSG00000114544,protein_coding -11543,ROPN1B,ENSG00000114547,protein_coding -11571,PLXNA1,ENSG00000114554,protein_coding -11327,ATP6V1A,ENSG00000114573,protein_coding -11590,ABTB1,ENSG00000114626,protein_coding -11589,PODXL2,ENSG00000114631,protein_coding -11385,UPK1B,ENSG00000114638,protein_coding -10418,CSPG5,ENSG00000114646,protein_coding -10409,KLHL18,ENSG00000114648,protein_coding -10413,SCAP,ENSG00000114650,protein_coding -11628,EFCC1,ENSG00000114654,protein_coding -11626,KIAA1257,ENSG00000114656,protein_coding -11693,NEK11,ENSG00000114670,protein_coding -11700,MRPL3,ENSG00000114686,protein_coding -11924,PLSCR4,ENSG00000114698,protein_coding -10569,HEMK1,ENSG00000114735,protein_coding -10571,CISH,ENSG00000114737,protein_coding -10572,MAPKAPK3,ENSG00000114738,protein_coding -10188,ACVR2B,ENSG00000114739,protein_coding -10198,WDR48,ENSG00000114742,protein_coding -11985,COMMD2,ENSG00000114744,protein_coding -10199,GORASP1,ENSG00000114745,protein_coding -12421,PEX5L,ENSG00000114757,protein_coding -10599,RRP9,ENSG00000114767,protein_coding -12507,ABCC5,ENSG00000114770,protein_coding -12041,AADAC,ENSG00000114771,protein_coding -10605,ABHD14B,ENSG00000114779,protein_coding 
-10232,EIF1B,ENSG00000114784,protein_coding -10607,ABHD14A-ACY1,ENSG00000114786,protein_coding -12068,ARHGEF26,ENSG00000114790,protein_coding -12498,KLHL24,ENSG00000114796,protein_coding -12086,PLCH1,ENSG00000114805,protein_coding -10270,VIPR1,ENSG00000114812,protein_coding -10627,DNAH1,ENSG00000114841,protein_coding -12112,SSR3,ENSG00000114850,protein_coding -10276,ZBTB47,ENSG00000114853,protein_coding -10632,TNNC1,ENSG00000114854,protein_coding -10274,NKTR,ENSG00000114857,protein_coding -12535,CLCN2,ENSG00000114859,protein_coding -10873,FOXP1,ENSG00000114861,protein_coding -12532,EIF4G1,ENSG00000114867,protein_coding -10648,SPCS1,ENSG00000114902,protein_coding -10649,NEK4,ENSG00000114904,protein_coding -9227,SLC4A3,ENSG00000114923,protein_coding -8927,INO80D,ENSG00000114933,protein_coding -8935,EEF1B2,ENSG00000114942,protein_coding -8949,ADAM23,ENSG00000114948,protein_coding -6713,DGUOK,ENSG00000114956,protein_coding -6723,MOB1A,ENSG00000114978,protein_coding -7200,KANSL3,ENSG00000114982,protein_coding -7202,LMAN2L,ENSG00000114988,protein_coding -6741,RTKN,ENSG00000114993,protein_coding -7543,TTL,ENSG00000114999,protein_coding -7554,IL1A,ENSG00000115008,protein_coding -9326,CCL20,ENSG00000115009,protein_coding -9004,PIKFYVE,ENSG00000115020,protein_coding -7146,KCNIP3,ENSG00000115041,protein_coding -7148,FAHD2A,ENSG00000115042,protein_coding -9391,NCL,ENSG00000115053,protein_coding -7245,ACTR1B,ENSG00000115073,protein_coding -7595,SLC35F5,ENSG00000115084,protein_coding -7248,ZAP70,ENSG00000115085,protein_coding -7600,ACTR3,ENSG00000115091,protein_coding -7644,STEAP3,ENSG00000115107,protein_coding -7656,EPB41L5,ENSG00000115109,protein_coding -7679,TFCP2L1,ENSG00000115112,protein_coding -5827,SF3B6,ENSG00000115128,protein_coding -5829,TP53I3,ENSG00000115129,protein_coding -5848,DNAJC27,ENSG00000115137,protein_coding -5857,POMC,ENSG00000115138,protein_coding -8134,STAM2,ENSG00000115145,protein_coding -5887,OTOF,ENSG00000115155,protein_coding 
-8180,GPD2,ENSG00000115159,protein_coding -5894,CENPA,ENSG00000115163,protein_coding -8191,CYTIP,ENSG00000115165,protein_coding -8194,ACVR1,ENSG00000115170,protein_coding -8218,TANC1,ENSG00000115183,protein_coding -5918,SLC30A3,ENSG00000115194,protein_coding -5922,MPV17,ENSG00000115204,protein_coding -5923,GTF3C2,ENSG00000115207,protein_coding -5926,EIF2B4,ENSG00000115211,protein_coding -5931,NRBP1,ENSG00000115216,protein_coding -8239,ITGB6,ENSG00000115221,protein_coding -5936,FNDC4,ENSG00000115226,protein_coding -8566,ITGA4,ENSG00000115232,protein_coding -8252,PSMD14,ENSG00000115233,protein_coding -5927,SNX17,ENSG00000115234,protein_coding -6334,ASB3,ENSG00000115239,protein_coding -5929,PPM1G,ENSG00000115241,protein_coding -8578,PDE1A,ENSG00000115252,protein_coding -54856,REEP6,ENSG00000115255,protein_coding -54855,PCSK4,ENSG00000115257,protein_coding -8271,GCG,ENSG00000115263,protein_coding -54852,APC2,ENSG00000115266,protein_coding -8274,IFIH1,ENSG00000115267,protein_coding -54850,RPS15,ENSG00000115268,protein_coding -8275,GCA,ENSG00000115271,protein_coding -6742,INO80B,ENSG00000115274,protein_coding -6745,MOGS,ENSG00000115275,protein_coding -6749,TTC31,ENSG00000115282,protein_coding -54845,NDUFS7,ENSG00000115286,protein_coding -6753,PCGF1,ENSG00000115289,protein_coding -8285,GRB14,ENSG00000115290,protein_coding -5979,CLIP4,ENSG00000115295,protein_coding -6754,TLX2,ENSG00000115297,protein_coding -6349,SPTBN1,ENSG00000115306,protein_coding -6756,AUP1,ENSG00000115307,protein_coding -6358,RTN4,ENSG00000115310,protein_coding -6757,HTRA2,ENSG00000115317,protein_coding -6758,LOXL3,ENSG00000115318,protein_coding -6759,DOK1,ENSG00000115325,protein_coding -8299,GALNT3,ENSG00000115339,protein_coding -6772,POLE4,ENSG00000115350,protein_coding -6773,TACR1,ENSG00000115353,protein_coding -6368,CCDC88A,ENSG00000115355,protein_coding -9028,ACADL,ENSG00000115361,protein_coding -6777,EVA1A,ENSG00000115363,protein_coding -6783,MRPL19,ENSG00000115364,protein_coding 
-9032,LANCL1,ENSG00000115365,protein_coding -8648,WDR75,ENSG00000115368,protein_coding -6378,EFEMP1,ENSG00000115380,protein_coding -6820,REG1A,ENSG00000115386,protein_coding -6398,FANCL,ENSG00000115392,protein_coding -9070,FN1,ENSG00000115414,protein_coding -8679,STAT1,ENSG00000115415,protein_coding -8678,GLS,ENSG00000115419,protein_coding -6425,PAPOLG,ENSG00000115421,protein_coding -6864,DNAH6,ENSG00000115423,protein_coding -9083,PECR,ENSG00000115425,protein_coding -7259,UNC50,ENSG00000115446,protein_coding -9102,IGFBP2,ENSG00000115457,protein_coding -6886,ELMOD3,ENSG00000115459,protein_coding -9103,IGFBP5,ENSG00000115461,protein_coding -6443,USP34,ENSG00000115464,protein_coding -9439,EFHD1,ENSG00000115468,protein_coding -9444,KCNJ13,ENSG00000115474,protein_coding -6460,CCT4,ENSG00000115484,protein_coding -6900,GGCX,ENSG00000115486,protein_coding -9450,NEU2,ENSG00000115488,protein_coding -6480,EHBP1,ENSG00000115504,protein_coding -6486,OTX1,ENSG00000115507,protein_coding -7276,TXNDC9,ENSG00000115514,protein_coding -8753,COQ10B,ENSG00000115520,protein_coding -6909,GNLY,ENSG00000115523,protein_coding -8750,SF3B1,ENSG00000115524,protein_coding -6916,ST3GAL5,ENSG00000115525,protein_coding -7286,CHST10,ENSG00000115526,protein_coding -7290,PDCL3,ENSG00000115539,protein_coding -8757,MOB4,ENSG00000115540,protein_coding -8755,HSPE1,ENSG00000115541,protein_coding -6931,KDM3A,ENSG00000115548,protein_coding -9153,PLCD4,ENSG00000115556,protein_coding -6932,CHMP3,ENSG00000115561,protein_coding -9155,ZNF142,ENSG00000115568,protein_coding -7320,IL1R2,ENSG00000115590,protein_coding -9163,PRKAG3,ENSG00000115592,protein_coding -6976,SMYD1,ENSG00000115593,protein_coding -7322,IL1R1,ENSG00000115594,protein_coding -9166,WNT6,ENSG00000115596,protein_coding -7324,IL1RL2,ENSG00000115598,protein_coding -7326,IL1RL1,ENSG00000115602,protein_coding -7327,IL18R1,ENSG00000115604,protein_coding -7329,IL18RAP,ENSG00000115607,protein_coding -7334,SLC9A2,ENSG00000115616,protein_coding 
-7381,FHL2,ENSG00000115641,protein_coding -9531,MLPH,ENSG00000115648,protein_coding -9187,CNPPD1,ENSG00000115649,protein_coding -7389,UXS1,ENSG00000115652,protein_coding -9192,ABCB6,ENSG00000115657,protein_coding -9196,STK16,ENSG00000115661,protein_coding -7424,SLC5A7,ENSG00000115665,protein_coding -9627,HDLBP,ENSG00000115677,protein_coding -9625,PPP1R7,ENSG00000115685,protein_coding -9623,PASK,ENSG00000115687,protein_coding -9635,STK25,ENSG00000115694,protein_coding -5503,TPO,ENSG00000115705,protein_coding -7737,PROC,ENSG00000115718,protein_coding -5591,ID2,ENSG00000115738,protein_coding -5615,TAF1B,ENSG00000115750,protein_coding -5633,HPCAL1,ENSG00000115756,protein_coding -5634,ODC1,ENSG00000115758,protein_coding -6024,BIRC6,ENSG00000115760,protein_coding -5637,NOL10,ENSG00000115761,protein_coding -7854,PLEKHB2,ENSG00000115762,protein_coding -8369,GORASP2,ENSG00000115806,protein_coding -6071,STRN,ENSG00000115808,protein_coding -6081,CEBPZ,ENSG00000115816,protein_coding -6084,PRKD3,ENSG00000115825,protein_coding -8373,DCAF17,ENSG00000115827,protein_coding -6088,QPCT,ENSG00000115828,protein_coding -7959,RAB3GAP1,ENSG00000115839,protein_coding -8380,SLC25A12,ENSG00000115840,protein_coding -6096,RMDN2,ENSG00000115841,protein_coding -8385,DLX2,ENSG00000115844,protein_coding -7967,LCT,ENSG00000115850,protein_coding -7972,DARS,ENSG00000115866,protein_coding -6117,SRSF7,ENSG00000115875,protein_coding -5769,SDC1,ENSG00000115884,protein_coding -8764,PLCL1,ENSG00000115896,protein_coding -6533,SLC1A4,ENSG00000115902,protein_coding -6129,SOS1,ENSG00000115904,protein_coding -8024,KYNU,ENSG00000115919,protein_coding -8436,WIPF1,ENSG00000115935,protein_coding -8807,ORC2,ENSG00000115942,protein_coding -6161,COX7A2L,ENSG00000115944,protein_coding -6582,PNO1,ENSG00000115946,protein_coding -8084,ORC4,ENSG00000115947,protein_coding -6588,PLEK,ENSG00000115956,protein_coding -8112,RND3,ENSG00000115963,protein_coding -8444,ATF2,ENSG00000115966,protein_coding 
-6185,THADA,ENSG00000115970,protein_coding -6610,AAK1,ENSG00000115977,protein_coding -8829,TRAK2,ENSG00000115993,protein_coding -6629,C2orf42,ENSG00000115998,protein_coding -6630,TIA1,ENSG00000116001,protein_coding -6633,PCYOX1,ENSG00000116005,protein_coding -54813,KISS1R,ENSG00000116014,protein_coding -6237,EPAS1,ENSG00000116016,protein_coding -54814,ARID3A,ENSG00000116017,protein_coding -8860,SUMO1,ENSG00000116030,protein_coding -6648,CD207,ENSG00000116031,protein_coding -54819,GRIN3B,ENSG00000116032,protein_coding -6651,VAX2,ENSG00000116035,protein_coding -6652,ATP6V1B1,ENSG00000116039,protein_coding -8498,NFE2L2,ENSG00000116044,protein_coding -6274,MSH6,ENSG00000116062,protein_coding -8530,PLEKHA3,ENSG00000116095,protein_coding -6681,SPR,ENSG00000116096,protein_coding -9243,EPHA4,ENSG00000116106,protein_coding -8919,PARD3B,ENSG00000116117,protein_coding -9260,FARSB,ENSG00000116120,protein_coding -6697,ALMS1,ENSG00000116127,protein_coding -3007,BCL9,ENSG00000116128,protein_coding -3897,PRRX1,ENSG00000116132,protein_coding -1741,DHCR24,ENSG00000116133,protein_coding -509,DNAJC16,ENSG00000116138,protein_coding -4784,MARK1,ENSG00000116141,protein_coding -3997,TNR,ENSG00000116147,protein_coding -154,MORN1,ENSG00000116151,protein_coding -1658,GPX7,ENSG00000116157,protein_coding -3988,CACYBP,ENSG00000116161,protein_coding -1672,SCP2,ENSG00000116171,protein_coding -46536,TPSG1,ENSG00000116176,protein_coding -4012,PAPPA2,ENSG00000116183,protein_coding -4038,RALGPS2,ENSG00000116191,protein_coding -4041,ANGPTL1,ENSG00000116194,protein_coding -206,CEP104,ENSG00000116198,protein_coding -4042,FAM20B,ENSG00000116199,protein_coding -1714,TCEANC2,ENSG00000116205,protein_coding -1712,TMEM59,ENSG00000116209,protein_coding -1709,LRRC42,ENSG00000116212,protein_coding -195,WRAP73,ENSG00000116213,protein_coding -4059,NPHS2,ENSG00000116218,protein_coding -1722,MRPL37,ENSG00000116221,protein_coding -239,ICMT,ENSG00000116237,protein_coding -236,RPL22,ENSG00000116251,protein_coding 
-234,CHD5,ENSG00000116254,protein_coding -4078,QSOX1,ENSG00000116260,protein_coding -2521,STXBP3,ENSG00000116266,protein_coding -257,PHF13,ENSG00000116273,protein_coding -284,ERRFI1,ENSG00000116285,protein_coding -280,PARK7,ENSG00000116288,protein_coding -2539,KIAA1324,ENSG00000116299,protein_coding -983,OPRD1,ENSG00000116329,protein_coding -2557,AMPD2,ENSG00000116337,protein_coding -992,SRSF4,ENSG00000116350,protein_coding -994,MECR,ENSG00000116353,protein_coding -2583,KCNC4,ENSG00000116396,protein_coding -4166,EDEM3,ENSG00000116406,protein_coding -2631,WDR77,ENSG00000116455,protein_coding -2633,ATP5PB,ENSG00000116459,protein_coding -2638,RAP1A,ENSG00000116473,protein_coding -1070,HDAC1,ENSG00000116478,protein_coding -2658,CAPZA1,ENSG00000116489,protein_coding -1090,S100PBP,ENSG00000116497,protein_coding -1095,RNF19B,ENSG00000116514,protein_coding -3412,SCAMP3,ENSG00000116521,protein_coding -1104,TRIM62,ENSG00000116525,protein_coding -3419,ASH1L,ENSG00000116539,protein_coding -1134,DLGAP3,ENSG00000116544,protein_coding -1144,SFPQ,ENSG00000116560,protein_coding -5007,RHOU,ENSG00000116574,protein_coding -3437,GON4L,ENSG00000116580,protein_coding -3447,ARHGEF2,ENSG00000116584,protein_coding -3454,LAMTOR2,ENSG00000116586,protein_coding -3476,MEF2D,ENSG00000116604,protein_coding -1840,DOCK7,ENSG00000116641,protein_coding -376,SRM,ENSG00000116649,protein_coding -394,FBXO2,ENSG00000116661,protein_coding -396,FBXO6,ENSG00000116663,protein_coding -4161,C1orf21,ENSG00000116667,protein_coding -4174,SWT1,ENSG00000116668,protein_coding -397,MAD2L2,ENSG00000116670,protein_coding -1904,DNAJC6,ENSG00000116675,protein_coding -1909,LEPR,ENSG00000116678,protein_coding -4183,IVNS1ABP,ENSG00000116679,protein_coding -413,KIAA2013,ENSG00000116685,protein_coding -416,MFN2,ENSG00000116688,protein_coding -4196,PRG4,ENSG00000116690,protein_coding -418,MIIP,ENSG00000116691,protein_coding -4145,SMG7,ENSG00000116698,protein_coding -4146,NCF2,ENSG00000116701,protein_coding 
-4202,PDC,ENSG00000116703,protein_coding -1925,SLC35D1,ENSG00000116704,protein_coding -4208,PLA2G4A,ENSG00000116711,protein_coding -1941,GADD45A,ENSG00000116717,protein_coding -443,PRAMEF1,ENSG00000116721,protein_coding -442,PRAMEF12,ENSG00000116726,protein_coding -1948,WLS,ENSG00000116729,protein_coding -488,PRDM2,ENSG00000116731,protein_coding -4259,RGS2,ENSG00000116741,protein_coding -1955,RPE65,ENSG00000116745,protein_coding -4264,RO60,ENSG00000116747,protein_coding -2709,AMPD1,ENSG00000116748,protein_coding -4262,UCHL5,ENSG00000116750,protein_coding -2707,BCAS2,ENSG00000116752,protein_coding -1975,SRSF11,ENSG00000116754,protein_coding -1980,CTH,ENSG00000116761,protein_coding -512,AGMAT,ENSG00000116771,protein_coding -2695,OLFML3,ENSG00000116774,protein_coding -2017,TNNI3K,ENSG00000116783,protein_coding -4286,CFHR3,ENSG00000116785,protein_coding -520,PLEKHM2,ENSG00000116786,protein_coding -2024,CRYZ,ENSG00000116791,protein_coding -2684,PHTF1,ENSG00000116793,protein_coding -532,ZBTB17,ENSG00000116809,protein_coding -2751,CD58,ENSG00000116815,protein_coding -1156,TFAP2E,ENSG00000116819,protein_coding -2759,CD2,ENSG00000116824,protein_coding -2766,TTF2,ENSG00000116830,protein_coding -4334,NR5A2,ENSG00000116833,protein_coding -4353,KIF21B,ENSG00000116852,protein_coding -4358,TMEM9,ENSG00000116857,protein_coding -1170,ADPRHL2,ENSG00000116863,protein_coding -1173,MAP7D1,ENSG00000116871,protein_coding -2793,WARS2,ENSG00000116874,protein_coding -2802,HAO2,ENSG00000116882,protein_coding -1184,OSCP1,ENSG00000116885,protein_coding -1186,MRPS15,ENSG00000116898,protein_coding -5067,EXOC8,ENSG00000116903,protein_coding -5065,GNPAT,ENSG00000116906,protein_coding -5074,TSNAX,ENSG00000116918,protein_coding -1211,C1orf109,ENSG00000116922,protein_coding -1245,RRAGC,ENSG00000116954,protein_coding -5175,NID1,ENSG00000116962,protein_coding -5185,LGALS8,ENSG00000116977,protein_coding -1272,NT5C1A,ENSG00000116981,protein_coding -1273,HPCAL4,ENSG00000116983,protein_coding 
-5191,MTR,ENSG00000116984,protein_coding -1277,BMP8B,ENSG00000116985,protein_coding -1286,MYCL,ENSG00000116990,protein_coding -5086,SIPA1L2,ENSG00000116991,protein_coding -5201,ZP4,ENSG00000116996,protein_coding -1294,RLF,ENSG00000117000,protein_coding -5257,KMO,ENSG00000117009,protein_coding -1313,ZNF684,ENSG00000117010,protein_coding -1324,KCNQ4,ENSG00000117013,protein_coding -1317,RIMS3,ENSG00000117016,protein_coding -5294,AKT3,ENSG00000117020,protein_coding -3511,ETV3,ENSG00000117036,protein_coding -2037,ACADM,ENSG00000117054,protein_coding -2053,ST6GALNAC5,ENSG00000117069,protein_coding -3643,SLAMF1,ENSG00000117090,protein_coding -3646,CD48,ENSG00000117091,protein_coding -2116,ADGRL2,ENSG00000117114,protein_coding -596,PADI2,ENSG00000117115,protein_coding -595,SDHB,ENSG00000117118,protein_coding -591,MFAP2,ENSG00000117122,protein_coding -2146,RPF1,ENSG00000117133,protein_coding -4412,KDM5B,ENSG00000117139,protein_coding -3733,UAP1,ENSG00000117143,protein_coding -608,ACTL8,ENSG00000117148,protein_coding -2150,CTBS,ENSG00000117151,protein_coding -3741,RGS4,ENSG00000117152,protein_coding -4421,KLHL12,ENSG00000117153,protein_coding -611,IGSF21,ENSG00000117154,protein_coding -2155,SSX2IP,ENSG00000117155,protein_coding -2176,ZNHIT6,ENSG00000117174,protein_coding -653,PLA2G2D,ENSG00000117215,protein_coding -4492,RBBP5,ENSG00000117222,protein_coding -2220,GBP3,ENSG00000117226,protein_coding -2222,GBP1,ENSG00000117228,protein_coding -676,KIF17,ENSG00000117245,protein_coding -2946,GPR89A,ENSG00000117262,protein_coding -4507,CDK18,ENSG00000117266,protein_coding -4517,RAB29,ENSG00000117280,protein_coding -2948,CD160,ENSG00000117281,protein_coding -688,ECE1,ENSG00000117298,protein_coding -772,HMGCL,ENSG00000117305,protein_coding -771,GALE,ENSG00000117308,protein_coding -762,ID3,ENSG00000117318,protein_coding -4572,CR2,ENSG00000117322,protein_coding -4580,CD46,ENSG00000117335,protein_coding -3120,PRPF3,ENSG00000117360,protein_coding 
-3116,APH1A,ENSG00000117362,protein_coding -1369,P3H1,ENSG00000117385,protein_coding -1381,SLC2A1,ENSG00000117394,protein_coding -1389,EBNA1BP2,ENSG00000117395,protein_coding -1399,CDC20,ENSG00000117399,protein_coding -1396,MPL,ENSG00000117400,protein_coding -1419,ARTN,ENSG00000117407,protein_coding -1421,IPO13,ENSG00000117408,protein_coding -1424,ATP6V0B,ENSG00000117410,protein_coding -1425,B4GALT2,ENSG00000117411,protein_coding -1441,ERI3,ENSG00000117419,protein_coding -1463,PTCH2,ENSG00000117425,protein_coding -1485,AKR1A1,ENSG00000117448,protein_coding -1483,PRDX1,ENSG00000117450,protein_coding -1501,PIK3R3,ENSG00000117461,protein_coding -1506,TSPAN1,ENSG00000117472,protein_coding -3867,BLZF1,ENSG00000117475,protein_coding -3868,CCDC181,ENSG00000117477,protein_coding -3869,SLC19A2,ENSG00000117479,protein_coding -1515,FAAH,ENSG00000117480,protein_coding -1513,NSUN4,ENSG00000117481,protein_coding -2310,TMED5,ENSG00000117500,protein_coding -3900,MROH9,ENSG00000117501,protein_coding -2316,DR1,ENSG00000117505,protein_coding -2352,CNN3,ENSG00000117519,protein_coding -3916,PRRC2C,ENSG00000117523,protein_coding -2346,F3,ENSG00000117525,protein_coding -2345,ABCD3,ENSG00000117528,protein_coding -3921,VAMP4,ENSG00000117533,protein_coding -2445,DPH5,ENSG00000117543,protein_coding -3941,FASLG,ENSG00000117560,protein_coding -2380,PTBP2,ENSG00000117569,protein_coding -3950,TNFSF4,ENSG00000117586,protein_coding -3954,PRDX6,ENSG00000117592,protein_coding -3965,DARS2,ENSG00000117593,protein_coding -4614,HSD11B1,ENSG00000117594,protein_coding -4618,IRF6,ENSG00000117595,protein_coding -4619,UTP25,ENSG00000117597,protein_coding -2398,PLPPR5,ENSG00000117598,protein_coding -2401,PLPPR4,ENSG00000117600,protein_coding -3969,SERPINC1,ENSG00000117601,protein_coding -793,RCAN3,ENSG00000117602,protein_coding -811,SYF2,ENSG00000117614,protein_coding -814,RSRP1,ENSG00000117616,protein_coding -2412,SLC35A3,ENSG00000117620,protein_coding -4640,RCOR3,ENSG00000117625,protein_coding 
-837,STMN1,ENSG00000117632,protein_coding -831,MTFR1L,ENSG00000117640,protein_coding -826,MAN1C1,ENSG00000117643,protein_coding -4652,NEK2,ENSG00000117650,protein_coding -872,RPS6KA1,ENSG00000117676,protein_coding -867,DHDDS,ENSG00000117682,protein_coding -4677,NENF,ENSG00000117691,protein_coding -4689,NSL1,ENSG00000117697,protein_coding -4706,PROX1,ENSG00000117707,protein_coding -881,ARID1A,ENSG00000117713,protein_coding -4715,CENPF,ENSG00000117724,protein_coding -940,RPA2,ENSG00000117748,protein_coding -937,PPP1R8,ENSG00000117751,protein_coding -932,STX12,ENSG00000117758,protein_coding -4788,MARC2,ENSG00000117791,protein_coding -1567,SLC5A9,ENSG00000117834,protein_coding -1622,OSBPL9,ENSG00000117859,protein_coding -1635,TXNDC12,ENSG00000117862,protein_coding -24552,ESYT2,ENSG00000117868,protein_coding -56792,CD3EAP,ENSG00000117877,protein_coding -45911,MESD,ENSG00000117899,protein_coding -45795,RCN2,ENSG00000117906,protein_coding -45849,CHRNB4,ENSG00000117971,protein_coding -31874,MUC5B,ENSG00000117983,protein_coding -31898,CTSD,ENSG00000117984,protein_coding -5533,COLEC11,ENSG00000118004,protein_coding -11770,STAG1,ENSG00000118007,protein_coding -11796,A4GNT,ENSG00000118017,protein_coding -54830,STK11,ENSG00000118046,protein_coding -34753,KMT2A,ENSG00000118058,protein_coding -34764,TREH,ENSG00000118094,protein_coding -34757,IFT46,ENSG00000118096,protein_coding -34433,MMP8,ENSG00000118113,protein_coding -34701,APOA1,ENSG00000118137,protein_coding -56903,ZNF541,ENSG00000118156,protein_coding -56897,SLC8A2,ENSG00000118160,protein_coding -56899,KPTN,ENSG00000118162,protein_coding -34789,RPS25,ENSG00000118181,protein_coding -4344,KIF14,ENSG00000118193,protein_coding -4362,TNNT2,ENSG00000118194,protein_coding -4345,DDX59,ENSG00000118197,protein_coding -4347,CAMSAP2,ENSG00000118200,protein_coding -3715,ATF6,ENSG00000118217,protein_coding -8994,CRYGD,ENSG00000118231,protein_coding -9082,MREG,ENSG00000118242,protein_coding -9111,TNP1,ENSG00000118245,protein_coding 
-8957,FASTKD2,ENSG00000118246,protein_coding -8922,NRP2,ENSG00000118257,protein_coding -8976,CREB1,ENSG00000118260,protein_coding -8963,KLF7,ENSG00000118263,protein_coding -52597,TTR,ENSG00000118271,protein_coding -52598,B4GALT6,ENSG00000118276,protein_coding -3117,C1orf54,ENSG00000118292,protein_coding -3115,CA14,ENSG00000118298,protein_coding -38138,CASC1,ENSG00000118307,protein_coding -38134,LRMP,ENSG00000118308,protein_coding -18070,ATP10B,ENSG00000118322,protein_coding -33953,SPCS2,ENSG00000118363,protein_coding -34054,USP35,ENSG00000118369,protein_coding -20219,ELOVL4,ENSG00000118402,protein_coding -20161,FILIP1,ENSG00000118407,protein_coding -20370,CASP8AP2,ENSG00000118412,protein_coding -20199,HMGN3,ENSG00000118418,protein_coding -20251,UBE3D,ENSG00000118420,protein_coding -20338,CNR1,ENSG00000118432,protein_coding -20337,SPACA1,ENSG00000118434,protein_coding -1977,ANKRD13C,ENSG00000118454,protein_coding -1917,SGIP1,ENSG00000118473,protein_coding -20038,PHF3,ENSG00000118482,protein_coding -21084,ZC2HC1B,ENSG00000118491,protein_coding -21116,ADGB,ENSG00000118492,protein_coding -21085,PLAGL1,ENSG00000118495,protein_coding -21108,FBXO30,ENSG00000118496,protein_coding -21000,TNFAIP3,ENSG00000118503,protein_coding -20857,AKAP7,ENSG00000118507,protein_coding -21114,RAB32,ENSG00000118508,protein_coding -20948,MYB,ENSG00000118513,protein_coding -20943,ALDH8A1,ENSG00000118514,protein_coding -20927,SGK1,ENSG00000118515,protein_coding -20812,RNF146,ENSG00000118518,protein_coding -20860,ARG1,ENSG00000118520,protein_coding -20874,CCN2,ENSG00000118523,protein_coding -20920,TCF21,ENSG00000118526,protein_coding -48528,PMFBP1,ENSG00000118557,protein_coding -13207,FBXL5,ENSG00000118564,protein_coding -13239,MED28,ENSG00000118579,protein_coding -39066,SLC16A7,ENSG00000118596,protein_coding -39114,RXYLT1,ENSG00000118600,protein_coding -55907,ZNF430,ENSG00000118620,protein_coding -6901,VAMP8,ENSG00000118640,protein_coding -2692,DCLRE1B,ENSG00000118655,protein_coding 
-52130,MYL12B,ENSG00000118680,protein_coding -20561,FOXO3,ENSG00000118689,protein_coding -20567,ARMC2,ENSG00000118690,protein_coding -54085,GHRH,ENSG00000118702,protein_coding -54084,RPN2,ENSG00000118705,protein_coding -54066,TGIF2,ENSG00000118707,protein_coding -2732,CASQ2,ENSG00000118729,protein_coding -2458,OLFM3,ENSG00000118733,protein_coding -14170,PKD2,ENSG00000118762,protein_coding -14171,ABCG2,ENSG00000118777,protein_coding -14168,SPP1,ENSG00000118785,protein_coding -13992,STBD1,ENSG00000118804,protein_coding -14010,CCNI,ENSG00000118816,protein_coding -12153,RARRES1,ENSG00000118849,protein_coding -12154,MFSD1,ENSG00000118855,protein_coding -4769,RAB3GAP2,ENSG00000118873,protein_coding -46830,EEF2KMT,ENSG00000118894,protein_coding -46823,PPL,ENSG00000118898,protein_coding -46822,UBN1,ENSG00000118900,protein_coding -41414,KLF12,ENSG00000118922,protein_coding -41433,UCHL3,ENSG00000118939,protein_coding -41285,PCDH17,ENSG00000118946,protein_coding -5780,HS1BP3,ENSG00000118960,protein_coding -5786,LDAH,ENSG00000118961,protein_coding -5760,WDR35,ENSG00000118965,protein_coding -37587,CCND2,ENSG00000118971,protein_coding -37590,FGF23,ENSG00000118972,protein_coding -16932,ELL2,ENSG00000118985,protein_coding -8727,DNAH7,ENSG00000118997,protein_coding -8898,CYP20A1,ENSG00000119004,protein_coding -8812,NDUFB3,ENSG00000119013,protein_coding -8738,GTF3C3,ENSG00000119041,protein_coding -8772,SATB2,ENSG00000119042,protein_coding -17443,UBE2B,ENSG00000119048,protein_coding -30490,TRPM6,ENSG00000119121,protein_coding -30465,GDA,ENSG00000119125,protein_coding -30448,KLF9,ENSG00000119138,protein_coding -30429,TJP2,ENSG00000119139,protein_coding -7388,C2orf40,ENSG00000119147,protein_coding -5602,ITGB1BP1,ENSG00000119185,protein_coding -5603,CPSF3,ENSG00000119203,protein_coding -12808,PIGZ,ENSG00000119227,protein_coding -12803,SENP5,ENSG00000119231,protein_coding -40265,CCDC92,ENSG00000119242,protein_coding -5051,C1orf198,ENSG00000119280,protein_coding 
-5062,TRIM67,ENSG00000119283,protein_coding -5189,HEATR1,ENSG00000119285,protein_coding -31122,PTBP3,ENSG00000119314,protein_coding -31044,RAD23B,ENSG00000119318,protein_coding -31150,FKBP15,ENSG00000119321,protein_coding -31070,CTNNAL1,ENSG00000119326,protein_coding -31069,ABITRAM,ENSG00000119328,protein_coding -31440,WDR34,ENSG00000119333,protein_coding -31443,SET,ENSG00000119335,protein_coding -31467,PTPA,ENSG00000119383,protein_coding -31434,GLE1,ENSG00000119392,protein_coding -31240,RAB14,ENSG00000119396,protein_coding -31239,CNTRL,ENSG00000119397,protein_coding -31205,TRIM32,ENSG00000119401,protein_coding -31231,FBXW2,ENSG00000119402,protein_coding -31236,PHF19,ENSG00000119403,protein_coding -31311,NEK6,ENSG00000119408,protein_coding -31156,BSPRY,ENSG00000119411,protein_coding -31333,PPP6C,ENSG00000119414,protein_coding -31257,NDUFA8,ENSG00000119421,protein_coding -31157,HDHD3,ENSG00000119431,protein_coding -31260,RBM18,ENSG00000119446,protein_coding -31135,SLC46A2,ENSG00000119457,protein_coding -31128,HSDL2,ENSG00000119471,protein_coding -31345,MAPKAP1,ENSG00000119487,protein_coding -30942,NR4A3,ENSG00000119508,protein_coding -30947,INVS,ENSG00000119509,protein_coding -30922,GALNT12,ENSG00000119514,protein_coding -31300,DENND1A,ENSG00000119522,protein_coding -30931,ALG2,ENSG00000119523,protein_coding -1187,CSF3R,ENSG00000119535,protein_coding -53053,KDSR,ENSG00000119537,protein_coding -53055,VPS4B,ENSG00000119541,protein_coding -52947,ONECUT2,ENSG00000119547,protein_coding -54854,C19orf25,ENSG00000119559,protein_coding -57731,ZBTB45,ENSG00000119574,protein_coding -43290,YLPM1,ENSG00000119596,protein_coding -43212,DCAF4,ENSG00000119599,protein_coding -43291,PROX2,ENSG00000119608,protein_coding -43270,VSX2,ENSG00000119614,protein_coding -43289,FCF1,ENSG00000119616,protein_coding -43295,PGF,ENSG00000119630,protein_coding -43600,IFI27L2,ENSG00000119632,protein_coding -43265,BBOF1,ENSG00000119636,protein_coding -43303,NEK9,ENSG00000119638,protein_coding 
-43301,ACYP1,ENSG00000119640,protein_coding -43327,IFT43,ENSG00000119650,protein_coding -43279,NPC2,ENSG00000119655,protein_coding -43244,DNAL1,ENSG00000119661,protein_coding -43356,IRF2BPL,ENSG00000119669,protein_coding -43238,ACOT2,ENSG00000119673,protein_coding -43283,LTBP2,ENSG00000119681,protein_coding -43286,AREL1,ENSG00000119682,protein_coding -43299,MLH3,ENSG00000119684,protein_coding -43323,TTLL5,ENSG00000119685,protein_coding -43319,FLVCR2,ENSG00000119686,protein_coding -43271,ABCD4,ENSG00000119688,protein_coding -43293,DLST,ENSG00000119689,protein_coding -43601,PPP4R4,ENSG00000119698,protein_coding -43328,TGFB3,ENSG00000119699,protein_coding -43302,ZC2HC1C,ENSG00000119703,protein_coding -43387,SLIRP,ENSG00000119705,protein_coding -43219,RBM25,ENSG00000119707,protein_coding -43266,ALDH6A1,ENSG00000119711,protein_coding -43538,GPR68,ENSG00000119714,protein_coding -43336,ESRRB,ENSG00000119715,protein_coding -43296,EIF2B2,ENSG00000119718,protein_coding -43517,NRDE2,ENSG00000119720,protein_coding -43262,COQ6,ENSG00000119723,protein_coding -43257,ZNF410,ENSG00000119725,protein_coding -6245,RHOQ,ENSG00000119729,protein_coding -6339,GPR75,ENSG00000119737,protein_coding -5944,SUPT7L,ENSG00000119760,protein_coding -5813,KLHL29,ENSG00000119771,protein_coding -5859,DNMT3A,ENSG00000119772,protein_coding -5900,TMEM214,ENSG00000119777,protein_coding -5817,ATAD2B,ENSG00000119778,protein_coding -5826,FKBP1B,ENSG00000119782,protein_coding -6107,ATL2,ENSG00000119787,protein_coding -5987,YPEL5,ENSG00000119801,protein_coding -6045,FAM98A,ENSG00000119812,protein_coding -6022,YIPF4,ENSG00000119820,protein_coding -6519,AFTPH,ENSG00000119844,protein_coding -6516,LGALSL,ENSG00000119862,protein_coding -6586,CNRIP1,ENSG00000119865,protein_coding -6418,BCL11A,ENSG00000119866,protein_coding -6248,CRIPT,ENSG00000119878,protein_coding -6266,EPCAM,ENSG00000119888,protein_coding -20145,SLC17A5,ENSG00000119899,protein_coding -20097,OGFRL1,ENSG00000119900,protein_coding 
-36918,SLF2,ENSG00000119906,protein_coding -36735,IDE,ENSG00000119912,protein_coding -37106,TECTB,ENSG00000119913,protein_coding -36964,ELOVL3,ENSG00000119915,protein_coding -36672,IFIT3,ENSG00000119917,protein_coding -36878,NKX2-3,ENSG00000119919,protein_coding -36670,IFIT2,ENSG00000119922,protein_coding -37105,GPAM,ENSG00000119927,protein_coding -36885,CUTC,ENSG00000119929,protein_coding -36714,PPP1R3C,ENSG00000119938,protein_coding -36864,PYROXD2,ENSG00000119943,protein_coding -36873,CNNM1,ENSG00000119946,protein_coding -37076,MXI1,ENSG00000119950,protein_coding -37079,SMNDC1,ENSG00000119953,protein_coding -37286,C10orf88,ENSG00000119965,protein_coding -36775,HELLS,ENSG00000119969,protein_coding -37205,PRLHR,ENSG00000119973,protein_coding -36799,TCTN3,ENSG00000119977,protein_coding -37217,FAM45A,ENSG00000119979,protein_coding -36853,AVPI1,ENSG00000119986,protein_coding -37248,WDR11,ENSG00000120008,protein_coding -36955,ARMH3,ENSG00000120029,protein_coding -36954,KCNIP2,ENSG00000120049,protein_coding -37041,CFAP58,ENSG00000120051,protein_coding -36874,GOT1,ENSG00000120053,protein_coding -36891,CPN1,ENSG00000120054,protein_coding -36973,C10orf95,ENSG00000120055,protein_coding -36856,SFRP5,ENSG00000120057,protein_coding -51473,GNA13,ENSG00000120063,protein_coding -50979,HOXB8,ENSG00000120068,protein_coding -50882,KANSL1,ENSG00000120071,protein_coding -50976,HOXB5,ENSG00000120075,protein_coding -50874,CRHR1,ENSG00000120088,protein_coding -50970,HOXB3,ENSG00000120093,protein_coding -50967,HOXB1,ENSG00000120094,protein_coding -18197,DUSP1,ENSG00000120129,protein_coding -18129,PANK3,ENSG00000120137,protein_coding -18245,MSX2,ENSG00000120149,protein_coding -29825,TEK,ENSG00000120156,protein_coding -29534,RCL1,ENSG00000120158,protein_coding -29817,CAAP1,ENSG00000120159,protein_coding -29829,EQTN,ENSG00000120160,protein_coding -29830,MOB3B,ENSG00000120162,protein_coding -29552,INSL6,ENSG00000120210,protein_coding -29554,INSL4,ENSG00000120211,protein_coding 
-29571,MLANA,ENSG00000120215,protein_coding -29562,CD274,ENSG00000120217,protein_coding -29756,IFNA6,ENSG00000120235,protein_coding -29762,IFNA8,ENSG00000120242,protein_coding -15065,GRIA2,ENSG00000120251,protein_coding -21165,NUP43,ENSG00000120253,protein_coding -21202,MTHFD1L,ENSG00000120254,protein_coding -21171,LRP11,ENSG00000120256,protein_coding -21220,CCDC170,ENSG00000120262,protein_coding -21166,PCMT1,ENSG00000120265,protein_coding -21197,PLEKHG1,ENSG00000120278,protein_coding -21231,MYCT1,ENSG00000120279,protein_coding -24925,CXorf21,ENSG00000120280,protein_coding -24921,MAGEB4,ENSG00000120289,protein_coding -17608,CYSTM1,ENSG00000120306,protein_coding -17634,WDR55,ENSG00000120314,protein_coding -17736,ARAP3,ENSG00000120318,protein_coding -17680,PCDHB8,ENSG00000120322,protein_coding -17687,PCDHB10,ENSG00000120324,protein_coding -17691,PCDHB14,ENSG00000120327,protein_coding -17689,PCDHB12,ENSG00000120328,protein_coding -17698,SLC25A2,ENSG00000120329,protein_coding -3992,TNN,ENSG00000120332,protein_coding -3989,MRPS14,ENSG00000120333,protein_coding -3963,CENPL,ENSG00000120334,protein_coding -3947,TNFSF18,ENSG00000120337,protein_coding -4021,SEC16B,ENSG00000120341,protein_coding -3894,GORAB,ENSG00000120370,protein_coding -21457,GPR31,ENSG00000120436,protein_coding -21353,ACAT2,ENSG00000120437,protein_coding -21354,TCP1,ENSG00000120438,protein_coding -21464,TTLL2,ENSG00000120440,protein_coding -35093,SNX19,ENSG00000120451,protein_coding -35053,KCNJ5,ENSG00000120457,protein_coding -34965,MSANTD2,ENSG00000120458,protein_coding -35055,TP53AIP1,ENSG00000120471,protein_coding -25591,TEX11,ENSG00000120498,protein_coding -25580,ARR3,ENSG00000120500,protein_coding -25582,PDZD11,ENSG00000120509,protein_coding -14887,SLC10A7,ENSG00000120519,protein_coding -28915,NUDCD1,ENSG00000120526,protein_coding -28917,ENY2,ENSG00000120533,protein_coding -35603,MASTL,ENSG00000120539,protein_coding -35555,KIAA1217,ENSG00000120549,protein_coding 
-35649,LYZL1,ENSG00000120563,protein_coding -35486,PLXDC2,ENSG00000120594,protein_coding -35711,EPC1,ENSG00000120616,protein_coding -37485,IQSEC3,ENSG00000120645,protein_coding -37497,CCDC77,ENSG00000120647,protein_coding -975,TAF12,ENSG00000120656,protein_coding -41030,ENOX1,ENSG00000120658,protein_coding -41021,TNFSF11,ENSG00000120659,protein_coding -40996,MTRF1,ENSG00000120662,protein_coding -40912,SOHLH2,ENSG00000120669,protein_coding -41027,DNAJC15,ENSG00000120675,protein_coding -40959,PROSER1,ENSG00000120685,protein_coding -40948,UFM1,ENSG00000120686,protein_coding -40988,WBP4,ENSG00000120688,protein_coding -40986,ELF1,ENSG00000120690,protein_coding -40926,SMAD9,ENSG00000120693,protein_coding -40854,HSPH1,ENSG00000120694,protein_coding -40994,KBTBD7,ENSG00000120696,protein_coding -40931,ALG5,ENSG00000120697,protein_coding -40932,EXOSC8,ENSG00000120699,protein_coding -17552,ETF1,ENSG00000120705,protein_coding -17501,TGFBI,ENSG00000120708,protein_coding -17545,FAM53C,ENSG00000120709,protein_coding -17561,SIL1,ENSG00000120725,protein_coding -17574,PAIP2,ENSG00000120727,protein_coding -17529,MYOT,ENSG00000120729,protein_coding -17547,KDM3B,ENSG00000120733,protein_coding -17550,EGR1,ENSG00000120738,protein_coding -12006,SERP1,ENSG00000120742,protein_coding -11886,PLS1,ENSG00000120756,protein_coding -56386,ZFP30,ENSG00000120784,protein_coding -39586,NR2C1,ENSG00000120798,protein_coding -39700,UTP20,ENSG00000120800,protein_coding -39662,TMPO,ENSG00000120802,protein_coding -39701,ARL1,ENSG00000120805,protein_coding -39764,GLT8D2,ENSG00000120820,protein_coding -39813,MTERF2,ENSG00000120832,protein_coding -39556,SOCS2,ENSG00000120833,protein_coding -39767,NFYB,ENSG00000120837,protein_coding -39728,WASHC3,ENSG00000120860,protein_coding -39667,APAF1,ENSG00000120868,protein_coding -27642,DUSP4,ENSG00000120875,protein_coding -27593,CLU,ENSG00000120885,protein_coding -27507,TNFRSF10B,ENSG00000120889,protein_coding -27488,SORBS3,ENSG00000120896,protein_coding 
-27588,PTK2B,ENSG00000120899,protein_coding -27590,CHRNA2,ENSG00000120903,protein_coding -27577,ADRA1A,ENSG00000120907,protein_coding -27485,PPP3CC,ENSG00000120910,protein_coding -27491,PDLIM2,ENSG00000120913,protein_coding -27591,EPHX2,ENSG00000120915,protein_coding -27897,RNF170,ENSG00000120925,protein_coding -408,NPPB,ENSG00000120937,protein_coding -386,UBIAD1,ENSG00000120942,protein_coding -372,TARDBP,ENSG00000120948,protein_coding -422,TNFRSF8,ENSG00000120949,protein_coding -448,PRAMEF2,ENSG00000120952,protein_coding -28791,ZNF706,ENSG00000120963,protein_coding -28047,LYPLA1,ENSG00000120992,protein_coding -28379,CRISPLD1,ENSG00000121005,protein_coding -28247,COPS5,ENSG00000121022,protein_coding -28338,RDH10,ENSG00000121039,protein_coding -51215,EPX,ENSG00000121053,protein_coding -51183,AKAP1,ENSG00000121057,protein_coding -51176,COIL,ENSG00000121058,protein_coding -51170,TRIM25,ENSG00000121060,protein_coding -51179,SCPEP1,ENSG00000121064,protein_coding -51039,SPOP,ENSG00000121067,protein_coding -51335,TBX2,ENSG00000121068,protein_coding -51040,SLC35B1,ENSG00000121073,protein_coding -51337,TBX4,ENSG00000121075,protein_coding -51236,TEX14,ENSG00000121101,protein_coding -51042,FAM117A,ENSG00000121104,protein_coding -7196,NCAPH,ENSG00000121152,protein_coding -15017,LRAT,ENSG00000121207,protein_coding -14996,TMEM131L,ENSG00000121210,protein_coding -14994,MND1,ENSG00000121211,protein_coding -32120,TRIM6,ENSG00000121236,protein_coding -47896,ABCC11,ENSG00000121270,protein_coding -47949,TENT4B,ENSG00000121274,protein_coding -47950,ADCY7,ENSG00000121281,protein_coding -56156,CEP89,ENSG00000121289,protein_coding -56121,TSHZ3,ENSG00000121297,protein_coding -1669,ECHDC2,ENSG00000121310,protein_coding -37885,TAS2R8,ENSG00000121314,protein_coding -37989,PLBD1,ENSG00000121316,protein_coding -37888,TAS2R10,ENSG00000121318,protein_coding -37917,PRB2,ENSG00000121335,protein_coding -38084,PYROXD1,ENSG00000121350,protein_coding -38083,IAPP,ENSG00000121351,protein_coding 
-38094,KCNJ8,ENSG00000121361,protein_coding -37884,TAS2R7,ENSG00000121377,protein_coding -37928,BCL2L14,ENSG00000121380,protein_coding -37886,TAS2R9,ENSG00000121381,protein_coding -40598,PSPC1,ENSG00000121390,protein_coding -57646,ZNF549,ENSG00000121406,protein_coding -57704,A1BG,ENSG00000121410,protein_coding -57685,ZSCAN18,ENSG00000121413,protein_coding -57653,ZNF211,ENSG00000121417,protein_coding -10917,PDZRN3,ENSG00000121440,protein_coding -4117,RGSL1,ENSG00000121446,protein_coding -4079,LHX4,ENSG00000121454,protein_coding -4171,RNF2,ENSG00000121481,protein_coding -4173,TRMT1L,ENSG00000121486,protein_coding -11480,SEC22A,ENSG00000121542,protein_coding -11460,CSTA,ENSG00000121552,protein_coding -11251,DPPA4,ENSG00000121570,protein_coding -11401,POPDC2,ENSG00000121577,protein_coding -11386,B4GALT4,ENSG00000121578,protein_coding -11324,NAA50,ENSG00000121579,protein_coding -11396,CD80,ENSG00000121594,protein_coding -32568,KIF18A,ENSG00000121621,protein_coding -3015,GJA8,ENSG00000121634,protein_coding -5313,DESI2,ENSG00000121644,protein_coding -32813,MAPK8IP1,ENSG00000121653,protein_coding -32812,CRY2,ENSG00000121671,protein_coding -32816,PEX16,ENSG00000121680,protein_coding -32635,DEPDC7,ENSG00000121690,protein_coding -32670,CAT,ENSG00000121691,protein_coding -23370,PILRB,ENSG00000121716,protein_coding -40604,ZMYM2,ENSG00000121741,protein_coding -40613,GJB6,ENSG00000121742,protein_coding -40608,GJA3,ENSG00000121743,protein_coding -39296,TBC1D15,ENSG00000121749,protein_coding -1042,ADGRB2,ENSG00000121753,protein_coding -1037,HCRTR1,ENSG00000121764,protein_coding -1026,ZCCHC17,ENSG00000121766,protein_coding -1028,FABP3,ENSG00000121769,protein_coding -1051,KHDRBS1,ENSG00000121774,protein_coding -1053,TMEM39B,ENSG00000121775,protein_coding -10376,CCRL2,ENSG00000121797,protein_coding -10373,CCR2,ENSG00000121807,protein_coding -2965,POLR3GL,ENSG00000121851,protein_coding -12328,GHSR,ENSG00000121853,protein_coding -12329,TNFSF10,ENSG00000121858,protein_coding 
-12405,ZNF639,ENSG00000121864,protein_coding -12218,SLITRK3,ENSG00000121871,protein_coding -12401,PIK3CA,ENSG00000121879,protein_coding -13445,PDS5A,ENSG00000121892,protein_coding -13419,TMEM156,ENSG00000121895,protein_coding -13431,LIAS,ENSG00000121897,protein_coding -37300,CPXM2,ENSG00000121898,protein_coding -1093,TMEM54,ENSG00000121900,protein_coding -1116,ZSCAN20,ENSG00000121903,protein_coding -1118,CSMD2,ENSG00000121904,protein_coding -1092,HPCA,ENSG00000121905,protein_coding -2608,LRIF1,ENSG00000121931,protein_coding -2635,TMIGD3,ENSG00000121933,protein_coding -2526,CLCC1,ENSG00000121940,protein_coding -2525,GPSM2,ENSG00000121957,protein_coding -8045,GTDC1,ENSG00000121964,protein_coding -7975,CXCR4,ENSG00000121966,protein_coding -7961,ZRANB3,ENSG00000121988,protein_coding -8081,ACVR2A,ENSG00000121989,protein_coding -16624,POLK,ENSG00000122008,protein_coding -16639,SV2C,ENSG00000122012,protein_coding -40794,FLT3,ENSG00000122025,protein_coding -40771,RPL21,ENSG00000122026,protein_coding -40779,MTIF3,ENSG00000122033,protein_coding -40778,GTF3A,ENSG00000122034,protein_coding -40774,RASL11A,ENSG00000122035,protein_coding -40817,UBL3,ENSG00000122042,protein_coding -12831,FYTTD1,ENSG00000122068,protein_coding -9622,MTERF4,ENSG00000122085,protein_coding -26419,XPNPEP2,ENSG00000122121,protein_coding -26420,SASH3,ENSG00000122122,protein_coding -26416,OCRL,ENSG00000122126,protein_coding -31643,PAEP,ENSG00000122133,protein_coding -31642,OBP2A,ENSG00000122136,protein_coding -31639,MRPS2,ENSG00000122140,protein_coding -25785,TBX22,ENSG00000122145,protein_coding -4442,FMOD,ENSG00000122176,protein_coding -4430,MYOG,ENSG00000122180,protein_coding -4450,LAX1,ENSG00000122188,protein_coding -21374,PLG,ENSG00000122194,protein_coding -18280,KIAA1191,ENSG00000122203,protein_coding -3633,COPA,ENSG00000122218,protein_coding -3651,CD244,ENSG00000122223,protein_coding -3650,LY9,ENSG00000122224,protein_coding -47261,HS3ST2,ENSG00000122254,protein_coding 
-47303,RBBP6,ENSG00000122257,protein_coding -46954,ZC3H7A,ENSG00000122299,protein_coding -46940,PRM2,ENSG00000122304,protein_coding -21309,SERAC1,ENSG00000122335,protein_coding -36520,ANXA11,ENSG00000122359,protein_coding -36593,LDB3,ENSG00000122367,protein_coding -36592,OPN4,ENSG00000122375,protein_coding -36614,SHLD2,ENSG00000122376,protein_coding -36531,PRXL2A,ENSG00000122378,protein_coding -46732,ZNF205,ENSG00000122386,protein_coding -46759,NAA60,ENSG00000122390,protein_coding -2297,RPL5,ENSG00000122406,protein_coding -2181,ODF2L,ENSG00000122417,protein_coding -2087,PTGFR,ENSG00000122420,protein_coding -2149,SPATA1,ENSG00000122432,protein_coding -2420,TRMT13,ENSG00000122435,protein_coding -2421,LRRC39,ENSG00000122477,protein_coding -2361,RWDD3,ENSG00000122481,protein_coding -2264,ZNF644,ENSG00000122482,protein_coding -2311,CCDC18,ENSG00000122483,protein_coding -2287,RPAP2,ENSG00000122484,protein_coding -53281,PQLC1,ENSG00000122490,protein_coding -22134,BBS9,ENSG00000122507,protein_coding -21680,PMS2,ENSG00000122512,protein_coding -22342,ZMIZ2,ENSG00000122515,protein_coding -21677,OCM,ENSG00000122543,protein_coding -22168,SEPT7,ENSG00000122545,protein_coding -22181,EEPD1,ENSG00000122547,protein_coding -21906,KLHL7,ENSG00000122550,protein_coding -22163,HERPUD2,ENSG00000122557,protein_coding -21964,CBX3,ENSG00000122565,protein_coding -21963,HNRNPA2B1,ENSG00000122566,protein_coding -22060,WIPF3,ENSG00000122574,protein_coding -21749,NXPH1,ENSG00000122584,protein_coding -21934,NPY,ENSG00000122585,protein_coding -21904,FAM126A,ENSG00000122591,protein_coding -21996,HOXA7,ENSG00000122592,protein_coding -22275,INHBA,ENSG00000122641,protein_coding -22127,FKBP9,ENSG00000122642,protein_coding -22129,NT5C3A,ENSG00000122643,protein_coding -21788,ARL4A,ENSG00000122644,protein_coding -21678,CCZ1,ENSG00000122674,protein_coding -22321,POLM,ENSG00000122678,protein_coding -22361,RAMP3,ENSG00000122679,protein_coding -21607,MRM2,ENSG00000122687,protein_coding 
-21851,TWIST1,ENSG00000122691,protein_coding -29888,SMU1,ENSG00000122692,protein_coding -30049,GLIPR2,ENSG00000122694,protein_coding -30094,SLC25A51,ENSG00000122696,protein_coding -30051,CLTA,ENSG00000122705,protein_coding -30047,RECK,ENSG00000122707,protein_coding -29892,SPINK4,ENSG00000122711,protein_coding -29873,TAF1L,ENSG00000122728,protein_coding -29867,ACO1,ENSG00000122729,protein_coding -29979,PHF24,ENSG00000122733,protein_coding -29953,DNAI1,ENSG00000122735,protein_coding -30091,DCAF10,ENSG00000122741,protein_coding -29958,CNTFR,ENSG00000122756,protein_coding -24037,KIAA1549,ENSG00000122778,protein_coding -24028,TRIM24,ENSG00000122779,protein_coding -23971,CYREN,ENSG00000122783,protein_coding -23968,CALD1,ENSG00000122786,protein_coding -24015,AKR1D1,ENSG00000122787,protein_coding -25314,NUDT10,ENSG00000122824,protein_coding -36488,SFTPA1,ENSG00000122852,protein_coding -36258,NEUROG3,ENSG00000122859,protein_coding -36391,PLAU,ENSG00000122861,protein_coding -36243,SRGN,ENSG00000122862,protein_coding -36306,CHST3,ENSG00000122863,protein_coding -36117,BICC1,ENSG00000122870,protein_coding -36112,CISD1,ENSG00000122873,protein_coding -36160,EGR2,ENSG00000122877,protein_coding -36344,ECD,ENSG00000122882,protein_coding -36336,P4HA1,ENSG00000122884,protein_coding -36221,SLC25A16,ENSG00000122912,protein_coding -36102,ZWINT,ENSG00000122952,protein_coding -36245,VPS26A,ENSG00000122958,protein_coding -39988,RBM19,ENSG00000122965,protein_coding -40095,CIT,ENSG00000122966,protein_coding -39892,IFT81,ENSG00000122970,protein_coding -40145,ACADS,ENSG00000122971,protein_coding -39909,HVCN1,ENSG00000122986,protein_coding -39971,DDX54,ENSG00000123064,protein_coding -40024,MED13L,ENSG00000123066,protein_coding -1603,CDKN2C,ENSG00000123080,protein_coding -1610,RNF11,ENSG00000123091,protein_coding -38158,RASSF8,ENSG00000123094,protein_coding -38159,BHLHE41,ENSG00000123095,protein_coding -38160,SSPN,ENSG00000123096,protein_coding -38168,ITPR2,ENSG00000123104,protein_coding 
-38208,CCDC91,ENSG00000123106,protein_coding -28585,NECAB1,ENSG00000123119,protein_coding -28541,WWP1,ENSG00000123124,protein_coding -24844,ACOT9,ENSG00000123130,protein_coding -24843,PRDX4,ENSG00000123131,protein_coding -55553,DDX39A,ENSG00000123136,protein_coding -55554,PKN1,ENSG00000123143,protein_coding -55466,TRIR,ENSG00000123144,protein_coding -55551,ADGRE5,ENSG00000123146,protein_coding -55454,WDR83,ENSG00000123154,protein_coding -55557,GIPC1,ENSG00000123159,protein_coding -26403,ACTRT1,ENSG00000123165,protein_coding -41218,CCDC70,ENSG00000123171,protein_coding -41169,SPRYD7,ENSG00000123178,protein_coding -41163,EBPL,ENSG00000123179,protein_coding -41221,ATP7B,ENSG00000123191,protein_coding -41092,ZC3H13,ENSG00000123200,protein_coding -16436,NLN,ENSG00000123213,protein_coding -16429,CENPK,ENSG00000123219,protein_coding -35372,OPTN,ENSG00000123240,protein_coding -35301,ITIH5,ENSG00000123243,protein_coding -38638,ATF1,ENSG00000123268,protein_coding -39033,TSFM,ENSG00000123297,protein_coding -38854,NEUROD4,ENSG00000123307,protein_coding -39004,ARHGAP9,ENSG00000123329,protein_coding -38842,NCKAP1L,ENSG00000123338,protein_coding -38905,MMP19,ENSG00000123342,protein_coding -38763,PFDN5,ENSG00000123349,protein_coding -38586,SPATS2,ENSG00000123352,protein_coding -38902,ORMDL2,ENSG00000123353,protein_coding -38682,NR4A1,ENSG00000123358,protein_coding -38844,PDE1B,ENSG00000123360,protein_coding -38792,HOXC13,ENSG00000123364,protein_coding -38912,CDK2,ENSG00000123374,protein_coding -38988,LRP1,ENSG00000123384,protein_coding -38799,HOXC11,ENSG00000123388,protein_coding -38684,ATG101,ENSG00000123395,protein_coding -38830,NFE2,ENSG00000123405,protein_coding -38793,HOXC12,ENSG00000123407,protein_coding -38917,IKZF4,ENSG00000123411,protein_coding -38819,SMUG1,ENSG00000123415,protein_coding -38574,TUBA1B,ENSG00000123416,protein_coding -39031,EEF1AKMT3,ENSG00000123427,protein_coding -32874,KBTBD4,ENSG00000123444,protein_coding -31598,SARDH,ENSG00000123453,protein_coding 
-31596,DBH,ENSG00000123454,protein_coding -1524,ATPAF1,ENSG00000123472,protein_coding -1548,STIL,ENSG00000123473,protein_coding -9482,HJURP,ENSG00000123485,protein_coding -26206,IL13RA2,ENSG00000123496,protein_coding -20685,COL10A1,ENSG00000123500,protein_coding -20611,AMD1,ENSG00000123505,protein_coding -20431,NDUFAF4,ENSG00000123545,protein_coding -20451,USP45,ENSG00000123552,protein_coding -26060,PLP1,ENSG00000123560,protein_coding -26085,SERPINA7,ENSG00000123561,protein_coding -26056,MORF4L2,ENSG00000123562,protein_coding -26070,H2BFWT,ENSG00000123569,protein_coding -26061,RAB9B,ENSG00000123570,protein_coding -26084,NRK,ENSG00000123572,protein_coding -26076,FAM199X,ENSG00000123575,protein_coding -26077,ESX1,ENSG00000123576,protein_coding -26762,MAGEA9,ENSG00000123584,protein_coding -24707,ATXN3L,ENSG00000123594,protein_coding -24716,RAB9A,ENSG00000123595,protein_coding -8371,METTL8,ENSG00000123600,protein_coding -8303,TTC21B,ENSG00000123607,protein_coding -8122,NMI,ENSG00000123609,protein_coding -8123,TNFAIP6,ENSG00000123610,protein_coding -8192,ACVR1C,ENSG00000123612,protein_coding -8224,BAZ2B,ENSG00000123636,protein_coding -17925,SLC36A1,ENSG00000123643,protein_coding -4656,LPGAT1,ENSG00000123684,protein_coding -4688,BATF3,ENSG00000123685,protein_coding -4613,G0S2,ENSG00000123689,protein_coding -51579,KCNJ2,ENSG00000123700,protein_coding -26471,RAP2C,ENSG00000123728,protein_coding -14597,EXOSC9,ENSG00000123737,protein_coding -14424,PLA2G12A,ENSG00000123739,protein_coding -56583,B9D2,ENSG00000123810,protein_coding -56542,COQ8B,ENSG00000123815,protein_coding -4560,PFKFB2,ENSG00000123836,protein_coding -4563,C4BPA,ENSG00000123838,protein_coding -4562,C4BPB,ENSG00000123843,protein_coding -34191,RAB38,ENSG00000123892,protein_coding -34336,GPR83,ENSG00000123901,protein_coding -29263,AGO2,ENSG00000123908,protein_coding -12934,MXD4,ENSG00000123933,protein_coding -30691,CKS2,ENSG00000123975,protein_coding -9328,DAW1,ENSG00000123977,protein_coding 
-9265,ACSL3,ENSG00000123983,protein_coding -9217,CHPF,ENSG00000123989,protein_coding -9205,DNPEP,ENSG00000123992,protein_coding -9223,INHA,ENSG00000123999,protein_coding -9261,MOGAT1,ENSG00000124003,protein_coding -9221,OBSL1,ENSG00000124006,protein_coding -9284,FAM124B,ENSG00000124019,protein_coding -48349,SLC12A4,ENSG00000124067,protein_coding -48331,ENKD1,ENSG00000124074,protein_coding -54494,MC3R,ENSG00000124089,protein_coding -54503,GCNT7,ENSG00000124091,protein_coding -54534,CTCFL,ENSG00000124092,protein_coding -54497,FAM210B,ENSG00000124098,protein_coding -54249,PI3,ENSG00000124102,protein_coding -54504,FAM209A,ENSG00000124103,protein_coding -54294,SNX21,ENSG00000124104,protein_coding -54254,SLPI,ENSG00000124107,protein_coding -54288,WFDC3,ENSG00000124116,protein_coding -54226,TTPAL,ENSG00000124120,protein_coding -54371,PREX1,ENSG00000124126,protein_coding -54245,KCNS1,ENSG00000124134,protein_coding -54310,SLC12A5,ENSG00000124140,protein_coding -54131,ARHGAP40,ENSG00000124143,protein_coding -54257,SDC4,ENSG00000124145,protein_coding -54346,NCOA3,ENSG00000124151,protein_coding -54264,PIGT,ENSG00000124155,protein_coding -54251,SEMG2,ENSG00000124157,protein_coding -54255,MATN4,ENSG00000124159,protein_coding -54311,NCOA5,ENSG00000124160,protein_coding -54549,VAPB,ENSG00000124164,protein_coding -54428,PARD6B,ENSG00000124171,protein_coding -54577,ATP5F1E,ENSG00000124172,protein_coding -54172,CHD6,ENSG00000124177,protein_coding -54162,PLCG1,ENSG00000124181,protein_coding -54209,TOX2,ENSG00000124191,protein_coding -54194,SRSF6,ENSG00000124193,protein_coding -54215,GDAP1L1,ENSG00000124194,protein_coding -54206,GTSF1L,ENSG00000124196,protein_coding -54374,ARFGEF2,ENSG00000124198,protein_coding -54381,ZNFX1,ENSG00000124201,protein_coding -54580,ZNF831,ENSG00000124203,protein_coding -54581,EDN3,ENSG00000124205,protein_coding -54377,CSE1L,ENSG00000124207,protein_coding -54407,TMEM189-UBE2V1,ENSG00000124208,protein_coding -54548,RAB22A,ENSG00000124209,protein_coding 
-54391,PTGIS,ENSG00000124212,protein_coding -54378,STAU1,ENSG00000124214,protein_coding -54594,CDH26,ENSG00000124215,protein_coding -54404,SNAI1,ENSG00000124216,protein_coding -54435,MOCS3,ENSG00000124217,protein_coding -54554,STX16,ENSG00000124222,protein_coding -54538,PMEPA1,ENSG00000124225,protein_coding -54401,RNF114,ENSG00000124226,protein_coding -54546,ANKRD60,ENSG00000124227,protein_coding -54380,DDX27,ENSG00000124228,protein_coding -54256,RBPJL,ENSG00000124232,protein_coding -54250,SEMG1,ENSG00000124233,protein_coding -54545,C20orf85,ENSG00000124237,protein_coding -54429,BCAS4,ENSG00000124243,protein_coding -54234,KCNK15,ENSG00000124249,protein_coding -54261,TP53TG5,ENSG00000124251,protein_coding -54535,PCK1,ENSG00000124253,protein_coding -54537,ZBP1,ENSG00000124256,protein_coding -54300,NEURL2,ENSG00000124257,protein_coding -26807,MAGEA10,ENSG00000124260,protein_coding -15657,MTRR,ENSG00000124275,protein_coding -15658,FASTKD3,ENSG00000124279,protein_coding -56178,PEPD,ENSG00000124299,protein_coding -56181,CHST8,ENSG00000124302,protein_coding -25392,IQSEC2,ENSG00000124313,protein_coding -26973,VAMP7,ENSG00000124333,protein_coding -26976,IL9R,ENSG00000124334,protein_coding -24593,XG,ENSG00000124343,protein_coding -6709,STAMBP,ENSG00000124356,protein_coding -6664,NAGK,ENSG00000124357,protein_coding -6666,MCEE,ENSG00000124370,protein_coding -6670,PAIP2B,ENSG00000124374,protein_coding -6620,SNRNP27,ENSG00000124380,protein_coding -6668,MPHOSPH10,ENSG00000124383,protein_coding -48911,IL17C,ENSG00000124391,protein_coding -13501,ATP8A1,ENSG00000124406,protein_coding -49865,USP22,ENSG00000124422,protein_coding -25828,POF1B,ENSG00000124429,protein_coding -56846,HIF3A,ENSG00000124440,protein_coding -56692,ZNF576,ENSG00000124444,protein_coding -56699,IRGC,ENSG00000124449,protein_coding -56710,ZNF45,ENSG00000124459,protein_coding -56682,LYPD3,ENSG00000124466,protein_coding -56657,PSG8,ENSG00000124467,protein_coding -56652,CEACAM8,ENSG00000124469,protein_coding 
-25084,NDP,ENSG00000124479,protein_coding -25049,USP9X,ENSG00000124486,protein_coding -19868,CRISP2,ENSG00000124490,protein_coding -18616,F13A1,ENSG00000124491,protein_coding -19504,GRM4,ENSG00000124493,protein_coding -19714,TRERF1,ENSG00000124496,protein_coding -19515,PACSIN1,ENSG00000124507,protein_coding -18972,BTN2A2,ENSG00000124508,protein_coding -18739,SIRT5,ENSG00000124523,protein_coding -18874,MRS2,ENSG00000124532,protein_coding -18532,WRNIP1,ENSG00000124535,protein_coding -19735,RRP36,ENSG00000124541,protein_coding -18978,BTN1A1,ENSG00000124557,protein_coding -19525,SNRPC,ENSG00000124562,protein_coding -18918,SLC17A3,ENSG00000124564,protein_coding -18917,SLC17A1,ENSG00000124568,protein_coding -18539,SERPINB6,ENSG00000124570,protein_coding -19766,XPO5,ENSG00000124571,protein_coding -19757,ABCC10,ENSG00000124574,protein_coding -18960,HIST1H1D,ENSG00000124575,protein_coding -19731,PEX6,ENSG00000124587,protein_coding -18541,NQO2,ENSG00000124588,protein_coding -19693,AL365205.1,ENSG00000124593,protein_coding -19660,OARD1,ENSG00000124596,protein_coding -19659,UNC5CL,ENSG00000124602,protein_coding -19800,AARS2,ENSG00000124608,protein_coding -18927,HIST1H1A,ENSG00000124610,protein_coding -19011,ZNF391,ENSG00000124613,protein_coding -19514,RPS10,ENSG00000124614,protein_coding -19647,MOCS1,ENSG00000124615,protein_coding -18999,HIST1H2BJ,ENSG00000124635,protein_coding -19700,MED20,ENSG00000124641,protein_coding -19050,OR2B6,ENSG00000124657,protein_coding -19720,TBCC,ENSG00000124659,protein_coding -19516,SPDEF,ENSG00000124664,protein_coding -19534,TCP11,ENSG00000124678,protein_coding -19772,MAD2L1BP,ENSG00000124688,protein_coding -19662,APOBEC2,ENSG00000124701,protein_coding -19734,KLHDC3,ENSG00000124702,protein_coding -19730,GNMT,ENSG00000124713,protein_coding -19627,DNAH8,ENSG00000124721,protein_coding -19675,TREM1,ENSG00000124731,protein_coding -19733,MEA1,ENSG00000124733,protein_coding -19950,KLHL31,ENSG00000124743,protein_coding 
-19976,COL21A1,ENSG00000124749,protein_coding -19584,CDKN1A,ENSG00000124762,protein_coding -18849,SOX4,ENSG00000124766,protein_coding -19625,GLO1,ENSG00000124767,protein_coding -19588,CPNE5,ENSG00000124772,protein_coding -19638,KCNK17,ENSG00000124780,protein_coding -18634,RREB1,ENSG00000124782,protein_coding -18639,SSR1,ENSG00000124783,protein_coding -18645,RIOK1,ENSG00000124784,protein_coding -18615,NRN1,ENSG00000124785,protein_coding -18664,SLC35B3,ENSG00000124786,protein_coding -18598,RPP40,ENSG00000124787,protein_coding -18784,ATXN1,ENSG00000124788,protein_coding -18799,NUP153,ENSG00000124789,protein_coding -18809,DEK,ENSG00000124795,protein_coding -18660,EEF1E1,ENSG00000124802,protein_coding -19874,CRISP1,ENSG00000124812,protein_coding -19813,RUNX2,ENSG00000124813,protein_coding -19848,OPN5,ENSG00000124818,protein_coding -18698,GCM2,ENSG00000124827,protein_coding -9538,LRRFIP1,ENSG00000124831,protein_coding -9535,RAB17,ENSG00000124839,protein_coding -13931,CXCL6,ENSG00000124875,protein_coding -13950,EREG,ENSG00000124882,protein_coding -33022,TRIM51,ENSG00000124900,protein_coding -33315,MYRF,ENSG00000124920,protein_coding -33338,SCGB1D2,ENSG00000124935,protein_coding -33336,SCGB2A1,ENSG00000124939,protein_coding -33348,AHNAK,ENSG00000124942,protein_coding -9785,EMC3,ENSG00000125037,protein_coding -9743,SSUH2,ENSG00000125046,protein_coding -38562,WNT1,ENSG00000125084,protein_coding -13059,SH3TC1,ENSG00000125089,protein_coding -48181,CNOT1,ENSG00000125107,protein_coding -48301,LRRC29,ENSG00000125122,protein_coding -48087,BBS2,ENSG00000125124,protein_coding -48102,MT1G,ENSG00000125144,protein_coding -48090,MT2A,ENSG00000125148,protein_coding -48288,C16orf70,ENSG00000125149,protein_coding -48189,GOT2,ENSG00000125166,protein_coding -48141,DOK4,ENSG00000125170,protein_coding -40405,PIWIL1,ENSG00000125207,protein_coding -41695,GPR18,ENSG00000125245,protein_coding -41708,CLYBL,ENSG00000125246,protein_coding -41730,TMTC4,ENSG00000125247,protein_coding 
-41658,RAP2A,ENSG00000125249,protein_coding -41765,SLC10A2,ENSG00000125255,protein_coding -41629,ABCC4,ENSG00000125257,protein_coding -41784,EFNB2,ENSG00000125266,protein_coding -41623,SOX21,ENSG00000125285,protein_coding -41701,TM9SF2,ENSG00000125304,protein_coding -50780,C17orf53,ENSG00000125319,protein_coding -21479,KIF25,ENSG00000125337,protein_coding -17388,IRF1,ENSG00000125347,protein_coding -26288,UPF3B,ENSG00000125351,protein_coding -26289,RNF113A,ENSG00000125352,protein_coding -26281,SEPT6,ENSG00000125354,protein_coding -26305,TMEM255A,ENSG00000125355,protein_coding -26290,NDUFA1,ENSG00000125356,protein_coding -24683,AMELX,ENSG00000125363,protein_coding -42751,DMAC2L,ENSG00000125375,protein_coding -42837,BMP4,ENSG00000125378,protein_coding -42810,PTGER2,ENSG00000125384,protein_coding -12944,FAM193A,ENSG00000125386,protein_coding -12954,GRK4,ENSG00000125388,protein_coding -51595,SOX9,ENSG00000125398,protein_coding -49540,TEKT3,ENSG00000125409,protein_coding -49460,MYH2,ENSG00000125414,protein_coding -49520,HS3ST3B1,ENSG00000125430,protein_coding -49403,SLC25A35,ENSG00000125434,protein_coding -51684,MRPS7,ENSG00000125445,protein_coding -51683,GGA3,ENSG00000125447,protein_coding -51674,ARMC7,ENSG00000125449,protein_coding -51682,NUP85,ENSG00000125450,protein_coding -51687,SLC25A19,ENSG00000125454,protein_coding -51685,MIF4GD,ENSG00000125457,protein_coding -51675,NT5C,ENSG00000125458,protein_coding -3430,MSTO1,ENSG00000125459,protein_coding -3473,C1orf61,ENSG00000125462,protein_coding -31548,TTF1,ENSG00000125482,protein_coding -31553,GTF3C4,ENSG00000125484,protein_coding -31552,DDX31,ENSG00000125485,protein_coding -31551,BARHL1,ENSG00000125492,protein_coding -57464,KIR2DL1,ENSG00000125498,protein_coding -57483,PPP1R12C,ENSG00000125503,protein_coding -57406,MBOAT7,ENSG00000125505,protein_coding -54697,SRMS,ENSG00000125508,protein_coding -54741,OPRL1,ENSG00000125510,protein_coding -54713,SLC2A4RG,ENSG00000125520,protein_coding 
-54745,NPBWR2,ENSG00000125522,protein_coding -54699,FNDC11,ENSG00000125531,protein_coding -54666,BHLHE23,ENSG00000125533,protein_coding -54695,PPDPF,ENSG00000125534,protein_coding -7556,IL1B,ENSG00000125538,protein_coding -6967,PLGLB2,ENSG00000125551,protein_coding -7559,IL37,ENSG00000125571,protein_coding -7546,CHCHD5,ENSG00000125611,protein_coding -7573,PAX8,ENSG00000125618,protein_coding -7636,INSIG2,ENSG00000125629,protein_coding -7544,POLR1B,ENSG00000125630,protein_coding -7630,CCDC93,ENSG00000125633,protein_coding -7570,PSD4,ENSG00000125637,protein_coding -55105,SLC25A23,ENSG00000125648,protein_coding -55096,PSPN,ENSG00000125650,protein_coding -55097,GTF2F1,ENSG00000125651,protein_coding -55095,ALKBH7,ENSG00000125652,protein_coding -55094,CLPP,ENSG00000125656,protein_coding -55112,TNFSF9,ENSG00000125657,protein_coding -26357,GRIA3,ENSG00000125675,protein_coding -26361,THOC2,ENSG00000125676,protein_coding -50477,MED1,ENSG00000125686,protein_coding -50449,RPL23,ENSG00000125691,protein_coding -51406,AC046185.1,ENSG00000125695,protein_coding -1846,ATG4C,ENSG00000125703,protein_coding -55116,CD70,ENSG00000125726,protein_coding -55119,C3,ENSG00000125730,protein_coding -55125,SH2D3A,ENSG00000125731,protein_coding -55123,TRIP10,ENSG00000125733,protein_coding -55121,GPR108,ENSG00000125734,protein_coding -55118,TNFSF14,ENSG00000125735,protein_coding -56795,FOSB,ENSG00000125740,protein_coding -56799,OPA3,ENSG00000125741,protein_coding -56808,SNRPD2,ENSG00000125743,protein_coding -56796,RTN2,ENSG00000125744,protein_coding -56801,EML2,ENSG00000125746,protein_coding -56798,VASP,ENSG00000125753,protein_coding -56820,SYMPK,ENSG00000125755,protein_coding -53467,GPCPD1,ENSG00000125772,protein_coding -53329,SDCBP2,ENSG00000125775,protein_coding -53422,PANK2,ENSG00000125779,protein_coding -53364,TGM3,ENSG00000125780,protein_coding -53389,GNRH2,ENSG00000125787,protein_coding -53295,DEFB126,ENSG00000125788,protein_coding -53722,FOXA2,ENSG00000125798,protein_coding 
-53734,CD93,ENSG00000125810,protein_coding -53743,GZF1,ENSG00000125812,protein_coding -53711,PAX1,ENSG00000125813,protein_coding -53744,NAPB,ENSG00000125814,protein_coding -53753,CST8,ENSG00000125815,protein_coding -53699,NKX2-4,ENSG00000125816,protein_coding -53415,CENPB,ENSG00000125817,protein_coding -53322,PSMF1,ENSG00000125818,protein_coding -53705,NKX2-2,ENSG00000125820,protein_coding -53653,DTD1,ENSG00000125821,protein_coding -53746,CSTL1,ENSG00000125823,protein_coding -53308,RBCK1,ENSG00000125826,protein_coding -53502,TMX4,ENSG00000125827,protein_coding -53747,CST11,ENSG00000125831,protein_coding -53358,STK35,ENSG00000125834,protein_coding -53367,SNRPB,ENSG00000125835,protein_coding -53306,NRSN2,ENSG00000125841,protein_coding -53419,AP5S1,ENSG00000125843,protein_coding -53618,RRBP1,ENSG00000125844,protein_coding -53491,BMP2,ENSG00000125845,protein_coding -53637,ZNF133,ENSG00000125846,protein_coding -53578,FLRT3,ENSG00000125848,protein_coding -53623,OVOL2,ENSG00000125850,protein_coding -53610,PCSK2,ENSG00000125851,protein_coding -53409,GFRA4,ENSG00000125861,protein_coding -53529,MKKS,ENSG00000125863,protein_coding -53612,BFSP1,ENSG00000125864,protein_coding -53615,DSTN,ENSG00000125868,protein_coding -53513,LAMP5,ENSG00000125869,protein_coding -53601,SNRPB2,ENSG00000125870,protein_coding -53625,MGME1,ENSG00000125871,protein_coding -53482,LRRN4,ENSG00000125872,protein_coding -53309,TBC1D20,ENSG00000125875,protein_coding -53399,ITPA,ENSG00000125877,protein_coding -53312,TCF15,ENSG00000125878,protein_coding -53602,OTOR,ENSG00000125879,protein_coding -53474,MCM8,ENSG00000125885,protein_coding -53619,BANF2,ENSG00000125888,protein_coding -53324,TMEM74B,ENSG00000125895,protein_coding -53317,FAM110A,ENSG00000125898,protein_coding -53340,SIRPD,ENSG00000125900,protein_coding -53390,MRPS26,ENSG00000125901,protein_coding -53298,DEFB129,ENSG00000125903,protein_coding -54954,S1PR4,ENSG00000125910,protein_coding -54955,NCLN,ENSG00000125912,protein_coding 
-25648,CITED1,ENSG00000125931,protein_coding -752,HNRNPR,ENSG00000125944,protein_coding -753,ZNF436,ENSG00000125945,protein_coding -43054,MAX,ENSG00000125952,protein_coding -43049,CHURC1-FNTB,ENSG00000125954,protein_coding -26006,ARMCX5,ENSG00000125962,protein_coding -54024,GDF5,ENSG00000125965,protein_coding -54016,MMP24,ENSG00000125966,protein_coding -53957,NECAB3,ENSG00000125967,protein_coding -53889,ID1,ENSG00000125968,protein_coding -53971,RALY,ENSG00000125970,protein_coding -53992,DYNLRB1,ENSG00000125971,protein_coding -54029,C20orf173,ENSG00000125975,protein_coding -53974,EIF2S2,ENSG00000125977,protein_coding -54030,ERGIC3,ENSG00000125991,protein_coding -54041,ROMO1,ENSG00000125995,protein_coding -54021,FAM83C,ENSG00000125998,protein_coding -53950,BPIFB1,ENSG00000125999,protein_coding -54026,CEP250,ENSG00000126001,protein_coding -53914,PLAGL2,ENSG00000126003,protein_coding -54014,MMP24OS,ENSG00000126005,protein_coding -24750,GRPR,ENSG00000126010,protein_coding -25388,KDM5C,ENSG00000126012,protein_coding -26178,AMOT,ENSG00000126016,protein_coding -10564,TMEM115,ENSG00000126062,protein_coding -1157,PSMB2,ENSG00000126067,protein_coding -1166,AGO3,ENSG00000126070,protein_coding -1472,UROD,ENSG00000126088,protein_coding -1413,ST3GAL3,ENSG00000126091,protein_coding -1446,TMEM53,ENSG00000126106,protein_coding -1471,HECTD3,ENSG00000126107,protein_coding -43934,KLC1,ENSG00000126214,protein_coding -43943,XRCC3,ENSG00000126215,protein_coding -41872,TUBGCP3,ENSG00000126216,protein_coding -41879,MCF2L,ENSG00000126217,protein_coding -41885,F10,ENSG00000126218,protein_coding -41891,PCID2,ENSG00000126226,protein_coding -41889,PROZ,ENSG00000126231,protein_coding -29312,SLURP1,ENSG00000126233,protein_coding -56307,LRFN3,ENSG00000126243,protein_coding -56285,IGFLR1,ENSG00000126246,protein_coding -56324,CAPNS1,ENSG00000126247,protein_coding -56196,PDCD2L,ENSG00000126249,protein_coding -56257,GPR42,ENSG00000126251,protein_coding -56277,RBM42,ENSG00000126254,protein_coding 
-56300,KIRREL2,ENSG00000126259,protein_coding -56199,UBA2,ENSG00000126261,protein_coding -56264,FFAR2,ENSG00000126262,protein_coding -56305,HCST,ENSG00000126264,protein_coding -56255,FFAR1,ENSG00000126266,protein_coding -56279,COX6B1,ENSG00000126267,protein_coding -50611,KRT36,ENSG00000126337,protein_coding -50507,THRA,ENSG00000126351,protein_coding -50533,CCR7,ENSG00000126353,protein_coding -50508,NR1D1,ENSG00000126368,protein_coding -33540,FRMD8,ENSG00000126391,protein_coding -33468,PRDX5,ENSG00000126432,protein_coding -57049,BCL2L12,ENSG00000126453,protein_coding -57048,IRF3,ENSG00000126456,protein_coding -57050,PRMT1,ENSG00000126457,protein_coding -57046,RRAS,ENSG00000126458,protein_coding -57043,PRRG2,ENSG00000126460,protein_coding -57047,SCAF1,ENSG00000126461,protein_coding -57044,PRR12,ENSG00000126464,protein_coding -57056,TSKS,ENSG00000126467,protein_coding -33445,FLRT1,ENSG00000126500,protein_coding -22774,ASL,ENSG00000126522,protein_coding -22811,SBDS,ENSG00000126524,protein_coding -13874,CSN1S1,ENSG00000126545,protein_coding -13876,STATH,ENSG00000126549,protein_coding -13878,HTN1,ENSG00000126550,protein_coding -50658,STAT5A,ENSG00000126561,protein_coding -50696,WNK4,ENSG00000126562,protein_coding -50699,BECN1,ENSG00000126581,protein_coding -57390,PRKCG,ENSG00000126583,protein_coding -46772,TRAP1,ENSG00000126602,protein_coding -46784,GLIS2,ENSG00000126603,protein_coding -50096,NSRP1,ENSG00000126653,protein_coding -952,DNAJC8,ENSG00000126698,protein_coding -919,AHDC1,ENSG00000126705,protein_coding -923,IFI6,ENSG00000126709,protein_coding -25838,DACH2,ENSG00000126733,protein_coding -37661,ZNF384,ENSG00000126746,protein_coding -37695,EMG1,ENSG00000126749,protein_coding -25192,SSX1,ENSG00000126752,protein_coding -25166,UXT,ENSG00000126756,protein_coding -25164,CFP,ENSG00000126759,protein_coding -25165,ELK1,ENSG00000126767,protein_coding -25237,TIMM17B,ENSG00000126768,protein_coding -42950,PCNX4,ENSG00000126773,protein_coding 
-42866,ATG14,ENSG00000126775,protein_coding -42873,KTN1,ENSG00000126777,protein_coding -42968,SIX1,ENSG00000126778,protein_coding -43003,RHOJ,ENSG00000126785,protein_coding -42856,DLGAP5,ENSG00000126787,protein_coding -42941,L3HYPDH,ENSG00000126790,protein_coding -43040,HSPA2,ENSG00000126803,protein_coding -43037,ZBTB1,ENSG00000126804,protein_coding -42974,TRMT5,ENSG00000126814,protein_coding -43019,SGPP1,ENSG00000126821,protein_coding -43044,PLEKHG3,ENSG00000126822,protein_coding -37810,PZP,ENSG00000126838,protein_coding -48984,PRDM7,ENSG00000126856,protein_coding -50197,RHOT1,ENSG00000126858,protein_coding -50162,EVI2A,ENSG00000126860,protein_coding -50159,OMG,ENSG00000126861,protein_coding -24553,WDR60,ENSG00000126870,protein_coding -31522,AIF1L,ENSG00000126878,protein_coding -31528,FAM78A,ENSG00000126882,protein_coding -31523,NUP214,ENSG00000126883,protein_coding -26929,CTAG2,ENSG00000126890,protein_coding -26876,AVPR2,ENSG00000126895,protein_coding -26914,SLC10A3,ENSG00000126903,protein_coding -54999,MAP2K2,ENSG00000126934,protein_coding -25967,HNRNPH2,ENSG00000126945,protein_coding -25971,ARMCX1,ENSG00000126947,protein_coding -25954,TMEM35A,ENSG00000126950,protein_coding -25984,NXF5,ENSG00000126952,protein_coding -25962,TIMM8A,ENSG00000126953,protein_coding -25508,ZC4H2,ENSG00000126970,protein_coding -18403,CANX,ENSG00000127022,protein_coding -96,INTS11,ENSG00000127054,protein_coding -4254,RGS13,ENSG00000127074,protein_coding -30760,IPPK,ENSG00000127080,protein_coding -30768,ZNF484,ENSG00000127081,protein_coding -30754,OMD,ENSG00000127083,protein_coding -30772,FGD3,ENSG00000127084,protein_coding -1344,HIVEP3,ENSG00000127124,protein_coding -1358,PPCS,ENSG00000127125,protein_coding -1343,EDN2,ENSG00000127129,protein_coding -43700,BCL11B,ENSG00000127152,protein_coding -16814,COX7C,ENSG00000127184,protein_coding -31710,TRAF2,ENSG00000127191,protein_coding -55710,ABHD8,ENSG00000127220,protein_coding -12610,MASP1,ENSG00000127241,protein_coding 
-12678,ATP13A4,ENSG00000127249,protein_coding -12675,PLAAT1,ENSG00000127252,protein_coding -39189,HELB,ENSG00000127311,protein_coding -39225,RAP1B,ENSG00000127314,protein_coding -39216,IL22,ENSG00000127318,protein_coding -39283,TSPAN8,ENSG00000127324,protein_coding -39259,BEST3,ENSG00000127325,protein_coding -39262,RAB3IP,ENSG00000127328,protein_coding -39275,PTPRB,ENSG00000127329,protein_coding -39204,DYRK2,ENSG00000127334,protein_coding -39250,YEATS4,ENSG00000127337,protein_coding -24103,TAS2R3,ENSG00000127362,protein_coding -24104,TAS2R4,ENSG00000127364,protein_coding -24106,TAS2R5,ENSG00000127366,protein_coding -24432,CRYGN,ENSG00000127377,protein_coding -24376,LRRC61,ENSG00000127399,protein_coding -24214,TRPV5,ENSG00000127412,protein_coding -12894,IDUA,ENSG00000127415,protein_coding -12895,FGFRL1,ENSG00000127418,protein_coding -12891,TMEM175,ENSG00000127419,protein_coding -832,AUNIP,ENSG00000127423,protein_coding -55282,PIN1,ENSG00000127445,protein_coding -55277,FBXL12,ENSG00000127452,protein_coding -627,EMC1,ENSG00000127463,protein_coding -652,PLA2G5,ENSG00000127472,protein_coding -625,UBR4,ENSG00000127481,protein_coding -680,HP1BP3,ENSG00000127483,protein_coding -55570,ADGRE2,ENSG00000127507,protein_coding -55688,SIN3B,ENSG00000127511,protein_coding -55574,OR7A10,ENSG00000127515,protein_coding -55670,SLC35E1,ENSG00000127526,protein_coding -55660,EPS15L1,ENSG00000127527,protein_coding -55658,KLF2,ENSG00000127528,protein_coding -55583,OR7C2,ENSG00000127529,protein_coding -55571,AC005255.1,ENSG00000127530,protein_coding -55690,F2RL3,ENSG00000127533,protein_coding -54865,UQCR11,ENSG00000127540,protein_coding -46612,GFER,ENSG00000127554,protein_coding -46614,SYNGR3,ENSG00000127561,protein_coding -46710,PKMYT1,ENSG00000127564,protein_coding -46486,WFIKKN1,ENSG00000127578,protein_coding -46498,WDR24,ENSG00000127580,protein_coding -46500,FBXL16,ENSG00000127585,protein_coding -46513,CHTF18,ENSG00000127586,protein_coding -46514,GNG13,ENSG00000127588,protein_coding 
-1257,MACF1,ENSG00000127603,protein_coding -55344,SMARCA4,ENSG00000127616,protein_coding -55050,KDM4B,ENSG00000127663,protein_coding -55044,TICAM1,ENSG00000127666,protein_coding -39424,METTL25,ENSG00000127720,protein_coding -17861,IL17B,ENSG00000127743,protein_coding -49146,EMC6,ENSG00000127774,protein_coding -49130,OR1E2,ENSG00000127780,protein_coding -49092,METTL16,ENSG00000127804,protein_coding -9197,TUBA4A,ENSG00000127824,protein_coding -9148,VIL1,ENSG00000127831,protein_coding -9135,AAMP,ENSG00000127837,protein_coding -9136,PNKD,ENSG00000127838,protein_coding -40687,TNFRSF19,ENSG00000127863,protein_coding -40749,RNF6,ENSG00000127870,protein_coding -37445,ECHS1,ENSG00000127884,protein_coding -57600,ZNF835,ENSG00000127903,protein_coding -23164,AKAP9,ENSG00000127914,protein_coding -23205,GNG11,ENSG00000127920,protein_coding -23249,SEM1,ENSG00000127922,protein_coding -23197,GNGT1,ENSG00000127928,protein_coding -22965,HIP1,ENSG00000127946,protein_coding -23028,PTPN12,ENSG00000127947,protein_coding -22971,POR,ENSG00000127948,protein_coding -23022,FGL2,ENSG00000127951,protein_coding -22976,STYXL1,ENSG00000127952,protein_coding -23123,STEAP4,ENSG00000127954,protein_coding -23053,GNAI1,ENSG00000127955,protein_coding -23180,PEX1,ENSG00000127980,protein_coding -23162,MTERF1,ENSG00000127989,protein_coding -23216,SGCE,ENSG00000127990,protein_coding -23181,RBM48,ENSG00000127993,protein_coding -23215,CASD1,ENSG00000127995,protein_coding -56512,ZNF780B,ENSG00000128000,protein_coding -56465,LRFN1,ENSG00000128011,protein_coding -56473,ZFP36,ENSG00000128016,protein_coding -13659,SRD5A3,ENSG00000128039,protein_coding -13700,SPINK2,ENSG00000128040,protein_coding -13618,RASL11B,ENSG00000128045,protein_coding -13689,PAICS,ENSG00000128050,protein_coding -13653,KDR,ENSG00000128052,protein_coding -13686,PPAT,ENSG00000128059,protein_coding -59610,TUBGCP6,ENSG00000128159,protein_coding -59620,ADM2,ENSG00000128165,protein_coding -58501,DGCR6L,ENSG00000128185,protein_coding 
-58486,DGCR8,ENSG00000128191,protein_coding -58877,ASPHD2,ENSG00000128203,protein_coding -58774,VPREB3,ENSG00000128218,protein_coding -58586,SDF2L1,ENSG00000128228,protein_coding -59014,GAL3ST1,ENSG00000128242,protein_coding -59073,YWHAH,ENSG00000128245,protein_coding -58963,RFPL1,ENSG00000128250,protein_coding -59086,RFPL2,ENSG00000128253,protein_coding -58742,GNAZ,ENSG00000128266,protein_coding -59320,MGAT3,ENSG00000128268,protein_coding -58812,ADORA2A,ENSG00000128271,protein_coding -59324,ATF4,ENSG00000128272,protein_coding -59440,A4GALT,ENSG00000128274,protein_coding -59093,RFPL3,ENSG00000128276,protein_coding -59218,CDC42EP1,ENSG00000128283,protein_coding -59160,APOL3,ENSG00000128284,protein_coding -59348,MCHR1,ENSG00000128285,protein_coding -58882,TPST2,ENSG00000128294,protein_coding -59253,BAIAP2L2,ENSG00000128298,protein_coding -59198,MPST,ENSG00000128309,protein_coding -59233,GALR3,ENSG00000128310,protein_coding -59196,TST,ENSG00000128311,protein_coding -59152,APOL5,ENSG00000128313,protein_coding -58762,IGLL1,ENSG00000128322,protein_coding -59168,APOL2,ENSG00000128335,protein_coding -59209,RAC2,ENSG00000128340,protein_coding -58990,LIF,ENSG00000128342,protein_coding -59241,C22orf23,ENSG00000128346,protein_coding -59295,APOBEC3A,ENSG00000128383,protein_coding -59301,APOBEC3F,ENSG00000128394,protein_coding -59506,RIBC2,ENSG00000128408,protein_coding -50621,KRT17,ENSG00000128422,protein_coding -44710,EMC4,ENSG00000128463,protein_coding -49767,RNF112,ENSG00000128482,protein_coding -49804,SPECC1,ENSG00000128487,protein_coding -23893,CPA4,ENSG00000128510,protein_coding -23601,DOCK4,ENSG00000128512,protein_coding -23771,POT1,ENSG00000128513,protein_coding -23742,TAS2R16,ENSG00000128519,protein_coding -23848,ATP6V1F,ENSG00000128524,protein_coding -23698,LSM8,ENSG00000128534,protein_coding -23536,CDHR3,ENSG00000128536,protein_coding -23457,PRKRIP1,ENSG00000128563,protein_coding -23423,VGF,ENSG00000128564,protein_coding -23930,PODXL,ENSG00000128567,protein_coding 
-23629,FOXP2,ENSG00000128573,protein_coding -23869,STRIP2,ENSG00000128578,protein_coding -23437,IFT22,ENSG00000128581,protein_coding -23926,MKLN1,ENSG00000128585,protein_coding -23580,DNAJB9,ENSG00000128590,protein_coding -23845,FLNC,ENSG00000128591,protein_coding -23804,LRRC4,ENSG00000128594,protein_coding -23841,CALU,ENSG00000128595,protein_coding -23844,CCDC136,ENSG00000128596,protein_coding -23864,SMO,ENSG00000128602,protein_coding -23851,IRF5,ENSG00000128604,protein_coding -23488,LRRC17,ENSG00000128606,protein_coding -23886,KLHDC10,ENSG00000128607,protein_coding -23749,NDUFA5,ENSG00000128609,protein_coding -23733,FEZF1,ENSG00000128610,protein_coding -23843,OPN1SW,ENSG00000128617,protein_coding -56447,MRPS12,ENSG00000128626,protein_coding -8689,MYO1B,ENSG00000128641,protein_coding -8470,HOXD1,ENSG00000128645,protein_coding -8464,HOXD3,ENSG00000128652,protein_coding -8475,MTX2,ENSG00000128654,protein_coding -8513,PDE11A,ENSG00000128655,protein_coding -8440,CHN1,ENSG00000128656,protein_coding -8365,GAD1,ENSG00000128683,protein_coding -8657,OSGEPL1,ENSG00000128694,protein_coding -8660,ORMDL1,ENSG00000128699,protein_coding -8382,HAT1,ENSG00000128708,protein_coding -8462,HOXD9,ENSG00000128709,protein_coding -8460,HOXD10,ENSG00000128710,protein_coding -8459,HOXD11,ENSG00000128713,protein_coding -8457,HOXD13,ENSG00000128714,protein_coding -44536,HERC2,ENSG00000128731,protein_coding -44399,SNRPN,ENSG00000128739,protein_coding -52339,PSMG2,ENSG00000128789,protein_coding -52237,TWSG1,ENSG00000128791,protein_coding -36008,ARHGAP22,ENSG00000128805,protein_coding -36013,WDFY4,ENSG00000128815,protein_coding -44811,EIF2AK4,ENSG00000128829,protein_coding -45204,MYO5C,ENSG00000128833,protein_coding -45278,CGNL1,ENSG00000128849,protein_coding -45179,TMOD2,ENSG00000128872,protein_coding -44941,TTBK2,ENSG00000128881,protein_coding -44984,ELL3,ENSG00000128886,protein_coding -44848,CCDC32,ENSG00000128891,protein_coding -44873,INO80,ENSG00000128908,protein_coding 
-45360,ICE2,ENSG00000128915,protein_coding -44870,DLL4,ENSG00000128917,protein_coding -45288,ALDH1A2,ENSG00000128918,protein_coding -45315,MINDY2,ENSG00000128923,protein_coding -44841,IVD,ENSG00000128928,protein_coding -44840,KNSTRN,ENSG00000128944,protein_coding -45097,DUT,ENSG00000128951,protein_coding -44872,CHAC1,ENSG00000128965,protein_coding -45549,CLN6,ENSG00000128973,protein_coding -45208,ARPP19,ENSG00000128989,protein_coding -45382,VPS13C,ENSG00000129003,protein_coding -45548,CALML4,ENSG00000129007,protein_coding -45685,ISLR,ENSG00000129009,protein_coding -45606,THAP10,ENSG00000129028,protein_coding -45673,LOXL1,ENSG00000129038,protein_coding -11715,ACKR4,ENSG00000129048,protein_coding -11752,ANAPC13,ENSG00000129055,protein_coding -11651,MBD4,ENSG00000129071,protein_coding -32367,COPB1,ENSG00000129083,protein_coding -32370,PSMA1,ENSG00000129084,protein_coding -22517,SUMF2,ENSG00000129103,protein_coding -15189,PALLD,ENSG00000129116,protein_coding -15289,SPCS3,ENSG00000129128,protein_coding -32551,BBOX1,ENSG00000129151,protein_coding -32416,MYOD1,ENSG00000129152,protein_coding -32418,SERGEF,ENSG00000129158,protein_coding -32417,KCNC1,ENSG00000129159,protein_coding -32420,TPH1,ENSG00000129167,protein_coding -32475,CSRP3,ENSG00000129170,protein_coding -32477,E2F8,ENSG00000129173,protein_coding -15337,DCTD,ENSG00000129187,protein_coding -49354,SOX15,ENSG00000129194,protein_coding -49253,PIMREG,ENSG00000129195,protein_coding -49232,RPAIN,ENSG00000129197,protein_coding -49223,USP6,ENSG00000129204,protein_coding -49356,SHBG,ENSG00000129214,protein_coding -49195,PLD2,ENSG00000129219,protein_coding -49251,AIPL1,ENSG00000129221,protein_coding -49351,CD68,ENSG00000129226,protein_coding -49259,TXNDC17,ENSG00000129235,protein_coding -49358,ATP1B2,ENSG00000129244,protein_coding -49355,FXR2,ENSG00000129245,protein_coding -49215,KIF1C,ENSG00000129250,protein_coding -49353,MPDU1,ENSG00000129255,protein_coding -29188,PHF20L1,ENSG00000129292,protein_coding 
-29185,LRRC6,ENSG00000129295,protein_coding -38543,CCNT1,ENSG00000129315,protein_coding -38417,PUS7L,ENSG00000129317,protein_coding -55325,KRI1,ENSG00000129347,protein_coding -55331,ILF3,ENSG00000129351,protein_coding -55329,SLC44A2,ENSG00000129353,protein_coding -55327,AP1M2,ENSG00000129354,protein_coding -55326,CDKN2D,ENSG00000129355,protein_coding -27399,MTUS1,ENSG00000129422,protein_coding -57149,KLK14,ENSG00000129437,protein_coding -57152,SIGLEC9,ENSG00000129450,protein_coding -57142,KLK10,ENSG00000129451,protein_coding -57139,KLK8,ENSG00000129455,protein_coding -42327,NGDN,ENSG00000129460,protein_coding -42393,RIPK3,ENSG00000129465,protein_coding -42391,ADCY4,ENSG00000129467,protein_coding -42119,RAB2B,ENSG00000129472,protein_coding -42314,BCL2L2,ENSG00000129473,protein_coding -42294,AJUBA,ENSG00000129474,protein_coding -42486,DTD2,ENSG00000129480,protein_coding -42045,PARP2,ENSG00000129484,protein_coding -42482,HEATR5A,ENSG00000129493,protein_coding -42600,FOXA1,ENSG00000129514,protein_coding -42525,SNX6,ENSG00000129515,protein_coding -42519,EAPP,ENSG00000129518,protein_coding -42514,EGLN3,ENSG00000129521,protein_coding -42682,MIS18BP1,ENSG00000129534,protein_coding -42352,NRL,ENSG00000129535,protein_coding -42078,RNASE1,ENSG00000129538,protein_coding -42379,NEDD8,ENSG00000129559,protein_coding -42270,DAD1,ENSG00000129562,protein_coding -42047,TEP1,ENSG00000129566,protein_coding -17111,EPB41L4A,ENSG00000129595,protein_coding -17165,CDO1,ENSG00000129596,protein_coding -17127,REEP5,ENSG00000129625,protein_coding -47876,ITFG1,ENSG00000129636,protein_coding -51741,QRICH2,ENSG00000129646,protein_coding -51735,FOXJ1,ENSG00000129654,protein_coding -51788,SEC14L1,ENSG00000129657,protein_coding -51750,RHBDF2,ENSG00000129667,protein_coding -51749,AANAT,ENSG00000129673,protein_coding -26589,ARHGEF6,ENSG00000129675,protein_coding -26578,MAP7D3,ENSG00000129680,protein_coding -26615,FGF13,ENSG00000129682,protein_coding -27789,ASH2L,ENSG00000129691,protein_coding 
-27715,TTI2,ENSG00000129696,protein_coding -31991,ART1,ENSG00000129744,protein_coding -31994,CHRNA10,ENSG00000129749,protein_coding -31951,CDKN1C,ENSG00000129757,protein_coding -9971,SGO1,ENSG00000129810,protein_coding -57748,RPS4Y1,ENSG00000129824,protein_coding -57940,VCY1B,ENSG00000129862,protein_coding -57939,VCY,ENSG00000129864,protein_coding -57978,CDY2B,ENSG00000129873,protein_coding -48941,CDH15,ENSG00000129910,protein_coding -54877,KLF16,ENSG00000129911,protein_coding -46468,TMEM8A,ENSG00000129925,protein_coding -54963,DOHH,ENSG00000129932,protein_coding -55825,MAU2,ENSG00000129933,protein_coding -54778,SHC2,ENSG00000129946,protein_coding -54804,PLPPR3,ENSG00000129951,protein_coding -31924,INS-IGF2,ENSG00000129965,protein_coding -54881,ABHD17A,ENSG00000129968,protein_coding -54113,LBP,ENSG00000129988,protein_coding -57493,SYT5,ENSG00000129990,protein_coding -57488,TNNI3,ENSG00000129991,protein_coding -48931,CBFA2T3,ENSG00000129993,protein_coding -54848,GAMT,ENSG00000130005,protein_coding -24627,PUDP,ENSG00000130021,protein_coding -21515,ERMARD,ENSG00000130023,protein_coding -21511,PHF10,ENSG00000130024,protein_coding -26796,PRRG3,ENSG00000130032,protein_coding -37603,GALNT8,ENSG00000130035,protein_coding -37613,KCNA5,ENSG00000130037,protein_coding -37575,CRACR2A,ENSG00000130038,protein_coding -30682,NXNL2,ENSG00000130045,protein_coding -25551,STARD8,ENSG00000130052,protein_coding -25559,FAM155B,ENSG00000130054,protein_coding -25586,GDPD2,ENSG00000130055,protein_coding -24847,SAT1,ENSG00000130066,protein_coding -25420,GNL3L,ENSG00000130119,protein_coding -9501,SH3BP4,ENSG00000130147,protein_coding -24727,MOSPD2,ENSG00000130150,protein_coding -55352,DOCK6,ENSG00000130158,protein_coding -55375,ECSIT,ENSG00000130159,protein_coding -55348,LDLR,ENSG00000130164,protein_coding -55380,ELOF1,ENSG00000130165,protein_coding -55357,TSPAN16,ENSG00000130167,protein_coding -55356,ANGPTL8,ENSG00000130173,protein_coding -55370,PRKCSH,ENSG00000130175,protein_coding 
-55379,CNN1,ENSG00000130176,protein_coding -41924,CDC16,ENSG00000130177,protein_coding -46729,ZSCAN10,ENSG00000130182,protein_coding -29310,THEM6,ENSG00000130193,protein_coding -56761,NECTIN2,ENSG00000130202,protein_coding -56764,APOE,ENSG00000130203,protein_coding -56763,TOMM40,ENSG00000130204,protein_coding -56766,APOC1,ENSG00000130208,protein_coding -30698,GADD45G,ENSG00000130222,protein_coding -26210,LRCH2,ENSG00000130224,protein_coding -24471,DPP6,ENSG00000130226,protein_coding -27464,XPO7,ENSG00000130227,protein_coding -24736,ACE2,ENSG00000130234,protein_coding -56419,FAM98C,ENSG00000130244,protein_coding -55058,SAFB2,ENSG00000130254,protein_coding -55060,RPL36,ENSG00000130255,protein_coding -54871,ATP8B3,ENSG00000130270,protein_coding -55801,GDF1,ENSG00000130283,protein_coding -55818,NCAN,ENSG00000130287,protein_coding -9613,KIF1A,ENSG00000130294,protein_coding -55715,GTPBP3,ENSG00000130299,protein_coding -55716,PLVAP,ENSG00000130300,protein_coding -55719,BST2,ENSG00000130303,protein_coding -55729,SLC27A1,ENSG00000130304,protein_coding -22888,NSUN5,ENSG00000130305,protein_coding -55706,USHBP1,ENSG00000130307,protein_coding -55734,COLGALT1,ENSG00000130309,protein_coding -55713,DDA1,ENSG00000130311,protein_coding -55711,MRPL34,ENSG00000130312,protein_coding -55731,AC010618.1,ENSG00000130313,protein_coding -54909,LSM7,ENSG00000130332,protein_coding -21311,TULP4,ENSG00000130338,protein_coding -21300,SNX9,ENSG00000130340,protein_coding -20514,RTN4IP1,ENSG00000130347,protein_coding -20516,QRSL1,ENSG00000130348,protein_coding -20523,C6orf203,ENSG00000130349,protein_coding -21332,RSPH3,ENSG00000130363,protein_coding -21360,MAS1,ENSG00000130368,protein_coding -55086,ACSBG2,ENSG00000130377,protein_coding -55089,MLLT1,ENSG00000130382,protein_coding -55072,FUT5,ENSG00000130383,protein_coding -25306,BMP15,ENSG00000130385,protein_coding -21476,AFDN,ENSG00000130396,protein_coding -56427,ACTN4,ENSG00000130402,protein_coding -32238,STK33,ENSG00000130413,protein_coding 
-9586,NDUFA10,ENSG00000130414,protein_coding -23400,EPO,ENSG00000130427,protein_coding -23304,ARPC1B,ENSG00000130429,protein_coding -57394,CACNG6,ENSG00000130433,protein_coding -16387,ZSWIM6,ENSG00000130449,protein_coding -55741,FCHO1,ENSG00000130475,protein_coding -55736,UNC13A,ENSG00000130477,protein_coding -55739,MAP1S,ENSG00000130479,protein_coding -59630,KLHDC7B,ENSG00000130487,protein_coding -5507,PXDN,ENSG00000130508,protein_coding -55779,SSBP4,ENSG00000130511,protein_coding -55776,GDF15,ENSG00000130513,protein_coding -55775,PGPEP1,ENSG00000130517,protein_coding -55769,IQCN,ENSG00000130518,protein_coding -55773,LSM4,ENSG00000130520,protein_coding -55770,JUND,ENSG00000130522,protein_coding -57009,HRC,ENSG00000130528,protein_coding -57010,TRPM4,ENSG00000130529,protein_coding -58279,OR11H1,ENSG00000130538,protein_coding -59467,SULT4A1,ENSG00000130540,protein_coding -55140,ZNF557,ENSG00000130544,protein_coding -55106,CRB3,ENSG00000130545,protein_coding -31630,OLFM1,ENSG00000130558,protein_coding -31651,CAMSAP1,ENSG00000130559,protein_coding -31653,UBAC1,ENSG00000130560,protein_coding -9456,SAG,ENSG00000130561,protein_coding -54714,ZBTB46,ENSG00000130584,protein_coding -54700,HELZ2,ENSG00000130589,protein_coding -54733,SAMD10,ENSG00000130590,protein_coding -31907,LSP1,ENSG00000130592,protein_coding -31913,TNNT3,ENSG00000130595,protein_coding -31906,TNNI2,ENSG00000130598,protein_coding -31615,COL5A1,ENSG00000130635,protein_coding -59512,ATXN10,ENSG00000130638,protein_coding -37436,TUBGCP2,ENSG00000130640,protein_coding -37440,CALY,ENSG00000130643,protein_coding -37455,CYP2E1,ENSG00000130649,protein_coding -31767,PNPLA7,ENSG00000130653,protein_coding -46450,HBZ,ENSG00000130656,protein_coding -56454,PAK4,ENSG00000130669,protein_coding -24521,MNX1,ENSG00000130675,protein_coding -53812,ZNF337,ENSG00000130684,protein_coding -858,CEP85,ENSG00000130695,protein_coding -54612,TAF4,ENSG00000130699,protein_coding -54634,GATA5,ENSG00000130700,protein_coding 
-54632,RBBP8NL,ENSG00000130701,protein_coding -54625,LAMA5,ENSG00000130702,protein_coding -54622,OSBPL2,ENSG00000130703,protein_coding -54623,ADRM1,ENSG00000130706,protein_coding -31509,ASS1,ENSG00000130707,protein_coding -31513,PRDM12,ENSG00000130711,protein_coding -31514,EXOSC2,ENSG00000130713,protein_coding -31536,POMT1,ENSG00000130714,protein_coding -31538,UCK1,ENSG00000130717,protein_coding -31518,FIBCD1,ENSG00000130720,protein_coding -31532,PRRC2B,ENSG00000130723,protein_coding -57735,CHMP2A,ENSG00000130724,protein_coding -57736,UBE2M,ENSG00000130725,protein_coding -57733,TRIM28,ENSG00000130726,protein_coding -46487,METTL26,ENSG00000130731,protein_coding -55342,YIPF2,ENSG00000130733,protein_coding -55322,ATG4D,ENSG00000130734,protein_coding -24852,EIF2S3,ENSG00000130741,protein_coding -56884,TMEM160,ENSG00000130748,protein_coding -56885,AC008755.1,ENSG00000130749,protein_coding -56883,NPAS1,ENSG00000130751,protein_coding -56467,GMFG,ENSG00000130755,protein_coding -56517,MAP3K10,ENSG00000130758,protein_coding -189,ARHGEF16,ENSG00000130762,protein_coding -203,LRRC47,ENSG00000130764,protein_coding -956,SESN2,ENSG00000130766,protein_coding -941,SMPDL3B,ENSG00000130768,protein_coding -953,ATP5IF1,ENSG00000130770,protein_coding -957,MED18,ENSG00000130772,protein_coding -939,THEMIS2,ENSG00000130775,protein_coding -40202,CLIP1,ENSG00000130779,protein_coding -40219,CCDC62,ENSG00000130783,protein_coding -40220,HIP1R,ENSG00000130787,protein_coding -55238,ZNF317,ENSG00000130803,protein_coding -55293,PPAN,ENSG00000130810,protein_coding -55297,EIF3G,ENSG00000130811,protein_coding -55291,ANGPTL6,ENSG00000130812,protein_coding -55289,C19orf66,ENSG00000130813,protein_coding -55298,DNMT1,ENSG00000130816,protein_coding -55262,ZNF426,ENSG00000130818,protein_coding -26863,SLC6A8,ENSG00000130821,protein_coding -26862,PNCK,ENSG00000130822,protein_coding -26933,DKC1,ENSG00000130826,protein_coding -26911,PLXNA3,ENSG00000130827,protein_coding 
-26859,DUSP9,ENSG00000130829,protein_coding -26937,MPP1,ENSG00000130830,protein_coding -57317,ZNF331,ENSG00000130844,protein_coding -53223,ZNF236,ENSG00000130856,protein_coding -56167,SLC7A10,ENSG00000130876,protein_coding -56164,LRP3,ENSG00000130881,protein_coding -40234,C12orf65,ENSG00000130921,protein_coding -51518,NOL11,ENSG00000130935,protein_coding -347,UBE4B,ENSG00000130939,protein_coding -367,CASZ1,ENSG00000130940,protein_coding -59534,PKDREJ,ENSG00000130943,protein_coding -30852,HSD17B3,ENSG00000130948,protein_coding -30802,NUTM2F,ENSG00000130950,protein_coding -30861,HABP4,ENSG00000130956,protein_coding -30812,FBP2,ENSG00000130957,protein_coding -30856,SLC35D2,ENSG00000130958,protein_coding -24981,PRRG1,ENSG00000130962,protein_coding -25144,UBA1,ENSG00000130985,protein_coding -25138,RGN,ENSG00000130988,protein_coding -12929,POLN,ENSG00000130997,protein_coding -21153,PPIL4,ENSG00000131013,protein_coding -21180,ULBP2,ENSG00000131015,protein_coding -21211,AKAP12,ENSG00000131016,protein_coding -21226,SYNE1,ENSG00000131018,protein_coding -21190,ULBP3,ENSG00000131019,protein_coding -21163,LATS1,ENSG00000131023,protein_coding -57482,EPS8L1,ENSG00000131037,protein_coding -57419,LILRB2,ENSG00000131042,protein_coding -54061,AAR2,ENSG00000131043,protein_coding -53900,TTLL9,ENSG00000131044,protein_coding -53943,BPIFA2,ENSG00000131050,protein_coding -54042,RBM39,ENSG00000131051,protein_coding -53891,COX4I2,ENSG00000131055,protein_coding -53947,BPIFA3,ENSG00000131059,protein_coding -53964,ZNF341,ENSG00000131061,protein_coding -54001,GGT7,ENSG00000131067,protein_coding -53873,DEFB118,ENSG00000131068,protein_coding -54002,ACSS2,ENSG00000131069,protein_coding -25534,EDA2R,ENSG00000131080,protein_coding -25490,ARHGEF9,ENSG00000131089,protein_coding -50830,C1QL1,ENSG00000131094,protein_coding -50826,GFAP,ENSG00000131095,protein_coding -50771,PYY,ENSG00000131096,protein_coding -50821,HIGD1B,ENSG00000131097,protein_coding -58371,ATP6V1E1,ENSG00000131100,protein_coding 
-56729,ZNF227,ENSG00000131115,protein_coding -56694,ZNF428,ENSG00000131116,protein_coding -56681,TEX101,ENSG00000131126,protein_coding -12864,ZNF141,ENSG00000131127,protein_coding -55194,CCL25,ENSG00000131142,protein_coding -48822,COX4I1,ENSG00000131143,protein_coding -48818,EMC8,ENSG00000131148,protein_coding -48802,GSE1,ENSG00000131149,protein_coding -48872,AC010531.1,ENSG00000131152,protein_coding -48812,GINS2,ENSG00000131153,protein_coding -48960,CHMP1A,ENSG00000131165,protein_coding -25799,SH3BGRL,ENSG00000131171,protein_coding -25763,COX7B,ENSG00000131174,protein_coding -18318,SLC34A1,ENSG00000131183,protein_coding -18320,F12,ENSG00000131187,protein_coding -18323,PRR7,ENSG00000131188,protein_coding -53264,NFATC1,ENSG00000131196,protein_coding -27834,IDO1,ENSG00000131203,protein_coding -1250,GJA9,ENSG00000131233,protein_coding -1291,CAP1,ENSG00000131236,protein_coding -1292,PPT1,ENSG00000131238,protein_coding -50164,RAB11FIP4,ENSG00000131242,protein_coding -25725,RLIM,ENSG00000131263,protein_coding -25675,CDX4,ENSG00000131264,protein_coding -25729,ABCB7,ENSG00000131269,protein_coding -43901,TRAF3,ENSG00000131323,protein_coding -55695,HAUS8,ENSG00000131351,protein_coding -55566,ADGRE3,ENSG00000131355,protein_coding -9895,MRPS25,ENSG00000131368,protein_coding -9902,SH3BP5,ENSG00000131370,protein_coding -9913,HACL1,ENSG00000131373,protein_coding -9944,TBC1D5,ENSG00000131374,protein_coding -9900,CAPN7,ENSG00000131375,protein_coding -9929,RFTN1,ENSG00000131378,protein_coding -9888,C3orf20,ENSG00000131379,protein_coding -9896,RBSN,ENSG00000131381,protein_coding -9924,GALNT15,ENSG00000131386,protein_coding -9881,SLC6A6,ENSG00000131389,protein_coding -57093,KCNC3,ENSG00000131398,protein_coding -57096,NAPSA,ENSG00000131400,protein_coding -57094,NR1H2,ENSG00000131408,protein_coding -57108,LRRC4B,ENSG00000131409,protein_coding -17378,PDLIM4,ENSG00000131435,protein_coding -17395,KIF3A,ENSG00000131437,protein_coding -18440,MGAT1,ENSG00000131446,protein_coding 
-18428,GFPT2,ENSG00000131459,protein_coding -50679,TUBG1,ENSG00000131462,protein_coding -50701,PSME3,ENSG00000131467,protein_coding -50713,RPL27,ENSG00000131469,protein_coding -50675,PSMC3IP,ENSG00000131470,protein_coding -50703,AOC3,ENSG00000131471,protein_coding -50636,ACLY,ENSG00000131473,protein_coding -50695,VPS25,ENSG00000131475,protein_coding -50694,RAMP2,ENSG00000131477,protein_coding -50702,AOC2,ENSG00000131480,protein_coding -50707,G6PC,ENSG00000131482,protein_coding -17630,NDUFA2,ENSG00000131495,protein_coding -17616,ANKHD1,ENSG00000131503,protein_coding -17729,DIAPH1,ENSG00000131504,protein_coding -17750,NDFIP1,ENSG00000131507,protein_coding -17589,UBE2D2,ENSG00000131508,protein_coding -23952,EXOC4,ENSG00000131558,protein_coding -93,ACAP3,ENSG00000131584,protein_coding -75,C1orf159,ENSG00000131591,protein_coding -33776,ANO1,ENSG00000131620,protein_coding -33781,PPFIA1,ENSG00000131626,protein_coding -46570,TMEM204,ENSG00000131634,protein_coding -46709,KREMEN2,ENSG00000131650,protein_coding -46720,THOC6,ENSG00000131652,protein_coding -46636,TRAF7,ENSG00000131653,protein_coding -30789,BARX1,ENSG00000131668,protein_coding -30776,NINJ1,ENSG00000131669,protein_coding -311,CA6,ENSG00000131686,protein_coding -232,NPHP4,ENSG00000131697,protein_coding -16549,MAP1B,ENSG00000131711,protein_coding -26301,RHOXF2,ENSG00000131721,protein_coding -26253,IL13RA1,ENSG00000131724,protein_coding -26248,WDR44,ENSG00000131725,protein_coding -16747,CKMT2,ENSG00000131730,protein_coding -16749,ZCCHC9,ENSG00000131732,protein_coding -50601,KRT34,ENSG00000131737,protein_coding -50600,KRT33B,ENSG00000131738,protein_coding -50531,TNS4,ENSG00000131746,protein_coding -50526,TOP2A,ENSG00000131747,protein_coding -50484,STARD3,ENSG00000131748,protein_coding -50517,RARA,ENSG00000131759,protein_coding -50483,PPP1R1B,ENSG00000131771,protein_coding -29235,KHDRBS3,ENSG00000131773,protein_coding -3000,CHD1L,ENSG00000131778,protein_coding -2957,PEX11B,ENSG00000131779,protein_coding 
-2997,FMO5,ENSG00000131781,protein_coding -2952,PIAS3,ENSG00000131788,protein_coding -2994,PRKAB2,ENSG00000131791,protein_coding -32594,FSHB,ENSG00000131808,protein_coding -24799,PDHA1,ENSG00000131828,protein_coding -24779,RAI2,ENSG00000131831,protein_coding -16545,MCCC2,ENSG00000131844,protein_coding -57628,AC005261.1,ENSG00000131845,protein_coding -57565,ZSCAN5A,ENSG00000131848,protein_coding -57721,ZNF132,ENSG00000131849,protein_coding -57611,USP29,ENSG00000131864,protein_coding -46401,SELENOS,ENSG00000131871,protein_coding -46400,CHSY1,ENSG00000131873,protein_coding -46402,SNRPA1,ENSG00000131876,protein_coding -49682,LLGL1,ENSG00000131899,protein_coding -891,NR0B2,ENSG00000131910,protein_coding -866,LIN28A,ENSG00000131914,protein_coding -27895,THAP1,ENSG00000131931,protein_coding -56159,RHPN2,ENSG00000131941,protein_coding -56099,C19orf12,ENSG00000131943,protein_coding -56158,FAAP24,ENSG00000131944,protein_coding -42948,LRRC9,ENSG00000131951,protein_coding -42916,ACTR10,ENSG00000131966,protein_coding -42772,ABHD12B,ENSG00000131969,protein_coding -42847,GCH1,ENSG00000131979,protein_coding -42855,LGALS3,ENSG00000131981,protein_coding -55527,PODNL1,ENSG00000132000,protein_coding -55559,DNAJB1,ENSG00000132002,protein_coding -55515,ZSWIM4,ENSG00000132003,protein_coding -55461,FBXW9,ENSG00000132004,protein_coding -55529,RFX1,ENSG00000132005,protein_coding -55415,ZNF20,ENSG00000132010,protein_coding -55525,C19orf57,ENSG00000132016,protein_coding -55528,DCAF15,ENSG00000132017,protein_coding -55526,CC2D1A,ENSG00000132024,protein_coding -55477,RTBDN,ENSG00000132026,protein_coding -5762,MATN3,ENSG00000132031,protein_coding -32023,TRIM21,ENSG00000132109,protein_coding -1570,SPATA6,ENSG00000132122,protein_coding -1511,LRRC41,ENSG00000132128,protein_coding -50271,CCT6B,ENSG00000132141,protein_coding -10422,DHX30,ENSG00000132153,protein_coding -9847,RAF1,ENSG00000132155,protein_coding -9812,SLC6A11,ENSG00000132164,protein_coding -9839,PPARG,ENSG00000132170,protein_coding 
-9859,NUP210,ENSG00000132182,protein_coding -3710,FCRLA,ENSG00000132185,protein_coding -3737,HSD17B7,ENSG00000132196,protein_coding -52075,ENOSF1,ENSG00000132199,protein_coding -52115,EMILIN2,ENSG00000132205,protein_coding -47501,SLX1A,ENSG00000132207,protein_coding -32169,ARFIP2,ENSG00000132254,protein_coding -32123,TRIM5,ENSG00000132256,protein_coding -32159,CNGA4,ENSG00000132259,protein_coding -32124,TRIM22,ENSG00000132274,protein_coding -32174,RRP8,ENSG00000132275,protein_coding -32171,TIMM10B,ENSG00000132286,protein_coding -29179,EFR3A,ENSG00000132294,protein_coding -29182,HHLA1,ENSG00000132297,protein_coding -6921,PTCD3,ENSG00000132300,protein_coding -6923,IMMT,ENSG00000132305,protein_coding -6927,MRPL35,ENSG00000132313,protein_coding -9516,IQCA1,ENSG00000132321,protein_coding -9551,ILKAP,ENSG00000132323,protein_coding -9555,PER2,ENSG00000132326,protein_coding -9541,RAMP1,ENSG00000132329,protein_coding -9546,SCLY,ENSG00000132330,protein_coding -37366,PTPRE,ENSG00000132334,protein_coding -40418,RAN,ENSG00000132341,protein_coding -16129,PRKAA1,ENSG00000132356,protein_coding -16133,CARD6,ENSG00000132357,protein_coding -49110,RAP1GAP2,ENSG00000132359,protein_coding -49103,CLUH,ENSG00000132361,protein_coding -49038,INPP5K,ENSG00000132376,protein_coding -49172,MYBBP1A,ENSG00000132382,protein_coding -49055,RPA1,ENSG00000132383,protein_coding -49053,SERPINF1,ENSG00000132386,protein_coding -49165,UBE2G1,ENSG00000132388,protein_coding -11600,EEFSEC,ENSG00000132394,protein_coding -13036,TBC1D14,ENSG00000132405,protein_coding -12991,TMEM128,ENSG00000132406,protein_coding -20448,COQ3,ENSG00000132423,protein_coding -20449,PNISR,ENSG00000132424,protein_coding -20492,POPDC3,ENSG00000132429,protein_coding -22477,SEC61G,ENSG00000132432,protein_coding -22486,LANCL2,ENSG00000132434,protein_coding -22429,FIGNL1,ENSG00000132436,protein_coding -22430,DDC,ENSG00000132437,protein_coding -24936,FTHL17,ENSG00000132446,protein_coding -13899,GRSF1,ENSG00000132463,protein_coding 
-13891,ENAM,ENSG00000132464,protein_coding -13893,JCHAIN,ENSG00000132465,protein_coding -13917,ANKRD17,ENSG00000132466,protein_coding -13894,UTP3,ENSG00000132467,protein_coding -51708,ITGB4,ENSG00000132470,protein_coding -51715,WBP2,ENSG00000132471,protein_coding -51710,H3F3B,ENSG00000132475,protein_coding -51712,UNK,ENSG00000132478,protein_coding -51716,AC087289.1,ENSG00000132481,protein_coding -1991,ZRANB2,ENSG00000132485,protein_coding -49311,EIF5A,ENSG00000132507,protein_coding -49364,KDM6B,ENSG00000132510,protein_coding -49290,CLEC10A,ENSG00000132514,protein_coding -49217,SLC52A1,ENSG00000132517,protein_coding -49377,GUCY2D,ENSG00000132518,protein_coding -49312,GPS2,ENSG00000132522,protein_coding -49269,XAF1,ENSG00000132530,protein_coding -49294,DLG4,ENSG00000132535,protein_coding -28717,RIDA,ENSG00000132541,protein_coding -28738,VPS13B,ENSG00000132549,protein_coding -28753,RGS22,ENSG00000132554,protein_coding -28711,MATN2,ENSG00000132561,protein_coding -17548,REEP2,ENSG00000132563,protein_coding -17466,PCBD2,ENSG00000132570,protein_coding -50013,SDF2,ENSG00000132581,protein_coding -50042,FLOT2,ENSG00000132589,protein_coding -50036,ERAL1,ENSG00000132591,protein_coding -48376,PRMT7,ENSG00000132600,protein_coding -48422,NIP7,ENSG00000132603,protein_coding -48424,TERF2,ENSG00000132604,protein_coding -48416,VPS4A,ENSG00000132612,protein_coding -48470,MTSS2,ENSG00000132613,protein_coding -53412,HSPA12B,ENSG00000132622,protein_coding -53519,ANKEF1,ENSG00000132623,protein_coding -53662,SCP2D1,ENSG00000132631,protein_coding -53386,PCED1A,ENSG00000132635,protein_coding -53524,SNAP25,ENSG00000132639,protein_coding -53553,BTBD3,ENSG00000132640,protein_coding -53453,PCNA,ENSG00000132646,protein_coding -53741,NXT1,ENSG00000132661,protein_coding -53645,POLR3F,ENSG00000132664,protein_coding -53670,RIN2,ENSG00000132669,protein_coding -53388,PTPRA,ENSG00000132670,protein_coding -53731,SSTR4,ENSG00000132671,protein_coding -3434,DAP3,ENSG00000132676,protein_coding 
-3471,RHBG,ENSG00000132677,protein_coding -3442,KHDC4,ENSG00000132680,protein_coding -3624,ATP1A4,ENSG00000132681,protein_coding -3489,NES,ENSG00000132688,protein_coding -3486,BCAN,ENSG00000132692,protein_coding -3598,CRP,ENSG00000132693,protein_coding -3505,ARHGEF11,ENSG00000132694,protein_coding -3455,RAB25,ENSG00000132698,protein_coding -3484,HAPLN2,ENSG00000132702,protein_coding -3594,APCS,ENSG00000132703,protein_coding -3528,FCRL2,ENSG00000132704,protein_coding -3628,DCAF8,ENSG00000132716,protein_coding -3439,SYT11,ENSG00000132718,protein_coding -33741,IGHMBP2,ENSG00000132740,protein_coding -33697,ACY3,ENSG00000132744,protein_coding -33699,ALDH3B2,ENSG00000132746,protein_coding -33737,TESMIN,ENSG00000132749,protein_coding -1482,MMACHC,ENSG00000132763,protein_coding -1423,DPH2,ENSG00000132768,protein_coding -1478,TOE1,ENSG00000132773,protein_coding -1488,NASP,ENSG00000132780,protein_coding -1477,MUTYH,ENSG00000132781,protein_coding -54098,CTNNBL1,ENSG00000132792,protein_coding -54169,LPIN3,ENSG00000132793,protein_coding -54297,ZSWIM3,ENSG00000132801,protein_coding -54531,RBM38,ENSG00000132819,protein_coding -54099,VSTM2L,ENSG00000132821,protein_coding -54213,OSER1,ENSG00000132823,protein_coding -54227,SERINC3,ENSG00000132824,protein_coding -54593,PPP1R3D,ENSG00000132825,protein_coding -16691,DMGDH,ENSG00000132837,protein_coding -16693,BHMT2,ENSG00000132840,protein_coding -16678,AP3B1,ENSG00000132842,protein_coding -16662,ZBED3,ENSG00000132846,protein_coding -1825,PATJ,ENSG00000132849,protein_coding -1837,KANK4,ENSG00000132854,protein_coding -1842,ANGPTL3,ENSG00000132855,protein_coding -52731,SYT4,ENSG00000132872,protein_coding -52745,SLC14A2,ENSG00000132874,protein_coding -395,FBXO44,ENSG00000132879,protein_coding -550,CPLANE2,ENSG00000132881,protein_coding -508,CASP9,ENSG00000132906,protein_coding -17946,NMUR2,ENSG00000132911,protein_coding -17905,DCTN4,ENSG00000132912,protein_coding -17877,PDE6A,ENSG00000132915,protein_coding 
-40740,ATP8A2,ENSG00000132932,protein_coding -40811,MTUS2,ENSG00000132938,protein_coding -40601,ZMYM5,ENSG00000132950,protein_coding -40842,USPL1,ENSG00000132952,protein_coding -40626,XPO4,ENSG00000132953,protein_coding -40590,TPTE2,ENSG00000132958,protein_coding -40805,POMP,ENSG00000132963,protein_coding -40753,CDK8,ENSG00000132964,protein_coding -40844,ALOX5AP,ENSG00000132965,protein_coding -40756,WASF3,ENSG00000132970,protein_coding -40720,RNF17,ENSG00000132972,protein_coding -40760,GPR12,ENSG00000132975,protein_coding -5222,CHRM3,ENSG00000133019,protein_coding -49457,MYH8,ENSG00000133020,protein_coding -49417,MYH10,ENSG00000133026,protein_coding -49654,PEMT,ENSG00000133027,protein_coding -49464,SCO1,ENSG00000133028,protein_coding -49634,MPRIP,ENSG00000133030,protein_coding -4435,CHI3L1,ENSG00000133048,protein_coding -4434,MYBPH,ENSG00000133055,protein_coding -4477,PIK3C2B,ENSG00000133056,protein_coding -4495,DSTYK,ENSG00000133059,protein_coding -4436,CHIT1,ENSG00000133063,protein_coding -4519,SLC41A1,ENSG00000133065,protein_coding -4401,LGR6,ENSG00000133067,protein_coding -4497,TMCC2,ENSG00000133069,protein_coding -40911,DCLK1,ENSG00000133083,protein_coding -40918,CCNA1,ENSG00000133101,protein_coding -40964,COG6,ENSG00000133103,protein_coding -40916,SPART,ENSG00000133104,protein_coding -40859,RXFP2,ENSG00000133105,protein_coding -41025,EPSTI1,ENSG00000133106,protein_coding -40942,TRPC4,ENSG00000133107,protein_coding -40941,POSTN,ENSG00000133110,protein_coding -40925,RFXAP,ENSG00000133111,protein_coding -41072,TPT1,ENSG00000133112,protein_coding -41064,GPALPP1,ENSG00000133114,protein_coding -40958,STOML3,ENSG00000133115,protein_coding -40880,KL,ENSG00000133116,protein_coding -40892,RFC3,ENSG00000133119,protein_coding -40881,STARD13,ENSG00000133121,protein_coding -26130,IRS4,ENSG00000133124,protein_coding -26097,MORC4,ENSG00000133131,protein_coding -26041,BEX2,ENSG00000133134,protein_coding -26095,RNF128,ENSG00000133135,protein_coding 
-26096,TBC1D8B,ENSG00000133138,protein_coding -26050,TCEAL4,ENSG00000133142,protein_coding -26033,BEX1,ENSG00000133169,protein_coding -51611,FAM104A,ENSG00000133193,protein_coding -51601,SLC39A11,ENSG00000133195,protein_coding -734,EPHB2,ENSG00000133216,protein_coding -798,SRRM1,ENSG00000133226,protein_coding -54886,BTBD2,ENSG00000133243,protein_coding -55213,PRAM1,ENSG00000133246,protein_coding -57508,KMT5C,ENSG00000133247,protein_coding -55214,ZNF414,ENSG00000133250,protein_coding -12876,PDE6B,ENSG00000133256,protein_coding -57503,HSPBP1,ENSG00000133265,protein_coding -54884,CSNK1G2,ENSG00000133275,protein_coding -16909,SLF1,ENSG00000133302,protein_coding -53175,CNDP2,ENSG00000133313,protein_coding -33442,MACROD1,ENSG00000133315,protein_coding -33390,WDR74,ENSG00000133316,protein_coding -33420,LGALS12,ENSG00000133317,protein_coding -33428,RTN3,ENSG00000133318,protein_coding -33421,PLAAT4,ENSG00000133321,protein_coding -33422,PLAAT2,ENSG00000133328,protein_coding -47060,MYH11,ENSG00000133392,protein_coding -47064,FOPNL,ENSG00000133393,protein_coding -15626,MED10,ENSG00000133398,protein_coding -15968,PDZD2,ENSG00000133401,protein_coding -59025,MORC2,ENSG00000133422,protein_coding -59114,LARGE1,ENSG00000133424,protein_coding -58791,GSTT2B,ENSG00000133433,protein_coding -58865,MYO18B,ENSG00000133454,protein_coding -58781,SLC2A11,ENSG00000133460,protein_coding -59206,C1QTNF6,ENSG00000133466,protein_coding -58558,GGT2,ENSG00000133475,protein_coding -59333,FAM83F,ENSG00000133477,protein_coding -59011,SEC14L4,ENSG00000133488,protein_coding -24394,GIMAP6,ENSG00000133561,protein_coding -24391,GIMAP4,ENSG00000133574,protein_coding -24078,ADCK2,ENSG00000133597,protein_coding -24073,MKRN1,ENSG00000133606,protein_coding -24416,AGAP3,ENSG00000133612,protein_coding -24358,KRBA1,ENSG00000133619,protein_coding -24464,ACTR3B,ENSG00000133627,protein_coding -39449,NTS,ENSG00000133636,protein_coding -39523,BTG1,ENSG00000133639,protein_coding 
-39443,LRRIQ1,ENSG00000133640,protein_coding -39465,C12orf29,ENSG00000133641,protein_coding -12706,ATP13A3,ENSG00000133657,protein_coding -36505,SFTPD,ENSG00000133661,protein_coding -36530,DYDC2,ENSG00000133665,protein_coding -36516,TMEM254,ENSG00000133678,protein_coding -38228,TMTC1,ENSG00000133687,protein_coding -38140,KRAS,ENSG00000133703,protein_coding -38239,IPO8,ENSG00000133704,protein_coding -17796,LARS,ENSG00000133706,protein_coding -17830,SPINK5,ENSG00000133710,protein_coding -28474,IMPA1,ENSG00000133731,protein_coding -28504,LRRCC1,ENSG00000133739,protein_coding -28506,E2F5,ENSG00000133740,protein_coding -28512,CA1,ENSG00000133742,protein_coding -39423,CCDC59,ENSG00000133773,protein_coding -32277,SWAP70,ENSG00000133789,protein_coding -32343,ARNTL,ENSG00000133794,protein_coding -32296,LYVE1,ENSG00000133800,protein_coding -32290,AMPD3,ENSG00000133805,protein_coding -32281,SBF2,ENSG00000133812,protein_coding -32326,MICAL2,ENSG00000133816,protein_coding -32365,RRAS2,ENSG00000133818,protein_coding -17216,HSD17B4,ENSG00000133835,protein_coding -39287,ZFC3H1,ENSG00000133858,protein_coding -27685,TEX15,ENSG00000133863,protein_coding -27658,SARAF,ENSG00000133872,protein_coding -27721,RNF122,ENSG00000133874,protein_coding -27723,DUSP26,ENSG00000133878,protein_coding -33536,DPF2,ENSG00000133884,protein_coding -33489,MEN1,ENSG00000133895,protein_coding -43324,ERG28,ENSG00000133935,protein_coding -43620,GSC,ENSG00000133937,protein_coding -43535,DGLUCY,ENSG00000133943,protein_coding -43582,UNC79,ENSG00000133958,protein_coding -43227,NUMB,ENSG00000133961,protein_coding -43548,CATSPERB,ENSG00000133962,protein_coding -43273,VRTN,ENSG00000133980,protein_coding -43164,COX16,ENSG00000133983,protein_coding -43179,TTC9,ENSG00000133985,protein_coding -43174,MED6,ENSG00000133997,protein_coding -43089,EIF2S1,ENSG00000134001,protein_coding -43172,ADAM20,ENSG00000134007,protein_coding -27521,LOXL2,ENSG00000134013,protein_coding -27613,ELP3,ENSG00000134014,protein_coding 
-27501,PEBP4,ENSG00000134020,protein_coding -27542,ADAMDEC1,ENSG00000134028,protein_coding -52811,CTIF,ENSG00000134030,protein_coding -52868,MRO,ENSG00000134042,protein_coding -52896,MBD2,ENSG00000134046,protein_coding -52788,IER3IP1,ENSG00000134049,protein_coding -16491,MRPS36,ENSG00000134056,protein_coding -16488,CCNB1,ENSG00000134057,protein_coding -16492,CDK7,ENSG00000134058,protein_coding -16454,CD180,ENSG00000134061,protein_coding -9797,IRAK2,ENSG00000134070,protein_coding -9768,CAMK1,ENSG00000134072,protein_coding -9759,THUMPD3,ENSG00000134077,protein_coding -9795,VHL,ENSG00000134086,protein_coding -9713,BHLHE40,ENSG00000134107,protein_coding -9718,ARL8B,ENSG00000134108,protein_coding -9721,EDEM1,ENSG00000134109,protein_coding -9681,CNTN6,ENSG00000134115,protein_coding -9671,CHL1,ENSG00000134121,protein_coding -44771,MEIS2,ENSG00000134138,protein_coding -44749,DPH6,ENSG00000134146,protein_coding -44709,KATNBL1,ENSG00000134152,protein_coding -44706,EMC7,ENSG00000134153,protein_coding -44646,TRPM1,ENSG00000134160,protein_coding -2556,GNAT2,ENSG00000134183,protein_coding -2562,GSTM1,ENSG00000134184,protein_coding -2518,PRPF38B,ENSG00000134186,protein_coding -2821,REG4,ENSG00000134193,protein_coding -2718,TSPAN2,ENSG00000134198,protein_coding -2717,TSHB,ENSG00000134200,protein_coding -2564,GSTM5,ENSG00000134201,protein_coding -2566,GSTM3,ENSG00000134202,protein_coding -2698,SYT6,ENSG00000134207,protein_coding -2498,VAV3,ENSG00000134215,protein_coding -2620,CHIA,ENSG00000134216,protein_coding -2542,PSRC1,ENSG00000134222,protein_coding -2820,HMGCS2,ENSG00000134240,protein_coding -2688,PTPN22,ENSG00000134242,protein_coding -2544,SORT1,ENSG00000134243,protein_coding -2655,WNT2B,ENSG00000134245,protein_coding -2762,PTGFRN,ENSG00000134247,protein_coding -2591,LAMTOR5,ENSG00000134248,protein_coding -2825,ADAM30,ENSG00000134249,protein_coding -2826,NOTCH2,ENSG00000134250,protein_coding -2768,TRIM45,ENSG00000134253,protein_coding 
-2613,CEPT1,ENSG00000134255,protein_coding -2764,CD101,ENSG00000134256,protein_coding -2770,VTCN1,ENSG00000134258,protein_coding -2722,NGF,ENSG00000134259,protein_coding -2691,AP4B1,ENSG00000134262,protein_coding -52268,NAPG,ENSG00000134265,protein_coding -52337,SPIRE1,ENSG00000134278,protein_coding -38389,PPHLN1,ENSG00000134283,protein_coding -38558,FKBP11,ENSG00000134285,protein_coding -38559,ARF3,ENSG00000134287,protein_coding -38496,TMEM106C,ENSG00000134291,protein_coding -38451,SLC38A2,ENSG00000134294,protein_coding -5608,YWHAQ,ENSG00000134308,protein_coding -5592,KIDINS220,ENSG00000134313,protein_coding -5618,GRHL1,ENSG00000134317,protein_coding -5655,ROCK2,ENSG00000134318,protein_coding -5566,RSAD2,ENSG00000134321,protein_coding -5712,MYCN,ENSG00000134323,protein_coding -5674,LPIN1,ENSG00000134324,protein_coding -5565,CMPK2,ENSG00000134326,protein_coding -5604,IAH1,ENSG00000134330,protein_coding -32442,LDHA,ENSG00000134333,protein_coding -32434,SAA2,ENSG00000134339,protein_coding -32546,ANO3,ENSG00000134343,protein_coding -16305,IL6ST,ENSG00000134352,protein_coding -16253,FST,ENSG00000134363,protein_coding -4289,CFHR4,ENSG00000134365,protein_coding -4373,NAV1,ENSG00000134369,protein_coding -4266,CDC73,ENSG00000134371,protein_coding -4385,TIMM17A,ENSG00000134375,protein_coding -4298,CRB1,ENSG00000134376,protein_coding -4292,CFHR5,ENSG00000134389,protein_coding -47288,ERN2,ENSG00000134398,protein_coding -47134,RPS15A,ENSG00000134419,protein_coding -52989,RAX,ENSG00000134438,protein_coding -52950,NARS,ENSG00000134440,protein_coding -52988,GRP,ENSG00000134443,protein_coding -53035,RELCH,ENSG00000134444,protein_coding -35266,FBH1,ENSG00000134452,protein_coding -35273,RBM17,ENSG00000134453,protein_coding -35269,IL2RA,ENSG00000134460,protein_coding -35264,ANKRD16,ENSG00000134461,protein_coding -35348,ECHDC3,ENSG00000134463,protein_coding -35268,IL15RA,ENSG00000134470,protein_coding -16827,CCNH,ENSG00000134480,protein_coding 
-52514,HRH4,ENSG00000134489,protein_coding -52483,TMEM241,ENSG00000134490,protein_coding -52543,KCTD1,ENSG00000134504,protein_coding -52482,CABLES1,ENSG00000134508,protein_coding -18145,DOCK2,ENSG00000134516,protein_coding -37971,EMP1,ENSG00000134531,protein_coding -38119,SOX5,ENSG00000134532,protein_coding -38009,RERG,ENSG00000134533,protein_coding -38081,SLCO1B1,ENSG00000134538,protein_coding -37864,KLRD1,ENSG00000134539,protein_coding -37874,KLRC1,ENSG00000134545,protein_coding -38088,SPX,ENSG00000134548,protein_coding -37895,PRH2,ENSG00000134551,protein_coding -32845,LRP4,ENSG00000134569,protein_coding -32862,MYBPC3,ENSG00000134571,protein_coding -32855,DDB2,ENSG00000134574,protein_coding -32857,ACP2,ENSG00000134575,protein_coding -26478,USP26,ENSG00000134588,protein_coding -26527,RTL8C,ENSG00000134590,protein_coding -26431,RAB33A,ENSG00000134594,protein_coding -26638,SOX3,ENSG00000134595,protein_coding -26437,RBMX2,ENSG00000134597,protein_coding -26468,STK26,ENSG00000134602,protein_coding -34342,PIWIL4,ENSG00000134627,protein_coding -34289,MTNR1B,ENSG00000134640,protein_coding -1018,PUM1,ENSG00000134644,protein_coding -1045,SPOCD1,ENSG00000134668,protein_coding -1089,YARS,ENSG00000134684,protein_coding -1111,PHC2,ENSG00000134686,protein_coding -1212,CDCA8,ENSG00000134690,protein_coding -1208,GNL2,ENSG00000134697,protein_coding -1161,AGO4,ENSG00000134698,protein_coding -1809,HOOK1,ENSG00000134709,protein_coding -1810,CYP2J2,ENSG00000134716,protein_coding -1640,BTF3L4,ENSG00000134717,protein_coding -1655,TUT4,ENSG00000134744,protein_coding -1654,PRPF38A,ENSG00000134748,protein_coding -52584,DSC2,ENSG00000134755,protein_coding -52593,DSG3,ENSG00000134757,protein_coding -52618,RNF138,ENSG00000134758,protein_coding -52680,ELP2,ENSG00000134759,protein_coding -52588,DSG1,ENSG00000134760,protein_coding -52583,DSC3,ENSG00000134762,protein_coding -52586,DSC1,ENSG00000134765,protein_coding -52646,DTNA,ENSG00000134769,protein_coding 
-52686,FHOD3,ENSG00000134775,protein_coding -52688,TPGS2,ENSG00000134779,protein_coding -33313,DAGLA,ENSG00000134780,protein_coding -33117,SLC43A3,ENSG00000134802,protein_coding -33124,TIMM10,ENSG00000134809,protein_coding -33238,CBLIF,ENSG00000134812,protein_coding -56895,DHX34,ENSG00000134815,protein_coding -33109,APLNR,ENSG00000134817,protein_coding -33319,FADS2,ENSG00000134824,protein_coding -33316,TMEM258,ENSG00000134825,protein_coding -33239,TCN1,ENSG00000134827,protein_coding -56894,C5AR2,ENSG00000134830,protein_coding -13663,TMEM165,ENSG00000134851,protein_coding -13665,CLOCK,ENSG00000134852,protein_coding -13640,PDGFRA,ENSG00000134853,protein_coding -41726,GGACT,ENSG00000134864,protein_coding -41833,COL4A2,ENSG00000134871,protein_coding -41633,CLDN10,ENSG00000134873,protein_coding -41635,DZIP1,ENSG00000134874,protein_coding -41692,UBAC2,ENSG00000134882,protein_coding -41787,ARGLU1,ENSG00000134884,protein_coding -41760,BIVM,ENSG00000134897,protein_coding -41761,ERCC5,ENSG00000134899,protein_coding -41752,TPP2,ENSG00000134900,protein_coding -41758,POGLUT2,ENSG00000134901,protein_coding -41840,CARS2,ENSG00000134905,protein_coding -35056,ARHGAP32,ENSG00000134909,protein_coding -34991,STT3A,ENSG00000134910,protein_coding -35083,ADAMTS8,ENSG00000134917,protein_coding -34994,ACRV1,ENSG00000134940,protein_coding -35045,ETS1,ENSG00000134954,protein_coding -34978,SLC37A2,ENSG00000134955,protein_coding -13427,KLB,ENSG00000134962,protein_coding -17160,TMED7,ENSG00000134970,protein_coding -17122,APC,ENSG00000134982,protein_coding -17107,NREP,ENSG00000134986,protein_coding -17097,WDR36,ENSG00000134987,protein_coding -30501,OSTF1,ENSG00000134996,protein_coding -30509,RFK,ENSG00000135002,protein_coding -30594,UBQLN1,ENSG00000135018,protein_coding -30625,NAA35,ENSG00000135040,protein_coding -30495,C9orf40,ENSG00000135045,protein_coding -30479,ANXA1,ENSG00000135046,protein_coding -30651,CTSL,ENSG00000135047,protein_coding -30456,CEMIP2,ENSG00000135048,protein_coding 
-30618,AGTPBP1,ENSG00000135049,protein_coding -30626,GOLM1,ENSG00000135052,protein_coding -30431,FAM189A2,ENSG00000135063,protein_coding -30538,PSAT1,ENSG00000135069,protein_coding -30632,ISCA1,ENSG00000135070,protein_coding -18006,ADAM19,ENSG00000135074,protein_coding -17994,HAVCR2,ENSG00000135077,protein_coding -18060,CCNJL,ENSG00000135083,protein_coding -40066,TAOK3,ENSG00000135090,protein_coding -39858,USP30,ENSG00000135093,protein_coding -39982,SDS,ENSG00000135094,protein_coding -40118,MSI1,ENSG00000135097,protein_coding -40153,HNF1A,ENSG00000135100,protein_coding -40053,FBXO21,ENSG00000135108,protein_coding -40003,TBX3,ENSG00000135111,protein_coding -40155,OASL,ENSG00000135114,protein_coding -40045,HRK,ENSG00000135116,protein_coding -40041,RNFT2,ENSG00000135119,protein_coding -40164,P2RX4,ENSG00000135124,protein_coding -40099,BICDL1,ENSG00000135127,protein_coding -39967,DTX1,ENSG00000135144,protein_coding -39949,TRAFD1,ENSG00000135148,protein_coding -23105,DMTF1,ENSG00000135164,protein_coding -23279,OCM2,ENSG00000135175,protein_coding -23107,TMEM243,ENSG00000135185,protein_coding -23021,CCDC146,ENSG00000135205,protein_coding -23031,TMEM60,ENSG00000135211,protein_coding -23060,CD36,ENSG00000135218,protein_coding -13838,UGT2A3,ENSG00000135220,protein_coding -13875,CSN2,ENSG00000135222,protein_coding -13855,UGT2B28,ENSG00000135226,protein_coding -23577,PNPLA8,ENSG00000135241,protein_coding -23817,HILPDA,ENSG00000135245,protein_coding -23840,FAM71F1,ENSG00000135248,protein_coding -23530,RINT1,ENSG00000135249,protein_coding -23521,SRPK2,ENSG00000135250,protein_coding -23847,KCP,ENSG00000135253,protein_coding -23648,TES,ENSG00000135269,protein_coding -23635,MDFIC,ENSG00000135272,protein_coding -20138,MTO1,ENSG00000135297,protein_coding -20068,ADGRB3,ENSG00000135298,protein_coding -20362,ANKRD6,ENSG00000135299,protein_coding -20190,HTR1B,ENSG00000135312,protein_coding -20124,KHDC1,ENSG00000135314,protein_coding -20268,CEP162,ENSG00000135315,protein_coding 
-20287,SYNCRIP,ENSG00000135316,protein_coding -20285,SNX14,ENSG00000135317,protein_coding -20283,NT5E,ENSG00000135318,protein_coding -20267,MRAP2,ENSG00000135324,protein_coding -20402,EPHA7,ENSG00000135333,protein_coding -20331,AKIRIN2,ENSG00000135334,protein_coding -20330,ORC3,ENSG00000135336,protein_coding -20209,LCA5,ENSG00000135338,protein_coding -20383,MAP3K7,ENSG00000135341,protein_coding -20312,CGA,ENSG00000135346,protein_coding -20374,GJA10,ENSG00000135355,protein_coding -32704,PRR5L,ENSG00000135362,protein_coding -32660,LMO2,ENSG00000135363,protein_coding -32818,PHF21A,ENSG00000135365,protein_coding -32664,NAT10,ENSG00000135372,protein_coding -32674,EHF,ENSG00000135373,protein_coding -32671,ELF5,ENSG00000135374,protein_coding -32632,PRRG4,ENSG00000135378,protein_coding -32663,CAPRIN1,ENSG00000135387,protein_coding -38784,ATP5MC2,ENSG00000135390,protein_coding -38903,DNAJC14,ENSG00000135392,protein_coding -38894,CD63,ENSG00000135404,protein_coding -38582,PRPH,ENSG00000135406,protein_coding -39034,AVIL,ENSG00000135407,protein_coding -38769,AMHR2,ENSG00000135409,protein_coding -38847,LACRT,ENSG00000135413,protein_coding -38898,GDF11,ENSG00000135414,protein_coding -38959,GLS2,ENSG00000135423,protein_coding -38890,ITGA7,ENSG00000135424,protein_coding -38852,TESPA1,ENSG00000135426,protein_coding -38594,FAM186B,ENSG00000135436,protein_coding -38893,RDH5,ENSG00000135437,protein_coding -39023,AGAP2,ENSG00000135439,protein_coding -38892,BLOC1S1,ENSG00000135441,protein_coding -38705,KRT85,ENSG00000135443,protein_coding -39026,CDK4,ENSG00000135446,protein_coding -38845,PPP1R1A,ENSG00000135447,protein_coding -38583,TROAP,ENSG00000135451,protein_coding -39025,TSPAN31,ENSG00000135452,protein_coding -39019,B4GALNT1,ENSG00000135454,protein_coding -38654,TFCP2,ENSG00000135457,protein_coding -38939,COQ10A,ENSG00000135469,protein_coding -38608,FAIM2,ENSG00000135472,protein_coding -38946,PAN2,ENSG00000135473,protein_coding -38762,ESPL1,ENSG00000135476,protein_coding 
-38693,KRT7,ENSG00000135480,protein_coding -38927,ZC3H10,ENSG00000135482,protein_coding -38829,HNRNPA1,ENSG00000135486,protein_coding -39018,SLC26A10,ENSG00000135502,protein_coding -38679,ACVR1B,ENSG00000135503,protein_coding -39021,OS9,ENSG00000135506,protein_coding -38956,MIP,ENSG00000135517,protein_coding -38590,KCNH3,ENSG00000135519,protein_coding -21081,LTV1,ENSG00000135521,protein_coding -20971,MAP7,ENSG00000135525,protein_coding -20580,CD164,ENSG00000135535,protein_coding -20553,AFG1L,ENSG00000135537,protein_coding -21012,NHSL1,ENSG00000135540,protein_coding -20952,AHI1,ENSG00000135541,protein_coding -20795,HEY2,ENSG00000135547,protein_coding -20769,PKIB,ENSG00000135549,protein_coding -20888,TAAR5,ENSG00000135569,protein_coding -21050,NMBR,ENSG00000135577,protein_coding -20583,SMPD2,ENSG00000135587,protein_coding -20584,MICAL1,ENSG00000135596,protein_coding -21022,REPS1,ENSG00000135597,protein_coding -21091,STX11,ENSG00000135604,protein_coding -13562,TEC,ENSG00000135605,protein_coding -6689,PRADC1,ENSG00000135617,protein_coding -6763,SEMA4F,ENSG00000135622,protein_coding -6690,CCT7,ENSG00000135624,protein_coding -6692,EGR4,ENSG00000135625,protein_coding -6685,RAB11FIP5,ENSG00000135631,protein_coding -6688,SMYD5,ENSG00000135632,protein_coding -6674,DYSF,ENSG00000135636,protein_coding -6748,CCDC142,ENSG00000135637,protein_coding -6682,EMX1,ENSG00000135638,protein_coding -39272,KCNMB4,ENSG00000135643,protein_coding -39084,USP15,ENSG00000135655,protein_coding -39148,GNS,ENSG00000135677,protein_coding -39239,CPM,ENSG00000135678,protein_coding -39235,MDM2,ENSG00000135679,protein_coding -48790,KLHL36,ENSG00000135686,protein_coding -48719,BCO1,ENSG00000135697,protein_coding -48742,MPHOSPH6,ENSG00000135698,protein_coding -48607,CHST5,ENSG00000135702,protein_coding -48798,KIAA0513,ENSG00000135709,protein_coding -48267,DYNC1LI2,ENSG00000135720,protein_coding -48291,FBXL8,ENSG00000135722,protein_coding -48303,FHOD1,ENSG00000135723,protein_coding 
-48142,CCDC102A,ENSG00000135736,protein_coding -48304,SLC9A5,ENSG00000135740,protein_coding -5046,AGT,ENSG00000135744,protein_coding -5360,ZNF670-ZNF695,ENSG00000135747,protein_coding -5095,PCNX2,ENSG00000135749,protein_coding -5101,KCNK1,ENSG00000135750,protein_coding -5032,URB2,ENSG00000135763,protein_coding -5070,EGLN1,ENSG00000135766,protein_coding -5048,CAPN9,ENSG00000135773,protein_coding -5044,COG2,ENSG00000135775,protein_coding -5027,ABCB10,ENSG00000135776,protein_coding -5094,NTPCR,ENSG00000135778,protein_coding -5031,TAF5L,ENSG00000135801,protein_coding -4114,GLUL,ENSG00000135821,protein_coding -4093,STX6,ENSG00000135823,protein_coding -4124,RGS8,ENSG00000135824,protein_coding -4120,RNASEL,ENSG00000135828,protein_coding -4129,DHX9,ENSG00000135829,protein_coding -4088,KIAA1614,ENSG00000135835,protein_coding -4074,CEP350,ENSG00000135837,protein_coding -4127,NPL,ENSG00000135838,protein_coding -4167,FAM129A,ENSG00000135842,protein_coding -3937,PIGC,ENSG00000135845,protein_coding -4137,LAMC1,ENSG00000135862,protein_coding -3971,RC3H1,ENSG00000135870,protein_coding -9372,GPR55,ENSG00000135898,protein_coding -9354,SP110,ENSG00000135899,protein_coding -9279,MRPL44,ENSG00000135900,protein_coding -9433,CHRND,ENSG00000135902,protein_coding -9251,PAX3,ENSG00000135903,protein_coding -9289,DOCK10,ENSG00000135905,protein_coding -9159,TTLL4,ENSG00000135912,protein_coding -9149,USP37,ENSG00000135913,protein_coding -9381,HTR2B,ENSG00000135914,protein_coding -9370,ITM2C,ENSG00000135916,protein_coding -9319,SLC19A3,ENSG00000135917,protein_coding -9281,SERPINE2,ENSG00000135919,protein_coding -9199,DNAJB2,ENSG00000135924,protein_coding -9167,WNT10A,ENSG00000135925,protein_coding -9137,TMBIM1,ENSG00000135926,protein_coding -9160,CYP27A1,ENSG00000135929,protein_coding -9437,EIF4E2,ENSG00000135930,protein_coding -9383,ARMC9,ENSG00000135931,protein_coding -9366,CAB39,ENSG00000135932,protein_coding -7244,COX5B,ENSG00000135940,protein_coding -7278,REV1,ENSG00000135945,protein_coding 
-7266,TSGA10,ENSG00000135951,protein_coding -7335,MFSD9,ENSG00000135953,protein_coding -7191,TMEM127,ENSG00000135956,protein_coding -7446,EDAR,ENSG00000135960,protein_coding -7377,TGFBRAP1,ENSG00000135966,protein_coding -7437,GCC2,ENSG00000135968,protein_coding -7372,MRPS9,ENSG00000135972,protein_coding -7375,GPR45,ENSG00000135973,protein_coding -7380,C2orf49,ENSG00000135974,protein_coding -7227,ANKRD36,ENSG00000135976,protein_coding -8091,EPC2,ENSG00000135999,protein_coding -7849,ARHGEF4,ENSG00000136002,protein_coding -39841,ISCU,ENSG00000136003,protein_coding -39784,ALDH1L2,ENSG00000136010,protein_coding -39751,STAB2,ENSG00000136011,protein_coding -39601,USP44,ENSG00000136014,protein_coding -39687,SCYL2,ENSG00000136021,protein_coding -39797,CKAP4,ENSG00000136026,protein_coding -39566,PLXNC1,ENSG00000136040,protein_coding -39786,APPL2,ENSG00000136044,protein_coding -39823,PWP1,ENSG00000136045,protein_coding -39723,DRAM1,ENSG00000136048,protein_coding -39785,WASHC4,ENSG00000136051,protein_coding -39779,SLC41A2,ENSG00000136052,protein_coding -10172,VILL,ENSG00000136059,protein_coding -10736,FLNB,ENSG00000136068,protein_coding -41227,NEK3,ENSG00000136098,protein_coding -41250,PCDH8,ENSG00000136099,protein_coding -41236,VPS36,ENSG00000136100,protein_coding -41193,RNASEH2B,ENSG00000136104,protein_coding -41238,CKAP2,ENSG00000136108,protein_coding -41247,CNMD,ENSG00000136110,protein_coding -41430,TBC1D4,ENSG00000136111,protein_coding -41234,THSD1,ENSG00000136114,protein_coding -41400,BORA,ENSG00000136122,protein_coding -41112,LRCH1,ENSG00000136141,protein_coding -41119,SUCLA2,ENSG00000136143,protein_coding -41160,RCBTB1,ENSG00000136144,protein_coding -41128,MED4,ENSG00000136146,protein_coding -41159,PHF11,ENSG00000136147,protein_coding -41082,COG3,ENSG00000136152,protein_coding -41436,LMO7,ENSG00000136153,protein_coding -41461,SCEL,ENSG00000136155,protein_coding -41131,ITM2B,ENSG00000136156,protein_coding -41502,SPRY2,ENSG00000136158,protein_coding 
-41126,NUDT15,ENSG00000136159,protein_coding -41470,EDNRB,ENSG00000136160,protein_coding -41139,RCBTB2,ENSG00000136161,protein_coding -41095,LCP1,ENSG00000136167,protein_coding -41157,SETDB2,ENSG00000136169,protein_coding -22062,SCRN1,ENSG00000136193,protein_coding -22283,C7orf25,ENSG00000136197,protein_coding -22396,TNS3,ENSG00000136205,protein_coding -22311,SPDYE1,ENSG00000136206,protein_coding -21615,CHST12,ENSG00000136213,protein_coding -21913,IGF2BP3,ENSG00000136231,protein_coding -21911,GPNMB,ENSG00000136235,protein_coding -21888,RAPGEF5,ENSG00000136237,protein_coding -21695,RAC1,ENSG00000136238,protein_coding -21697,KDELR2,ENSG00000136240,protein_coding -21908,NUPL2,ENSG00000136243,protein_coding -21895,IL6,ENSG00000136244,protein_coding -21700,ZDHHC4,ENSG00000136247,protein_coding -22190,AOAH,ENSG00000136250,protein_coding -21822,BZW2,ENSG00000136261,protein_coding -21799,DGKB,ENSG00000136267,protein_coding -22357,TBRG4,ENSG00000136270,protein_coding -22337,DDX56,ENSG00000136271,protein_coding -22403,HUS1,ENSG00000136273,protein_coding -22356,NACAD,ENSG00000136274,protein_coding -22317,DBNL,ENSG00000136279,protein_coding -22355,CCM2,ENSG00000136280,protein_coding -22352,MYO1G,ENSG00000136286,protein_coding -21621,TTYH3,ENSG00000136295,protein_coding -21647,MMD2,ENSG00000136297,protein_coding -42388,CIDEB,ENSG00000136305,protein_coding -42036,TTC5,ENSG00000136319,protein_coding -42585,NKX2-8,ENSG00000136327,protein_coding -42581,NKX2-1,ENSG00000136352,protein_coding -42329,ZFHX2,ENSG00000136367,protein_coding -45877,MTHFS,ENSG00000136371,protein_coding -45855,ADAMTS7,ENSG00000136378,protein_coding -45904,ABHD17C,ENSG00000136379,protein_coding -45840,IREB2,ENSG00000136381,protein_coding -46031,ALPK3,ENSG00000136383,protein_coding -45981,TM6SF1,ENSG00000136404,protein_coding -45826,CIB2,ENSG00000136425,protein_coding -50997,CALCOCO2,ENSG00000136436,protein_coding -51085,RSAD1,ENSG00000136444,protein_coding -50833,NMT1,ENSG00000136448,protein_coding 
-51087,MYCBPAP,ENSG00000136449,protein_coding -51206,SRSF1,ENSG00000136450,protein_coding -51202,VEZF1,ENSG00000136451,protein_coding -51083,CHAD,ENSG00000136457,protein_coding -51399,TACO1,ENSG00000136463,protein_coding -51433,TEX2,ENSG00000136478,protein_coding -51397,DCAF7,ENSG00000136485,protein_coding -51414,GH2,ENSG00000136487,protein_coding -51416,CSH1,ENSG00000136488,protein_coding -51404,LIMD2,ENSG00000136490,protein_coding -51340,BRIP1,ENSG00000136492,protein_coding -51043,KAT7,ENSG00000136504,protein_coding -12612,RTP4,ENSG00000136514,protein_coding -12413,ACTL6A,ENSG00000136518,protein_coding -12417,NDUFB5,ENSG00000136521,protein_coding -12416,MRPL47,ENSG00000136522,protein_coding -12565,TRA2B,ENSG00000136527,protein_coding -8295,SCN2A,ENSG00000136531,protein_coding -8255,TBR1,ENSG00000136535,protein_coding -8232,MARCH7,ENSG00000136536,protein_coding -8189,ERMN,ENSG00000136541,protein_coding -8187,GALNT5,ENSG00000136542,protein_coding -8310,SCN7A,ENSG00000136546,protein_coding -8248,TANK,ENSG00000136560,protein_coding -27269,BLK,ENSG00000136573,protein_coding -27273,GATA4,ENSG00000136574,protein_coding -12293,SKIL,ENSG00000136603,protein_coding -4763,EPRS,ENSG00000136628,protein_coding -4796,HLX,ENSG00000136630,protein_coding -3108,VPS45,ENSG00000136631,protein_coding -4550,IL10,ENSG00000136634,protein_coding -4724,KCTD3,ENSG00000136636,protein_coding -4700,RPS6KC1,ENSG00000136643,protein_coding -7577,CBWD2,ENSG00000136682,protein_coding -7561,IL36G,ENSG00000136688,protein_coding -7569,IL1RN,ENSG00000136689,protein_coding -7564,IL36A,ENSG00000136694,protein_coding -7566,IL36RN,ENSG00000136695,protein_coding -7565,IL36B,ENSG00000136696,protein_coding -7567,IL1F10,ENSG00000136697,protein_coding -7836,CFC1,ENSG00000136698,protein_coding -7798,SMPD4,ENSG00000136699,protein_coding -7746,WDR33,ENSG00000136709,protein_coding -7821,CCDC115,ENSG00000136710,protein_coding -7759,SAP130,ENSG00000136715,protein_coding -7726,BIN1,ENSG00000136717,protein_coding 
-7822,IMP4,ENSG00000136718,protein_coding -7768,HS6ST1,ENSG00000136720,protein_coding -7762,UGGT1,ENSG00000136731,protein_coding -7718,GYPC,ENSG00000136732,protein_coding -35457,STAM,ENSG00000136738,protein_coding -35583,GAD2,ENSG00000136750,protein_coding -35596,ABI1,ENSG00000136754,protein_coding -35602,YME1L1,ENSG00000136758,protein_coding -35515,DNAJC1,ENSG00000136770,protein_coding -31010,NIPSNAP3A,ENSG00000136783,protein_coding -31455,LRRC8A,ENSG00000136802,protein_coding -31387,CDK9,ENSG00000136807,protein_coding -31088,TXN,ENSG00000136810,protein_coding -31432,ODF2,ENSG00000136811,protein_coding -31102,ECPAS,ENSG00000136813,protein_coding -31494,TOR1B,ENSG00000136816,protein_coding -31496,C9orf78,ENSG00000136819,protein_coding -30994,SMC2,ENSG00000136824,protein_coding -31049,KLF4,ENSG00000136826,protein_coding -31495,TOR1A,ENSG00000136827,protein_coding -31365,RALGPS1,ENSG00000136828,protein_coding -31377,FAM129B,ENSG00000136830,protein_coding -31266,OR1J1,ENSG00000136834,protein_coding -31005,OR13C9,ENSG00000136839,protein_coding -31399,ST6GALNAC4,ENSG00000136840,protein_coding -30894,TMOD1,ENSG00000136842,protein_coding -31251,DAB2IP,ENSG00000136848,protein_coding -31378,STXBP1,ENSG00000136854,protein_coding -31371,SLC2A8,ENSG00000136856,protein_coding -31366,ANGPTL2,ENSG00000136859,protein_coding -31228,CDK5RAP2,ENSG00000136861,protein_coding -31140,ZFP37,ENSG00000136866,protein_coding -31149,SLC31A2,ENSG00000136867,protein_coding -31151,SLC31A1,ENSG00000136868,protein_coding -31215,TLR4,ENSG00000136869,protein_coding -30970,ZNF189,ENSG00000136870,protein_coding -30971,ALDOB,ENSG00000136872,protein_coding -30943,STX17,ENSG00000136874,protein_coding -31153,PRPF4,ENSG00000136875,protein_coding -31390,FPGS,ENSG00000136877,protein_coding -31497,USP20,ENSG00000136878,protein_coding -30967,BAAT,ENSG00000136881,protein_coding -31170,KIF12,ENSG00000136883,protein_coding -31180,ATP6V1G1,ENSG00000136888,protein_coding -30952,TEX10,ENSG00000136891,protein_coding 
-31368,GARNL3,ENSG00000136895,protein_coding -30969,MRPL50,ENSG00000136897,protein_coding -31402,DPM2,ENSG00000136908,protein_coding -31326,WDR38,ENSG00000136918,protein_coding -30896,TSTD2,ENSG00000136925,protein_coding -30918,GABBR2,ENSG00000136928,protein_coding -30907,HEMGN,ENSG00000136929,protein_coding -31314,PSMB7,ENSG00000136930,protein_coding -31316,NR5A1,ENSG00000136931,protein_coding -30904,TRMO,ENSG00000136932,protein_coding -31336,RABEPK,ENSG00000136933,protein_coding -31329,GOLGA1,ENSG00000136935,protein_coding -30899,XPA,ENSG00000136936,protein_coding -30897,NCBP1,ENSG00000136937,protein_coding -30908,ANP32B,ENSG00000136938,protein_coding -31278,OR1L4,ENSG00000136939,protein_coding -31282,PDCL,ENSG00000136940,protein_coding -31327,RPL35,ENSG00000136942,protein_coding -30873,CTSV,ENSG00000136943,protein_coding -31360,LMX1B,ENSG00000136944,protein_coding -31328,ARPC5L,ENSG00000136950,protein_coding -28995,ENPP2,ENSG00000136960,protein_coding -29000,DSCC1,ENSG00000136982,protein_coding -29034,DERL1,ENSG00000136986,protein_coding -29132,MYC,ENSG00000136997,protein_coding -28992,CCN3,ENSG00000136999,protein_coding -29576,IL33,ENSG00000137033,protein_coding -29603,DMAC1,ENSG00000137038,protein_coding -29573,RANBP6,ENSG00000137040,protein_coding -30079,POLR1E,ENSG00000137054,protein_coding -29820,PLAA,ENSG00000137055,protein_coding -29966,IL11RA,ENSG00000137070,protein_coding -29931,UBAP2,ENSG00000137073,protein_coding -29884,APTX,ENSG00000137074,protein_coding -30055,RNF38,ENSG00000137075,protein_coding -30019,TLN1,ENSG00000137076,protein_coding -29972,CCL21,ENSG00000137077,protein_coding -30011,SIT1,ENSG00000137078,protein_coding -29741,IFNA21,ENSG00000137080,protein_coding -29486,DMRT1,ENSG00000137090,protein_coding -29981,DNAJB5,ENSG00000137094,protein_coding -30029,SPAG8,ENSG00000137098,protein_coding -29961,DCTN3,ENSG00000137100,protein_coding -30008,CD72,ENSG00000137101,protein_coding -30032,TMEM8B,ENSG00000137103,protein_coding 
-30075,GRHPR,ENSG00000137106,protein_coding -30100,ALDH1B1,ENSG00000137124,protein_coding -30030,HINT2,ENSG00000137133,protein_coding -30015,ARHGEF39,ENSG00000137135,protein_coding -30101,IGFBPL1,ENSG00000137142,protein_coding -29707,DENND4C,ENSG00000137145,protein_coding -29711,RPS6,ENSG00000137154,protein_coding -19727,CNPY3,ENSG00000137161,protein_coding -19684,FOXP4,ENSG00000137166,protein_coding -19591,PPIL1,ENSG00000137168,protein_coding -19739,KLC4,ENSG00000137171,protein_coding -18804,KIF13A,ENSG00000137177,protein_coding -19072,ZSCAN9,ENSG00000137185,protein_coding -19599,PIM1,ENSG00000137193,protein_coding -18782,GMPR,ENSG00000137198,protein_coding -19607,CMTR1,ENSG00000137200,protein_coding -18673,TFAP2A,ENSG00000137203,protein_coding -19748,SLC22A7,ENSG00000137204,protein_coding -19764,YIPF3,ENSG00000137207,protein_coding -18694,TMEM14B,ENSG00000137210,protein_coding -19788,TMEM63B,ENSG00000137216,protein_coding -19692,FRS3,ENSG00000137218,protein_coding -19761,TJAP1,ENSG00000137221,protein_coding -19789,CAPN11,ENSG00000137225,protein_coding -19960,TINAG,ENSG00000137251,protein_coding -19970,HCRTR2,ENSG00000137252,protein_coding -18877,KIAA0319,ENSG00000137261,protein_coding -18495,IRF4,ENSG00000137265,protein_coding -18558,SLC22A23,ENSG00000137266,protein_coding -18553,TUBB2A,ENSG00000137267,protein_coding -19952,LRRC1,ENSG00000137269,protein_coding -19930,GCM1,ENSG00000137270,protein_coding -18514,FOXF2,ENSG00000137273,protein_coding -18551,BPHL,ENSG00000137274,protein_coding -18548,RIPK1,ENSG00000137275,protein_coding -18556,TUBB2B,ENSG00000137285,protein_coding -19493,UQCC2,ENSG00000137288,protein_coding -19507,HMGA1,ENSG00000137309,protein_coding -19275,TCF19,ENSG00000137310,protein_coding -19246,FLOT1,ENSG00000137312,protein_coding -19249,IER3,ENSG00000137331,protein_coding -19242,MDC1,ENSG00000137337,protein_coding -19077,PGBD1,ENSG00000137338,protein_coding -19235,ATAT1,ENSG00000137343,protein_coding -18807,TPMT,ENSG00000137364,protein_coding 
-19559,CLPS,ENSG00000137392,protein_coding -18815,RNF144B,ENSG00000137393,protein_coding -19240,NRM,ENSG00000137404,protein_coding -19595,MTCH1,ENSG00000137409,protein_coding -19259,VARS2,ENSG00000137411,protein_coding -19706,TAF8,ENSG00000137413,protein_coding -18798,FAM8A1,ENSG00000137414,protein_coding -18689,C6orf52,ENSG00000137434,protein_coding -13214,FGFBP1,ENSG00000137440,protein_coding -13215,FGFBP2,ENSG00000137441,protein_coding -13199,CPEB2,ENSG00000137449,protein_coding -14988,FHDC1,ENSG00000137460,protein_coding -15000,TLR2,ENSG00000137462,protein_coding -14774,MGARP,ENSG00000137463,protein_coding -14891,TTC29,ENSG00000137473,protein_coding -34023,MYO7A,ENSG00000137474,protein_coding -33884,FCHSD2,ENSG00000137478,protein_coding -33966,ARRB1,ENSG00000137486,protein_coding -33961,SLCO2B1,ENSG00000137491,protein_coding -34001,THAP12,ENSG00000137492,protein_coding -34108,ANKRD42,ENSG00000137494,protein_coding -33837,IL18BP,ENSG00000137496,protein_coding -33838,NUMA1,ENSG00000137497,protein_coding -34112,CCDC90B,ENSG00000137500,protein_coding -34135,SYTL2,ENSG00000137501,protein_coding -34095,RAB30,ENSG00000137502,protein_coding -34133,CREBZF,ENSG00000137504,protein_coding -34011,LRRC32,ENSG00000137507,protein_coding -34090,PRCP,ENSG00000137509,protein_coding -34059,NARS2,ENSG00000137513,protein_coding -33835,RNF121,ENSG00000137522,protein_coding -28050,MRPL15,ENSG00000137547,protein_coding -28377,PI15,ENSG00000137558,protein_coding -28183,TTPA,ENSG00000137561,protein_coding -28181,GGH,ENSG00000137563,protein_coding -28276,SLCO5A1,ENSG00000137571,protein_coding -28275,SULF1,ENSG00000137573,protein_coding -28072,TGS1,ENSG00000137574,protein_coding -28126,SDCBP,ENSG00000137575,protein_coding -15203,NEK1,ENSG00000137601,protein_coding -15186,DDX60,ENSG00000137628,protein_coding -34666,NXPE4,ENSG00000137634,protein_coding -34858,SORL1,ENSG00000137642,protein_coding -34735,TMPRSS4,ENSG00000137648,protein_coding -34693,BUD13,ENSG00000137656,protein_coding 
-34407,TRPC6,ENSG00000137672,protein_coding -34428,MMP7,ENSG00000137673,protein_coding -34429,MMP20,ENSG00000137674,protein_coding -34432,MMP27,ENSG00000137675,protein_coding -34413,CFAP300,ENSG00000137691,protein_coding -34447,DCUN1D5,ENSG00000137692,protein_coding -34417,YAP1,ENSG00000137693,protein_coding -34837,TRIM29,ENSG00000137699,protein_coding -34792,SLC37A4,ENSG00000137700,protein_coding -34567,BTG4,ENSG00000137707,protein_coding -34840,POU2F3,ENSG00000137709,protein_coding -34539,RDX,ENSG00000137710,protein_coding -34577,PPP2R1B,ENSG00000137713,protein_coding -34547,FDX1,ENSG00000137714,protein_coding -34584,C11orf1,ENSG00000137720,protein_coding -34730,FXYD6,ENSG00000137726,protein_coding -34548,ARHGAP20,ENSG00000137727,protein_coding -34726,FXYD2,ENSG00000137731,protein_coding -34444,MMP13,ENSG00000137745,protein_coding -34731,TMPRSS13,ENSG00000137747,protein_coding -34472,CASP1,ENSG00000137752,protein_coding -34471,CASP5,ENSG00000137757,protein_coding -34503,ALKBH8,ENSG00000137760,protein_coding -45538,MAP2K5,ENSG00000137764,protein_coding -45225,UNC13C,ENSG00000137766,protein_coding -45063,SQOR,ENSG00000137767,protein_coding -45001,CTDSPL2,ENSG00000137770,protein_coding -45321,SLTM,ENSG00000137776,protein_coding -44801,THBS1,ENSG00000137801,protein_coding -44902,MAPKBP1,ENSG00000137802,protein_coding -44887,NUSAP1,ENSG00000137804,protein_coding -44889,NDUFAF1,ENSG00000137806,protein_coding -45576,AC027237.1,ENSG00000137807,protein_coding -45553,ITGA11,ENSG00000137809,protein_coding -44852,KNL1,ENSG00000137812,protein_coding -44933,HAUS2,ENSG00000137814,protein_coding -44890,RTF1,ENSG00000137815,protein_coding -45627,PARP6,ENSG00000137817,protein_coding -45578,RPLP1,ENSG00000137818,protein_coding -45572,PAQR5,ENSG00000137819,protein_coding -45604,LRRC49,ENSG00000137821,protein_coding -44964,TUBGCP4,ENSG00000137822,protein_coding -44857,RMDN3,ENSG00000137824,protein_coding -44892,ITPKA,ENSG00000137825,protein_coding 
-45596,UACA,ENSG00000137831,protein_coding -45522,SMAD6,ENSG00000137834,protein_coding -44828,PLCB2,ENSG00000137841,protein_coding -44950,TMEM62,ENSG00000137842,protein_coding -44821,PAK6,ENSG00000137843,protein_coding -45305,ADAM10,ENSG00000137845,protein_coding -45032,DUOX1,ENSG00000137857,protein_coding -45042,SLC28A2,ENSG00000137860,protein_coding -45686,STRA6,ENSG00000137868,protein_coding -45163,CYP19A1,ENSG00000137869,protein_coding -45263,ZNF280D,ENSG00000137871,protein_coding -45077,SEMA6D,ENSG00000137872,protein_coding -45198,BCL2L10,ENSG00000137875,protein_coding -45230,RSL24D1,ENSG00000137876,protein_coding -44907,SPTBN5,ENSG00000137877,protein_coding -45285,GCOM1,ENSG00000137878,protein_coding -44859,GCHFR,ENSG00000137880,protein_coding -2321,BCAR3,ENSG00000137936,protein_coding -2132,TTLL7,ENSG00000137941,protein_coding -2319,FNBP1L,ENSG00000137942,protein_coding -2218,KYAT3,ENSG00000137944,protein_coding -2216,GTF2B,ENSG00000137947,protein_coding -2275,BRDT,ENSG00000137948,protein_coding -2039,RABGGTB,ENSG00000137955,protein_coding -2090,IFI44L,ENSG00000137959,protein_coding -2078,GIPC2,ENSG00000137960,protein_coding -2338,ARHGAP29,ENSG00000137962,protein_coding -2091,IFI44,ENSG00000137965,protein_coding -2032,SLC44A5,ENSG00000137968,protein_coding -2184,CLCA2,ENSG00000137975,protein_coding -2144,DNASE2B,ENSG00000137976,protein_coding -2422,DBT,ENSG00000137992,protein_coding -2428,RTCA,ENSG00000137996,protein_coding -5933,IFT172,ENSG00000138002,protein_coding -5885,SELENOI,ENSG00000138018,protein_coding -5909,CGREF1,ENSG00000138028,protein_coding -5882,HADHB,ENSG00000138029,protein_coding -5908,KHK,ENSG00000138030,protein_coding -5846,ADCY3,ENSG00000138031,protein_coding -6203,PPM1B,ENSG00000138032,protein_coding -6377,PNPT1,ENSG00000138035,protein_coding -6193,DYNC2LI1,ENSG00000138036,protein_coding -6296,LHCGR,ENSG00000138039,protein_coding -6140,THUMPD2,ENSG00000138050,protein_coding -6098,CYP1B1,ENSG00000138061,protein_coding 
-6078,SULT6B1,ENSG00000138068,protein_coding -6537,RAB1A,ENSG00000138069,protein_coding -6541,ACTR2,ENSG00000138071,protein_coding -5911,PREB,ENSG00000138073,protein_coding -5915,SLC5A6,ENSG00000138074,protein_coding -6194,ABCG5,ENSG00000138075,protein_coding -6207,PREPL,ENSG00000138078,protein_coding -6206,SLC3A1,ENSG00000138079,protein_coding -5907,EMILIN1,ENSG00000138080,protein_coding -6278,FBXO11,ENSG00000138081,protein_coding -6219,SIX3,ENSG00000138083,protein_coding -5916,ATRAID,ENSG00000138085,protein_coding -5845,CENPO,ENSG00000138092,protein_coding -6196,LRPPRC,ENSG00000138095,protein_coding -5920,TRIM54,ENSG00000138100,protein_coding -5862,DTNB,ENSG00000138101,protein_coding -36975,ACTR1A,ENSG00000138107,protein_coding -36784,CYP2C9,ENSG00000138109,protein_coding -36974,MFSD13A,ENSG00000138111,protein_coding -36789,CYP2C8,ENSG00000138115,protein_coding -36752,MYOF,ENSG00000138119,protein_coding -36862,LOXL4,ENSG00000138131,protein_coding -36658,STAMBPL1,ENSG00000138134,protein_coding -36668,CH25H,ENSG00000138135,protein_coding -36934,LBX1,ENSG00000138136,protein_coding -36635,ATAD1,ENSG00000138138,protein_coding -37269,BTBD16,ENSG00000138152,protein_coding -36736,KIF11,ENSG00000138160,protein_coding -37281,CUZD1,ENSG00000138161,protein_coding -37266,TACC2,ENSG00000138162,protein_coding -37082,DUSP5,ENSG00000138166,protein_coding -37011,CALHM2,ENSG00000138172,protein_coding -36982,ARL3,ENSG00000138175,protein_coding -36753,CEP55,ENSG00000138180,protein_coding -36686,KIF20B,ENSG00000138182,protein_coding -36800,ENTPD1,ENSG00000138185,protein_coding -36742,EXOC6,ENSG00000138190,protein_coding -36765,PLCE1,ENSG00000138193,protein_coding -36756,RBP4,ENSG00000138207,protein_coding -11798,DBR1,ENSG00000138231,protein_coding -11712,DNAJC13,ENSG00000138246,protein_coding -12028,GPR87,ENSG00000138271,protein_coding -36355,ANXA7,ENSG00000138279,protein_coding -36345,FAM149B1,ENSG00000138286,protein_coding -36309,ASCC1,ENSG00000138303,protein_coding 
-36334,PLA2G12B,ENSG00000138308,protein_coding -36154,ZNF365,ENSG00000138311,protein_coding -36332,OIT3,ENSG00000138315,protein_coding -36286,ADAMTS14,ENSG00000138316,protein_coding -36462,RPS24,ENSG00000138326,protein_coding -36225,TET1,ENSG00000138336,protein_coding -36218,DNA2,ENSG00000138346,protein_coding -36210,MYPN,ENSG00000138347,protein_coding -8790,AOX1,ENSG00000138356,protein_coding -9069,ATIC,ENSG00000138363,protein_coding -9095,SMARCAL1,ENSG00000138375,protein_coding -9061,BARD1,ENSG00000138376,protein_coding -8683,STAT4,ENSG00000138378,protein_coding -8666,MSTN,ENSG00000138379,protein_coding -8889,CARF,ENSG00000138380,protein_coding -8653,ASNSD1,ENSG00000138381,protein_coding -8349,METTL5,ENSG00000138382,protein_coding -8348,SSB,ENSG00000138385,protein_coding -8675,NAB1,ENSG00000138386,protein_coding -8848,CDK15,ENSG00000138395,protein_coding -8340,PPIG,ENSG00000138398,protein_coding -8338,FASTKD1,ENSG00000138399,protein_coding -8956,MDH1B,ENSG00000138400,protein_coding -8732,HECW2,ENSG00000138411,protein_coding -9002,IDH1,ENSG00000138413,protein_coding -8419,OLA1,ENSG00000138430,protein_coding -8428,CIR1,ENSG00000138433,protein_coding -8574,ITPRID2,ENSG00000138434,protein_coding -8439,CHRNA1,ENSG00000138435,protein_coding -8884,FAM117B,ENSG00000138439,protein_coding -8888,WDR12,ENSG00000138442,protein_coding -8901,ABI2,ENSG00000138443,protein_coding -8621,ITGAV,ENSG00000138448,protein_coding -8651,SLC40A1,ENSG00000138449,protein_coding -11297,SLC35A5,ENSG00000138459,protein_coding -11475,SLC49A4,ENSG00000138463,protein_coding -11157,SENP7,ENSG00000138468,protein_coding -11240,GUCA1C,ENSG00000138472,protein_coding -11220,CCDC54,ENSG00000138483,protein_coding -11402,COX17,ENSG00000138495,protein_coding -11469,PARP9,ENSG00000138496,protein_coding -45260,MNS1,ENSG00000138587,protein_coding -45144,USP8,ENSG00000138592,protein_coding -45112,SECISBP2L,ENSG00000138593,protein_coding -45181,TMOD3,ENSG00000138594,protein_coding 
-45153,SPPL2A,ENSG00000138600,protein_coding -45569,GLCE,ENSG00000138604,protein_coding -45034,SHF,ENSG00000138606,protein_coding -45414,APH1B,ENSG00000138613,protein_coding -45487,INTS14,ENSG00000138614,protein_coding -45471,CILP,ENSG00000138615,protein_coding -45474,PARP16,ENSG00000138617,protein_coding -45727,PPCDC,ENSG00000138621,protein_coding -45660,HCN4,ENSG00000138622,protein_coding -45696,SEMA7A,ENSG00000138623,protein_coding -45699,UBL7,ENSG00000138629,protein_coding -14130,ARHGAP24,ENSG00000138639,protein_coding -14198,FAM13A,ENSG00000138640,protein_coding -14192,HERC3,ENSG00000138641,protein_coding -14183,HERC6,ENSG00000138642,protein_coding -14184,HERC5,ENSG00000138646,protein_coding -14710,PCDH10,ENSG00000138650,protein_coding -14509,NDST4,ENSG00000138653,protein_coding -14473,ZGRF1,ENSG00000138658,protein_coding -14464,AP1AR,ENSG00000138660,protein_coding -14097,COPS4,ENSG00000138663,protein_coding -14074,HNRNPD,ENSG00000138668,protein_coding -14061,PRKG2,ENSG00000138669,protein_coding -14064,RASGEF1B,ENSG00000138670,protein_coding -14091,SEC31A,ENSG00000138674,protein_coding -14057,FGF5,ENSG00000138675,protein_coding -14112,GPAT3,ENSG00000138678,protein_coding -14607,IL21,ENSG00000138684,protein_coding -14613,FGF2,ENSG00000138685,protein_coding -14599,BBS7,ENSG00000138686,protein_coding -14604,KIAA1109,ENSG00000138688,protein_coding -14242,BMPR1B,ENSG00000138696,protein_coding -14262,RAP1GDS1,ENSG00000138698,protein_coding -14662,LARP1B,ENSG00000138709,protein_coding -14209,MMRN1,ENSG00000138722,protein_coding -14568,PDE5A,ENSG00000138735,protein_coding -14581,PRDM5,ENSG00000138738,protein_coding -14601,TRPC3,ENSG00000138741,protein_coding -13978,NAAA,ENSG00000138744,protein_coding -13986,NUP54,ENSG00000138750,protein_coding -13981,CXCL9,ENSG00000138755,protein_coding -14039,BMP2K,ENSG00000138756,protein_coding -13972,G3BP2,ENSG00000138757,protein_coding -14008,SEPT11,ENSG00000138758,protein_coding -14029,FRAS1,ENSG00000138759,protein_coding 
-13988,SCARB2,ENSG00000138760,protein_coding -14014,CCNG2,ENSG00000138764,protein_coding -14021,CNOT6L,ENSG00000138767,protein_coding -13974,USO1,ENSG00000138768,protein_coding -13971,CDKL2,ENSG00000138769,protein_coding -13998,SHROOM3,ENSG00000138771,protein_coding -14032,ANXA3,ENSG00000138772,protein_coding -14366,PPA2,ENSG00000138777,protein_coding -14342,CENPE,ENSG00000138778,protein_coding -14376,GSTCD,ENSG00000138780,protein_coding -14375,INTS12,ENSG00000138785,protein_coding -14441,ENPEP,ENSG00000138792,protein_coding -14422,CASP6,ENSG00000138794,protein_coding -14398,LEF1,ENSG00000138795,protein_coding -14397,HADH,ENSG00000138796,protein_coding -14432,EGF,ENSG00000138798,protein_coding -14391,PAPSS1,ENSG00000138801,protein_coding -14415,SEC24B,ENSG00000138802,protein_coding -14290,C4orf17,ENSG00000138813,protein_coding -14311,PPP3CA,ENSG00000138814,protein_coding -14320,SLC39A8,ENSG00000138821,protein_coding -14292,MTTP,ENSG00000138823,protein_coding -17329,FBN2,ENSG00000138829,protein_coding -46581,MAPK8IP3,ENSG00000138834,protein_coding -31161,RGS3,ENSG00000138835,protein_coding -58816,GUCD1,ENSG00000138867,protein_coding -59599,TTLL8,ENSG00000138892,protein_coding -59043,RNF185,ENSG00000138942,protein_coding -59478,SHISAL1,ENSG00000138944,protein_coding -59475,PARVG,ENSG00000138964,protein_coding -37498,B4GALNT3,ENSG00000139044,protein_coding -38007,AC020613.1,ENSG00000139053,protein_coding -38005,ERP27,ENSG00000139055,protein_coding -37927,ETV6,ENSG00000139083,protein_coding -37862,GABARAPL1,ENSG00000139112,protein_coding -38356,KIF21A,ENSG00000139116,protein_coding -38350,CPNE8,ENSG00000139117,protein_coding -38309,YARS2,ENSG00000139131,protein_coding -38302,FGD4,ENSG00000139132,protein_coding -38327,ALG10,ENSG00000139133,protein_coding -38044,PIK3C2G,ENSG00000139144,protein_coding -38258,SINHCAF,ENSG00000139146,protein_coding -38048,PLCZ1,ENSG00000139151,protein_coding -38058,AEBP2,ENSG00000139154,protein_coding 
-38075,SLCO1C1,ENSG00000139155,protein_coding -38275,ETFBKMT,ENSG00000139160,protein_coding -38111,ETNK1,ENSG00000139163,protein_coding -38391,ZCRB1,ENSG00000139168,protein_coding -38421,TMEM117,ENSG00000139173,protein_coding -38398,PRICKLE1,ENSG00000139174,protein_coding -37701,C1RL,ENSG00000139178,protein_coding -37600,NDUFA9,ENSG00000139180,protein_coding -37706,CLSTN3,ENSG00000139182,protein_coding -37801,KLRG1,ENSG00000139187,protein_coding -37640,VAMP1,ENSG00000139190,protein_coding -37639,TAPBPL,ENSG00000139192,protein_coding -37638,CD27,ENSG00000139193,protein_coding -37703,RBP5,ENSG00000139194,protein_coding -37709,PEX5,ENSG00000139197,protein_coding -37662,PIANP,ENSG00000139200,protein_coding -38460,SLC38A4,ENSG00000139209,protein_coding -38462,AMIGO2,ENSG00000139211,protein_coding -38447,SCAF11,ENSG00000139218,protein_coding -38497,COL2A1,ENSG00000139219,protein_coding -39416,PPFIA2,ENSG00000139220,protein_coding -38529,ANP32D,ENSG00000139223,protein_coding -39179,LLPH,ENSG00000139233,protein_coding -39055,LRIG3,ENSG00000139263,protein_coding -39028,MARCH9,ENSG00000139266,protein_coding -39001,INHBE,ENSG00000139269,protein_coding -39333,GLIPR1,ENSG00000139278,protein_coding -39298,TPH2,ENSG00000139287,protein_coding -39346,PHLDA1,ENSG00000139289,protein_coding -39291,TMEM19,ENSG00000139291,protein_coding -39284,LGR5,ENSG00000139292,protein_coding -39403,PTPRQ,ENSG00000139304,protein_coding -39479,DUSP6,ENSG00000139318,protein_coding -39484,POC1B,ENSG00000139323,protein_coding -39469,TMTC3,ENSG00000139324,protein_coding -39509,LUM,ENSG00000139329,protein_coding -39508,KERA,ENSG00000139330,protein_coding -39608,SNRPF,ENSG00000139343,protein_coding -39611,AMDHD1,ENSG00000139344,protein_coding -39629,NEDD1,ENSG00000139350,protein_coding -39714,SYCP3,ENSG00000139351,protein_coding -39742,ASCL1,ENSG00000139352,protein_coding -39691,GAS2L3,ENSG00000139354,protein_coding -40308,TMEM132B,ENSG00000139364,protein_coding -40377,SLC15A4,ENSG00000139370,protein_coding 
-39763,TDG,ENSG00000139372,protein_coding -39974,RITA1,ENSG00000139405,protein_coding -39983,SDSL,ENSG00000139410,protein_coding -39871,MMAB,ENSG00000139428,protein_coding -39879,GLTP,ENSG00000139433,protein_coding -39884,GIT2,ENSG00000139436,protein_coding -39883,TCHP,ENSG00000139437,protein_coding -39875,FAM222A,ENSG00000139438,protein_coding -39865,FOXN4,ENSG00000139445,protein_coding -40738,NUP58,ENSG00000139496,protein_coding -40736,MTMR6,ENSG00000139505,protein_coding -40806,SLC46A3,ENSG00000139508,protein_coding -40816,SLC7A1,ENSG00000139514,protein_coding -40788,PDX1,ENSG00000139515,protein_coding -40781,LNX2,ENSG00000139517,protein_coding -38916,SUOX,ENSG00000139531,protein_coding -38557,CCDC65,ENSG00000139537,protein_coding -38937,SLC39A5,ENSG00000139540,protein_coding -38777,TARBP2,ENSG00000139546,protein_coding -38978,RDH16,ENSG00000139547,protein_coding -38570,DHH,ENSG00000139549,protein_coding -38678,ACVRL1,ENSG00000139567,protein_coding -38837,GPR84,ENSG00000139572,protein_coding -38779,NPFF,ENSG00000139574,protein_coding -38936,NABP2,ENSG00000139579,protein_coding -40869,N4BP2L1,ENSG00000139597,protein_coding -38664,CELA1,ENSG00000139610,protein_coding -38934,SMARCC2,ENSG00000139613,protein_coding -40867,BRCA2,ENSG00000139618,protein_coding -38538,KANSL2,ENSG00000139620,protein_coding -38622,CERS5,ENSG00000139624,protein_coding -38775,MAP3K12,ENSG00000139625,protein_coding -38758,ITGB7,ENSG00000139626,protein_coding -38665,GALNT6,ENSG00000139629,protein_coding -38755,CSAD,ENSG00000139631,protein_coding -38572,LMBR1L,ENSG00000139636,protein_coding -38765,C12orf10,ENSG00000139637,protein_coding -38926,ESYT1,ENSG00000139641,protein_coding -38600,TMBIM6,ENSG00000139644,protein_coding -38938,ANKRD52,ENSG00000139645,protein_coding -38716,KRT71,ENSG00000139648,protein_coding -38757,ZNF740,ENSG00000139651,protein_coding -41046,SMIM2,ENSG00000139656,protein_coding -41210,WDFY2,ENSG00000139668,protein_coding -41244,HNRNPA1L2,ENSG00000139675,protein_coding 
-41137,LPAR6,ENSG00000139679,protein_coding -41114,ESD,ENSG00000139684,protein_coding -41133,RB1,ENSG00000139687,protein_coding -40238,SBNO1,ENSG00000139697,protein_coding -40174,MORN3,ENSG00000139714,protein_coding -40181,SETD1B,ENSG00000139718,protein_coding -40201,VPS33A,ENSG00000139719,protein_coding -40221,VPS37B,ENSG00000139722,protein_coding -40177,RHOF,ENSG00000139725,protein_coding -40217,DENR,ENSG00000139726,protein_coding -41299,DIAPH3,ENSG00000139734,protein_coding -41467,SLAIN1,ENSG00000139737,protein_coding -41490,RBM26,ENSG00000139746,protein_coding -40080,SRRM4,ENSG00000139767,protein_coding -41753,METTL21C,ENSG00000139780,protein_coding -41654,MBNL2,ENSG00000139793,protein_coding -41669,RNF113B,ENSG00000139797,protein_coding -41714,ZIC5,ENSG00000139800,protein_coding -41805,ABHD13,ENSG00000139826,protein_coding -41837,RAB20,ENSG00000139832,protein_coding -41896,GRTP1,ENSG00000139835,protein_coding -41892,CUL4A,ENSG00000139842,protein_coding -42601,TTC6,ENSG00000139865,protein_coding -42608,SSTR1,ENSG00000139874,protein_coding -42301,CDH24,ENSG00000139880,protein_coding -42286,REM2,ENSG00000139890,protein_coding -42397,CBLN3,ENSG00000139899,protein_coding -42374,TSSK4,ENSG00000139908,protein_coding -42418,NOVA1,ENSG00000139910,protein_coding -42356,FITM1,ENSG00000139914,protein_coding -42692,MDGA2,ENSG00000139915,protein_coding -42778,TMX1,ENSG00000139921,protein_coding -42787,FRMD6,ENSG00000139926,protein_coding -42880,PELI2,ENSG00000139946,protein_coding -42945,RTN1,ENSG00000139970,protein_coding -42911,ARMH4,ENSG00000139971,protein_coding -42991,SYT16,ENSG00000139973,protein_coding -42977,SLC38A6,ENSG00000139974,protein_coding -42904,NAA30,ENSG00000139977,protein_coding -43169,ADAM21,ENSG00000139985,protein_coding -43106,RDH12,ENSG00000139988,protein_coding -43135,DCAF5,ENSG00000139990,protein_coding -43051,RAB15,ENSG00000139998,protein_coding -43011,WDR89,ENSG00000140006,protein_coding -43026,ESR2,ENSG00000140009,protein_coding 
-43000,KCNH5,ENSG00000140015,protein_coding -43423,STON2,ENSG00000140022,protein_coding -43510,EFCAB11,ENSG00000140025,protein_coding -43474,GPR65,ENSG00000140030,protein_coding -43253,PTGR2,ENSG00000140043,protein_coding -43316,JDP2,ENSG00000140044,protein_coding -43664,AK7,ENSG00000140057,protein_coding -43591,FAM181A,ENSG00000140067,protein_coding -43564,SLC24A4,ENSG00000140090,protein_coding -43555,FBLN5,ENSG00000140092,protein_coding -43602,SERPINA10,ENSG00000140093,protein_coding -43989,CLBA1,ENSG00000140104,protein_coding -43734,WARS,ENSG00000140105,protein_coding -43732,SLC25A47,ENSG00000140107,protein_coding -43887,WDR20,ENSG00000140153,protein_coding -44363,NIPA2,ENSG00000140157,protein_coding -44711,SLC12A6,ENSG00000140199,protein_coding -45031,DUOXA1,ENSG00000140254,protein_coding -44990,MFAP1,ENSG00000140259,protein_coding -45272,TCF12,ENSG00000140262,protein_coding -45025,SORD,ENSG00000140263,protein_coding -44986,SERF2,ENSG00000140264,protein_coding -44963,ZSCAN29,ENSG00000140265,protein_coding -45030,DUOXA2,ENSG00000140274,protein_coding -45029,DUOX2,ENSG00000140279,protein_coding -45178,LYSMD2,ENSG00000140280,protein_coding -45133,SLC27A2,ENSG00000140284,protein_coding -45127,FGF7,ENSG00000140285,protein_coding -45135,HDC,ENSG00000140287,protein_coding -45342,GCNT3,ENSG00000140297,protein_coding -45345,BNIP2,ENSG00000140299,protein_coding -45343,GTF2A2,ENSG00000140307,protein_coding -44813,SRP14,ENSG00000140319,protein_coding -44842,BAHD1,ENSG00000140320,protein_coding -44838,DISP2,ENSG00000140323,protein_coding -44938,CDAN1,ENSG00000140326,protein_coding -45585,TLE3,ENSG00000140332,protein_coding -45557,ANP32A,ENSG00000140350,protein_coding -45744,COMMD4,ENSG00000140365,protein_coding -45777,UBE2Q2,ENSG00000140367,protein_coding -45798,PSTPIP1,ENSG00000140368,protein_coding -45784,ETFA,ENSG00000140374,protein_coding -45886,BCL2A1,ENSG00000140379,protein_coding -45806,HMG20A,ENSG00000140382,protein_coding 
-45791,SCAPER,ENSG00000140386,protein_coding -45799,TSPAN3,ENSG00000140391,protein_coding -45832,WDR61,ENSG00000140395,protein_coding -28291,NCOA2,ENSG00000140396,protein_coding -45746,NEIL1,ENSG00000140398,protein_coding -45748,MAN2C1,ENSG00000140400,protein_coding -45831,DNAJA4,ENSG00000140403,protein_coding -45915,TLNRD1,ENSG00000140406,protein_coding -45407,TPM1,ENSG00000140416,protein_coding -46336,IGF1R,ENSG00000140443,protein_coding -46321,ARRDC4,ENSG00000140450,protein_coding -45452,PIF1,ENSG00000140451,protein_coding -45419,USP3,ENSG00000140455,protein_coding -45691,CYP11A1,ENSG00000140459,protein_coding -45651,BBS4,ENSG00000140463,protein_coding -45675,PML,ENSG00000140464,protein_coding -45709,CYP1A1,ENSG00000140465,protein_coding -46368,ADAMTS17,ENSG00000140470,protein_coding -46379,LINS1,ENSG00000140471,protein_coding -45716,ULK3,ENSG00000140474,protein_coding -45740,GOLGA6D,ENSG00000140478,protein_coding -46404,PCSK6,ENSG00000140479,protein_coding -45687,CCDC33,ENSG00000140481,protein_coding -45630,CELF6,ENSG00000140488,protein_coding -45718,SCAMP2,ENSG00000140497,protein_coding -45710,CYP1A2,ENSG00000140505,protein_coding -45713,LMAN1L,ENSG00000140506,protein_coding -46097,HAPLN3,ENSG00000140511,protein_coding -46118,RHCG,ENSG00000140519,protein_coding -46111,POLG,ENSG00000140521,protein_coding -46109,RLBP1,ENSG00000140522,protein_coding -46110,FANCI,ENSG00000140525,protein_coding -46105,ABHD2,ENSG00000140526,protein_coding -46128,WDR93,ENSG00000140527,protein_coding -46122,TICRR,ENSG00000140534,protein_coding -46080,NTRK3,ENSG00000140538,protein_coding -46087,DET1,ENSG00000140543,protein_coding -46098,MFGE8,ENSG00000140545,protein_coding -46145,ZNF710,ENSG00000140548,protein_coding -46192,UNC45A,ENSG00000140553,protein_coding -46218,ST8SIA2,ENSG00000140557,protein_coding -46262,MCTP2,ENSG00000140563,protein_coding -46187,FURIN,ENSG00000140564,protein_coding -46172,IQGAP1,ENSG00000140575,protein_coding -46174,CRTC3,ENSG00000140577,protein_coding 
-45935,EFL1,ENSG00000140598,protein_coding -45986,SH3GL3,ENSG00000140600,protein_coding -46026,SEC11A,ENSG00000140612,protein_coding -46815,SEPT12,ENSG00000140623,protein_coding -46820,GLYR1,ENSG00000140632,protein_coding -46874,PMM2,ENSG00000140650,protein_coding -47620,SLC5A2,ENSG00000140675,protein_coding -47608,ITGAX,ENSG00000140678,protein_coding -47619,TGFB1I1,ENSG00000140682,protein_coding -47622,C16orf58,ENSG00000140688,protein_coding -47617,ARMC5,ENSG00000140691,protein_coding -47009,PARN,ENSG00000140694,protein_coding -48030,FTO,ENSG00000140718,protein_coding -47236,UQCRC2,ENSG00000140740,protein_coding -47249,AC092338.1,ENSG00000140743,protein_coding -47225,IGSF6,ENSG00000140749,protein_coding -47308,ARHGAP17,ENSG00000140750,protein_coding -47863,MYLK3,ENSG00000140795,protein_coding -47894,ABCC12,ENSG00000140798,protein_coding -47956,NKD1,ENSG00000140807,protein_coding -48527,DHX38,ENSG00000140829,protein_coding -48523,TXNL4B,ENSG00000140830,protein_coding -48502,MARVELD3,ENSG00000140832,protein_coding -48496,CHST4,ENSG00000140835,protein_coding -48538,ZFHX3,ENSG00000140836,protein_coding -48569,CLEC18B,ENSG00000140839,protein_coding -48122,CPNE2,ENSG00000140848,protein_coding -48118,NLRC5,ENSG00000140853,protein_coding -48151,KATNB1,ENSG00000140854,protein_coding -48152,KIFC3,ENSG00000140859,protein_coding -48643,ADAMTS18,ENSG00000140873,protein_coding -48647,AC092724.1,ENSG00000140876,protein_coding -48715,GCSH,ENSG00000140905,protein_coding -48265,CMTM3,ENSG00000140931,protein_coding -48263,CMTM2,ENSG00000140932,protein_coding -48237,CDH11,ENSG00000140937,protein_coding -48294,AC074143.1,ENSG00000140939,protein_coding -48877,MAP1LC3B,ENSG00000140941,protein_coding -48769,MBTPS1,ENSG00000140943,protein_coding -48747,CDH13,ENSG00000140945,protein_coding -48879,ZCCHC14,ENSG00000140948,protein_coding -48786,MEAK7,ENSG00000140950,protein_coding -48776,ADAD2,ENSG00000140955,protein_coding -48764,OSGIN1,ENSG00000140961,protein_coding 
-48827,IRF8,ENSG00000140968,protein_coding -46492,RHOT2,ENSG00000140983,protein_coding -46600,RPL3L,ENSG00000140986,protein_coding -46755,ZSCAN32,ENSG00000140987,protein_coding -46602,RPS2,ENSG00000140988,protein_coding -46601,NDUFB10,ENSG00000140990,protein_coding -46673,PDPK1,ENSG00000140992,protein_coding -46746,TIGD7,ENSG00000140993,protein_coding -48977,DEF8,ENSG00000140995,protein_coding -48971,TCF25,ENSG00000141002,protein_coding -48928,GALNS,ENSG00000141012,protein_coding -48981,GAS8,ENSG00000141013,protein_coding -49651,MED9,ENSG00000141026,protein_coding -49579,NCOR1,ENSG00000141027,protein_coding -49645,COPS3,ENSG00000141030,protein_coding -49676,GID4,ENSG00000141034,protein_coding -49602,ZNF287,ENSG00000141040,protein_coding -49498,MYOCD,ENSG00000141052,protein_coding -49940,KSR1,ENSG00000141068,protein_coding -48410,UTP4,ENSG00000141076,protein_coding -48334,RANBP10,ENSG00000141084,protein_coding -48345,CTRL,ENSG00000141086,protein_coding -48350,DPEP3,ENSG00000141096,protein_coding -48333,GFOD2,ENSG00000141098,protein_coding -48435,NOB1,ENSG00000141101,protein_coding -49727,PRPSAP2,ENSG00000141127,protein_coding -50285,UNC45B,ENSG00000141161,protein_coding -51160,PCTP,ENSG00000141179,protein_coding -51212,OR4D1,ENSG00000141194,protein_coding -51146,TOM1L1,ENSG00000141198,protein_coding -51139,KIF2B,ENSG00000141200,protein_coding -51612,C17orf80,ENSG00000141219,protein_coding -51108,TOB1,ENSG00000141232,protein_coding -49007,VPS53,ENSG00000141252,protein_coding -49132,SPATA22,ENSG00000141255,protein_coding -49087,SGSM2,ENSG00000141258,protein_coding -50925,NPEPPS,ENSG00000141279,protein_coding -50958,SKAP1,ENSG00000141293,protein_coding -50934,LRRC46,ENSG00000141294,protein_coding -50935,SCRN2,ENSG00000141295,protein_coding -50078,SSH2,ENSG00000141298,protein_coding -50203,RHBDL3,ENSG00000141314,protein_coding -50233,SPACA3,ENSG00000141316,protein_coding -51544,ARSG,ENSG00000141337,protein_coding -51555,ABCA8,ENSG00000141338,protein_coding 
-50776,G6PC3,ENSG00000141349,protein_coding -51274,CLTC,ENSG00000141367,protein_coding -51310,C17orf64,ENSG00000141371,protein_coding -51320,BCAS3,ENSG00000141376,protein_coding -51275,PTRH2,ENSG00000141378,protein_coding -52532,AC091021.1,ENSG00000141380,protein_coding -52537,TAF4B,ENSG00000141384,protein_coding -52332,AFG3L2,ENSG00000141385,protein_coding -52334,PRELID3A,ENSG00000141391,protein_coding -52307,IMPA2,ENSG00000141401,protein_coding -52296,GNAL,ENSG00000141404,protein_coding -52679,SLC39A6,ENSG00000141424,protein_coding -52678,RPRD1A,ENSG00000141425,protein_coding -52677,C18orf21,ENSG00000141428,protein_coding -52668,GALNT1,ENSG00000141429,protein_coding -52640,ASXL3,ENSG00000141431,protein_coding -52087,ADCYAP1,ENSG00000141433,protein_coding -52622,MEP1B,ENSG00000141434,protein_coding -52603,SLC25A52,ENSG00000141437,protein_coding -52620,GAREM1,ENSG00000141441,protein_coding -52441,ESCO1,ENSG00000141446,protein_coding -52504,OSBPL1A,ENSG00000141447,protein_coding -52464,GATA6,ENSG00000141448,protein_coding -52437,GREB1L,ENSG00000141449,protein_coding -52488,RMC1,ENSG00000141452,protein_coding -49180,PELP1,ENSG00000141456,protein_coding -52489,NPC1,ENSG00000141458,protein_coding -52753,SLC14A1,ENSG00000141469,protein_coding -49184,ARRB2,ENSG00000141480,protein_coding -49266,SLC13A5,ENSG00000141485,protein_coding -49188,ZMYND15,ENSG00000141497,protein_coding -49360,WRAP53,ENSG00000141499,protein_coding -49196,MINK1,ENSG00000141503,protein_coding -49357,SAT2,ENSG00000141504,protein_coding -49293,ASGR1,ENSG00000141505,protein_coding -49425,PIK3R5,ENSG00000141506,protein_coding -49359,TP53,ENSG00000141510,protein_coding -51876,CCDC40,ENSG00000141519,protein_coding -51966,ARHGDIA,ENSG00000141522,protein_coding -51817,TMC6,ENSG00000141524,protein_coding -51999,SLC16A3,ENSG00000141526,protein_coding -51882,CARD14,ENSG00000141527,protein_coding -51630,TTYH2,ENSG00000141540,protein_coding -52032,RAB40B,ENSG00000141542,protein_coding 
-51879,EIF4A3,ENSG00000141543,protein_coding -52001,CSNK1D,ENSG00000141551,protein_coding -51971,ANAPC11,ENSG00000141552,protein_coding -52039,TBCD,ENSG00000141556,protein_coding -52035,FN3KRP,ENSG00000141560,protein_coding -52021,NARF,ENSG00000141562,protein_coding -51899,RPTOR,ENSG00000141564,protein_coding -52025,FOXK2,ENSG00000141568,protein_coding -51718,TRIM65,ENSG00000141569,protein_coding -51865,CBX8,ENSG00000141570,protein_coding -52008,SECTM1,ENSG00000141574,protein_coding -51737,RNF157,ENSG00000141576,protein_coding -51921,CEP131,ENSG00000141577,protein_coding -52041,ZNF750,ENSG00000141579,protein_coding -52030,WDR45B,ENSG00000141580,protein_coding -51868,CBX4,ENSG00000141582,protein_coding -52765,RNF165,ENSG00000141622,protein_coding -52822,DYM,ENSG00000141627,protein_coding -52865,MAPK4,ENSG00000141639,protein_coding -52873,ELAC1,ENSG00000141642,protein_coding -52858,MBD1,ENSG00000141644,protein_coding -52875,SMAD4,ENSG00000141646,protein_coding -53038,TNFRSF11A,ENSG00000141655,protein_coding -53044,ZCCHC2,ENSG00000141664,protein_coding -53164,FBXO15,ENSG00000141665,protein_coding -53150,CBLN2,ENSG00000141668,protein_coding -53002,PMAIP1,ENSG00000141682,protein_coding -50630,P3H4,ENSG00000141696,protein_coding -50632,NT5C3B,ENSG00000141698,protein_coding -50677,RETREG3,ENSG00000141699,protein_coding -50488,ERBB2,ENSG00000141736,protein_coding -50491,GRB7,ENSG00000141738,protein_coding -50490,MIEN1,ENSG00000141741,protein_coding -50486,PNMT,ENSG00000141744,protein_coding -50468,ARL5C,ENSG00000141748,protein_coding -50473,STAC2,ENSG00000141750,protein_coding -50529,IGFBP4,ENSG00000141753,protein_coding -50631,FKBP10,ENSG00000141756,protein_coding -53284,TXNL4A,ENSG00000141759,protein_coding -55510,CACNA1A,ENSG00000141837,protein_coding -55536,MISP3,ENSG00000141854,protein_coding -55539,SAMD1,ENSG00000141858,protein_coding -55597,BRD4,ENSG00000141867,protein_coding -54930,SLC39A3,ENSG00000141873,protein_coding -54958,NFIC,ENSG00000141905,protein_coding 
-54783,TPGS1,ENSG00000141933,protein_coding -54771,PLPP2,ENSG00000141934,protein_coding -57612,ZIM3,ENSG00000141946,protein_coding -60330,PRDM15,ENSG00000141956,protein_coding -60409,PFKL,ENSG00000141959,protein_coding -55042,FEM1A,ENSG00000141965,protein_coding -55126,VAV1,ENSG00000141968,protein_coding -55721,MVB12A,ENSG00000141971,protein_coding -55651,CIB3,ENSG00000141977,protein_coding -55665,AC008764.1,ENSG00000141979,protein_coding -55016,SH3GL1,ENSG00000141985,protein_coding -55067,DUS3L,ENSG00000141994,protein_coding -55036,DPP9,ENSG00000142002,protein_coding -56616,DMRTC2,ENSG00000142025,protein_coding -56580,CCDC97,ENSG00000142039,protein_coding -56582,TMEM91,ENSG00000142046,protein_coding -56333,ZFP14,ENSG00000142065,protein_coding -31803,SIRT3,ENSG00000142082,protein_coding -31815,IFITM3,ENSG00000142089,protein_coding -31808,PGGHG,ENSG00000142102,protein_coding -60106,HUNK,ENSG00000142149,protein_coding -60492,COL6A1,ENSG00000142156,protein_coding -60148,IFNAR1,ENSG00000142166,protein_coding -60099,SOD1,ENSG00000142168,protein_coding -60498,COL6A2,ENSG00000142173,protein_coding -60375,SIK1,ENSG00000142178,protein_coding -60406,DNMT3L,ENSG00000142182,protein_coding -60415,TRPM2,ENSG00000142185,protein_coding -33555,SCYL1,ENSG00000142186,protein_coding -60151,TMEM50B,ENSG00000142188,protein_coding -59985,APP,ENSG00000142192,protein_coding -60209,DOP1B,ENSG00000142197,protein_coding -60114,URB1,ENSG00000142207,protein_coding -43976,AKT1,ENSG00000142208,protein_coding -4551,IL19,ENSG00000142224,protein_coding -56938,EMP3,ENSG00000142227,protein_coding -56886,SAE1,ENSG00000142230,protein_coding -56960,NTN5,ENSG00000142233,protein_coding -56948,LMTK3,ENSG00000142235,protein_coding -56778,GEMIN7,ENSG00000142252,protein_coding -56758,CBLC,ENSG00000142273,protein_coding -56200,WTIP,ENSG00000142279,protein_coding -55217,ADAMTS10,ENSG00000142303,protein_coding -15557,SLC6A3,ENSG00000142319,protein_coding -9604,RNPEPL1,ENSG00000142327,protein_coding 
-9606,CAPN10,ENSG00000142330,protein_coding -55215,MYO1F,ENSG00000142347,protein_coding -57699,ERVK3-1,ENSG00000142396,protein_coding -57386,NLRP12,ENSG00000142405,protein_coding -57392,CACNG8,ENSG00000142408,protein_coding -57556,ZNF787,ENSG00000142409,protein_coding -55343,TIMM29,ENSG00000142444,protein_coding -55195,FBN3,ENSG00000142449,protein_coding -55341,CARM1,ENSG00000142453,protein_coding -55174,EVI5L,ENSG00000142459,protein_coding -49189,TM4SF5,ENSG00000142484,protein_coding -49774,SLC47A1,ENSG00000142494,protein_coding -49193,PSMB6,ENSG00000142507,protein_coding -57115,GPR32,ENSG00000142511,protein_coding -57182,SIGLEC10,ENSG00000142512,protein_coding -57117,ACP4,ENSG00000142513,protein_coding -57128,KLK3,ENSG00000142515,protein_coding -57083,ZNF473,ENSG00000142528,protein_coding -57102,AC020909.1,ENSG00000142530,protein_coding -57035,RPS11,ENSG00000142534,protein_coding -57022,PTH2,ENSG00000142538,protein_coding -57099,AC020909.2,ENSG00000142539,protein_coding -57030,RPL13A,ENSG00000142541,protein_coding -57150,CTU1,ENSG00000142544,protein_coding -57041,NOSIP,ENSG00000142546,protein_coding -57169,IGLON5,ENSG00000142549,protein_coding -57040,RCN3,ENSG00000142552,protein_coding -57227,ZNF614,ENSG00000142556,protein_coding -314,SLC2A5,ENSG00000142583,protein_coding -294,RERE,ENSG00000142599,protein_coding -171,MMEL1,ENSG00000142606,protein_coding -143,CFAP74,ENSG00000142609,protein_coding -182,PRDM16,ENSG00000142611,protein_coding -506,CELA2A,ENSG00000142615,protein_coding -600,PADI3,ENSG00000142619,protein_coding -500,FHAD1,ENSG00000142621,protein_coding -599,PADI1,ENSG00000142623,protein_coding -544,EPHA2,ENSG00000142627,protein_coding -548,ARHGEF19,ENSG00000142632,protein_coding -504,EFHD2,ENSG00000142634,protein_coding -364,PEX14,ENSG00000142655,protein_coding -357,PGD,ENSG00000142657,protein_coding -783,MYOM3,ENSG00000142661,protein_coding -860,SH3BGRL3,ENSG00000142669,protein_coding -854,CNKSR1,ENSG00000142675,protein_coding 
-766,RPL11,ENSG00000142676,protein_coding -785,IL22RA1,ENSG00000142677,protein_coding -853,ZNF593,ENSG00000142684,protein_coding -1158,C1orf216,ENSG00000142686,protein_coding -1152,KIAA0319L,ENSG00000142687,protein_coding -1178,EVA1B,ENSG00000142694,protein_coding -1123,C1orf94,ENSG00000142698,protein_coding -1596,DMRTA2,ENSG00000142700,protein_coding -14658,PLK4,ENSG00000142731,protein_coding -911,MAP3K6,ENSG00000142733,protein_coding -912,FCN3,ENSG00000142748,protein_coding -887,GPN2,ENSG00000142751,protein_coding -910,SYTL1,ENSG00000142765,protein_coding -904,WDTC1,ENSG00000142784,protein_coding -712,CELA3A,ENSG00000142789,protein_coding -697,NBPF3,ENSG00000142794,protein_coding -705,HSPG2,ENSG00000142798,protein_coding -1864,ITGB3BP,ENSG00000142856,protein_coding -1934,SERBP1,ENSG00000142864,protein_coding -2166,BCL10,ENSG00000142867,protein_coding -2173,CCN1,ENSG00000142871,protein_coding -2137,PRKACB,ENSG00000142875,protein_coding -2056,PIGK,ENSG00000142892,protein_coding -1036,TINAGL1,ENSG00000142910,protein_coding -1102,AZIN2,ENSG00000142920,protein_coding -1452,RPS8,ENSG00000142937,protein_coding -1450,KIF2C,ENSG00000142945,protein_coding -1409,PTPRF,ENSG00000142949,protein_coding -1459,BEST4,ENSG00000142959,protein_coding -1523,MOB3C,ENSG00000142961,protein_coding -1530,CYP4B1,ENSG00000142973,protein_coding -1746,TMEM61,ENSG00000143001,protein_coding -1700,DMRTB1,ENSG00000143006,protein_coding -2202,LMO4,ENSG00000143013,protein_coding -2546,SYPL2,ENSG00000143028,protein_coding -2257,BARHL2,ENSG00000143032,protein_coding -2309,MTF2,ENSG00000143033,protein_coding -2351,SLC44A3,ENSG00000143036,protein_coding -2754,IGSF3,ENSG00000143061,protein_coding -2818,ZNF697,ENSG00000143067,protein_coding -2653,CTTNBP2NL,ENSG00000143079,protein_coding -2574,STRIP1,ENSG00000143093,protein_coding -2598,KCNA10,ENSG00000143105,protein_coding -2545,PSMA5,ENSG00000143106,protein_coding -2519,FNDC7,ENSG00000143107,protein_coding -2634,C1orf162,ENSG00000143110,protein_coding 
-2605,CD53,ENSG00000143119,protein_coding -2592,PROK1,ENSG00000143125,protein_coding -2541,CELSR2,ENSG00000143126,protein_coding -2955,ITGA10,ENSG00000143127,protein_coding -3836,GPR161,ENSG00000143147,protein_coding -3775,ALDH9A1,ENSG00000143149,protein_coding -3864,ATP1B1,ENSG00000143153,protein_coding -3837,TIPRL,ENSG00000143155,protein_coding -3865,NME7,ENSG00000143156,protein_coding -3805,POGK,ENSG00000143157,protein_coding -3832,MPC2,ENSG00000143158,protein_coding -3824,CREG1,ENSG00000143162,protein_coding -3833,DCAF6,ENSG00000143164,protein_coding -3812,GPA33,ENSG00000143167,protein_coding -3768,RXRG,ENSG00000143171,protein_coding -3843,TBX19,ENSG00000143178,protein_coding -3784,UCK2,ENSG00000143179,protein_coding -3781,TMCO1,ENSG00000143183,protein_coding -3851,XCL1,ENSG00000143184,protein_coding -3849,XCL2,ENSG00000143185,protein_coding -3817,POU2F1,ENSG00000143190,protein_coding -3809,MAEL,ENSG00000143194,protein_coding -3808,ILDR2,ENSG00000143195,protein_coding -3853,DPT,ENSG00000143196,protein_coding -3774,MGST3,ENSG00000143198,protein_coding -3829,ADCY10,ENSG00000143199,protein_coding -4002,COP1,ENSG00000143207,protein_coding -3665,NECTIN4,ENSG00000143217,protein_coding -3672,UFC1,ENSG00000143222,protein_coding -3675,PPOX,ENSG00000143224,protein_coding -3696,FCGR2A,ENSG00000143226,protein_coding -3747,NUF2,ENSG00000143228,protein_coding -3742,RGS5,ENSG00000143248,protein_coding -3686,SDHC,ENSG00000143252,protein_coding -3668,PFDN2,ENSG00000143256,protein_coding -3683,NR1I3,ENSG00000143257,protein_coding -3674,USP21,ENSG00000143258,protein_coding -4293,F13B,ENSG00000143278,protein_coding -3498,PRCC,ENSG00000143294,protein_coding -3520,FCRL5,ENSG00000143297,protein_coding -3495,RRNAD1,ENSG00000143303,protein_coding -3496,MRPL24,ENSG00000143314,protein_coding -3619,PIGM,ENSG00000143315,protein_coding -3625,CASQ1,ENSG00000143318,protein_coding -3494,ISG20L2,ENSG00000143319,protein_coding -3492,CRABP2,ENSG00000143320,protein_coding 
-3497,HDGF,ENSG00000143321,protein_coding -4045,ABL2,ENSG00000143322,protein_coding -4085,XPR1,ENSG00000143324,protein_coding -4122,RGS16,ENSG00000143333,protein_coding -4070,TOR1AIP1,ENSG00000143337,protein_coding -4065,FAM163A,ENSG00000143340,protein_coding -4193,HMCN1,ENSG00000143341,protein_coding -4149,RGL1,ENSG00000143344,protein_coding -4752,LYPLAL1,ENSG00000143353,protein_coding -4308,LHX9,ENSG00000143355,protein_coding -3154,PRUNE1,ENSG00000143363,protein_coding -3207,RORC,ENSG00000143365,protein_coding -3185,TUFT1,ENSG00000143367,protein_coding -3103,SF3B4,ENSG00000143368,protein_coding -3124,ECM1,ENSG00000143369,protein_coding -3172,ZNF687,ENSG00000143373,protein_coding -3122,TARS2,ENSG00000143374,protein_coding -3183,CGN,ENSG00000143375,protein_coding -3189,SNX27,ENSG00000143376,protein_coding -3148,SETDB1,ENSG00000143379,protein_coding -3127,ADAMTSL4,ENSG00000143382,protein_coding -3132,MCL1,ENSG00000143384,protein_coding -3141,CTSK,ENSG00000143387,protein_coding -3176,RFX5,ENSG00000143390,protein_coding -3173,PI4KB,ENSG00000143393,protein_coding -3169,PIP5K1A,ENSG00000143398,protein_coding -3112,ANP32E,ENSG00000143401,protein_coding -3153,MINDY1,ENSG00000143409,protein_coding -3152,ANXA9,ENSG00000143412,protein_coding -3179,SELENBP1,ENSG00000143416,protein_coding -3149,CERS2,ENSG00000143418,protein_coding -3134,ENSA,ENSG00000143420,protein_coding -3163,SEMA6C,ENSG00000143434,protein_coding -3198,MRPL9,ENSG00000143436,protein_coding -3143,ARNT,ENSG00000143437,protein_coding -3181,POGZ,ENSG00000143442,protein_coding -3157,C1orf56,ENSG00000143443,protein_coding -3199,OAZ3,ENSG00000143450,protein_coding -3137,HORMAD1,ENSG00000143452,protein_coding -3136,GOLPH3L,ENSG00000143457,protein_coding -3160,GABPB2,ENSG00000143458,protein_coding -4620,SYT14,ENSG00000143469,protein_coding -4631,KCNH1,ENSG00000143473,protein_coding -4663,DTL,ENSG00000143476,protein_coding -4546,DYRK3,ENSG00000143479,protein_coding -4544,EIF2D,ENSG00000143486,protein_coding 
-4661,INTS7,ENSG00000143493,protein_coding -4698,VASH2,ENSG00000143494,protein_coding -4817,TAF1A,ENSG00000143498,protein_coding -4710,SMYD2,ENSG00000143499,protein_coding -4834,SUSD4,ENSG00000143502,protein_coding -4803,DUSP10,ENSG00000143507,protein_coding -4816,HHIPL2,ENSG00000143512,protein_coding -4841,TP53BP2,ENSG00000143514,protein_coding -3357,ATP8B2,ENSG00000143515,protein_coding -3231,FLG2,ENSG00000143520,protein_coding -3233,CRNN,ENSG00000143536,protein_coding -3390,ADAM15,ENSG00000143537,protein_coding -3334,JTB,ENSG00000143543,protein_coding -3337,RAB13,ENSG00000143545,protein_coding -3288,S100A8,ENSG00000143546,protein_coding -3344,TPM3,ENSG00000143549,protein_coding -3339,NUP210L,ENSG00000143552,protein_coding -3309,SNAPIN,ENSG00000143553,protein_coding -3319,SLC27A3,ENSG00000143554,protein_coding -3293,S100A7,ENSG00000143556,protein_coding -3349,UBAP2L,ENSG00000143569,protein_coding -3328,SLC39A1,ENSG00000143570,protein_coding -3351,HAX1,ENSG00000143575,protein_coding -3333,CREB3L4,ENSG00000143578,protein_coding -3393,EFNA3,ENSG00000143590,protein_coding -3356,AQP10,ENSG00000143595,protein_coding -3374,KCNN3,ENSG00000143603,protein_coding -3348,C1orf43,ENSG00000143612,protein_coding -3321,GATAD2B,ENSG00000143614,protein_coding -3310,ILF2,ENSG00000143621,protein_coding -3441,RIT1,ENSG00000143622,protein_coding -3316,INTS3,ENSG00000143624,protein_coding -3415,PKLR,ENSG00000143627,protein_coding -3414,HCN3,ENSG00000143630,protein_coding -3230,FLG,ENSG00000143631,protein_coding -5023,ACTA1,ENSG00000143632,protein_coding -5064,C1orf131,ENSG00000143633,protein_coding -5037,GALNT2,ENSG00000143641,protein_coding -5056,TTC13,ENSG00000143643,protein_coding -5351,SCCPDH,ENSG00000143653,protein_coding -5168,LYST,ENSG00000143669,protein_coding -5098,MAP3K21,ENSG00000143674,protein_coding -5288,CEP170,ENSG00000143702,protein_coding -5474,ACP1,ENSG00000143727,protein_coding -4940,SNAP47,ENSG00000143740,protein_coding -4880,SRP9,ENSG00000143742,protein_coding 
-4857,NVL,ENSG00000143748,protein_coding -4892,SDE2,ENSG00000143751,protein_coding -4853,DEGS1,ENSG00000143753,protein_coding -4851,FBXO28,ENSG00000143756,protein_coding -4951,ARF1,ENSG00000143761,protein_coding -4890,LEFTY2,ENSG00000143768,protein_coding -4861,CNIH4,ENSG00000143771,protein_coding -4911,ITPKB,ENSG00000143772,protein_coding -4959,GUK1,ENSG00000143774,protein_coding -4919,CDC42BPA,ENSG00000143776,protein_coding -4864,CNIH3,ENSG00000143786,protein_coding -4953,C1orf35,ENSG00000143793,protein_coding -5593,MBOAT2,ENSG00000143797,protein_coding -4904,PARP1,ENSG00000143799,protein_coding -4916,PSEN2,ENSG00000143801,protein_coding -4887,PYCR2,ENSG00000143811,protein_coding -4873,LBR,ENSG00000143815,protein_coding -4945,WNT9A,ENSG00000143816,protein_coding -4882,EPHX1,ENSG00000143819,protein_coding -4466,REN,ENSG00000143839,protein_coding -4462,SOX13,ENSG00000143842,protein_coding -4464,ETNK2,ENSG00000143845,protein_coding -4429,PPFIA4,ENSG00000143847,protein_coding -4470,PLEKHA6,ENSG00000143850,protein_coding -4399,PTPN7,ENSG00000143851,protein_coding -4409,SYT2,ENSG00000143858,protein_coding -4398,ARL8A,ENSG00000143862,protein_coding -5752,OSR1,ENSG00000143867,protein_coding -5784,GDF7,ENSG00000143869,protein_coding -5645,PDIA6,ENSG00000143870,protein_coding -5775,RHOB,ENSG00000143878,protein_coding -5642,ATP6V1C2,ENSG00000143882,protein_coding -6113,HNRNPLL,ENSG00000143889,protein_coding -6115,GALM,ENSG00000143891,protein_coding -6208,CAMKMT,ENSG00000143919,protein_coding -6195,ABCG8,ENSG00000143921,protein_coding -6160,EML4,ENSG00000143924,protein_coding -6262,CALM2,ENSG00000143933,protein_coding -6336,CHAC2,ENSG00000143942,protein_coding -6363,RPS27A,ENSG00000143947,protein_coding -6491,WDPCP,ENSG00000143951,protein_coding -6503,VPS54,ENSG00000143952,protein_coding -6818,REG3G,ENSG00000143954,protein_coding -5865,ASXL2,ENSG00000143970,protein_coding -6571,ETAA1,ENSG00000143971,protein_coding -6634,SNRPG,ENSG00000143977,protein_coding 
-5910,ABHD1,ENSG00000143994,protein_coding -6559,MEIS1,ENSG00000143995,protein_coding -7155,TRIM43B,ENSG00000144010,protein_coding -7170,TRIM43,ENSG00000144015,protein_coding -7192,CIAO1,ENSG00000144021,protein_coding -7141,ZNF514,ENSG00000144026,protein_coding -7193,SNRNP200,ENSG00000144028,protein_coding -7140,MRPS5,ENSG00000144029,protein_coding -6657,ANKRD53,ENSG00000144031,protein_coding -6703,TPRKB,ENSG00000144034,protein_coding -6700,NAT8,ENSG00000144035,protein_coding -6677,EXOC6B,ENSG00000144036,protein_coding -6684,SFXN5,ENSG00000144040,protein_coding -6658,TEX261,ENSG00000144043,protein_coding -6755,DQX1,ENSG00000144045,protein_coding -6706,DUSP11,ENSG00000144048,protein_coding -7406,ST6GAL2,ENSG00000144057,protein_coding -7476,NPHP1,ENSG00000144061,protein_coding -7474,MALL,ENSG00000144063,protein_coding -6979,THNSL2,ENSG00000144115,protein_coding -7666,RALB,ENSG00000144118,protein_coding -7642,C1QL2,ENSG00000144119,protein_coding -7652,TMEM177,ENSG00000144120,protein_coding -7552,NT5DC4,ENSG00000144130,protein_coding -7589,RABL2A,ENSG00000144134,protein_coding -7551,SLC20A1,ENSG00000144136,protein_coding -7534,FBLN7,ENSG00000144152,protein_coding -7536,ZC3H8,ENSG00000144161,protein_coding -7270,LIPT1,ENSG00000144182,protein_coding -7255,CNGA3,ENSG00000144191,protein_coding -7224,FAHD2B,ENSG00000144199,protein_coding -7275,LYG1,ENSG00000144214,protein_coding -7280,AFF3,ENSG00000144218,protein_coding -7966,UBXN4,ENSG00000144224,protein_coding -7996,NXPH2,ENSG00000144227,protein_coding -7992,SPOPL,ENSG00000144228,protein_coding -7981,THSD7B,ENSG00000144229,protein_coding -7745,GPR17,ENSG00000144230,protein_coding -7751,POLR2D,ENSG00000144231,protein_coding -7755,AMMECR1L,ENSG00000144233,protein_coding -8157,GALNT13,ENSG00000144278,protein_coding -8207,PKP4,ENSG00000144283,protein_coding -8306,SCN1A,ENSG00000144285,protein_coding -8258,SLC4A10,ENSG00000144290,protein_coding -8429,SCRN3,ENSG00000144306,protein_coding 
-8455,LNPK,ENSG00000144320,protein_coding -8548,ZNF385B,ENSG00000144331,protein_coding -8697,TMEFF2,ENSG00000144339,protein_coding -8406,CDCA7,ENSG00000144354,protein_coding -8384,DLX1,ENSG00000144355,protein_coding -8350,UBR3,ENSG00000144357,protein_coding -8343,PHOSPHO2,ENSG00000144362,protein_coding -8636,GULP1,ENSG00000144366,protein_coding -8623,FAM171B,ENSG00000144369,protein_coding -8754,HSPD1,ENSG00000144381,protein_coding -8736,CCDC150,ENSG00000144395,protein_coding -8977,METTL21A,ENSG00000144401,protein_coding -9020,UNC80,ENSG00000144406,protein_coding -9006,PTH2R,ENSG00000144407,protein_coding -8962,CPO,ENSG00000144410,protein_coding -8892,NBEAL1,ENSG00000144426,protein_coding -9023,KANSL1L,ENSG00000144445,protein_coding -9054,SPAG16,ENSG00000144451,protein_coding -9065,ABCA12,ENSG00000144452,protein_coding -9700,SUMF1,ENSG00000144455,protein_coding -9291,NYAP2,ENSG00000144460,protein_coding -9301,RHBDD1,ENSG00000144468,protein_coding -9520,ACKR3,ENSG00000144476,protein_coding -9485,TRPM8,ENSG00000144481,protein_coding -9554,HES6,ENSG00000144485,protein_coding -9547,ESPNL,ENSG00000144488,protein_coding -9601,ANKMY1,ENSG00000144504,protein_coding -9411,COPS7B,ENSG00000144524,protein_coding -9417,DIS3L2,ENSG00000144535,protein_coding -9765,CPNE9,ENSG00000144550,protein_coding -9790,FANCD2,ENSG00000144554,protein_coding -9827,TAMM41,ENSG00000144559,protein_coding -9824,VGLL4,ENSG00000144560,protein_coding -9964,RAB5A,ENSG00000144566,protein_coding -9188,RETREG2,ENSG00000144567,protein_coding -9145,CTDSP1,ENSG00000144579,protein_coding -9151,CNOT9,ENSG00000144580,protein_coding -9090,MARCH4,ENSG00000144583,protein_coding -9225,STK11IP,ENSG00000144589,protein_coding -9215,GMPPA,ENSG00000144591,protein_coding -9882,GRIP2,ENSG00000144596,protein_coding -9906,EAF1,ENSG00000144597,protein_coding -9688,CNTN4,ENSG00000144619,protein_coding -10095,DYNC1LI1,ENSG00000144635,protein_coding -10054,RBMS3,ENSG00000144642,protein_coding 
-10069,GADL1,ENSG00000144644,protein_coding -10076,OSBPL10,ENSG00000144645,protein_coding -10300,POMGNT2,ENSG00000144647,protein_coding -10290,ACKR2,ENSG00000144648,protein_coding -10298,GASK1A,ENSG00000144649,protein_coding -10202,CSRNP1,ENSG00000144655,protein_coding -10218,SLC25A38,ENSG00000144659,protein_coding -10165,ITGA9,ENSG00000144668,protein_coding -10183,SLC22A14,ENSG00000144671,protein_coding -10161,GOLGA4,ENSG00000144674,protein_coding -10170,CTDSPL,ENSG00000144677,protein_coding -10137,STAC,ENSG00000144681,protein_coding -9857,IQSEC1,ENSG00000144711,protein_coding -9851,CAND2,ENSG00000144712,protein_coding -9853,RPL32,ENSG00000144713,protein_coding -10772,PTPRG,ENSG00000144724,protein_coding -10712,IL17RD,ENSG00000144730,protein_coding -10903,SHQ1,ENSG00000144736,protein_coding -10827,SLC25A26,ENSG00000144741,protein_coding -10854,UBA3,ENSG00000144744,protein_coding -10855,ARL6IP5,ENSG00000144746,protein_coding -10852,TMF1,ENSG00000144747,protein_coding -10830,LRIG1,ENSG00000144749,protein_coding -10691,LRTM1,ENSG00000144771,protein_coding -38942,AC073896.1,ENSG00000144785,protein_coding -10354,LIMD1,ENSG00000144791,protein_coding -10328,ZNF660,ENSG00000144792,protein_coding -11179,NFKBIZ,ENSG00000144802,protein_coding -11134,COL8A1,ENSG00000144810,protein_coding -11177,NXPE3,ENSG00000144815,protein_coding -11148,ADGRG7,ENSG00000144820,protein_coding -11231,MYH15,ENSG00000144821,protein_coding -11277,PHLDB2,ENSG00000144824,protein_coding -11279,ABHD10,ENSG00000144827,protein_coding -11280,TAGLN3,ENSG00000144834,protein_coding -11399,PLA1A,ENSG00000144837,protein_coding -11429,RABL3,ENSG00000144840,protein_coding -11398,ADPRH,ENSG00000144843,protein_coding -11381,IGSF11,ENSG00000144847,protein_coding -11296,ATG3,ENSG00000144848,protein_coding -11407,NR1I2,ENSG00000144852,protein_coding -11311,BOC,ENSG00000144857,protein_coding -11737,SRPRB,ENSG00000144867,protein_coding -11721,TMEM108,ENSG00000144868,protein_coding 
-11953,AGTR1,ENSG00000144891,protein_coding -12022,MED12L,ENSG00000144893,protein_coding -12007,EIF2A,ENSG00000144895,protein_coding -11547,ALDH1L1,ENSG00000144908,protein_coding -11520,OSBPL11,ENSG00000144909,protein_coding -11890,TRPC1,ENSG00000144935,protein_coding -12333,NCEH1,ENSG00000144959,protein_coding -12342,SPATA16,ENSG00000144962,protein_coding -27321,FAM86B2,ENSG00000145002,protein_coding -12628,LPP,ENSG00000145012,protein_coding -12713,TMEM44,ENSG00000145014,protein_coding -12829,RUBCN,ENSG00000145016,protein_coding -10506,AMT,ENSG00000145020,protein_coding -10505,TCTA,ENSG00000145022,protein_coding -10508,NICN1,ENSG00000145029,protein_coding -10455,UCN2,ENSG00000145040,protein_coding -10582,DCAF1,ENSG00000145041,protein_coding -10580,MANF,ENSG00000145050,protein_coding -11437,STXBP5L,ENSG00000145087,protein_coding -11452,EAF2,ENSG00000145088,protein_coding -11454,ILDR1,ENSG00000145103,protein_coding -12778,TM4SF19,ENSG00000145107,protein_coding -12751,MUC4,ENSG00000145113,protein_coding -13255,SLIT2,ENSG00000145147,protein_coding -12519,EIF2B5,ENSG00000145191,protein_coding -12580,AHSG,ENSG00000145192,protein_coding -12530,ECE2,ENSG00000145194,protein_coding -12524,VWA5B2,ENSG00000145198,protein_coding -12892,DGKQ,ENSG00000145214,protein_coding -13622,FIP1L1,ENSG00000145216,protein_coding -12893,SLC26A1,ENSG00000145217,protein_coding -12992,LYAR,ENSG00000145220,protein_coding -13790,CENPC,ENSG00000145241,protein_coding -13773,EPHA5,ENSG00000145242,protein_coding -13551,CORIN,ENSG00000145244,protein_coding -13548,ATP10D,ENSG00000145246,protein_coding -13571,OCIAD2,ENSG00000145247,protein_coding -13565,SLC10A4,ENSG00000145248,protein_coding -14137,SLC10A6,ENSG00000145283,protein_coding -14087,SCD5,ENSG00000145284,protein_coding -14099,PLAC8,ENSG00000145287,protein_coding -14079,ENOPH1,ENSG00000145293,protein_coding -13884,CABS1,ENSG00000145309,protein_coding -13907,GC,ENSG00000145321,protein_coding -14291,TRMT10A,ENSG00000145331,protein_coding 
-14146,KLHL8,ENSG00000145332,protein_coding -14204,SNCA,ENSG00000145335,protein_coding -14186,PYURF,ENSG00000145337,protein_coding -14381,TBCK,ENSG00000145348,protein_coding -14500,CAMK2D,ENSG00000145349,protein_coding -14336,CISD2,ENSG00000145354,protein_coding -14304,DDIT4L,ENSG00000145358,protein_coding -14489,ANK2,ENSG00000145362,protein_coding -14466,TIFA,ENSG00000145365,protein_coding -14617,SPATA5,ENSG00000145375,protein_coding -14559,FABP2,ENSG00000145384,protein_coding -14598,CCNA2,ENSG00000145386,protein_coding -14551,METTL14,ENSG00000145388,protein_coding -14557,USP53,ENSG00000145390,protein_coding -14782,SETD7,ENSG00000145391,protein_coding -15121,NAF1,ENSG00000145414,protein_coding -15128,MARCH1,ENSG00000145416,protein_coding -15003,SFRP2,ENSG00000145423,protein_coding -14942,RPS3A,ENSG00000145425,protein_coding -15001,RNF175,ENSG00000145428,protein_coding -15056,PDGFC,ENSG00000145431,protein_coding -15196,CBR4,ENSG00000145439,protein_coding -15269,GLRA3,ENSG00000145451,protein_coding -15424,CYP4V2,ENSG00000145476,protein_coding -15725,ROPN1L,ENSG00000145491,protein_coding -15569,NDUFS6,ENSG00000145494,protein_coding -15723,MARCH6,ENSG00000145495,protein_coding -15543,NKD2,ENSG00000145506,protein_coding -15856,CDH18,ENSG00000145526,protein_coding -15612,ADAMTS16,ENSG00000145536,protein_coding -15632,SRD5A1,ENSG00000145545,protein_coding -15794,MYO10,ENSG00000145555,protein_coding -15759,OTULINL,ENSG00000145569,protein_coding -16131,RPL37,ENSG00000145592,protein_coding -16054,SKP2,ENSG00000145604,protein_coding -16106,OSMR,ENSG00000145623,protein_coding -16049,UGT3A1,ENSG00000145626,protein_coding -16350,PLK2,ENSG00000145632,protein_coding -16418,SHISAL2B,ENSG00000145642,protein_coding -16282,GZMA,ENSG00000145649,protein_coding -16471,PIK3R1,ENSG00000145675,protein_coding -16784,HAPLN1,ENSG00000145681,protein_coding -16686,LHFPL2,ENSG00000145685,protein_coding -16752,SSBP2,ENSG00000145687,protein_coding -16694,BHMT,ENSG00000145692,protein_coding 
-16617,ANKRD31,ENSG00000145700,protein_coding -16647,IQGAP2,ENSG00000145703,protein_coding -16659,CRHBP,ENSG00000145708,protein_coding -16824,RASA1,ENSG00000145715,protein_coding -16954,LIX1,ENSG00000145721,protein_coding -17033,GIN1,ENSG00000145723,protein_coding -17034,PPIP5K2,ENSG00000145725,protein_coding -17031,PAM,ENSG00000145730,protein_coding -16542,BDP1,ENSG00000145734,protein_coding -16532,GTF2H2,ENSG00000145736,protein_coding -16484,SLC30A5,ENSG00000145740,protein_coding -16585,BTF3,ENSG00000145741,protein_coding -17062,FBXL17,ENSG00000145743,protein_coding -16922,SPATA9,ENSG00000145757,protein_coding -17095,TSLP,ENSG00000145777,protein_coding -17213,TNFAIP8,ENSG00000145779,protein_coding -17156,FEM1C,ENSG00000145780,protein_coding -17175,COMMD10,ENSG00000145781,protein_coding -17166,ATG12,ENSG00000145782,protein_coding -17315,MEGF10,ENSG00000145794,protein_coding -17338,ADAMTS19,ENSG00000145808,protein_coding -17777,YIPF5,ENSG00000145817,protein_coding -17761,ARHGAP26,ENSG00000145819,protein_coding -17490,CXCL14,ENSG00000145824,protein_coding -17499,LECT2,ENSG00000145826,protein_coding -17491,SLC25A48,ENSG00000145832,protein_coding -17461,DDX46,ENSG00000145833,protein_coding -17496,IL9,ENSG00000145839,protein_coding -17991,TIMD4,ENSG00000145850,protein_coding -18040,RNF145,ENSG00000145860,protein_coding -18063,C1QTNF2,ENSG00000145861,protein_coding -18075,GABRA6,ENSG00000145863,protein_coding -18074,GABRB2,ENSG00000145864,protein_coding -17841,FBXO38,ENSG00000145868,protein_coding -17838,SPINK7,ENSG00000145879,protein_coding -17860,PCYOX1L,ENSG00000145882,protein_coding -17941,GLRA1,ENSG00000145888,protein_coding -17913,TNIP1,ENSG00000145901,protein_coding -17940,G3BP1,ENSG00000145907,protein_coding -17908,ZNF300,ENSG00000145908,protein_coding -18350,N4BP3,ENSG00000145911,protein_coding -18352,NHP2,ENSG00000145912,protein_coding -18351,RMND5B,ENSG00000145916,protein_coding -18226,BOD1,ENSG00000145919,protein_coding 
-18261,CPLX2,ENSG00000145920,protein_coding -18114,TENM2,ENSG00000145934,protein_coding -18157,KCNMB1,ENSG00000145936,protein_coding -18567,FAM50B,ENSG00000145945,protein_coding -18531,MYLK4,ENSG00000145949,protein_coding -18577,FAM217A,ENSG00000145975,protein_coding -18733,TBC1D7,ENSG00000145979,protein_coding -18605,FARS2,ENSG00000145982,protein_coding -18735,GFOD1,ENSG00000145990,protein_coding -18839,CDKAL1,ENSG00000145996,protein_coding -17600,PSD2,ENSG00000146005,protein_coding -17560,LRRTM2,ENSG00000146006,protein_coding -17638,ZMAT2,ENSG00000146007,protein_coding -17542,GFRA3,ENSG00000146013,protein_coding -17522,KLHL3,ENSG00000146021,protein_coding -18871,DCDC2,ENSG00000146038,protein_coding -18916,SLC17A4,ENSG00000146039,protein_coding -18913,HIST1H2BA,ENSG00000146047,protein_coding -18872,KAAG1,ENSG00000146049,protein_coding -18463,TRIM7,ENSG00000146054,protein_coding -18467,TRIM41,ENSG00000146063,protein_coding -18286,HIGD2A,ENSG00000146066,protein_coding -18330,FAM193B,ENSG00000146067,protein_coding -19829,PLA2G7,ENSG00000146070,protein_coding -19836,TNFRSF21,ENSG00000146072,protein_coding -18290,RNF44,ENSG00000146083,protein_coding -19862,MMUT,ENSG00000146085,protein_coding -18423,RASGEF1C,ENSG00000146090,protein_coding -18328,DOK3,ENSG00000146094,protein_coding -18983,ABT1,ENSG00000146109,protein_coding -19239,PPP1R18,ENSG00000146112,protein_coding -19644,DAAM2,ENSG00000146122,protein_coding -19993,PRIM2,ENSG00000146143,protein_coding -19954,MLIP,ENSG00000146147,protein_coding -19972,HMGCLL1,ENSG00000146151,protein_coding -20027,LGSN,ENSG00000146166,protein_coding -19596,FGD2,ENSG00000146192,protein_coding -19535,SCUBE3,ENSG00000146197,protein_coding -9626,ANO7,ENSG00000146205,protein_coding -19749,CRIP3,ENSG00000146215,protein_coding -19747,TTBK1,ENSG00000146216,protein_coding -19799,TCTE1,ENSG00000146221,protein_coding -19722,RPL7L1,ENSG00000146223,protein_coding -19796,NFKBIE,ENSG00000146232,protein_coding 
-19825,CYP39A1,ENSG00000146233,protein_coding -20250,TPBG,ENSG00000146242,protein_coding -20195,IRAK1BP1,ENSG00000146243,protein_coding -20196,PHIP,ENSG00000146247,protein_coding -20259,PRSS35,ENSG00000146250,protein_coding -20434,MMS22L,ENSG00000146263,protein_coding -20447,FAXC,ENSG00000146267,protein_coding -20356,GABRR1,ENSG00000146276,protein_coding -20351,PNRC1,ENSG00000146278,protein_coding -20355,PM20D2,ENSG00000146281,protein_coding -20329,RARS2,ENSG00000146282,protein_coding -20532,SCML4,ENSG00000146285,protein_coding -20749,TBC1D32,ENSG00000146350,protein_coding -20780,CLVS2,ENSG00000146352,protein_coding -20590,GPR6,ENSG00000146360,protein_coding -20788,RNF217,ENSG00000146373,protein_coding -20811,RSPO3,ENSG00000146374,protein_coding -20843,ARHGAP18,ENSG00000146376,protein_coding -20892,TAAR2,ENSG00000146378,protein_coding -20887,TAAR6,ENSG00000146383,protein_coding -20885,TAAR8,ENSG00000146385,protein_coding -21023,ABRACL,ENSG00000146386,protein_coding -20893,TAAR1,ENSG00000146399,protein_coding -20901,SLC18B1,ENSG00000146409,protein_coding -20967,MTFR2,ENSG00000146410,protein_coding -20922,SLC2A12,ENSG00000146411,protein_coding -21109,SHPRH,ENSG00000146414,protein_coding -21065,AIG1,ENSG00000146416,protein_coding -21321,DYNLT1,ENSG00000146425,protein_coding -21268,TIAM2,ENSG00000146426,protein_coding -21318,TMEM181,ENSG00000146433,protein_coding -21358,PNLDC1,ENSG00000146453,protein_coding -21351,WTAP,ENSG00000146457,protein_coding -1146,ZMYM4,ENSG00000146463,protein_coding -21233,VIP,ENSG00000146469,protein_coding -21219,ARMT1,ENSG00000146476,protein_coding -21368,SLC22A3,ENSG00000146477,protein_coding -21778,VWDE,ENSG00000146530,protein_coding -21623,GNA12,ENSG00000146535,protein_coding -21576,C7orf50,ENSG00000146540,protein_coding -21632,SDK1,ENSG00000146555,protein_coding -21711,CCZ1B,ENSG00000146574,protein_coding -21702,C7orf26,ENSG00000146576,protein_coding -21652,RBAK,ENSG00000146587,protein_coding -22039,CREB5,ENSG00000146592,protein_coding 
-21855,FERD3L,ENSG00000146618,protein_coding -22480,EGFR,ENSG00000146648,protein_coding -33510,CDCA5,ENSG00000146670,protein_coding -22377,IGFBP3,ENSG00000146674,protein_coding -22347,PURB,ENSG00000146676,protein_coding -22376,IGFBP1,ENSG00000146678,protein_coding -22989,SSC4D,ENSG00000146700,protein_coding -22978,MDH2,ENSG00000146701,protein_coding -23000,POMZP3,ENSG00000146707,protein_coding -22511,NIPSNAP2,ENSG00000146729,protein_coding -22514,CCT6A,ENSG00000146731,protein_coding -22513,PSPH,ENSG00000146733,protein_coding -22889,TRIM50,ENSG00000146755,protein_coding -22754,ZNF92,ENSG00000146757,protein_coding -23534,ATXN7L1,ENSG00000146776,protein_coding -23618,TMEM168,ENSG00000146802,protein_coding -23750,ASB15,ENSG00000146809,protein_coding -23358,MAP11,ENSG00000146826,protein_coding -23404,SLC12A9,ENSG00000146828,protein_coding -23398,GIGYF1,ENSG00000146830,protein_coding -23332,TRIM4,ENSG00000146833,protein_coding -23376,MEPCE,ENSG00000146834,protein_coding -23401,ZAN,ENSG00000146839,protein_coding -23888,TMEM209,ENSG00000146842,protein_coding -23970,AGBL3,ENSG00000146856,protein_coding -23979,STRA8,ENSG00000146857,protein_coding -24039,ZC3HAV1L,ENSG00000146858,protein_coding -23973,TMEM140,ENSG00000146859,protein_coding -51360,TLK2,ENSG00000146872,protein_coding -24239,EPHA1,ENSG00000146904,protein_coding -24520,NOM1,ENSG00000146909,protein_coding -24502,CNPY1,ENSG00000146910,protein_coding -24550,NCAPG2,ENSG00000146918,protein_coding -24418,ASB10,ENSG00000146926,protein_coding -24622,NLGN4X,ENSG00000146938,protein_coding -24665,SHROOM2,ENSG00000146950,protein_coding -24071,RAB19,ENSG00000146955,protein_coding -24050,LUC7L2,ENSG00000146963,protein_coding -24074,DENND2A,ENSG00000146966,protein_coding -24739,CLTRN,ENSG00000147003,protein_coding -24803,SH3KBP1,ENSG00000147010,protein_coding -24954,TMEM47,ENSG00000147027,protein_coding -24987,LANCL3,ENSG00000147036,protein_coding -24997,SYTL5,ENSG00000147041,protein_coding 
-25058,CASK,ENSG00000147044,protein_coding -25099,KDM6A,ENSG00000147050,protein_coding -25470,SPIN2A,ENSG00000147059,protein_coding -25518,MSN,ENSG00000147065,protein_coding -25300,AKAP4,ENSG00000147081,protein_coding -25301,CCNB3,ENSG00000147082,protein_coding -25650,HDAC8,ENSG00000147099,protein_coding -25722,SLC16A2,ENSG00000147100,protein_coding -25100,DIPK2B,ENSG00000147113,protein_coding -25151,ZNF157,ENSG00000147117,protein_coding -25174,ZNF182,ENSG00000147118,protein_coding -25130,CHST7,ENSG00000147119,protein_coding -25123,KRBOX4,ENSG00000147121,protein_coding -25141,NDUFB11,ENSG00000147123,protein_coding -25155,ZNF41,ENSG00000147124,protein_coding -25581,RAB41,ENSG00000147127,protein_coding -25610,ZMYM3,ENSG00000147130,protein_coding -25615,TAF1,ENSG00000147133,protein_coding -25780,GPR174,ENSG00000147138,protein_coding -25611,NONO,ENSG00000147140,protein_coding -25246,CCDC120,ENSG00000147144,protein_coding -25775,LPAR4,ENSG00000147145,protein_coding -25214,EBP,ENSG00000147155,protein_coding -25568,AWAT2,ENSG00000147160,protein_coding -25622,OGT,ENSG00000147162,protein_coding -25601,SNX12,ENSG00000147164,protein_coding -25612,ITGB1BP2,ENSG00000147166,protein_coding -25605,IL2RG,ENSG00000147168,protein_coding -25623,GCNA,ENSG00000147174,protein_coding -25827,ZNF711,ENSG00000147180,protein_coding -25858,CPXCR1,ENSG00000147183,protein_coding -25914,DIAPH2,ENSG00000147202,protein_coding -26034,NXF3,ENSG00000147206,protein_coding -26098,RIPPLY1,ENSG00000147223,protein_coding -26111,PRPS1,ENSG00000147224,protein_coding -26093,RADX,ENSG00000147231,protein_coding -26110,FRMPD3,ENSG00000147234,protein_coding -26195,HTR2C,ENSG00000147246,protein_coding -26250,DOCK11,ENSG00000147251,protein_coding -26449,IGSF1,ENSG00000147255,protein_coding -26445,ARHGAP36,ENSG00000147256,protein_coding -26485,GPC3,ENSG00000147257,protein_coding -26436,GPR119,ENSG00000147262,protein_coding -26597,RBMX,ENSG00000147274,protein_coding -27063,MCPH1,ENSG00000147316,protein_coding 
-27194,MFHAS1,ENSG00000147324,protein_coding -26996,FBXO25,ENSG00000147364,protein_coding -26797,FATE1,ENSG00000147378,protein_coding -26800,MAGEA4,ENSG00000147381,protein_coding -26830,NSDHL,ENSG00000147383,protein_coding -26831,ZNF185,ENSG00000147394,protein_coding -26829,CETN2,ENSG00000147400,protein_coding -26901,RPL10,ENSG00000147403,protein_coding -27436,CSGALNACT1,ENSG00000147408,protein_coding -27443,ATP6V1B2,ENSG00000147416,protein_coding -27602,CCDC25,ENSG00000147419,protein_coding -27633,HMBOX1,ENSG00000147421,protein_coding -27891,CHRNB3,ENSG00000147432,protein_coding -27894,CHRNA6,ENSG00000147434,protein_coding -27557,GNRH1,ENSG00000147437,protein_coding -27496,BIN3,ENSG00000147439,protein_coding -27463,DOK2,ENSG00000147443,protein_coding -27528,SLC25A37,ENSG00000147454,protein_coding -27519,CHMP7,ENSG00000147457,protein_coding -27554,DOCK5,ENSG00000147459,protein_coding -27792,STAR,ENSG00000147465,protein_coding -27775,PLPBP,ENSG00000147471,protein_coding -27773,ERLIN2,ENSG00000147475,protein_coding -28001,SNTG1,ENSG00000147481,protein_coding -28008,PXDNL,ENSG00000147485,protein_coding -28018,ST18,ENSG00000147488,protein_coding -28040,RGS20,ENSG00000147509,protein_coding -27814,TACC1,ENSG00000147526,protein_coding -27857,GOLGA7,ENSG00000147533,protein_coding -27800,PLPP5,ENSG00000147535,protein_coding -27860,GINS4,ENSG00000147536,protein_coding -27801,NSD3,ENSG00000147548,protein_coding -28222,DNAJC5B,ENSG00000147570,protein_coding -28225,CRH,ENSG00000147571,protein_coding -28224,TRIM55,ENSG00000147573,protein_coding -28232,ADHFE1,ENSG00000147576,protein_coding -28423,MRPS28,ENSG00000147586,protein_coding -28464,PMP2,ENSG00000147588,protein_coding -28302,LACTB2,ENSG00000147592,protein_coding -28288,PRDM14,ENSG00000147596,protein_coding -28328,TERF1,ENSG00000147601,protein_coding -28337,RPL7,ENSG00000147604,protein_coding -28596,SLC26A7,ENSG00000147606,protein_coding -28536,PSKH2,ENSG00000147613,protein_coding 
-28533,ATP6V0D2,ENSG00000147614,protein_coding -28921,SYBU,ENSG00000147642,protein_coding -28864,DPYS,ENSG00000147647,protein_coding -28704,MTDH,ENSG00000147649,protein_coding -28869,LRP12,ENSG00000147650,protein_coding -28920,EBAG9,ENSG00000147654,protein_coding -28902,RSPO2,ENSG00000147655,protein_coding -28757,POLR2K,ENSG00000147669,protein_coding -28987,MAL2,ENSG00000147676,protein_coding -28962,EIF3H,ENSG00000147677,protein_coding -28963,UTP23,ENSG00000147679,protein_coding -29083,NDUFB9,ENSG00000147684,protein_coding -29080,TATDN1,ENSG00000147687,protein_coding -29038,FAM83A,ENSG00000147689,protein_coding -29155,GSDMC,ENSG00000147697,protein_coding -29248,FAM135B,ENSG00000147724,protein_coding -29451,ZNF7,ENSG00000147789,protein_coding -29439,ARHGAP39,ENSG00000147799,protein_coding -29421,SLC39A4,ENSG00000147804,protein_coding -29355,NAPRT,ENSG00000147813,protein_coding -29505,VLDLR,ENSG00000147852,protein_coding -29530,AK3,ENSG00000147853,protein_coding -29580,UHRF2,ENSG00000147854,protein_coding -29647,NFIB,ENSG00000147862,protein_coding -29656,CER1,ENSG00000147869,protein_coding -29705,PLIN2,ENSG00000147872,protein_coding -29753,IFNA5,ENSG00000147873,protein_coding -29701,HAUS6,ENSG00000147874,protein_coding -29783,CDKN2B,ENSG00000147883,protein_coding -29748,IFNA16,ENSG00000147885,protein_coding -29780,CDKN2A,ENSG00000147889,protein_coding -29835,C9orf72,ENSG00000147894,protein_coding -29833,IFNK,ENSG00000147896,protein_coding -30069,ZCCHC7,ENSG00000147905,protein_coding -30082,FBXO10,ENSG00000147912,protein_coding -29963,SIGMAR1,ENSG00000147955,protein_coding -30355,CBWD5,ENSG00000147996,protein_coding -30537,CEP78,ENSG00000148019,protein_coding -30612,NTRK2,ENSG00000148053,protein_coding -30593,IDNK,ENSG00000148057,protein_coding -30689,SHC3,ENSG00000148082,protein_coding -30725,AUH,ENSG00000148090,protein_coding -30806,MFSD14B,ENSG00000148110,protein_coding -30815,AOPEP,ENSG00000148120,protein_coding -30961,PLPPR1,ENSG00000148123,protein_coding 
-30999,OR13C4,ENSG00000148136,protein_coding -31037,ZNF462,ENSG00000148143,protein_coding -31132,INIP,ENSG00000148153,protein_coding -31114,UGCG,ENSG00000148154,protein_coding -31066,ACTL7B,ENSG00000148156,protein_coding -31134,SNX30,ENSG00000148158,protein_coding -31246,STOM,ENSG00000148175,protein_coding -31242,GSN,ENSG00000148180,protein_coding -31261,MRRF,ENSG00000148187,protein_coding -31318,NR6A1,ENSG00000148200,protein_coding -31299,CRB2,ENSG00000148204,protein_coding -31281,OR5C1,ENSG00000148215,protein_coding -31158,ALAD,ENSG00000148218,protein_coding -31202,ASTN2,ENSG00000148219,protein_coding -31155,WDR31,ENSG00000148225,protein_coding -31159,POLE3,ENSG00000148229,protein_coding -31586,SURF4,ENSG00000148248,protein_coding -31569,GBGT1,ENSG00000148288,protein_coding -31584,SURF1,ENSG00000148290,protein_coding -31585,SURF2,ENSG00000148291,protein_coding -31575,SURF6,ENSG00000148296,protein_coding -31577,MED22,ENSG00000148297,protein_coding -31588,REXO4,ENSG00000148300,protein_coding -31579,RPL7A,ENSG00000148303,protein_coding -31563,GTF3C5,ENSG00000148308,protein_coding -31488,ASB6,ENSG00000148331,protein_coding -31409,PTGES2,ENSG00000148334,protein_coding -31486,NTMT1,ENSG00000148335,protein_coding -31413,CIZ1,ENSG00000148337,protein_coding -31406,SLC25A25,ENSG00000148339,protein_coding -31461,SH3GLB2,ENSG00000148341,protein_coding -31462,MIGA2,ENSG00000148343,protein_coding -31492,PTGES,ENSG00000148344,protein_coding -31411,LCN2,ENSG00000148346,protein_coding -31375,LRSAM1,ENSG00000148356,protein_coding -31507,HMCN2,ENSG00000148357,protein_coding -31503,GPR107,ENSG00000148358,protein_coding -31721,PAXX,ENSG00000148362,protein_coding -35169,IDI2,ENSG00000148377,protein_coding -31670,INPP5E,ENSG00000148384,protein_coding -31648,LCN9,ENSG00000148386,protein_coding -31671,SEC16A,ENSG00000148396,protein_coding -31769,DPH7,ENSG00000148399,protein_coding -31673,NOTCH1,ENSG00000148400,protein_coding -31780,CACNA1B,ENSG00000148408,protein_coding 
-31654,NACC2,ENSG00000148411,protein_coding -35349,PROSER2,ENSG00000148426,protein_coding -35345,USP6NL,ENSG00000148429,protein_coding -35528,COMMD3,ENSG00000148444,protein_coding -35544,MSRB2,ENSG00000148450,protein_coding -35593,PDSS1,ENSG00000148459,protein_coding -35428,FAM171A1,ENSG00000148468,protein_coding -35431,MINDY3,ENSG00000148481,protein_coding -35463,SLC39A12,ENSG00000148482,protein_coding -35459,TMEM236,ENSG00000148483,protein_coding -35440,RSU1,ENSG00000148484,protein_coding -35450,ST8SIA6,ENSG00000148488,protein_coding -35738,PARD3,ENSG00000148498,protein_coding -35780,ANKRD30A,ENSG00000148513,protein_coding -35690,ZEB1,ENSG00000148516,protein_coding -36124,FAM13C,ENSG00000148541,protein_coding -36163,NRBF2,ENSG00000148572,protein_coding -36066,A1CF,ENSG00000148584,protein_coding -36560,CDHR1,ENSG00000148600,protein_coding -36563,LRIT1,ENSG00000148602,protein_coding -36562,RGR,ENSG00000148604,protein_coding -36461,POLR3A,ENSG00000148606,protein_coding -36206,HERC4,ENSG00000148634,protein_coding -36430,LRMDA,ENSG00000148655,protein_coding -36388,CAMK2G,ENSG00000148660,protein_coding -36602,ADIRF,ENSG00000148671,protein_coding -36613,GLUD1,ENSG00000148672,protein_coding -36702,ANKRD1,ENSG00000148677,protein_coding -36699,HTR7,ENSG00000148680,protein_coding -36700,RPP30,ENSG00000148688,protein_coding -36758,FRA10AC1,ENSG00000148690,protein_coding -37074,ADD3,ENSG00000148700,protein_coding -37127,HABP2,ENSG00000148702,protein_coding -37183,VAX1,ENSG00000148704,protein_coding -36316,DNAJB12,ENSG00000148719,protein_coding -36280,EIF4EBP2,ENSG00000148730,protein_coding -36277,NPFFR1,ENSG00000148734,protein_coding -37131,PLEKHS1,ENSG00000148735,protein_coding -37121,TCF7L2,ENSG00000148737,protein_coding -37370,MKI67,ENSG00000148773,protein_coding -36986,CYP17A1,ENSG00000148795,protein_coding -37004,INA,ENSG00000148798,protein_coding -37444,FUOM,ENSG00000148803,protein_coding -37413,LRRC27,ENSG00000148814,protein_coding 
-37450,MTG1,ENSG00000148824,protein_coding -37421,NKX6-2,ENSG00000148826,protein_coding -37448,PAOX,ENSG00000148832,protein_coding -37033,GSTO1,ENSG00000148834,protein_coding -37007,TAF5,ENSG00000148835,protein_coding -36962,PPRC1,ENSG00000148840,protein_coding -37036,ITPRIP,ENSG00000148841,protein_coding -36997,CNNM2,ENSG00000148842,protein_coding -37010,PDCD11,ENSG00000148843,protein_coding -37349,ADAM12,ENSG00000148848,protein_coding -37225,RGS10,ENSG00000148908,protein_coding -32345,BTBD10,ENSG00000148925,protein_coding -32289,ADM,ENSG00000148926,protein_coding -32517,GAS2,ENSG00000148935,protein_coding -32549,SLC5A12,ENSG00000148942,protein_coding -32557,LIN7C,ENSG00000148943,protein_coding -32728,LRRC4C,ENSG00000148948,protein_coding -32605,IMMP1L,ENSG00000148950,protein_coding -32432,SAA4,ENSG00000148965,protein_coding -31998,PGAP2,ENSG00000148985,protein_coding -33354,TUT1,ENSG00000149016,protein_coding -33346,SCGB1A1,ENSG00000149021,protein_coding -31905,SYT8,ENSG00000149043,protein_coding -32200,ZNF214,ENSG00000149050,protein_coding -32199,ZNF215,ENSG00000149054,protein_coding -32760,HSD17B12,ENSG00000149084,protein_coding -32677,APIP,ENSG00000149089,protein_coding -32688,PAMR1,ENSG00000149090,protein_coding -32826,DGKZ,ENSG00000149091,protein_coding -32628,EIF3M,ENSG00000149100,protein_coding -33110,TNKS1BP1,ENSG00000149115,protein_coding -33184,GLYAT,ENSG00000149124,protein_coding -33128,SERPING1,ENSG00000149131,protein_coding -33030,OR5F1,ENSG00000149133,protein_coding -33112,SSRP1,ENSG00000149136,protein_coding -33122,SLC43A1,ENSG00000149150,protein_coding -32889,PTPRJ,ENSG00000149177,protein_coding -32847,C11orf49,ENSG00000149179,protein_coding -32850,ARFGAP2,ENSG00000149182,protein_coding -32869,CELF1,ENSG00000149187,protein_coding -34149,HIKESHI,ENSG00000149196,protein_coding -34151,CCDC81,ENSG00000149201,protein_coding -34365,SESN3,ENSG00000149212,protein_coding -34361,ENDOD1,ENSG00000149218,protein_coding 
-34379,CCDC82,ENSG00000149231,protein_coding -33974,KLHL35,ENSG00000149243,protein_coding -34063,TENM4,ENSG00000149256,protein_coding -33977,SERPINH1,ENSG00000149257,protein_coding -34021,CAPN5,ENSG00000149260,protein_coding -34042,INTS4,ENSG00000149262,protein_coding -34026,PAK1,ENSG00000149269,protein_coding -33971,RPS3,ENSG00000149273,protein_coding -34545,ZC3H12C,ENSG00000149289,protein_coding -34630,TTC12,ENSG00000149292,protein_coding -34626,NCAM1,ENSG00000149294,protein_coding -34634,DRD2,ENSG00000149295,protein_coding -34589,C11orf52,ENSG00000149300,protein_coding -34650,HTR3B,ENSG00000149305,protein_coding -34519,NPAT,ENSG00000149308,protein_coding -34520,ATM,ENSG00000149311,protein_coding -34488,AASDHPPT,ENSG00000149313,protein_coding -35136,GLB1L2,ENSG00000149328,protein_coding -53532,SLX4IP,ENSG00000149346,protein_coding -33844,LAMTOR1,ENSG00000149357,protein_coding -33925,P4HA3,ENSG00000149380,protein_coding -34845,GRIK4,ENSG00000149403,protein_coding -35077,ST14,ENSG00000149418,protein_coding -34794,HYOU1,ENSG00000149428,protein_coding -53773,GGTLC1,ENSG00000149435,protein_coding -53410,ADAM33,ENSG00000149451,protein_coding -33406,SLC22A8,ENSG00000149452,protein_coding -53632,KAT14,ENSG00000149474,protein_coding -33293,TKFC,ENSG00000149476,protein_coding -33355,MTA2,ENSG00000149480,protein_coding -33296,TMEM138,ENSG00000149483,protein_coding -33320,FADS1,ENSG00000149485,protein_coding -53370,TMC2,ENSG00000149488,protein_coding -33358,ROM1,ENSG00000149489,protein_coding -33356,EML3,ENSG00000149499,protein_coding -33333,INCENP,ENSG00000149503,protein_coding -33273,ZP1,ENSG00000149506,protein_coding -33246,OOSP2,ENSG00000149507,protein_coding -33247,MS4A3,ENSG00000149516,protein_coding -160,PLCH2,ENSG00000149527,protein_coding -33298,CPSF7,ENSG00000149532,protein_coding -33249,MS4A2,ENSG00000149534,protein_coding -33359,B3GAT3,ENSG00000149541,protein_coding -34989,EI24,ENSG00000149547,protein_coding -34977,CCDC15,ENSG00000149548,protein_coding 
-34992,CHEK1,ENSG00000149554,protein_coding -34987,FEZ1,ENSG00000149557,protein_coding -34963,ESAM,ENSG00000149564,protein_coding -35027,KIRREL3,ENSG00000149571,protein_coding -34741,MPZL2,ENSG00000149573,protein_coding -34737,SCN2B,ENSG00000149575,protein_coding -34711,SIDT2,ENSG00000149577,protein_coding -34756,TMEM25,ENSG00000149582,protein_coding -34712,TAGLN,ENSG00000149591,protein_coding -54211,JPH2,ENSG00000149596,protein_coding -53899,DUSP15,ENSG00000149599,protein_coding -53931,COMMD7,ENSG00000149600,protein_coding -53958,C20orf144,ENSG00000149609,protein_coding -54106,KIAA1755,ENSG00000149633,protein_coding -54299,SPATA25,ENSG00000149634,protein_coding -54324,OCSTAMP,ENSG00000149635,protein_coding -54073,DSN1,ENSG00000149636,protein_coding -54074,SOGA1,ENSG00000149639,protein_coding -54052,CNBD2,ENSG00000149646,protein_coding -54286,SPINT4,ENSG00000149651,protein_coding -54315,CDH22,ENSG00000149654,protein_coding -54617,LSM14B,ENSG00000149657,protein_coding -54678,YTHDF1,ENSG00000149658,protein_coding -54630,CABLES2,ENSG00000149679,protein_coding -33764,LTO1,ENSG00000149716,protein_coding -33500,GPHA2,ENSG00000149735,protein_coding -33416,SLC22A9,ENSG00000149742,protein_coding -33449,TRPT1,ENSG00000149743,protein_coding -33450,NUDT22,ENSG00000149761,protein_coding -33448,FERMT3,ENSG00000149781,protein_coding -33459,PLCB3,ENSG00000149782,protein_coding -33521,MRPL49,ENSG00000149792,protein_coding -33535,CDC42EP2,ENSG00000149798,protein_coding -33519,FAU,ENSG00000149806,protein_coding -33515,TM7SF2,ENSG00000149809,protein_coding -33513,VPS51,ENSG00000149823,protein_coding -47491,TBX6,ENSG00000149922,protein_coding -47490,PPP4C,ENSG00000149923,protein_coding -47488,ALDOA,ENSG00000149925,protein_coding -47486,FAM57B,ENSG00000149926,protein_coding -47484,DOC2A,ENSG00000149927,protein_coding -47482,HIRIP3,ENSG00000149929,protein_coding -47481,TAOK2,ENSG00000149930,protein_coding -47480,TMEM219,ENSG00000149932,protein_coding 
-39171,HMGA2,ENSG00000149948,protein_coding -34440,MMP3,ENSG00000149968,protein_coding -24819,CNKSR2,ENSG00000149970,protein_coding -34396,CNTN5,ENSG00000149972,protein_coding -37843,KLRF1,ENSG00000150045,protein_coding -37856,CLEC1A,ENSG00000150048,protein_coding -35620,MKX,ENSG00000150051,protein_coding -35627,MPP7,ENSG00000150054,protein_coding -35720,ITGB1,ENSG00000150093,protein_coding -35865,FXYD4,ENSG00000150201,protein_coding -32980,TRIM48,ENSG00000150244,protein_coding -33058,OR8K1,ENSG00000150261,protein_coding -33068,OR5M9,ENSG00000150269,protein_coding -36090,PCDH15,ENSG00000150275,protein_coding -47570,CTF1,ENSG00000150281,protein_coding -34351,CWC15,ENSG00000150316,protein_coding -3085,FCGR1A,ENSG00000150337,protein_coding -36149,ARID5B,ENSG00000150347,protein_coding -41378,KLHL1,ENSG00000150361,protein_coding -48212,CDH8,ENSG00000150394,protein_coding -41900,DCUN1D2,ENSG00000150401,protein_coding -41903,TMCO3,ENSG00000150403,protein_coding -34979,TMEM218,ENSG00000150433,protein_coding -35020,TIRAP,ENSG00000150455,protein_coding -40623,EEF1AKMT1,ENSG00000150456,protein_coding -40631,LATS2,ENSG00000150457,protein_coding -40638,SAP18,ENSG00000150459,protein_coding -13741,ADGRL3,ENSG00000150471,protein_coding -52689,KIAA1328,ENSG00000150477,protein_coding -41200,FAM124A,ENSG00000150510,protein_coding -42629,MIA2,ENSG00000150527,protein_coding -7984,HNMT,ENSG00000150540,protein_coding -7932,LYPD1,ENSG00000150551,protein_coding -8099,LYPD6B,ENSG00000150556,protein_coding -37091,PDCD4,ENSG00000150593,protein_coding -37099,ADRA2A,ENSG00000150594,protein_coding -15280,GPM6A,ENSG00000150625,protein_coding -15284,WDR17,ENSG00000150627,protein_coding -15286,SPATA4,ENSG00000150628,protein_coding -15292,VEGFC,ENSG00000150630,protein_coding -53109,CCDC102B,ENSG00000150636,protein_coding -53125,CD226,ENSG00000150637,protein_coding -53177,CNDP1,ENSG00000150656,protein_coding -44803,FSIP1,ENSG00000150667,protein_coding -34116,DLG2,ENSG00000150672,protein_coding 
-34137,CCDC83,ENSG00000150676,protein_coding -4247,RGS18,ENSG00000150681,protein_coding -34158,PRSS23,ENSG00000150687,protein_coding -15989,MTMR12,ENSG00000150712,protein_coding -8575,PPP1R1C,ENSG00000150722,protein_coding -34558,C11orf53,ENSG00000150750,protein_coding -15715,CCT5,ENSG00000150753,protein_coding -15713,ATPSCKMT,ENSG00000150756,protein_coding -37357,DOCK1,ENSG00000150760,protein_coding -34590,DIXDC1,ENSG00000150764,protein_coding -34594,DLAT,ENSG00000150768,protein_coding -34598,PIH1D2,ENSG00000150773,protein_coding -34599,NKAPD1,ENSG00000150776,protein_coding -34600,TIMM8B,ENSG00000150779,protein_coding -34603,IL18,ENSG00000150782,protein_coding -34605,TEX12,ENSG00000150783,protein_coding -34610,PTS,ENSG00000150787,protein_coding -35538,PIP4K2A,ENSG00000150867,protein_coding -5653,C2orf50,ENSG00000150873,protein_coding -40953,FREM2,ENSG00000150893,protein_coding -40975,FOXO1,ENSG00000150907,protein_coding -6066,CRIM1,ENSG00000150938,protein_coding -14552,SEC24D,ENSG00000150961,protein_coding -40224,ABCB9,ENSG00000150967,protein_coding -40243,RILPL2,ENSG00000150977,protein_coding -40299,DHX37,ENSG00000150990,protein_coding -40296,UBC,ENSG00000150991,protein_coding -9708,ITPR1,ENSG00000150995,protein_coding -15126,TKTL2,ENSG00000151005,protein_coding -47587,PRSS53,ENSG00000151006,protein_coding -14753,SLC7A11,ENSG00000151012,protein_coding -14764,NOCT,ENSG00000151014,protein_coding -35569,ENKUR,ENSG00000151023,protein_coding -35573,GPR158,ENSG00000151025,protein_coding -35680,LYZL2,ENSG00000151033,protein_coding -37522,CACNA2D4,ENSG00000151062,protein_coding -37527,DCP1B,ENSG00000151065,protein_coding -37528,CACNA1C,ENSG00000151067,protein_coding -37605,KCNA6,ENSG00000151079,protein_coding -10001,THRB,ENSG00000151090,protein_coding -10020,NGLY1,ENSG00000151092,protein_coding -10023,OXSM,ENSG00000151093,protein_coding -32451,UEVLD,ENSG00000151116,protein_coding -32458,TMEM86A,ENSG00000151117,protein_coding -39783,C12orf45,ENSG00000151131,protein_coding 
-39812,TMEM263,ENSG00000151135,protein_coding -39818,BTBD11,ENSG00000151136,protein_coding -39870,UBE3B,ENSG00000151148,protein_coding -36133,ANK3,ENSG00000151150,protein_coding -36110,IPMK,ENSG00000151151,protein_coding -39905,RAD9B,ENSG00000151164,protein_coding -39981,PLBD2,ENSG00000151176,protein_coding -36455,DLG5,ENSG00000151208,protein_coding -36526,MAT1A,ENSG00000151224,protein_coding -38363,SLC2A13,ENSG00000151229,protein_coding -38387,GXYLT1,ENSG00000151233,protein_coding -38420,TWF1,ENSG00000151239,protein_coding -35157,DIP2C,ENSG00000151240,protein_coding -14269,EIF4E,ENSG00000151247,protein_coding -10819,MAGI1,ENSG00000151276,protein_coding -41757,TEX30,ENSG00000151287,protein_coding -17266,CSNK1G3,ENSG00000151292,protein_coding -17238,SRFBP1,ENSG00000151304,protein_coding -42506,AKAP6,ENSG00000151320,protein_coding -42511,NPAS3,ENSG00000151322,protein_coding -42539,FAM177A1,ENSG00000151327,protein_coding -42575,MBIP,ENSG00000151332,protein_coding -42596,MIPOL1,ENSG00000151338,protein_coding -32776,EXT2,ENSG00000151348,protein_coding -5486,TMEM18,ENSG00000151353,protein_coding -5536,ALLC,ENSG00000151360,protein_coding -34045,KCTD14,ENSG00000151364,protein_coding -34048,THRSP,ENSG00000151365,protein_coding -34049,NDUFC2,ENSG00000151366,protein_coding -34155,ME3,ENSG00000151376,protein_coding -5738,MSGN1,ENSG00000151379,protein_coding -16011,ADAMTS12,ENSG00000151388,protein_coding -42489,NUBPL,ENSG00000151413,protein_coding -4309,NEK7,ENSG00000151414,protein_coding -4312,ATP6V1G3,ENSG00000151418,protein_coding -17066,FER,ENSG00000151422,protein_coding -43377,VIPAS39,ENSG00000151445,protein_coding -14634,ANKRD50,ENSG00000151458,protein_coding -35351,UPF2,ENSG00000151461,protein_coding -35358,CDC123,ENSG00000151465,protein_coding -14672,SCLT1,ENSG00000151466,protein_coding -35369,CCDC3,ENSG00000151468,protein_coding -14675,C4orf33,ENSG00000151470,protein_coding -35393,FRMD4A,ENSG00000151474,protein_coding -14653,SLC25A31,ENSG00000151475,protein_coding 
-38013,PTPRO,ENSG00000151490,protein_coding -38015,EPS8,ENSG00000151491,protein_coding -35133,ACAD8,ENSG00000151498,protein_coding -35132,THYN1,ENSG00000151500,protein_coding -35131,VPS26B,ENSG00000151502,protein_coding -35128,NCAPD3,ENSG00000151503,protein_coding -37113,VTI1A,ENSG00000151532,protein_coding -13233,QDPR,ENSG00000151552,protein_coding -37156,FAM160B1,ENSG00000151553,protein_coding -39693,ANO4,ENSG00000151572,protein_coding -45254,TEX9,ENSG00000151575,protein_coding -11338,QTRT2,ENSG00000151576,protein_coding -11340,DRD3,ENSG00000151577,protein_coding -14872,MMAA,ENSG00000151611,protein_coding -14876,ZNF827,ENSG00000151612,protein_coding -14890,POU4F2,ENSG00000151615,protein_coding -14900,EDNRA,ENSG00000151617,protein_coding -14912,NR3C2,ENSG00000151623,protein_coding -35229,AKR1C2,ENSG00000151632,protein_coding -37411,DPYSL4,ENSG00000151640,protein_coding -37431,VENTX,ENSG00000151650,protein_coding -37435,ADAM8,ENSG00000151651,protein_coding -35302,ITIH2,ENSG00000151655,protein_coding -35303,KIN,ENSG00000151657,protein_coding -6247,PIGF,ENSG00000151665,protein_coding -8656,ANKAR,ENSG00000151687,protein_coding -8668,INPP1,ENSG00000151689,protein_coding -8669,MFSD6,ENSG00000151690,protein_coding -5569,RNF144A,ENSG00000151692,protein_coding -5600,ASAP2,ENSG00000151693,protein_coding -5605,ADAM17,ENSG00000151694,protein_coding -35049,FLI1,ENSG00000151702,protein_coding -35051,KCNJ1,ENSG00000151704,protein_coding -35067,TMEM45B,ENSG00000151715,protein_coding -15344,WWC2,ENSG00000151718,protein_coding -15383,CENPU,ENSG00000151725,protein_coding -15384,ACSL1,ENSG00000151726,protein_coding -15399,SLC25A4,ENSG00000151729,protein_coding -38276,AMN1,ENSG00000151743,protein_coding -38296,BICD1,ENSG00000151746,protein_coding -42761,SAV1,ENSG00000151748,protein_coding -41037,CCDC122,ENSG00000151773,protein_coding -41052,SERP2,ENSG00000151778,protein_coding -5700,NBAS,ENSG00000151779,protein_coding -9978,ZNF385D,ENSG00000151789,protein_coding 
-15049,TDO2,ENSG00000151790,protein_coding -13523,GUF1,ENSG00000151806,protein_coding -42906,SLC35F4,ENSG00000151812,protein_coding -13537,GABRA2,ENSG00000151834,protein_coding -40682,SACS,ENSG00000151835,protein_coding -42944,CCDC175,ENSG00000151838,protein_coding -40728,PABPC3,ENSG00000151846,protein_coding -40721,CENPJ,ENSG00000151849,protein_coding -16146,FBXO4,ENSG00000151876,protein_coding -16183,TMEM267,ENSG00000151881,protein_coding -16182,CCL28,ENSG00000151882,protein_coding -16216,PARP8,ENSG00000151883,protein_coding -37164,GFRA1,ENSG00000151892,protein_coding -37207,CACUL1,ENSG00000151893,protein_coding -19980,DST,ENSG00000151914,protein_coding -19984,BEND6,ENSG00000151917,protein_coding -37227,TIAL1,ENSG00000151923,protein_coding -37230,BAG3,ENSG00000151929,protein_coding -40380,GLT1D1,ENSG00000151948,protein_coding -40383,TMEM132D,ENSG00000151952,protein_coding -15021,RBM46,ENSG00000151962,protein_coding -12169,SCHIP1,ENSG00000151967,protein_coding -20458,MCHR2,ENSG00000152034,protein_coding -9269,KCNE4,ENSG00000152049,protein_coding -9276,AP1S3,ENSG00000152056,protein_coding -3977,RABGAP1L,ENSG00000152061,protein_coding -7797,CCDC74B,ENSG00000152076,protein_coding -2356,TMEM56,ENSG00000152078,protein_coding -7799,MZT2B,ENSG00000152082,protein_coding -7800,TUBA3E,ENSG00000152086,protein_coding -4015,ASTN1,ENSG00000152092,protein_coding -7831,CFC1B,ENSG00000152093,protein_coding -7853,FAM168B,ENSG00000152102,protein_coding -4712,PTPN14,ENSG00000152104,protein_coding -7946,MGAT5,ENSG00000152127,protein_coding -7950,TMEM163,ENSG00000152128,protein_coding -6075,GPATCH11,ENSG00000152133,protein_coding -40085,HSPB8,ENSG00000152137,protein_coding -6118,GEMIN6,ENSG00000152147,protein_coding -6139,TMEM178A,ENSG00000152154,protein_coding -41482,POU4F1,ENSG00000152192,protein_coding -41483,RNF219,ENSG00000152193,protein_coding -41144,CYSLTR2,ENSG00000152207,protein_coding -14225,GRID2,ENSG00000152208,protein_coding -41162,ARL11,ENSG00000152213,protein_coding 
-52729,RIT2,ENSG00000152214,protein_coding -52740,SETBP1,ENSG00000152217,protein_coding -32595,ARL14EP,ENSG00000152219,protein_coding -52756,EPG5,ENSG00000152223,protein_coding -52757,PSTPIP2,ENSG00000152229,protein_coding -52761,ATP5F1A,ENSG00000152234,protein_coding -52762,HAUS1,ENSG00000152240,protein_coding -52764,C18orf25,ENSG00000152242,protein_coding -8328,SPC25,ENSG00000152253,protein_coding -8329,G6PC2,ENSG00000152254,protein_coding -8395,PDK1,ENSG00000152256,protein_coding -32349,PTH,ENSG00000152266,protein_coding -32371,PDE3B,ENSG00000152270,protein_coding -6877,TCF7L1,ENSG00000152284,protein_coding -6882,TGOLN2,ENSG00000152291,protein_coding -6893,SH2D6,ENSG00000152292,protein_coding -43513,KCNK13,ENSG00000152315,protein_coding -3730,UHMK1,ENSG00000152332,protein_coding -16756,ATG10,ENSG00000152348,protein_coding -16629,POC5,ENSG00000152359,protein_coding -17521,SPOCK1,ENSG00000152377,protein_coding -16730,FAM151B,ENSG00000152380,protein_coding -3806,TADA1,ENSG00000152382,protein_coding -34496,GUCY1A2,ENSG00000152402,protein_coding -34501,CWF19L2,ENSG00000152404,protein_coding -16699,JMY,ENSG00000152409,protein_coding -16701,HOMER1,ENSG00000152413,protein_coding -16778,XRCC4,ENSG00000152422,protein_coding -8763,BOLL,ENSG00000152430,protein_coding -57629,ZNF547,ENSG00000152433,protein_coding -57645,ZNF773,ENSG00000152439,protein_coding -57660,ZNF776,ENSG00000152443,protein_coding -57673,ZNF256,ENSG00000152454,protein_coding -35413,SUV39H2,ENSG00000152455,protein_coding -35414,DCLRE1C,ENSG00000152457,protein_coding -35420,OLAH,ENSG00000152463,protein_coding -35424,RPP38,ENSG00000152464,protein_coding -35425,NMT2,ENSG00000152465,protein_coding -57680,ZSCAN1,ENSG00000152467,protein_coding -57712,ZNF837,ENSG00000152475,protein_coding -40765,USP12,ENSG00000152484,protein_coding -12658,CCDC50,ENSG00000152492,protein_coding -17100,CAMK4,ENSG00000152495,protein_coding -17143,TRIM36,ENSG00000152503,protein_coding -6182,ZFP36L2,ENSG00000152518,protein_coding 
-40798,PAN3,ENSG00000152520,protein_coding -6189,PLEKHH2,ENSG00000152527,protein_coding -38506,PFKM,ENSG00000152556,protein_coding -34425,TMEM123,ENSG00000152558,protein_coding -34481,GRIA4,ENSG00000152578,protein_coding -12031,IGSF10,ENSG00000152580,protein_coding -16041,SPEF2,ENSG00000152582,protein_coding -14158,SPARCL1,ENSG00000152583,protein_coding -14160,DSPP,ENSG00000152591,protein_coding -14161,DMP1,ENSG00000152592,protein_coding -14165,MEPE,ENSG00000152595,protein_coding -12044,MBNL1,ENSG00000152601,protein_coding -16047,CAPSL,ENSG00000152611,protein_coding -16056,NADK2,ENSG00000152620,protein_coding -10084,GPD1L,ENSG00000152642,protein_coding -20754,GJA1,ENSG00000152661,protein_coding -16289,CCNO,ENSG00000152669,protein_coding -16300,DDX4,ENSG00000152670,protein_coding -6646,CLEC4F,ENSG00000152672,protein_coding -6018,SLC30A6,ENSG00000152683,protein_coding -16241,PELO,ENSG00000152684,protein_coding -6043,RASGRP3,ENSG00000152689,protein_coding -17455,SAR1B,ENSG00000152700,protein_coding -17475,CATSPER3,ENSG00000152705,protein_coding -41618,GPR180,ENSG00000152749,protein_coding -1920,TCTEX1D1,ENSG00000152760,protein_coding -1922,WDR78,ENSG00000152763,protein_coding -36656,ANKRD22,ENSG00000152766,protein_coding -41668,FARP1,ENSG00000152767,protein_coding -36676,IFIT5,ENSG00000152778,protein_coding -36677,SLC16A12,ENSG00000152779,protein_coding -36679,PANK1,ENSG00000152782,protein_coding -14056,PRDM8,ENSG00000152784,protein_coding -14060,BMP3,ENSG00000152785,protein_coding -14078,HNRNPDL,ENSG00000152795,protein_coding -36740,HHEX,ENSG00000152804,protein_coding -21094,UTRN,ENSG00000152818,protein_coding -21110,GRM1,ENSG00000152822,protein_coding -20830,PTPRK,ENSG00000152894,protein_coding -5150,GGPS1,ENSG00000152904,protein_coding -48628,CNTNAP4,ENSG00000152910,protein_coding -22738,ZNF117,ENSG00000152926,protein_coding -16355,RAB3C,ENSG00000152932,protein_coding -38146,LMNTD1,ENSG00000152936,protein_coding -16500,MARVELD2,ENSG00000152939,protein_coding 
-16499,RAD17,ENSG00000152942,protein_coding -38177,MED21,ENSG00000152944,protein_coding -11923,PLOD2,ENSG00000152952,protein_coding -13006,STK32B,ENSG00000152953,protein_coding -18869,NRSN1,ENSG00000152954,protein_coding -13015,JAKMIP1,ENSG00000152969,protein_coding -11939,ZIC1,ENSG00000152977,protein_coding -13270,ADGRA3,ENSG00000152990,protein_coding -11954,CPB1,ENSG00000153002,protein_coding -16419,SREK1IP1,ENSG00000153006,protein_coding -13291,LGI2,ENSG00000153012,protein_coding -16420,CWC27,ENSG00000153015,protein_coding -4094,MR1,ENSG00000153029,protein_coding -17126,SRP19,ENSG00000153037,protein_coding -16490,CENPH,ENSG00000153044,protein_coding -18594,CDYL,ENSG00000153046,protein_coding -46877,CARHSP1,ENSG00000153048,protein_coding -46911,TEKT5,ENSG00000153060,protein_coding -14315,BANK1,ENSG00000153064,protein_coding -46952,TXNDC11,ENSG00000153066,protein_coding -16113,DAB2,ENSG00000153071,protein_coding -7955,ACMSD,ENSG00000153086,protein_coding -7500,ACOXL,ENSG00000153093,protein_coding -7504,BCL2L11,ENSG00000153094,protein_coding -7520,ANAPC1,ENSG00000153107,protein_coding -16942,CAST,ENSG00000153113,protein_coding -14794,SCOC,ENSG00000153130,protein_coding -14798,CLGN,ENSG00000153132,protein_coding -16857,CETN3,ENSG00000153140,protein_coding -14837,SMARCA5,ENSG00000153147,protein_coding -18700,SYCP2L,ENSG00000153157,protein_coding -18653,BMP6,ENSG00000153162,protein_coding -7400,RGPD3,ENSG00000153165,protein_coding -39138,RASSF3,ENSG00000153179,protein_coding -5319,HNRNPU,ENSG00000153187,protein_coding -7443,RANBP2,ENSG00000153201,protein_coding -5358,AHCTF1,ENSG00000153207,protein_coding -7527,MERTK,ENSG00000153208,protein_coding -7532,TMEM87B,ENSG00000153214,protein_coding -5401,OR14K1,ENSG00000153230,protein_coding -39277,PTPRR,ENSG00000153233,protein_coding -8179,NR4A2,ENSG00000153234,protein_coding -8204,CCDC148,ENSG00000153237,protein_coding -8238,PLA2R1,ENSG00000153246,protein_coding -8242,RBMS1,ENSG00000153250,protein_coding 
-8294,SCN3A,ENSG00000153253,protein_coding -10779,FEZF2,ENSG00000153266,protein_coding -11269,CD96,ENSG00000153283,protein_coding -19826,SLC25A27,ENSG00000153291,protein_coding -19835,ADGRF1,ENSG00000153292,protein_coding -19844,ADGRF4,ENSG00000153294,protein_coding -21480,FRMD1,ENSG00000153303,protein_coding -29157,FAM49B,ENSG00000153310,protein_coding -29165,ASAP1,ENSG00000153317,protein_coding -52604,TRAPPC8,ENSG00000153339,protein_coding -16915,FAM81B,ENSG00000153347,protein_coding -52666,INO80C,ENSG00000153391,protein_coding -15558,LPCAT1,ENSG00000153395,protein_coding -15504,PLEKHG4B,ENSG00000153404,protein_coding -46796,NMRAL1,ENSG00000153406,protein_coding -46803,UBALD1,ENSG00000153443,protein_coding -46828,C16orf89,ENSG00000153446,protein_coding -43575,TMEM251,ENSG00000153485,protein_coding -41841,ING1,ENSG00000153487,protein_coding -41859,TEX29,ENSG00000153495,protein_coding -41871,SPACA7,ENSG00000153498,protein_coding -41899,ADPRHL1,ENSG00000153531,protein_coding -10091,CMTM7,ENSG00000153551,protein_coding -10114,FBXL2,ENSG00000153558,protein_coding -10115,UBP1,ENSG00000153560,protein_coding -6937,RMND5A,ENSG00000153561,protein_coding -6938,CD8A,ENSG00000153563,protein_coding -6991,RPIA,ENSG00000153574,protein_coding -44542,GOLGA8F,ENSG00000153684,protein_coding -29606,PTPRD,ENSG00000153707,protein_coding -29632,LURAP1L,ENSG00000153714,protein_coding -21258,CNKSR3,ENSG00000153721,protein_coding -11432,GTF2E1,ENSG00000153767,protein_coding -48594,CFDP1,ENSG00000153774,protein_coding -25864,TGIF2LX,ENSG00000153779,protein_coding -48797,ZDHHC7,ENSG00000153786,protein_coding -48799,FAM92B,ENSG00000153789,protein_coding -21945,C7orf31,ENSG00000153790,protein_coding -13802,TMPRSS11D,ENSG00000153802,protein_coding -22034,JAZF1,ENSG00000153814,protein_coding -48725,CMIP,ENSG00000153815,protein_coding -9330,SPHKAP,ENSG00000153820,protein_coding -51577,KCNJ16,ENSG00000153822,protein_coding -9337,PID1,ENSG00000153823,protein_coding 
-9344,TRIP12,ENSG00000153827,protein_coding -9346,FBXO36,ENSG00000153832,protein_coding -56177,CEBPG,ENSG00000153879,protein_coding -56182,KCTD15,ENSG00000153885,protein_coding -56214,ZNF599,ENSG00000153896,protein_coding -2158,MCOLN2,ENSG00000153898,protein_coding -56238,LGI4,ENSG00000153902,protein_coding -2168,DDAH1,ENSG00000153904,protein_coding -16443,SREK1,ENSG00000153914,protein_coding -16983,CHD1,ENSG00000153922,protein_coding -51162,ANKFN1,ENSG00000153930,protein_coding -51169,DGKE,ENSG00000153933,protein_coding -2195,HS2ST1,ENSG00000153936,protein_coding -51187,MSI2,ENSG00000153944,protein_coding -23074,CACNA2D1,ENSG00000153956,protein_coding -20703,ZUP1,ENSG00000153975,protein_coding -49509,HS3ST3A1,ENSG00000153976,protein_coding -51263,GDPD1,ENSG00000153982,protein_coding -20722,NUS1,ENSG00000153989,protein_coding -23093,SEMA3D,ENSG00000153993,protein_coding -43006,PPP2R5E,ENSG00000154001,protein_coding -2044,ASB17,ENSG00000154007,protein_coding -49736,GRAP,ENSG00000154016,protein_coding -49732,SLC5A10,ENSG00000154025,protein_coding -2060,AK5,ENSG00000154027,protein_coding -52502,CABYR,ENSG00000154040,protein_coding -52511,IMPACT,ENSG00000154059,protein_coding -52491,ANKRD29,ENSG00000154065,protein_coding -20081,SDHAF4,ENSG00000154079,protein_coding -52555,CHST9,ENSG00000154080,protein_coding -34821,THY1,ENSG00000154096,protein_coding -48774,DNAAF1,ENSG00000154099,protein_coding -48813,C16orf74,ENSG00000154102,protein_coding -34849,TBCEL,ENSG00000154114,protein_coding -48884,JPH3,ENSG00000154118,protein_coding -15765,ANKH,ENSG00000154122,protein_coding -15764,OTULIN,ENSG00000154124,protein_coding -34875,UBASH3B,ENSG00000154127,protein_coding -34971,ROBO4,ENSG00000154133,protein_coding -34970,ROBO3,ENSG00000154134,protein_coding -34955,PANX3,ENSG00000154143,protein_coding -34956,TBRG1,ENSG00000154144,protein_coding -34960,NRGN,ENSG00000154146,protein_coding -15790,RETREG1,ENSG00000154153,protein_coding -15871,CDH12,ENSG00000154162,protein_coding 
-11115,GPR15,ENSG00000154165,protein_coding -11145,TOMM70,ENSG00000154174,protein_coding -11152,ABI3BP,ENSG00000154175,protein_coding -28897,ANGPT1,ENSG00000154188,protein_coding -51509,PITPNC1,ENSG00000154217,protein_coding -1649,CC2D1B,ENSG00000154222,protein_coding -46374,CERS3,ENSG00000154227,protein_coding -51487,PRKCA,ENSG00000154229,protein_coding -46394,LRRK1,ENSG00000154237,protein_coding -51480,CEP112,ENSG00000154240,protein_coding -9648,GAL3ST2,ENSG00000154252,protein_coding -51557,ABCA9,ENSG00000154258,protein_coding -51558,ABCA6,ENSG00000154262,protein_coding -51562,ABCA10,ENSG00000154263,protein_coding -51565,ABCA5,ENSG00000154265,protein_coding -20862,ENPP3,ENSG00000154269,protein_coding -13390,C4orf19,ENSG00000154274,protein_coding -13477,UCHL1,ENSG00000154277,protein_coding -4819,MIA3,ENSG00000154305,protein_coding -4828,DISP1,ENSG00000154309,protein_coding -12311,TNIK,ENSG00000154310,protein_coding -27267,FAM167A,ENSG00000154319,protein_coding -27275,NEIL2,ENSG00000154328,protein_coding -30412,PGM5,ENSG00000154330,protein_coding -4949,WNT3A,ENSG00000154342,protein_coding -4964,OBSCN,ENSG00000154358,protein_coding -27338,LONRF1,ENSG00000154359,protein_coding -4970,TRIM11,ENSG00000154370,protein_coding -4876,ENAH,ENSG00000154380,protein_coding -23628,PPP1R3A,ENSG00000154415,protein_coding -5019,CCSAP,ENSG00000154429,protein_coding -23688,ASZ1,ENSG00000154438,protein_coding -15200,SH3RF1,ENSG00000154447,protein_coding -2230,GBP5,ENSG00000154451,protein_coding -37292,BUB3,ENSG00000154473,protein_coding -37299,GPR26,ENSG00000154478,protein_coding -8341,CCDC173,ENSG00000154479,protein_coding -37339,MMP21,ENSG00000154485,protein_coding -37354,C10orf90,ENSG00000154493,protein_coding -2298,DIPK1A,ENSG00000154511,protein_coding -8448,ATP5MC3,ENSG00000154518,protein_coding -30205,CNTNAP3B,ENSG00000154529,protein_coding -25334,MAGED4,ENSG00000154545,protein_coding -20353,SRSF12,ENSG00000154548,protein_coding -15410,PDLIM3,ENSG00000154553,protein_coding 
-15411,SORBS2,ENSG00000154556,protein_coding -28354,ELOC,ENSG00000154582,protein_coding -28358,LY96,ENSG00000154589,protein_coding -52534,PSMA8,ENSG00000154611,protein_coding -57937,TMSB4Y,ENSG00000154620,protein_coding -59881,CXADR,ENSG00000154639,protein_coding -59885,BTG3,ENSG00000154640,protein_coding -59890,C21orf91,ENSG00000154642,protein_coding -59894,CHODL,ENSG00000154645,protein_coding -59896,TMPRSS15,ENSG00000154646,protein_coding -59924,NCAM2,ENSG00000154654,protein_coding -52176,L3MBTL4,ENSG00000154655,protein_coding -22107,PDE1C,ENSG00000154678,protein_coding -22799,RABGEF1,ENSG00000154710,protein_coding -59977,MRPL39,ENSG00000154719,protein_coding -59978,JAM2,ENSG00000154721,protein_coding -59982,ATP5PF,ENSG00000154723,protein_coding -59983,GABPA,ENSG00000154727,protein_coding -59997,ADAMTS1,ENSG00000154734,protein_coding -59999,ADAMTS5,ENSG00000154736,protein_coding -9841,TSEN2,ENSG00000154743,protein_coding -50302,SLFN13,ENSG00000154760,protein_coding -9866,WNT7A,ENSG00000154764,protein_coding -9876,XPC,ENSG00000154767,protein_coding -9887,CCDC174,ENSG00000154781,protein_coding -9892,FGD5,ENSG00000154783,protein_coding -49642,FLCN,ENSG00000154803,protein_coding -9926,DPH3,ENSG00000154813,protein_coding -9927,OXNAD1,ENSG00000154814,protein_coding -9940,PLCL2,ENSG00000154822,protein_coding -52859,CXXC1,ENSG00000154832,protein_coding -52863,SKA1,ENSG00000154839,protein_coding -52243,PPP4R1,ENSG00000154845,protein_coding -52267,APCDD1,ENSG00000154856,protein_coding -52277,PIEZO2,ENSG00000154864,protein_coding -52303,MPPE1,ENSG00000154889,protein_coding -49440,USP43,ENSG00000154914,protein_coding -11739,RAB6B,ENSG00000154917,protein_coding -51077,EME1,ENSG00000154920,protein_coding -11755,EPHB1,ENSG00000154928,protein_coding -53788,ACSS1,ENSG00000154930,protein_coding -51096,ANKRD40,ENSG00000154945,protein_coding -49488,ZNF18,ENSG00000154957,protein_coding -51127,CA10,ENSG00000154975,protein_coding -22487,VOPP1,ENSG00000154978,protein_coding 
-22503,SEPT14,ENSG00000154997,protein_coding -25822,APOOL,ENSG00000155008,protein_coding -14387,DKK2,ENSG00000155011,protein_coding -14395,CYP2U1,ENSG00000155016,protein_coding -21679,RSPH10B,ENSG00000155026,protein_coding -21665,FBXL18,ENSG00000155034,protein_coding -7704,CNTNAP5,ENSG00000155052,protein_coding -7145,PROM2,ENSG00000155066,protein_coding -20587,AK9,ENSG00000155085,protein_coding -28825,ODF1,ENSG00000155087,protein_coding -28827,KLF10,ENSG00000155090,protein_coding -24537,PTPRN2,ENSG00000155093,protein_coding -28831,AZIN1,ENSG00000155096,protein_coding -28839,ATP6V1C1,ENSG00000155097,protein_coding -28588,PIP4P2,ENSG00000155099,protein_coding -28592,OTUD6B,ENSG00000155100,protein_coding -20602,CDK19,ENSG00000155111,protein_coding -20613,GTF3C6,ENSG00000155115,protein_coding -29666,TTC39B,ENSG00000155158,protein_coding -27070,AGPAT5,ENSG00000155189,protein_coding -36845,MMS19,ENSG00000155229,protein_coding -42015,OR4K1,ENSG00000155249,protein_coding -36852,PI4K2A,ENSG00000155252,protein_coding -36854,MARVELD1,ENSG00000155254,protein_coding -36855,ZFYVE27,ENSG00000155256,protein_coding -36858,GOLGA7B,ENSG00000155265,protein_coding -13068,GPR78,ENSG00000155269,protein_coding -13065,TRMT44,ENSG00000155275,protein_coding -36879,SLC25A28,ENSG00000155287,protein_coding -59835,HSPA13,ENSG00000155304,protein_coding -59836,SAMSN1,ENSG00000155307,protein_coding -59857,USP25,ENSG00000155313,protein_coding -17293,GRAMD2B,ENSG00000155324,protein_coding -17412,ZCCHC10,ENSG00000155329,protein_coding -47865,C16orf87,ENSG00000155330,protein_coding -2661,MOV10,ENSG00000155363,protein_coding -2663,RHOC,ENSG00000155366,protein_coding -2665,PPM1J,ENSG00000155367,protein_coding -7647,DBI,ENSG00000155368,protein_coding -2672,SLC16A1,ENSG00000155380,protein_coding -47942,HEATR3,ENSG00000155393,protein_coding -22865,TRIM74,ENSG00000155428,protein_coding -7689,NIFK,ENSG00000155438,protein_coding -42279,OXA1L,ENSG00000155463,protein_coding 
-42280,SLC7A7,ENSG00000155465,protein_coding -26657,MAGEC1,ENSG00000155495,protein_coding -17969,LARP1,ENSG00000155506,protein_coding -17975,CNOT8,ENSG00000155508,protein_coding -17949,GRIA1,ENSG00000155511,protein_coding -23957,LRGUK,ENSG00000155530,protein_coding -16330,SETD9,ENSG00000155542,protein_coding -16331,MIER3,ENSG00000155545,protein_coding -23983,NUP205,ENSG00000155561,protein_coding -47321,ZKSCAN2,ENSG00000155592,protein_coding -30460,C9orf85,ENSG00000155621,protein_coding -25340,XAGE2,ENSG00000155622,protein_coding -36819,PIK3AP1,ENSG00000155629,protein_coding -8522,RBM45,ENSG00000155636,protein_coding -8532,TTN,ENSG00000155657,protein_coding -25527,VSIG4,ENSG00000155659,protein_coding -24338,PDIA4,ENSG00000155660,protein_coding -47342,KDM8,ENSG00000155666,protein_coding -47237,PDZD9,ENSG00000155714,protein_coding -47228,OTOA,ENSG00000155719,protein_coding -8788,KCTD18,ENSG00000155729,protein_coding -8809,FAM126B,ENSG00000155744,protein_coding -8828,FLACC1,ENSG00000155749,protein_coding -8833,C2CD6,ENSG00000155754,protein_coding -8841,TMEM237,ENSG00000155755,protein_coding -8853,FZD7,ENSG00000155760,protein_coding -2785,SPAG17,ENSG00000155761,protein_coding -29004,DEPTOR,ENSG00000155792,protein_coding -5229,FMN2,ENSG00000155816,protein_coding -30976,RNF20,ENSG00000155827,protein_coding -30987,CYLC2,ENSG00000155833,protein_coding -17875,PPARGC1B,ENSG00000155846,protein_coding -22197,ELMO1,ENSG00000155849,protein_coding -17880,SLC26A2,ENSG00000155850,protein_coding -18016,LSM11,ENSG00000155858,protein_coding -17996,MED7,ENSG00000155868,protein_coding -29698,SAXO1,ENSG00000155875,protein_coding -29700,RRAGA,ENSG00000155876,protein_coding -29717,SLC24A2,ENSG00000155886,protein_coding -11845,TRIM42,ENSG00000155890,protein_coding -11852,PXYLP1,ENSG00000155893,protein_coding -29172,ADCY8,ENSG00000155897,protein_coding -11859,RASA2,ENSG00000155903,protein_coding -21217,RMND1,ENSG00000155906,protein_coding -21186,RAET1L,ENSG00000155918,protein_coding 
-29193,SLA,ENSG00000155926,protein_coding -39182,TMBIM4,ENSG00000155957,protein_coding -26952,VBP1,ENSG00000155959,protein_coding -26954,RAB39B,ENSG00000155961,protein_coding -26955,CLIC2,ENSG00000155962,protein_coding -26740,AFF2,ENSG00000155966,protein_coding -27383,MICU3,ENSG00000155970,protein_coding -39190,GRIP1,ENSG00000155974,protein_coding -27388,VPS37A,ENSG00000155975,protein_coding -39012,KIF5A,ENSG00000155980,protein_coding -27419,NAT2,ENSG00000156006,protein_coding -26767,MAGEA8,ENSG00000156009,protein_coding -27420,PSD3,ENSG00000156011,protein_coding -30497,CARNMT1,ENSG00000156017,protein_coding -36327,MCU,ENSG00000156026,protein_coding -43248,ELMSAN1,ENSG00000156030,protein_coding -36352,CFAP70,ENSG00000156042,protein_coding -30527,GNA14,ENSG00000156049,protein_coding -43261,FAM161B,ENSG00000156050,protein_coding -30531,GNAQ,ENSG00000156052,protein_coding -39157,WIF1,ENSG00000156076,protein_coding -13864,UGT2B4,ENSG00000156096,protein_coding -2550,GPR61,ENSG00000156097,protein_coding -28561,MMP16,ENSG00000156103,protein_coding -36396,ADK,ENSG00000156110,protein_coding -36443,KCNMA1,ENSG00000156113,protein_coding -43317,BATF,ENSG00000156127,protein_coding -13903,DCK,ENSG00000156136,protein_coding -13911,ADAMTS3,ENSG00000156140,protein_coding -2576,ALX3,ENSG00000156150,protein_coding -28659,DPY19L4,ENSG00000156162,protein_coding -28665,NDUFAF6,ENSG00000156170,protein_coding -2612,DRAM2,ENSG00000156171,protein_coding -28678,C8orf37,ENSG00000156172,protein_coding -13977,PPEF2,ENSG00000156194,protein_coding -45916,CFAP161,ENSG00000156206,protein_coding -45987,ADAMTSL3,ENSG00000156218,protein_coding -13982,ART3,ENSG00000156219,protein_coding -46033,SLC28A1,ENSG00000156222,protein_coding -45968,WHAMM,ENSG00000156232,protein_coding -14019,CXCL13,ENSG00000156234,protein_coding -60016,N6AMT1,ENSG00000156239,protein_coding -60021,RWDD2B,ENSG00000156253,protein_coding -60023,USP16,ENSG00000156256,protein_coding -60024,CCT8,ENSG00000156261,protein_coding 
-60027,MAP3K7CL,ENSG00000156265,protein_coding -14044,NAA11,ENSG00000156269,protein_coding -60033,BACH1,ENSG00000156273,protein_coding -60044,CLDN17,ENSG00000156282,protein_coding -60046,CLDN8,ENSG00000156284,protein_coding -25008,TSPAN7,ENSG00000156298,protein_coding -60093,TIAM1,ENSG00000156299,protein_coding -60102,SCAF4,ENSG00000156304,protein_coding -25001,RPGR,ENSG00000156313,protein_coding -30666,CDK20,ENSG00000156345,protein_coding -37005,PCGF6,ENSG00000156374,protein_coding -43893,ANKRD9,ENSG00000156381,protein_coding -37030,SFR1,ENSG00000156384,protein_coding -37044,SORCS3,ENSG00000156395,protein_coding -36983,SFXN2,ENSG00000156398,protein_coding -43952,ATP5MPL,ENSG00000156411,protein_coding -55069,FUT6,ENSG00000156413,protein_coding -43953,TDRD9,ENSG00000156414,protein_coding -18176,FGF18,ENSG00000156427,protein_coding -17740,PCDH1,ENSG00000156453,protein_coding -17792,SH3RF2,ENSG00000156463,protein_coding -28687,GDF6,ENSG00000156466,protein_coding -28688,UQCRB,ENSG00000156467,protein_coding -28690,MTERF3,ENSG00000156469,protein_coding -28691,PTDSS1,ENSG00000156471,protein_coding -17810,PPP2R2B,ENSG00000156475,protein_coding -28712,RPL30,ENSG00000156482,protein_coding -28724,KCNS2,ENSG00000156486,protein_coding -26519,FAM122C,ENSG00000156500,protein_coding -36247,SUPV3L1,ENSG00000156502,protein_coding -26518,FAM122B,ENSG00000156504,protein_coding -20142,EEF1A1,ENSG00000156508,protein_coding -28756,FBXO43,ENSG00000156509,protein_coding -36249,HKDC1,ENSG00000156510,protein_coding -36251,HK1,ENSG00000156515,protein_coding -36272,TYSND1,ENSG00000156521,protein_coding -26502,PHF6,ENSG00000156531,protein_coding -20148,CD109,ENSG00000156535,protein_coding -19654,LRFN2,ENSG00000156564,protein_coding -36281,NODAL,ENSG00000156574,protein_coding -33114,PRG3,ENSG00000156575,protein_coding -33126,UBE2L6,ENSG00000156587,protein_coding -33133,ZDHHC5,ENSG00000156599,protein_coding -33134,MED19,ENSG00000156603,protein_coding -19616,ZFAND3,ENSG00000156639,protein_coding 
-45664,NPTN,ENSG00000156642,protein_coding -36408,KAT6B,ENSG00000156650,protein_coding -36415,SAMD8,ENSG00000156671,protein_coding -27778,RAB11FIP1,ENSG00000156675,protein_coding -27738,UNC5D,ENSG00000156687,protein_coding -33188,GLYATL2,ENSG00000156689,protein_coding -26426,UTP14A,ENSG00000156697,protein_coding -26429,AIFM1,ENSG00000156709,protein_coding -19566,MAPK13,ENSG00000156711,protein_coding -27796,BAG4,ENSG00000156735,protein_coding -33259,MS4A1,ENSG00000156738,protein_coding -29035,TBC1D31,ENSG00000156787,protein_coding -29056,WDYHV1,ENSG00000156795,protein_coding -29050,ATAD2,ENSG00000156802,protein_coding -29058,FBXO32,ENSG00000156804,protein_coding -29097,NSMCE2,ENSG00000156831,protein_coding -47543,ZNF689,ENSG00000156853,protein_coding -47547,PRR14,ENSG00000156858,protein_coding -47548,FBRS,ENSG00000156860,protein_coding -2406,FRRS1,ENSG00000156869,protein_coding -47557,PHKG2,ENSG00000156873,protein_coding -2416,MFSD14A,ENSG00000156875,protein_coding -2418,SASS6,ENSG00000156876,protein_coding -47612,COX6A2,ENSG00000156885,protein_coding -47610,ITGAD,ENSG00000156886,protein_coding -26580,ADGRG4,ENSG00000156920,protein_coding -26606,ZIC3,ENSG00000156925,protein_coding -21912,MALSU1,ENSG00000156928,protein_coding -12549,VPS8,ENSG00000156931,protein_coding -45118,GALK2,ENSG00000156958,protein_coding -9761,LHFPL4,ENSG00000156959,protein_coding -9387,B3GNT7,ENSG00000156966,protein_coding -47051,MPV17L,ENSG00000156968,protein_coding -44820,BUB1B,ENSG00000156970,protein_coding -9409,PDE6D,ENSG00000156973,protein_coding -12591,EIF4A2,ENSG00000156976,protein_coding -9766,BRPF1,ENSG00000156983,protein_coding -9774,RPUSD3,ENSG00000156990,protein_coding -12616,SST,ENSG00000157005,protein_coding -9798,TATDN2,ENSG00000157014,protein_coding -9801,GHRL,ENSG00000157017,protein_coding -9804,SEC13,ENSG00000157020,protein_coding -10189,EXOG,ENSG00000157036,protein_coding -47038,NTAN1,ENSG00000157045,protein_coding -4130,SHCBP1L,ENSG00000157060,protein_coding 
-4140,NMNAT2,ENSG00000157064,protein_coding -1641,ZFYVE9,ENSG00000157077,protein_coding -9805,ATP2B2,ENSG00000157087,protein_coding -10268,LYZL4,ENSG00000157093,protein_coding -9814,SLC6A1,ENSG00000157103,protein_coding -47138,SMG1,ENSG00000157106,protein_coding -16569,FCHO2,ENSG00000157107,protein_coding -27675,RBPMS,ENSG00000157110,protein_coding -16572,TMEM171,ENSG00000157111,protein_coding -10279,KLHL40,ENSG00000157119,protein_coding -1775,C8A,ENSG00000157131,protein_coding -9836,TIMP4,ENSG00000157150,protein_coding -9834,SYN2,ENSG00000157152,protein_coding -27698,NRG1,ENSG00000157168,protein_coding -4199,ODR4,ENSG00000157181,protein_coding -1685,CPT2,ENSG00000157184,protein_coding -556,NECAP2,ENSG00000157191,protein_coding -1691,LRP8,ENSG00000157193,protein_coding -1719,CDCP2,ENSG00000157211,protein_coding -24479,PAXIP1,ENSG00000157212,protein_coding -23141,STEAP2,ENSG00000157214,protein_coding -1723,SSBP3,ENSG00000157216,protein_coding -24484,HTR5A,ENSG00000157219,protein_coding -23149,CLDN12,ENSG00000157224,protein_coding -42282,MMP14,ENSG00000157227,protein_coding -23157,FZD1,ENSG00000157240,protein_coding -23177,GATAD1,ENSG00000157259,protein_coding -30773,SUSD3,ENSG00000157303,protein_coding -48423,TMED6,ENSG00000157315,protein_coding -48440,CLEC18A,ENSG00000157322,protein_coding -42345,DHRS4,ENSG00000157326,protein_coding -441,C1orf158,ENSG00000157330,protein_coding -48449,CLEC18C,ENSG00000157335,protein_coding -19555,ARMC12,ENSG00000157343,protein_coding -48455,DDX19B,ENSG00000157349,protein_coding -48460,ST3GAL2,ENSG00000157350,protein_coding -48464,FCSK,ENSG00000157353,protein_coding -48469,IL34,ENSG00000157368,protein_coding -42386,DHRS1,ENSG00000157379,protein_coding -10673,CACNA1D,ENSG00000157388,protein_coding -24598,ARSE,ENSG00000157399,protein_coding -13645,KIT,ENSG00000157404,protein_coding -48480,HYDIN,ENSG00000157423,protein_coding -13683,AASDH,ENSG00000157426,protein_coding -48494,ZNF19,ENSG00000157429,protein_coding 
-10687,CACNA2D3,ENSG00000157445,protein_coding -45319,RNF111,ENSG00000157450,protein_coding -45325,CCNB2,ENSG00000157456,protein_coding -45335,FAM81A,ENSG00000157470,protein_coding -45328,MYO1E,ENSG00000157483,protein_coding -10715,APPL1,ENSG00000157500,protein_coding -26087,PWWP3B,ENSG00000157502,protein_coding -17856,AFAP1L1,ENSG00000157510,protein_coding -26112,TSC22D3,ENSG00000157514,protein_coding -60243,VPS26C,ENSG00000157538,protein_coding -60247,DYRK1A,ENSG00000157540,protein_coding -60249,KCNJ6,ENSG00000157542,protein_coding -60254,KCNJ15,ENSG00000157551,protein_coding -60260,ERG,ENSG00000157554,protein_coding -60264,ETS2,ENSG00000157557,protein_coding -32787,TSPAN18,ENSG00000157570,protein_coding -60290,LCA5L,ENSG00000157578,protein_coding -19794,SLC35B2,ENSG00000157593,protein_coding -26143,TMEM164,ENSG00000157600,protein_coding -60317,MX1,ENSG00000157601,protein_coding -32825,CREB3L1,ENSG00000157613,protein_coding -60334,C2CD2,ENSG00000157617,protein_coding -24933,TAB3,ENSG00000157625,protein_coding -51926,SLC38A10,ENSG00000157637,protein_coding -31160,C9orf43,ENSG00000157653,protein_coding -31084,PALM2-AKAP2,ENSG00000157654,protein_coding -31168,ZNF618,ENSG00000157657,protein_coding -24010,DGKI,ENSG00000157680,protein_coding -31181,TMEM268,ENSG00000157693,protein_coding -24030,SVOPL,ENSG00000157703,protein_coding -45431,SNX22,ENSG00000157734,protein_coding -24047,UBN2,ENSG00000157741,protein_coding -24081,BRAF,ENSG00000157764,protein_coding -13304,SLC34A2,ENSG00000157765,protein_coding -46096,ACAN,ENSG00000157766,protein_coding -21595,PSMG3,ENSG00000157778,protein_coding -40137,CABP1,ENSG00000157782,protein_coding -13423,WDR19,ENSG00000157796,protein_coding -24068,SLC37A3,ENSG00000157800,protein_coding -46135,AP3S2,ENSG00000157823,protein_coding -8138,FMNL2,ENSG00000157827,protein_coding -5880,GAREM2,ENSG00000157833,protein_coding -40147,SPPL3,ENSG00000157837,protein_coding -5896,DPYSL5,ENSG00000157851,protein_coding 
-5886,DRC1,ENSG00000157856,protein_coding -13177,RAB28,ENSG00000157869,protein_coding -170,PRXL2B,ENSG00000157870,protein_coding -167,TNFRSF14,ENSG00000157873,protein_coding -163,PANK4,ENSG00000157881,protein_coding -5889,CIB4,ENSG00000157884,protein_coding -45498,MEGF11,ENSG00000157890,protein_coding -40154,C12orf43,ENSG00000157895,protein_coding -159,PEX10,ENSG00000157911,protein_coding -158,RER1,ENSG00000157916,protein_coding -21643,RADIL,ENSG00000157927,protein_coding -152,SKI,ENSG00000157933,protein_coding -21657,WIPI2,ENSG00000157954,protein_coding -824,LDLRAP1,ENSG00000157978,protein_coding -9505,AGAP1,ENSG00000157985,protein_coding -5932,KRTCAP3,ENSG00000157992,protein_coding -21685,ANKRD61,ENSG00000157999,protein_coding -841,PAFAH2,ENSG00000158006,protein_coding -845,EXTL1,ENSG00000158008,protein_coding -846,SLC30A2,ENSG00000158014,protein_coding -5949,BABAM2,ENSG00000158019,protein_coding -848,TRIM63,ENSG00000158022,protein_coding -40190,WDR66,ENSG00000158023,protein_coding -32185,MRPL17,ENSG00000158042,protein_coding -7187,DUSP2,ENSG00000158050,protein_coding -789,GRHL3,ENSG00000158055,protein_coding -861,UBXN11,ENSG00000158062,protein_coding -32201,NLRP14,ENSG00000158077,protein_coding -30791,PTPDC1,ENSG00000158079,protein_coding -5999,GALNT14,ENSG00000158089,protein_coding -11781,NCK1,ENSG00000158092,protein_coding -40182,HPD,ENSG00000158104,protein_coding -29344,RHPN1,ENSG00000158106,protein_coding -194,TPRG1L,ENSG00000158109,protein_coding -40195,LRRC43,ENSG00000158113,protein_coding -30863,PRXL2C,ENSG00000158122,protein_coding -6005,XDH,ENSG00000158125,protein_coding -943,XKR8,ENSG00000158156,protein_coding -7203,CNNM4,ENSG00000158158,protein_coding -944,EYA3,ENSG00000158161,protein_coding -11794,DZIP1L,ENSG00000158163,protein_coding -26003,TMSB15A,ENSG00000158164,protein_coding -30826,FANCC,ENSG00000158169,protein_coding -11801,MRAS,ENSG00000158186,protein_coding -916,WASF2,ENSG00000158195,protein_coding -52443,ABHD3,ENSG00000158201,protein_coding 
-11802,ESYT3,ENSG00000158220,protein_coding -11805,FAIM,ENSG00000158234,protein_coding -897,TENT5B,ENSG00000158246,protein_coding -11840,CLSTN2,ENSG00000158258,protein_coding -52065,COLEC12,ENSG00000158270,protein_coding -238,RNF207,ENSG00000158286,protein_coding -26310,CUL4B,ENSG00000158290,protein_coding -242,GPR153,ENSG00000158292,protein_coding -54325,SLC13A3,ENSG00000158296,protein_coding -26014,GPRASP2,ENSG00000158301,protein_coding -1251,RHBDL2,ENSG00000158315,protein_coding -22843,AUTS2,ENSG00000158321,protein_coding -25303,SHROOM4,ENSG00000158352,protein_coding -18943,HIST1H2BD,ENSG00000158373,protein_coding -17544,CDC25C,ENSG00000158402,protein_coding -18968,HIST1H4H,ENSG00000158406,protein_coding -7272,MITD1,ENSG00000158411,protein_coding -7277,EIF5B,ENSG00000158417,protein_coding -25399,RIBC1,ENSG00000158423,protein_coding -26067,TMSB15B,ENSG00000158427,protein_coding -9142,CATIP,ENSG00000158428,protein_coding -7306,CNOT11,ENSG00000158435,protein_coding -54389,KCNB1,ENSG00000158445,protein_coding -23862,TSPAN33,ENSG00000158457,protein_coding -17601,NRG2,ENSG00000158458,protein_coding -23866,AHCYL2,ENSG00000158467,protein_coding -54392,B4GALT5,ENSG00000158470,protein_coding -3537,CD1D,ENSG00000158473,protein_coding -3540,CD1A,ENSG00000158477,protein_coding -54398,SPATA2,ENSG00000158480,protein_coding -3542,CD1C,ENSG00000158481,protein_coding -33825,FAM86C1,ENSG00000158483,protein_coding -3543,CD1B,ENSG00000158485,protein_coding -47202,DNAH3,ENSG00000158486,protein_coding -3544,CD1E,ENSG00000158488,protein_coding -17775,HMHB1,ENSG00000158497,protein_coding -23892,CPA2,ENSG00000158516,protein_coding -22930,NCF1,ENSG00000158517,protein_coding -23894,CPA5,ENSG00000158525,protein_coding -25418,TSR2,ENSG00000158526,protein_coding -23224,PPP1R9A,ENSG00000158528,protein_coding -48910,ZC3H18,ENSG00000158545,protein_coding -9189,ZFAND2B,ENSG00000158552,protein_coding -19008,POM121L2,ENSG00000158553,protein_coding -33975,GDPD5,ENSG00000158555,protein_coding 
-23240,DYNC1I1,ENSG00000158560,protein_coding -25428,PFKFB1,ENSG00000158571,protein_coding -25430,ALAS2,ENSG00000158578,protein_coding -22338,TMED4,ENSG00000158604,protein_coding -4476,PPP1R15B,ENSG00000158615,protein_coding -23905,COPG2,ENSG00000158623,protein_coding -34005,EMSY,ENSG00000158636,protein_coding -25443,PAGE5,ENSG00000158639,protein_coding -27863,GPAT4,ENSG00000158669,protein_coding -22399,PKD1L1,ENSG00000158683,protein_coding -19082,ZSCAN12,ENSG00000158691,protein_coding -3612,TAGLN2,ENSG00000158710,protein_coding -4511,ELK4,ENSG00000158711,protein_coding -3602,SLAMF8,ENSG00000158714,protein_coding -4513,SLC45A3,ENSG00000158715,protein_coding -3600,DUSP23,ENSG00000158716,protein_coding -48916,RNF166,ENSG00000158717,protein_coding -641,NBL1,ENSG00000158747,protein_coding -644,HTR6,ENSG00000158748,protein_coding -3658,ITLN2,ENSG00000158764,protein_coding -3659,F11R,ENSG00000158769,protein_coding -3663,USF1,ENSG00000158773,protein_coding -654,PLA2G2F,ENSG00000158786,protein_coding -48965,SPATA2L,ENSG00000158792,protein_coding -3669,NIT1,ENSG00000158793,protein_coding -3670,DEDD,ENSG00000158796,protein_coding -48968,ZNF276,ENSG00000158805,protein_coding -27465,NPM2,ENSG00000158806,protein_coding -25561,EDA,ENSG00000158813,protein_coding -27466,FGF17,ENSG00000158815,protein_coding -661,VWA5B1,ENSG00000158816,protein_coding -671,CDA,ENSG00000158825,protein_coding -672,PINK1,ENSG00000158828,protein_coding -3676,B4GALT3,ENSG00000158850,protein_coding -27467,DMTN,ENSG00000158856,protein_coding -3677,ADAMTS4,ENSG00000158859,protein_coding -27468,FAM160B2,ENSG00000158863,protein_coding -3678,NDUFS2,ENSG00000158864,protein_coding -47307,SLC5A11,ENSG00000158865,protein_coding -3679,FCER1G,ENSG00000158869,protein_coding -3680,APOA2,ENSG00000158874,protein_coding -3681,TOMM40L,ENSG00000158882,protein_coding -3685,MPZ,ENSG00000158887,protein_coding -54276,WFDC8,ENSG00000158901,protein_coding -27494,CCAR2,ENSG00000158941,protein_coding 
-50901,WNT9B,ENSG00000158955,protein_coding -1887,CACHD1,ENSG00000158966,protein_coding -17354,CDC42SE2,ENSG00000158985,protein_coding -17356,RAPGEF6,ENSG00000158987,protein_coding -986,EPB41,ENSG00000159023,protein_coding -60110,MIS18A,ENSG00000159055,protein_coding -34050,ALG8,ENSG00000159063,protein_coding -31714,FBXW5,ENSG00000159069,protein_coding -60124,CFAP298,ENSG00000159079,protein_coding -60127,SYNJ1,ENSG00000159082,protein_coding -60129,PAXBP1,ENSG00000159086,protein_coding -60144,IFNAR2,ENSG00000159110,protein_coding -50933,MRPL10,ENSG00000159111,protein_coding -60150,IFNGR2,ENSG00000159128,protein_coding -60155,GART,ENSG00000159131,protein_coding -60157,SON,ENSG00000159140,protein_coding -60159,DONSON,ENSG00000159147,protein_coding -3102,SV2A,ENSG00000159164,protein_coding -4364,LAD1,ENSG00000159166,protein_coding -27537,STC1,ENSG00000159167,protein_coding -4366,TNNI1,ENSG00000159173,protein_coding -4370,CSRP1,ENSG00000159176,protein_coding -50988,PRAC1,ENSG00000159182,protein_coding -50993,HOXB13,ENSG00000159184,protein_coding -732,C1QC,ENSG00000159189,protein_coding -60174,KCNE2,ENSG00000159197,protein_coding -51001,ATP5MC1,ENSG00000159199,protein_coding -60182,RCAN1,ENSG00000159200,protein_coding -51003,UBE2Z,ENSG00000159202,protein_coding -3118,CIART,ENSG00000159208,protein_coding -51004,SNF8,ENSG00000159210,protein_coding -60183,CLIC6,ENSG00000159212,protein_coding -1426,CCDC24,ENSG00000159214,protein_coding -60186,RUNX1,ENSG00000159216,protein_coding -51010,IGF2BP1,ENSG00000159217,protein_coding -51008,GIP,ENSG00000159224,protein_coding -60202,CBR1,ENSG00000159228,protein_coding -60208,CBR3,ENSG00000159231,protein_coding -6735,AC005041.1,ENSG00000159239,protein_coding -44732,GJD2,ENSG00000159248,protein_coding -44734,ACTC1,ENSG00000159251,protein_coding -60213,MORC3,ENSG00000159256,protein_coding -60215,CHAF1B,ENSG00000159259,protein_coding -60219,CLDN14,ENSG00000159261,protein_coding -60226,SIM2,ENSG00000159263,protein_coding 
-60227,HLCS,ENSG00000159267,protein_coding -45678,GOLGA6A,ENSG00000159289,protein_coding -59455,SCUBE1,ENSG00000159307,protein_coding -50853,ARHGAP27,ENSG00000159314,protein_coding -45652,ADPGK,ENSG00000159322,protein_coding -37668,PTMS,ENSG00000159335,protein_coding -44917,PLA2G4D,ENSG00000159337,protein_coding -602,PADI4,ENSG00000159339,protein_coding -4424,ADIPOR1,ENSG00000159346,protein_coding -4425,CYB5R1,ENSG00000159348,protein_coding -3170,PSMD4,ENSG00000159352,protein_coding -594,ATP13A2,ENSG00000159363,protein_coding -6760,M1AP,ENSG00000159374,protein_coding -3180,PSMB4,ENSG00000159377,protein_coding -48056,IRX6,ENSG00000159387,protein_coding -4440,BTG2,ENSG00000159388,protein_coding -48071,CES5A,ENSG00000159398,protein_coding -6768,HK2,ENSG00000159399,protein_coding -37700,C1R,ENSG00000159403,protein_coding -3193,CELF3,ENSG00000159409,protein_coding -620,ALDH4A1,ENSG00000159423,protein_coding -44936,STARD9,ENSG00000159433,protein_coding -3212,THEM4,ENSG00000159445,protein_coding -3223,TCHH,ENSG00000159450,protein_coding -3245,LCE2B,ENSG00000159455,protein_coding -44946,UBR1,ENSG00000159459,protein_coding -48082,AMFR,ENSG00000159461,protein_coding -1402,MED8,ENSG00000159479,protein_coding -44959,TGM7,ENSG00000159495,protein_coding -58772,RGL4,ENSG00000159496,protein_coding -3276,SPRR2G,ENSG00000159516,protein_coding -3282,PGLYRP3,ENSG00000159527,protein_coding -45788,ISL2,ENSG00000159556,protein_coding -48127,RSPRY1,ENSG00000159579,protein_coding -1491,CCDC17,ENSG00000159588,protein_coding -1492,GPBP1L1,ENSG00000159592,protein_coding -48273,NAE1,ENSG00000159593,protein_coding -1495,TMEM69,ENSG00000159596,protein_coding -48143,ADGRG5,ENSG00000159618,protein_coding -48149,DRC7,ENSG00000159625,protein_coding -51393,ACE,ENSG00000159640,protein_coding -48158,TEPP,ENSG00000159648,protein_coding -11560,UROC1,ENSG00000159650,protein_coding -1528,EFCAB14,ENSG00000159658,protein_coding -12903,SPON2,ENSG00000159674,protein_coding 
-11568,CHCHD6,ENSG00000159685,protein_coding -12906,CTBP1,ENSG00000159692,protein_coding -48309,LRRC36,ENSG00000159708,protein_coding -48310,TPPP3,ENSG00000159713,protein_coding -48312,ZDHHC1,ENSG00000159714,protein_coding -48315,ATP6V0D1,ENSG00000159720,protein_coding -48317,AGRP,ENSG00000159723,protein_coding -12936,ZFYVE28,ENSG00000159733,protein_coding -48328,CARMIL2,ENSG00000159753,protein_coding -48332,C16orf86,ENSG00000159761,protein_coding -24221,PIP,ENSG00000159763,protein_coding -24234,FAM131B,ENSG00000159784,protein_coding -12961,RGS12,ENSG00000159788,protein_coding -48344,PSKH1,ENSG00000159792,protein_coding -24237,ZYX,ENSG00000159840,protein_coding -49025,ABR,ENSG00000159842,protein_coding -56702,LYPD5,ENSG00000159871,protein_coding -58936,CCDC117,ENSG00000159873,protein_coding -56716,ZNF230,ENSG00000159882,protein_coding -30014,CCDC107,ENSG00000159884,protein_coding -56718,ZNF222,ENSG00000159885,protein_coding -30028,NPR2,ENSG00000159899,protein_coding -56711,ZNF221,ENSG00000159905,protein_coding -56732,ZNF233,ENSG00000159915,protein_coding -56730,ZNF235,ENSG00000159917,protein_coding -30052,GNE,ENSG00000159921,protein_coding -59401,TNFRSF13C,ENSG00000159958,protein_coding -49129,OR3A3,ENSG00000159961,protein_coding -56881,ARHGAP35,ENSG00000160007,protein_coding -56863,PTGIR,ENSG00000160013,protein_coding -56861,CALM3,ENSG00000160014,protein_coding -362,DFFA,ENSG00000160049,protein_coding -1059,CCDC28B,ENSG00000160050,protein_coding -1061,IQCC,ENSG00000160051,protein_coding -1063,TMEM234,ENSG00000160055,protein_coding -1075,BSDC1,ENSG00000160058,protein_coding -1082,ZBTB8A,ENSG00000160062,protein_coding -117,ATAD3B,ENSG00000160072,protein_coding -121,SSU72,ENSG00000160075,protein_coding -90,UBE2J2,ENSG00000160087,protein_coding -1107,ZNF362,ENSG00000160094,protein_coding -1091,FNDC5,ENSG00000160097,protein_coding -55691,CPAMD8,ENSG00000160111,protein_coding -55704,NR2F6,ENSG00000160113,protein_coding -55709,ANKLE1,ENSG00000160117,protein_coding 
-11461,CCDC58,ENSG00000160124,protein_coding -26792,VMA21,ENSG00000160131,protein_coding -11492,KALRN,ENSG00000160145,protein_coding -55833,CILP2,ENSG00000160161,protein_coding -60339,ABCG1,ENSG00000160179,protein_coding -60341,TFF3,ENSG00000160180,protein_coding -60342,TFF2,ENSG00000160181,protein_coding -60343,TFF1,ENSG00000160182,protein_coding -60344,TMPRSS3,ENSG00000160183,protein_coding -60345,UBASH3A,ENSG00000160185,protein_coding -60347,RSPH1,ENSG00000160188,protein_coding -60348,SLC37A1,ENSG00000160190,protein_coding -60354,PDE9A,ENSG00000160191,protein_coding -60359,WDR4,ENSG00000160193,protein_coding -60360,NDUFV3,ENSG00000160194,protein_coding -60364,PKNOX1,ENSG00000160199,protein_coding -60365,CBS,ENSG00000160200,protein_coding -60366,U2AF1,ENSG00000160201,protein_coding -60371,CRYAA,ENSG00000160202,protein_coding -60379,HSF2BP,ENSG00000160207,protein_coding -60383,RRP1B,ENSG00000160208,protein_coding -60384,PDXK,ENSG00000160209,protein_coding -26918,G6PD,ENSG00000160211,protein_coding -60387,CSTB,ENSG00000160213,protein_coding -60389,RRP1,ENSG00000160214,protein_coding -60392,AGPAT3,ENSG00000160216,protein_coding -60395,TRAPPC10,ENSG00000160218,protein_coding -26932,GAB3,ENSG00000160219,protein_coding -60398,GATD3A,ENSG00000160221,protein_coding -60403,ICOSLG,ENSG00000160223,protein_coding -60408,AIRE,ENSG00000160224,protein_coding -60410,CFAP410,ENSG00000160226,protein_coding -55900,ZNF66,ENSG00000160229,protein_coding -60420,LRRC3,ENSG00000160233,protein_coding -60454,ITGB2,ENSG00000160255,protein_coding -60459,FAM207A,ENSG00000160256,protein_coding -31567,RALGDS,ENSG00000160271,protein_coding -60499,FTCD,ENSG00000160282,protein_coding -60501,SPATC1L,ENSG00000160284,protein_coding -60503,LSS,ENSG00000160285,protein_coding -31599,VAV2,ENSG00000160293,protein_coding -60506,MCM3AP,ENSG00000160294,protein_coding -60510,C21orf58,ENSG00000160298,protein_coding -60511,PCNT,ENSG00000160299,protein_coding -60514,DIP2A,ENSG00000160305,protein_coding 
-60517,S100B,ENSG00000160307,protein_coding -60518,PRMT2,ENSG00000160310,protein_coding -57176,CLDND2,ENSG00000160318,protein_coding -55947,ZNF208,ENSG00000160321,protein_coding -31589,ADAMTS13,ENSG00000160323,protein_coding -31590,CACFD1,ENSG00000160325,protein_coding -31591,SLC2A6,ENSG00000160326,protein_coding -57311,ZNF761,ENSG00000160336,protein_coding -31626,FCN2,ENSG00000160339,protein_coding -31638,C9orf116,ENSG00000160345,protein_coding -31641,LCN1,ENSG00000160349,protein_coding -55910,ZNF714,ENSG00000160352,protein_coding -31664,GPSM1,ENSG00000160360,protein_coding -56524,C19orf47,ENSG00000160392,protein_coding -56530,HIPK4,ENSG00000160396,protein_coding -31382,CFAP157,ENSG00000160401,protein_coding -31384,TOR2A,ENSG00000160404,protein_coding -31398,ST6GALNAC6,ENSG00000160408,protein_coding -56539,SHKBP1,ENSG00000160410,protein_coding -57481,RDH13,ENSG00000160439,protein_coding -31448,ZER1,ENSG00000160445,protein_coding -31446,ZDHHC12,ENSG00000160446,protein_coding -31444,PKN3,ENSG00000160447,protein_coding -56538,SPTBN4,ENSG00000160460,protein_coding -57504,BRSK1,ENSG00000160469,protein_coding -57509,COX6B2,ENSG00000160471,protein_coding -57513,TMEM190,ENSG00000160472,protein_coding -57551,NLRP4,ENSG00000160505,protein_coding -31530,PLPP7,ENSG00000160539,protein_coding -50065,TAOK1,ENSG00000160551,protein_coding -31545,MED27,ENSG00000160563,protein_coding -56631,DEDD2,ENSG00000160570,protein_coding -34703,SIK3,ENSG00000160584,protein_coding -34740,MPZL3,ENSG00000160588,protein_coding -34738,JAML,ENSG00000160593,protein_coding -50026,AC010761.1,ENSG00000160602,protein_coding -50025,TLCD1,ENSG00000160606,protein_coding -34713,PCSK7,ENSG00000160613,protein_coding -55059,SAFB,ENSG00000160633,protein_coding -34746,CD3G,ENSG00000160654,protein_coding -3306,S100A1,ENSG00000160678,protein_coding -3308,CHTOP,ENSG00000160679,protein_coding -34774,CXCR5,ENSG00000160683,protein_coding -3386,ZBTB7B,ENSG00000160685,protein_coding 
-3384,FLAD1,ENSG00000160688,protein_coding -3381,SHC1,ENSG00000160691,protein_coding -34797,VPS11,ENSG00000160695,protein_coding -34804,NLRX1,ENSG00000160703,protein_coding -3372,ADAR,ENSG00000160710,protein_coding -3362,IL6R,ENSG00000160712,protein_coding -3367,UBE2Q1,ENSG00000160714,protein_coding -3370,CHRNB2,ENSG00000160716,protein_coding -3327,CRTC2,ENSG00000160741,protein_coding -10306,ANO10,ENSG00000160746,protein_coding -3416,FDPS,ENSG00000160752,protein_coding -3418,RUSC1,ENSG00000160753,protein_coding -3411,FAM189B,ENSG00000160767,protein_coding -3464,PAQR6,ENSG00000160781,protein_coding -3462,PMF1,ENSG00000160783,protein_coding -3460,SLC25A44,ENSG00000160785,protein_coding -3457,LMNA,ENSG00000160789,protein_coding -10375,CCR5,ENSG00000160791,protein_coding -10402,NBEAL2,ENSG00000160796,protein_coding -10401,CCDC12,ENSG00000160799,protein_coding -10399,PTH1R,ENSG00000160801,protein_coding -3453,UBQLN4,ENSG00000160803,protein_coding -10398,MYL3,ENSG00000160808,protein_coding -23378,PPP1R35,ENSG00000160813,protein_coding -3482,GPATCH4,ENSG00000160818,protein_coding -3504,LRRC71,ENSG00000160838,protein_coding -3524,FCRL3,ENSG00000160856,protein_coding -23337,AZGP1,ENSG00000160862,protein_coding -18307,FGFR4,ENSG00000160867,protein_coding -23325,CYP3A4,ENSG00000160868,protein_coding -23322,CYP3A7,ENSG00000160870,protein_coding -55502,NACC1,ENSG00000160877,protein_coding -29324,CYP11B1,ENSG00000160882,protein_coding -18303,HK3,ENSG00000160883,protein_coding -29308,LY6K,ENSG00000160886,protein_coding -55506,IER2,ENSG00000160888,protein_coding -23313,ZNF394,ENSG00000160908,protein_coding -23309,CPSF4,ENSG00000160917,protein_coding -29328,LY6E,ENSG00000160932,protein_coding -29422,VPS28,ENSG00000160948,protein_coding -29423,TONSL,ENSG00000160949,protein_coding -55556,PTGER1,ENSG00000160951,protein_coding -54842,PWWP3A,ENSG00000160953,protein_coding -29434,RECQL4,ENSG00000160957,protein_coding -29436,LRRC14,ENSG00000160959,protein_coding 
-55569,ZNF333,ENSG00000160961,protein_coding -23438,COL26A1,ENSG00000160963,protein_coding -29430,PPP1R16A,ENSG00000160972,protein_coding -29429,FOXH1,ENSG00000160973,protein_coding -23462,ORAI2,ENSG00000160991,protein_coding -23463,ALKBH4,ENSG00000160993,protein_coding -55585,CCDC105,ENSG00000160994,protein_coding -23451,SH2B2,ENSG00000160999,protein_coding -18410,MRNIP,ENSG00000161010,protein_coding -18409,SQSTM1,ENSG00000161011,protein_coding -18407,MGAT4B,ENSG00000161013,protein_coding -29448,RPL8,ENSG00000161016,protein_coding -18405,MAML1,ENSG00000161021,protein_coding -55608,PGLYRP2,ENSG00000161031,protein_coding -23464,LRWD1,ENSG00000161036,protein_coding -23485,FBXL13,ENSG00000161040,protein_coding -23493,NAPEPLD,ENSG00000161048,protein_coding -18433,SCGB3A1,ENSG00000161055,protein_coding -23501,PSMC2,ENSG00000161057,protein_coding -54956,CELF5,ENSG00000161082,protein_coding -54966,MFSD12,ENSG00000161091,protein_coding -58507,USP41,ENSG00000161133,protein_coding -58583,YDJC,ENSG00000161179,protein_coding -58584,CCDC116,ENSG00000161180,protein_coding -12521,DVL3,ENSG00000161202,protein_coding -12522,AP2M1,ENSG00000161203,protein_coding -12523,ABCF3,ENSG00000161204,protein_coding -12771,PCYT1A,ENSG00000161217,protein_coding -56451,AC010605.1,ENSG00000161243,protein_coding -56266,DMKN,ENSG00000161249,protein_coding -56287,U2AF1L4,ENSG00000161265,protein_coding -12821,BDH1,ENSG00000161267,protein_coding -56299,NPHS1,ENSG00000161270,protein_coding -56317,THAP8,ENSG00000161277,protein_coding -56326,COX7A1,ENSG00000161281,protein_coding -56345,ZNF382,ENSG00000161298,protein_coding -31832,LRRC56,ENSG00000161328,protein_coding -50466,PLXDC1,ENSG00000161381,protein_coding -50487,PGAP3,ENSG00000161395,protein_coding -50492,IKZF3,ENSG00000161405,protein_coding -51656,GRIN2C,ENSG00000161509,protein_coding -51657,FDXR,ENSG00000161513,protein_coding -51705,SAP30BP,ENSG00000161526,protein_coding -51723,ACOX1,ENSG00000161533,protein_coding 
-51742,PRPSAP1,ENSG00000161542,protein_coding -51751,CYGB,ENSG00000161544,protein_coding -51775,SRSF2,ENSG00000161547,protein_coding -57216,ZNF577,ENSG00000161551,protein_coding -56939,TMEM143,ENSG00000161558,protein_coding -50633,KLHL10,ENSG00000161594,protein_coding -57021,CCDC155,ENSG00000161609,protein_coding -50651,HCRT,ENSG00000161610,protein_coding -57026,ALDH16A1,ENSG00000161618,protein_coding -38848,DCD,ENSG00000161634,protein_coding -38839,ITGA5,ENSG00000161638,protein_coding -57078,SIGLEC11,ENSG00000161640,protein_coding -38838,ZNF385A,ENSG00000161642,protein_coding -50762,MPP3,ENSG00000161647,protein_coding -50763,CD300LG,ENSG00000161649,protein_coding -57089,IZUMO2,ENSG00000161652,protein_coding -50772,NAGS,ENSG00000161653,protein_coding -50775,LSM12,ENSG00000161654,protein_coding -50781,ASB16,ENSG00000161664,protein_coding -57103,EMC10,ENSG00000161671,protein_coding -57106,JOSD2,ENSG00000161677,protein_coding -57112,SHANK1,ENSG00000161681,protein_coding -50800,FAM171A2,ENSG00000161682,protein_coding -50814,DBF4B,ENSG00000161692,protein_coding -50835,PLCD3,ENSG00000161714,protein_coding -38599,FMNL3,ENSG00000161791,protein_coding -38614,AQP5,ENSG00000161798,protein_coding -38616,RACGAP1,ENSG00000161800,protein_coding -55235,OR7G1,ENSG00000161807,protein_coding -38632,LARP4,ENSG00000161813,protein_coding -38681,GRASP,ENSG00000161835,protein_coding -55312,RAVER1,ENSG00000161847,protein_coding -38706,KRT84,ENSG00000161849,protein_coding -38708,KRT82,ENSG00000161850,protein_coding -55486,SYCE2,ENSG00000161860,protein_coding -55350,SPC24,ENSG00000161888,protein_coding -19495,IP6K3,ENSG00000161896,protein_coding -19496,LEMD2,ENSG00000161904,protein_coding -49179,ALOX15,ENSG00000161905,protein_coding -19666,TREML1,ENSG00000161911,protein_coding -55373,ZNF653,ENSG00000161914,protein_coding -49185,MED11,ENSG00000161920,protein_coding -49187,CXCL16,ENSG00000161921,protein_coding -49227,SCIMP,ENSG00000161929,protein_coding 
-49281,RNASEK-C17orf49,ENSG00000161939,protein_coding -49286,BCL6B,ENSG00000161940,protein_coding -49291,ASGR2,ENSG00000161944,protein_coding -49343,TNFSF13,ENSG00000161955,protein_coding -49344,SENP3,ENSG00000161956,protein_coding -49332,FGF11,ENSG00000161958,protein_coding -49346,EIF4A1,ENSG00000161960,protein_coding -49414,RPL26,ENSG00000161970,protein_coding -49421,CCDC42,ENSG00000161973,protein_coding -46442,POLR3K,ENSG00000161980,protein_coding -46443,SNRNP25,ENSG00000161981,protein_coding -46481,PRR35,ENSG00000161992,protein_coding -46490,WDR90,ENSG00000161996,protein_coding -46497,JMJD8,ENSG00000161999,protein_coding -46505,CCDC78,ENSG00000162004,protein_coding -46510,MSLNL,ENSG00000162006,protein_coding -46527,SSTR5,ENSG00000162009,protein_coding -46589,SPSB3,ENSG00000162032,protein_coding -46594,MEIOB,ENSG00000162039,protein_coding -46598,HS3ST6,ENSG00000162040,protein_coding -46660,TEDC2,ENSG00000162062,protein_coding -46655,CCNF,ENSG00000162063,protein_coding -46664,TBC1D24,ENSG00000162065,protein_coding -46670,AMDHD2,ENSG00000162066,protein_coding -46663,NTN3,ENSG00000162068,protein_coding -46721,BICDL2,ENSG00000162069,protein_coding -46711,PAQR4,ENSG00000162073,protein_coding -46705,FLYWCH2,ENSG00000162076,protein_coding -46700,ZG16B,ENSG00000162078,protein_coding -46747,ZNF75A,ENSG00000162086,protein_coding -46777,ADCY9,ENSG00000162104,protein_coding -33790,SHANK2,ENSG00000162105,protein_coding -33859,CLPB,ENSG00000162129,protein_coding -33955,NEU3,ENSG00000162139,protein_coding -33294,CYB561A3,ENSG00000162144,protein_coding -33303,PPP1R32,ENSG00000162148,protein_coding -33343,ASRGL1,ENSG00000162174,protein_coding -33372,GNG3,ENSG00000162188,protein_coding -33368,UBXN1,ENSG00000162191,protein_coding -33364,LBHD1,ENSG00000162194,protein_coding -33374,TTC9C,ENSG00000162222,protein_coding -33378,TAF6L,ENSG00000162227,protein_coding -33383,NXF1,ENSG00000162231,protein_coding -33385,STX5,ENSG00000162236,protein_coding 
-33539,SLC25A45,ENSG00000162241,protein_coding -10609,RPL29,ENSG00000162244,protein_coding -10654,ITIH3,ENSG00000162267,protein_coding -33520,SYVN1,ENSG00000162298,protein_coding -33511,ZFPL1,ENSG00000162300,protein_coding -33472,RPS6KA4,ENSG00000162302,protein_coding -33732,LRP5,ENSG00000162337,protein_coding -33748,TPCN2,ENSG00000162341,protein_coding -33765,FGF19,ENSG00000162344,protein_coding -1542,CYP4A22,ENSG00000162365,protein_coding -1545,PDZK1IP1,ENSG00000162366,protein_coding -1546,TAL1,ENSG00000162367,protein_coding -1549,CMPK1,ENSG00000162368,protein_coding -1578,BEND5,ENSG00000162373,protein_coding -1587,ELAVL4,ENSG00000162374,protein_coding -1660,COA7,ENSG00000162377,protein_coding -1663,ZYG11B,ENSG00000162378,protein_coding -1682,SLC1A7,ENSG00000162383,protein_coding -1687,CZIB,ENSG00000162384,protein_coding -1689,MAGOH,ENSG00000162385,protein_coding -1731,ACOT11,ENSG00000162390,protein_coding -1732,FAM151A,ENSG00000162391,protein_coding -1737,PARS2,ENSG00000162396,protein_coding -1740,LEXM,ENSG00000162398,protein_coding -1748,BSND,ENSG00000162399,protein_coding -1750,USP24,ENSG00000162402,protein_coding -1768,PLPP3,ENSG00000162407,protein_coding -251,NOL9,ENSG00000162408,protein_coding -1772,PRKAA2,ENSG00000162409,protein_coding -256,KLHL21,ENSG00000162413,protein_coding -1473,ZSWIM5,ENSG00000162415,protein_coding -980,GMEB1,ENSG00000162419,protein_coding -292,SLC45A1,ENSG00000162426,protein_coding -828,SELENON,ENSG00000162430,protein_coding -1902,AK4,ENSG00000162433,protein_coding -1890,JAK1,ENSG00000162434,protein_coding -1889,RAVER2,ENSG00000162437,protein_coding -505,CTRC,ENSG00000162438,protein_coding -339,LZIC,ENSG00000162441,protein_coding -346,RBP7,ENSG00000162444,protein_coding -1521,KNCN,ENSG00000162456,protein_coding -525,FBLIM1,ENSG00000162458,protein_coding -524,TMEM82,ENSG00000162460,protein_coding -522,SLC25A34,ENSG00000162461,protein_coding -631,AKR7A3,ENSG00000162482,protein_coding -398,DRAXIN,ENSG00000162490,protein_coding 
-484,PDPN,ENSG00000162493,protein_coding -481,LRRC38,ENSG00000162494,protein_coding -434,DHRS3,ENSG00000162496,protein_coding -1007,MATN1,ENSG00000162510,protein_coding -1010,LAPTM5,ENSG00000162511,protein_coding -1016,SDC3,ENSG00000162512,protein_coding -1038,PEF1,ENSG00000162517,protein_coding -1086,SYNC,ENSG00000162520,protein_coding -1085,RBBP4,ENSG00000162521,protein_coding -1088,KIAA1522,ENSG00000162522,protein_coding -1073,TSSK3,ENSG00000162526,protein_coding -645,TMCO4,ENSG00000162542,protein_coding -658,UBXN10,ENSG00000162543,protein_coding -667,CAMK2N1,ENSG00000162545,protein_coding -699,ALPL,ENSG00000162551,protein_coding -722,WNT4,ENSG00000162552,protein_coding -83,TTLL10,ENSG00000162571,protein_coding -92,SCNN1D,ENSG00000162572,protein_coding -103,MXRA8,ENSG00000162576,protein_coding -150,FAAP20,ENSG00000162585,protein_coding -191,MEGF6,ENSG00000162591,protein_coding -201,CCDC27,ENSG00000162592,protein_coding -1929,IL23R,ENSG00000162594,protein_coding -1946,DIRAS3,ENSG00000162595,protein_coding -1812,C1orf87,ENSG00000162598,protein_coding -1817,NFIA,ENSG00000162599,protein_coding -1787,OMA1,ENSG00000162600,protein_coding -1793,MYSM1,ENSG00000162601,protein_coding -1823,TM2D1,ENSG00000162604,protein_coding -1839,USP1,ENSG00000162607,protein_coding -2076,FUBP1,ENSG00000162613,protein_coding -2075,NEXN,ENSG00000162614,protein_coding -2077,DNAJB4,ENSG00000162616,protein_coding -2094,ADGRL4,ENSG00000162618,protein_coding -2014,LRRIQ3,ENSG00000162620,protein_coding -2020,LRRC53,ENSG00000162621,protein_coding -2025,TYW3,ENSG00000162623,protein_coding -2029,LHX8,ENSG00000162624,protein_coding -2397,SNX7,ENSG00000162627,protein_coding -4268,B3GALT2,ENSG00000162630,protein_coding -2496,NTNG1,ENSG00000162631,protein_coding -2515,FAM102B,ENSG00000162636,protein_coding -2516,HENMT1,ENSG00000162639,protein_coding -2523,AKNAD1,ENSG00000162641,protein_coding -2164,C1orf52,ENSG00000162642,protein_coding -2159,WDR63,ENSG00000162643,protein_coding 
-2224,GBP2,ENSG00000162645,protein_coding -2547,ATXN7L2,ENSG00000162650,protein_coding -2227,GBP4,ENSG00000162654,protein_coding -2250,ZNF326,ENSG00000162664,protein_coding -2266,HFM1,ENSG00000162669,protein_coding -4233,BRINP3,ENSG00000162670,protein_coding -2289,GFI1,ENSG00000162676,protein_coding -4283,KCNT2,ENSG00000162687,protein_coding -2410,AGL,ENSG00000162688,protein_coding -2440,VCAM1,ENSG00000162692,protein_coding -2441,EXTL2,ENSG00000162694,protein_coding -2443,SLC30A7,ENSG00000162695,protein_coding -4342,ZNF281,ENSG00000162702,protein_coding -4148,ARPC5,ENSG00000162704,protein_coding -3576,CADM3,ENSG00000162706,protein_coding -5384,NLRP3,ENSG00000162711,protein_coding -5381,ZNF496,ENSG00000162714,protein_coding -5408,TRIM58,ENSG00000162722,protein_coding -3614,SLAMF9,ENSG00000162723,protein_coding -5428,OR2M5,ENSG00000162727,protein_coding -3621,KCNJ9,ENSG00000162728,protein_coding -3622,IGSF8,ENSG00000162729,protein_coding -3735,DDR2,ENSG00000162733,protein_coding -3627,PEA15,ENSG00000162734,protein_coding -3632,PEX19,ENSG00000162735,protein_coding -3636,NCSTN,ENSG00000162736,protein_coding -3639,VANGL2,ENSG00000162738,protein_coding -3640,SLAMF6,ENSG00000162739,protein_coding -3717,OLFML2B,ENSG00000162745,protein_coding -3711,FCRLB,ENSG00000162746,protein_coding -3705,FCGR3B,ENSG00000162747,protein_coding -3955,SLC9C2,ENSG00000162753,protein_coding -3667,KLHDC9,ENSG00000162755,protein_coding -4617,C1orf74,ENSG00000162757,protein_coding -3765,LMX1A,ENSG00000162761,protein_coding -3771,LRRC52,ENSG00000162763,protein_coding -4696,FLVCR1,ENSG00000162769,protein_coding -4685,FAM71A,ENSG00000162771,protein_coding -4683,ATF3,ENSG00000162772,protein_coding -2587,RBM15,ENSG00000162775,protein_coding -2616,DENND2D,ENSG00000162777,protein_coding -4055,AXDND1,ENSG00000162779,protein_coding -4063,TDRD5,ENSG00000162782,protein_coding -4095,IER5,ENSG00000162783,protein_coding -9619,SNED1,ENSG00000162804,protein_coding -4764,BPNT1,ENSG00000162813,protein_coding 
-4733,SPATA17,ENSG00000162814,protein_coding -4787,C1orf115,ENSG00000162817,protein_coding -4823,BROX,ENSG00000162819,protein_coding -2937,NBPF20,ENSG00000162825,protein_coding -3008,ACP6,ENSG00000162836,protein_coding -5261,WDR64,ENSG00000162843,protein_coding -5329,KIF26B,ENSG00000162849,protein_coding -5345,TFB2M,ENSG00000162851,protein_coding -5346,CNST,ENSG00000162852,protein_coding -6289,PPP1R21,ENSG00000162869,protein_coding -4500,KLHDC8A,ENSG00000162873,protein_coding -4521,PM20D1,ENSG00000162877,protein_coding -6158,PKDCC,ENSG00000162878,protein_coding -6171,OXER1,ENSG00000162881,protein_coding -6172,HAAO,ENSG00000162882,protein_coding -5157,B3GALNT2,ENSG00000162885,protein_coding -4547,MAPKAPK2,ENSG00000162889,protein_coding -4552,IL20,ENSG00000162891,protein_coding -4553,IL24,ENSG00000162892,protein_coding -4554,FCMR,ENSG00000162894,protein_coding -4557,PIGR,ENSG00000162896,protein_coding -4558,FCAMR,ENSG00000162897,protein_coding -4840,CAPN2,ENSG00000162909,protein_coding -4954,MRPL55,ENSG00000162910,protein_coding -4862,WDR26,ENSG00000162923,protein_coding -6429,REL,ENSG00000162924,protein_coding -6434,PUS10,ENSG00000162927,protein_coding -6436,PEX13,ENSG00000162928,protein_coding -6437,KIAA1841,ENSG00000162929,protein_coding -4974,TRIM17,ENSG00000162931,protein_coding -8759,RFTN2,ENSG00000162944,protein_coding -5077,DISC1,ENSG00000162946,protein_coding -5996,CAPN13,ENSG00000162949,protein_coding -6834,LRRTM1,ENSG00000162951,protein_coding -6013,MEMO1,ENSG00000162959,protein_coding -6014,DPY30,ENSG00000162961,protein_coding -8782,TYW5,ENSG00000162971,protein_coding -8783,MAIP1,ENSG00000162972,protein_coding -5650,KCNF1,ENSG00000162975,protein_coding -5654,PQLC3,ENSG00000162976,protein_coding -8129,ARL5A,ENSG00000162980,protein_coding -5697,LRATD1,ENSG00000162981,protein_coding -8163,KCNJ3,ENSG00000162989,protein_coding -8568,NEUROD1,ENSG00000162992,protein_coding -6360,CLHC1,ENSG00000162994,protein_coding -8584,FRZB,ENSG00000162998,protein_coding 
-8590,DUSP19,ENSG00000162999,protein_coding -6373,CFAP36,ENSG00000163001,protein_coding -8592,NUP35,ENSG00000163002,protein_coding -7444,CCDC138,ENSG00000163006,protein_coding -8624,ZSWIM2,ENSG00000163012,protein_coding -6691,FBXO41,ENSG00000163013,protein_coding -6712,ACTG2,ENSG00000163017,protein_coding -5824,WDCP,ENSG00000163026,protein_coding -5736,SMC6,ENSG00000163029,protein_coding -5735,VSNL1,ENSG00000163032,protein_coding -7894,CCDC74A,ENSG00000163040,protein_coding -4894,H3F3A,ENSG00000163041,protein_coding -7918,ANKRD30BL,ENSG00000163046,protein_coding -4917,COQ8A,ENSG00000163050,protein_coding -9350,SLC16A14,ENSG00000163053,protein_coding -7125,TEKT4,ENSG00000163060,protein_coding -7639,EN1,ENSG00000163064,protein_coding -13600,SGCB,ENSG00000163069,protein_coding -13602,SPATA18,ENSG00000163071,protein_coding -8327,NOSTRIN,ENSG00000163072,protein_coding -7651,CFAP221,ENSG00000163075,protein_coding -9252,CCDC140,ENSG00000163081,protein_coding -9256,SGPP2,ENSG00000163082,protein_coding -7668,INHBB,ENSG00000163083,protein_coding -8312,XIRP2,ENSG00000163092,protein_coding -8335,BBS5,ENSG00000163093,protein_coding -57299,BIRC8,ENSG00000163098,protein_coding -14235,SMARCAD1,ENSG00000163104,protein_coding -14236,HPGDS,ENSG00000163106,protein_coding -14239,PDLIM5,ENSG00000163110,protein_coding -14246,PDHA2,ENSG00000163114,protein_coding -14255,STPG2,ENSG00000163116,protein_coding -7197,NEURL3,ENSG00000163121,protein_coding -3121,RPRD2,ENSG00000163125,protein_coding -7207,ANKRD23,ENSG00000163126,protein_coding -3139,CTSS,ENSG00000163131,protein_coding -13002,MSX1,ENSG00000163132,protein_coding -13258,PACRGL,ENSG00000163138,protein_coding -3156,BNIPL,ENSG00000163141,protein_coding -13202,C1QTNF7,ENSG00000163145,protein_coding -3164,TNFAIP8L2,ENSG00000163154,protein_coding -3166,LYSMD1,ENSG00000163155,protein_coding -3165,SCNM1,ENSG00000163156,protein_coding -3167,TMOD4,ENSG00000163157,protein_coding -3168,VPS72,ENSG00000163159,protein_coding 
-7731,ERCC3,ENSG00000163161,protein_coding -7307,RNF149,ENSG00000163162,protein_coding -7739,IWS1,ENSG00000163166,protein_coding -6721,BOLA3,ENSG00000163170,protein_coding -6092,CDC42EP3,ENSG00000163171,protein_coding -3219,S100A11,ENSG00000163191,protein_coding -3237,LCE3D,ENSG00000163202,protein_coding -3260,SMCP,ENSG00000163206,protein_coding -3261,IVL,ENSG00000163207,protein_coding -3266,SPRR3,ENSG00000163209,protein_coding -6121,DHX57,ENSG00000163214,protein_coding -3270,SPRR2D,ENSG00000163216,protein_coding -6597,BMP10,ENSG00000163217,protein_coding -3283,PGLYRP4,ENSG00000163218,protein_coding -6594,ARHGAP25,ENSG00000163219,protein_coding -3285,S100A9,ENSG00000163220,protein_coding -3286,S100A12,ENSG00000163221,protein_coding -6639,TGFA,ENSG00000163235,protein_coding -3366,TDRD10,ENSG00000163239,protein_coding -8983,CCNYL1,ENSG00000163249,protein_coding -8986,FZD5,ENSG00000163251,protein_coding -8995,CRYGC,ENSG00000163254,protein_coding -13241,DCAF16,ENSG00000163257,protein_coding -3347,C1orf189,ENSG00000163263,protein_coding -9416,NPPC,ENSG00000163273,protein_coding -13524,GNPDA2,ENSG00000163281,protein_coding -9423,ALPP,ENSG00000163283,protein_coding -13535,GABRG1,ENSG00000163285,protein_coding -9426,ALPG,ENSG00000163286,protein_coding -13543,GABRB1,ENSG00000163288,protein_coding -14041,PAQR3,ENSG00000163291,protein_coding -13557,NIPAL1,ENSG00000163293,protein_coding -9430,ALPI,ENSG00000163295,protein_coding -14052,ANTXR2,ENSG00000163297,protein_coding -14107,HELQ,ENSG00000163312,protein_coding -14108,MRPS18C,ENSG00000163319,protein_coding -11032,CGGBP1,ENSG00000163320,protein_coding -14109,ABRAXAS1,ENSG00000163322,protein_coding -8430,GPR155,ENSG00000163328,protein_coding -8212,DAPL1,ENSG00000163331,protein_coding -3375,PMVK,ENSG00000163344,protein_coding -3377,PBXIP1,ENSG00000163346,protein_coding -12644,CLDN1,ENSG00000163347,protein_coding -3379,PYGO2,ENSG00000163348,protein_coding -2694,HIPK1,ENSG00000163349,protein_coding 
-3385,LENEP,ENSG00000163352,protein_coding -3387,DCST2,ENSG00000163354,protein_coding -3388,DCST1,ENSG00000163357,protein_coding -9528,COL6A3,ENSG00000163359,protein_coding -4350,INAVA,ENSG00000163362,protein_coding -3433,YY1AP1,ENSG00000163374,protein_coding -10836,KBTBD8,ENSG00000163376,protein_coding -10848,TAFA4,ENSG00000163377,protein_coding -10850,EOGT,ENSG00000163378,protein_coding -10856,LMOD3,ENSG00000163380,protein_coding -3481,NAXE,ENSG00000163382,protein_coding -11393,POGLUT1,ENSG00000163389,protein_coding -2737,SLC22A15,ENSG00000163393,protein_coding -13318,CCKAR,ENSG00000163394,protein_coding -4359,IGFN1,ENSG00000163395,protein_coding -2743,ATP1A1,ENSG00000163399,protein_coding -11453,SLC15A2,ENSG00000163406,protein_coding -10881,EIF4E3,ENSG00000163412,protein_coding -10883,PROK2,ENSG00000163421,protein_coding -11383,TEX55,ENSG00000163424,protein_coding -11418,LRRC58,ENSG00000163428,protein_coding -11422,FSTL1,ENSG00000163430,protein_coding -4384,LMOD1,ENSG00000163431,protein_coding -4391,ELF3,ENSG00000163435,protein_coding -13669,PDCL2,ENSG00000163440,protein_coding -4428,TMEM183A,ENSG00000163444,protein_coding -9084,TMEM169,ENSG00000163449,protein_coding -13708,IGFBP7,ENSG00000163453,protein_coding -3401,TRIM46,ENSG00000163462,protein_coding -3399,KRTCAP2,ENSG00000163463,protein_coding -9130,CXCR1,ENSG00000163464,protein_coding -9132,ARPC2,ENSG00000163466,protein_coding -3470,TSACC,ENSG00000163467,protein_coding -3469,CCT3,ENSG00000163468,protein_coding -3466,TMEM79,ENSG00000163472,protein_coding -3452,SSR2,ENSG00000163479,protein_coding -9157,RNF25,ENSG00000163481,protein_coding -9158,STK36,ENSG00000163482,protein_coding -4431,ADORA1,ENSG00000163485,protein_coding -10030,NEK10,ENSG00000163491,protein_coding -8542,CCDC141,ENSG00000163492,protein_coding -9174,FEV,ENSG00000163497,protein_coding -9175,CRYBA2,ENSG00000163499,protein_coding -9179,IHH,ENSG00000163501,protein_coding -11233,CIP2A,ENSG00000163507,protein_coding 
-10044,EOMES,ENSG00000163508,protein_coding -8553,CWC22,ENSG00000163510,protein_coding -10049,AZI2,ENSG00000163512,protein_coding -10066,TGFBR2,ENSG00000163513,protein_coding -11238,RETNLB,ENSG00000163515,protein_coding -9194,ANKZF1,ENSG00000163516,protein_coding -9863,HDAC11,ENSG00000163517,protein_coding -3521,FCRL4,ENSG00000163518,protein_coding -11239,TRAT1,ENSG00000163519,protein_coding -9864,FBLN2,ENSG00000163520,protein_coding -9195,GLB1L,ENSG00000163521,protein_coding -10075,STT3B,ENSG00000163527,protein_coding -9872,CHCHD4,ENSG00000163528,protein_coding -11249,DPPA2,ENSG00000163530,protein_coding -4488,NFASC,ENSG00000163531,protein_coding -3529,FCRL1,ENSG00000163534,protein_coding -8789,SGO2,ENSG00000163535,protein_coding -12242,SERPINI1,ENSG00000163536,protein_coding -10118,CLASP2,ENSG00000163539,protein_coding -6863,SUCLG1,ENSG00000163541,protein_coding -4499,NUAK2,ENSG00000163545,protein_coding -3559,SPTA1,ENSG00000163554,protein_coding -12290,PRKCI,ENSG00000163558,protein_coding -3570,MNDA,ENSG00000163563,protein_coding -3572,PYHIN1,ENSG00000163564,protein_coding -3573,IFI16,ENSG00000163565,protein_coding -3574,AIM2,ENSG00000163568,protein_coding -9962,EFHB,ENSG00000163576,protein_coding -12306,EIF5A2,ENSG00000163577,protein_coding -12310,SLC2A2,ENSG00000163581,protein_coding -12305,RPL22L1,ENSG00000163584,protein_coding -6978,FABP1,ENSG00000163586,protein_coding -12191,PPM1L,ENSG00000163590,protein_coding -8885,ICA1L,ENSG00000163596,protein_coding -8910,CTLA4,ENSG00000163599,protein_coding -8911,ICOS,ENSG00000163600,protein_coding -10897,RYBP,ENSG00000163602,protein_coding -10910,PPP4R2,ENSG00000163605,protein_coding -11303,CD200R1,ENSG00000163606,protein_coding -11305,GTPBP8,ENSG00000163607,protein_coding -11306,NEPRO,ENSG00000163608,protein_coding -11318,SPICE1,ENSG00000163611,protein_coding -11334,CCDC191,ENSG00000163617,protein_coding -10780,CADPS,ENSG00000163618,protein_coding -14119,NKX6-1,ENSG00000163623,protein_coding 
-14121,CDS1,ENSG00000163624,protein_coding -14122,WDFY3,ENSG00000163625,protein_coding -13916,COX18,ENSG00000163626,protein_coding -14136,PTPN13,ENSG00000163629,protein_coding -10788,SYNPR,ENSG00000163630,protein_coding -13922,ALB,ENSG00000163631,protein_coding -10793,C3orf49,ENSG00000163632,protein_coding -14139,C4orf36,ENSG00000163633,protein_coding -10794,THOC7,ENSG00000163634,protein_coding -10798,ATXN7,ENSG00000163635,protein_coding -10802,PSMD6,ENSG00000163636,protein_coding -10808,PRICKLE2,ENSG00000163637,protein_coding -10815,ADAMTS9,ENSG00000163638,protein_coding -14177,PPM1K,ENSG00000163644,protein_coding -12010,ERICH6,ENSG00000163645,protein_coding -12020,CLRN1,ENSG00000163646,protein_coding -12100,GMPS,ENSG00000163655,protein_coding -12114,TIPARP,ENSG00000163659,protein_coding -12125,CCNL1,ENSG00000163660,protein_coding -12134,PTX3,ENSG00000163661,protein_coding -10714,HESX1,ENSG00000163666,protein_coding -10140,DCLK3,ENSG00000163673,protein_coding -10733,SLMAP,ENSG00000163681,protein_coding -13430,RPL9,ENSG00000163682,protein_coding -13436,SMIM14,ENSG00000163683,protein_coding -10742,RPP14,ENSG00000163684,protein_coding -10741,ABHD6,ENSG00000163686,protein_coding -10740,DNASE1L3,ENSG00000163687,protein_coding -10757,C3orf67,ENSG00000163689,protein_coding -13463,RBM47,ENSG00000163694,protein_coding -13470,APBB2,ENSG00000163697,protein_coding -9777,IL17RE,ENSG00000163701,protein_coding -9778,IL17RC,ENSG00000163702,protein_coding -9780,CRELD1,ENSG00000163703,protein_coding -9782,PRRT3,ENSG00000163704,protein_coding -9793,FANCD2OS,ENSG00000163705,protein_coding -11892,PCOLCE2,ENSG00000163710,protein_coding -11898,U2SURP,ENSG00000163714,protein_coding -9763,MTMR14,ENSG00000163719,protein_coding -12435,TTC14,ENSG00000163728,protein_coding -13942,CXCL3,ENSG00000163734,protein_coding -13939,CXCL5,ENSG00000163735,protein_coding -13938,PPBP,ENSG00000163736,protein_coding -13937,PF4,ENSG00000163737,protein_coding -13947,MTHFD2L,ENSG00000163738,protein_coding 
-13934,CXCL1,ENSG00000163739,protein_coding -13968,RCHY1,ENSG00000163743,protein_coding -11927,PLSCR2,ENSG00000163746,protein_coding -13993,CCDC158,ENSG00000163749,protein_coding -11957,CPA3,ENSG00000163751,protein_coding -11960,GYG1,ENSG00000163754,protein_coding -11965,HPS3,ENSG00000163755,protein_coding -11970,TM4SF18,ENSG00000163762,protein_coding -11731,TOPBP1,ENSG00000163781,protein_coding -11742,RYK,ENSG00000163785,protein_coding -10303,SNRK,ENSG00000163788,protein_coding -5913,TCF23,ENSG00000163792,protein_coding -5919,DNAJC5G,ENSG00000163793,protein_coding -5921,UCN,ENSG00000163794,protein_coding -5928,ZNF513,ENSG00000163795,protein_coding -5945,SLC4A1AP,ENSG00000163798,protein_coding -5961,PLB1,ENSG00000163803,protein_coding -5967,SPDYA,ENSG00000163806,protein_coding -10337,KIAA1143,ENSG00000163807,protein_coding -10338,KIF15,ENSG00000163808,protein_coding -10341,TGM4,ENSG00000163810,protein_coding -5970,WDR43,ENSG00000163811,protein_coding -10343,ZDHHC3,ENSG00000163812,protein_coding -10348,CDCP1,ENSG00000163814,protein_coding -10346,CLEC3B,ENSG00000163815,protein_coding -10359,SLC6A20,ENSG00000163817,protein_coding -10360,LZTFL1,ENSG00000163818,protein_coding -10365,FYCO1,ENSG00000163820,protein_coding -10371,CCR1,ENSG00000163823,protein_coding -10379,RTP3,ENSG00000163825,protein_coding -10380,LRRC2,ENSG00000163827,protein_coding -10415,ELP6,ENSG00000163832,protein_coding -11446,FBXO40,ENSG00000163833,protein_coding -11470,DTX3L,ENSG00000163840,protein_coding -11513,ZNF148,ENSG00000163848,protein_coding -11832,NMNAT3,ENSG00000163864,protein_coding -1127,SMIM12,ENSG00000163866,protein_coding -1139,ZMYM6,ENSG00000163867,protein_coding -11584,TPRA1,ENSG00000163870,protein_coding -12499,YEATS2,ENSG00000163872,protein_coding -1192,GRIK3,ENSG00000163873,protein_coding -1200,ZC3H12A,ENSG00000163874,protein_coding -1202,MEAF6,ENSG00000163875,protein_coding -1204,SNIP1,ENSG00000163877,protein_coding -1207,DNALI1,ENSG00000163879,protein_coding 
-12536,POLR2H,ENSG00000163882,protein_coding -11555,KLF15,ENSG00000163884,protein_coding -11557,CFAP100,ENSG00000163885,protein_coding -12529,CAMK2N2,ENSG00000163888,protein_coding -12559,LIPH,ENSG00000163898,protein_coding -12557,TMEM41A,ENSG00000163900,protein_coding -11609,RPN1,ENSG00000163902,protein_coding -12560,SENP2,ENSG00000163904,protein_coding -1270,HEYL,ENSG00000163909,protein_coding -11652,IFT122,ENSG00000163913,protein_coding -11653,RHO,ENSG00000163914,protein_coding -12598,RFC4,ENSG00000163918,protein_coding -12607,RPL39L,ENSG00000163923,protein_coding -10629,BAP1,ENSG00000163930,protein_coding -10670,TKT,ENSG00000163931,protein_coding -10668,PRKCD,ENSG00000163932,protein_coding -10667,RFT1,ENSG00000163933,protein_coding -10663,SFMBT1,ENSG00000163935,protein_coding -10640,GNL3,ENSG00000163938,protein_coding -10637,PBRM1,ENSG00000163939,protein_coding -12909,UVSSA,ENSG00000163945,protein_coding -10707,TASOR,ENSG00000163946,protein_coding -10708,ARHGEF3,ENSG00000163947,protein_coding -12917,SLBP,ENSG00000163950,protein_coding -12967,LRPAP1,ENSG00000163956,protein_coding -12768,ZDHHC19,ENSG00000163958,protein_coding -12770,SLC51A,ENSG00000163959,protein_coding -12780,UBXN7,ENSG00000163960,protein_coding -12787,RNF168,ENSG00000163961,protein_coding -12797,PIGX,ENSG00000163964,protein_coding -12811,MELTF,ENSG00000163975,protein_coding -12990,OTOP1,ENSG00000163982,protein_coding -13030,S100P,ENSG00000163993,protein_coding -13054,ABLIM2,ENSG00000163995,protein_coding -1310,EXO5,ENSG00000164002,protein_coding -1368,CLDN19,ENSG00000164007,protein_coding -1370,C1orf50,ENSG00000164008,protein_coding -1375,ERMAP,ENSG00000164010,protein_coding -1377,ZNF691,ENSG00000164011,protein_coding -14382,AIMP1,ENSG00000164022,protein_coding -14392,SGMS2,ENSG00000164023,protein_coding -14276,METAP1,ENSG00000164024,protein_coding -14298,DNAJB14,ENSG00000164031,protein_coding -14300,H2AFZ,ENSG00000164032,protein_coding -14306,EMCN,ENSG00000164035,protein_coding 
-14337,SLC9B1,ENSG00000164037,protein_coding -14340,SLC9B2,ENSG00000164038,protein_coding -14341,BDH2,ENSG00000164039,protein_coding -14665,PGRMC2,ENSG00000164040,protein_coding -10431,CDC25A,ENSG00000164045,protein_coding -10436,CAMP,ENSG00000164047,protein_coding -10437,ZNF589,ENSG00000164048,protein_coding -10444,FBXW12,ENSG00000164049,protein_coding -10446,PLXNB1,ENSG00000164050,protein_coding -10447,CCDC51,ENSG00000164051,protein_coding -10450,ATRIP,ENSG00000164053,protein_coding -10452,SHISA5,ENSG00000164054,protein_coding -14620,SPRY1,ENSG00000164056,protein_coding -10512,BSN,ENSG00000164061,protein_coding -10514,APEH,ENSG00000164062,protein_coding -14652,INTU,ENSG00000164066,protein_coding -10517,RNF123,ENSG00000164068,protein_coding -14654,HSPA4L,ENSG00000164070,protein_coding -14660,MFSD8,ENSG00000164073,protein_coding -14661,ABHD18,ENSG00000164074,protein_coding -10528,CAMKV,ENSG00000164076,protein_coding -10534,MON1A,ENSG00000164077,protein_coding -10531,MST1R,ENSG00000164078,protein_coding -10583,RAD54L2,ENSG00000164080,protein_coding -10585,TEX264,ENSG00000164081,protein_coding -10588,GRM2,ENSG00000164082,protein_coding -10611,DUSP7,ENSG00000164086,protein_coding -10612,POC1A,ENSG00000164087,protein_coding -10621,PPM1M,ENSG00000164088,protein_coding -14408,ETNPPL,ENSG00000164089,protein_coding -10622,WDR82,ENSG00000164091,protein_coding -14447,PITX2,ENSG00000164093,protein_coding -14558,C4orf3,ENSG00000164096,protein_coding -14542,PRSS12,ENSG00000164099,protein_coding -14538,NDST3,ENSG00000164100,protein_coding -15242,HMGB2,ENSG00000164104,protein_coding -15244,SAP30,ENSG00000164105,protein_coding -15245,SCRG1,ENSG00000164106,protein_coding -15252,HAND2,ENSG00000164107,protein_coding -14576,MAD2L1,ENSG00000164109,protein_coding -14594,ANXA5,ENSG00000164111,protein_coding -14595,TMEM155,ENSG00000164112,protein_coding -14605,ADAD1,ENSG00000164113,protein_coding -15028,MAP9,ENSG00000164114,protein_coding -15045,GUCY1A1,ENSG00000164116,protein_coding 
-15262,FBXO8,ENSG00000164117,protein_coding -15263,CEP44,ENSG00000164118,protein_coding -15266,HPGD,ENSG00000164120,protein_coding -15287,ASB5,ENSG00000164122,protein_coding -15097,C4orf45,ENSG00000164123,protein_coding -15084,TMEM144,ENSG00000164124,protein_coding -15079,GASK1B,ENSG00000164125,protein_coding -15123,NPY1R,ENSG00000164128,protein_coding -15124,NPY5R,ENSG00000164129,protein_coding -14776,NAA15,ENSG00000164134,protein_coding -14819,IL15,ENSG00000164136,protein_coding -14953,FAM160A1,ENSG00000164142,protein_coding -14984,ARFIP1,ENSG00000164144,protein_coding -15619,ICE1,ENSG00000164151,protein_coding -14853,HHIP,ENSG00000164161,protein_coding -14855,ANAPC10,ENSG00000164162,protein_coding -14860,ABCE1,ENSG00000164163,protein_coding -14861,OTUD4,ENSG00000164164,protein_coding -14882,LSM6,ENSG00000164167,protein_coding -14905,TMEM184C,ENSG00000164168,protein_coding -14906,PRMT9,ENSG00000164169,protein_coding -16245,ITGA2,ENSG00000164171,protein_coding -16248,MOCS2,ENSG00000164172,protein_coding -16014,SLC45A2,ENSG00000164175,protein_coding -16790,EDIL3,ENSG00000164176,protein_coding -16834,TMEM161B,ENSG00000164180,protein_coding -16376,ELOVL7,ENSG00000164181,protein_coding -16381,NDUFAF2,ENSG00000164182,protein_coding -17241,ZNF474,ENSG00000164185,protein_coding -16052,LMBRD2,ENSG00000164187,protein_coding -16058,RANBP3L,ENSG00000164188,protein_coding -16068,NIPBL,ENSG00000164190,protein_coding -16412,RNF180,ENSG00000164197,protein_coding -16862,ADGRV1,ENSG00000164199,protein_coding -17090,SLC25A46,ENSG00000164209,protein_coding -17103,STARD4,ENSG00000164211,protein_coding -17146,PGGT1B,ENSG00000164219,protein_coding -16650,F2RL2,ENSG00000164220,protein_coding -17149,CCDC112,ENSG00000164221,protein_coding -15734,ANKRD33B,ENSG00000164236,protein_coding -15718,CMBL,ENSG00000164237,protein_coding -17307,C5orf63,ENSG00000164241,protein_coding -17319,PRRC1,ENSG00000164244,protein_coding -16655,F2RL1,ENSG00000164251,protein_coding 
-16660,AGGF1,ENSG00000164252,protein_coding -16670,WDR41,ENSG00000164253,protein_coding -15883,PRDM9,ENSG00000164256,protein_coding -16254,NDUFS4,ENSG00000164258,protein_coding -17825,SCGB3A2,ENSG00000164265,protein_coding -17823,SPINK1,ENSG00000164266,protein_coding -17844,HTR4,ENSG00000164270,protein_coding -16275,ESM1,ENSG00000164283,protein_coding -17858,GRPEL2,ENSG00000164284,protein_coding -16283,CDC20B,ENSG00000164287,protein_coding -16919,ARSK,ENSG00000164291,protein_coding -16924,RHOBTB3,ENSG00000164292,protein_coding -16284,GPX8,ENSG00000164294,protein_coding -17881,TIGD6,ENSG00000164296,protein_coding -16723,SPZ1,ENSG00000164299,protein_coding -16717,SERINC5,ENSG00000164300,protein_coding -15366,ENPP6,ENSG00000164303,protein_coding -18641,CAGE1,ENSG00000164304,protein_coding -15381,CASP3,ENSG00000164305,protein_coding -15382,PRIMPOL,ENSG00000164306,protein_coding -16944,ERAP1,ENSG00000164307,protein_coding -16948,ERAP2,ENSG00000164308,protein_coding -16707,CMYA5,ENSG00000164309,protein_coding -16092,EGFLAM,ENSG00000164318,protein_coding -15400,CFAP97,ENSG00000164323,protein_coding -16575,TMEM174,ENSG00000164325,protein_coding -16546,CARTPT,ENSG00000164326,protein_coding -16107,RICTOR,ENSG00000164327,protein_coding -16706,TENT2,ENSG00000164329,protein_coding -18035,EBF1,ENSG00000164330,protein_coding -16587,ANKRA2,ENSG00000164331,protein_coding -18044,UBLCP1,ENSG00000164332,protein_coding -17219,FAM170A,ENSG00000164334,protein_coding -16588,UTP15,ENSG00000164338,protein_coding -15418,TLR3,ENSG00000164342,protein_coding -15425,KLKB1,ENSG00000164344,protein_coding -16604,NSA2,ENSG00000164346,protein_coding -16602,GFM2,ENSG00000164347,protein_coding -15550,TERT,ENSG00000164362,protein_coding -15548,SLC6A18,ENSG00000164363,protein_coding -15508,CCDC127,ENSG00000164366,protein_coding -18511,FOXQ1,ENSG00000164379,protein_coding -19843,ADGRF2,ENSG00000164393,protein_coding -17365,ACSL6,ENSG00000164398,protein_coding -17368,IL3,ENSG00000164399,protein_coding 
-17369,CSF2,ENSG00000164400,protein_coding -17397,SEPT8,ENSG00000164402,protein_coding -17400,SHROOM1,ENSG00000164403,protein_coding -17402,GDF9,ENSG00000164404,protein_coding -17403,UQCRQ,ENSG00000164405,protein_coding -17404,LEAP2,ENSG00000164406,protein_coding -20317,GJB7,ENSG00000164411,protein_coding -20327,SLC35A1,ENSG00000164414,protein_coding -20470,GRIK2,ENSG00000164418,protein_coding -20137,CGAS,ENSG00000164430,protein_coding -20776,FABP7,ENSG00000164434,protein_coding -18168,TLX3,ENSG00000164438,protein_coding -21027,TXLNB,ENSG00000164440,protein_coding -21030,CITED2,ENSG00000164442,protein_coding -20700,CALHM4,ENSG00000164451,protein_coding -21425,TBXT,ENSG00000164458,protein_coding -18210,CREBRF,ENSG00000164463,protein_coding -20718,DCBLD1,ENSG00000164465,protein_coding -18256,SFXN1,ENSG00000164466,protein_coding -20851,SAMD3,ENSG00000164483,protein_coding -20852,TMEM200A,ENSG00000164484,protein_coding -20990,IL22RA2,ENSG00000164485,protein_coding -21487,DACT2,ENSG00000164488,protein_coding -20526,PDSS2,ENSG00000164494,protein_coding -22422,SPATA48,ENSG00000164500,protein_coding -21124,STXBP5,ENSG00000164506,protein_coding -18912,HIST1H2AA,ENSG00000164508,protein_coding -16304,IL31RA,ENSG00000164509,protein_coding -16311,ANKRD55,ENSG00000164512,protein_coding -21175,RAET1E,ENSG00000164520,protein_coding -19594,PI16,ENSG00000164530,protein_coding -22157,TBX20,ENSG00000164532,protein_coding -21696,DAGLB,ENSG00000164535,protein_coding -22186,KIAA0895,ENSG00000164542,protein_coding -22298,STK17A,ENSG00000164543,protein_coding -21920,TRA2A,ENSG00000164548,protein_coding -17955,GALNT10,ENSG00000164574,protein_coding -17961,SAP30L,ENSG00000164576,protein_coding -17896,RPS14,ENSG00000164587,protein_coding -16208,HCN1,ENSG00000164588,protein_coding -17902,MYOZ3,ENSG00000164591,protein_coding -23556,COG5,ENSG00000164597,protein_coding -22103,NEUROD6,ENSG00000164600,protein_coding -23619,BMT2,ENSG00000164603,protein_coding 
-23622,GPR85,ENSG00000164604,protein_coding -18065,SLU7,ENSG00000164609,protein_coding -22132,RP9,ENSG00000164610,protein_coding -18066,PTTG1,ENSG00000164611,protein_coding -17460,CAMLG,ENSG00000164615,protein_coding -22142,BMPER,ENSG00000164619,protein_coding -17734,RELL2,ENSG00000164620,protein_coding -19637,KCNK5,ENSG00000164626,protein_coding -19640,KIF6,ENSG00000164627,protein_coding -21707,ZNF12,ENSG00000164631,protein_coding -21658,SLC29A4,ENSG00000164638,protein_coding -23134,TEX47,ENSG00000164645,protein_coding -23140,STEAP1,ENSG00000164647,protein_coding -21887,CDCA7L,ENSG00000164649,protein_coding -21875,SP8,ENSG00000164651,protein_coding -21731,MIOS,ENSG00000164654,protein_coding -23102,KIAA1324L,ENSG00000164659,protein_coding -19696,USP49,ENSG00000164663,protein_coding -21322,SYTL3,ENSG00000164674,protein_coding -23746,IQUB,ENSG00000164675,protein_coding -28418,HEY1,ENSG00000164683,protein_coding -28448,ZNF704,ENSG00000164684,protein_coding -28462,FABP5,ENSG00000164687,protein_coding -24508,SHH,ENSG00000164690,protein_coding -21334,TAGAP,ENSG00000164691,protein_coding -23213,COL1A2,ENSG00000164692,protein_coding -21339,FNDC1,ENSG00000164694,protein_coding -28478,CHMP4C,ENSG00000164695,protein_coding -23988,SLC13A4,ENSG00000164707,protein_coding -22319,PGAM2,ENSG00000164708,protein_coding -23284,BRI3,ENSG00000164713,protein_coding -23281,LMTK2,ENSG00000164715,protein_coding -50289,SLC35G3,ENSG00000164729,protein_coding -27279,CTSB,ENSG00000164733,protein_coding -28058,SOX17,ENSG00000164736,protein_coding -27346,DLC1,ENSG00000164741,protein_coding -22367,ADCY1,ENSG00000164742,protein_coding -27352,C8orf48,ENSG00000164743,protein_coding -22404,SUN3,ENSG00000164744,protein_coding -22405,C7orf57,ENSG00000164746,protein_coding -28385,HNF4G,ENSG00000164749,protein_coding -28398,PEX2,ENSG00000164751,protein_coding -28964,RAD21,ENSG00000164754,protein_coding -28968,SLC30A8,ENSG00000164756,protein_coding -28975,MED30,ENSG00000164758,protein_coding 
-28982,TNFRSF11B,ENSG00000164761,protein_coding -28332,SBSPON,ENSG00000164764,protein_coding -22518,PHKG1,ENSG00000164776,protein_coding -24501,EN2,ENSG00000164778,protein_coding -28926,KCNV1,ENSG00000164794,protein_coding -28944,CSMD3,ENSG00000164796,protein_coding -27960,SPIDR,ENSG00000164808,protein_coding -23509,ORC5,ENSG00000164815,protein_coding -27091,DEFA5,ENSG00000164816,protein_coding -21569,DNAAF5,ENSG00000164818,protein_coding -27080,DEFA4,ENSG00000164821,protein_coding -27078,DEFA6,ENSG00000164822,protein_coding -28574,OSGIN2,ENSG00000164823,protein_coding -27076,DEFB1,ENSG00000164825,protein_coding -21570,SUN1,ENSG00000164828,protein_coding -28888,OXR1,ENSG00000164830,protein_coding -28911,TMEM74,ENSG00000164841,protein_coding -21579,GPR146,ENSG00000164849,protein_coding -21583,GPER1,ENSG00000164850,protein_coding -21586,UNCX,ENSG00000164853,protein_coding -21594,TMEM184A,ENSG00000164855,protein_coding -24406,NOS3,ENSG00000164867,protein_coding -27120,SPAG11B,ENSG00000164871,protein_coding -21588,MICALL2,ENSG00000164877,protein_coding -28513,CA3,ENSG00000164879,protein_coding -21591,INTS1,ENSG00000164880,protein_coding -24411,CDK5,ENSG00000164885,protein_coding -24412,SLC4A2,ENSG00000164889,protein_coding -28539,SLC7A13,ENSG00000164893,protein_coding -24414,FASTK,ENSG00000164896,protein_coding -24415,TMUB1,ENSG00000164897,protein_coding -24049,FMC1,ENSG00000164898,protein_coding -24417,GBX1,ENSG00000164900,protein_coding -17298,PHAX,ENSG00000164902,protein_coding -17296,ALDH7A1,ENSG00000164904,protein_coding -21638,FOXK1,ENSG00000164916,protein_coding -28751,COX6C,ENSG00000164919,protein_coding -28734,OSR2,ENSG00000164920,protein_coding -28783,YWHAZ,ENSG00000164924,protein_coding -28845,BAALC,ENSG00000164929,protein_coding -28849,FZD6,ENSG00000164930,protein_coding -28851,CTHRC1,ENSG00000164932,protein_coding -28854,SLC25A32,ENSG00000164933,protein_coding -28856,DCAF13,ENSG00000164934,protein_coding -28865,DCSTAMP,ENSG00000164935,protein_coding 
-28666,TP53INP1,ENSG00000164938,protein_coding -28661,INTS8,ENSG00000164941,protein_coding -28647,VIRMA,ENSG00000164944,protein_coding -29657,FREM1,ENSG00000164946,protein_coding -28643,GEM,ENSG00000164949,protein_coding -28634,PDP1,ENSG00000164951,protein_coding -28631,TMEM67,ENSG00000164953,protein_coding -29095,WASHC5,ENSG00000164961,protein_coding -29960,RPP25L,ENSG00000164967,protein_coding -29952,FAM219A,ENSG00000164970,protein_coding -29950,C9orf24,ENSG00000164972,protein_coding -29668,SNAPC3,ENSG00000164975,protein_coding -29949,MYORG,ENSG00000164976,protein_coding -29948,NUDT2,ENSG00000164978,protein_coding -29076,TMEM65,ENSG00000164983,protein_coding -29670,PSIP1,ENSG00000164985,protein_coding -29675,CCDC171,ENSG00000164989,protein_coding -29940,UBAP1,ENSG00000165006,protein_coding -30712,DIRAS2,ENSG00000165023,protein_coding -30715,SYK,ENSG00000165025,protein_coding -31011,NIPSNAP3B,ENSG00000165028,protein_coding -31012,ABCA1,ENSG00000165029,protein_coding -30726,NFIL3,ENSG00000165030,protein_coding -27805,LETM2,ENSG00000165046,protein_coding -23820,METTL2B,ENSG00000165055,protein_coding -30425,PRKACG,ENSG00000165059,protein_coding -30426,FXN,ENSG00000165060,protein_coding -27847,ZMAT4,ENSG00000165061,protein_coding -27865,NKX6-3,ENSG00000165066,protein_coding -29186,TMEM71,ENSG00000165071,protein_coding -30441,MAMDC2,ENSG00000165072,protein_coding -24111,PRSS37,ENSG00000165076,protein_coding -28255,CPA6,ENSG00000165078,protein_coding -28265,C8orf34,ENSG00000165084,protein_coding -30473,TMC1,ENSG00000165091,protein_coding -30477,ALDH1A1,ENSG00000165092,protein_coding -18808,KDM1B,ENSG00000165097,protein_coding -27907,HGSNAT,ENSG00000165102,protein_coding -30584,RASEF,ENSG00000165105,protein_coding -30596,GKAP1,ENSG00000165113,protein_coding -30599,KIF27,ENSG00000165115,protein_coding -30600,C9orf64,ENSG00000165118,protein_coding -30601,HNRNPK,ENSG00000165119,protein_coding -23890,SSMEM1,ENSG00000165120,protein_coding 
-31090,SVEP1,ENSG00000165124,protein_coding -24211,TRPV6,ENSG00000165125,protein_coding -24215,LLCFC1,ENSG00000165131,protein_coding -30920,ANKS6,ENSG00000165138,protein_coding -30813,FBP1,ENSG00000165140,protein_coding -30974,TMEM246,ENSG00000165152,protein_coding -29047,ZHX1,ENSG00000165156,protein_coding -24966,CFAP47,ENSG00000165164,protein_coding -24990,CYBB,ENSG00000165168,protein_coding -24991,DYNLT3,ENSG00000165169,protein_coding -22911,METTL27,ENSG00000165171,protein_coding -25012,MID1IP1,ENSG00000165175,protein_coding -31111,SHOC1,ENSG00000165181,protein_coding -24850,CXorf58,ENSG00000165182,protein_coding -31130,KIAA1958,ENSG00000165185,protein_coding -24841,PTCHD1,ENSG00000165186,protein_coding -31154,RNF183,ENSG00000165188,protein_coding -24731,ASB11,ENSG00000165192,protein_coding -25935,PCDH19,ENSG00000165194,protein_coding -24732,PIGA,ENSG00000165195,protein_coding -24733,VEGFD,ENSG00000165197,protein_coding -31273,OR1Q1,ENSG00000165202,protein_coding -31283,OR1K1,ENSG00000165204,protein_coding -31297,STRBP,ENSG00000165209,protein_coding -22909,CLDN3,ENSG00000165215,protein_coding -31339,GAPVD1,ENSG00000165219,protein_coding -30775,CARD19,ENSG00000165233,protein_coding -30778,WNK2,ENSG00000165238,protein_coding -25764,ATP7A,ENSG00000165240,protein_coding -30857,ZNF367,ENSG00000165244,protein_coding -57943,NLGN4Y,ENSG00000165246,protein_coding -25817,HDX,ENSG00000165259,protein_coding -29871,NDUFB6,ENSG00000165264,protein_coding -29897,AQP7,ENSG00000165269,protein_coding -29901,NOL6,ENSG00000165271,protein_coding -29899,AQP3,ENSG00000165272,protein_coding -30089,TRMT10B,ENSG00000165275,protein_coding -29990,VCP,ENSG00000165280,protein_coding -29992,PIGO,ENSG00000165282,protein_coding -29994,STOML2,ENSG00000165283,protein_coding -25791,BRWD3,ENSG00000165288,protein_coding -41552,SLITRK5,ENSG00000165300,protein_coding -30056,MELK,ENSG00000165304,protein_coding -35541,ARMC3,ENSG00000165309,protein_coding -35552,OTUD1,ENSG00000165312,protein_coding 
-35698,ARHGAP12,ENSG00000165322,protein_coding -34280,FAT3,ENSG00000165323,protein_coding -34300,DEUP1,ENSG00000165325,protein_coding -36711,HECTD2,ENSG00000165338,protein_coding -25596,SLC7A3,ENSG00000165349,protein_coding -42633,FBXO33,ENSG00000165355,protein_coding -26553,INTS6L,ENSG00000165359,protein_coding -26602,GPR101,ENSG00000165370,protein_coding -26099,CLDN2,ENSG00000165376,protein_coding -42643,LRFN5,ENSG00000165379,protein_coding -36018,LRRC18,ENSG00000165383,protein_coding -42517,SPTSSA,ENSG00000165389,protein_coding -27689,WRN,ENSG00000165392,protein_coding -35930,MARCH8,ENSG00000165406,protein_coding -43411,TSHR,ENSG00000165409,protein_coding -42530,CFL2,ENSG00000165410,protein_coding -41246,SUGT1,ENSG00000165416,protein_coding -43418,GTF2A1,ENSG00000165417,protein_coding -36477,ZCCHC24,ENSG00000165424,protein_coding -33928,PGM2L1,ENSG00000165434,protein_coding -36123,PHYHIPL,ENSG00000165443,protein_coding -36129,SLC16A9,ENSG00000165449,protein_coding -33853,FOLR2,ENSG00000165457,protein_coding -33854,INPPL1,ENSG00000165458,protein_coding -33855,PHOX2A,ENSG00000165462,protein_coding -36082,MBL2,ENSG00000165471,protein_coding -40611,GJB2,ENSG00000165474,protein_coding -40614,CRYL1,ENSG00000165475,protein_coding -36171,REEP3,ENSG00000165476,protein_coding -34974,HEPACAM,ENSG00000165478,protein_coding -40639,SKA3,ENSG00000165480,protein_coding -40653,MICU2,ENSG00000165487,protein_coding -34092,DDIAS,ENSG00000165490,protein_coding -34104,PCF11,ENSG00000165494,protein_coding -34983,PKNOX2,ENSG00000165495,protein_coding -42691,RPL10L,ENSG00000165496,protein_coding -42716,LRR1,ENSG00000165501,protein_coding -42719,RPL36AL,ENSG00000165502,protein_coding -42722,DNAAF2,ENSG00000165506,protein_coding -35908,DEPP1,ENSG00000165507,protein_coding -26656,MAGEC3,ENSG00000165509,protein_coding -35910,ZNF22,ENSG00000165512,protein_coding -42730,KLHDC2,ENSG00000165516,protein_coding -43489,AL121768.1,ENSG00000165521,protein_coding 
-42731,NEMF,ENSG00000165525,protein_coding -35012,RPUSD4,ENSG00000165526,protein_coding -42738,ARF6,ENSG00000165527,protein_coding -43492,TTC8,ENSG00000165533,protein_coding -43364,TMEM63C,ENSG00000165548,protein_coding -43369,NGB,ENSG00000165553,protein_coding -43376,NOXRED1,ENSG00000165555,protein_coding -40791,CDX2,ENSG00000165556,protein_coding -40730,AMER2,ENSG00000165566,protein_coding -35225,AKR1E2,ENSG00000165568,protein_coding -40991,KBTBD6,ENSG00000165572,protein_coding -25186,SSX5,ENSG00000165583,protein_coding -25201,SSX3,ENSG00000165584,protein_coding -42890,OTX2,ENSG00000165588,protein_coding -25473,FAAH2,ENSG00000165591,protein_coding -36028,DRGX,ENSG00000165606,protein_coding -35357,NUDT5,ENSG00000165609,protein_coding -42933,DACT1,ENSG00000165617,protein_coding -41652,OXGR1,ENSG00000165621,protein_coding -35380,UCMA,ENSG00000165623,protein_coding -35386,BEND7,ENSG00000165626,protein_coding -35304,ATP5F1C,ENSG00000165629,protein_coding -35389,PRPF18,ENSG00000165630,protein_coding -35305,TAF3,ENSG00000165632,protein_coding -36022,VSTM4,ENSG00000165633,protein_coding -36417,VDAC2,ENSG00000165637,protein_coding -31649,SOHLH1,ENSG00000165643,protein_coding -36418,COMTD1,ENSG00000165644,protein_coding -37189,SLC18A2,ENSG00000165646,protein_coding -37191,PDZD8,ENSG00000165650,protein_coding -36425,ZNF503,ENSG00000165655,protein_coding -37318,ABRAXAS2,ENSG00000165660,protein_coding -31659,QSOX2,ENSG00000165661,protein_coding -37202,FAM204A,ENSG00000165669,protein_coding -18308,NSD1,ENSG00000165671,protein_coding -37219,PRDX3,ENSG00000165672,protein_coding -26442,ENOX2,ENSG00000165675,protein_coding -36557,GHITM,ENSG00000165678,protein_coding -37851,CLEC1B,ENSG00000165682,protein_coding -31667,SNAPC4,ENSG00000165684,protein_coding -37861,TMEM52B,ENSG00000165685,protein_coding -31669,PMPCA,ENSG00000165688,protein_coding -31668,ENTR1,ENSG00000165689,protein_coding -26469,FRMD7,ENSG00000165694,protein_coding -31554,AK8,ENSG00000165695,protein_coding 
-31556,SPACA9,ENSG00000165698,protein_coding -31557,TSC1,ENSG00000165699,protein_coding -31558,GFI1B,ENSG00000165702,protein_coding -26503,HPRT1,ENSG00000165704,protein_coding -37939,BORCS5,ENSG00000165714,protein_coding -31683,DIPK1B,ENSG00000165716,protein_coding -31770,ZMYND19,ENSG00000165724,protein_coding -36234,STOX1,ENSG00000165730,protein_coding -35858,RET,ENSG00000165731,protein_coding -36239,DDX21,ENSG00000165732,protein_coding -35853,BMS1,ENSG00000165733,protein_coding -37412,STK32C,ENSG00000165752,protein_coding -35660,JCAD,ENSG00000165757,protein_coding -42011,OR4K2,ENSG00000165762,protein_coding -26947,FUNDC2,ENSG00000165775,protein_coding -42054,PIP4P1,ENSG00000165782,protein_coding -42088,METTL17,ENSG00000165792,protein_coding -42090,SLC39A2,ENSG00000165794,protein_coding -42091,NDRG2,ENSG00000165795,protein_coding -42097,RNASE7,ENSG00000165799,protein_coding -42099,ARHGEF40,ENSG00000165801,protein_coding -31765,NSMF,ENSG00000165802,protein_coding -42100,ZNF219,ENSG00000165804,protein_coding -39464,C12orf50,ENSG00000165805,protein_coding -37129,CASP7,ENSG00000165806,protein_coding -43041,PPP1R36,ENSG00000165807,protein_coding -18454,BTNL9,ENSG00000165810,protein_coding -37141,CCDC186,ENSG00000165813,protein_coding -37145,VWA2,ENSG00000165816,protein_coding -42121,METTL3,ENSG00000165819,protein_coding -42122,SALL2,ENSG00000165821,protein_coding -37443,PRAP1,ENSG00000165828,protein_coding -37158,TRUB1,ENSG00000165832,protein_coding -41083,ERICH6B,ENSG00000165837,protein_coding -36780,CYP2C19,ENSG00000165841,protein_coding -43216,ZFYVE1,ENSG00000165861,protein_coding -37175,C10orf82,ENSG00000165863,protein_coding -37177,HSPA12A,ENSG00000165868,protein_coding -36835,FRAT1,ENSG00000165879,protein_coding -36846,UBTD1,ENSG00000165886,protein_coding -36847,ANKRD2,ENSG00000165887,protein_coding -39369,E2F7,ENSG00000165891,protein_coding -34402,ARHGAP42,ENSG00000165895,protein_coding -43282,ISCA2,ENSG00000165898,protein_coding 
-39400,OTOGL,ENSG00000165899,protein_coding -32817,LARGE2,ENSG00000165905,protein_coding -32852,PACSIN3,ENSG00000165912,protein_coding -43526,TTC7B,ENSG00000165914,protein_coding -32866,SLC39A13,ENSG00000165915,protein_coding -32867,PSMC3,ENSG00000165916,protein_coding -32868,RAPSN,ENSG00000165917,protein_coding -32880,AGBL2,ENSG00000165923,protein_coding -43554,TC2N,ENSG00000165929,protein_coding -43560,CPSF2,ENSG00000165934,protein_coding -38186,SMCO2,ENSG00000165935,protein_coding -43574,MOAP1,ENSG00000165943,protein_coding -43598,IFI27L1,ENSG00000165948,protein_coding -43599,IFI27,ENSG00000165949,protein_coding -43609,SERPINA12,ENSG00000165953,protein_coding -43629,CLMN,ENSG00000165959,protein_coding -38377,PDZRN4,ENSG00000165966,protein_coding -32498,SLC6A5,ENSG00000165970,protein_coding -39609,CCDC38,ENSG00000165972,protein_coding -32499,NELL1,ENSG00000165973,protein_coding -35436,PTER,ENSG00000165983,protein_coding -35438,C1QL3,ENSG00000165985,protein_coding -35465,CACNB2,ENSG00000165995,protein_coding -35454,HACD1,ENSG00000165996,protein_coding -35470,ARL5B,ENSG00000165997,protein_coding -34301,SMCO4,ENSG00000166002,protein_coding -34307,CEP295,ENSG00000166004,protein_coding -39325,KCNC2,ENSG00000166006,protein_coding -34313,TAF1D,ENSG00000166012,protein_coding -32665,ABTB2,ENSG00000166016,protein_coding -36861,R3HCC1L,ENSG00000166024,protein_coding -34348,AMOTL1,ENSG00000166025,protein_coding -37275,HTRA1,ENSG00000166033,protein_coding -45299,LIPC,ENSG00000166035,protein_coding -34372,CEP57,ENSG00000166037,protein_coding -39801,TCP11L2,ENSG00000166046,protein_coding -26795,PASD1,ENSG00000166049,protein_coding -44784,SPRED1,ENSG00000166068,protein_coding -44780,TMCO5A,ENSG00000166069,protein_coding -44807,GPR176,ENSG00000166073,protein_coding -35126,JAM3,ENSG00000166086,protein_coding -42320,IL25,ENSG00000166090,protein_coding -42321,CMTM5,ENSG00000166091,protein_coding -35134,GLB1L3,ENSG00000166105,protein_coding 
-35084,ADAMTS15,ENSG00000166106,protein_coding -39855,SVOP,ENSG00000166111,protein_coding -35118,SPATA19,ENSG00000166118,protein_coding -47868,GPT2,ENSG00000166123,protein_coding -43903,AMN,ENSG00000166126,protein_coding -45413,RAB8B,ENSG00000166128,protein_coding -39666,IKBIP,ENSG00000166130,protein_coding -44850,RPUSD2,ENSG00000166133,protein_coding -36913,HIF1AN,ENSG00000166135,protein_coding -36912,NDUFB8,ENSG00000166136,protein_coding -44862,ZFYVE19,ENSG00000166140,protein_coding -44863,PPP1R14D,ENSG00000166143,protein_coding -44865,SPINT1,ENSG00000166145,protein_coding -45098,FBN1,ENSG00000166147,protein_coding -39105,AVPR1A,ENSG00000166148,protein_coding -47923,C16orf78,ENSG00000166152,protein_coding -39685,DEPDC4,ENSG00000166153,protein_coding -37524,LRTM2,ENSG00000166159,protein_coding -26892,OPN1MW2,ENSG00000166160,protein_coding -47952,BRD7,ENSG00000166164,protein_coding -43928,CKB,ENSG00000166165,protein_coding -43930,TRMT61A,ENSG00000166166,protein_coding -36940,BTRC,ENSG00000166167,protein_coding -36943,POLL,ENSG00000166169,protein_coding -43933,BAG5,ENSG00000166170,protein_coding -36942,DPCD,ENSG00000166171,protein_coding -45602,LARP6,ENSG00000166173,protein_coding -32746,API5,ENSG00000166181,protein_coding -43956,ASPG,ENSG00000166183,protein_coding -48159,ZNF319,ENSG00000166188,protein_coding -36958,HPS6,ENSG00000166189,protein_coding -45622,SENP8,ENSG00000166192,protein_coding -36963,NOLC1,ENSG00000166197,protein_coding -32767,ALKBH3,ENSG00000166199,protein_coding -45116,COPS2,ENSG00000166200,protein_coding -44515,GABRB3,ENSG00000166206,protein_coding -39708,SPIC,ENSG00000166211,protein_coding -36287,TBATA,ENSG00000166220,protein_coding -36289,SGPL1,ENSG00000166224,protein_coding -39254,FRS2,ENSG00000166225,protein_coding -39257,CCT2,ENSG00000166226,protein_coding -36290,PCBD1,ENSG00000166228,protein_coding -45639,ARIH1,ENSG00000166233,protein_coding -46812,C16orf71,ENSG00000166246,protein_coding -34893,CLMP,ENSG00000166250,protein_coding 
-34902,SCN3B,ENSG00000166257,protein_coding -51147,COX11,ENSG00000166260,protein_coding -34904,ZNF202,ENSG00000166261,protein_coding -45123,FAM227B,ENSG00000166262,protein_coding -51150,STXBP4,ENSG00000166263,protein_coding -59996,CYYR1,ENSG00000166265,protein_coding -34514,CUL5,ENSG00000166266,protein_coding -39264,MYRFL,ENSG00000166268,protein_coding -36984,WBP1L,ENSG00000166272,protein_coding -36990,BORCS7,ENSG00000166275,protein_coding -19369,C2,ENSG00000166278,protein_coding -56098,PLEKHF1,ENSG00000166289,protein_coding -51159,TMEM100,ENSG00000166292,protein_coding -36312,ANAPC16,ENSG00000166295,protein_coding -32165,SMPD1,ENSG00000166311,protein_coding -32166,APBB1,ENSG00000166313,protein_coding -36367,SYNPO2L,ENSG00000166317,protein_coding -36341,NUDT13,ENSG00000166321,protein_coding -34522,C11orf65,ENSG00000166323,protein_coding -32692,TRIM44,ENSG00000166326,protein_coding -51195,CCDC182,ENSG00000166329,protein_coding -32176,ILK,ENSG00000166333,protein_coding -32177,TAF10,ENSG00000166337,protein_coding -32180,TPP1,ENSG00000166340,protein_coding -32182,DCHS1,ENSG00000166341,protein_coding -53152,NETO1,ENSG00000166342,protein_coding -36358,MSS51,ENSG00000166343,protein_coding -53168,CYB5A,ENSG00000166347,protein_coding -36361,USP54,ENSG00000166348,protein_coding -32710,RAG1,ENSG00000166349,protein_coding -59803,POTED,ENSG00000166351,protein_coding -32712,C11orf74,ENSG00000166352,protein_coding -56163,WDR88,ENSG00000166359,protein_coding -32194,OR10A5,ENSG00000166363,protein_coding -32197,OR2D2,ENSG00000166368,protein_coding -53256,ATP9B,ENSG00000166377,protein_coding -32208,PPFIBP2,ENSG00000166387,protein_coding -33983,MOGAT2,ENSG00000166391,protein_coding -32210,CYB5R2,ENSG00000166394,protein_coding -53065,SERPINB7,ENSG00000166396,protein_coding -56191,KIAA0355,ENSG00000166398,protein_coding -53070,SERPINB8,ENSG00000166401,protein_coding -32231,TUB,ENSG00000166402,protein_coding -32235,RIC3,ENSG00000166405,protein_coding 
-32237,LMO1,ENSG00000166407,protein_coding -45827,IDH3A,ENSG00000166411,protein_coding -45219,WDR72,ENSG00000166415,protein_coding -45838,CRABP1,ENSG00000166426,protein_coding -43987,PLD4,ENSG00000166428,protein_coding -25985,ZMAT1,ENSG00000166432,protein_coding -33948,XRRA1,ENSG00000166435,protein_coding -32240,TRIM66,ENSG00000166436,protein_coding -33947,RNF169,ENSG00000166439,protein_coding -32242,RPL27A,ENSG00000166441,protein_coding -32245,ST5,ENSG00000166444,protein_coding -48698,CDYL2,ENSG00000166446,protein_coding -23292,TMEM130,ENSG00000166448,protein_coding -45245,PRTG,ENSG00000166450,protein_coding -48705,CENPN,ENSG00000166451,protein_coding -32250,AKIP1,ENSG00000166452,protein_coding -48709,ATMIN,ENSG00000166454,protein_coding -48710,C16orf46,ENSG00000166455,protein_coding -32265,TMEM41B,ENSG00000166471,protein_coding -45190,LEO1,ENSG00000166477,protein_coding -32271,ZNF143,ENSG00000166478,protein_coding -53108,TMX3,ENSG00000166479,protein_coding -49765,MFAP4,ENSG00000166482,protein_coding -32272,WEE1,ENSG00000166483,protein_coding -49764,MAPK7,ENSG00000166484,protein_coding -47293,PRKCB,ENSG00000166501,protein_coding -45982,HDGFL3,ENSG00000166503,protein_coding -36387,NDST2,ENSG00000166507,protein_coding -23346,MCM7,ENSG00000166508,protein_coding -48654,CLEC3A,ENSG00000166509,protein_coding -52911,CCDC68,ENSG00000166510,protein_coding -37782,CLEC4E,ENSG00000166523,protein_coding -23344,ZNF3,ENSG00000166526,protein_coding -37780,CLEC4D,ENSG00000166527,protein_coding -23343,ZSCAN21,ENSG00000166529,protein_coding -37792,RIMKLB,ENSG00000166532,protein_coding -37797,A2ML1,ENSG00000166535,protein_coding -48251,BEAN1,ENSG00000166546,protein_coding -48255,TK2,ENSG00000166548,protein_coding -45867,TMED3,ENSG00000166557,protein_coding -48766,SLC38A8,ENSG00000166558,protein_coding -52987,SEC11C,ENSG00000166562,protein_coding -52990,CPLX4,ENSG00000166569,protein_coding -53235,GALR1,ENSG00000166573,protein_coding -34168,TMEM135,ENSG00000166575,protein_coding 
-39976,IQCD,ENSG00000166578,protein_coding -49416,NDEL1,ENSG00000166579,protein_coding -49588,CENPV,ENSG00000166582,protein_coding -48277,CDH16,ENSG00000166589,protein_coding -48278,RRAD,ENSG00000166592,protein_coding -48279,CIAO2B,ENSG00000166595,protein_coding -49437,CFAP52,ENSG00000166596,protein_coding -39760,HSP90B1,ENSG00000166598,protein_coding -53016,MC4R,ENSG00000166603,protein_coding -54090,BLCAP,ENSG00000166619,protein_coding -53060,SERPINB12,ENSG00000166634,protein_coding -44609,CHRFAM7A,ENSG00000166664,protein_coding -46903,ATF7IP2,ENSG00000166669,protein_coding -34437,MMP10,ENSG00000166670,protein_coding -46922,TVP23A,ENSG00000166676,protein_coding -26045,BEX3,ENSG00000166681,protein_coding -34636,TMPRSS5,ENSG00000166682,protein_coding -51609,COG1,ENSG00000166685,protein_coding -32392,PLEKHA7,ENSG00000166689,protein_coding -57676,ZNF606,ENSG00000166704,protein_coding -26075,ZCCHC18,ENSG00000166707,protein_coding -45010,B2M,ENSG00000166710,protein_coding -46029,ZNF592,ENSG00000166716,protein_coding -44997,CASC4,ENSG00000166734,protein_coding -34651,HTR3A,ENSG00000166736,protein_coding -34655,NNMT,ENSG00000166741,protein_coding -47191,ACSM1,ENSG00000166743,protein_coding -48509,AP1G1,ENSG00000166747,protein_coding -50291,SLFN5,ENSG00000166750,protein_coding -44973,CATSPER2,ENSG00000166762,protein_coding -47052,BMERB1,ENSG00000166780,protein_coding -47053,MARF1,ENSG00000166783,protein_coding -32421,SAAL1,ENSG00000166788,protein_coding -33131,YPEL4,ENSG00000166793,protein_coding -45432,PPIB,ENSG00000166794,protein_coding -32444,LDHC,ENSG00000166796,protein_coding -45428,CIAO2A,ENSG00000166797,protein_coding -32446,LDHAL6A,ENSG00000166800,protein_coding -33201,FAM111A,ENSG00000166801,protein_coding -45436,PCLAF,ENSG00000166803,protein_coding -46124,KIF7,ENSG00000166813,protein_coding -48584,LDHD,ENSG00000166816,protein_coding -46125,PLIN1,ENSG00000166819,protein_coding -46126,PEX11A,ENSG00000166821,protein_coding 
-48600,TMEM170A,ENSG00000166822,protein_coding -46131,MESP1,ENSG00000166823,protein_coding -46134,ANPEP,ENSG00000166825,protein_coding -47267,SCNN1G,ENSG00000166828,protein_coding -45450,RBPMS2,ENSG00000166831,protein_coding -32480,NAV2,ENSG00000166833,protein_coding -45456,ANKDD1A,ENSG00000166839,protein_coding -33190,GLYATL1,ENSG00000166840,protein_coding -52901,C18orf54,ENSG00000166845,protein_coding -47284,DCTN5,ENSG00000166847,protein_coding -48616,TERF2IP,ENSG00000166848,protein_coding -47286,PLK1,ENSG00000166851,protein_coding -45470,CLPX,ENSG00000166855,protein_coding -38981,GPR182,ENSG00000166856,protein_coding -38982,ZBTB39,ENSG00000166860,protein_coding -59183,CACNG2,ENSG00000166862,protein_coding -38983,TAC3,ENSG00000166863,protein_coding -38984,MYO1A,ENSG00000166866,protein_coding -47292,CHP2,ENSG00000166869,protein_coding -38985,NEMP1,ENSG00000166881,protein_coding -33216,OR4D6,ENSG00000166884,protein_coding -38986,NAB2,ENSG00000166886,protein_coding -44919,VPS39,ENSG00000166887,protein_coding -38987,STAT6,ENSG00000166888,protein_coding -33227,PATL1,ENSG00000166889,protein_coding -39043,ATP23,ENSG00000166896,protein_coding -59213,ELFN2,ENSG00000166897,protein_coding -33231,STX3,ENSG00000166900,protein_coding -33237,MRPL16,ENSG00000166902,protein_coding -39013,PIP4K2C,ENSG00000166908,protein_coding -44643,MTMR10,ENSG00000166912,protein_coding -54239,YWHAB,ENSG00000166913,protein_coding -45050,C15orf48,ENSG00000166920,protein_coding -44685,SCG5,ENSG00000166922,protein_coding -44688,GREM1,ENSG00000166923,protein_coding -23383,NYAP1,ENSG00000166924,protein_coding -23381,TSC22D4,ENSG00000166925,protein_coding -33255,MS4A6E,ENSG00000166926,protein_coding -33256,MS4A7,ENSG00000166927,protein_coding -33257,MS4A14,ENSG00000166928,protein_coding -33258,MS4A5,ENSG00000166930,protein_coding -45501,DIS3L,ENSG00000166938,protein_coding -44954,CCNDBP1,ENSG00000166946,protein_coding -44955,EPB42,ENSG00000166947,protein_coding 
-53365,TGM6,ENSG00000166948,protein_coding -45530,SMAD3,ENSG00000166949,protein_coding -33264,MS4A8,ENSG00000166959,protein_coding -52637,CCDC178,ENSG00000166960,protein_coding -33266,AP004243.1,ENSG00000166961,protein_coding -44967,MAP1A,ENSG00000166963,protein_coding -46194,RCCD1,ENSG00000166965,protein_coding -48026,AKTIP,ENSG00000166971,protein_coding -52650,MAPRE2,ENSG00000166974,protein_coding -60117,EVA1C,ENSG00000166979,protein_coding -21456,TCP10L2,ENSG00000166984,protein_coding -39005,MARS,ENSG00000166986,protein_coding -39010,MBD6,ENSG00000166987,protein_coding -23353,CNPY4,ENSG00000166997,protein_coding -44983,PDIA3,ENSG00000167004,protein_coding -48084,NUDT21,ENSG00000167005,protein_coding -23424,NAT16,ENSG00000167011,protein_coding -45020,TERB2,ENSG00000167014,protein_coding -27531,NKX3-1,ENSG00000167034,protein_coding -58833,SGSM1,ENSG00000167037,protein_coding -59019,DUSP18,ENSG00000167065,protein_coding -59375,TEF,ENSG00000167074,protein_coding -59392,MEI1,ENSG00000167077,protein_coding -51016,B4GALNT2,ENSG00000167080,protein_coding -31350,PBX3,ENSG00000167081,protein_coding -51018,GNGT2,ENSG00000167083,protein_coding -51028,PHB,ENSG00000167085,protein_coding -52442,SNRPD1,ENSG00000167088,protein_coding -31383,TTC16,ENSG00000167094,protein_coding -53937,SUN5,ENSG00000167098,protein_coding -51061,SAMD14,ENSG00000167100,protein_coding -31400,PIP5KL1,ENSG00000167103,protein_coding -53939,BPIFB6,ENSG00000167104,protein_coding -51072,TMEM92,ENSG00000167105,protein_coding -31403,FAM102A,ENSG00000167106,protein_coding -51081,ACSF2,ENSG00000167107,protein_coding -31417,GOLGA2,ENSG00000167110,protein_coding -31419,TRUB2,ENSG00000167112,protein_coding -31421,COQ4,ENSG00000167113,protein_coding -31422,SLC27A4,ENSG00000167114,protein_coding -51101,ANKRD40CL,ENSG00000167117,protein_coding -31424,URM1,ENSG00000167118,protein_coding -31429,CERCAM,ENSG00000167123,protein_coding -31464,DOLPP1,ENSG00000167130,protein_coding 
-50824,CCDC103,ENSG00000167131,protein_coding -31451,ENDOG,ENSG00000167136,protein_coding -45671,TBC1D21,ENSG00000167139,protein_coding -31490,PRRX2,ENSG00000167157,protein_coding -9469,UGT1A6,ENSG00000167165,protein_coding -45732,C15orf39,ENSG00000167173,protein_coding -45681,ISLR2,ENSG00000167178,protein_coding -50940,SP2,ENSG00000167182,protein_coding -50946,PRR15L,ENSG00000167183,protein_coding -47144,COQ7,ENSG00000167186,protein_coding -47172,GPRC5B,ENSG00000167191,protein_coding -49035,CRK,ENSG00000167193,protein_coding -47485,C16orf92,ENSG00000167194,protein_coding -45738,GOLGA6C,ENSG00000167195,protein_coding -45779,FBXO22,ENSG00000167196,protein_coding -45822,TBC1D2B,ENSG00000167202,protein_coding -47962,NOD2,ENSG00000167207,protein_coding -47961,SNX20,ENSG00000167208,protein_coding -52768,LOXHD1,ENSG00000167210,protein_coding -52776,KATNAL2,ENSG00000167216,protein_coding -52785,HDHD2,ENSG00000167220,protein_coding -56021,ZNF91,ENSG00000167232,protein_coding -31922,IGF2,ENSG00000167244,protein_coding -34715,RNF214,ENSG00000167257,protein_coding -50478,CDK12,ENSG00000167258,protein_coding -48351,DPEP2,ENSG00000167261,protein_coding -48352,DUS2,ENSG00000167264,protein_coding -40134,POP5,ENSG00000167272,protein_coding -51857,ENGASE,ENSG00000167280,protein_coding -51858,RBFOX3,ENSG00000167281,protein_coding -34750,ATP5MG,ENSG00000167283,protein_coding -34745,CD3D,ENSG00000167286,protein_coding -51872,TBC1D16,ENSG00000167291,protein_coding -51923,TEPSIN,ENSG00000167302,protein_coding -52848,MYO5B,ENSG00000167306,protein_coding -31990,ART5,ENSG00000167311,protein_coding -52843,ACAA2,ENSG00000167315,protein_coding -32001,STIM1,ENSG00000167323,protein_coding -32010,RRM1,ENSG00000167325,protein_coding -32038,OR51E2,ENSG00000167332,protein_coding -32034,TRIM68,ENSG00000167333,protein_coding -32040,MMP26,ENSG00000167346,protein_coding -32095,OR51B5,ENSG00000167355,protein_coding -32106,OR51I1,ENSG00000167359,protein_coding -32104,OR51Q1,ENSG00000167360,protein_coding 
-52037,FN3K,ENSG00000167363,protein_coding -47467,PRRT2,ENSG00000167371,protein_coding -48491,ZNF23,ENSG00000167377,protein_coding -56691,IRGQ,ENSG00000167378,protein_coding -56728,ZNF226,ENSG00000167380,protein_coding -56741,ZNF180,ENSG00000167384,protein_coding -24563,PPP2R3B,ENSG00000167393,protein_coding -47584,ZNF668,ENSG00000167394,protein_coding -47586,ZNF646,ENSG00000167395,protein_coding -47589,VKORC1,ENSG00000167397,protein_coding -56864,AC093503.1,ENSG00000167414,protein_coding -51218,LPO,ENSG00000167419,protein_coding -51304,CA4,ENSG00000167434,protein_coding -51262,SMG8,ENSG00000167447,protein_coding -55645,TPM4,ENSG00000167460,protein_coding -55648,RAB8A,ENSG00000167461,protein_coding -54828,GPX4,ENSG00000167468,protein_coding -54835,MIDN,ENSG00000167470,protein_coding -54903,JSRP1,ENSG00000167476,protein_coding -55733,FAM129C,ENSG00000167483,protein_coding -55796,KLHL26,ENSG00000167487,protein_coding -55826,GATAD2A,ENSG00000167491,protein_coding -48913,MVD,ENSG00000167508,protein_coding -48926,CDT1,ENSG00000167513,protein_coding -48929,TRAPPC2L,ENSG00000167515,protein_coding -48946,ANKRD11,ENSG00000167522,protein_coding -48961,SPATA33,ENSG00000167523,protein_coding -50010,RSKR,ENSG00000167524,protein_coding -50017,PROCA1,ENSG00000167525,protein_coding -48956,RPL13,ENSG00000167526,protein_coding -38521,ZNF641,ENSG00000167528,protein_coding -38536,LALBA,ENSG00000167531,protein_coding -38551,CACNB3,ENSG00000167535,protein_coding -50045,DHRS13,ENSG00000167536,protein_coding -50071,TP53I13,ENSG00000167543,protein_coding -38567,KMT2D,ENSG00000167548,protein_coding -50076,CORO6,ENSG00000167549,protein_coding -38568,RHEBL1,ENSG00000167550,protein_coding -38576,TUBA1A,ENSG00000167552,protein_coding -38577,TUBA1C,ENSG00000167553,protein_coding -57249,ZNF610,ENSG00000167554,protein_coding -57253,ZNF528,ENSG00000167555,protein_coding -57260,ZNF701,ENSG00000167562,protein_coding -56534,SERTAD3,ENSG00000167565,protein_coding 
-38603,NCKAP5L,ENSG00000167566,protein_coding -56549,RAB4B,ENSG00000167578,protein_coding -38611,AQP2,ENSG00000167580,protein_coding -38619,GPD1,ENSG00000167588,protein_coding -56294,PROSER3,ENSG00000167595,protein_coding -56573,CYP2S1,ENSG00000167600,protein_coding -56574,AXL,ENSG00000167601,protein_coding -56303,NFKBID,ENSG00000167604,protein_coding -57405,TMC4,ENSG00000167608,protein_coding -38676,ANKRD33,ENSG00000167612,protein_coding -57430,LAIR1,ENSG00000167613,protein_coding -57432,TTYH1,ENSG00000167614,protein_coding -57439,LENG8,ENSG00000167615,protein_coding -57441,CDC42EP5,ENSG00000167617,protein_coding -57442,LAIR2,ENSG00000167618,protein_coding -56640,TMEM145,ENSG00000167619,protein_coding -56632,ZNF526,ENSG00000167625,protein_coding -29256,TRAPPC9,ENSG00000167632,protein_coding -57467,KIR3DL1,ENSG00000167633,protein_coding -57474,NLRP7,ENSG00000167634,protein_coding -56330,ZNF146,ENSG00000167635,protein_coding -56704,ZNF283,ENSG00000167637,protein_coding -56406,PPP1R14A,ENSG00000167641,protein_coding -56405,SPINT2,ENSG00000167642,protein_coding -56411,C19orf33,ENSG00000167644,protein_coding -56410,YIF1B,ENSG00000167645,protein_coding -57490,DNAAF3,ENSG00000167646,protein_coding -29307,PSCA,ENSG00000167653,protein_coding -54989,ATCAY,ENSG00000167654,protein_coding -29318,LY6D,ENSG00000167656,protein_coding -54992,DAPK3,ENSG00000167657,protein_coding -54994,EEF2,ENSG00000167658,protein_coding -55008,TMIGD2,ENSG00000167664,protein_coding -55018,CHAF1A,ENSG00000167670,protein_coding -55020,UBXN6,ENSG00000167671,protein_coding -55026,HDGFL2,ENSG00000167674,protein_coding -55027,PLIN4,ENSG00000167676,protein_coding -55031,SEMA6B,ENSG00000167680,protein_coding -57559,ZNF444,ENSG00000167685,protein_coding -49020,NXN,ENSG00000167693,protein_coding -49012,FAM57A,ENSG00000167695,protein_coding -49015,GLOD4,ENSG00000167699,protein_coding -29433,MFSD3,ENSG00000167700,protein_coding -29432,GPT,ENSG00000167701,protein_coding 
-29428,KIFC2,ENSG00000167702,protein_coding -49041,SLC43A2,ENSG00000167703,protein_coding -49044,RILP,ENSG00000167705,protein_coding -49052,SERPINF2,ENSG00000167711,protein_coding -49050,WDR81,ENSG00000167716,protein_coding -49082,SRR,ENSG00000167720,protein_coding -49084,TSR1,ENSG00000167721,protein_coding -49134,TRPV3,ENSG00000167723,protein_coding -55062,HSD11B1L,ENSG00000167733,protein_coding -49159,CYB5D2,ENSG00000167740,protein_coding -49173,GGT6,ENSG00000167741,protein_coding -57119,C19orf48,ENSG00000167747,protein_coding -57125,KLK1,ENSG00000167748,protein_coding -57131,KLK4,ENSG00000167749,protein_coding -57129,KLK2,ENSG00000167751,protein_coding -57133,KLK5,ENSG00000167754,protein_coding -57135,KLK6,ENSG00000167755,protein_coding -57144,KLK11,ENSG00000167757,protein_coding -57148,KLK13,ENSG00000167759,protein_coding -57264,ZNF83,ENSG00000167766,protein_coding -38690,KRT80,ENSG00000167767,protein_coding -38723,KRT1,ENSG00000167768,protein_coding -55092,ACER1,ENSG00000167769,protein_coding -33441,OTUB1,ENSG00000167770,protein_coding -33436,RCOR2,ENSG00000167771,protein_coding -55205,ANGPTL4,ENSG00000167772,protein_coding -55200,AC010323.1,ENSG00000167774,protein_coding -55199,CD320,ENSG00000167775,protein_coding -38748,SPRYD3,ENSG00000167778,protein_coding -38749,IGFBP6,ENSG00000167779,protein_coding -38750,SOAT2,ENSG00000167780,protein_coding -55226,ZNF558,ENSG00000167785,protein_coding -33688,CABP2,ENSG00000167791,protein_coding -33693,NDUFV1,ENSG00000167792,protein_coding -33687,CDK2AP2,ENSG00000167797,protein_coding -33695,NUDT8,ENSG00000167799,protein_coding -33696,TBX10,ENSG00000167800,protein_coding -55309,AC011511.1,ENSG00000167807,protein_coding -55473,PRDX2,ENSG00000167815,protein_coding -33042,OR8J3,ENSG00000167822,protein_coding -33025,OR5I1,ENSG00000167825,protein_coding -49221,ZNF232,ENSG00000167840,protein_coding -49238,MIS12,ENSG00000167842,protein_coding -51641,CD300C,ENSG00000167850,protein_coding 
-51639,CD300A,ENSG00000167851,protein_coding -49272,TEKT1,ENSG00000167858,protein_coding -51662,HID1,ENSG00000167861,protein_coding -51665,MRPL58,ENSG00000167862,protein_coding -51668,ATP5PD,ENSG00000167863,protein_coding -49365,TMEM88,ENSG00000167874,protein_coding -51728,EVPL,ENSG00000167880,protein_coding -51729,SRP68,ENSG00000167881,protein_coding -51782,MGAT5B,ENSG00000167889,protein_coding -51818,TMC8,ENSG00000167895,protein_coding -51822,TK1,ENSG00000167900,protein_coding -28070,TMEM68,ENSG00000167904,protein_coding -28124,CYP7A1,ENSG00000167910,protein_coding -50500,GSDMA,ENSG00000167914,protein_coding -50540,KRT24,ENSG00000167916,protein_coding -50548,TMEM99,ENSG00000167920,protein_coding -50652,GHDC,ENSG00000167925,protein_coding -46461,FAM234A,ENSG00000167930,protein_coding -50758,SOST,ENSG00000167941,protein_coding -46616,ZNF598,ENSG00000167962,protein_coding -46632,RAB26,ENSG00000167964,protein_coding -46638,MLST8,ENSG00000167965,protein_coding -46642,E4F1,ENSG00000167967,protein_coding -46644,DNASE1L2,ENSG00000167968,protein_coding -46645,ECI1,ENSG00000167969,protein_coding -46637,CASKIN1,ENSG00000167971,protein_coding -46652,ABCA3,ENSG00000167972,protein_coding -46688,KCTD5,ENSG00000167977,protein_coding -46692,SRRM2,ENSG00000167978,protein_coding -46758,ZNF597,ENSG00000167981,protein_coding -46766,NLRC3,ENSG00000167984,protein_coding -33301,SDHAF2,ENSG00000167985,protein_coding -33292,DDB1,ENSG00000167986,protein_coding -33285,VPS37C,ENSG00000167987,protein_coding -33291,VWCE,ENSG00000167992,protein_coding -33324,RAB3IL1,ENSG00000167994,protein_coding -33326,BEST1,ENSG00000167995,protein_coding -33327,FTH1,ENSG00000167996,protein_coding -33370,BSCL2,ENSG00000168000,protein_coding -33376,POLR2G,ENSG00000168002,protein_coding -33400,SLC3A2,ENSG00000168003,protein_coding -33417,PLAAT5,ENSG00000168004,protein_coding -33432,SPINDOC,ENSG00000168005,protein_coding -33883,ATG16L2,ENSG00000168010,protein_coding -33920,C2CD3,ENSG00000168014,protein_coding 
-10144,TRANK1,ENSG00000168016,protein_coding -10200,TTC21A,ENSG00000168026,protein_coding -10219,RPSA,ENSG00000168028,protein_coding -10235,ENTPD3,ENSG00000168032,protein_coding -10251,CTNNB1,ENSG00000168036,protein_coding -10252,ULK4,ENSG00000168038,protein_coding -33779,FADD,ENSG00000168040,protein_coding -33556,LTBP3,ENSG00000168056,protein_coding -33509,NAALADL1,ENSG00000168060,protein_coding -33508,SAC3D1,ENSG00000168061,protein_coding -33502,BATF2,ENSG00000168062,protein_coding -33480,SLC22A11,ENSG00000168065,protein_coding -33486,SF1,ENSG00000168066,protein_coding -33488,MAP4K2,ENSG00000168067,protein_coding -33501,MAJIN,ENSG00000168070,protein_coding -33470,CCDC88B,ENSG00000168071,protein_coding -27595,SCARA3,ENSG00000168077,protein_coding -27605,PBK,ENSG00000168078,protein_coding -27607,SCARA5,ENSG00000168079,protein_coding -27618,PNOC,ENSG00000168081,protein_coding -23345,COPS6,ENSG00000168090,protein_coding -34710,PAFAH1B2,ENSG00000168092,protein_coding -46810,ANKS3,ENSG00000168096,protein_coding -46809,NUDT16L1,ENSG00000168101,protein_coding -19988,KIAA1586,ENSG00000168116,protein_coding -5016,RAB4A,ENSG00000168118,protein_coding -46738,OR1F1,ENSG00000168124,protein_coding -19048,OR2B2,ENSG00000168131,protein_coding -59268,KCNJ4,ENSG00000168135,protein_coding -9760,SETD5,ENSG00000168137,protein_coding -46791,VASN,ENSG00000168140,protein_coding -19968,FAM83B,ENSG00000168143,protein_coding -4975,HIST3H3,ENSG00000168148,protein_coding -14093,THAP9,ENSG00000168152,protein_coding -46750,OR2C1,ENSG00000168158,protein_coding -4982,RNF187,ENSG00000168159,protein_coding -27900,HOOK3,ENSG00000168172,protein_coding -42854,MAPK1IP1L,ENSG00000168175,protein_coding -36314,DDIT4,ENSG00000168209,protein_coding -13317,RBPJ,ENSG00000168214,protein_coding -20069,LMBRD1,ENSG00000168216,protein_coding -13297,ZCCHC4,ENSG00000168228,protein_coding -42808,PTGDR,ENSG00000168229,protein_coding -52496,TTC39C,ENSG00000168234,protein_coding 
-10624,GLYCTK,ENSG00000168237,protein_coding -5165,GNG4,ENSG00000168243,protein_coding -18183,UBTD2,ENSG00000168246,protein_coding -23469,POLR2J3,ENSG00000168255,protein_coding -50641,NKIRAS2,ENSG00000168256,protein_coding -50640,DNAJC7,ENSG00000168259,protein_coding -29507,KCNV2,ENSG00000168263,protein_coding -5125,IRF2BP2,ENSG00000168264,protein_coding -35547,PTF1A,ENSG00000168267,protein_coding -10635,NT5DC2,ENSG00000168268,protein_coding -18149,FOXI1,ENSG00000168269,protein_coding -10636,SMIM4,ENSG00000168273,protein_coding -5118,COA6,ENSG00000168275,protein_coding -8097,KIF5C,ENSG00000168280,protein_coding -42720,MGAT2,ENSG00000168282,protein_coding -35530,BMI1,ENSG00000168283,protein_coding -48338,THAP11,ENSG00000168286,protein_coding -8105,MMADHC,ENSG00000168288,protein_coding -10747,PDHB,ENSG00000168291,protein_coding -10745,PXK,ENSG00000168297,protein_coding -18942,HIST1H1E,ENSG00000168298,protein_coding -28012,PCMTD1,ENSG00000168300,protein_coding -10750,KCTD6,ENSG00000168301,protein_coding -22267,MPLKIP,ENSG00000168303,protein_coding -10751,ACOX2,ENSG00000168306,protein_coding -10753,FAM107A,ENSG00000168309,protein_coding -15373,IRF2,ENSG00000168310,protein_coding -10224,MOBP,ENSG00000168314,protein_coding -10211,CX3CR1,ENSG00000168329,protein_coding -27992,PPDPFL,ENSG00000168333,protein_coding -10207,XIRP1,ENSG00000168334,protein_coding -42558,INSM2,ENSG00000168348,protein_coding -43722,DEGS2,ENSG00000168350,protein_coding -10196,SCN11A,ENSG00000168356,protein_coding -10727,ARF4,ENSG00000168374,protein_coding -9628,SEPT2,ENSG00000168385,protein_coding -11137,FILIP1L,ENSG00000168386,protein_coding -1289,MFSD2A,ENSG00000168389,protein_coding -9642,DTYMK,ENSG00000168393,protein_coding -19434,TAP1,ENSG00000168394,protein_coding -9643,ING5,ENSG00000168395,protein_coding -9641,ATG4B,ENSG00000168397,protein_coding -43653,BDKRB2,ENSG00000168398,protein_coding -48578,MLKL,ENSG00000168404,protein_coding -48576,RFWD3,ENSG00000168411,protein_coding 
-15432,MTNR1A,ENSG00000168412,protein_coding -48778,KCNG4,ENSG00000168418,protein_coding -13458,RHOH,ENSG00000168421,protein_coding -9548,KLHL30,ENSG00000168427,protein_coding -47270,COG7,ENSG00000168434,protein_coding -20593,CDC40,ENSG00000168438,protein_coding -33447,STIP1,ENSG00000168439,protein_coding -47269,SCNN1B,ENSG00000168447,protein_coding -27470,HR,ENSG00000168453,protein_coding -52256,TXNDC2,ENSG00000168454,protein_coding -52249,RAB31,ENSG00000168461,protein_coding -27471,REEP4,ENSG00000168476,protein_coding -19387,TNXB,ENSG00000168477,protein_coding -27472,LGI3,ENSG00000168481,protein_coding -27473,SFTPC,ENSG00000168484,protein_coding -27474,BMP1,ENSG00000168487,protein_coding -47401,ATXN2L,ENSG00000168488,protein_coding -27477,PHYHIP,ENSG00000168490,protein_coding -15408,CCDC110,ENSG00000168491,protein_coding -27479,POLR3D,ENSG00000168495,protein_coding -33318,FEN1,ENSG00000168496,protein_coding -8694,CAVIN2,ENSG00000168497,protein_coding -52222,MTCL1,ENSG00000168502,protein_coding -9512,GBX2,ENSG00000168505,protein_coding -2967,HJV,ENSG00000168509,protein_coding -33335,SCGB1D1,ENSG00000168515,protein_coding -50841,HEXIM2,ENSG00000168517,protein_coding -27903,FNTA,ENSG00000168522,protein_coding -1029,SERINC2,ENSG00000168528,protein_coding -9030,MYL1,ENSG00000168530,protein_coding -15357,TRAPPC11,ENSG00000168538,protein_coding -33401,CHRM1,ENSG00000168539,protein_coding -8640,COL3A1,ENSG00000168542,protein_coding -27461,GFRA2,ENSG00000168546,protein_coding -15353,ING2,ENSG00000168556,protein_coding -15350,CDKN2AIP,ENSG00000168564,protein_coding -18651,SNRNP48,ENSG00000168566,protein_coding -33379,TMEM223,ENSG00000168569,protein_coding -27887,SLC20A2,ENSG00000168575,protein_coding -8998,CRYGA,ENSG00000168582,protein_coding -48695,DYNLRB2,ENSG00000168589,protein_coding -50783,TMUB2,ENSG00000168591,protein_coding -15270,ADAM29,ENSG00000168594,protein_coding -50659,STAT3,ENSG00000168610,protein_coding -54298,ZSWIM1,ENSG00000168612,protein_coding 
-27822,ADAM9,ENSG00000168615,protein_coding -27830,ADAM18,ENSG00000168619,protein_coding -16085,GDNF,ENSG00000168621,protein_coding -19262,MUCL3,ENSG00000168631,protein_coding -54284,WFDC13,ENSG00000168634,protein_coding -51478,AXIN2,ENSG00000168646,protein_coding -1256,NDUFS5,ENSG00000168653,protein_coding -7252,VWA3B,ENSG00000168658,protein_coding -56225,ZNF30,ENSG00000168661,protein_coding -16050,UGT3A2,ENSG00000168671,protein_coding -29116,LRATD2,ENSG00000168672,protein_coding -52355,LDLRAD4,ENSG00000168675,protein_coding -48307,KCTD19,ENSG00000168676,protein_coding -2589,SLC16A4,ENSG00000168679,protein_coding -16044,IL7R,ENSG00000168685,protein_coding -48302,TMEM208,ENSG00000168701,protein_coding -8013,LRP1B,ENSG00000168702,protein_coding -54247,WFDC12,ENSG00000168703,protein_coding -2573,AHCYL1,ENSG00000168710,protein_coding -16036,DNAJC21,ENSG00000168724,protein_coding -54228,PKIG,ENSG00000168734,protein_coding -14378,NPNT,ENSG00000168743,protein_coding -48275,CA7,ENSG00000168748,protein_coding -7210,FAM178B,ENSG00000168754,protein_coding -57778,TSPY2,ENSG00000168757,protein_coding -7209,SEMA4C,ENSG00000168758,protein_coding -7206,CNNM3,ENSG00000168763,protein_coding -2560,GSTM4,ENSG00000168765,protein_coding -14363,TET2,ENSG00000168769,protein_coding -14352,CXXC4,ENSG00000168772,protein_coding -40258,TCTN2,ENSG00000168778,protein_coding -12140,SHOX2,ENSG00000168779,protein_coding -44968,PPIP5K1,ENSG00000168781,protein_coding -14264,TSPAN5,ENSG00000168785,protein_coding -50070,ABHD15,ENSG00000168792,protein_coding -30077,ZBTB5,ENSG00000168795,protein_coding -48409,CHTF8,ENSG00000168802,protein_coding -44962,ADAL,ENSG00000168803,protein_coding -44961,LCMT2,ENSG00000168806,protein_coding -48412,SNTB2,ENSG00000168807,protein_coding -12171,IL12A,ENSG00000168811,protein_coding -56138,ZNF507,ENSG00000168813,protein_coding -12996,STX18,ENSG00000168818,protein_coding -12995,NSG1,ENSG00000168824,protein_coding -12993,ZBTB49,ENSG00000168826,protein_coding 
-12149,GFM1,ENSG00000168827,protein_coding -30035,OR13J1,ENSG00000168828,protein_coding -20307,HTR1E,ENSG00000168830,protein_coding -15112,FSTL5,ENSG00000168843,protein_coding -48458,DDX19A,ENSG00000168872,protein_coding -6912,ATOH8,ENSG00000168874,protein_coding -11788,SOX14,ENSG00000168875,protein_coding -34340,ANKRD49,ENSG00000168876,protein_coding -6908,SFTPB,ENSG00000168878,protein_coding -6906,USP39,ENSG00000168883,protein_coding -12947,TNIP2,ENSG00000168884,protein_coding -6907,C2orf68,ENSG00000168887,protein_coding -6905,TMEM150A,ENSG00000168890,protein_coding -6904,RNF181,ENSG00000168894,protein_coding -6903,VAMP5,ENSG00000168899,protein_coding -18452,BTNL3,ENSG00000168903,protein_coding -46349,LRRC28,ENSG00000168904,protein_coding -6899,MAT2A,ENSG00000168906,protein_coding -44918,PLA2G4F,ENSG00000168907,protein_coding -29955,ENHO,ENSG00000168913,protein_coding -17275,ZNF608,ENSG00000168916,protein_coding -11778,SLC35G2,ENSG00000168917,protein_coding -9451,INPP5D,ENSG00000168918,protein_coding -12922,LETM1,ENSG00000168924,protein_coding -48589,CTRB1,ENSG00000168925,protein_coding -48588,CTRB2,ENSG00000168928,protein_coding -34225,TRIM49,ENSG00000168930,protein_coding -12920,TMEM129,ENSG00000168936,protein_coding -17256,PPIC,ENSG00000168938,protein_coding -26970,SPRY3,ENSG00000168939,protein_coding -17262,CEP120,ENSG00000168944,protein_coding -42406,STXBP6,ENSG00000168952,protein_coding -9307,TM4SF20,ENSG00000168955,protein_coding -9306,MFF,ENSG00000168958,protein_coding -34198,GRM5,ENSG00000168959,protein_coding -49946,LGALS9,ENSG00000168961,protein_coding -44905,JMJD7-PLA2G4B,ENSG00000168970,protein_coding -12887,CPLX1,ENSG00000168993,protein_coding -18562,PXDC1,ENSG00000168994,protein_coding -57154,SIGLEC7,ENSG00000168995,protein_coding -5671,NTSR2,ENSG00000169006,protein_coding -5663,E2F6,ENSG00000169016,protein_coding -45552,FEM1B,ENSG00000169018,protein_coding -13545,COMMD8,ENSG00000169019,protein_coding -12879,ATP5ME,ENSG00000169020,protein_coding 
-56086,UQCRFS1,ENSG00000169021,protein_coding -12881,SLC49A3,ENSG00000169026,protein_coding -9304,COL4A3,ENSG00000169031,protein_coding -45507,MAP2K1,ENSG00000169032,protein_coding -57138,KLK7,ENSG00000169035,protein_coding -18397,HNRNPH1,ENSG00000169045,protein_coding -9298,IRS1,ENSG00000169047,protein_coding -26887,MECP2,ENSG00000169057,protein_coding -24626,VCX3A,ENSG00000169059,protein_coding -41928,UPF3A,ENSG00000169062,protein_coding -12236,ZBBX,ENSG00000169064,protein_coding -16342,ACTBL2,ENSG00000169067,protein_coding -30728,ROR2,ENSG00000169071,protein_coding -25538,AR,ENSG00000169083,protein_coding -24586,DHRSX,ENSG00000169084,protein_coding -28234,VXN,ENSG00000169085,protein_coding -11474,HSPBAP1,ENSG00000169087,protein_coding -24580,ASMTL,ENSG00000169093,protein_coding -24577,SLC25A6,ENSG00000169100,protein_coding -44845,CHST14,ENSG00000169105,protein_coding -13957,PARM1,ENSG00000169116,protein_coding -45433,CSNK1G1,ENSG00000169118,protein_coding -28115,FAM110B,ENSG00000169122,protein_coding -35623,ARMC4,ENSG00000169126,protein_coding -37148,AFAP1L2,ENSG00000169129,protein_coding -18367,ZNF354A,ENSG00000169131,protein_coding -57076,ATF5,ENSG00000169136,protein_coding -27972,UBE2V2,ENSG00000169139,protein_coding -27781,GOT1L1,ENSG00000169154,protein_coding -31363,ZBTB43,ENSG00000169155,protein_coding -57054,CPT1C,ENSG00000169169,protein_coding -1749,PCSK9,ENSG00000169174,protein_coding -47359,XPO6,ENSG00000169180,protein_coding -47356,GSG1L,ENSG00000169181,protein_coding -58919,MN1,ENSG00000169184,protein_coding -25429,APEX2,ENSG00000169188,protein_coding -47344,NSMCE1,ENSG00000169189,protein_coding -21923,CCDC126,ENSG00000169193,protein_coding -17392,IL13,ENSG00000169194,protein_coding -47448,NPIPB12,ENSG00000169203,protein_coding -42125,OR10G3,ENSG00000169208,protein_coding -1631,RAB3B,ENSG00000169213,protein_coding -5398,OR6F1,ENSG00000169214,protein_coding -47511,CD2BP2,ENSG00000169217,protein_coding -1210,RSPO1,ENSG00000169218,protein_coding 
-18317,RGS14,ENSG00000169220,protein_coding -47513,TBC1D10B,ENSG00000169221,protein_coding -18315,LMAN2,ENSG00000169223,protein_coding -5387,GCSAML,ENSG00000169224,protein_coding -18312,RAB24,ENSG00000169228,protein_coding -18314,PRELID1,ENSG00000169230,protein_coding -3405,THBS3,ENSG00000169231,protein_coding -24742,CA5B,ENSG00000169239,protein_coding -3396,SLC50A1,ENSG00000169241,protein_coding -3395,EFNA1,ENSG00000169242,protein_coding -13983,CXCL10,ENSG00000169245,protein_coding -47214,NPIPB3,ENSG00000169246,protein_coding -17847,SH3TC2,ENSG00000169247,protein_coding -13984,CXCL11,ENSG00000169248,protein_coding -24746,ZRSR2,ENSG00000169249,protein_coding -12193,NMD3,ENSG00000169251,protein_coding -17846,ADRB2,ENSG00000169252,protein_coding -12192,B3GALNT1,ENSG00000169255,protein_coding -18294,GPRIN1,ENSG00000169258,protein_coding -16267,HSPB3,ENSG00000169271,protein_coding -12104,KCNAB1,ENSG00000169282,protein_coding -14022,MRPL1,ENSG00000169288,protein_coding -3364,SHE,ENSG00000169291,protein_coding -24924,NR0B1,ENSG00000169297,protein_coding -13396,PGM2,ENSG00000169299,protein_coding -17817,STK32A,ENSG00000169302,protein_coding -24910,IL1RAPL1,ENSG00000169306,protein_coding -12030,P2RY12,ENSG00000169313,protein_coding -58775,C22orf15,ENSG00000169314,protein_coding -42105,OR5AU1,ENSG00000169327,protein_coding -45869,MINAR1,ENSG00000169330,protein_coding -47179,PDILT,ENSG00000169340,protein_coding -47178,UMOD,ENSG00000169344,protein_coding -47177,GP2,ENSG00000169347,protein_coding -12098,SLC33A1,ENSG00000169359,protein_coding -45758,SNUPN,ENSG00000169371,protein_coding -39558,CRADD,ENSG00000169372,protein_coding -45750,SIN3A,ENSG00000169375,protein_coding -11056,ARL13B,ENSG00000169379,protein_coding -42085,RNASE2,ENSG00000169385,protein_coding -56923,AC010330.1,ENSG00000169393,protein_coding -42081,RNASE3,ENSG00000169397,protein_coding -29267,PTK2,ENSG00000169398,protein_coding -21710,RSPH10B2,ENSG00000169402,protein_coding 
-951,PTAFR,ENSG00000169403,protein_coding -45754,PTPN9,ENSG00000169410,protein_coding -42076,RNASE6,ENSG00000169413,protein_coding -3311,NPR1,ENSG00000169418,protein_coding -29255,KCNK9,ENSG00000169427,protein_coding -13929,CXCL8,ENSG00000169429,protein_coding -8309,SCN9A,ENSG00000169432,protein_coding -13927,RASSF6,ENSG00000169435,protein_coding -29249,COL22A1,ENSG00000169436,protein_coding -28695,SDC2,ENSG00000169439,protein_coding -862,CD52,ENSG00000169442,protein_coding -26572,MMGT1,ENSG00000169446,protein_coding -3269,SPRR1B,ENSG00000169469,protein_coding -3265,SPRR1A,ENSG00000169474,protein_coding -42019,OR4K14,ENSG00000169484,protein_coding -42017,OR4K15,ENSG00000169488,protein_coding -27821,TM2D2,ENSG00000169490,protein_coding -27820,HTRA4,ENSG00000169495,protein_coding -27818,PLEKHA2,ENSG00000169499,protein_coding -800,CLIC4,ENSG00000169504,protein_coding -8293,SLC38A11,ENSG00000169507,protein_coding -41696,GPR183,ENSG00000169508,protein_coding -3235,CRCT1,ENSG00000169509,protein_coding -56852,CCDC8,ENSG00000169515,protein_coding -32570,METTL15,ENSG00000169519,protein_coding -58671,ZNF280A,ENSG00000169548,protein_coding -32548,MUC15,ENSG00000169550,protein_coding -26536,CT55,ENSG00000169551,protein_coding -8046,ZEB2,ENSG00000169554,protein_coding -25609,GJB1,ENSG00000169562,protein_coding -6625,PCBP1,ENSG00000169564,protein_coding -17352,HINT1,ENSG00000169567,protein_coding -17201,DTWD2,ENSG00000169570,protein_coding -58639,VPREB1,ENSG00000169575,protein_coding -31722,CLIC3,ENSG00000169583,protein_coding -47483,INO80E,ENSG00000169592,protein_coding -45985,BNC1,ENSG00000169594,protein_coding -207,DFFB,ENSG00000169598,protein_coding -6609,NFU1,ENSG00000169599,protein_coding -6601,ANTXR1,ENSG00000169604,protein_coding -6600,GKN1,ENSG00000169605,protein_coding -7553,CKAP2L,ENSG00000169607,protein_coding -45972,C15orf40,ENSG00000169609,protein_coding -45971,RAMAC,ENSG00000169612,protein_coding -6593,PROKR1,ENSG00000169618,protein_coding 
-6591,APLF,ENSG00000169621,protein_coding -47500,BOLA2B,ENSG00000169627,protein_coding -7541,RGPD8,ENSG00000169629,protein_coding -58574,HIC2,ENSG00000169635,protein_coding -744,LUZP1,ENSG00000169641,protein_coding -52016,HEXD,ENSG00000169660,protein_coding -13135,DRD5,ENSG00000169676,protein_coding -7498,BUB1,ENSG00000169679,protein_coding -47416,SPNS1,ENSG00000169682,protein_coding -51986,LRRC45,ENSG00000169683,protein_coding -45845,CHRNA5,ENSG00000169684,protein_coding -48100,MT1B,ENSG00000169688,protein_coding -51985,CENPX,ENSG00000169689,protein_coding -31682,AGPAT2,ENSG00000169692,protein_coding -51984,ASPSCR1,ENSG00000169696,protein_coding -11630,GP9,ENSG00000169704,protein_coding -51994,FASN,ENSG00000169710,protein_coding -11636,CNBP,ENSG00000169714,protein_coding -48093,MT1E,ENSG00000169715,protein_coding -180,ACTRT2,ENSG00000169717,protein_coding -51993,DUS1L,ENSG00000169718,protein_coding -51992,GPS1,ENSG00000169727,protein_coding -51991,RFNG,ENSG00000169733,protein_coding -51988,DCXR,ENSG00000169738,protein_coding -35879,ZNF32,ENSG00000169740,protein_coding -13226,LDB2,ENSG00000169744,protein_coding -51987,RAC3,ENSG00000169750,protein_coding -45780,NRG4,ENSG00000169752,protein_coding -7439,LIMS1,ENSG00000169756,protein_coding -45782,TMEM266,ENSG00000169758,protein_coding -12344,NLGN1,ENSG00000169760,protein_coding -13218,TAPT1,ENSG00000169762,protein_coding -58145,PRYP3,ENSG00000169763,protein_coding -6501,UGP2,ENSG00000169764,protein_coding -15698,TAS2R1,ENSG00000169777,protein_coding -45810,LINGO1,ENSG00000169783,protein_coding -58112,PRY,ENSG00000169789,protein_coding -58100,RBMY1F,ENSG00000169800,protein_coding -58097,PRY2,ENSG00000169807,protein_coding -35866,HNRNPF,ENSG00000169813,protein_coding -9914,BTD,ENSG00000169814,protein_coding -35860,CSGALNACT2,ENSG00000169826,protein_coding -14347,TACR3,ENSG00000169836,protein_coding -40785,GSX1,ENSG00000169840,protein_coding -13352,PCDH7,ENSG00000169851,protein_coding 
-10974,ROBO1,ENSG00000169855,protein_coding -45213,ONECUT1,ENSG00000169856,protein_coding -44701,AVEN,ENSG00000169857,protein_coding -12052,P2RY1,ENSG00000169860,protein_coding -15743,CTNND2,ENSG00000169862,protein_coding -23419,TRIM56,ENSG00000169871,protein_coding -23417,MUC17,ENSG00000169876,protein_coding -47624,AHSP,ENSG00000169877,protein_coding -38560,WNT10B,ENSG00000169884,protein_coding -141,CALML6,ENSG00000169885,protein_coding -24767,REPS2,ENSG00000169891,protein_coding -23413,MUC3A,ENSG00000169894,protein_coding -24759,SYAP1,ENSG00000169895,protein_coding -47607,ITGAM,ENSG00000169896,protein_coding -47606,PYDC1,ENSG00000169900,protein_coding -22778,TPST1,ENSG00000169902,protein_coding -11979,TM4SF4,ENSG00000169903,protein_coding -4068,TOR1AIP2,ENSG00000169905,protein_coding -24758,S100G,ENSG00000169906,protein_coding -11972,TM4SF1,ENSG00000169908,protein_coding -648,OTUD3,ENSG00000169914,protein_coding -44653,OTUD7A,ENSG00000169918,protein_coding -22770,GUSB,ENSG00000169919,protein_coding -31601,BRD3,ENSG00000169925,protein_coding -44650,KLF13,ENSG00000169926,protein_coding -24689,FRMPD4,ENSG00000169933,protein_coding -28871,ZFPM2,ENSG00000169946,protein_coding -47537,ZNF764,ENSG00000169951,protein_coding -58024,HSFY2,ENSG00000169953,protein_coding -47534,ZNF747,ENSG00000169955,protein_coding -47531,ZNF768,ENSG00000169957,protein_coding -100,TAS1R3,ENSG00000169962,protein_coding -10340,TMEM42,ENSG00000169964,protein_coding -7733,MAP3K2,ENSG00000169967,protein_coding -95,PUSL1,ENSG00000169972,protein_coding -21089,SF3B5,ENSG00000169976,protein_coding -10332,ZNF35,ENSG00000169981,protein_coding -14983,TIGD4,ENSG00000169989,protein_coding -623,IFFO2,ENSG00000169991,protein_coding -49324,NLGN2,ENSG00000169992,protein_coding -7742,MYO7B,ENSG00000169994,protein_coding -49369,CHD3,ENSG00000170004,protein_coding -14981,TMEM154,ENSG00000170006,protein_coding -10227,MYRIP,ENSG00000170011,protein_coding -11194,ALCAM,ENSG00000170017,protein_coding 
-22987,YWHAG,ENSG00000170027,protein_coding -8560,UBE2E3,ENSG00000170035,protein_coding -49375,CNTROB,ENSG00000170037,protein_coding -49374,TRAPPC1,ENSG00000170043,protein_coding -11184,ZPLD1,ENSG00000170044,protein_coding -49372,KCNAB3,ENSG00000170049,protein_coding -43608,SERPINA9,ENSG00000170054,protein_coding -18337,FAM153A,ENSG00000170074,protein_coding -4397,GPR37L1,ENSG00000170075,protein_coding -18278,SIMC1,ENSG00000170085,protein_coding -15159,TMEM192,ENSG00000170088,protein_coding -18235,NSG2,ENSG00000170091,protein_coding -22962,SPDYE5,ENSG00000170092,protein_coding -43603,SERPINA6,ENSG00000170099,protein_coding -48944,ZNF778,ENSG00000170100,protein_coding -44362,NIPA1,ENSG00000170113,protein_coding -29472,FOXD4,ENSG00000170122,protein_coding -4349,GPR25,ENSG00000170128,protein_coding -9994,UBE2E1,ENSG00000170142,protein_coding -8496,HNRNPA3,ENSG00000170144,protein_coding -34574,SIK2,ENSG00000170145,protein_coding -30264,AL391987.1,ENSG00000170152,protein_coding -14812,RNF150,ENSG00000170153,protein_coding -49609,CCDC144A,ENSG00000170160,protein_coding -20711,VGLL2,ENSG00000170162,protein_coding -8466,HOXD4,ENSG00000170166,protein_coding -49335,CHRNB1,ENSG00000170175,protein_coding -8458,HOXD12,ENSG00000170178,protein_coding -14847,GYPA,ENSG00000170180,protein_coding -14827,USP38,ENSG00000170185,protein_coding -51672,SLC16A5,ENSG00000170190,protein_coding -53808,NANP,ENSG00000170191,protein_coding -34632,ANKK1,ENSG00000170209,protein_coding -18051,ADRA1B,ENSG00000170214,protein_coding -49466,ADPRM,ENSG00000170222,protein_coding -18057,FABP6,ENSG00000170231,protein_coding -18056,PWWP2A,ENSG00000170234,protein_coding -45149,USP50,ENSG00000170236,protein_coding -32320,USP47,ENSG00000170242,protein_coding -10123,PDCD6IP,ENSG00000170248,protein_coding -32468,MRGPRX1,ENSG00000170255,protein_coding -24347,ZNF212,ENSG00000170260,protein_coding -60112,MRAP,ENSG00000170262,protein_coding -6457,FAM161A,ENSG00000170264,protein_coding 
-24346,ZNF282,ENSG00000170265,protein_coding -10105,GLB1,ENSG00000170266,protein_coding -43577,GON7,ENSG00000170270,protein_coding -17972,FAXDC2,ENSG00000170271,protein_coding -10110,CRTAP,ENSG00000170275,protein_coding -34587,HSPB2,ENSG00000170276,protein_coding -24323,C7orf33,ENSG00000170279,protein_coding -28546,CNGB3,ENSG00000170289,protein_coding -34508,SLN,ENSG00000170290,protein_coding -49305,ELP5,ENSG00000170291,protein_coding -10088,CMTM8,ENSG00000170293,protein_coding -49300,GABARAP,ENSG00000170296,protein_coding -49820,LGALS9B,ENSG00000170298,protein_coding -49431,STX8,ENSG00000170310,protein_coding -36139,CDK1,ENSG00000170312,protein_coding -49589,UBB,ENSG00000170315,protein_coding -35068,NFRKB,ENSG00000170322,protein_coding -28466,FABP4,ENSG00000170323,protein_coding -36003,FRMPD2,ENSG00000170324,protein_coding -35069,PRDM10,ENSG00000170325,protein_coding -6468,B3GNT2,ENSG00000170340,protein_coding -43312,FOS,ENSG00000170345,protein_coding -43306,TMED10,ENSG00000170348,protein_coding -9705,SETMAR,ENSG00000170364,protein_coding -14869,SMAD1,ENSG00000170365,protein_coding -53770,CST5,ENSG00000170367,protein_coding -53769,CST2,ENSG00000170369,protein_coding -37194,EMX2,ENSG00000170370,protein_coding -53766,CST1,ENSG00000170373,protein_coding -38767,AC073611.1,ENSG00000170374,protein_coding -24253,TCAF2,ENSG00000170379,protein_coding -23081,SEMA3E,ENSG00000170381,protein_coding -4482,LRRN2,ENSG00000170382,protein_coding -4648,SLC30A1,ENSG00000170385,protein_coding -14933,DCLK2,ENSG00000170390,protein_coding -8604,ZNF804A,ENSG00000170396,protein_coding -51638,GPRC5C,ENSG00000170412,protein_coding -7336,TMEM182,ENSG00000170417,protein_coding -22471,VSTM2A,ENSG00000170419,protein_coding -38741,KRT8,ENSG00000170421,protein_coding -38739,KRT78,ENSG00000170423,protein_coding -49575,ADORA2B,ENSG00000170425,protein_coding -38976,SDR9C7,ENSG00000170426,protein_coding -37378,MGMT,ENSG00000170430,protein_coding -38889,METTL7B,ENSG00000170439,protein_coding 
-38695,KRT86,ENSG00000170442,protein_coding -17636,HARS,ENSG00000170445,protein_coding -13556,NFXL1,ENSG00000170448,protein_coding -38711,KRT75,ENSG00000170454,protein_coding -38263,DENND5B,ENSG00000170456,protein_coding -17629,CD14,ENSG00000170458,protein_coding -17580,DNAJC18,ENSG00000170464,protein_coding -38713,KRT6C,ENSG00000170465,protein_coding -43232,RIOX1,ENSG00000170468,protein_coding -17579,SPATA24,ENSG00000170469,protein_coding -54127,RALGAPB,ENSG00000170471,protein_coding -38908,PYM1,ENSG00000170473,protein_coding -17577,MZB1,ENSG00000170476,protein_coding -38736,KRT4,ENSG00000170477,protein_coding -17576,SLC23A1,ENSG00000170482,protein_coding -38717,KRT74,ENSG00000170484,protein_coding -7298,NPAS2,ENSG00000170485,protein_coding -38718,KRT72,ENSG00000170486,protein_coding -4468,KISS1,ENSG00000170498,protein_coding -7283,LONRF2,ENSG00000170500,protein_coding -14156,NUDT9,ENSG00000170502,protein_coding -14151,HSD17B13,ENSG00000170509,protein_coding -38923,PA2G4,ENSG00000170515,protein_coding -13541,COX7B2,ENSG00000170516,protein_coding -14434,ELOVL6,ENSG00000170522,protein_coding -38703,KRT83,ENSG00000170523,protein_coding -35274,PFKFB3,ENSG00000170525,protein_coding -47140,TMC7,ENSG00000170537,protein_coding -47136,ARL6IP1,ENSG00000170540,protein_coding -18536,SERPINB9,ENSG00000170542,protein_coding -38662,SMAGP,ENSG00000170545,protein_coding -15593,IRX1,ENSG00000170549,protein_coding -52566,CDH2,ENSG00000170558,protein_coding -15584,IRX2,ENSG00000170561,protein_coding -16212,EMB,ENSG00000170571,protein_coding -6222,SIX2,ENSG00000170577,protein_coding -52139,DLGAP1,ENSG00000170579,protein_coding -38948,STAT2,ENSG00000170581,protein_coding -18087,NUDCD2,ENSG00000170584,protein_coding -56823,IRF2BP1,ENSG00000170604,protein_coding -38857,OR9K2,ENSG00000170605,protein_coding -17416,HSPA4,ENSG00000170606,protein_coding -56822,FOXA3,ENSG00000170608,protein_coding -17998,FAM71B,ENSG00000170613,protein_coding -23503,SLC26A5,ENSG00000170615,protein_coding 
-29452,COMMD5,ENSG00000170619,protein_coding -17985,SGCD,ENSG00000170624,protein_coding -38841,GTSF1,ENSG00000170627,protein_coding -29455,ZNF16,ENSG00000170631,protein_coding -23491,ARMC10,ENSG00000170632,protein_coding -40168,RNF34,ENSG00000170633,protein_coding -6341,ACYP2,ENSG00000170634,protein_coding -59604,TRABD,ENSG00000170638,protein_coding -38780,ATF7,ENSG00000170653,protein_coding -23468,RASA4B,ENSG00000170667,protein_coding -53128,SOCS6,ENSG00000170677,protein_coding -30956,CAVIN4,ENSG00000170681,protein_coding -32226,OR10A3,ENSG00000170683,protein_coding -56776,ZNF296,ENSG00000170684,protein_coding -50980,HOXB9,ENSG00000170689,protein_coding -50995,TTLL6,ENSG00000170703,protein_coding -19769,POLH,ENSG00000170734,protein_coding -32205,SYT9,ENSG00000170743,protein_coding -5739,KCNS3,ENSG00000170745,protein_coding -32202,RBMXL2,ENSG00000170748,protein_coding -35702,KIF5B,ENSG00000170759,protein_coding -23769,GPR37,ENSG00000170775,protein_coding -46047,AKAP13,ENSG00000170776,protein_coding -29579,TPD52L3,ENSG00000170777,protein_coding -43990,CDCA4,ENSG00000170779,protein_coding -32196,OR10A4,ENSG00000170782,protein_coding -28089,SDR16C5,ENSG00000170786,protein_coding -36529,DYDC1,ENSG00000170788,protein_coding -32195,OR10A2,ENSG00000170790,protein_coding -28086,CHCHD7,ENSG00000170791,protein_coding -13060,HTRA3,ENSG00000170801,protein_coding -6286,FOXN2,ENSG00000170802,protein_coding -23752,LMOD2,ENSG00000170807,protein_coding -11723,BFSP2,ENSG00000170819,protein_coding -6300,FSHR,ENSG00000170820,protein_coding -51305,USP32,ENSG00000170832,protein_coding -31565,CEL,ENSG00000170835,protein_coding -51318,PPM1D,ENSG00000170836,protein_coding -10882,GPR27,ENSG00000170837,protein_coding -13027,AC093323.1,ENSG00000170846,protein_coding -56662,PSG6,ENSG00000170848,protein_coding -22124,KBTBD2,ENSG00000170852,protein_coding -11086,RIOX2,ENSG00000170854,protein_coding -40123,TRIAP1,ENSG00000170855,protein_coding -9877,LSM3,ENSG00000170860,protein_coding 
-13035,KIAA0232,ENSG00000170871,protein_coding -29084,MTSS1,ENSG00000170873,protein_coding -9873,TMEM43,ENSG00000170876,protein_coding -29079,RNF139,ENSG00000170881,protein_coding -57409,RPS9,ENSG00000170889,protein_coding -40117,PLA2G1B,ENSG00000170890,protein_coding -13005,CYTL1,ENSG00000170891,protein_coding -57407,TSEN34,ENSG00000170892,protein_coding -11664,TRH,ENSG00000170893,protein_coding -19923,GSTA4,ENSG00000170899,protein_coding -34486,MSANTD4,ENSG00000170903,protein_coding -57399,NDUFA3,ENSG00000170906,protein_coding -57398,OSCAR,ENSG00000170909,protein_coding -19903,PAQR8,ENSG00000170915,protein_coding -14616,NUDT6,ENSG00000170917,protein_coding -55237,OR7G3,ENSG00000170920,protein_coding -51379,TANC2,ENSG00000170921,protein_coding -55234,OR7G2,ENSG00000170923,protein_coding -26119,TEX13B,ENSG00000170925,protein_coding -19894,PKHD1,ENSG00000170927,protein_coding -55233,OR1M1,ENSG00000170929,protein_coding -26113,NCBP2L,ENSG00000170935,protein_coding -32604,DNAJC24,ENSG00000170946,protein_coding -55228,MBD3L1,ENSG00000170948,protein_coding -57286,ZNF160,ENSG00000170949,protein_coding -19872,PGK2,ENSG00000170950,protein_coding -34952,OR8B12,ENSG00000170953,protein_coding -57287,ZNF415,ENSG00000170954,protein_coding -32162,CAVIN3,ENSG00000170955,protein_coding -56612,CEACAM3,ENSG00000170956,protein_coding -32601,DCDC1,ENSG00000170959,protein_coding -29017,HAS2,ENSG00000170961,protein_coding -34460,PDGFD,ENSG00000170962,protein_coding -26513,PLAC1,ENSG00000170965,protein_coding -34462,DDI1,ENSG00000170967,protein_coding -2451,S1PR1,ENSG00000170989,protein_coding -26475,HS6ST2,ENSG00000171004,protein_coding -34912,OR4D5,ENSG00000171014,protein_coding -45243,PYGO1,ENSG00000171016,protein_coding -55180,LRRC8E,ENSG00000171017,protein_coding -28404,PKIA,ENSG00000171033,protein_coding -27246,XKR6,ENSG00000171044,protein_coding -29298,TSNARE1,ENSG00000171045,protein_coding -57213,FPR2,ENSG00000171049,protein_coding -57212,FPR1,ENSG00000171051,protein_coding 
-34995,PATE1,ENSG00000171053,protein_coding -26457,OR13H1,ENSG00000171054,protein_coding -6068,FEZ2,ENSG00000171055,protein_coding -27239,SOX7,ENSG00000171056,protein_coding -27237,C8orf74,ENSG00000171060,protein_coding -33730,C11orf24,ENSG00000171067,protein_coding -5980,ALK,ENSG00000171094,protein_coding -31454,KYAT1,ENSG00000171097,protein_coding -26775,MTM1,ENSG00000171100,protein_coding -31570,OBP2B,ENSG00000171102,protein_coding -5969,TRMT61B,ENSG00000171103,protein_coding -55141,INSR,ENSG00000171105,protein_coding -12407,MFN1,ENSG00000171109,protein_coding -24386,GIMAP8,ENSG00000171115,protein_coding -26761,HSFX1,ENSG00000171116,protein_coding -55068,NRTN,ENSG00000171119,protein_coding -12403,KCNMB3,ENSG00000171121,protein_coding -55070,FUT3,ENSG00000171124,protein_coding -6163,KCNG3,ENSG00000171126,protein_coding -24365,ATP6V0E2,ENSG00000171130,protein_coding -6230,PRKCE,ENSG00000171132,protein_coding -31101,OR2K2,ENSG00000171133,protein_coding -9776,JAGN1,ENSG00000171135,protein_coding -55531,RLN3,ENSG00000171136,protein_coding -9769,TADA3,ENSG00000171148,protein_coding -6251,SOCS5,ENSG00000171150,protein_coding -26312,C1GALT1C1,ENSG00000171155,protein_coding -31412,C9orf16,ENSG00000171159,protein_coding -36851,MORN4,ENSG00000171160,protein_coding -5465,ZNF672,ENSG00000171161,protein_coding -5466,ZNF692,ENSG00000171163,protein_coding -31405,NAIF1,ENSG00000171169,protein_coding -5948,RBKS,ENSG00000171174,protein_coding -5431,OR2M4,ENSG00000171180,protein_coding -60040,GRIK1,ENSG00000171189,protein_coding -13888,MUC7,ENSG00000171195,protein_coding -13887,OPRPN,ENSG00000171199,protein_coding -13886,SMR3B,ENSG00000171201,protein_coding -34132,TMEM126A,ENSG00000171202,protein_coding -34131,TMEM126B,ENSG00000171204,protein_coding -36980,TRIM8,ENSG00000171206,protein_coding -47873,NETO2,ENSG00000171208,protein_coding -13883,CSN3,ENSG00000171209,protein_coding -21275,CLDN20,ENSG00000171217,protein_coding -33490,CDC42BPG,ENSG00000171219,protein_coding 
-54051,SCAND1,ENSG00000171222,protein_coding -55472,JUNB,ENSG00000171223,protein_coding -36265,FAM241B,ENSG00000171224,protein_coding -7648,TMEM37,ENSG00000171227,protein_coding -13845,UGT2B7,ENSG00000171234,protein_coding -55030,LRG1,ENSG00000171236,protein_coding -47855,SHCBP1,ENSG00000171241,protein_coding -21815,SOSTDC1,ENSG00000171243,protein_coding -51895,NPTX1,ENSG00000171246,protein_coding -44785,FAM98B,ENSG00000171262,protein_coding -55396,ZNF439,ENSG00000171291,protein_coding -55394,ZNF440,ENSG00000171295,protein_coding -51878,GAA,ENSG00000171298,protein_coding -51854,CANT1,ENSG00000171302,protein_coding -5892,KCNK3,ENSG00000171303,protein_coding -36844,ZDHHC16,ENSG00000171307,protein_coding -39775,CHST11,ENSG00000171310,protein_coding -36843,EXOSC1,ENSG00000171311,protein_coding -36842,PGAM1,ENSG00000171314,protein_coding -28150,CHD7,ENSG00000171316,protein_coding -27603,ESCO2,ENSG00000171320,protein_coding -50616,KRT19,ENSG00000171345,protein_coding -50614,KRT15,ENSG00000171346,protein_coding -1509,LURAP1,ENSG00000171357,protein_coding -50606,KRT38,ENSG00000171360,protein_coding -25288,CLCN5,ENSG00000171365,protein_coding -15528,TPPP,ENSG00000171368,protein_coding -2646,KCND3,ENSG00000171385,protein_coding -26417,APLN,ENSG00000171388,protein_coding -50578,KRTAP4-4,ENSG00000171396,protein_coding -50612,KRT13,ENSG00000171401,protein_coding -25376,XAGE3,ENSG00000171402,protein_coding -50618,KRT9,ENSG00000171403,protein_coding -25373,XAGE5,ENSG00000171405,protein_coding -20961,PDE7B,ENSG00000171408,protein_coding -15568,MRPL36,ENSG00000171421,protein_coding -57536,ZNF581,ENSG00000171425,protein_coding -27413,NAT1,ENSG00000171428,protein_coding -50551,KRT20,ENSG00000171431,protein_coding -25229,GLOD5,ENSG00000171433,protein_coding -40056,KSR2,ENSG00000171435,protein_coding -57531,ZNF524,ENSG00000171443,protein_coding -17131,MCC,ENSG00000171444,protein_coding -50544,KRT27,ENSG00000171446,protein_coding -31288,ZBTB26,ENSG00000171448,protein_coding 
-9172,CDK5R2,ENSG00000171450,protein_coding -53091,DSEL,ENSG00000171451,protein_coding -19763,POLR1C,ENSG00000171453,protein_coding -53920,ASXL1,ENSG00000171456,protein_coding -31279,OR1L6,ENSG00000171459,protein_coding -19759,DLK2,ENSG00000171462,protein_coding -55267,ZNF562,ENSG00000171466,protein_coding -19750,ZNF318,ENSG00000171467,protein_coding -55265,ZNF561,ENSG00000171469,protein_coding -50515,WIPF2,ENSG00000171475,protein_coding -13694,HOPX,ENSG00000171476,protein_coding -25180,SPACA5B,ENSG00000171478,protein_coding -31276,OR1L3,ENSG00000171481,protein_coding -57554,NLRP5,ENSG00000171487,protein_coding -2241,LRRC8C,ENSG00000171488,protein_coding -25176,SPACA5,ENSG00000171489,protein_coding -46958,RSL1D1,ENSG00000171490,protein_coding -2246,LRRC8D,ENSG00000171492,protein_coding -16137,MROH2B,ENSG00000171495,protein_coding -31271,OR1L8,ENSG00000171496,protein_coding -15094,PPID,ENSG00000171497,protein_coding -31270,OR1N2,ENSG00000171501,protein_coding -2177,COL24A1,ENSG00000171502,protein_coding -15092,ETFDH,ENSG00000171503,protein_coding -31269,OR1N1,ENSG00000171505,protein_coding -15087,RXFP1,ENSG00000171509,protein_coding -2157,LPAR3,ENSG00000171517,protein_coding -16128,PTGER4,ENSG00000171522,protein_coding -16674,TBCA,ENSG00000171530,protein_coding -50481,NEUROD2,ENSG00000171532,protein_coding -33979,MAP6,ENSG00000171533,protein_coding -16673,OTP,ENSG00000171540,protein_coding -9431,ECEL1,ENSG00000171551,protein_coding -53892,BCL2L1,ENSG00000171552,protein_coding -15015,FGG,ENSG00000171557,protein_coding -15014,FGA,ENSG00000171560,protein_coding -33959,OR2AT4,ENSG00000171561,protein_coding -15013,FGB,ENSG00000171564,protein_coding -15010,PLRG1,ENSG00000171566,protein_coding -56550,RAB4B-EGLN2,ENSG00000171570,protein_coding -57717,ZNF584,ENSG00000171574,protein_coding -60302,DSCAM,ENSG00000171587,protein_coding -51632,DNAI2,ENSG00000171595,protein_coding -9400,NMUR1,ENSG00000171596,protein_coding -333,CLSTN1,ENSG00000171603,protein_coding 
-17593,CXXC5,ENSG00000171604,protein_coding -57690,ZNF274,ENSG00000171606,protein_coding -329,PIK3CD,ENSG00000171608,protein_coding -19725,PTCRA,ENSG00000171611,protein_coding -326,SLC25A33,ENSG00000171612,protein_coding -16600,ENC1,ENSG00000171617,protein_coding -321,SPSB1,ENSG00000171621,protein_coding -33892,P2RY6,ENSG00000171631,protein_coding -51522,BPTF,ENSG00000171634,protein_coding -16657,S100Z,ENSG00000171643,protein_coding -57649,ZIK1,ENSG00000171649,protein_coding -25064,GPR82,ENSG00000171657,protein_coding -25063,GPR34,ENSG00000171659,protein_coding -250,PLEKHG5,ENSG00000171680,protein_coding -37988,ATF7IP,ENSG00000171681,protein_coding -54742,LKAAEAR1,ENSG00000171695,protein_coding -54739,RGS19,ENSG00000171700,protein_coding -54737,TCEA2,ENSG00000171703,protein_coding -27158,DEFB4A,ENSG00000171711,protein_coding -32507,ANO5,ENSG00000171714,protein_coding -17733,HDAC3,ENSG00000171720,protein_coding -3726,SPATA46,ENSG00000171722,protein_coding -43080,GPHN,ENSG00000171723,protein_coding -48650,VAT1L,ENSG00000171724,protein_coding -496,TMEM51,ENSG00000171729,protein_coding -263,CAMTA1,ENSG00000171735,protein_coding -56434,LGALS4,ENSG00000171747,protein_coding -12273,LRRC34,ENSG00000171757,protein_coding -39740,PAH,ENSG00000171759,protein_coding -45049,SPATA5L1,ENSG00000171763,protein_coding -45047,GATM,ENSG00000171766,protein_coding -37458,SYCE1,ENSG00000171772,protein_coding -55728,NXNL1,ENSG00000171773,protein_coding -56420,RASGRP4,ENSG00000171777,protein_coding -3637,NHLH1,ENSG00000171786,protein_coding -1333,SLFNL1,ENSG00000171790,protein_coding -53050,BCL2,ENSG00000171791,protein_coding -37555,RHNO1,ENSG00000171792,protein_coding -1331,CTPS1,ENSG00000171793,protein_coding -37430,UTF1,ENSG00000171794,protein_coding -37429,KNDC1,ENSG00000171798,protein_coding -56397,WDR87,ENSG00000171804,protein_coding -3878,METTL18,ENSG00000171806,protein_coding -37422,CFAP46,ENSG00000171811,protein_coding -1171,COL8A2,ENSG00000171812,protein_coding 
-37414,PWWP2B,ENSG00000171813,protein_coding -17667,PCDHB1,ENSG00000171815,protein_coding -56384,ZNF540,ENSG00000171817,protein_coding -383,ANGPTL7,ENSG00000171819,protein_coding -37516,FBXL14,ENSG00000171823,protein_coding -377,EXOSC10,ENSG00000171824,protein_coding -56378,ZNF570,ENSG00000171827,protein_coding -37499,NINJ2,ENSG00000171840,protein_coding -29725,MLLT3,ENSG00000171843,protein_coding -37759,FAM90A1,ENSG00000171847,protein_coding -5627,RRM2,ENSG00000171848,protein_coding -5522,TRAPPC12,ENSG00000171853,protein_coding -29736,IFNB1,ENSG00000171855,protein_coding -54629,RPS21,ENSG00000171858,protein_coding -37746,C3AR1,ENSG00000171860,protein_coding -49017,MRM3,ENSG00000171861,protein_coding -36639,PTEN,ENSG00000171862,protein_coding -5532,RPS7,ENSG00000171863,protein_coding -53438,PRND,ENSG00000171864,protein_coding -5529,RNASEH1,ENSG00000171865,protein_coding -53437,PRNP,ENSG00000171867,protein_coding -1432,KLF17,ENSG00000171872,protein_coding -53432,ADRA1D,ENSG00000171873,protein_coding -44993,FRMD5,ENSG00000171877,protein_coding -52554,AQP4,ENSG00000171885,protein_coding -55635,CYP4F11,ENSG00000171903,protein_coding -45400,TLN2,ENSG00000171914,protein_coding -49706,LGALS9C,ENSG00000171916,protein_coding -49724,TVP23B,ENSG00000171928,protein_coding -49723,FBXW10,ENSG00000171931,protein_coding -55620,OR10H3,ENSG00000171936,protein_coding -54471,ZNF217,ENSG00000171940,protein_coding -55619,OR10H2,ENSG00000171942,protein_coding -2859,SRGAP2C,ENSG00000171943,protein_coding -32076,OR52A5,ENSG00000171944,protein_coding -9274,SCG2,ENSG00000171951,protein_coding -49672,ATPAF2,ENSG00000171953,protein_coding -55609,CYP4F22,ENSG00000171954,protein_coding -45352,FOXB1,ENSG00000171956,protein_coding -1364,PPIH,ENSG00000171960,protein_coding -49671,DRC3,ENSG00000171962,protein_coding -54940,ZNF57,ENSG00000171970,protein_coding -53469,SHLD1,ENSG00000171984,protein_coding -32031,C11orf40,ENSG00000171987,protein_coding -36164,JMJD1C,ENSG00000171988,protein_coding 
-45330,LDHAL6B,ENSG00000171989,protein_coding -17899,SYNPO,ENSG00000171992,protein_coding -54937,ZNF556,ENSG00000172000,protein_coding -7137,MAL,ENSG00000172005,protein_coding -54933,ZNF554,ENSG00000172006,protein_coding -14781,RAB33B,ENSG00000172007,protein_coding -54932,AC006538.1,ENSG00000172009,protein_coding -30327,ANKRD20A4,ENSG00000172014,protein_coding -6822,REG3A,ENSG00000172016,protein_coding -11359,GAP43,ENSG00000172020,protein_coding -6819,REG1B,ENSG00000172023,protein_coding -2276,EPHX4,ENSG00000172031,protein_coding -10490,LAMB2,ENSG00000172037,protein_coding -10489,USP19,ENSG00000172046,protein_coding -10487,QARS,ENSG00000172053,protein_coding -50497,ORMDL3,ENSG00000172057,protein_coding -16528,SERF1A,ENSG00000172058,protein_coding -5622,KLF11,ENSG00000172059,protein_coding -12704,LRRC15,ENSG00000172061,protein_coding -16529,SMN1,ENSG00000172062,protein_coding -6988,EIF2AK3,ENSG00000172071,protein_coding -6986,TEX37,ENSG00000172073,protein_coding -54892,MOB3A,ENSG00000172081,protein_coding -6975,KRCC1,ENSG00000172086,protein_coding -10440,NME6,ENSG00000172113,protein_coding -21944,CYCS,ENSG00000172115,protein_coding -6939,CD8B,ENSG00000172116,protein_coding -50301,SLFN12,ENSG00000172123,protein_coding -48486,CALB2,ENSG00000172137,protein_coding -11287,SLC9C1,ENSG00000172139,protein_coding -49119,OR1A1,ENSG00000172146,protein_coding -49118,OR1A2,ENSG00000172150,protein_coding -33036,OR8I2,ENSG00000172154,protein_coding -3254,LCE1D,ENSG00000172155,protein_coding -50260,CCL11,ENSG00000172156,protein_coding -30589,FRMD3,ENSG00000172159,protein_coding -29010,SNTB1,ENSG00000172164,protein_coding -29009,MTBP,ENSG00000172167,protein_coding -50141,TEFM,ENSG00000172171,protein_coding -29008,MRPL13,ENSG00000172172,protein_coding -52975,MALT1,ENSG00000172175,protein_coding -18856,PRL,ENSG00000172179,protein_coding -46094,ISG20,ENSG00000172183,protein_coding -33001,OR4C11,ENSG00000172188,protein_coding -18829,MBOAT1,ENSG00000172197,protein_coding 
-33061,OR8U1,ENSG00000172199,protein_coding -18826,ID4,ENSG00000172201,protein_coding -32894,OR4X2,ENSG00000172208,protein_coding -23558,GPR22,ENSG00000172209,protein_coding -10366,CXCR6,ENSG00000172215,protein_coding -54413,CEBPB,ENSG00000172216,protein_coding -54806,AZU1,ENSG00000172232,protein_coding -46539,TPSAB1,ENSG00000172236,protein_coding -14232,ATOH1,ENSG00000172238,protein_coding -16189,PAIP1,ENSG00000172239,protein_coding -37859,CLEC7A,ENSG00000172243,protein_coding -16185,C5orf34,ENSG00000172244,protein_coding -32878,C1QTNF4,ENSG00000172247,protein_coding -1995,NEGR1,ENSG00000172260,protein_coding -16173,ZNF131,ENSG00000172262,protein_coding -53574,MACROD2,ENSG00000172264,protein_coding -34800,DPAGT1,ENSG00000172269,protein_coding -54787,BSG,ENSG00000172270,protein_coding -34802,HINFP,ENSG00000172273,protein_coding -58224,CDY1,ENSG00000172288,protein_coding -33230,OR10V1,ENSG00000172289,protein_coding -8323,CERS6,ENSG00000172292,protein_coding -53562,SPTLC3,ENSG00000172296,protein_coding -50181,COPRS,ENSG00000172301,protein_coding -54327,TP53RK,ENSG00000172315,protein_coding -8319,B3GALT1,ENSG00000172318,protein_coding -33215,OR5A1,ENSG00000172320,protein_coding -37850,CLEC12A,ENSG00000172322,protein_coding -33214,OR5A2,ENSG00000172324,protein_coding -23965,BPGM,ENSG00000172331,protein_coding -23399,POP7,ENSG00000172336,protein_coding -2354,ALG14,ENSG00000172339,protein_coding -10839,SUCLG2,ENSG00000172340,protein_coding -45921,STARD5,ENSG00000172345,protein_coding -59383,CSDC2,ENSG00000172346,protein_coding -19822,RCAN2,ENSG00000172348,protein_coding -45919,IL16,ENSG00000172349,protein_coding -34803,ABCG4,ENSG00000172350,protein_coding -58165,CDY1B,ENSG00000172352,protein_coding -23397,GNB2,ENSG00000172354,protein_coding -52856,CFAP53,ENSG00000172361,protein_coding -33174,OR5B12,ENSG00000172362,protein_coding -33173,OR5B2,ENSG00000172365,protein_coding -46488,MCRIP2,ENSG00000172366,protein_coding -34805,PDZD3,ENSG00000172367,protein_coding 
-34801,C2CD2L,ENSG00000172375,protein_coding -33155,OR9I1,ENSG00000172377,protein_coding -45897,ARNT2,ENSG00000172379,protein_coding -1942,GNG12,ENSG00000172380,protein_coding -46690,PRSS27,ENSG00000172382,protein_coding -14555,MYOZ2,ENSG00000172399,protein_coding -14553,SYNPO2,ENSG00000172403,protein_coding -59355,DNAJB7,ENSG00000172404,protein_coding -33132,CLP1,ENSG00000172409,protein_coding -1921,INSL5,ENSG00000172410,protein_coding -51355,EFCAB3,ENSG00000172421,protein_coding -34755,TTC36,ENSG00000172425,protein_coding -19773,RSPH9,ENSG00000172426,protein_coding -9592,COPS9,ENSG00000172428,protein_coding -19771,GTPBP2,ENSG00000172432,protein_coding -1806,FGGY,ENSG00000172456,protein_coding -33091,OR9G4,ENSG00000172457,protein_coding -40621,IL17D,ENSG00000172458,protein_coding -33085,OR5AR1,ENSG00000172459,protein_coding -20419,FUT9,ENSG00000172461,protein_coding -33084,OR5AP2,ENSG00000172464,protein_coding -26054,TCEAL1,ENSG00000172465,protein_coding -52660,ZNF24,ENSG00000172466,protein_coding -58017,HSFY1,ENSG00000172468,protein_coding -20415,MANEA,ENSG00000172469,protein_coding -26048,RAB40A,ENSG00000172476,protein_coding -9615,MAB21L4,ENSG00000172478,protein_coding -9614,AGXT,ENSG00000172482,protein_coding -33059,OR8J1,ENSG00000172487,protein_coding -33051,OR5T3,ENSG00000172489,protein_coding -14144,AFF1,ENSG00000172493,protein_coding -16750,ACOT12,ENSG00000172497,protein_coding -33584,FIBP,ENSG00000172500,protein_coding -33676,CARNS1,ENSG00000172508,protein_coding -55623,OR10H5,ENSG00000172519,protein_coding -48897,BANP,ENSG00000172530,protein_coding -33674,PPP1CA,ENSG00000172531,protein_coding -26880,HCFC1,ENSG00000172534,protein_coding -36024,FAM170B,ENSG00000172538,protein_coding -33583,CTSW,ENSG00000172543,protein_coding -18008,NIPAL4,ENSG00000172548,protein_coding -38851,MUCL1,ENSG00000172551,protein_coding -5498,SNTG2,ENSG00000172554,protein_coding -18003,FNDC9,ENSG00000172568,protein_coding -38072,PDE3A,ENSG00000172572,protein_coding 
-44787,RASGRP1,ENSG00000172575,protein_coding -12495,KLHL6,ENSG00000172578,protein_coding -36382,CHCHD1,ENSG00000172586,protein_coding -42281,MRPL52,ENSG00000172590,protein_coding -20777,SMPDL3A,ENSG00000172594,protein_coding -38553,RND1,ENSG00000172602,protein_coding -33666,RAD9A,ENSG00000172613,protein_coding -33582,EFEMP2,ENSG00000172638,protein_coding -38516,OR10AD1,ENSG00000172640,protein_coding -36369,AGAP5,ENSG00000172650,protein_coding -35935,WASHC2C,ENSG00000172661,protein_coding -33683,TMEM134,ENSG00000172663,protein_coding -12398,ZMAT3,ENSG00000172667,protein_coding -35931,ZFAND4,ENSG00000172671,protein_coding -20828,THEMIS,ENSG00000172673,protein_coding -28083,MOS,ENSG00000172680,protein_coding -55923,ZNF738,ENSG00000172687,protein_coding -33267,MS4A10,ENSG00000172689,protein_coding -50294,SLFN11,ENSG00000172716,protein_coding -43083,FAM71D,ENSG00000172717,protein_coding -29971,CCL19,ENSG00000172724,protein_coding -33679,CORO1B,ENSG00000172725,protein_coding -27713,FUT10,ENSG00000172728,protein_coding -36278,LRRC20,ENSG00000172731,protein_coding -33581,MUS81,ENSG00000172732,protein_coding -27688,PURG,ENSG00000172733,protein_coding -19600,TMEM217,ENSG00000172738,protein_coding -33220,OR4D9,ENSG00000172742,protein_coding -26989,ZNF596,ENSG00000172748,protein_coding -11680,COL6A5,ENSG00000172752,protein_coding -33579,CFL1,ENSG00000172757,protein_coding -11658,TMCC1,ENSG00000172765,protein_coding -40999,NAA16,ENSG00000172766,protein_coding -33172,OR5B3,ENSG00000172769,protein_coding -11650,EFCAB12,ENSG00000172771,protein_coding -33163,OR10W1,ENSG00000172772,protein_coding -48125,FAM192A,ENSG00000172775,protein_coding -11631,RAB43,ENSG00000172780,protein_coding -51658,AC087651.1,ENSG00000172782,protein_coding -29473,CBWD1,ENSG00000172785,protein_coding -38812,HOXC5,ENSG00000172789,protein_coding -51649,RAB37,ENSG00000172794,protein_coding -17130,DCP2,ENSG00000172795,protein_coding -33580,SNX32,ENSG00000172803,protein_coding 
-51628,RPL38,ENSG00000172809,protein_coding -28206,CYP7B1,ENSG00000172817,protein_coding -33576,OVOL1,ENSG00000172818,protein_coding -38759,RARG,ENSG00000172819,protein_coding -48284,CES4A,ENSG00000172824,protein_coding -48283,CES3,ENSG00000172828,protein_coding -33664,SSH3,ENSG00000172830,protein_coding -48280,CES2,ENSG00000172831,protein_coding -48276,PDP2,ENSG00000172840,protein_coding -8414,SP3,ENSG00000172845,protein_coding -38722,KRT2,ENSG00000172867,protein_coding -17208,DMXL1,ENSG00000172869,protein_coding -8383,METAP1D,ENSG00000172878,protein_coding -10240,ZNF621,ENSG00000172888,protein_coding -31680,EGFL7,ENSG00000172889,protein_coding -33803,NADSYN1,ENSG00000172890,protein_coding -33801,DHCR7,ENSG00000172893,protein_coding -17169,LVRN,ENSG00000172901,protein_coding -40905,NBEA,ENSG00000172915,protein_coding -33572,RNASEH2C,ENSG00000172922,protein_coding -33754,MYEOV,ENSG00000172927,protein_coding -33663,ANKRD13D,ENSG00000172932,protein_coding -33746,MRGPRF,ENSG00000172935,protein_coding -10179,MYD88,ENSG00000172936,protein_coding -33743,MRGPRD,ENSG00000172938,protein_coding -10180,OXSR1,ENSG00000172939,protein_coding -10181,SLC22A13,ENSG00000172940,protein_coding -25412,PHF8,ENSG00000172943,protein_coding -5994,LCLAT1,ENSG00000172954,protein_coding -14284,ADH6,ENSG00000172955,protein_coding -58330,XKR3,ENSG00000172967,protein_coding -10956,FRG2C,ENSG00000172969,protein_coding -33571,KAT5,ENSG00000172977,protein_coding -7449,SH3RF3,ENSG00000172985,protein_coding -10907,GXYLT2,ENSG00000172986,protein_coding -36869,HPSE2,ENSG00000172987,protein_coding -50832,DCAKD,ENSG00000172992,protein_coding -10132,ARPP21,ENSG00000172995,protein_coding -13042,TADA2B,ENSG00000173011,protein_coding -13041,CCDC96,ENSG00000173013,protein_coding -33662,GRK2,ENSG00000173020,protein_coding -33568,RELA,ENSG00000173039,protein_coding -13010,EVC2,ENSG00000173040,protein_coding -22714,ZNF680,ENSG00000173041,protein_coding -39951,HECTD4,ENSG00000173064,protein_coding 
-50032,FAM222B,ENSG00000173065,protein_coding -29679,BNC2,ENSG00000173068,protein_coding -31189,DEC1,ENSG00000173077,protein_coding -3446,RXFP4,ENSG00000173080,protein_coding -14104,HPSE,ENSG00000173083,protein_coding -14103,COQ2,ENSG00000173085,protein_coding -39914,CCDC63,ENSG00000173093,protein_coding -3698,HSPA6,ENSG00000173110,protein_coding -33467,TRMT112,ENSG00000173113,protein_coding -23598,LRRN3,ENSG00000173114,protein_coding -33659,KDM2A,ENSG00000173120,protein_coding -36793,ACSM6,ENSG00000173124,protein_coding -29415,ADCK5,ENSG00000173137,protein_coding -40640,MRPL57,ENSG00000173141,protein_coding -36773,NOC3L,ENSG00000173145,protein_coding -33466,ESRRA,ENSG00000173153,protein_coding -33658,RHOD,ENSG00000173156,protein_coding -38412,ADAMTS20,ENSG00000173157,protein_coding -6463,COMMD1,ENSG00000173163,protein_coding -8903,RAPH1,ENSG00000173166,protein_coding -3406,MTX1,ENSG00000173171,protein_coding -11482,ADCY5,ENSG00000173175,protein_coding -11473,PARP14,ENSG00000173193,protein_coding -25770,CYSLTR1,ENSG00000173198,protein_coding -11471,PARP15,ENSG00000173200,protein_coding -3382,CKS1B,ENSG00000173207,protein_coding -38360,ABCD2,ENSG00000173208,protein_coding -17853,ABLIM3,ENSG00000173210,protein_coding -2739,MAB21L3,ENSG00000173212,protein_coding -52055,TUBB8P12,ENSG00000173213,protein_coding -20619,MFSD4B,ENSG00000173214,protein_coding -2731,VANGL1,ENSG00000173218,protein_coding -16925,GLRX,ENSG00000173221,protein_coding -11451,IQCB1,ENSG00000173226,protein_coding -33656,SYT12,ENSG00000173227,protein_coding -11450,GOLGB1,ENSG00000173230,protein_coding -33655,C11orf86,ENSG00000173237,protein_coding -36655,LIPM,ENSG00000173239,protein_coding -17808,GPR151,ENSG00000173250,protein_coding -29492,DMRT2,ENSG00000173253,protein_coding -31104,ZNF483,ENSG00000173258,protein_coding -17794,PLAC8L1,ENSG00000173261,protein_coding -37730,SLC2A14,ENSG00000173262,protein_coding -33461,GPR137,ENSG00000173264,protein_coding -36600,SNCG,ENSG00000173267,protein_coding 
-36598,MMRN2,ENSG00000173269,protein_coding -7890,MZT2A,ENSG00000173272,protein_coding -27216,TNKS,ENSG00000173273,protein_coding -26545,ZNF449,ENSG00000173275,protein_coding -60335,ZBTB21,ENSG00000173276,protein_coding -27203,PPP1R3B,ENSG00000173281,protein_coding -3549,OR10K1,ENSG00000173285,protein_coding -7844,GPR148,ENSG00000173302,protein_coding -15362,STOX2,ENSG00000173320,protein_coding -33563,MAP3K11,ENSG00000173327,protein_coding -29100,TRIB1,ENSG00000173334,protein_coding -53758,CST9,ENSG00000173335,protein_coding -33562,KCNK7,ENSG00000173338,protein_coding -7747,SFT2D3,ENSG00000173349,protein_coding -10618,AC097637.1,ENSG00000173366,protein_coding -733,C1QB,ENSG00000173369,protein_coding -731,C1QA,ENSG00000173372,protein_coding -14584,NDNF,ENSG00000173376,protein_coding -10597,IQCF1,ENSG00000173389,protein_coding -37860,OLR1,ENSG00000173391,protein_coding -39331,GLIPR1L1,ENSG00000173401,protein_coding -10510,DAG1,ENSG00000173402,protein_coding -53681,INSM1,ENSG00000173404,protein_coding -1778,DAB1,ENSG00000173406,protein_coding -5057,ARV1,ENSG00000173409,protein_coding -53672,NAA20,ENSG00000173418,protein_coding -10495,CCDC36,ENSG00000173421,protein_coding -42098,RNASE8,ENSG00000173431,protein_coding -32437,SAA1,ENSG00000173432,protein_coding -638,MICOS10,ENSG00000173436,protein_coding -33561,EHBP1L1,ENSG00000173442,protein_coding -39289,THAP2,ENSG00000173451,protein_coding -21859,TMEM196,ENSG00000173452,protein_coding -34812,RNF26,ENSG00000173456,protein_coding -33456,PPP1R14B,ENSG00000173457,protein_coding -42062,RNASE11,ENSG00000173464,protein_coding -33559,ZNRD2,ENSG00000173465,protein_coding -21827,AGR3,ENSG00000173467,protein_coding -10419,SMARCC1,ENSG00000173473,protein_coding -57671,ZNF417,ENSG00000173480,protein_coding -52204,PTPRM,ENSG00000173482,protein_coding -33454,FKBP2,ENSG00000173486,protein_coding -33453,VEGFB,ENSG00000173511,protein_coding -45803,PEAK1,ENSG00000173517,protein_coding -27513,TNFRSF10D,ENSG00000173530,protein_coding 
-10515,MST1,ENSG00000173531,protein_coding -27512,TNFRSF10C,ENSG00000173535,protein_coding -10519,GMPPB,ENSG00000173540,protein_coding -13902,MOB1B,ENSG00000173542,protein_coding -15789,ZNF622,ENSG00000173545,protein_coding -45765,CSPG4,ENSG00000173546,protein_coding -45764,SNX33,ENSG00000173548,protein_coding -5888,C2orf70,ENSG00000173557,protein_coding -8691,NABP1,ENSG00000173559,protein_coding -5884,ADGRF3,ENSG00000173567,protein_coding -57552,NLRP13,ENSG00000173572,protein_coding -46241,CHD2,ENSG00000173575,protein_coding -10367,XCR1,ENSG00000173578,protein_coding -57537,CCDC106,ENSG00000173581,protein_coding -10363,CCR9,ENSG00000173585,protein_coding -39571,CEP83,ENSG00000173588,protein_coding -13869,SULT1B1,ENSG00000173597,protein_coding -39549,NUDT4,ENSG00000173598,protein_coding -33651,PC,ENSG00000173599,protein_coding -13867,UGT2A1,ENSG00000173610,protein_coding -31331,SCAI,ENSG00000173611,protein_coding -20706,GPRC6A,ENSG00000173612,protein_coding -341,NMNAT1,ENSG00000173614,protein_coding -33652,LRFN4,ENSG00000173621,protein_coding -20698,TRAPPC3L,ENSG00000173626,protein_coding -4150,APOBEC4,ENSG00000173627,protein_coding -60481,SLC19A1,ENSG00000173638,protein_coding -539,HSPB7,ENSG00000173641,protein_coding -33650,RCE1,ENSG00000173653,protein_coding -1512,UQCRH,ENSG00000173660,protein_coding -254,TAS1R1,ENSG00000173662,protein_coding -241,HES3,ENSG00000173673,protein_coding -24808,EIF1AX,ENSG00000173674,protein_coding -23479,SPDYE2B,ENSG00000173678,protein_coding -31275,OR1L1,ENSG00000173679,protein_coding -24804,BCLAF3,ENSG00000173681,protein_coding -9380,PSMD1,ENSG00000173692,protein_coding -24798,ADGRG2,ENSG00000173698,protein_coding -9377,SPATA3,ENSG00000173699,protein_coding -11506,MUC13,ENSG00000173702,protein_coding -10112,SUSD5,ENSG00000173705,protein_coding -11507,HEG1,ENSG00000173706,protein_coding -51105,WFIKKN2,ENSG00000173714,protein_coding -33647,C11orf80,ENSG00000173715,protein_coding -5144,TOMM20,ENSG00000173726,protein_coding 
-5307,C1orf100,ENSG00000173728,protein_coding -9310,AGFG1,ENSG00000173744,protein_coding -50653,STAT5B,ENSG00000173757,protein_coding -52007,CD7,ENSG00000173762,protein_coding -10317,TOPAZ1,ENSG00000173769,protein_coding -50639,CNP,ENSG00000173786,protein_coding -50629,JUP,ENSG00000173801,protein_coding -50626,HAP1,ENSG00000173805,protein_coding -56151,AC008736.1,ENSG00000173809,protein_coding -50624,EIF1,ENSG00000173812,protein_coding -51891,ENDOV,ENSG00000173818,protein_coding -51886,AC124319.1,ENSG00000173821,protein_coding -33537,TIGD3,ENSG00000173825,protein_coding -51396,KCNH6,ENSG00000173826,protein_coding -51370,MARCH10,ENSG00000173838,protein_coding -1460,PLK3,ENSG00000173846,protein_coding -35246,NET1,ENSG00000173848,protein_coding -22152,DPY19L1,ENSG00000173852,protein_coding -46084,AC013489.1,ENSG00000173867,protein_coding -51020,PHOSPHO1,ENSG00000173868,protein_coding -55449,ZNF791,ENSG00000173875,protein_coding -12288,PHC3,ENSG00000173889,protein_coding -12285,GPR160,ENSG00000173890,protein_coding -51864,CBX2,ENSG00000173894,protein_coding -33645,SPTBN2,ENSG00000173898,protein_coding -12249,GOLIM4,ENSG00000173905,protein_coding -50545,KRT28,ENSG00000173908,protein_coding -33644,RBM4B,ENSG00000173914,protein_coding -37008,ATP5MD,ENSG00000173915,protein_coding -50968,HOXB2,ENSG00000173917,protein_coding -51856,C1QTNF1,ENSG00000173918,protein_coding -17306,MARCH3,ENSG00000173926,protein_coding -55365,SWSAP1,ENSG00000173928,protein_coding -17023,SLCO4C1,ENSG00000173930,protein_coding -33643,RBM4,ENSG00000173933,protein_coding -2623,PIFO,ENSG00000173947,protein_coding -12724,XXYLT1,ENSG00000173950,protein_coding -5820,UBXN2A,ENSG00000173960,protein_coding -54984,RAX2,ENSG00000173976,protein_coding -41097,LRRC63,ENSG00000173988,protein_coding -50485,TCAP,ENSG00000173991,protein_coding -33639,CCS,ENSG00000173992,protein_coding -12796,NRROS,ENSG00000174004,protein_coding -12798,CEP19,ENSG00000174007,protein_coding -24851,KLHL15,ENSG00000174010,protein_coding 
-12793,FBXO45,ENSG00000174013,protein_coding -41089,SPERT,ENSG00000174015,protein_coding -25787,TENT5D,ENSG00000174016,protein_coding -2147,GNG5,ENSG00000174021,protein_coding -41079,SLC25A30,ENSG00000174032,protein_coding -29988,C9orf131,ENSG00000174038,protein_coding -4585,CD34,ENSG00000174059,protein_coding -33637,CTSF,ENSG00000174080,protein_coding -39162,MSRB3,ENSG00000174099,protein_coding -39160,LEMD3,ENSG00000174106,protein_coding -46557,C16orf91,ENSG00000174109,protein_coding -13414,TLR10,ENSG00000174123,protein_coding -13415,TLR1,ENSG00000174125,protein_coding -13416,TLR6,ENSG00000174130,protein_coding -17013,FAM174A,ENSG00000174132,protein_coding -16979,RGMB,ENSG00000174136,protein_coding -12915,FAM53A,ENSG00000174137,protein_coding -13389,NWD2,ENSG00000174145,protein_coding -2548,CYB561D1,ENSG00000174151,protein_coding -19920,GSTA3,ENSG00000174156,protein_coding -33634,ZDHHC24,ENSG00000174165,protein_coding -11163,TRMT10C,ENSG00000174173,protein_coding -3873,SELP,ENSG00000174175,protein_coding -48918,CTU2,ENSG00000174177,protein_coding -44898,MGA,ENSG00000174197,protein_coding -39126,C12orf66,ENSG00000174206,protein_coding -25951,ARL13A,ENSG00000174225,protein_coding -28771,SNX31,ENSG00000174226,protein_coding -12873,PIGG,ENSG00000174227,protein_coding -49045,PRPF8,ENSG00000174231,protein_coding -38545,ADCY6,ENSG00000174233,protein_coding -38193,REP15,ENSG00000174236,protein_coding -49040,PITPNA,ENSG00000174238,protein_coding -38552,DDX23,ENSG00000174243,protein_coding -11343,ZNF80,ENSG00000174255,protein_coding -33516,ZNHIT2,ENSG00000174276,protein_coding -8456,EVX2,ENSG00000174279,protein_coding -49336,ZBTB4,ENSG00000174282,protein_coding -49320,TNK1,ENSG00000174292,protein_coding -54164,ZHX3,ENSG00000174306,protein_coding -4368,PHLDA3,ENSG00000174307,protein_coding -8639,DIRC1,ENSG00000174325,protein_coding -49288,SLC16A11,ENSG00000174326,protein_coding -49287,SLC16A13,ENSG00000174327,protein_coding -1701,GLIS1,ENSG00000174332,protein_coding 
-18439,OR2Y1,ENSG00000174339,protein_coding -13461,CHRNA9,ENSG00000174343,protein_coding -1679,PODN,ENSG00000174348,protein_coding -15547,SLC6A19,ENSG00000174358,protein_coding -35054,C11orf45,ENSG00000174370,protein_coding -5263,EXO1,ENSG00000174371,protein_coding -42559,RALGAPA1,ENSG00000174373,protein_coding -41804,LIG4,ENSG00000174405,protein_coding -28914,TRHR,ENSG00000174417,protein_coding -22939,GTF2IRD2B,ENSG00000174428,protein_coding -28892,ABRA,ENSG00000174429,protein_coding -39893,ATP2A2,ENSG00000174437,protein_coding -45519,ZWILCH,ENSG00000174442,protein_coding -45513,RPL4,ENSG00000174444,protein_coding -45511,SNAPC5,ENSG00000174446,protein_coding -52900,STARD6,ENSG00000174448,protein_coding -44383,GOLGA6L2,ENSG00000174450,protein_coding -9058,VWC2L,ENSG00000174453,protein_coding -39888,C12orf76,ENSG00000174456,protein_coding -26256,ZCCHC12,ENSG00000174460,protein_coding -24304,CNTNAP2,ENSG00000174469,protein_coding -15235,GALNTL6,ENSG00000174473,protein_coding -29839,LINGO2,ENSG00000174482,protein_coding -33632,BBS1,ENSG00000174483,protein_coding -45490,DENND4A,ENSG00000174485,protein_coding -45479,IGDCC3,ENSG00000174498,protein_coding -11285,GCSAM,ENSG00000174500,protein_coding -7181,ANKRD36C,ENSG00000174501,protein_coding -4522,SLC26A9,ENSG00000174502,protein_coding -4509,MFSD4A,ENSG00000174514,protein_coding -33628,PELI3,ENSG00000174516,protein_coding -56518,TTC9B,ENSG00000174521,protein_coding -39866,MYO1H,ENSG00000174527,protein_coding -4491,TMEM81,ENSG00000174529,protein_coding -33626,MRPL11,ENSG00000174547,protein_coding -57126,KLK15,ENSG00000174562,protein_coding -11783,IL20RB,ENSG00000174564,protein_coding -4469,GOLT1A,ENSG00000174567,protein_coding -1255,AKIRIN1,ENSG00000174574,protein_coding -33624,NPAS4,ENSG00000174576,protein_coding -11766,MSL2,ENSG00000174579,protein_coding -57708,ZNF497,ENSG00000174586,protein_coding -14529,TRAM1L1,ENSG00000174599,protein_coding -39836,CMKLR1,ENSG00000174600,protein_coding 
-4699,ANGEL2,ENSG00000174606,protein_coding -14505,UGT8,ENSG00000174607,protein_coding -11756,KY,ENSG00000174611,protein_coding -47169,IQCK,ENSG00000174628,protein_coding -11740,SLCO2A1,ENSG00000174640,protein_coding -55256,ZNF266,ENSG00000174652,protein_coding -55243,OR7D4,ENSG00000174667,protein_coding -33619,SLC29A2,ENSG00000174669,protein_coding -31881,BRSK2,ENSG00000174672,protein_coding -33617,B4GAT1,ENSG00000174684,protein_coding -16776,TMEM167A,ENSG00000174695,protein_coding -23807,LEP,ENSG00000174697,protein_coding -18186,SH3PXD2B,ENSG00000174705,protein_coding -38290,RESF1,ENSG00000174718,protein_coding -14475,LARP7,ENSG00000174720,protein_coding -36721,FGFBP3,ENSG00000174721,protein_coding -9998,NR1D2,ENSG00000174738,protein_coding -25874,PABPC5,ENSG00000174740,protein_coding -33616,BRMS1,ENSG00000174744,protein_coding -9997,RPL15,ENSG00000174748,protein_coding -14461,FAM241A,ENSG00000174749,protein_coding -31831,HRAS,ENSG00000174775,protein_coding -12239,WDR49,ENSG00000174776,protein_coding -13690,SRP72,ENSG00000174780,protein_coding -55159,PCP2,ENSG00000174788,protein_coding -33614,RIN1,ENSG00000174791,protein_coding -13970,ODAPH,ENSG00000174792,protein_coding -13969,THAP6,ENSG00000174796,protein_coding -13677,CEP135,ENSG00000174799,protein_coding -34165,FZD4,ENSG00000174804,protein_coding -33613,CD248,ENSG00000174807,protein_coding -13954,BTC,ENSG00000174808,protein_coding -2947,PDZK1,ENSG00000174827,protein_coding -55127,ADGRE1,ENSG00000174837,protein_coding -10730,DENND6A,ENSG00000174839,protein_coding -10726,PDE12,ENSG00000174840,protein_coding -2286,GLMN,ENSG00000174842,protein_coding -10718,DNAH12,ENSG00000174844,protein_coding -33610,YIF1A,ENSG00000174851,protein_coding -33609,CNIH2,ENSG00000174871,protein_coding -2475,AMY1B,ENSG00000174876,protein_coding -31806,NLRP6,ENSG00000174885,protein_coding -55074,NDUFA11,ENSG00000174886,protein_coding -12141,RSRC1,ENSG00000174891,protein_coding -55064,CATSPERD,ENSG00000174898,protein_coding 
-12135,PQLC2L,ENSG00000174899,protein_coding -33606,RAB1B,ENSG00000174903,protein_coding -33088,OR9G1,ENSG00000174914,protein_coding -31826,PTDSS2,ENSG00000174915,protein_coding -55061,MICOS13,ENSG00000174917,protein_coding -12096,C3orf33,ENSG00000174928,protein_coding -33069,OR5M3,ENSG00000174937,protein_coding -47475,SEZ6L2,ENSG00000174938,protein_coding -47476,ASPHD1,ENSG00000174939,protein_coding -47477,KCTD13,ENSG00000174943,protein_coding -12025,P2RY14,ENSG00000174944,protein_coding -21622,AMZ1,ENSG00000174945,protein_coding -12024,GPR171,ENSG00000174946,protein_coding -12072,GPR149,ENSG00000174948,protein_coding -913,CD164L2,ENSG00000174950,protein_coding -56965,FUT1,ENSG00000174951,protein_coding -12069,DHX36,ENSG00000174953,protein_coding -33046,OR5J2,ENSG00000174957,protein_coding -11937,ZIC4,ENSG00000174963,protein_coding -33028,OR10AG1,ENSG00000174970,protein_coding -33003,OR4S2,ENSG00000174982,protein_coding -40047,FBXW8,ENSG00000174989,protein_coding -48894,CA5A,ENSG00000174990,protein_coding -47460,ZG16,ENSG00000174992,protein_coding -33602,KLC2,ENSG00000174996,protein_coding -21364,SLC22A1,ENSG00000175003,protein_coding -37331,TEX36,ENSG00000175018,protein_coding -37324,CTBP2,ENSG00000175029,protein_coding -11901,CHST2,ENSG00000175040,protein_coding -21298,ZDHHC14,ENSG00000175048,protein_coding -11881,ATR,ENSG00000175054,protein_coding -54292,UBE2C,ENSG00000175063,protein_coding -52591,DSG4,ENSG00000175065,protein_coding -11877,GK5,ENSG00000175066,protein_coding -28237,VCPIP1,ENSG00000175073,protein_coding -12609,RTP1,ENSG00000175077,protein_coding -9207,DES,ENSG00000175084,protein_coding -849,PDIK1L,ENSG00000175087,protein_coding -11850,SPSB4,ENSG00000175093,protein_coding -32711,RAG2,ENSG00000175097,protein_coding -32708,TRAF6,ENSG00000175104,protein_coding -11033,ZNF654,ENSG00000175105,protein_coding -49548,TVP23C,ENSG00000175106,protein_coding -11816,MRPS22,ENSG00000175110,protein_coding -33600,PACS1,ENSG00000175115,protein_coding 
-54246,WFDC5,ENSG00000175121,protein_coding -1071,MARCKSL1,ENSG00000175130,protein_coding -5463,SH3BP5L,ENSG00000175137,protein_coding -5438,OR2T1,ENSG00000175143,protein_coding -51266,YPEL2,ENSG00000175155,protein_coding -11008,CADM2,ENSG00000175161,protein_coding -31572,ABO,ENSG00000175164,protein_coding -12531,PSMD2,ENSG00000175166,protein_coding -51245,PPM1E,ENSG00000175175,protein_coding -12534,FAM131A,ENSG00000175182,protein_coding -39366,CSRP2,ENSG00000175183,protein_coding -39000,INHBC,ENSG00000175189,protein_coding -12503,PARL,ENSG00000175193,protein_coding -39007,DDIT3,ENSG00000175197,protein_coding -41721,PCCA,ENSG00000175198,protein_coding -45649,HIGD2B,ENSG00000175202,protein_coding -39011,DCTN2,ENSG00000175203,protein_coding -406,NPPA,ENSG00000175206,protein_coding -32838,ZNF408,ENSG00000175213,protein_coding -39038,CTDSP2,ENSG00000175215,protein_coding -32840,CKAP5,ENSG00000175216,protein_coding -32837,ARHGAP1,ENSG00000175220,protein_coding -54810,MED16,ENSG00000175221,protein_coding -32836,ATG13,ENSG00000175224,protein_coding -33596,GAL3ST3,ENSG00000175229,protein_coding -370,C1orf127,ENSG00000175262,protein_coding -32803,CHST1,ENSG00000175264,protein_coding -44718,GOLGA8A,ENSG00000175265,protein_coding -47242,VWA3A,ENSG00000175267,protein_coding -32788,TP53I11,ENSG00000175274,protein_coding -360,CENPS,ENSG00000175279,protein_coding -31458,DOLK,ENSG00000175283,protein_coding -31456,PHYHD1,ENSG00000175287,protein_coding -33595,CATSPER1,ENSG00000175294,protein_coding -28662,CCNE2,ENSG00000175305,protein_coding -18356,PHYKPL,ENSG00000175309,protein_coding -47207,ANKS4B,ENSG00000175311,protein_coding -33594,CST6,ENSG00000175315,protein_coding -45625,GRAMD2A,ENSG00000175318,protein_coding -52381,ZNF519,ENSG00000175322,protein_coding -27794,LSM1,ENSG00000175324,protein_coding -18347,PROP1,ENSG00000175325,protein_coding -59130,ISX,ENSG00000175329,protein_coding -33593,BANF1,ENSG00000175334,protein_coding -38950,APOF,ENSG00000175336,protein_coding 
-44656,CHRNA7,ENSG00000175344,protein_coding -32254,TMEM9B,ENSG00000175348,protein_coding -32256,NRIP3,ENSG00000175352,protein_coding -52345,PTPN2,ENSG00000175354,protein_coding -32258,SCUBE2,ENSG00000175356,protein_coding -33592,EIF1AD,ENSG00000175376,protein_coding -52798,SMAD2,ENSG00000175387,protein_coding -32228,EIF3F,ENSG00000175390,protein_coding -35799,ZNF25,ENSG00000175395,protein_coding -38884,OR10P1,ENSG00000175398,protein_coding -18283,ARL10,ENSG00000175414,protein_coding -18287,CLTB,ENSG00000175416,protein_coding -16941,PCSK1,ENSG00000175426,protein_coding -27439,LPL,ENSG00000175445,protein_coding -16921,RFESD,ENSG00000175449,protein_coding -11490,CCDC14,ENSG00000175455,protein_coding -33675,TBC1D10C,ENSG00000175463,protein_coding -33590,SART1,ENSG00000175467,protein_coding -37404,PPP2R2D,ENSG00000175470,protein_coding -16910,MCTP1,ENSG00000175471,protein_coding -33667,POLD4,ENSG00000175482,protein_coding -32155,OR52W1,ENSG00000175485,protein_coding -55778,LRRC25,ENSG00000175489,protein_coding -7609,DPP10,ENSG00000175497,protein_coding -33671,CLCF1,ENSG00000175505,protein_coding -33589,TSGA10IP,ENSG00000175513,protein_coding -33681,GPR152,ENSG00000175514,protein_coding -32111,UBQLNL,ENSG00000175518,protein_coding -32110,UBQLN3,ENSG00000175520,protein_coding -37171,PNLIP,ENSG00000175535,protein_coding -33937,LIPT2,ENSG00000175536,protein_coding -33933,KCNE3,ENSG00000175538,protein_coding -33682,CABP4,ENSG00000175544,protein_coding -38347,ALG10B,ENSG00000175548,protein_coding -33588,DRAP1,ENSG00000175550,protein_coding -26260,LONRF3,ENSG00000175556,protein_coding -33919,UCP3,ENSG00000175564,protein_coding -33915,UCP2,ENSG00000175567,protein_coding -33587,C11orf68,ENSG00000175573,protein_coding -33911,PAAF1,ENSG00000175575,protein_coding -33907,MRPL48,ENSG00000175581,protein_coding -33904,RAB6A,ENSG00000175582,protein_coding -33889,P2RY2,ENSG00000175591,protein_coding -33586,FOSL1,ENSG00000175592,protein_coding -46993,ERCC4,ENSG00000175595,protein_coding 
-22269,SUGCT,ENSG00000175600,protein_coding -33585,CCDC85B,ENSG00000175602,protein_coding -28355,TMEM70,ENSG00000175606,protein_coding -32892,OR4B1,ENSG00000175619,protein_coding -33677,RPS6KB2,ENSG00000175634,protein_coding -46935,RMI2,ENSG00000175643,protein_coding -46941,PRM1,ENSG00000175646,protein_coding -49669,TOM1L2,ENSG00000175662,protein_coding -40849,TEX26,ENSG00000175664,protein_coding -54942,ZNF77,ENSG00000175691,protein_coding -11415,GPR156,ENSG00000175697,protein_coding -43595,CCDC197,ENSG00000175699,protein_coding -7478,MTLN,ENSG00000175701,protein_coding -892,KDF1,ENSG00000175707,protein_coding -52043,B3GNTL1,ENSG00000175711,protein_coding -26213,RBMXL3,ENSG00000175718,protein_coding -40194,MLXIP,ENSG00000175727,protein_coding -16892,NR2F1,ENSG00000175745,protein_coding -104,AURKAIP1,ENSG00000175756,protein_coding -31254,TTLL11,ENSG00000175764,protein_coding -18297,EIF4E1B,ENSG00000175766,protein_coding -30084,TOMM5,ENSG00000175768,protein_coding -44791,C15orf53,ENSG00000175779,protein_coding -39233,SLC35E3,ENSG00000175782,protein_coding -43588,PRIMA1,ENSG00000175785,protein_coding -30800,ZNF169,ENSG00000175787,protein_coding -11596,RUVBL1,ENSG00000175792,protein_coding -886,SFN,ENSG00000175793,protein_coding -27223,MSRA,ENSG00000175806,protein_coding -24832,CBLL2,ENSG00000175809,protein_coding -41754,CCDC168,ENSG00000175820,protein_coding -49303,CTDNEP1,ENSG00000175826,protein_coding -50753,ETV4,ENSG00000175832,protein_coding -31418,SWI5,ENSG00000175854,protein_coding -16351,GAPT,ENSG00000175857,protein_coding -51913,BAIAP2,ENSG00000175866,protein_coding -32374,CALCB,ENSG00000175868,protein_coding -7310,CREG2,ENSG00000175874,protein_coding -22912,TMEM270,ENSG00000175877,protein_coding -8463,HOXD8,ENSG00000175879,protein_coding -29655,ZDHHC21,ENSG00000175893,protein_coding -60427,TSPEAR,ENSG00000175894,protein_coding -28675,PLEKHF2,ENSG00000175895,protein_coding -37807,A2M,ENSG00000175899,protein_coding -50745,ARL4D,ENSG00000175906,protein_coding 
-12966,DOK7,ENSG00000175920,protein_coding -9701,LRRN1,ENSG00000175928,protein_coding -51747,UBE2O,ENSG00000175931,protein_coding -47575,ORAI3,ENSG00000175938,protein_coding -29062,KLHL38,ENSG00000175946,protein_coding -40142,UNC119B,ENSG00000175970,protein_coding -2708,DENND2C,ENSG00000175984,protein_coding -43147,PLEKHD1,ENSG00000175985,protein_coding -32252,ASCL3,ENSG00000176009,protein_coding -52331,TUBB6,ENSG00000176014,protein_coding -16861,LYSMD3,ENSG00000176018,protein_coding -10518,AMIGO3,ENSG00000176020,protein_coding -87,B3GALT6,ENSG00000176022,protein_coding -57223,ZNF613,ENSG00000176024,protein_coding -32251,C11orf16,ENSG00000176029,protein_coding -11282,TMPRSS7,ENSG00000176040,protein_coding -47380,NUPR1,ENSG00000176046,protein_coding -17821,JAKMIP2,ENSG00000176049,protein_coding -16859,MBLAC2,ENSG00000176055,protein_coding -31745,TPRN,ENSG00000176058,protein_coding -26138,KCNE5,ENSG00000176076,protein_coding -865,ZNF683,ENSG00000176083,protein_coding -17624,SLC35A4,ENSG00000176087,protein_coding -863,CRYBG2,ENSG00000176092,protein_coding -10520,IP6K1,ENSG00000176095,protein_coding -31744,SSNA1,ENSG00000176101,protein_coding -32639,CSTF3,ENSG00000176102,protein_coding -52077,YES1,ENSG00000176105,protein_coding -51907,CHMP6,ENSG00000176108,protein_coding -23409,UFSP1,ENSG00000176125,protein_coding -52376,MC5R,ENSG00000176136,protein_coding -11392,TMEM39A,ENSG00000176142,protein_coding -32636,TCP11L1,ENSG00000176148,protein_coding -43050,GPX2,ENSG00000176153,protein_coding -51995,CCDC57,ENSG00000176155,protein_coding -51230,HSF5,ENSG00000176160,protein_coding -42437,FOXG1,ENSG00000176165,protein_coding -51746,SPHK1,ENSG00000176170,protein_coding -37405,BNIP3,ENSG00000176171,protein_coding -59327,ENTHD1,ENSG00000176177,protein_coding -56824,MYPOP,ENSG00000176182,protein_coding -52327,CIDEA,ENSG00000176194,protein_coding -42035,OR11H4,ENSG00000176198,protein_coding -33219,OR4D11,ENSG00000176200,protein_coding -6802,LRRTM4,ENSG00000176204,protein_coding 
-50137,ATAD5,ENSG00000176208,protein_coding -27890,SMIM19,ENSG00000176209,protein_coding -42033,OR11H6,ENSG00000176219,protein_coding -56705,ZNF404,ENSG00000176222,protein_coding -53126,RTTN,ENSG00000176225,protein_coding -42025,OR4K17,ENSG00000176230,protein_coding -55636,OR10H4,ENSG00000176231,protein_coding -32099,OR51B6,ENSG00000176239,protein_coding -35421,ACBD7,ENSG00000176244,protein_coding -42022,OR4L1,ENSG00000176246,protein_coding -31743,ANAPC2,ENSG00000176248,protein_coding -42020,OR4K13,ENSG00000176253,protein_coding -1120,HMGB4,ENSG00000176256,protein_coding -1083,ZBTB8OS,ENSG00000176261,protein_coding -26985,OR4F21,ENSG00000176269,protein_coding -36763,SLC35G1,ENSG00000176273,protein_coding -42014,OR4K5,ENSG00000176281,protein_coding -57683,ZNF135,ENSG00000176293,protein_coding -42003,OR4N2,ENSG00000176294,protein_coding -42007,OR4M1,ENSG00000176299,protein_coding -34781,FOXR1,ENSG00000176302,protein_coding -33439,COX8A,ENSG00000176340,protein_coding -51046,TAC4,ENSG00000176358,protein_coding -46018,ZSCAN2,ENSG00000176371,protein_coding -21431,PRR18,ENSG00000176381,protein_coding -40197,B3GNT4,ENSG00000176383,protein_coding -31152,CDC26,ENSG00000176386,protein_coding -48313,HSD11B2,ENSG00000176387,protein_coding -50132,CRLF3,ENSG00000176390,protein_coding -4388,RNPEP,ENSG00000176393,protein_coding -56484,EID2,ENSG00000176396,protein_coding -29788,DMRTA1,ENSG00000176399,protein_coding -56481,EID2B,ENSG00000176401,protein_coding -23333,GJC3,ENSG00000176402,protein_coding -28860,RIMS2,ENSG00000176406,protein_coding -6873,KCMF1,ENSG00000176407,protein_coding -22902,DNAJC30,ENSG00000176410,protein_coding -38958,SPRYD4,ENSG00000176422,protein_coding -22901,VPS37D,ENSG00000176428,protein_coding -42609,CLEC14A,ENSG00000176435,protein_coding -43634,SYNE3,ENSG00000176438,protein_coding -3413,CLK2,ENSG00000176444,protein_coding -44716,LPCAT4,ENSG00000176454,protein_coding -46213,SLCO3A1,ENSG00000176463,protein_coding -56685,ZNF575,ENSG00000176472,protein_coding 
-43737,WDR25,ENSG00000176473,protein_coding -47383,SGF29,ENSG00000176476,protein_coding -33423,PLAAT3,ENSG00000176485,protein_coding -54926,DIRAS1,ENSG00000176490,protein_coding -33212,OR5AN1,ENSG00000176495,protein_coding -56683,PHLDB3,ENSG00000176531,protein_coding -22055,PRR15,ENSG00000176532,protein_coding -54918,GNG7,ENSG00000176533,protein_coding -32899,OR4C5,ENSG00000176540,protein_coding -11323,USF3,ENSG00000176542,protein_coding -32897,OR4C3,ENSG00000176547,protein_coding -32896,OR4S1,ENSG00000176555,protein_coding -50698,CNTD1,ENSG00000176563,protein_coding -28559,DCAF4L2,ENSG00000176566,protein_coding -32895,OR4X1,ENSG00000176567,protein_coding -28553,CNBD1,ENSG00000176571,protein_coding -27025,KBTBD11,ENSG00000176595,protein_coding -12487,B3GNT5,ENSG00000176597,protein_coding -7958,MAP3K19,ENSG00000176601,protein_coding -43692,C14orf177,ENSG00000176605,protein_coding -54913,LMNB2,ENSG00000176619,protein_coding -28542,RMDN1,ENSG00000176623,protein_coding -52877,MEX3C,ENSG00000176624,protein_coding -58985,HORMAD2,ENSG00000176635,protein_coding -53032,RNF152,ENSG00000176641,protein_coding -50220,MYO1D,ENSG00000176658,protein_coding -48853,FOXL1,ENSG00000176678,protein_coding -57758,TGIF2LY,ENSG00000176679,protein_coding -50890,LRRC37A,ENSG00000176681,protein_coding -48852,FOXC2,ENSG00000176692,protein_coding -54759,AC008977.1,ENSG00000176695,protein_coding -32563,BDNF,ENSG00000176697,protein_coding -5941,CCDC121,ENSG00000176714,protein_coding -48937,ACSF3,ENSG00000176715,protein_coding -9637,BOK,ENSG00000176720,protein_coding -47614,ZNF843,ENSG00000176723,protein_coding -28508,RBIS,ENSG00000176731,protein_coding -5830,PFN4,ENSG00000176732,protein_coding -32080,OR51V1,ENSG00000176742,protein_coding -24879,MAGEB6,ENSG00000176746,protein_coding -50219,CDK5R1,ENSG00000176749,protein_coding -37398,TCERG1L,ENSG00000176769,protein_coding -7934,NCKAP5,ENSG00000176771,protein_coding -24875,MAGEB18,ENSG00000176774,protein_coding 
-27154,DEFB104A,ENSG00000176782,protein_coding -18394,RUFY1,ENSG00000176783,protein_coding -32069,OR52E2,ENSG00000176787,protein_coding -15798,BASP1,ENSG00000176788,protein_coding -27156,DEFB103A,ENSG00000176797,protein_coding -32065,OR51L1,ENSG00000176798,protein_coding -51458,LRRC37A3,ENSG00000176809,protein_coding -40063,VSIG10,ENSG00000176834,protein_coding -48048,IRX5,ENSG00000176842,protein_coding -52045,METRNL,ENSG00000176845,protein_coding -29064,FAM91A1,ENSG00000176853,protein_coding -40061,WSB2,ENSG00000176871,protein_coding -31738,GRIN1,ENSG00000176884,protein_coding -5550,SOX11,ENSG00000176887,protein_coding -52074,TYMS,ENSG00000176890,protein_coding -32059,OR51G2,ENSG00000176893,protein_coding -40502,PXMP2,ENSG00000176894,protein_coding -32058,OR51A7,ENSG00000176895,protein_coding -24715,TCEANC,ENSG00000176896,protein_coding -32056,OR51T1,ENSG00000176900,protein_coding -43247,PNMA1,ENSG00000176903,protein_coding -27842,TCIM,ENSG00000176907,protein_coding -56962,MAMSTR,ENSG00000176909,protein_coding -40506,ANKLE2,ENSG00000176915,protein_coding -31715,C8G,ENSG00000176919,protein_coding -56961,FUT2,ENSG00000176920,protein_coding -32053,OR51S1,ENSG00000176922,protein_coding -32051,OR51F2,ENSG00000176925,protein_coding -50089,EFCAB5,ENSG00000176927,protein_coding -16616,GCNT4,ENSG00000176928,protein_coding -12750,MUC20,ENSG00000176945,protein_coding -9639,THAP4,ENSG00000176946,protein_coding -47412,NFATC2IP,ENSG00000176953,protein_coding -29333,LY6H,ENSG00000176956,protein_coding -32550,FIBIN,ENSG00000176971,protein_coding -33560,FAM89B,ENSG00000176973,protein_coding -49690,SHMT1,ENSG00000176974,protein_coding -31737,DPP7,ENSG00000176978,protein_coding -15154,TRIM60,ENSG00000176979,protein_coding -36379,SEC24C,ENSG00000176986,protein_coding -26734,FMR1NB,ENSG00000176988,protein_coding -49689,SMCR8,ENSG00000176994,protein_coding -402,MTHFR,ENSG00000177000,protein_coding -27121,DEFB104B,ENSG00000177023,protein_coding 
-57675,C19orf18,ENSG00000177025,protein_coding -31843,DEAF1,ENSG00000177030,protein_coding -16712,MTX3,ENSG00000177034,protein_coding -31846,TMEM80,ENSG00000177042,protein_coding -56813,SIX5,ENSG00000177045,protein_coding -29739,IFNW1,ENSG00000177047,protein_coding -56810,FBXO46,ENSG00000177051,protein_coding -32474,ZDHHC13,ENSG00000177054,protein_coding -16299,SLC38A9,ENSG00000177058,protein_coding -29714,ACER2,ENSG00000177076,protein_coding -46023,WDR73,ENSG00000177082,protein_coding -40501,POLE,ENSG00000177084,protein_coding -59411,PHETA2,ENSG00000177096,protein_coding -34736,SCN4B,ENSG00000177098,protein_coding -34723,DSCAML1,ENSG00000177103,protein_coding -31999,RHOG,ENSG00000177105,protein_coding -31845,EPS8L2,ENSG00000177106,protein_coding -43365,ZDHHC22,ENSG00000177108,protein_coding -38437,ANO6,ENSG00000177119,protein_coding -31364,ZBTB34,ENSG00000177125,protein_coding -24648,FAM9B,ENSG00000177138,protein_coding -52068,CETN1,ENSG00000177143,protein_coding -3056,NUDT4B,ENSG00000177144,protein_coding -52372,FAM210A,ENSG00000177150,protein_coding -5454,OR2T35,ENSG00000177151,protein_coding -31848,TALDO1,ENSG00000177156,protein_coding -40461,ULK1,ENSG00000177169,protein_coding -5435,OR14C36,ENSG00000177174,protein_coding -1355,RIMKLA,ENSG00000177181,protein_coding -28157,CLVS1,ENSG00000177182,protein_coding -5434,OR2M7,ENSG00000177186,protein_coding -24812,RPS6KA3,ENSG00000177189,protein_coding -56588,B3GNT8,ENSG00000177191,protein_coding -40464,PUS1,ENSG00000177192,protein_coding -48008,CHD9,ENSG00000177200,protein_coding -5433,OR2T12,ENSG00000177201,protein_coding -56953,SPACA4,ENSG00000177202,protein_coding -5432,OR2T33,ENSG00000177212,protein_coding -31849,GATD1,ENSG00000177225,protein_coding -47605,TRIM72,ENSG00000177238,protein_coding -31735,MAN1B1,ENSG00000177239,protein_coding -27119,DEFB103B,ENSG00000177243,protein_coding -27117,DEFB4B,ENSG00000177257,protein_coding -2601,KCNA3,ENSG00000177272,protein_coding -5411,OR2AJ1,ENSG00000177275,protein_coding 
-35764,FZD8,ENSG00000177283,protein_coding -35763,GJD4,ENSG00000177291,protein_coding -49271,FBXO39,ENSG00000177294,protein_coding -15346,CLDN22,ENSG00000177300,protein_coding -2599,KCNA2,ENSG00000177301,protein_coding -49686,TOP3A,ENSG00000177302,protein_coding -51698,CASKIN2,ENSG00000177303,protein_coding -11857,ZBTB38,ENSG00000177311,protein_coding -24783,BEND2,ENSG00000177324,protein_coding -10492,CCDC71,ENSG00000177352,protein_coding -36027,C10orf71,ENSG00000177354,protein_coding -33369,LRRN4CL,ENSG00000177363,protein_coding -49024,TIMM22,ENSG00000177370,protein_coding -49068,HIC1,ENSG00000177374,protein_coding -57008,PPFIA3,ENSG00000177380,protein_coding -12544,MAGEF1,ENSG00000177383,protein_coding -60337,UMODL1,ENSG00000177398,protein_coding -23191,SAMD9L,ENSG00000177409,protein_coding -1885,UBE2U,ENSG00000177414,protein_coding -39390,PAWR,ENSG00000177425,protein_coding -52136,TGIF1,ENSG00000177426,protein_coding -49684,MIEF2,ENSG00000177427,protein_coding -14194,NAP1L5,ENSG00000177432,protein_coding -16175,NIM1K,ENSG00000177453,protein_coding -47411,CD19,ENSG00000177455,protein_coding -28716,ERICH5,ENSG00000177459,protein_coding -5410,OR2T8,ENSG00000177462,protein_coding -9894,NR2C2,ENSG00000177463,protein_coding -56800,GPR4,ENSG00000177464,protein_coding -43241,ACOT4,ENSG00000177465,protein_coding -20992,OLIG3,ENSG00000177468,protein_coding -50660,CAVIN1,ENSG00000177469,protein_coding -5392,OR2G3,ENSG00000177476,protein_coding -10472,ARIH2,ENSG00000177479,protein_coding -9540,RBM44,ENSG00000177483,protein_coding -26304,ZBTB33,ENSG00000177485,protein_coding -5391,OR2G2,ENSG00000177489,protein_coding -11273,ZBED2,ENSG00000177494,protein_coding -24638,VCX2,ENSG00000177504,protein_coding -48038,IRX3,ENSG00000177508,protein_coding -52945,ST8SIA3,ENSG00000177511,protein_coding -8155,RPRM,ENSG00000177519,protein_coding -5385,OR2B11,ENSG00000177535,protein_coding -31854,SLC25A22,ENSG00000177542,protein_coding -47410,RABEP2,ENSG00000177548,protein_coding 
-2733,NHLH2,ENSG00000177551,protein_coding -17936,ATOX1,ENSG00000177556,protein_coding -56244,FAM187B,ENSG00000177558,protein_coding -12373,TBL1XR1,ENSG00000177565,protein_coding -28978,SAMD12,ENSG00000177570,protein_coding -37714,CD163,ENSG00000177575,protein_coding -52829,C18orf32,ENSG00000177576,protein_coding -31856,PIDD1,ENSG00000177595,protein_coding -55393,ZNF491,ENSG00000177599,protein_coding -31857,RPLP2,ENSG00000177600,protein_coding -49150,HASPIN,ENSG00000177602,protein_coding -1795,JUN,ENSG00000177606,protein_coding -36076,CSTF2T,ENSG00000177613,protein_coding -5040,PGBD5,ENSG00000177614,protein_coding -38530,C12orf54,ENSG00000177627,protein_coding -3410,GBA,ENSG00000177628,protein_coding -11625,ACAD9,ENSG00000177646,protein_coding -58352,IL17RA,ENSG00000177663,protein_coding -31859,PNPLA2,ENSG00000177666,protein_coding -27662,MBOAT4,ENSG00000177669,protein_coding -9404,TEX44,ENSG00000177673,protein_coding -399,AGTRAP,ENSG00000177674,protein_coding -37712,CD163L1,ENSG00000177675,protein_coding -22985,SRRM3,ENSG00000177679,protein_coding -23579,THAP5,ENSG00000177683,protein_coding -19876,DEFB114,ENSG00000177684,protein_coding -31861,CRACR2B,ENSG00000177685,protein_coding -21150,SUMO4,ENSG00000177688,protein_coding -24900,MAGEB10,ENSG00000177689,protein_coding -60154,DNAJC28,ENSG00000177692,protein_coding -46427,OR4F4,ENSG00000177693,protein_coding -12350,NAALADL2,ENSG00000177694,protein_coding -31862,CD151,ENSG00000177697,protein_coding -31863,POLR2L,ENSG00000177700,protein_coding -21556,FAM20C,ENSG00000177706,protein_coding -11268,NECTIN3,ENSG00000177707,protein_coding -27261,SLC35G5,ENSG00000177710,protein_coding -16170,ANXA2R,ENSG00000177721,protein_coding -51696,TMEM94,ENSG00000177728,protein_coding -49683,FLII,ENSG00000177731,protein_coding -53305,SOX12,ENSG00000177732,protein_coding -17524,HNRNPA0,ENSG00000177733,protein_coding -13522,YIPF7,ENSG00000177752,protein_coding -36366,MYOZ1,ENSG00000177791,protein_coding 
-3617,KCNJ10,ENSG00000177807,protein_coding -31866,CHID1,ENSG00000177830,protein_coding -17685,PCDHB9,ENSG00000177839,protein_coding -10238,ZNF620,ENSG00000177842,protein_coding -36810,ZNF518A,ENSG00000177853,protein_coding -26882,TMEM187,ENSG00000177854,protein_coding -1374,SVBP,ENSG00000177868,protein_coding -10237,ZNF619,ENSG00000177873,protein_coding -38513,CCDC184,ENSG00000177875,protein_coding -17167,AP3S1,ENSG00000177879,protein_coding -51688,GRB2,ENSG00000177885,protein_coding -4296,ZBTB41,ENSG00000177888,protein_coding -39550,UBE2N,ENSG00000177889,protein_coding -8144,ARL6IP6,ENSG00000177917,protein_coding -18383,ZNF354C,ENSG00000177932,protein_coding -38051,CAPZA3,ENSG00000177938,protein_coding -31708,MAMDC4,ENSG00000177943,protein_coding -48978,CENPBD1,ENSG00000177946,protein_coding -31799,ODF3,ENSG00000177947,protein_coding -31797,BET1L,ENSG00000177951,protein_coding -3338,RPS27,ENSG00000177954,protein_coding -31801,RIC8A,ENSG00000177963,protein_coding -45761,IMP3,ENSG00000177971,protein_coding -38508,ASB8,ENSG00000177981,protein_coding -31695,LCN15,ENSG00000177984,protein_coding -59627,ODF3B,ENSG00000177989,protein_coding -39109,DPY19L2,ENSG00000177990,protein_coding -30662,SPATA31E1,ENSG00000177992,protein_coding -6348,C2orf73,ENSG00000177994,protein_coding -16920,GPR150,ENSG00000178015,protein_coding -6347,TSPYL6,ENSG00000178021,protein_coding -58819,LRRC75B,ENSG00000178026,protein_coding -1440,DMAP1,ENSG00000178028,protein_coding -29693,ADAMTSL1,ENSG00000178031,protein_coding -20699,CALHM5,ENSG00000178033,protein_coding -10483,IMPDH2,ENSG00000178035,protein_coding -10387,ALS2CL,ENSG00000178038,protein_coding -12146,MLF1,ENSG00000178053,protein_coding -10397,PRSS42P,ENSG00000178055,protein_coding -10481,NDUFAF3,ENSG00000178057,protein_coding -8781,C2orf69,ENSG00000178074,protein_coding -11328,GRAMD1C,ENSG00000178075,protein_coding -55010,STAP2,ENSG00000178078,protein_coding -12511,HTR3C,ENSG00000178084,protein_coding 
-55829,TSSK6,ENSG00000178093,protein_coding -3101,BOLA1,ENSG00000178096,protein_coding -3059,PDE4DIP,ENSG00000178104,protein_coding -34525,DDX10,ENSG00000178105,protein_coding -44621,GOLGA8Q,ENSG00000178115,protein_coding -28245,PPP1R42,ENSG00000178125,protein_coding -52228,NDUFV2,ENSG00000178127,protein_coding -10479,DALRD3,ENSG00000178149,protein_coding -56936,ZNF114,ENSG00000178150,protein_coding -13154,ZNF518B,ENSG00000178163,protein_coding -7846,AMER3,ENSG00000178171,protein_coding -17833,SPINK6,ENSG00000178172,protein_coding -16556,ZNF366,ENSG00000178175,protein_coding -13243,LCORL,ENSG00000178177,protein_coding -53291,PARD6G,ENSG00000178184,protein_coding -18378,ZNF454,ENSG00000178187,protein_coding -47405,SH2B1,ENSG00000178188,protein_coding -21151,ZC3H12D,ENSG00000178199,protein_coding -57639,VN1R1,ENSG00000178201,protein_coding -34523,POGLUT3,ENSG00000178202,protein_coding -29384,PLEC,ENSG00000178209,protein_coding -36534,SH2D4B,ENSG00000178217,protein_coding -12897,RNF212,ENSG00000178222,protein_coding -47597,PRSS36,ENSG00000178226,protein_coding -57626,ZNF543,ENSG00000178229,protein_coding -19797,TMEM151B,ENSG00000178233,protein_coding -24445,GALNT11,ENSG00000178234,protein_coding -41520,SLITRK1,ENSG00000178235,protein_coding -10478,WDR6,ENSG00000178252,protein_coding -46938,PRM3,ENSG00000178257,protein_coding -46937,TNP2,ENSG00000178279,protein_coding -27155,SPAG11A,ENSG00000178287,protein_coding -5737,GEN1,ENSG00000178295,protein_coding -54911,TMPRSS9,ENSG00000178297,protein_coding -34030,AQP11,ENSG00000178301,protein_coding -49869,TMEM11,ENSG00000178307,protein_coding -18372,ZNF354B,ENSG00000178338,protein_coding -53279,KCNG2,ENSG00000178342,protein_coding -13500,SHISA3,ENSG00000178343,protein_coding -32198,OR2D3,ENSG00000178358,protein_coding -35249,CALML3,ENSG00000178363,protein_coding -35247,CALML5,ENSG00000178372,protein_coding -21584,ZFAND2A,ENSG00000178381,protein_coding -8987,PLEKHM3,ENSG00000178385,protein_coding 
-56720,AC092072.1,ENSG00000178386,protein_coding -16411,HTR1A,ENSG00000178394,protein_coding -4836,CCDC185,ENSG00000178395,protein_coding -21692,FAM220A,ENSG00000178397,protein_coding -38585,DNAJC22,ENSG00000178401,protein_coding -14470,NEUROG2,ENSG00000178403,protein_coding -51850,CEP295NL,ENSG00000178404,protein_coding -20524,BEND3,ENSG00000178409,protein_coding -20684,NT5DC1,ENSG00000178425,protein_coding -29582,GLDC,ENSG00000178445,protein_coding -38620,COX14,ENSG00000178449,protein_coding -28241,MCMDC2,ENSG00000178460,protein_coding -35245,TUBAL3,ENSG00000178462,protein_coding -10476,P4HTM,ENSG00000178467,protein_coding -35244,UCN3,ENSG00000178473,protein_coding -39014,DTX3,ENSG00000178498,protein_coding -50635,KLHL11,ENSG00000178502,protein_coding -13890,AMBN,ENSG00000178522,protein_coding -55187,CTXN1,ENSG00000178531,protein_coding -10470,SLC25A20,ENSG00000178537,protein_coding -28143,CA8,ENSG00000178538,protein_coding -8906,CD28,ENSG00000178562,protein_coding -10149,EPM2AIP1,ENSG00000178567,protein_coding -9037,ERBB4,ENSG00000178568,protein_coding -48683,MAF,ENSG00000178573,protein_coding -336,CTNNBIP1,ENSG00000178585,protein_coding -9589,OR6B3,ENSG00000178586,protein_coding -53294,DEFB125,ENSG00000178591,protein_coding -13048,PSAPL1,ENSG00000178597,protein_coding -9593,OTOS,ENSG00000178602,protein_coding -24561,GTPBP6,ENSG00000178605,protein_coding -51427,ERN1,ENSG00000178607,protein_coding -9607,GPR35,ENSG00000178623,protein_coding -36034,C10orf53,ENSG00000178645,protein_coding -8298,CSRNP3,ENSG00000178662,protein_coding -22510,ZNF713,ENSG00000178665,protein_coding -29386,PARP10,ENSG00000178685,protein_coding -52906,DYNAP,ENSG00000178690,protein_coding -50184,SUZ12,ENSG00000178691,protein_coding -11060,NSUN3,ENSG00000178694,protein_coding -41447,KCTD12,ENSG00000178695,protein_coding -11059,DHFR2,ENSG00000178700,protein_coding -45722,RPP25,ENSG00000178718,protein_coding -29387,GRINA,ENSG00000178719,protein_coding -53732,THBD,ENSG00000178726,protein_coding 
-12705,GP5,ENSG00000178732,protein_coding -45721,COX5A,ENSG00000178741,protein_coding -11058,STX19,ENSG00000178750,protein_coding -9550,ERFE,ENSG00000178752,protein_coding -45720,FAM219B,ENSG00000178761,protein_coding -29030,ZHX2,ENSG00000178764,protein_coding -12703,CPN2,ENSG00000178772,protein_coding -48958,CPNE7,ENSG00000178773,protein_coding -17826,C5orf46,ENSG00000178776,protein_coding -51640,CD300LB,ENSG00000178789,protein_coding -34024,GDPD4,ENSG00000178795,protein_coding -3194,RIIAD1,ENSG00000178796,protein_coding -45719,MPI,ENSG00000178802,protein_coding -11654,H1FOO,ENSG00000178804,protein_coding -22958,TRIM73,ENSG00000178809,protein_coding -29390,OPLAH,ENSG00000178814,protein_coding -142,TMEM52,ENSG00000178821,protein_coding -24228,TMEM139,ENSG00000178826,protein_coding -646,RNF186,ENSG00000178828,protein_coding -50922,EFCAB13,ENSG00000178852,protein_coding -28314,MSC,ENSG00000178860,protein_coding -37952,APOLD1,ENSG00000178878,protein_coding -40275,RFLNA,ENSG00000178882,protein_coding -29393,EXOSC4,ENSG00000178896,protein_coding -56140,DPY19L3,ENSG00000178904,protein_coding -17697,TAF7,ENSG00000178913,protein_coding -10323,ZNF852,ENSG00000178917,protein_coding -30903,FOXE1,ENSG00000178919,protein_coding -49401,PFAS,ENSG00000178921,protein_coding -1407,HYI,ENSG00000178922,protein_coding -52018,CYBC1,ENSG00000178927,protein_coding -56917,TPRX1,ENSG00000178928,protein_coding -56432,LGALS7B,ENSG00000178934,protein_coding -57662,ZNF552,ENSG00000178935,protein_coding -26547,SMIM10L2A,ENSG00000178947,protein_coding -12890,GAK,ENSG00000178950,protein_coding -54997,ZBTB7A,ENSG00000178951,protein_coding -47403,TUFM,ENSG00000178952,protein_coding -2021,ERICH3,ENSG00000178965,protein_coding -30604,RMI1,ENSG00000178966,protein_coding -49400,CTC1,ENSG00000178971,protein_coding -42861,FBXO34,ENSG00000178974,protein_coding -56914,SELENOW,ENSG00000178980,protein_coding -56426,EIF3K,ENSG00000178982,protein_coding -13032,MRFAP1L1,ENSG00000178988,protein_coding 
-16268,SNX18,ENSG00000178996,protein_coding -44881,EXD1,ENSG00000178997,protein_coding -49398,AURKB,ENSG00000178999,protein_coding -619,TAS1R2,ENSG00000179002,protein_coding -42958,C14orf39,ENSG00000179008,protein_coding -13024,MRFAP1,ENSG00000179010,protein_coding -11035,C3orf38,ENSG00000179021,protein_coding -615,KLHDC7A,ENSG00000179023,protein_coding -49391,TMEM107,ENSG00000179029,protein_coding -28229,RRS1,ENSG00000179041,protein_coding -48296,EXOC3L1,ENSG00000179044,protein_coding -15453,TRIML2,ENSG00000179046,protein_coding -604,RCC2,ENSG00000179051,protein_coding -31008,OR13D1,ENSG00000179055,protein_coding -32459,IGSF22,ENSG00000179057,protein_coding -31487,C9orf50,ENSG00000179058,protein_coding -15452,ZFP42,ENSG00000179059,protein_coding -34134,CCDC89,ENSG00000179071,protein_coding -25890,FAM133A,ENSG00000179083,protein_coding -3397,DPM3,ENSG00000179085,protein_coding -39746,C12orf42,ENSG00000179088,protein_coding -29396,CYC1,ENSG00000179091,protein_coding -49387,AC129492.1,ENSG00000179094,protein_coding -11031,HTR1F,ENSG00000179097,protein_coding -39430,TMTC2,ENSG00000179104,protein_coding -49386,HES7,ENSG00000179111,protein_coding -55488,FARSA,ENSG00000179115,protein_coding -32456,SPTY2D1,ENSG00000179119,protein_coding -35548,C10orf67,ENSG00000179133,protein_coding -56469,SAMD4B,ENSG00000179134,protein_coding -29325,CYP11B2,ENSG00000179142,protein_coding -24389,GIMAP7,ENSG00000179144,protein_coding -49385,ALOXE3,ENSG00000179148,protein_coding -45708,EDC3,ENSG00000179151,protein_coding -10319,TCAIM,ENSG00000179152,protein_coding -773,FUCA1,ENSG00000179163,protein_coding -19573,PXT1,ENSG00000179165,protein_coding -56416,GGN,ENSG00000179168,protein_coding -447,HNRNPCL1,ENSG00000179172,protein_coding -1393,TMEM125,ENSG00000179178,protein_coding -40273,ZNF664,ENSG00000179195,protein_coding -57165,SIGLECL1,ENSG00000179213,protein_coding -55490,CALR,ENSG00000179218,protein_coding -25323,MAGED1,ENSG00000179222,protein_coding 
-34002,GVQW3,ENSG00000179240,protein_coding -32697,LDLRAD3,ENSG00000179241,protein_coding -54606,CDH4,ENSG00000179242,protein_coding -38000,SMCO3,ENSG00000179256,protein_coding -55493,RAD23A,ENSG00000179262,protein_coding -5977,PCARE,ENSG00000179270,protein_coding -55494,GADD45GIP1,ENSG00000179271,protein_coding -55495,DAND5,ENSG00000179284,protein_coding -33611,TMEM151A,ENSG00000179292,protein_coding -39958,PTPN11,ENSG00000179295,protein_coding -13468,NSUN7,ENSG00000179299,protein_coding -25773,RTL3,ENSG00000179300,protein_coding -25377,FAM156B,ENSG00000179304,protein_coding -49246,WSCD1,ENSG00000179314,protein_coding -34512,RAB39A,ENSG00000179331,protein_coding -45705,CLK3,ENSG00000179335,protein_coding -19420,HLA-DQB1,ENSG00000179344,protein_coding -11604,GATA2,ENSG00000179348,protein_coding -45704,ARID3B,ENSG00000179361,protein_coding -26059,TMEM31,ENSG00000179363,protein_coding -44003,PACS2,ENSG00000179364,protein_coding -14801,ELMOD2,ENSG00000179387,protein_coding -27498,EGR3,ENSG00000179388,protein_coding -5311,CATSPERE,ENSG00000179397,protein_coding -41594,GPC5,ENSG00000179399,protein_coding -115,VWA1,ENSG00000179403,protein_coding -11602,DNAJB8,ENSG00000179407,protein_coding -49013,GEMIN4,ENSG00000179409,protein_coding -465,HNRNPCL4,ENSG00000179412,protein_coding -32690,FJX1,ENSG00000179431,protein_coding -42673,KLHL28,ENSG00000179454,protein_coding -44385,MKRN3,ENSG00000179455,protein_coding -5300,ZBTB18,ENSG00000179456,protein_coding -24217,OR9A2,ENSG00000179468,protein_coding -42671,C14orf28,ENSG00000179476,protein_coding -49382,ALOX12B,ENSG00000179477,protein_coding -39688,SLC17A8,ENSG00000179520,protein_coding -29397,SHARPIN,ENSG00000179526,protein_coding -6750,LBX2,ENSG00000179528,protein_coding -32173,DNHD1,ENSG00000179532,protein_coding -26673,SLITRK4,ENSG00000179542,protein_coding -748,HTR1D,ENSG00000179546,protein_coding -23796,GCC1,ENSG00000179562,protein_coding -10550,LSMEM2,ENSG00000179564,protein_coding 
-46608,RNF151,ENSG00000179580,protein_coding -46925,CIITA,ENSG00000179583,protein_coding -48907,ZFPM1,ENSG00000179588,protein_coding -49379,ALOX15B,ENSG00000179593,protein_coding -49640,PLD6,ENSG00000179598,protein_coding -43005,GPHB5,ENSG00000179600,protein_coding -23782,GRM8,ENSG00000179603,protein_coding -51615,CDC42EP4,ENSG00000179604,protein_coding -38882,OR2AP1,ENSG00000179615,protein_coding -38881,OR6C4,ENSG00000179626,protein_coding -43978,ZBTB42,ENSG00000179627,protein_coding -41039,LACC1,ENSG00000179630,protein_coding -29398,MAF1,ENSG00000179632,protein_coding -42093,TPPP2,ENSG00000179636,protein_coding -3582,FCER1A,ENSG00000179639,protein_coding -50908,RPRML,ENSG00000179673,protein_coding -12188,ARL14,ENSG00000179674,protein_coding -38877,OR6C2,ENSG00000179695,protein_coding -29399,WDR97,ENSG00000179698,protein_coding -57553,NLRP8,ENSG00000179709,protein_coding -38463,PCED1B,ENSG00000179715,protein_coding -59296,APOBEC3B,ENSG00000179750,protein_coding -56457,SYCN,ENSG00000179751,protein_coding -50049,PIPOX,ENSG00000179761,protein_coding -53898,FOXS1,ENSG00000179772,protein_coding -36212,ATOH7,ENSG00000179774,protein_coding -48249,CDH5,ENSG00000179776,protein_coding -10029,LRRC3B,ENSG00000179796,protein_coding -41022,FAM216B,ENSG00000179813,protein_coding -32428,MRGPRX4,ENSG00000179817,protein_coding -57388,MYADM,ENSG00000179820,protein_coding -32424,MRGPRX3,ENSG00000179826,protein_coding -29402,MROH1,ENSG00000179832,protein_coding -6523,SERTAD2,ENSG00000179833,protein_coding -43036,AKAP5,ENSG00000179841,protein_coding -56783,NKPD1,ENSG00000179846,protein_coding -54973,GIPC3,ENSG00000179855,protein_coding -49371,RNF227,ENSG00000179859,protein_coding -1326,CITED4,ENSG00000179862,protein_coding -22407,ABCA13,ENSG00000179869,protein_coding -57550,NLRP11,ENSG00000179873,protein_coding -29359,TIGD5,ENSG00000179886,protein_coding -47034,PDXDC1,ENSG00000179889,protein_coding -2538,C1orf194,ENSG00000179902,protein_coding 
-57658,ZNF154,ENSG00000179909,protein_coding -38997,R3HDM2,ENSG00000179912,protein_coding -55742,B3GNT3,ENSG00000179913,protein_coding -3653,ITLN1,ENSG00000179914,protein_coding -6307,NRXN1,ENSG00000179915,protein_coding -47522,SEPHS2,ENSG00000179918,protein_coding -38860,OR10A7,ENSG00000179919,protein_coding -9134,GPBAR1,ENSG00000179921,protein_coding -57534,ZNF784,ENSG00000179922,protein_coding -4105,ZNF648,ENSG00000179930,protein_coding -42303,C14orf119,ENSG00000179933,protein_coding -10214,CCR8,ENSG00000179934,protein_coding -44593,GOLGA8J,ENSG00000179938,protein_coding -39356,BBS10,ENSG00000179941,protein_coding -57530,FIZ1,ENSG00000179943,protein_coding -29376,PUF60,ENSG00000179950,protein_coding -57526,SSC5D,ENSG00000179954,protein_coding -47521,DCTPP1,ENSG00000179958,protein_coding -47519,ZNF771,ENSG00000179965,protein_coding -53187,TSHZ1,ENSG00000179981,protein_coding -37287,PSTK,ENSG00000179988,protein_coding -42853,SOCS4,ENSG00000180008,protein_coding -53186,ZADH2,ENSG00000180011,protein_coding -49128,OR1E1,ENSG00000180016,protein_coding -47516,ZNF48,ENSG00000180035,protein_coding -49127,AC087498.1,ENSG00000180042,protein_coding -57511,FAM71E2,ENSG00000180043,protein_coding -12174,C3orf80,ENSG00000180044,protein_coding -27532,NKX2-6,ENSG00000180053,protein_coding -57506,TMEM150B,ENSG00000180061,protein_coding -30114,ANKRD18A,ENSG00000180071,protein_coding -54282,WFDC11,ENSG00000180083,protein_coding -57497,TMEM86B,ENSG00000180089,protein_coding -49123,OR3A1,ENSG00000180090,protein_coding -47517,SEPT1,ENSG00000180096,protein_coding -968,TRNAU1AP,ENSG00000180098,protein_coding -15520,EXOC3,ENSG00000180104,protein_coding -19828,TDRD6,ENSG00000180113,protein_coding -38362,C12orf40,ENSG00000180116,protein_coding -40934,CSNK1A1L,ENSG00000180138,protein_coding -29317,LYNX1,ENSG00000180155,protein_coding -31928,TH,ENSG00000180176,protein_coding -25038,MED14,ENSG00000180182,protein_coding -46593,FAHD1,ENSG00000180185,protein_coding 
-26999,TDRP,ENSG00000180190,protein_coding -962,RCC1,ENSG00000180198,protein_coding -54279,WFDC9,ENSG00000180205,protein_coding -47515,MYLPF,ENSG00000180209,protein_coding -32839,F2,ENSG00000180210,protein_coding -39673,FAM71C,ENSG00000180219,protein_coding -8526,PRKRA,ENSG00000180228,protein_coding -22074,ZNRF2,ENSG00000180233,protein_coding -14429,RRH,ENSG00000180245,protein_coding -7333,SLC9A4,ENSG00000180251,protein_coding -57279,ZNF816,ENSG00000180257,protein_coding -39587,FGD6,ENSG00000180263,protein_coding -31315,ADGRD2,ENSG00000180264,protein_coding -47173,GPR139,ENSG00000180269,protein_coding -5270,PLD5,ENSG00000180287,protein_coding -45446,OAZ2,ENSG00000180304,protein_coding -54280,WFDC10A,ENSG00000180305,protein_coding -19569,PNPLA1,ENSG00000180316,protein_coding -39444,ALX1,ENSG00000180318,protein_coding -50811,CCDC43,ENSG00000180329,protein_coding -41070,KCTD4,ENSG00000180332,protein_coding -50810,MEIOC,ENSG00000180336,protein_coding -50806,FZD2,ENSG00000180340,protein_coding -14199,TIGD2,ENSG00000180346,protein_coding -22105,ITPRID1,ENSG00000180347,protein_coding -11447,HCLS1,ENSG00000180353,protein_coding -22069,MTURN,ENSG00000180354,protein_coding -45439,ZNF609,ENSG00000180357,protein_coding -12800,PAK2,ENSG00000180370,protein_coding -10706,CCDC66,ENSG00000180376,protein_coding -53879,DEFB124,ENSG00000180383,protein_coding -50592,KRTAP9-7,ENSG00000180386,protein_coding -6255,MCFD2,ENSG00000180398,protein_coding -32835,HARBI1,ENSG00000180423,protein_coding -53878,DEFB123,ENSG00000180424,protein_coding -34657,C11orf71,ENSG00000180425,protein_coding -10294,CYP8B1,ENSG00000180432,protein_coding -3566,OR6K6,ENSG00000180433,protein_coding -40920,SERTM1,ENSG00000180440,protein_coding -30639,GAS1,ENSG00000180447,protein_coding -54826,AC004151.1,ENSG00000180448,protein_coding -33161,OR10Q1,ENSG00000180475,protein_coding -56385,ZNF571,ENSG00000180479,protein_coding -39332,GLIPR1L2,ENSG00000180481,protein_coding -53875,DEFB119,ENSG00000180483,protein_coding 
-2069,MIGA1,ENSG00000180488,protein_coding -60180,KCNE1,ENSG00000180509,protein_coding -35163,PRR26,ENSG00000180525,protein_coding -59846,NRIP1,ENSG00000180530,protein_coding -57655,ZSCAN4,ENSG00000180532,protein_coding -23282,BHLHA15,ENSG00000180535,protein_coding -18746,RNF182,ENSG00000180537,protein_coding -28700,TSPYL5,ENSG00000180543,protein_coding -31725,FUT7,ENSG00000180549,protein_coding -18941,HIST1H2AC,ENSG00000180573,protein_coding -37875,EIF2S3B,ENSG00000180574,protein_coding -35509,SKIDA1,ENSG00000180592,protein_coding -18940,HIST1H2BC,ENSG00000180596,protein_coding -12672,MB21D2,ENSG00000180611,protein_coding -13638,GSX2,ENSG00000180613,protein_coding -51608,SSTR2,ENSG00000180616,protein_coding -49225,ZNF594,ENSG00000180626,protein_coding -36710,PCGF5,ENSG00000180628,protein_coding -49791,SLC47A2,ENSG00000180638,protein_coding -36285,PRF1,ENSG00000180644,protein_coding -20864,OR2A4,ENSG00000180658,protein_coding -40908,MAB21L1,ENSG00000180660,protein_coding -4561,YOD1,ENSG00000180667,protein_coding -28582,TMEM64,ENSG00000180694,protein_coding -11562,C3orf22,ENSG00000180697,protein_coding -3546,OR10K2,ENSG00000180708,protein_coding -32829,CHRM4,ENSG00000180720,protein_coding -40745,SHISA2,ENSG00000180730,protein_coding -55321,S1PR5,ENSG00000180739,protein_coding -37365,CLRN3,ENSG00000180745,protein_coding -316,GPR157,ENSG00000180758,protein_coding -11561,CHST13,ENSG00000180767,protein_coding -26230,AGTR2,ENSG00000180772,protein_coding -34293,SLC36A4,ENSG00000180773,protein_coding -40650,ZDHHC20,ENSG00000180776,protein_coding -52409,ANKRD30B,ENSG00000180777,protein_coding -32036,OR51E1,ENSG00000180785,protein_coding -49220,ZFP3,ENSG00000180787,protein_coding -14502,ARSJ,ENSG00000180801,protein_coding -38806,HOXC9,ENSG00000180806,protein_coding -24800,MAP3K15,ENSG00000180815,protein_coding -36276,PPA1,ENSG00000180817,protein_coding -38801,HOXC10,ENSG00000180818,protein_coding -18557,PSMG4,ENSG00000180822,protein_coding 
-28205,BHLHE22,ENSG00000180828,protein_coding -12502,MAP6D1,ENSG00000180834,protein_coding -55433,ZNF443,ENSG00000180855,protein_coding -9129,CXCR2,ENSG00000180871,protein_coding -19879,DEFB112,ENSG00000180872,protein_coding -5236,GREM2,ENSG00000180875,protein_coding -32157,C11orf42,ENSG00000180878,protein_coding -26870,SSR4,ENSG00000180879,protein_coding -39328,CAPS2,ENSG00000180881,protein_coding -56227,ZNF792,ENSG00000180884,protein_coding -51199,CUEDC1,ENSG00000180891,protein_coding -29374,SCRIB,ENSG00000180900,protein_coding -51667,KCTD2,ENSG00000180901,protein_coding -9645,D2HGDH,ENSG00000180902,protein_coding -9746,OXTR,ENSG00000180914,protein_coding -48484,CMTR2,ENSG00000180917,protein_coding -32150,OR56B4,ENSG00000180919,protein_coding -29370,FAM83H,ENSG00000180921,protein_coding -10602,GPR62,ENSG00000180929,protein_coding -32146,OR56A1,ENSG00000180934,protein_coding -29092,ZNF572,ENSG00000180938,protein_coding -45882,ST20,ENSG00000180953,protein_coding -58920,PITPNB,ENSG00000180957,protein_coding -26038,TCEAL8,ENSG00000180964,protein_coding -32137,OR52E4,ENSG00000180974,protein_coding -44932,LRRC57,ENSG00000180979,protein_coding -32133,OR52N2,ENSG00000180988,protein_coding -19787,MRPL14,ENSG00000180992,protein_coding -42812,GPR137C,ENSG00000180998,protein_coding -3938,C1orf105,ENSG00000180999,protein_coding -32131,OR52N1,ENSG00000181001,protein_coding -14611,BBS12,ENSG00000181004,protein_coding -56336,ZFP82,ENSG00000181007,protein_coding -32130,OR52N5,ENSG00000181009,protein_coding -51235,AC005666.1,ENSG00000181013,protein_coding -23614,LSMEM1,ENSG00000181016,protein_coding -48432,NQO1,ENSG00000181019,protein_coding -32127,OR56B1,ENSG00000181023,protein_coding -46093,AEN,ENSG00000181026,protein_coding -56874,FKRP,ENSG00000181027,protein_coding -55166,TRAPPC5,ENSG00000181029,protein_coding -48998,RPH3AL,ENSG00000181031,protein_coding -55811,SLC25A42,ENSG00000181035,protein_coding -3601,FCRL6,ENSG00000181036,protein_coding 
-51773,METTL23,ENSG00000181038,protein_coding -51885,SLC26A11,ENSG00000181045,protein_coding -10288,HIGD1A,ENSG00000181061,protein_coding -24002,CHRM2,ENSG00000181072,protein_coding -32128,OR52N4,ENSG00000181074,protein_coding -29368,MAPK15,ENSG00000181085,protein_coding -31774,EHMT1,ENSG00000181090,protein_coding -12602,ADIPOQ,ENSG00000181092,protein_coding -16654,F2R,ENSG00000181104,protein_coding -29364,ZNF707,ENSG00000181135,protein_coding -55230,MUC16,ENSG00000181143,protein_coding -18175,NPM1,ENSG00000181163,protein_coding -25555,PJA1,ENSG00000181191,protein_coding -35353,DHTKD1,ENSG00000181192,protein_coding -28091,PENK,ENSG00000181195,protein_coding -13013,C4orf50,ENSG00000181215,protein_coding -4977,HIST3H2A,ENSG00000181218,protein_coding -24356,ZNF746,ENSG00000181220,protein_coding -49338,POLR2A,ENSG00000181222,protein_coding -40372,TMEM132C,ENSG00000181234,protein_coding -55104,SLC25A41,ENSG00000181240,protein_coding -34842,TMEM136,ENSG00000181264,protein_coding -33102,OR5AK2,ENSG00000181273,protein_coding -36836,FRAT2,ENSG00000181274,protein_coding -49330,TMEM102,ENSG00000181284,protein_coding -50267,TMEM132E,ENSG00000181291,protein_coding -18986,ZNF322,ENSG00000181315,protein_coding -11800,NME9,ENSG00000181322,protein_coding -49326,SPEM1,ENSG00000181323,protein_coding -34330,HEPHL1,ENSG00000181333,protein_coding -49600,LRRC75A,ENSG00000181350,protein_coding -33071,OR5M8,ENSG00000181371,protein_coding -50263,CCL13,ENSG00000181374,protein_coding -9178,CFAP65,ENSG00000181378,protein_coding -15187,DDX60L,ENSG00000181381,protein_coding -56312,SYNE4,ENSG00000181392,protein_coding -52013,OGFOD3,ENSG00000181396,protein_coding -29464,WASHC1,ENSG00000181404,protein_coding -52011,UTS2R,ENSG00000181408,protein_coding -51914,AATK,ENSG00000181409,protein_coding -38563,DDN,ENSG00000181418,protein_coding -26570,SAGE1,ENSG00000181433,protein_coding -24359,ZNF467,ENSG00000181444,protein_coding -12456,SOX2,ENSG00000181449,protein_coding 
-4937,ZNF678,ENSG00000181450,protein_coding -11147,TMEM45A,ENSG00000181458,protein_coding -12056,RAP2B,ENSG00000181467,protein_coding -21215,ZBTB2,ENSG00000181472,protein_coding -50148,RNF135,ENSG00000181481,protein_coding -34913,OR6T1,ENSG00000181499,protein_coding -50837,ACBD4,ENSG00000181513,protein_coding -34911,OR8D4,ENSG00000181518,protein_coding -51884,SGSH,ENSG00000181523,protein_coding -14938,MAB21L2,ENSG00000181541,protein_coding -24726,FANCB,ENSG00000181544,protein_coding -42075,EDDM3B,ENSG00000181552,protein_coding -10404,SETD2,ENSG00000181555,protein_coding -42074,EDDM3A,ENSG00000181562,protein_coding -19783,C6orf223,ENSG00000181577,protein_coding -10388,TMIE,ENSG00000181585,protein_coding -54860,MEX3D,ENSG00000181588,protein_coding -32109,OR52D1,ENSG00000181609,protein_coding -51198,MRPS23,ENSG00000181610,protein_coding -32114,OR52H1,ENSG00000181616,protein_coding -13882,FDCSP,ENSG00000181617,protein_coding -42940,GPR135,ENSG00000181619,protein_coding -47441,SLX1B,ENSG00000181625,protein_coding -52317,ANKRD62,ENSG00000181626,protein_coding -12029,P2RY13,ENSG00000181631,protein_coding -31187,TNFSF15,ENSG00000181634,protein_coding -29335,ZFP41,ENSG00000181638,protein_coding -31954,PHLDA2,ENSG00000181649,protein_coding -24407,ATG9B,ENSG00000181652,protein_coding -2435,GPR88,ENSG00000181656,protein_coding -56372,ZNF875,ENSG00000181666,protein_coding -28085,PLAG1,ENSG00000181690,protein_coding -33053,OR8H1,ENSG00000181693,protein_coding -33052,OR5T1,ENSG00000181698,protein_coding -25547,YIPF6,ENSG00000181704,protein_coding -33050,OR5T2,ENSG00000181718,protein_coding -11345,ZBTB20,ENSG00000181722,protein_coding -55223,OR2Z1,ENSG00000181733,protein_coding -11912,DIPK2A,ENSG00000181744,protein_coding -17036,C5orf30,ENSG00000181751,protein_coding -33044,OR8K5,ENSG00000181752,protein_coding -2549,AMIGO1,ENSG00000181754,protein_coding -33040,OR8H3,ENSG00000181761,protein_coding -33038,OR8H2,ENSG00000181767,protein_coding -915,GPR3,ENSG00000181773,protein_coding 
-30416,TMEM252,ENSG00000181778,protein_coding -54780,ODF3L2,ENSG00000181781,protein_coding -33032,OR5AS1,ENSG00000181785,protein_coding -55222,ACTL9,ENSG00000181786,protein_coding -12014,SIAH2,ENSG00000181788,protein_coding -11638,COPG1,ENSG00000181789,protein_coding -29301,ADGRB1,ENSG00000181790,protein_coding -42066,OR6S1,ENSG00000181803,protein_coding -11904,SLC9A9,ENSG00000181804,protein_coding -1182,LSM10,ENSG00000181817,protein_coding -13393,RELL1,ENSG00000181826,protein_coding -45252,RFX7,ENSG00000181827,protein_coding -32810,SLC35C1,ENSG00000181830,protein_coding -11344,TIGIT,ENSG00000181847,protein_coding -38935,RNF41,ENSG00000181852,protein_coding -49308,SLC2A4,ENSG00000181856,protein_coding -17237,FTMT,ENSG00000181867,protein_coding -4962,IBA57,ENSG00000181873,protein_coding -49306,CLDN7,ENSG00000181885,protein_coding -57686,ZNF329,ENSG00000181894,protein_coding -55840,ZNF101,ENSG00000181896,protein_coding -33004,OR4C6,ENSG00000181903,protein_coding -17463,C5orf24,ENSG00000181904,protein_coding -36159,ADO,ENSG00000181915,protein_coding -33910,COA4,ENSG00000181924,protein_coding -33002,OR4P4,ENSG00000181927,protein_coding -38566,PRKAG1,ENSG00000181929,protein_coding -48172,GINS3,ENSG00000181938,protein_coding -32999,OR4C15,ENSG00000181939,protein_coding -32989,OR4A15,ENSG00000181958,protein_coding -32988,OR4A16,ENSG00000181961,protein_coding -32027,OR52K2,ENSG00000181963,protein_coding -17488,NEUROG1,ENSG00000181965,protein_coding -13290,CCDC149,ENSG00000181982,protein_coding -46085,MRPS11,ENSG00000181991,protein_coding -4455,SNRPE,ENSG00000182004,protein_coding -36150,RTKN2,ENSG00000182010,protein_coding -56854,PNMA8A,ENSG00000182013,protein_coding -37306,CHST15,ENSG00000182022,protein_coding -54130,ADIG,ENSG00000182035,protein_coding -51659,USH1G,ENSG00000182040,protein_coding -39450,MGAT4C,ENSG00000182050,protein_coding -32922,TRIM49B,ENSG00000182053,protein_coding -46150,IDH2,ENSG00000182054,protein_coding -32077,OR52A1,ENSG00000182070,protein_coding 
-9588,OR6B2,ENSG00000182083,protein_coding -54820,TMEM259,ENSG00000182087,protein_coding -60288,WRB,ENSG00000182093,protein_coding -21659,TNRC18,ENSG00000182095,protein_coding -34087,FAM181B,ENSG00000182103,protein_coding -42979,TMEM30B,ENSG00000182107,protein_coding -46926,DEXI,ENSG00000182108,protein_coding -22599,ZNF716,ENSG00000182111,protein_coding -44714,NOP10,ENSG00000182117,protein_coding -5058,FAM89A,ENSG00000182118,protein_coding -18156,KCNIP1,ENSG00000182132,protein_coding -3203,TDRKH,ENSG00000182134,protein_coding -55921,ZNF708,ENSG00000182141,protein_coding -48514,IST1,ENSG00000182149,protein_coding -30843,ERCC6L2,ENSG00000182150,protein_coding -31768,MRPL41,ENSG00000182154,protein_coding -51863,ENPP7,ENSG00000182156,protein_coding -24013,CREB3L2,ENSG00000182158,protein_coding -24581,P2RY8,ENSG00000182162,protein_coding -14243,UNC5C,ENSG00000182168,protein_coding -31965,MRGPRG,ENSG00000182170,protein_coding -51699,TSEN54,ENSG00000182173,protein_coding -46243,RGMA,ENSG00000182175,protein_coding -9514,ASB18,ENSG00000182177,protein_coding -10525,UBA7,ENSG00000182179,protein_coding -36350,MRPS16,ENSG00000182180,protein_coding -1659,SHISAL2A,ENSG00000182183,protein_coding -43113,RAD51B,ENSG00000182185,protein_coding -8997,CRYGB,ENSG00000182187,protein_coding -26645,LDOC1,ENSG00000182195,protein_coding -40227,ARL6IP4,ENSG00000182196,protein_coding -28977,EXT1,ENSG00000182197,protein_coding -38993,SHMT2,ENSG00000182199,protein_coding -31883,MOB2,ENSG00000182208,protein_coding -43710,HHIPL1,ENSG00000182218,protein_coding -25034,ATP6AP2,ENSG00000182220,protein_coding -13566,ZAR1,ENSG00000182223,protein_coding -49367,CYB5D1,ENSG00000182224,protein_coding -18269,FAM153B,ENSG00000182230,protein_coding -60312,BACE2,ENSG00000182240,protein_coding -9988,UBE2E2,ENSG00000182247,protein_coding -46344,SYNM,ENSG00000182253,protein_coding -32591,KCNA4,ENSG00000182255,protein_coding -44521,GABRG3,ENSG00000182256,protein_coding -32227,NLRP10,ENSG00000182261,protein_coding 
-8281,FIGN,ENSG00000182263,protein_coding -56964,IZUMO1,ENSG00000182264,protein_coding -50108,TMIGD1,ENSG00000182271,protein_coding -31821,B4GALNT4,ENSG00000182272,protein_coding -24747,AP1S2,ENSG00000182287,protein_coding -29462,C8orf33,ENSG00000182307,protein_coding -13493,DCAF4L1,ENSG00000182308,protein_coding -57207,SPACA6,ENSG00000182310,protein_coding -55139,MBD3L3,ENSG00000182315,protein_coding -57702,ZSCAN22,ENSG00000182318,protein_coding -56944,KCNJ14,ENSG00000182324,protein_coding -29414,FBXL6,ENSG00000182325,protein_coding -37697,C1S,ENSG00000182326,protein_coding -49192,GLTPD2,ENSG00000182327,protein_coding -8854,KIAA2012,ENSG00000182329,protein_coding -473,PRAMEF8,ENSG00000182330,protein_coding -36649,LIPF,ENSG00000182333,protein_coding -32222,OR5P3,ENSG00000182334,protein_coding -41775,DAOA,ENSG00000182346,protein_coding -23133,ZNF804B,ENSG00000182348,protein_coding -34487,KBTBD3,ENSG00000182359,protein_coding -60509,YBEY,ENSG00000182362,protein_coding -27014,CLN8,ENSG00000182372,protein_coding -24560,PLCXD1,ENSG00000182378,protein_coding -38991,NXPH4,ENSG00000182379,protein_coding -8132,CACNB4,ENSG00000182389,protein_coding -56464,IFNL1,ENSG00000182393,protein_coding -42622,TRAPPC6B,ENSG00000182400,protein_coding -44708,PGBD4,ENSG00000182405,protein_coding -57986,CDY2A,ENSG00000182415,protein_coding -51946,NPLOC4,ENSG00000182446,protein_coding -12201,OTOL1,ENSG00000182447,protein_coding -33462,KCNK4,ENSG00000182450,protein_coding -52009,TEX19,ENSG00000182459,protein_coding -54460,TSHZ2,ENSG00000182463,protein_coding -56429,CAPN12,ENSG00000182472,protein_coding -51732,EXOC7,ENSG00000182473,protein_coding -51528,KPNA2,ENSG00000182481,protein_coding -25949,XKRX,ENSG00000182489,protein_coding -26854,BGN,ENSG00000182492,protein_coding -11176,CEP97,ENSG00000182504,protein_coding -26175,LHFPL1,ENSG00000182508,protein_coding -46188,FES,ENSG00000182511,protein_coding -43638,GLRX5,ENSG00000182512,protein_coding -25437,FAM104B,ENSG00000182518,protein_coding 
-42868,TBPL2,ENSG00000182521,protein_coding -9745,CAV3,ENSG00000182533,protein_coding -51768,MXRA7,ENSG00000182534,protein_coding -59045,LIMK2,ENSG00000182541,protein_coding -38761,MFSD5,ENSG00000182544,protein_coding -42057,RNASE10,ENSG00000182545,protein_coding -5525,ADI1,ENSG00000182551,protein_coding -15355,RWDD4,ENSG00000182552,protein_coding -49168,SPNS3,ENSG00000182557,protein_coding -55168,CLEC4G,ENSG00000182566,protein_coding -9954,SATB1,ENSG00000182568,protein_coding -51038,NXPH3,ENSG00000182575,protein_coding -17885,CSF1R,ENSG00000182578,protein_coding -12543,EPHB3,ENSG00000182580,protein_coding -24633,VCX,ENSG00000182583,protein_coding -53959,ACTL10,ENSG00000182584,protein_coding -13949,EPGN,ENSG00000182585,protein_coding -60090,KRTAP11-1,ENSG00000182591,protein_coding -9447,SNORC,ENSG00000182600,protein_coding -47326,HS3ST4,ENSG00000182601,protein_coding -10260,TRAK1,ENSG00000182606,protein_coding -51948,TSPAN10,ENSG00000182612,protein_coding -18460,OR2V2,ENSG00000182613,protein_coding -53504,PLCB1,ENSG00000182621,protein_coding -51254,SKA2,ENSG00000182628,protein_coding -16013,RXFP3,ENSG00000182631,protein_coding -34920,OR10G7,ENSG00000182634,protein_coding -44388,NDN,ENSG00000182636,protein_coding -37166,CCDC172,ENSG00000182645,protein_coding -42005,AL512310.1,ENSG00000182652,protein_coding -35099,NTM,ENSG00000182667,protein_coding -60237,TTC3,ENSG00000182670,protein_coding -28322,KCNB2,ENSG00000182674,protein_coding -51962,PPP1R27,ENSG00000182676,protein_coding -46639,BRICD5,ENSG00000182685,protein_coding -51731,GALR2,ENSG00000182687,protein_coding -9203,RESP18,ENSG00000182698,protein_coding -17606,IGIP,ENSG00000182700,protein_coding -34015,TSKU,ENSG00000182704,protein_coding -26949,CMC4,ENSG00000182712,protein_coding -45356,ANXA2,ENSG00000182718,protein_coding -43204,RGS6,ENSG00000182732,protein_coding -50973,HOXB4,ENSG00000182742,protein_coding -20984,SLC35D3,ENSG00000182747,protein_coding -835,PAQR7,ENSG00000182749,protein_coding 
-31198,PAPPA,ENSG00000182752,protein_coding -29347,MAFA,ENSG00000182759,protein_coding -46160,NGRN,ENSG00000182768,protein_coding -36579,GRID1,ENSG00000182771,protein_coding -45952,RPS17,ENSG00000182774,protein_coding -40213,HCAR2,ENSG00000182782,protein_coding -5448,OR2T29,ENSG00000182783,protein_coding -33638,CCDC87,ENSG00000182791,protein_coding -19917,GSTA5,ENSG00000182793,protein_coding -4559,C1orf116,ENSG00000182795,protein_coding -24752,MAGEB17,ENSG00000182798,protein_coding -44010,CRIP2,ENSG00000182809,protein_coding -48356,DDX28,ENSG00000182810,protein_coding -60054,KRTAP13-2,ENSG00000182816,protein_coding -4897,ACBD3,ENSG00000182827,protein_coding -46884,C16orf72,ENSG00000182831,protein_coding -16139,PLCXD3,ENSG00000182836,protein_coding -49190,VMO1,ENSG00000182853,protein_coding -46421,OR4F15,ENSG00000182854,protein_coding -59592,ALG12,ENSG00000182858,protein_coding -1067,LCK,ENSG00000182866,protein_coding -40478,GALNT9,ENSG00000182870,protein_coding -60477,COL18A1,ENSG00000182871,protein_coding -25142,RBM10,ENSG00000182872,protein_coding -48147,ADGRG3,ENSG00000182885,protein_coding -26344,GLUD2,ENSG00000182890,protein_coding -49319,TMEM95,ENSG00000182896,protein_coding -3222,TCHHL1,ENSG00000182898,protein_coding -12838,RPL35A,ENSG00000182899,protein_coding -5247,RGS7,ENSG00000182901,protein_coding -58369,SLC25A18,ENSG00000182902,protein_coding -12872,ZNF721,ENSG00000182903,protein_coding -26042,TCEAL7,ENSG00000182916,protein_coding -34323,C11orf54,ENSG00000182919,protein_coding -11753,CEP63,ENSG00000182923,protein_coding -54283,WFDC10B,ENSG00000182931,protein_coding -35017,SRPRA,ENSG00000182934,protein_coding -51661,OTOP3,ENSG00000182938,protein_coding -58952,EWSR1,ENSG00000182944,protein_coding -45768,ODF3L1,ENSG00000182950,protein_coding -18981,HMGN4,ENSG00000182952,protein_coding -40695,SPATA13,ENSG00000182957,protein_coding -50820,GJC1,ENSG00000182963,protein_coding -41865,SOX1,ENSG00000182968,protein_coding 
-10099,CNOT10,ENSG00000182973,protein_coding -44307,AC135068.1,ENSG00000182974,protein_coding -44008,MTA1,ENSG00000182979,protein_coding -10296,ZNF662,ENSG00000182983,protein_coding -34670,CADM1,ENSG00000182985,protein_coding -57273,ZNF320,ENSG00000182986,protein_coding -37999,C12orf60,ENSG00000182993,protein_coding -51978,PYCR1,ENSG00000183010,protein_coding -49366,NAA38,ENSG00000183011,protein_coding -49171,AC118754.1,ENSG00000183018,protein_coding -55164,MCEMP1,ENSG00000183019,protein_coding -31867,AP2A2,ENSG00000183020,protein_coding -6143,SLC8A1,ENSG00000183023,protein_coding -49115,OR1G1,ENSG00000183024,protein_coding -42589,SLC25A21,ENSG00000183032,protein_coding -51660,OTOP2,ENSG00000183034,protein_coding -25814,CYLC1,ENSG00000183035,protein_coding -60301,PCP4,ENSG00000183036,protein_coding -46869,ABAT,ENSG00000183044,protein_coding -51959,SLC25A10,ENSG00000183048,protein_coding -35361,CAMK1D,ENSG00000183049,protein_coding -7495,RGPD6,ENSG00000183054,protein_coding -46362,LYSMD4,ENSG00000183060,protein_coding -59407,WBP2NL,ENSG00000183066,protein_coding -60300,IGSF5,ENSG00000183067,protein_coding -18215,NKX2-5,ENSG00000183072,protein_coding -51823,AFMID,ENSG00000183077,protein_coding -41912,GAS6,ENSG00000183087,protein_coding -14841,FREM3,ENSG00000183090,protein_coding -8128,NEB,ENSG00000183091,protein_coding -43741,BEGAIN,ENSG00000183092,protein_coding -41610,GPC6,ENSG00000183098,protein_coding -17870,ARHGEF37,ENSG00000183111,protein_coding -670,FAM43B,ENSG00000183114,protein_coding -27039,CSMD1,ENSG00000183117,protein_coding -37015,CALHM3,ENSG00000183128,protein_coding -33272,PTGDR2,ENSG00000183134,protein_coding -20574,CEP57L1,ENSG00000183137,protein_coding -60234,RIPPLY3,ENSG00000183145,protein_coding -30156,ANKRD20A2,ENSG00000183148,protein_coding -37947,GPR19,ENSG00000183150,protein_coding -50522,GJD3,ENSG00000183153,protein_coding -4420,RABIF,ENSG00000183155,protein_coding -39842,TMEM119,ENSG00000183160,protein_coding 
-32516,FANCF,ENSG00000183161,protein_coding -22849,CALN1,ENSG00000183166,protein_coding -59413,SMDT1,ENSG00000183172,protein_coding -11088,GABRR3,ENSG00000183185,protein_coding -54777,C2CD4C,ENSG00000183186,protein_coding -48602,CHST6,ENSG00000183196,protein_coding -52402,POTEC,ENSG00000183206,protein_coding -56982,RUVBL2,ENSG00000183207,protein_coding -46156,GDPGP1,ENSG00000183208,protein_coding -36189,CTNNA3,ENSG00000183230,protein_coding -58580,RIMBP3C,ENSG00000183246,protein_coding -55175,PRR36,ENSG00000183248,protein_coding -32094,OR51B4,ENSG00000183251,protein_coding -60453,PTTG1IP,ENSG00000183255,protein_coding -18329,DDX41,ENSG00000183258,protein_coding -54719,ABHD16B,ENSG00000183260,protein_coding -32135,OR52E8,ENSG00000183269,protein_coding -40090,CCDC60,ENSG00000183273,protein_coding -6945,PLGLB1,ENSG00000183281,protein_coding -38661,DAZAP2,ENSG00000183283,protein_coding -52993,CCBE1,ENSG00000183287,protein_coding -2194,SELENOF,ENSG00000183291,protein_coding -32221,OR5P2,ENSG00000183303,protein_coding -24643,FAM9A,ENSG00000183304,protein_coding -26820,MAGEA2B,ENSG00000183305,protein_coding -58354,TMEM121B,ENSG00000183307,protein_coding -29363,ZNF623,ENSG00000183309,protein_coding -5449,OR2T34,ENSG00000183310,protein_coding -32142,OR52L1,ENSG00000183313,protein_coding -1213,EPHA10,ENSG00000183317,protein_coding -49422,SPDYE4,ENSG00000183318,protein_coding -16493,CCDC125,ENSG00000183323,protein_coding -45662,REC114,ENSG00000183324,protein_coding -47438,BOLA2,ENSG00000183336,protein_coding -25028,BCOR,ENSG00000183337,protein_coding -34380,JRKL,ENSG00000183340,protein_coding -36146,CABCOCO1,ENSG00000183346,protein_coding -2234,GBP6,ENSG00000183347,protein_coding -29570,KIAA2026,ENSG00000183354,protein_coding -32211,OVCH2,ENSG00000183378,protein_coding -43276,SYNDIG1L,ENSG00000183379,protein_coding -1230,FHL3,ENSG00000183386,protein_coding -32144,OR56A4,ENSG00000183389,protein_coding -39732,PMCH,ENSG00000183395,protein_coding 
-10459,TMEM89,ENSG00000183396,protein_coding -54967,C19orf71,ENSG00000183397,protein_coding -55362,CCDC159,ENSG00000183401,protein_coding -60327,RIPK4,ENSG00000183421,protein_coding -14430,LRIT3,ENSG00000183423,protein_coding -47030,NPIPA1,ENSG00000183426,protein_coding -1228,SF3A3,ENSG00000183431,protein_coding -26481,TFDP3,ENSG00000183434,protein_coding -15148,TRIM61,ENSG00000183439,protein_coding -46896,GRIN2A,ENSG00000183454,protein_coding -40792,URAD,ENSG00000183463,protein_coding -16506,GTF2H2C,ENSG00000183474,protein_coding -46381,ASB7,ENSG00000183475,protein_coding -45824,SH2D7,ENSG00000183476,protein_coding -26851,TREX2,ENSG00000183479,protein_coding -43991,GPR132,ENSG00000183484,protein_coding -60316,MX2,ENSG00000183486,protein_coding -40466,EP400,ENSG00000183495,protein_coding -45932,MEX3B,ENSG00000183496,protein_coding -2778,TENT5C,ENSG00000183508,protein_coding -7258,COA5,ENSG00000183513,protein_coding -1231,UTP11,ENSG00000183520,protein_coding -60277,PSMG1,ENSG00000183527,protein_coding -59065,PRR14L,ENSG00000183530,protein_coding -37870,KLRC4,ENSG00000183542,protein_coding -47181,ACSM5,ENSG00000183549,protein_coding -37279,C10orf120,ENSG00000183559,protein_coding -34335,IZUMO1R,ENSG00000183560,protein_coding -59431,SERHL2,ENSG00000183569,protein_coding -60485,PCBP3,ENSG00000183570,protein_coding -46342,PGPEP1L,ENSG00000183571,protein_coding -43703,SETD3,ENSG00000183576,protein_coding -45161,TNFAIP8L3,ENSG00000183578,protein_coding -58941,ZNRF3,ENSG00000183579,protein_coding -15779,FBXL7,ENSG00000183580,protein_coding -58481,TANGO2,ENSG00000183597,protein_coding -3088,HIST2H3D,ENSG00000183598,protein_coding -37218,SFXN4,ENSG00000183605,protein_coding -6599,GKN2,ENSG00000183607,protein_coding -1066,FAM167B,ENSG00000183615,protein_coding -54983,MRPL54,ENSG00000183617,protein_coding -35683,ZNF438,ENSG00000183621,protein_coding -11641,HMCES,ENSG00000183624,protein_coding -10370,CCR3,ENSG00000183625,protein_coding 
-58422,DGCR6,ENSG00000183628,protein_coding -44554,GOLGA8G,ENSG00000183629,protein_coding -26395,PRR32,ENSG00000183631,protein_coding -47684,TP53TG3,ENSG00000183632,protein_coding -27235,RP1L1,ENSG00000183638,protein_coding -60088,KRTAP8-1,ENSG00000183640,protein_coding -34572,C11orf88,ENSG00000183644,protein_coding -57650,ZNF530,ENSG00000183647,protein_coding -43559,NDUFB1,ENSG00000183648,protein_coding -15783,MARCH11,ENSG00000183654,protein_coding -46060,KLHL25,ENSG00000183655,protein_coding -10842,TAFA1,ENSG00000183662,protein_coding -29077,TRMT12,ENSG00000183665,protein_coding -56673,PSG9,ENSG00000183668,protein_coding -8940,GPR1,ENSG00000183671,protein_coding -1264,BMP8A,ENSG00000183682,protein_coding -51970,ALYREF,ENSG00000183684,protein_coding -49005,RFLNB,ENSG00000183688,protein_coding -25087,EFHC2,ENSG00000183690,protein_coding -51166,NOG,ENSG00000183691,protein_coding -32473,MRGPRX2,ENSG00000183695,protein_coding -22406,UPP1,ENSG00000183696,protein_coding -44331,OR4N4,ENSG00000183706,protein_coding -56463,IFNL2,ENSG00000183709,protein_coding -35109,OPCML,ENSG00000183715,protein_coding -18474,TRIM52,ENSG00000183718,protein_coding -40963,LHFPL6,ENSG00000183722,protein_coding -48266,CMTM4,ENSG00000183723,protein_coding -820,TMEM50A,ENSG00000183726,protein_coding -28026,NPBWR1,ENSG00000183729,protein_coding -6644,FIGLA,ENSG00000183733,protein_coding -31930,ASCL2,ENSG00000183734,protein_coding -39137,TBK1,ENSG00000183735,protein_coding -59292,CBX6,ENSG00000183741,protein_coding -21864,MACC1,ENSG00000183742,protein_coding -47183,ACSM2A,ENSG00000183747,protein_coding -46610,TBL3,ENSG00000183751,protein_coding -58126,BPY2,ENSG00000183753,protein_coding -56453,ACP7,ENSG00000183760,protein_coding -58945,KREMEN1,ENSG00000183762,protein_coding -10527,TRAIP,ENSG00000183763,protein_coding -58934,CHEK2,ENSG00000183765,protein_coding -11813,FOXL2,ENSG00000183770,protein_coding -58534,AIFM3,ENSG00000183773,protein_coding -17778,KCTD16,ENSG00000183775,protein_coding 
-60296,B3GALT5,ENSG00000183778,protein_coding -27770,ZNF703,ENSG00000183779,protein_coding -5107,SLC35F3,ENSG00000183780,protein_coding -13520,KCTD8,ENSG00000183783,protein_coding -29475,C9orf66,ENSG00000183784,protein_coding -58389,TUBA8,ENSG00000183785,protein_coding -52777,ELOA3,ENSG00000183791,protein_coding -47049,NPIPA5,ENSG00000183793,protein_coding -58189,BPY2B,ENSG00000183795,protein_coding -54170,EMILIN3,ENSG00000183798,protein_coding -32207,OLFML1,ENSG00000183801,protein_coding -20705,FAM162B,ENSG00000183807,protein_coding -28627,RBM12B,ENSG00000183808,protein_coding -10104,CCR4,ENSG00000183813,protein_coding -4901,LIN9,ENSG00000183814,protein_coding -19620,BTBD9,ENSG00000183826,protein_coding -44000,NUDT14,ENSG00000183828,protein_coding -3958,ANKRD45,ENSG00000183831,protein_coding -11404,MAATS1,ENSG00000183833,protein_coding -26836,PNMA3,ENSG00000183837,protein_coding -7929,GPR39,ENSG00000183840,protein_coding -60315,FAM3B,ENSG00000183844,protein_coding -56008,ZNF730,ENSG00000183850,protein_coding -3533,KIRREL1,ENSG00000183853,protein_coding -3479,IQGAP3,ENSG00000183856,protein_coding -26798,CNGA2,ENSG00000183862,protein_coding -59378,TOB2,ENSG00000183864,protein_coding -10193,SCN5A,ENSG00000183873,protein_coding -17892,ARSI,ENSG00000183876,protein_coding -57935,UTY,ENSG00000183878,protein_coding -537,SRARP,ENSG00000183888,protein_coding -47081,AC138969.1,ENSG00000183889,protein_coding -5758,TTC32,ENSG00000183891,protein_coding -33108,LRRC55,ENSG00000183908,protein_coding -49362,DNAH2,ENSG00000183914,protein_coding -26377,SH2D1A,ENSG00000183918,protein_coding -47243,SDR42E2,ENSG00000183921,protein_coding -24608,PRKX,ENSG00000183943,protein_coding -40242,KMT5A,ENSG00000183955,protein_coding -9959,KCNH8,ENSG00000183960,protein_coding -59035,SMTN,ENSG00000183963,protein_coding -46617,NPW,ENSG00000183971,protein_coding -9965,PP2D1,ENSG00000183977,protein_coding -50697,COA3,ENSG00000183978,protein_coding -51972,NPB,ENSG00000183979,protein_coding 
-2047,ST6GALNAC3,ENSG00000184005,protein_coding -1048,PTP4A2,ENSG00000184007,protein_coding -51942,ACTG1,ENSG00000184009,protein_coding -60319,TMPRSS2,ENSG00000184012,protein_coding -32262,DENND5A,ENSG00000184014,protein_coding -5450,OR2T10,ENSG00000184022,protein_coding -60079,KRTAP20-2,ENSG00000184032,protein_coding -26924,CTAG1B,ENSG00000184033,protein_coding -40199,DIABLO,ENSG00000184047,protein_coding -46199,VPS33B,ENSG00000184056,protein_coding -58472,TBX1,ENSG00000184058,protein_coding -50143,ADAP2,ENSG00000184060,protein_coding -58977,UQCR10,ENSG00000184076,protein_coding -25413,FAM120C,ENSG00000184083,protein_coding -15456,TRIML1,ENSG00000184108,protein_coding -47391,EIF3C,ENSG00000184110,protein_coding -58465,CLDN5,ENSG00000184113,protein_coding -58968,NIPSNAP1,ENSG00000184117,protein_coding -46420,OR4F6,ENSG00000184140,protein_coding -4490,CNTN2,ENSG00000184144,protein_coding -3264,SPRR4,ENSG00000184148,protein_coding -33842,LRTOMT,ENSG00000184154,protein_coding -3591,OR10J5,ENSG00000184155,protein_coding -29183,KCNQ3,ENSG00000184156,protein_coding -12973,ADRA2C,ENSG00000184160,protein_coding -55817,NR2C2AP,ENSG00000184162,protein_coding -88,C1QTNF12,ENSG00000184163,protein_coding -59594,CRELD2,ENSG00000184164,protein_coding -49113,OR1D2,ENSG00000184166,protein_coding -13619,SCFD2,ENSG00000184178,protein_coding -9543,UBE2F,ENSG00000184182,protein_coding -49875,KCNJ12,ENSG00000184185,protein_coding -25380,GPR173,ENSG00000184194,protein_coding -12735,PPP1R2,ENSG00000184203,protein_coding -25382,TSPYL2,ENSG00000184205,protein_coding -46000,GOLGA6L4,ENSG00000184206,protein_coding -46641,PGP,ENSG00000184207,protein_coding -59391,C22orf46,ENSG00000184208,protein_coding -40245,SNRNP35,ENSG00000184209,protein_coding -25575,DGAT2L6,ENSG00000184210,protein_coding -26885,IRAK1,ENSG00000184216,protein_coding -11136,CMSS1,ENSG00000184220,protein_coding -60140,OLIG1,ENSG00000184221,protein_coding -41351,PCDH9,ENSG00000184226,protein_coding 
-43235,ACOT1,ENSG00000184227,protein_coding -34839,OAF,ENSG00000184232,protein_coding -46391,ALDH1A3,ENSG00000184254,protein_coding -26641,CDR1,ENSG00000184258,protein_coding -3099,HIST2H2AC,ENSG00000184260,protein_coding -6271,KCNK12,ENSG00000184261,protein_coding -3100,HIST2H2AB,ENSG00000184270,protein_coding -38659,AC139768.1,ENSG00000184271,protein_coding -33828,DEFB108B,ENSG00000184276,protein_coding -46413,TM2D3,ENSG00000184277,protein_coding -31938,TSSC4,ENSG00000184281,protein_coding -1791,TACSTD2,ENSG00000184292,protein_coding -37840,CLECL1,ENSG00000184293,protein_coding -42962,SIX6,ENSG00000184302,protein_coding -42451,PRKD1,ENSG00000184304,protein_coding -14211,CCSER1,ENSG00000184305,protein_coding -11333,ZDHHC23,ENSG00000184307,protein_coding -1734,MROH7,ENSG00000184313,protein_coding -3290,S100A7A,ENSG00000184330,protein_coding -26868,SRPK3,ENSG00000184343,protein_coding -37723,GDF3,ENSG00000184344,protein_coding -10593,IQCF2,ENSG00000184345,protein_coding -18135,SLIT3,ENSG00000184347,protein_coding -17057,EFNA5,ENSG00000184349,protein_coding -31967,MRGPRE,ENSG00000184350,protein_coding -60061,KRTAP19-1,ENSG00000184351,protein_coding -19041,HIST1H1B,ENSG00000184357,protein_coding -50848,SPATA32,ENSG00000184361,protein_coding -31822,PKP3,ENSG00000184363,protein_coding -24806,MAP7D2,ENSG00000184368,protein_coding -2572,CSF1,ENSG00000184371,protein_coding -28985,COLEC10,ENSG00000184374,protein_coding -12269,ACTRT3,ENSG00000184378,protein_coding -59255,PLA2G6,ENSG00000184381,protein_coding -34376,MAML2,ENSG00000184384,protein_coding -25666,PABPC1L2B,ENSG00000184388,protein_coding -1109,A3GALT2,ENSG00000184389,protein_coding -42026,OR4N5,ENSG00000184394,protein_coding -54619,SS18L1,ENSG00000184402,protein_coding -23708,KCND2,ENSG00000184408,protein_coding -29341,TOP1MT,ENSG00000184428,protein_coding -11825,COPB2,ENSG00000184432,protein_coding -29823,LRRC19,ENSG00000184434,protein_coding -58538,THAP7,ENSG00000184436,protein_coding 
-40209,KNTC1,ENSG00000184445,protein_coding -50684,CCR10,ENSG00000184451,protein_coding -795,NCMAP,ENSG00000184454,protein_coding -59103,BPIFC,ENSG00000184459,protein_coding -21509,WDR27,ENSG00000184465,protein_coding -58476,TXNRD2,ENSG00000184470,protein_coding -46528,C1QTNF8,ENSG00000184471,protein_coding -32140,OR56A3,ENSG00000184478,protein_coding -25602,FOXO4,ENSG00000184481,protein_coding -20442,POU3F2,ENSG00000184486,protein_coding -29284,PTP4A3,ENSG00000184489,protein_coding -7578,FOXD4L1,ENSG00000184492,protein_coding -41910,TMEM255B,ENSG00000184497,protein_coding -11053,PROS1,ENSG00000184500,protein_coding -50625,GAST,ENSG00000184502,protein_coding -44715,NUTM1,ENSG00000184507,protein_coding -46191,HDDC3,ENSG00000184508,protein_coding -25994,BEX5,ENSG00000184515,protein_coding -48586,ZFP1,ENSG00000184517,protein_coding -31853,CEND1,ENSG00000184524,protein_coding -20822,C6orf58,ENSG00000184530,protein_coding -49443,DHRS7C,ENSG00000184544,protein_coding -31884,DUSP8,ENSG00000184545,protein_coding -51832,SOCS3,ENSG00000184557,protein_coding -49327,SPEM2,ENSG00000184560,protein_coding -41536,SLITRK6,ENSG00000184564,protein_coding -58832,PIWIL3,ENSG00000184571,protein_coding -37657,LPAR5,ENSG00000184574,protein_coding -39134,XPOT,ENSG00000184575,protein_coding -17585,TMEM173,ENSG00000184584,protein_coding -1914,PDE4B,ENSG00000184588,protein_coding -2667,TAFA3,ENSG00000184599,protein_coding -43966,C14orf180,ENSG00000184601,protein_coding -46951,SNN,ENSG00000184602,protein_coding -8276,KCNH7,ENSG00000184611,protein_coding -38427,NELL2,ENSG00000184613,protein_coding -49411,KRBA2,ENSG00000184619,protein_coding -25606,MED12,ENSG00000184634,protein_coding -55858,ZNF93,ENSG00000184635,protein_coding -51795,SEPT9,ENSG00000184640,protein_coding -27234,PRSS55,ENSG00000184647,protein_coding -49410,ODF4,ENSG00000184650,protein_coding -30356,FOXD4L4,ENSG00000184659,protein_coding -27560,CDCA2,ENSG00000184661,protein_coding -28496,RALYL,ENSG00000184672,protein_coding 
-25496,AMER1,ENSG00000184675,protein_coding -727,ZBTB40,ENSG00000184677,protein_coding -3098,HIST2H2BE,ENSG00000184678,protein_coding -46717,CLDN6,ENSG00000184697,protein_coding -32102,OR51M1,ENSG00000184698,protein_coding -58469,SEPT5,ENSG00000184702,protein_coding -59055,EIF4ENIF1,ENSG00000184708,protein_coding -31739,LRRC26,ENSG00000184709,protein_coding -44988,SERINC4,ENSG00000184716,protein_coding -36645,RNLS,ENSG00000184719,protein_coding -60076,KRTAP6-1,ENSG00000184724,protein_coding -47378,APOBR,ENSG00000184730,protein_coding -5471,FAM110C,ENSG00000184731,protein_coding -24838,DDX53,ENSG00000184735,protein_coding -33425,ATL3,ENSG00000184743,protein_coding -39585,NDUFA12,ENSG00000184752,protein_coding -26525,SMIM10,ENSG00000184785,protein_coding -21514,TCTE3,ENSG00000184786,protein_coding -60450,UBE2G2,ENSG00000184787,protein_coding -25823,SATL1,ENSG00000184788,protein_coding -59021,OSBP2,ENSG00000184792,protein_coding -49032,TRARG1,ENSG00000184811,protein_coding -11817,PRR23B,ENSG00000184814,protein_coding -52803,ZBTB7C,ENSG00000184828,protein_coding -24848,APOO,ENSG00000184831,protein_coding -17225,PRR16,ENSG00000184838,protein_coding -18332,TMED9,ENSG00000184840,protein_coding -18254,DRD1,ENSG00000184845,protein_coding -46873,TMEM186,ENSG00000184857,protein_coding -48738,SDR42E1,ENSG00000184860,protein_coding -24507,RBM33,ENSG00000184863,protein_coding -25979,ARMCX2,ENSG00000184867,protein_coding -44002,BTBD6,ENSG00000184887,protein_coding -57744,SRY,ENSG00000184895,protein_coding -11642,H1FX,ENSG00000184897,protein_coding -8121,RBM43,ENSG00000184898,protein_coding -60452,SUMO3,ENSG00000184900,protein_coding -23596,IMMP2L,ENSG00000184903,protein_coding -25991,TCEAL2,ENSG00000184905,protein_coding -542,CLCNKB,ENSG00000184908,protein_coding -25658,DMRTC1B,ENSG00000184911,protein_coding -43997,JAG2,ENSG00000184916,protein_coding -50844,FMNL1,ENSG00000184922,protein_coding -36620,NUTM2A,ENSG00000184923,protein_coding 
-5844,PTRHD1,ENSG00000184924,protein_coding -31716,LCN12,ENSG00000184925,protein_coding -32191,OR6A2,ENSG00000184933,protein_coding -32620,WT1,ENSG00000184937,protein_coding -48385,ZFP90,ENSG00000184939,protein_coding -9611,AQP12A,ENSG00000184945,protein_coding -59274,FAM227A,ENSG00000184949,protein_coding -38878,OR6C70,ENSG00000184954,protein_coding -31869,MUC6,ENSG00000184956,protein_coding -40474,NOC4L,ENSG00000184967,protein_coding -58391,USP18,ENSG00000184979,protein_coding -59414,NDUFA6,ENSG00000184983,protein_coding -44702,CHRM5,ENSG00000184984,protein_coding -13046,SORCS2,ENSG00000184985,protein_coding -44014,TMEM121,ENSG00000184986,protein_coding -50726,TMEM106A,ENSG00000184988,protein_coding -43975,SIVA1,ENSG00000184990,protein_coding -40300,BRI3BP,ENSG00000184992,protein_coding -29767,IFNE,ENSG00000184995,protein_coding -33411,SLC22A10,ENSG00000184999,protein_coding -29407,DGAT1,ENSG00000185000,protein_coding -20707,RFX6,ENSG00000185002,protein_coding -10961,ROBO2,ENSG00000185008,protein_coding -36395,AP3M1,ENSG00000185009,protein_coding -26940,F8,ENSG00000185010,protein_coding -5746,NT5C1B,ENSG00000185013,protein_coding -28509,CA13,ENSG00000185015,protein_coding -53395,UBOX5,ENSG00000185019,protein_coding -59257,MAFF,ENSG00000185022,protein_coding -44001,BRF1,ENSG00000185024,protein_coding -15506,LRRC14B,ENSG00000185028,protein_coding -46152,SEMA4B,ENSG00000185033,protein_coding -9481,MROH2A,ENSG00000185038,protein_coding -22998,SPDYE16,ENSG00000185040,protein_coding -46155,CIB1,ENSG00000185043,protein_coding -39668,ANKS1B,ENSG00000185046,protein_coding -12925,NELFA,ENSG00000185049,protein_coding -53665,SLC24A3,ENSG00000185052,protein_coding -27360,SGCZ,ENSG00000185053,protein_coding -23531,EFCAB10,ENSG00000185055,protein_coding -18233,C5orf47,ENSG00000185056,protein_coding -38733,KRT76,ENSG00000185069,protein_coding -43457,FLRT2,ENSG00000185070,protein_coding -33361,INTS5,ENSG00000185085,protein_coding -45412,RPS27L,ENSG00000185088,protein_coding 
-1217,MANEAL,ENSG00000185090,protein_coding -43974,ADSSL1,ENSG00000185100,protein_coding -31824,ANO9,ENSG00000185101,protein_coding -1598,FAF1,ENSG00000185104,protein_coding -51980,MYADML2,ENSG00000185105,protein_coding -12717,FAM43A,ENSG00000185112,protein_coding -44579,NSMCE3,ENSG00000185115,protein_coding -29406,HSF1,ENSG00000185122,protein_coding -21510,C6orf120,ENSG00000185127,protein_coding -17605,PURA,ENSG00000185129,protein_coding -19030,HIST1H2BL,ENSG00000185130,protein_coding -59039,INPP5J,ENSG00000185133,protein_coding -15026,NPY2R,ENSG00000185149,protein_coding -4900,MIXL1,ENSG00000185155,protein_coding -49423,MFSD6L,ENSG00000185156,protein_coding -50187,LRRC37B,ENSG00000185158,protein_coding -40473,DDX51,ENSG00000185163,protein_coding -47128,NOMO2,ENSG00000185164,protein_coding -9609,AQP12B,ENSG00000185176,protein_coding -22562,ZNF479,ENSG00000185177,protein_coding -31823,SIGIRR,ENSG00000185187,protein_coding -29379,NRBP2,ENSG00000185189,protein_coding -54796,PRSS57,ENSG00000185198,protein_coding -31811,IFITM2,ENSG00000185201,protein_coding -43911,TNFAIP2,ENSG00000185215,protein_coding -10322,ZNF445,ENSG00000185219,protein_coding -5468,PGBD2,ENSG00000185220,protein_coding -26044,TCEAL9,ENSG00000185222,protein_coding -52377,MC2R,ENSG00000185231,protein_coding -55208,RAB11B,ENSG00000185236,protein_coding -32495,PRMT3,ENSG00000185238,protein_coding -49201,GP1BA,ENSG00000185245,protein_coding -42677,PRPF39,ENSG00000185246,protein_coding -26756,MAGEA11,ENSG00000185247,protein_coding -20582,PPIL6,ENSG00000185250,protein_coding -58508,ZNF74,ENSG00000185252,protein_coding -16903,KIAA0825,ENSG00000185261,protein_coding -51740,UBALD2,ENSG00000185262,protein_coding -59195,TEX33,ENSG00000185264,protein_coding -35409,CDNF,ENSG00000185267,protein_coding -51982,NOTUM,ENSG00000185269,protein_coding -42049,KLHL33,ENSG00000185271,protein_coding -59833,RBM11,ENSG00000185272,protein_coding -22845,GALNT17,ENSG00000185274,protein_coding 
-3968,ZBTB37,ENSG00000185278,protein_coding -22520,NUPR2,ENSG00000185290,protein_coding -24576,IL3RA,ENSG00000185291,protein_coding -50876,SPPL2C,ENSG00000185294,protein_coding -51951,CCDC137,ENSG00000185298,protein_coding -36485,SFTPA2,ENSG00000185303,protein_coding -6968,RGPD2,ENSG00000185304,protein_coding -16258,ARL15,ENSG00000185305,protein_coding -39130,C12orf56,ENSG00000185306,protein_coding -10194,SCN10A,ENSG00000185313,protein_coding -48963,CDK10,ENSG00000185324,protein_coding -46936,SOCS1,ENSG00000185338,protein_coding -59017,TCN2,ENSG00000185339,protein_coding -58953,GAS2L1,ENSG00000185340,protein_coding -40260,ATP6V0A2,ENSG00000185344,protein_coding -21386,PRKN,ENSG00000185345,protein_coding -44013,TEDC1,ENSG00000185347,protein_coding -41644,HS6ST3,ENSG00000185352,protein_coding -51953,HGS,ENSG00000185359,protein_coding -55033,TNFAIP8L1,ENSG00000185361,protein_coding -18459,OR2V1,ENSG00000185372,protein_coding -50280,RAD51D,ENSG00000185379,protein_coding -55577,OR7A17,ENSG00000185385,protein_coding -59613,MAPK11,ENSG00000185386,protein_coding -9356,SP140L,ENSG00000185404,protein_coding -7273,MRPL30,ENSG00000185414,protein_coding -46415,TARSL2,ENSG00000185418,protein_coding -5334,SMYD3,ENSG00000185420,protein_coding -38643,METTL7A,ENSG00000185432,protein_coding -786,IFNLR1,ENSG00000185436,protein_coding -60291,SH3BGR,ENSG00000185437,protein_coding -46227,FAM174B,ENSG00000185442,protein_coding -24949,FAM47A,ENSG00000185448,protein_coding -56930,ZSWIM9,ENSG00000185453,protein_coding -23299,KPNA7,ENSG00000185467,protein_coding -33381,TMEM179B,ENSG00000185475,protein_coding -14202,GPRIN3,ENSG00000185477,protein_coding -38712,KRT6B,ENSG00000185479,protein_coding -39731,PARPBP,ENSG00000185480,protein_coding -38995,STAC3,ENSG00000185482,protein_coding -1878,ROR1,ENSG00000185483,protein_coding -3402,MUC1,ENSG00000185499,protein_coding -51945,FAAP100,ENSG00000185504,protein_coding -31839,IRF7,ENSG00000185507,protein_coding 
-54197,L3MBTL1,ENSG00000185513,protein_coding -26950,BRCC3,ENSG00000185515,protein_coding -46206,SV2B,ENSG00000185518,protein_coding -543,FAM131C,ENSG00000185519,protein_coding -31833,LMNTD2,ENSG00000185522,protein_coding -4692,SPATA45,ENSG00000185523,protein_coding -51949,AC139530.1,ENSG00000185527,protein_coding -36070,PRKG1,ENSG00000185532,protein_coding -46292,NR2F2,ENSG00000185551,protein_coding -43746,DLK1,ENSG00000185559,protein_coding -49047,TLCD2,ENSG00000185561,protein_coding -11361,LSAMP,ENSG00000185565,protein_coding -43988,AHNAK2,ENSG00000185567,protein_coding -31325,OLFML2A,ENSG00000185585,protein_coding -38768,SP1,ENSG00000185591,protein_coding -46307,SPATA8,ENSG00000185594,protein_coding -58458,MRPL40,ENSG00000185608,protein_coding -38429,DBX2,ENSG00000185610,protein_coding -10524,INKA1,ENSG00000185614,protein_coding -46465,PDIA2,ENSG00000185615,protein_coding -12882,PCGF3,ENSG00000185619,protein_coding -12839,LMLN,ENSG00000185621,protein_coding -51963,P4HB,ENSG00000185624,protein_coding -31804,PSMD13,ENSG00000185627,protein_coding -3756,PBX1,ENSG00000185630,protein_coding -38994,NDUFA4L2,ENSG00000185633,protein_coding -45108,SHC4,ENSG00000185634,protein_coding -38737,KRT79,ENSG00000185640,protein_coding -43127,ZFP36L1,ENSG00000185650,protein_coding -58581,UBE2L3,ENSG00000185651,protein_coding -37620,NTF3,ENSG00000185652,protein_coding -60278,BRWD1,ENSG00000185658,protein_coding -18179,SMIM23,ENSG00000185662,protein_coding -38911,PMEL,ENSG00000185664,protein_coding -59105,SYN3,ENSG00000185666,protein_coding -1232,POU3F1,ENSG00000185668,protein_coding -48915,SNAI3,ENSG00000185669,protein_coding -33375,ZBTB3,ENSG00000185670,protein_coding -7274,LYG2,ENSG00000185674,protein_coding -31258,MORN5,ENSG00000185681,protein_coding -58672,PRAME,ENSG00000185686,protein_coding -18578,C6orf201,ENSG00000185689,protein_coding -28236,MYBL1,ENSG00000185697,protein_coding -47240,MOSMO,ENSG00000185716,protein_coding -59054,DRG1,ENSG00000185721,protein_coding 
-49160,ANKFY1,ENSG00000185722,protein_coding -28186,YTHDF3,ENSG00000185728,protein_coding -29339,ZNF696,ENSG00000185730,protein_coding -35174,ADARB2,ENSG00000185736,protein_coding -36542,NRG3,ENSG00000185737,protein_coding -46781,SRL,ENSG00000185739,protein_coding -34535,C11orf87,ENSG00000185742,protein_coding -36675,IFIT1,ENSG00000185745,protein_coding -25037,CXorf38,ENSG00000185753,protein_coding -15347,CLDN24,ENSG00000185758,protein_coding -20114,KCNQ5,ENSG00000185760,protein_coding -54857,ADAMTSL5,ENSG00000185761,protein_coding -13259,KCNIP4,ENSG00000185774,protein_coding -30209,SPATA31A6,ENSG00000185775,protein_coding -45856,MORF4L1,ENSG00000185787,protein_coding -57545,NLRP9,ENSG00000185792,protein_coding -12792,WDR53,ENSG00000185798,protein_coding -56818,DMWD,ENSG00000185800,protein_coding -29413,SLC52A2,ENSG00000185803,protein_coding -60236,PIGP,ENSG00000185808,protein_coding -22426,IKZF1,ENSG00000185811,protein_coding -51973,PCYT2,ENSG00000185813,protein_coding -12928,NAT8L,ENSG00000185818,protein_coding -38875,OR6C76,ENSG00000185821,protein_coding -44398,NPAP1,ENSG00000185823,protein_coding -26864,BCAP31,ENSG00000185825,protein_coding -50895,ARL17A,ENSG00000185829,protein_coding -58473,GNB1L,ENSG00000185838,protein_coding -4872,DNAH14,ENSG00000185842,protein_coding -3738,CCDC190,ENSG00000185860,protein_coding -50160,EVI2B,ENSG00000185862,protein_coding -31742,TMEM210,ENSG00000185863,protein_coding -47232,NPIPB4,ENSG00000185864,protein_coding -56357,ZNF829,ENSG00000185869,protein_coding -13812,TMPRSS11B,ENSG00000185873,protein_coding -35570,THNSL1,ENSG00000185875,protein_coding -45011,TRIM69,ENSG00000185880,protein_coding -46667,ATP6V0C,ENSG00000185883,protein_coding -31813,IFITM1,ENSG00000185885,protein_coding -4944,PRSS38,ENSG00000185888,protein_coding -58200,BPY2C,ENSG00000185894,protein_coding -41895,LAMP1,ENSG00000185896,protein_coding -56256,FFAR3,ENSG00000185897,protein_coding -24242,TAS2R60,ENSG00000185899,protein_coding 
-27905,POMK,ENSG00000185900,protein_coding -47458,C16orf54,ENSG00000185905,protein_coding -10493,KLHDC8B,ENSG00000185909,protein_coding -24821,KLHL34,ENSG00000185915,protein_coding -60197,SETD4,ENSG00000185917,protein_coding -30833,PTCH1,ENSG00000185920,protein_coding -49057,RTN4RL1,ENSG00000185924,protein_coding -32970,OR4C46,ENSG00000185926,protein_coding -37014,CALHM1,ENSG00000185933,protein_coding -31890,KRTAP5-5,ENSG00000185940,protein_coding -28173,NKAIN3,ENSG00000185942,protein_coding -2469,RNPC3,ENSG00000185946,protein_coding -47647,ZNF267,ENSG00000185947,protein_coding -41820,IRS2,ENSG00000185950,protein_coding -23380,C7orf61,ENSG00000185955,protein_coding -38630,FAM186A,ENSG00000185958,protein_coding -24569,SHOX,ENSG00000185960,protein_coding -3240,LCE3A,ENSG00000185962,protein_coding -30762,BICD2,ENSG00000185963,protein_coding -3236,LCE3E,ENSG00000185966,protein_coding -30050,CCIN,ENSG00000185972,protein_coding -26969,TMLHE,ENSG00000185973,protein_coding -41908,GRK1,ENSG00000185974,protein_coding -53297,DEFB128,ENSG00000185982,protein_coding -26690,SLITRK2,ENSG00000185985,protein_coding -54859,PLK5,ENSG00000185988,protein_coding -41920,RASA3,ENSG00000185989,protein_coding -12833,LRCH3,ENSG00000186001,protein_coding -4503,LEMD1,ENSG00000186007,protein_coding -41907,ATP4B,ENSG00000186009,protein_coding -55830,NDUFA13,ENSG00000186010,protein_coding -56338,ZNF566,ENSG00000186017,protein_coding -56343,ZNF529,ENSG00000186020,protein_coding -56721,ZNF284,ENSG00000186026,protein_coding -12515,HTR3E,ENSG00000186038,protein_coding -41190,DLEU7,ENSG00000186047,protein_coding -38720,KRT73,ENSG00000186049,protein_coding -31023,TAL2,ENSG00000186051,protein_coding -4822,AIDA,ENSG00000186063,protein_coding -44763,C15orf41,ENSG00000186073,protein_coding -51650,CD300LF,ENSG00000186074,protein_coding -50495,ZPBP2,ENSG00000186075,protein_coding -38715,KRT5,ENSG00000186081,protein_coding -2511,NBPF6,ENSG00000186086,protein_coding -23024,GSAP,ENSG00000186088,protein_coding 
-12510,HTR3D,ENSG00000186090,protein_coding -8,OR4F5,ENSG00000186092,protein_coding -1576,AGBL4,ENSG00000186094,protein_coding -11445,ARGFX,ENSG00000186103,protein_coding -32373,CYP2R1,ENSG00000186104,protein_coding -16405,LRRC70,ENSG00000186105,protein_coding -28769,ANKRD46,ENSG00000186106,protein_coding -54977,PIP5K1C,ENSG00000186111,protein_coding -33013,OR5D14,ENSG00000186113,protein_coding -55631,CYP4F2,ENSG00000186115,protein_coding -1526,TEX38,ENSG00000186118,protein_coding -33015,OR5D18,ENSG00000186119,protein_coding -31287,ZBTB6,ENSG00000186130,protein_coding -7646,C2orf76,ENSG00000186132,protein_coding -37911,TAS2R42,ENSG00000186136,protein_coding -2950,POLR3C,ENSG00000186141,protein_coding -5912,PRR30,ENSG00000186143,protein_coding -13115,DEFB131A,ENSG00000186146,protein_coding -2578,UBL4B,ENSG00000186150,protein_coding -48657,WWOX,ENSG00000186153,protein_coding -1539,CYP4Z1,ENSG00000186160,protein_coding -34785,CCDC84,ENSG00000186166,protein_coding -34776,BCL9L,ENSG00000186174,protein_coding -40782,POLR1D,ENSG00000186184,protein_coding -50827,KIF18B,ENSG00000186185,protein_coding -48582,ZNRF1,ENSG00000186187,protein_coding -36755,FFAR4,ENSG00000186188,protein_coding -53941,BPIFB3,ENSG00000186190,protein_coding -53942,BPIFB4,ENSG00000186191,protein_coding -31731,SAPCD2,ENSG00000186193,protein_coding -5183,EDARADD,ENSG00000186197,protein_coding -45462,SLC51B,ENSG00000186198,protein_coding -55618,CYP4F12,ENSG00000186204,protein_coding -4789,MARC1,ENSG00000186205,protein_coding -3234,LCE5A,ENSG00000186207,protein_coding -14007,SOWAHB,ENSG00000186212,protein_coding -13033,BLOC1S4,ENSG00000186222,protein_coding -3253,LCE1E,ENSG00000186226,protein_coding -57636,ZNF749,ENSG00000186230,protein_coding -20433,KLHL32,ENSG00000186231,protein_coding -46999,MRTFB,ENSG00000186260,protein_coding -11293,BTLA,ENSG00000186265,protein_coding -57635,ZNF17,ENSG00000186272,protein_coding -34352,KDM4D,ENSG00000186280,protein_coding -7184,GPAT2,ENSG00000186281,protein_coding 
-4044,TOR3A,ENSG00000186283,protein_coding -25667,PABPC1L2A,ENSG00000186288,protein_coding -44519,GABRA5,ENSG00000186297,protein_coding -39910,PPP1CC,ENSG00000186298,protein_coding -54934,ZNF555,ENSG00000186300,protein_coding -3545,OR10T2,ENSG00000186306,protein_coding -25889,NAP1L3,ENSG00000186310,protein_coding -17789,PRELID2,ENSG00000186314,protein_coding -34717,BACE1,ENSG00000186318,protein_coding -56147,RGS9BP,ENSG00000186326,protein_coding -12320,TMEM212,ENSG00000186329,protein_coding -17919,SLC36A3,ENSG00000186334,protein_coding -17920,SLC36A2,ENSG00000186335,protein_coding -21502,THBS2,ENSG00000186340,protein_coding -31610,RXRA,ENSG00000186350,protein_coding -15404,ANKRD37,ENSG00000186352,protein_coding -2951,NUDT17,ENSG00000186364,protein_coding -17340,MINAR2,ENSG00000186367,protein_coding -26540,ZNF75D,ENSG00000186376,protein_coding -1538,CYP4X1,ENSG00000186377,protein_coding -50543,KRT26,ENSG00000186393,protein_coding -50547,KRT10,ENSG00000186395,protein_coding -44613,GOLGA8R,ENSG00000186399,protein_coding -51647,CD300E,ENSG00000186407,protein_coding -1359,CCDC30,ENSG00000186409,protein_coding -26280,NKRF,ENSG00000186416,protein_coding -45169,GLDN,ENSG00000186417,protein_coding -57471,FCAR,ENSG00000186431,protein_coding -12184,KPNA4,ENSG00000186432,protein_coding -20781,TRDN,ENSG00000186439,protein_coding -3556,OR6P1,ENSG00000186440,protein_coding -38735,KRT3,ENSG00000186442,protein_coding -10336,ZNF501,ENSG00000186446,protein_coding -10330,ZNF197,ENSG00000186448,protein_coding -10710,SPATA12,ENSG00000186451,protein_coding -38641,TMPRSS12,ENSG00000186452,protein_coding -5833,FAM228A,ENSG00000186453,protein_coding -53299,DEFB132,ENSG00000186458,protein_coding -25670,NAP1L2,ENSG00000186462,protein_coding -16762,RPS23,ENSG00000186468,protein_coding -42799,GNG2,ENSG00000186469,protein_coding -18971,BTN3A2,ENSG00000186470,protein_coding -26291,AKAP14,ENSG00000186471,protein_coding -23078,PCLO,ENSG00000186472,protein_coding 
-57145,KLK12,ENSG00000186474,protein_coding -16414,RGS7BP,ENSG00000186479,protein_coding -24495,INSIG1,ENSG00000186480,protein_coding -5508,MYT1L,ENSG00000186487,protein_coding -15585,C5orf38,ENSG00000186493,protein_coding -52661,ZNF396,ENSG00000186496,protein_coding -907,TMEM222,ENSG00000186501,protein_coding -33147,OR9Q1,ENSG00000186509,protein_coding -540,CLCNKA,ENSG00000186510,protein_coding -33158,OR9Q2,ENSG00000186513,protein_coding -3664,ARHGAP30,ENSG00000186517,protein_coding -7453,SEPT10,ENSG00000186522,protein_coding -27306,FAM86B1,ENSG00000186523,protein_coding -55615,CYP4F8,ENSG00000186526,protein_coding -55616,CYP4F3,ENSG00000186529,protein_coding -49054,SMYD4,ENSG00000186532,protein_coding -27152,DEFB105A,ENSG00000186562,protein_coding -1553,FOXD2,ENSG00000186564,protein_coding -50803,GPATCH8,ENSG00000186566,protein_coding -56750,CEACAM19,ENSG00000186567,protein_coding -27151,DEFB107A,ENSG00000186572,protein_coding -58969,NF2,ENSG00000186575,protein_coding -19509,SMIM29,ENSG00000186577,protein_coding -27153,DEFB106A,ENSG00000186579,protein_coding -29388,SPATC1,ENSG00000186583,protein_coding -23881,UBE2H,ENSG00000186591,protein_coding -27123,DEFB105B,ENSG00000186599,protein_coding -1476,HPDL,ENSG00000186603,protein_coding -21161,KATNA1,ENSG00000186625,protein_coding -45966,FSD2,ENSG00000186628,protein_coding -33874,ARAP1,ENSG00000186635,protein_coding -29945,KIF24,ENSG00000186638,protein_coding -33869,PDE2A,ENSG00000186642,protein_coding -42350,CARMIL3,ENSG00000186648,protein_coding -33115,PRG2,ENSG00000186652,protein_coding -59489,PRR5,ENSG00000186654,protein_coding -33179,ZFP91,ENSG00000186660,protein_coding -51527,C17orf58,ENSG00000186665,protein_coding -38605,BCDIN3D,ENSG00000186666,protein_coding -25739,MAGEE2,ENSG00000186675,protein_coding -7729,CYP27C1,ENSG00000186684,protein_coding -17353,LYRM7,ENSG00000186687,protein_coding -39969,CFAP73,ENSG00000186710,protein_coding -32630,CCDC73,ENSG00000186714,protein_coding 
-58746,BCR,ENSG00000186716,protein_coding -55624,AC114267.1,ENSG00000186723,protein_coding -59460,MPPED1,ENSG00000186732,protein_coding -51944,FSCN2,ENSG00000186765,protein_coding -37363,FOXI2,ENSG00000186766,protein_coding -25488,SPIN4,ENSG00000186767,protein_coding -12861,ZNF732,ENSG00000186777,protein_coding -25468,SPIN2B,ENSG00000186787,protein_coding -30570,SPATA31D3,ENSG00000186788,protein_coding -1551,FOXE3,ENSG00000186790,protein_coding -10552,HYAL3,ENSG00000186792,protein_coding -37187,KCNK18,ENSG00000186795,protein_coding -29746,IFNA10,ENSG00000186803,protein_coding -57170,VSIG10L,ENSG00000186806,protein_coding -25624,CXCR3,ENSG00000186810,protein_coding -52653,ZNF397,ENSG00000186812,protein_coding -52654,ZSCAN30,ENSG00000186814,protein_coding -39977,TPCN1,ENSG00000186815,protein_coding -57452,LILRB4,ENSG00000186818,protein_coding -85,TNFRSF4,ENSG00000186827,protein_coding -50620,KRT16,ENSG00000186832,protein_coding -50839,HEXIM1,ENSG00000186834,protein_coding -56480,SELENOV,ENSG00000186838,protein_coding -3257,LCE1A,ENSG00000186844,protein_coding -50619,KRT14,ENSG00000186847,protein_coding -6867,TRABD2A,ENSG00000186854,protein_coding -50596,KRTAP17-1,ENSG00000186860,protein_coding -36926,PDZD7,ENSG00000186862,protein_coding -60468,POFUT2,ENSG00000186866,protein_coding -14591,QRFPR,ENSG00000186867,protein_coding -50877,MAPT,ENSG00000186868,protein_coding -25645,ERCC6L,ENSG00000186871,protein_coding -30997,OR13F1,ENSG00000186881,protein_coding -33009,OR5D3P,ENSG00000186886,protein_coding -6474,TMEM17,ENSG00000186889,protein_coding -84,TNFRSF18,ENSG00000186891,protein_coding -33768,FGF3,ENSG00000186895,protein_coding -38584,C1QL4,ENSG00000186897,protein_coding -33120,RTN4RL2,ENSG00000186907,protein_coding -39365,ZDHHC17,ENSG00000186908,protein_coding -43607,SERPINA11,ENSG00000186910,protein_coding -25579,P2RY4,ENSG00000186912,protein_coding -27619,ZNF395,ENSG00000186918,protein_coding -51730,ZACN,ENSG00000186919,protein_coding 
-60075,KRTAP22-1,ENSG00000186924,protein_coding -60070,KRTAP19-6,ENSG00000186925,protein_coding -60074,KRTAP6-2,ENSG00000186930,protein_coding -31001,OR13C8,ENSG00000186943,protein_coding -59531,PPARA,ENSG00000186951,protein_coding -17087,TMEM232,ENSG00000186952,protein_coding -60062,KRTAP19-2,ENSG00000186965,protein_coding -60064,KRTAP19-4,ENSG00000186967,protein_coding -60060,KRTAP15-1,ENSG00000186970,protein_coding -60058,KRTAP13-4,ENSG00000186971,protein_coding -1388,FAM183A,ENSG00000186973,protein_coding -59463,EFCAB6,ENSG00000186976,protein_coding -60065,KRTAP19-5,ENSG00000186977,protein_coding -60052,KRTAP23-1,ENSG00000186980,protein_coding -55203,KANK3,ENSG00000186994,protein_coding -58949,EMID1,ENSG00000186998,protein_coding -31067,ACTL7A,ENSG00000187003,protein_coding -60084,KRTAP21-1,ENSG00000187005,protein_coding -817,RHD,ENSG00000187010,protein_coding -246,ESPN,ENSG00000187017,protein_coding -37173,PNLIPRP1,ENSG00000187021,protein_coding -31380,PTRH1,ENSG00000187024,protein_coding -60083,KRTAP21-2,ENSG00000187026,protein_coding -12279,SAMD7,ENSG00000187033,protein_coding -22206,GPR141,ENSG00000187037,protein_coding -59202,TMPRSS6,ENSG00000187045,protein_coding -1533,CYP4A11,ENSG00000187048,protein_coding -33297,TMEM216,ENSG00000187049,protein_coding -59325,RPS19BP1,ENSG00000187051,protein_coding -13803,TMPRSS11A,ENSG00000187054,protein_coding -33512,TMEM262,ENSG00000187066,protein_coding -12550,C3orf70,ENSG00000187068,protein_coding -32333,TEAD1,ENSG00000187079,protein_coding -5415,OR2AK2,ENSG00000187080,protein_coding -27122,DEFB106B,ENSG00000187082,protein_coding -10173,PLCD1,ENSG00000187091,protein_coding -10264,CCK,ENSG00000187094,protein_coding -43263,ENTPD5,ENSG00000187097,protein_coding -10862,MITF,ENSG00000187098,protein_coding -43231,HEATR4,ENSG00000187105,protein_coding -39349,NAP1L1,ENSG00000187109,protein_coding -57425,LILRA5,ENSG00000187116,protein_coding -10048,CMC1,ENSG00000187118,protein_coding 
-36827,SLIT1,ENSG00000187122,protein_coding -8103,LYPD6,ENSG00000187123,protein_coding -35228,AKR1C1,ENSG00000187134,protein_coding -56096,VSTM2B,ENSG00000187135,protein_coding -1859,FOXD3,ENSG00000187140,protein_coding -555,SPATA21,ENSG00000187144,protein_coding -1444,RNF220,ENSG00000187147,protein_coding -34411,ANGPTL5,ENSG00000187151,protein_coding -37180,SHTN1,ENSG00000187164,protein_coding -38519,H1FNT,ENSG00000187166,protein_coding -3247,LCE4A,ENSG00000187170,protein_coding -3246,LCE2A,ENSG00000187173,protein_coding -60446,KRTAP12-1,ENSG00000187175,protein_coding -3244,LCE2C,ENSG00000187180,protein_coding -29969,AL162231.1,ENSG00000187186,protein_coding -56510,ZNF546,ENSG00000187187,protein_coding -20688,TSPYL4,ENSG00000187189,protein_coding -58194,DAZ3,ENSG00000187191,protein_coding -48105,MT1X,ENSG00000187193,protein_coding -30511,GCNT1,ENSG00000187210,protein_coding -3243,LCE2D,ENSG00000187223,protein_coding -8545,SESTD1,ENSG00000187231,protein_coding -3239,LCE3B,ENSG00000187238,protein_coding -31500,FNBP1,ENSG00000187239,protein_coding -34449,DYNC2H1,ENSG00000187240,protein_coding -50550,KRT12,ENSG00000187242,protein_coding -25332,MAGED4B,ENSG00000187243,protein_coding -56760,BCAM,ENSG00000187244,protein_coding -23030,RSBN1L,ENSG00000187257,protein_coding -22147,NPSR1,ENSG00000187258,protein_coding -24429,WDR86,ENSG00000187260,protein_coding -55366,EPOR,ENSG00000187266,protein_coding -24703,FAM9C,ENSG00000187268,protein_coding -50587,KRTAP9-8,ENSG00000187272,protein_coding -9775,CIDEC,ENSG00000187288,protein_coding -52888,DCC,ENSG00000187323,protein_coding -25768,TAF9B,ENSG00000187325,protein_coding -17690,PCDHB13,ENSG00000187372,protein_coding -23035,MAGI2,ENSG00000187391,protein_coding -32535,LUZP2,ENSG00000187398,protein_coding -23510,LHFPL3,ENSG00000187416,protein_coding -44883,CHP1,ENSG00000187446,protein_coding -57215,FPR3,ENSG00000187474,protein_coding -18939,HIST1H1T,ENSG00000187475,protein_coding -32771,C11orf96,ENSG00000187479,protein_coding 
-32408,KCNJ11,ENSG00000187486,protein_coding -10523,CDHR4,ENSG00000187492,protein_coding -41831,COL4A1,ENSG00000187498,protein_coding -39533,C12orf74,ENSG00000187510,protein_coding -1132,GJA4,ENSG00000187513,protein_coding -9406,PTMA,ENSG00000187514,protein_coding -24995,HYPM,ENSG00000187516,protein_coding -35411,HSPA14,ENSG00000187522,protein_coding -12676,ATP13A5,ENSG00000187527,protein_coding -51974,SIRT7,ENSG00000187531,protein_coding -13880,PRR27,ENSG00000187533,protein_coding -46568,IFT140,ENSG00000187535,protein_coding -41983,POTEG,ENSG00000187537,protein_coding -450,PRAMEF10,ENSG00000187545,protein_coding -21804,AGMO,ENSG00000187546,protein_coding -57527,SBK2,ENSG00000187550,protein_coding -36745,CYP26C1,ENSG00000187553,protein_coding -4831,TLR5,ENSG00000187554,protein_coding -46881,USP7,ENSG00000187555,protein_coding -55522,NANOS3,ENSG00000187556,protein_coding -30409,FOXD4L3,ENSG00000187559,protein_coding -18806,NHLRC1,ENSG00000187566,protein_coding -37724,DPPA3,ENSG00000187569,protein_coding -43584,COX8C,ENSG00000187581,protein_coding -63,PLEKHN1,ENSG00000187583,protein_coding -50642,ZNF385C,ENSG00000187595,protein_coding -25448,MAGEH1,ENSG00000187601,protein_coding -6717,TET3,ENSG00000187605,protein_coding -49558,ZNF286A,ENSG00000187607,protein_coding -67,ISG15,ENSG00000187608,protein_coding -31762,EXD3,ENSG00000187609,protein_coding -33024,OR5W2,ENSG00000187612,protein_coding -31592,MYMK,ENSG00000187616,protein_coding -49004,C17orf97,ENSG00000187624,protein_coding -19073,ZKSCAN4,ENSG00000187626,protein_coding -6942,RGPD1,ENSG00000187627,protein_coding -42346,DHRS4L2,ENSG00000187630,protein_coding -60,SAMD11,ENSG00000187634,protein_coding -64,PERM1,ENSG00000187642,protein_coding -55077,VMAC,ENSG00000187650,protein_coding -18013,C5orf52,ENSG00000187658,protein_coding -55821,HAPLN4,ENSG00000187664,protein_coding -10700,ERC2,ENSG00000187672,protein_coding -40855,B3GLCT,ENSG00000187676,protein_coding -17752,SPRY4,ENSG00000187678,protein_coding 
-25234,ERAS,ENSG00000187682,protein_coding -49593,TRPV2,ENSG00000187688,protein_coding -13889,AMTN,ENSG00000187689,protein_coding -25316,EZHIP,ENSG00000187690,protein_coding -8662,C2orf88,ENSG00000187699,protein_coding -5455,OR2T27,ENSG00000187701,protein_coding -31746,TMEM203,ENSG00000187713,protein_coding -36033,SLC18A3,ENSG00000187714,protein_coding -11593,KBTBD12,ENSG00000187715,protein_coding -45608,THSD4,ENSG00000187720,protein_coding -33913,DNAJB13,ENSG00000187726,protein_coding -145,GABRD,ENSG00000187730,protein_coding -2477,AMY1C,ENSG00000187733,protein_coding -28044,TCEA1,ENSG00000187735,protein_coding -9183,NHEJ1,ENSG00000187736,protein_coding -48969,FANCA,ENSG00000187741,protein_coding -30693,SECISBP2,ENSG00000187742,protein_coding -32119,OR52B6,ENSG00000187747,protein_coding -30629,C9orf153,ENSG00000187753,protein_coding -25359,SSX7,ENSG00000187754,protein_coding -14285,ADH1A,ENSG00000187758,protein_coding -30694,SEMA4D,ENSG00000187764,protein_coding -60438,KRTAP10-8,ENSG00000187766,protein_coding -20488,LIN28B,ENSG00000187772,protein_coding -53173,DIPK1C,ENSG00000187773,protein_coding -51837,DNAH17,ENSG00000187775,protein_coding -38591,MCRS1,ENSG00000187778,protein_coding -35904,TMEM72,ENSG00000187783,protein_coding -42681,FANCM,ENSG00000187790,protein_coding -29976,FAM205C,ENSG00000187791,protein_coding -58773,ZNF70,ENSG00000187792,protein_coding -31666,CARD9,ENSG00000187796,protein_coding -3503,PEAR1,ENSG00000187800,protein_coding -1305,ZFP69B,ENSG00000187801,protein_coding -45635,TMEM202,ENSG00000187806,protein_coding -26284,SOWAHD,ENSG00000187808,protein_coding -1308,ZFP69,ENSG00000187815,protein_coding -15395,HELT,ENSG00000187821,protein_coding -26174,RTL4,ENSG00000187823,protein_coding -49467,TMEM220,ENSG00000187824,protein_coding -6707,C2orf78,ENSG00000187833,protein_coding -18936,HIST1H1C,ENSG00000187837,protein_coding -49321,PLSCR3,ENSG00000187838,protein_coding -27785,EIF4EBP1,ENSG00000187840,protein_coding 
-40499,P2RX2,ENSG00000187848,protein_coding -39827,ASCL4,ENSG00000187855,protein_coding -38869,OR6C75,ENSG00000187857,protein_coding -58998,CCDC157,ENSG00000187860,protein_coding -3480,TTC24,ENSG00000187862,protein_coding -30421,FAM122A,ENSG00000187866,protein_coding -55534,PALM3,ENSG00000187867,protein_coding -19971,GFRAL,ENSG00000187871,protein_coding -1773,FYB2,ENSG00000187889,protein_coding -57519,SHISA7,ENSG00000187902,protein_coding -58548,LRRC74B,ENSG00000187905,protein_coding -37276,DMBT1,ENSG00000187908,protein_coding -55563,CLEC17A,ENSG00000187912,protein_coding -32107,OR51I2,ENSG00000187918,protein_coding -31691,LCN10,ENSG00000187922,protein_coding -704,LDLRAD2,ENSG00000187942,protein_coding -8739,C2orf66,ENSG00000187944,protein_coding -38227,OVCH1,ENSG00000187950,protein_coding -29426,CYHR1,ENSG00000187954,protein_coding -29007,COL14A1,ENSG00000187955,protein_coding -9340,DNER,ENSG00000187957,protein_coding -51614,CPSF4L,ENSG00000187959,protein_coding -62,KLHL17,ENSG00000187961,protein_coding -25716,ZCCHC13,ENSG00000187969,protein_coding -656,PLA2G2C,ENSG00000187980,protein_coding -19084,ZSCAN23,ENSG00000187987,protein_coding -56440,RINL,ENSG00000187994,protein_coding -51819,C17orf99,ENSG00000187997,protein_coding -55239,OR7D2,ENSG00000188000,protein_coding -12633,TPRG1,ENSG00000188001,protein_coding -6124,MORN2,ENSG00000188010,protein_coding -9654,RTP5,ENSG00000188011,protein_coding -3299,S100A3,ENSG00000188015,protein_coding -25461,UBQLN2,ENSG00000188021,protein_coding -40246,RILPL1,ENSG00000188026,protein_coding -55538,C19orf67,ENSG00000188032,protein_coding -55448,ZNF490,ENSG00000188033,protein_coding -24233,CLCN1,ENSG00000188037,protein_coding -48343,NRN1L,ENSG00000188038,protein_coding -55686,NWD1,ENSG00000188039,protein_coding -9497,ARL4C,ENSG00000188042,protein_coding -23739,RNF133,ENSG00000188050,protein_coding -55725,TMEM221,ENSG00000188051,protein_coding -19670,TREML4,ENSG00000188056,protein_coding -976,RAB42,ENSG00000188060,protein_coding 
-59517,WNT7B,ENSG00000188064,protein_coding -33430,C11orf95,ENSG00000188070,protein_coding -31798,SCGB1C1,ENSG00000188076,protein_coding -10393,PRSS45P,ENSG00000188086,protein_coding -44915,PLA2G4E,ENSG00000188089,protein_coding -3016,GPR89B,ENSG00000188092,protein_coding -46133,MESP2,ENSG00000188095,protein_coding -36610,FAM25A,ENSG00000188100,protein_coding -20039,EYS,ENSG00000188107,protein_coding -19708,C6orf132,ENSG00000188112,protein_coding -58131,DAZ1,ENSG00000188120,protein_coding -32189,OR2AG2,ENSG00000188124,protein_coding -59612,MAPK12,ENSG00000188130,protein_coding -29878,TMEM215,ENSG00000188133,protein_coding -30871,NUTM2G,ENSG00000188152,protein_coding -26125,COL4A5,ENSG00000188153,protein_coding -60436,KRTAP10-6,ENSG00000188155,protein_coding -70,AGRN,ENSG00000188157,protein_coding -24772,NHS,ENSG00000188158,protein_coding -32413,OTOG,ENSG00000188162,protein_coding -31754,FAM166A,ENSG00000188163,protein_coding -10108,TMPPE,ENSG00000188167,protein_coding -55895,ZNF626,ENSG00000188171,protein_coding -23192,HEPACAM2,ENSG00000188175,protein_coding -49176,SMTNL2,ENSG00000188176,protein_coding -7538,ZC3H6,ENSG00000188177,protein_coding -23357,LAMTOR4,ENSG00000188186,protein_coding -21566,PRKAR1B,ENSG00000188191,protein_coding -36496,NUTM2B,ENSG00000188199,protein_coding -32406,NCR3LG1,ENSG00000188211,protein_coding -47200,DCUN1D3,ENSG00000188215,protein_coding -7860,POTEE,ENSG00000188219,protein_coding -56289,AD000671.1,ENSG00000188223,protein_coding -56381,ZNF793,ENSG00000188227,protein_coding -31753,TUBB4B,ENSG00000188229,protein_coding -35938,AGAP4,ENSG00000188234,protein_coding -41432,COMMD6,ENSG00000188243,protein_coding -651,PLA2G2A,ENSG00000188257,protein_coding -59598,IL17REL,ENSG00000188263,protein_coding -45842,HYKK,ENSG00000188266,protein_coding -55573,OR7A5,ENSG00000188269,protein_coding -44861,C15orf62,ENSG00000188277,protein_coding -9127,RUFY4,ENSG00000188282,protein_coding -56368,ZNF383,ENSG00000188283,protein_coding 
-66,HES4,ENSG00000188290,protein_coding -56843,IGFL1,ENSG00000188293,protein_coding -5365,ZNF669,ENSG00000188295,protein_coding -54906,PEAK3,ENSG00000188305,protein_coding -12276,LRRIQ4,ENSG00000188306,protein_coding -30751,CENPP,ENSG00000188312,protein_coding -11929,PLSCR1,ENSG00000188313,protein_coding -10498,C3orf62,ENSG00000188315,protein_coding -37179,ENO4,ENSG00000188316,protein_coding -55251,ZNF559,ENSG00000188321,protein_coding -47365,SBK1,ENSG00000188322,protein_coding -38864,OR6C6,ENSG00000188324,protein_coding -56922,BSPH1,ENSG00000188334,protein_coding -10542,SLC38A3,ENSG00000188338,protein_coding -3567,OR6N2,ENSG00000188340,protein_coding -41067,GTF2F2,ENSG00000188342,protein_coding -28624,FAM92A,ENSG00000188343,protein_coding -29730,FOCAD,ENSG00000188352,protein_coding -56639,PRR19,ENSG00000188368,protein_coding -22990,ZP3,ENSG00000188372,protein_coding -36559,C10orf99,ENSG00000188373,protein_coding -38281,H3F3C,ENSG00000188375,protein_coding -29758,IFNA2,ENSG00000188379,protein_coding -37409,JAKMIP3,ENSG00000188385,protein_coding -30978,PPP3R2,ENSG00000188386,protein_coding -9653,PDCD1,ENSG00000188389,protein_coding -37847,CLEC2A,ENSG00000188393,protein_coding -31294,GPR21,ENSG00000188394,protein_coding -1461,TCTEX1D4,ENSG00000188396,protein_coding -3875,SELL,ENSG00000188404,protein_coding -24881,MAGEB5,ENSG00000188408,protein_coding -25832,CHM,ENSG00000188419,protein_coding -56825,NANOS2,ENSG00000188425,protein_coding -18658,BLOC1S5,ENSG00000188428,protein_coding -8567,CERKL,ENSG00000188452,protein_coding -45089,SLC24A5,ENSG00000188467,protein_coding -31469,IER5L,ENSG00000188483,protein_coding -34799,H2AFX,ENSG00000188486,protein_coding -32378,INSC,ENSG00000188487,protein_coding -43612,SERPINA5,ENSG00000188488,protein_coding -56545,C19orf54,ENSG00000188493,protein_coding -45520,LCTL,ENSG00000188501,protein_coding -56456,NCCRP1,ENSG00000188505,protein_coding -56265,KRTDAP,ENSG00000188508,protein_coding -14411,COL25A1,ENSG00000188517,protein_coding 
-49734,FAM83G,ENSG00000188522,protein_coding -31549,CFAP77,ENSG00000188523,protein_coding -780,SRSF10,ENSG00000188529,protein_coding -46454,HBA2,ENSG00000188536,protein_coding -9603,DUSP28,ENSG00000188542,protein_coding -44835,CCDC9B,ENSG00000188549,protein_coding -50725,NBR1,ENSG00000188554,protein_coding -5445,OR2G6,ENSG00000188558,protein_coding -53682,RALGAPA2,ENSG00000188559,protein_coding -31747,NDOR1,ENSG00000188566,protein_coding -18128,FBLL1,ENSG00000188573,protein_coding -20785,NKAIN2,ENSG00000188580,protein_coding -50563,KRTAP1-1,ENSG00000188581,protein_coding -11895,PAQR9,ENSG00000188582,protein_coding -4029,CLEC20A,ENSG00000188585,protein_coding -39627,CFAP54,ENSG00000188596,protein_coding -47377,CLN3,ENSG00000188603,protein_coding -2858,FAM72B,ENSG00000188610,protein_coding -36050,ASAH2,ENSG00000188611,protein_coding -51681,SUMO2,ENSG00000188612,protein_coding -37212,NANOS1,ENSG00000188613,protein_coding -37290,HMX3,ENSG00000188620,protein_coding -56834,IGFL3,ENSG00000188624,protein_coding -44562,GOLGA8M,ENSG00000188626,protein_coding -55254,ZNF177,ENSG00000188629,protein_coding -59484,RTL6,ENSG00000188636,protein_coding -2381,DPYD,ENSG00000188641,protein_coding -3302,S100A16,ENSG00000188643,protein_coding -30436,PTAR1,ENSG00000188647,protein_coding -36804,CC2D2B,ENSG00000188649,protein_coding -42060,RNASE9,ENSG00000188655,protein_coding -45938,SAXO2,ENSG00000188659,protein_coding -821,RHCE,ENSG00000188672,protein_coding -8999,C2orf80,ENSG00000188674,protein_coding -27838,IDO2,ENSG00000188676,protein_coding -59472,PARVB,ENSG00000188677,protein_coding -6728,SLC4A5,ENSG00000188687,protein_coding -37340,UROS,ENSG00000188690,protein_coding -32141,OR56A5,ENSG00000188691,protein_coding -60048,KRTAP24-1,ENSG00000188694,protein_coding -26423,ZDHHC9,ENSG00000188706,protein_coding -24375,ZBED6CL,ENSG00000188707,protein_coding -31517,QRFP,ENSG00000188710,protein_coding -36411,DUPD1,ENSG00000188716,protein_coding -16384,SMIM15,ENSG00000188725,protein_coding 
-12655,OSTN,ENSG00000188729,protein_coding -22418,VWC2,ENSG00000188730,protein_coding -21926,FAM221A,ENSG00000188732,protein_coding -40176,TMEM120B,ENSG00000188735,protein_coding -8611,FSIP2,ENSG00000188738,protein_coding -5146,RBM34,ENSG00000188739,protein_coding -31763,NOXA1,ENSG00000188747,protein_coding -9218,TMEM198,ENSG00000188760,protein_coding -2690,BCL2L15,ENSG00000188761,protein_coding -22894,FZD9,ENSG00000188763,protein_coding -56418,SPRED3,ENSG00000188766,protein_coding -4446,OPTC,ENSG00000188770,protein_coding -34612,PLET1,ENSG00000188771,protein_coding -27783,ADRB3,ENSG00000188778,protein_coding -45540,SKOR1,ENSG00000188779,protein_coding -855,CATSPER4,ENSG00000188782,protein_coding -4445,PRELP,ENSG00000188783,protein_coding -649,PLA2G2E,ENSG00000188784,protein_coding -57632,ZNF548,ENSG00000188785,protein_coding -1221,MTF1,ENSG00000188786,protein_coding -1296,TMCO2,ENSG00000188800,protein_coding -49483,SHISA6,ENSG00000188803,protein_coding -328,TMEM201,ENSG00000188807,protein_coding -40960,NHLRC3,ENSG00000188811,protein_coding -37291,HMX2,ENSG00000188816,protein_coding -10791,SNTN,ENSG00000188817,protein_coding -15534,ZDHHC11,ENSG00000188818,protein_coding -20696,CALHM6,ENSG00000188820,protein_coding -774,CNR2,ENSG00000188822,protein_coding -46769,SLX4,ENSG00000188827,protein_coding -26058,GLRA4,ENSG00000188828,protein_coding -31764,ENTPD8,ENSG00000188833,protein_coding -10236,RPL14,ENSG00000188846,protein_coding -13497,BEND4,ENSG00000188848,protein_coding -3790,FAM78B,ENSG00000188859,protein_coding -55426,ZNF563,ENSG00000188868,protein_coding -45924,TMC3,ENSG00000188869,protein_coding -51722,FBF1,ENSG00000188878,protein_coding -24054,KLRG2,ENSG00000188883,protein_coding -7186,ASTL,ENSG00000188886,protein_coding -50510,MSL1,ENSG00000188895,protein_coding -46947,AC099489.1,ENSG00000188897,protein_coding -38369,LRRK2,ENSG00000188906,protein_coding -34883,BSX,ENSG00000188909,protein_coding -1131,GJB3,ENSG00000188910,protein_coding 
-37360,INSYN2A,ENSG00000188916,protein_coding -25952,TRMT2B,ENSG00000188917,protein_coding -29734,HACD4,ENSG00000188921,protein_coding -3687,CFAP126,ENSG00000188931,protein_coding -25056,NYX,ENSG00000188937,protein_coding -30783,FAM120AOS,ENSG00000188938,protein_coding -12657,UTS2B,ENSG00000188958,protein_coding -31087,C9orf152,ENSG00000188959,protein_coding -61,NOC2L,ENSG00000188976,protein_coding -12960,MSANTD1,ENSG00000188981,protein_coding -440,AADACL3,ENSG00000188984,protein_coding -31757,NELFB,ENSG00000188986,protein_coding -38022,SLC15A5,ENSG00000188991,protein_coding -59831,LIPI,ENSG00000188992,protein_coding -13599,LRRC66,ENSG00000188993,protein_coding -20315,ZNF292,ENSG00000188994,protein_coding -18499,HUS1B,ENSG00000188996,protein_coding -34053,KCTD21,ENSG00000188997,protein_coding -56267,SBSN,ENSG00000189001,protein_coding -21071,ADAT2,ENSG00000189007,protein_coding -57466,KIR2DL4,ENSG00000189013,protein_coding -24964,MAGEB16,ENSG00000189023,protein_coding -3468,VHLL,ENSG00000189030,protein_coding -25098,DUSP21,ENSG00000189037,protein_coding -56349,ZNF567,ENSG00000189042,protein_coding -21768,NDUFA4,ENSG00000189043,protein_coding -16628,ANKDD1B,ENSG00000189045,protein_coding -39861,ALKBH2,ENSG00000189046,protein_coding -51285,RNFT1,ENSG00000189050,protein_coding -49415,RNF222,ENSG00000189051,protein_coding -56994,CGB5,ENSG00000189052,protein_coding -23507,RELN,ENSG00000189056,protein_coding -33198,FAM111B,ENSG00000189057,protein_coding -12741,APOD,ENSG00000189058,protein_coding -59231,H1F0,ENSG00000189060,protein_coding -25280,GAGE2A,ENSG00000189064,protein_coding -46950,LITAF,ENSG00000189067,protein_coding -57396,VSTM1,ENSG00000189068,protein_coding -22975,TMEM120A,ENSG00000189077,protein_coding -38443,ARID2,ENSG00000189079,protein_coding -35981,FAM25G,ENSG00000189090,protein_coding -48466,SF3B3,ENSG00000189091,protein_coding -14948,PRSS48,ENSG00000189099,protein_coding -26078,IL1RAPL2,ENSG00000189108,protein_coding 
-56785,BLOC1S3,ENSG00000189114,protein_coding -50936,SP6,ENSG00000189120,protein_coding -16733,ANKRD34B,ENSG00000189127,protein_coding -36519,PLAC9,ENSG00000189129,protein_coding -24957,FAM47B,ENSG00000189132,protein_coding -19074,NKAPL,ENSG00000189134,protein_coding -42664,FSCB,ENSG00000189139,protein_coding -22910,CLDN4,ENSG00000189143,protein_coding -56393,ZNF573,ENSG00000189144,protein_coding -49747,GRAPL,ENSG00000189152,protein_coding -13989,FAM47E,ENSG00000189157,protein_coding -51677,JPT1,ENSG00000189159,protein_coding -56375,ZNF527,ENSG00000189164,protein_coding -40866,ZAR1L,ENSG00000189167,protein_coding -60448,KRTAP10-12,ENSG00000189169,protein_coding -3304,S100A13,ENSG00000189171,protein_coding -35802,ZNF33A,ENSG00000189180,protein_coding -5457,OR14I1,ENSG00000189181,protein_coding -38724,KRT77,ENSG00000189182,protein_coding -14743,PCDH18,ENSG00000189184,protein_coding -24899,DCAF8L2,ENSG00000189186,protein_coding -57268,ZNF600,ENSG00000189190,protein_coding -2279,BTBD8,ENSG00000189195,protein_coding -25082,MAOA,ENSG00000189221,protein_coding -45536,C15orf61,ENSG00000189227,protein_coding -27612,NUGGC,ENSG00000189233,protein_coding -20691,TSPYL1,ENSG00000189241,protein_coding -26672,SPANXN3,ENSG00000189252,protein_coding -34230,TRIM64B,ENSG00000189253,protein_coding -779,PNRC2,ENSG00000189266,protein_coding -58765,DRICH1,ENSG00000189269,protein_coding -1128,GJB5,ENSG00000189280,protein_coding -10764,FHIT,ENSG00000189283,protein_coding -5475,ALKAL2,ENSG00000189292,protein_coding -19081,ZKSCAN3,ENSG00000189298,protein_coding -25453,FOXR2,ENSG00000189299,protein_coding -59430,RRP7A,ENSG00000189306,protein_coding -14094,LIN54,ENSG00000189308,protein_coding -37313,FAM53B,ENSG00000189319,protein_coding -23990,FAM180A,ENSG00000189320,protein_coding -19570,C6orf222,ENSG00000189325,protein_coding -26665,SPANXN4,ENSG00000189326,protein_coding -3303,S100A14,ENSG00000189334,protein_coding -489,KAZN,ENSG00000189337,protein_coding 
-131,SLC35E2B,ENSG00000189339,protein_coding -5975,TOGARAM2,ENSG00000189350,protein_coding -30568,SPATA31D4,ENSG00000189357,protein_coding -8671,NEMP2,ENSG00000189362,protein_coding -11541,ALG1L,ENSG00000189366,protein_coding -20819,KIAA0408,ENSG00000189367,protein_coding -25322,GSPT2,ENSG00000189369,protein_coding -49714,TBC1D28,ENSG00000189375,protein_coding -29044,C8orf76,ENSG00000189376,protein_coding -56647,CXCL17,ENSG00000189377,protein_coding -25569,OTUD6A,ENSG00000189401,protein_coding -40837,HMGB1,ENSG00000189403,protein_coding -128,MMP23B,ENSG00000189409,protein_coding -678,SH2D5,ENSG00000189410,protein_coding -26850,ZFP92,ENSG00000189420,protein_coding -57473,NCR1,ENSG00000189430,protein_coding -32340,RASSF10,ENSG00000189431,protein_coding -1129,GJB4,ENSG00000189433,protein_coding -5426,OR2L13,ENSG00000196071,protein_coding -36903,BLOC1S2,ENSG00000196072,protein_coding -54591,SYCP2,ENSG00000196074,protein_coding -56013,ZNF724,ENSG00000196081,protein_coding -12647,IL1RAP,ENSG00000196083,protein_coding -54181,PTPRT,ENSG00000196090,protein_coding -39709,MYBPC1,ENSG00000196091,protein_coding -30059,PAX5,ENSG00000196092,protein_coding -11106,OR5K4,ENSG00000196098,protein_coding -34907,OR6M1,ENSG00000196099,protein_coding -15176,SPOCK3,ENSG00000196104,protein_coding -55959,ZNF676,ENSG00000196109,protein_coding -55250,ZNF699,ENSG00000196110,protein_coding -30893,TDRD7,ENSG00000196116,protein_coding -47558,CCDC189,ENSG00000196118,protein_coding -34953,OR8A1,ENSG00000196119,protein_coding -48295,KIAA0895L,ENSG00000196123,protein_coding -19418,HLA-DRB1,ENSG00000196126,protein_coding -57295,VN1R2,ENSG00000196131,protein_coding -54744,MYT1,ENSG00000196132,protein_coding -43615,SERPINA3,ENSG00000196136,protein_coding -35232,AKR1C3,ENSG00000196139,protein_coding -8785,SPATS2L,ENSG00000196141,protein_coding -29454,ZNF250,ENSG00000196150,protein_coding -8223,WDSUB1,ENSG00000196151,protein_coding -31372,ZNF79,ENSG00000196152,protein_coding 
-3298,S100A4,ENSG00000196154,protein_coding -48306,PLEKHG4,ENSG00000196155,protein_coding -50579,KRTAP4-3,ENSG00000196156,protein_coding -14635,FAT4,ENSG00000196159,protein_coding -27810,C8orf86,ENSG00000196166,protein_coding -51634,KIF19,ENSG00000196169,protein_coding -3561,OR6K2,ENSG00000196171,protein_coding -56033,ZNF681,ENSG00000196172,protein_coding -37289,ACADSB,ENSG00000196177,protein_coding -1181,STK40,ENSG00000196182,protein_coding -3589,OR10J1,ENSG00000196184,protein_coding -4884,TMEM63A,ENSG00000196187,protein_coding -4525,CTSE,ENSG00000196188,protein_coding -3458,SEMA4A,ENSG00000196189,protein_coding -30037,HRCT1,ENSG00000196196,protein_coding -40596,MPHOSPH8,ENSG00000196199,protein_coding -5664,GREB1,ENSG00000196208,protein_coding -53337,SIRPB2,ENSG00000196209,protein_coding -57244,ZNF766,ENSG00000196214,protein_coding -56421,RYR1,ENSG00000196218,protein_coding -9750,SRGAP3,ENSG00000196220,protein_coding -31888,KRTAP5-3,ENSG00000196224,protein_coding -54592,FAM217B,ENSG00000196227,protein_coding -7429,SULT1C3,ENSG00000196228,protein_coding -19244,TUBB,ENSG00000196230,protein_coding -36825,LCOR,ENSG00000196233,protein_coding -56477,SUPT5H,ENSG00000196235,protein_coding -59354,XPNPEP3,ENSG00000196236,protein_coding -5440,OR2T2,ENSG00000196240,protein_coding -5388,OR2C3,ENSG00000196242,protein_coding -22722,ZNF107,ENSG00000196247,protein_coding -34914,OR10S1,ENSG00000196248,protein_coding -19260,SFTA2,ENSG00000196260,protein_coding -22344,PPIA,ENSG00000196262,protein_coding -57590,ZNF471,ENSG00000196263,protein_coding -57237,ZNF836,ENSG00000196267,protein_coding -55926,ZNF493,ENSG00000196268,protein_coding -22931,GTF2IRD2,ENSG00000196275,protein_coding -9731,GRM7,ENSG00000196277,protein_coding -19807,SUPT3H,ENSG00000196284,protein_coding -5265,BECN2,ENSG00000196289,protein_coding -8805,NIF3L1,ENSG00000196290,protein_coding -47407,ATP2A1,ENSG00000196296,protein_coding -30747,IARS,ENSG00000196305,protein_coding -30868,MFSD14C,ENSG00000196312,protein_coding 
-22861,POM121,ENSG00000196313,protein_coding -35078,ZBTB44,ENSG00000196323,protein_coding -24399,GIMAP5,ENSG00000196329,protein_coding -21928,STK31,ENSG00000196335,protein_coding -56997,CGB7,ENSG00000196337,protein_coding -25607,NLGN3,ENSG00000196338,protein_coding -34934,OR8D1,ENSG00000196341,protein_coding -14288,ADH7,ENSG00000196344,protein_coding -10326,ZKSCAN7,ENSG00000196345,protein_coding -55964,ZNF729,ENSG00000196350,protein_coding -4570,CD55,ENSG00000196352,protein_coding -11704,CPNE4,ENSG00000196353,protein_coding -56328,ZNF565,ENSG00000196357,protein_coding -31546,NTNG2,ENSG00000196358,protein_coding -55371,ELAVL3,ENSG00000196361,protein_coding -31605,WDR5,ENSG00000196363,protein_coding -55063,LONP1,ENSG00000196365,protein_coding -23293,TRRAP,ENSG00000196367,protein_coding -25317,NUDT11,ENSG00000196368,protein_coding -2924,SRGAP2B,ENSG00000196369,protein_coding -34343,FUT4,ENSG00000196371,protein_coding -35258,ASB13,ENSG00000196372,protein_coding -20723,SLC35F1,ENSG00000196376,protein_coding -29446,ZNF34,ENSG00000196378,protein_coding -56387,ZNF781,ENSG00000196381,protein_coding -40525,ZNF140,ENSG00000196387,protein_coding -49214,INCA1,ENSG00000196388,protein_coding -46170,ZNF774,ENSG00000196391,protein_coding -54419,PTPN1,ENSG00000196396,protein_coding -43716,EVL,ENSG00000196405,protein_coding -26655,SPANXD,ENSG00000196406,protein_coding -3211,THEM5,ENSG00000196407,protein_coding -46611,NOXO1,ENSG00000196408,protein_coding -23402,EPHB4,ENSG00000196411,protein_coding -54807,PRTN3,ENSG00000196415,protein_coding -57305,ZNF765,ENSG00000196417,protein_coding -5369,ZNF124,ENSG00000196418,protein_coding -59386,XRCC6,ENSG00000196419,protein_coding -3297,S100A5,ENSG00000196420,protein_coding -54735,C20orf204,ENSG00000196421,protein_coding -31637,PPP1R26,ENSG00000196422,protein_coding -2505,NBPF4,ENSG00000196427,protein_coding -12005,TSC22D2,ENSG00000196428,protein_coding -58887,CRYBA4,ENSG00000196431,protein_coding -24583,ASMT,ENSG00000196433,protein_coding 
-48567,NPIPB15,ENSG00000196436,protein_coding -56377,ZNF569,ENSG00000196437,protein_coding -25968,ARMCX4,ENSG00000196440,protein_coding -1219,YRDC,ENSG00000196449,protein_coding -24355,ZNF777,ENSG00000196453,protein_coding -11682,PIK3R4,ENSG00000196455,protein_coding -24381,ZNF775,ENSG00000196456,protein_coding -40516,ZNF605,ENSG00000196458,protein_coding -24717,TRAPPC2,ENSG00000196459,protein_coding -7312,RFX8,ENSG00000196460,protein_coding -38931,MYL6B,ENSG00000196465,protein_coding -55431,ZNF799,ENSG00000196466,protein_coding -25757,FGF16,ENSG00000196468,protein_coding -47900,SIAH1,ENSG00000196470,protein_coding -14045,GK2,ENSG00000196475,protein_coding -53301,C20orf96,ENSG00000196476,protein_coding -4731,ESRRG,ENSG00000196482,protein_coding -42367,IPO4,ENSG00000196497,protein_coding -40279,NCOR2,ENSG00000196498,protein_coding -47387,SULT1A1,ENSG00000196502,protein_coding -13691,ARL9,ENSG00000196503,protein_coding -8143,PRPF40A,ENSG00000196504,protein_coding -2783,GDAP2,ENSG00000196505,protein_coding -26052,TCEAL3,ENSG00000196507,protein_coding -39895,ANAPC7,ENSG00000196510,protein_coding -24294,TPK1,ENSG00000196511,protein_coding -1427,SLC6A9,ENSG00000196517,protein_coding -13051,AFAP1,ENSG00000196526,protein_coding -38968,NACA,ENSG00000196531,protein_coding -50054,MYO18A,ENSG00000196535,protein_coding -5441,OR2T3,ENSG00000196539,protein_coding -12197,SPTSSB,ENSG00000196542,protein_coding -49395,BORCS6,ENSG00000196544,protein_coding -46189,MAN2A2,ENSG00000196547,protein_coding -12080,MME,ENSG00000196549,protein_coding -4534,FAM72A,ENSG00000196550,protein_coding -43079,CCDC196,ENSG00000196553,protein_coding -46532,CACNA1H,ENSG00000196557,protein_coding -54348,SULF2,ENSG00000196562,protein_coding -32090,HBG2,ENSG00000196565,protein_coding -20837,LAMA2,ENSG00000196569,protein_coding -18319,PFN3,ENSG00000196570,protein_coding -59614,PLXNB2,ENSG00000196576,protein_coding -11091,OR5AC2,ENSG00000196578,protein_coding -219,AJAP1,ENSG00000196581,protein_coding 
-24461,XRCC2,ENSG00000196584,protein_coding -20177,MYO6,ENSG00000196586,protein_coding -59341,MRTFA,ENSG00000196588,protein_coding -55135,MBD3L2B,ENSG00000196589,protein_coding -20668,HDAC2,ENSG00000196591,protein_coding -30867,ZNF782,ENSG00000196597,protein_coding -33412,SLC22A25,ENSG00000196600,protein_coding -7792,POTEF,ENSG00000196604,protein_coding -55273,ZNF846,ENSG00000196605,protein_coding -34438,MMP1,ENSG00000196611,protein_coding -14286,ADH1B,ENSG00000196616,protein_coding -13825,UGT2B15,ENSG00000196620,protein_coding -52915,TCF4,ENSG00000196628,protein_coding -25415,WNK3,ENSG00000196632,protein_coding -23258,SDHAF3,ENSG00000196636,protein_coding -9817,HRH1,ENSG00000196639,protein_coding -31703,RABL6,ENSG00000196642,protein_coding -55421,ZNF136,ENSG00000196646,protein_coding -23314,ZKSCAN5,ENSG00000196652,protein_coding -10335,ZNF502,ENSG00000196653,protein_coding -34790,TRAPPC4,ENSG00000196655,protein_coding -8510,TTC30B,ENSG00000196659,protein_coding -4760,SLC30A10,ENSG00000196660,protein_coding -43891,TECPR2,ENSG00000196663,protein_coding -24699,TLR7,ENSG00000196664,protein_coding -32877,FAM180B,ENSG00000196666,protein_coding -18445,ZFP62,ENSG00000196670,protein_coding -47196,ERI2,ENSG00000196678,protein_coding -21898,TOMM7,ENSG00000196683,protein_coding -55650,HSH2D,ENSG00000196684,protein_coding -49135,TRPV1,ENSG00000196689,protein_coding -35840,ZNF33B,ENSG00000196693,protein_coding -54732,ZNF512B,ENSG00000196700,protein_coding -51543,AMZ2,ENSG00000196704,protein_coding -55913,ZNF431,ENSG00000196705,protein_coding -28023,ALKAL1,ENSG00000196711,protein_coding -50157,NF1,ENSG00000196712,protein_coding -22769,VKORC1L1,ENSG00000196715,protein_coding -57672,ZNF418,ENSG00000196724,protein_coding -30648,DAPK1,ENSG00000196730,protein_coding -3256,LCE1B,ENSG00000196734,protein_coding -19419,HLA-DQA1,ENSG00000196735,protein_coding -31171,COL27A1,ENSG00000196739,protein_coding -17918,GM2A,ENSG00000196743,protein_coding 
-19031,HIST1H2AI,ENSG00000196747,protein_coding -19557,CLPSL2,ENSG00000196748,protein_coding -3300,S100A2,ENSG00000196754,protein_coding -55399,ZNF700,ENSG00000196757,protein_coding -25811,POU3F4,ENSG00000196767,protein_coding -5404,OR14A16,ENSG00000196772,protein_coding -11226,CD47,ENSG00000196776,protein_coding -32028,OR52K1,ENSG00000196778,protein_coding -30561,TLE1,ENSG00000196781,protein_coding -14790,MAML3,ENSG00000196782,protein_coding -19000,HIST1H2AG,ENSG00000196787,protein_coding -42471,STRN3,ENSG00000196792,protein_coding -35872,ZNF239,ENSG00000196793,protein_coding -17832,SPINK14,ENSG00000196800,protein_coding -3272,SPRR2B,ENSG00000196805,protein_coding -9434,CHRNG,ENSG00000196811,protein_coding -19064,ZSCAN16,ENSG00000196812,protein_coding -31353,MVB12B,ENSG00000196814,protein_coding -19518,ILRUN,ENSG00000196821,protein_coding -55437,AC008758.1,ENSG00000196826,protein_coding -42031,OR11G2,ENSG00000196832,protein_coding -7827,POTEI,ENSG00000196834,protein_coding -54229,ADA,ENSG00000196839,protein_coding -7199,ARID5A,ENSG00000196843,protein_coding -34996,PATE2,ENSG00000196844,protein_coding -39906,PPTC7,ENSG00000196850,protein_coding -50554,KRT39,ENSG00000196859,protein_coding -42925,TOMM20L,ENSG00000196860,protein_coding -7419,RGPD4,ENSG00000196862,protein_coding -37135,NHLRC2,ENSG00000196865,protein_coding -18951,HIST1H2AD,ENSG00000196866,protein_coding -57593,ZFP28,ENSG00000196867,protein_coding -7264,KIAA1211L,ENSG00000196872,protein_coding -30407,CBWD3,ENSG00000196873,protein_coding -38668,SCN8A,ENSG00000196876,protein_coding -4610,LAMB3,ENSG00000196878,protein_coding -4978,HIST3H2BB,ENSG00000196890,protein_coding -17300,TEX43,ENSG00000196900,protein_coding -20704,KPNA5,ENSG00000196911,protein_coding -7240,ANKRD36B,ENSG00000196912,protein_coding -34843,ARHGEF12,ENSG00000196914,protein_coding -40215,HCAR1,ENSG00000196917,protein_coding -18326,PDLIM7,ENSG00000196923,protein_coding -26898,FLNA,ENSG00000196924,protein_coding 
-36143,TMEM26,ENSG00000196932,protein_coding -39117,SRGAP1,ENSG00000196935,protein_coding -23720,FAM3C,ENSG00000196937,protein_coding -42387,NOP9,ENSG00000196943,protein_coding -5436,OR2T4,ENSG00000196944,protein_coding -37754,ZNF705A,ENSG00000196946,protein_coding -8724,SLC39A10,ENSG00000196950,protein_coding -34469,CASP4,ENSG00000196954,protein_coding -57058,AP2A1,ENSG00000196961,protein_coding -56365,ZNF585A,ENSG00000196967,protein_coding -36381,FUT11,ENSG00000196968,protein_coding -26532,SMIM10L2B,ENSG00000196972,protein_coding -6615,ANXA4,ENSG00000196975,protein_coding -26912,LAGE3,ENSG00000196976,protein_coding -11463,WDR5B,ENSG00000196981,protein_coding -31594,FAM163B,ENSG00000196990,protein_coding -47395,NPIPB9,ENSG00000196993,protein_coding -25249,WDR45,ENSG00000196998,protein_coding -47223,METTL9,ENSG00000197006,protein_coding -22727,ZNF138,ENSG00000197008,protein_coding -55930,ZNF429,ENSG00000197013,protein_coding -57595,ZNF470,ENSG00000197016,protein_coding -56532,SERTAD1,ENSG00000197019,protein_coding -55941,ZNF100,ENSG00000197020,protein_coding -26768,CXorf40B,ENSG00000197021,protein_coding -24344,ZNF398,ENSG00000197024,protein_coding -23319,ZSCAN25,ENSG00000197037,protein_coding -17914,ANXA6,ENSG00000197043,protein_coding -55391,ZNF441,ENSG00000197044,protein_coding -42843,GMFB,ENSG00000197045,protein_coding -52755,SIGLEC15,ENSG00000197046,protein_coding -56360,ZNF420,ENSG00000197050,protein_coding -55402,ZNF763,ENSG00000197054,protein_coding -1140,ZMYM1,ENSG00000197056,protein_coding -13380,DTHD1,ENSG00000197057,protein_coding -18938,HIST1H4C,ENSG00000197061,protein_coding -19075,ZSCAN26,ENSG00000197062,protein_coding -51975,MAFG,ENSG00000197063,protein_coding -31771,ARRDC1,ENSG00000197070,protein_coding -58836,KIAA1671,ENSG00000197077,protein_coding -50610,KRT35,ENSG00000197079,protein_coding -21361,IGF2R,ENSG00000197081,protein_coding -3255,LCE1C,ENSG00000197084,protein_coding -23360,GAL3ST4,ENSG00000197093,protein_coding 
-43882,DYNC1H1,ENSG00000197102,protein_coding -2579,SLC6A17,ENSG00000197106,protein_coding -56459,IFNL3,ENSG00000197110,protein_coding -38772,PCBP2,ENSG00000197111,protein_coding -54708,ZGPAT,ENSG00000197114,protein_coding -43728,SLC25A29,ENSG00000197119,protein_coding -8741,PGAP1,ENSG00000197121,protein_coding -54088,SRC,ENSG00000197122,protein_coding -22704,ZNF679,ENSG00000197123,protein_coding -55861,ZNF682,ENSG00000197124,protein_coding -34946,OR8B8,ENSG00000197125,protein_coding -57641,ZNF772,ENSG00000197128,protein_coding -55952,ZNF257,ENSG00000197134,protein_coding -33564,PCNX3,ENSG00000197136,protein_coding -27824,ADAM32,ENSG00000197140,protein_coding -37110,ACSL5,ENSG00000197142,protein_coding -2238,LRRC8B,ENSG00000197147,protein_coding -24408,ABCB8,ENSG00000197150,protein_coding -19044,HIST1H3J,ENSG00000197153,protein_coding -23801,SND1,ENSG00000197157,protein_coding -47540,ZNF785,ENSG00000197162,protein_coding -47384,SULT1A2,ENSG00000197165,protein_coding -41225,NEK5,ENSG00000197168,protein_coding -51508,PSMD12,ENSG00000197170,protein_coding -26827,MAGEA6,ENSG00000197172,protein_coding -37426,ADGRA1,ENSG00000197177,protein_coding -27481,PIWIL2,ENSG00000197181,protein_coding -53921,NOL4L,ENSG00000197183,protein_coding -31750,CYSRT1,ENSG00000197191,protein_coding -17379,SLC22A4,ENSG00000197208,protein_coding -57562,ZSCAN5B,ENSG00000197213,protein_coding -27523,ENTPD4,ENSG00000197217,protein_coding -6577,C1D,ENSG00000197223,protein_coding -18414,TBC1D9B,ENSG00000197226,protein_coding -31267,OR1J2,ENSG00000197233,protein_coding -19035,HIST1H4J,ENSG00000197238,protein_coding -313,SLC2A7,ENSG00000197241,protein_coding -850,FAM110D,ENSG00000197245,protein_coding -43605,SERPINA1,ENSG00000197249,protein_coding -46538,TPSB2,ENSG00000197253,protein_coding -55351,KANK2,ENSG00000197256,protein_coding -19865,C6orf141,ENSG00000197261,protein_coding -27677,GTF2E2,ENSG00000197265,protein_coding -47379,IL27,ENSG00000197272,protein_coding 
-1352,GUCA2A,ENSG00000197273,protein_coding -28645,RAD54B,ENSG00000197275,protein_coding -19061,ZNF165,ENSG00000197279,protein_coding -19483,SYNGAP1,ENSG00000197283,protein_coding -54216,FITM2,ENSG00000197296,protein_coding -46182,BLM,ENSG00000197299,protein_coding -47640,ZNF720,ENSG00000197302,protein_coding -34926,OR10D3,ENSG00000197309,protein_coding -517,DDI2,ENSG00000197312,protein_coding -35653,SVIL,ENSG00000197321,protein_coding -2702,TRIM33,ENSG00000197323,protein_coding -42285,LRP10,ENSG00000197324,protein_coding -6505,PELI1,ENSG00000197329,protein_coding -23316,ZNF655,ENSG00000197343,protein_coding -33740,MRPL21,ENSG00000197345,protein_coding -29313,LYPD2,ENSG00000197353,protein_coding -31733,UAP1L1,ENSG00000197355,protein_coding -55968,ZNF98,ENSG00000197360,protein_coding -45421,FBXL22,ENSG00000197361,protein_coding -24341,ZNF786,ENSG00000197362,protein_coding -29450,ZNF517,ENSG00000197363,protein_coding -3292,S100A7L2,ENSG00000197364,protein_coding -56029,ZNF675,ENSG00000197372,protein_coding -17382,SLC22A5,ENSG00000197375,protein_coding -56866,DACT3,ENSG00000197380,protein_coding -60465,ADARB1,ENSG00000197381,protein_coding -10079,ZNF860,ENSG00000197385,protein_coding -12955,HTT,ENSG00000197386,protein_coding -3565,OR6N1,ENSG00000197403,protein_coding -56893,C5AR1,ENSG00000197405,protein_coding -43870,DIO3,ENSG00000197406,protein_coding -56564,CYP2B6,ENSG00000197408,protein_coding -18950,HIST1H3D,ENSG00000197409,protein_coding -15005,DCHS2,ENSG00000197410,protein_coding -12133,VEPH1,ENSG00000197415,protein_coding -28469,FABP12,ENSG00000197416,protein_coding -49139,SHPK,ENSG00000197417,protein_coding -32035,OR51D1,ENSG00000197428,protein_coding -1496,IPP,ENSG00000197429,protein_coding -36815,OPALIN,ENSG00000197430,protein_coding -5397,OR13G1,ENSG00000197437,protein_coding -20977,MAP3K5,ENSG00000197442,protein_coding -36035,OGDHL,ENSG00000197444,protein_coding -56569,CYP2F1,ENSG00000197446,protein_coding -24226,GSTK1,ENSG00000197448,protein_coding 
-18355,HNRNPAB,ENSG00000197451,protein_coding -5420,OR2L5,ENSG00000197454,protein_coding -54703,STMN3,ENSG00000197457,protein_coding -21564,PDGFA,ENSG00000197461,protein_coding -14843,GYPE,ENSG00000197465,protein_coding -36268,COL13A1,ENSG00000197467,protein_coding -47455,SPN,ENSG00000197471,protein_coding -5359,ZNF695,ENSG00000197472,protein_coding -17688,PCDHB11,ENSG00000197479,protein_coding -57524,ZNF628,ENSG00000197483,protein_coding -57561,GALP,ENSG00000197487,protein_coding -54329,SLC2A10,ENSG00000197496,protein_coding -57289,ZNF665,ENSG00000197497,protein_coding -20614,RPF2,ENSG00000197498,protein_coding -30608,SLC28A3,ENSG00000197506,protein_coding -4824,FAM177B,ENSG00000197520,protein_coding -127,MIB2,ENSG00000197530,protein_coding -3555,OR6Y1,ENSG00000197532,protein_coding -45206,MYO5A,ENSG00000197535,protein_coding -54785,GZMM,ENSG00000197540,protein_coding -9820,ATG7,ENSG00000197548,protein_coding -43197,SIPA1L1,ENSG00000197555,protein_coding -8512,TTC30A,ENSG00000197557,protein_coding -24360,SSPO,ENSG00000197558,protein_coding -54808,ELANE,ENSG00000197561,protein_coding -46485,RAB40C,ENSG00000197562,protein_coding -53033,PIGN,ENSG00000197563,protein_coding -26123,COL4A6,ENSG00000197565,protein_coding -49605,ZNF624,ENSG00000197566,protein_coding -1978,HHLA3,ENSG00000197568,protein_coding -21990,HOXA4,ENSG00000197576,protein_coding -29869,TOPORS,ENSG00000197579,protein_coding -34606,BCO2,ENSG00000197580,protein_coding -12392,KCNMB2,ENSG00000197584,protein_coding -53795,ENTPD6,ENSG00000197586,protein_coding -1518,DMBX1,ENSG00000197587,protein_coding -5407,OR11L1,ENSG00000197591,protein_coding -20868,ENPP1,ENSG00000197594,protein_coding -46559,CCDC154,ENSG00000197599,protein_coding -32351,FAR1,ENSG00000197601,protein_coding -16071,CPLANE1,ENSG00000197603,protein_coding -57231,ZNF841,ENSG00000197608,protein_coding -37789,MFAP5,ENSG00000197614,protein_coding -42322,MYH6,ENSG00000197616,protein_coding -57226,ZNF615,ENSG00000197619,protein_coding 
-26750,CXorf40A,ENSG00000197620,protein_coding -3158,CDC42SE1,ENSG00000197622,protein_coding -33203,MPEG1,ENSG00000197629,protein_coding -53066,SERPINB2,ENSG00000197632,protein_coding -8266,DPP4,ENSG00000197635,protein_coding -53061,SERPINB13,ENSG00000197641,protein_coding -29564,PDCD1LG2,ENSG00000197646,protein_coding -55406,ZNF433,ENSG00000197647,protein_coding -39505,CCER1,ENSG00000197651,protein_coding -40264,DNAH10,ENSG00000197653,protein_coding -33409,SLC22A24,ENSG00000197658,protein_coding -32039,OR51C1P,ENSG00000197674,protein_coding -60050,KRTAP26-1,ENSG00000197683,protein_coding -31437,SPTAN1,ENSG00000197694,protein_coding -46025,NMB,ENSG00000197696,protein_coding -32330,PARVA,ENSG00000197702,protein_coding -52633,KLHL14,ENSG00000197705,protein_coding -38861,OR6C74,ENSG00000197706,protein_coding -13417,FAM114A1,ENSG00000197712,protein_coding -9022,RPE,ENSG00000197713,protein_coding -57622,ZNF460,ENSG00000197714,protein_coding -4575,CR1L,ENSG00000197721,protein_coding -30785,PHF2,ENSG00000197724,protein_coding -38920,RPS26,ENSG00000197728,protein_coding -43389,C14orf178,ENSG00000197734,protein_coding -33340,SCGB1D4,ENSG00000197745,protein_coding -36303,PSAP,ENSG00000197746,protein_coding -3216,S100A10,ENSG00000197747,protein_coding -37031,CFAP43,ENSG00000197748,protein_coding -19560,LHFPL5,ENSG00000197753,protein_coding -9098,RPL37A,ENSG00000197756,protein_coding -38803,HOXC6,ENSG00000197757,protein_coding -11564,TXNRD3,ENSG00000197763,protein_coding -54809,CFD,ENSG00000197766,protein_coding -31756,STPG3,ENSG00000197768,protein_coding -5267,MAP1LC3C,ENSG00000197769,protein_coding -37236,MCMBP,ENSG00000197771,protein_coding -46588,EME2,ENSG00000197774,protein_coding -42728,KLHDC1,ENSG00000197776,protein_coding -25172,ZNF81,ENSG00000197779,protein_coding -2532,TAF13,ENSG00000197780,protein_coding -56514,ZNF780A,ENSG00000197782,protein_coding -119,ATAD3A,ENSG00000197785,protein_coding -33169,OR5B17,ENSG00000197786,protein_coding 
-32030,OR52M1,ENSG00000197790,protein_coding -35014,FAM118B,ENSG00000197798,protein_coding -56346,ZNF461,ENSG00000197808,protein_coding -30889,CCDC180,ENSG00000197816,protein_coding -54396,SLC9A8,ENSG00000197818,protein_coding -16504,OCLN,ENSG00000197822,protein_coding -14058,CFAP299,ENSG00000197826,protein_coding -37996,HIST4H4,ENSG00000197837,protein_coding -56568,CYP2A13,ENSG00000197838,protein_coding -56213,ZNF181,ENSG00000197841,protein_coding -34931,OR8G1,ENSG00000197849,protein_coding -2642,INKA2,ENSG00000197852,protein_coding -55423,ZNF44,ENSG00000197857,protein_coding -29395,GPAA1,ENSG00000197858,protein_coding -31593,ADAMTSL2,ENSG00000197859,protein_coding -16435,SGTB,ENSG00000197860,protein_coding -56354,ZNF790,ENSG00000197863,protein_coding -37914,PRB3,ENSG00000197870,protein_coding -5727,FAM49A,ENSG00000197872,protein_coding -49037,MYO1C,ENSG00000197879,protein_coding -9996,NKIRAS1,ENSG00000197885,protein_coding -33159,OR1S2,ENSG00000197887,protein_coding -13821,UGT2B17,ENSG00000197888,protein_coding -35415,MEIG1,ENSG00000197889,protein_coding -33481,SLC22A12,ENSG00000197891,protein_coding -27638,KIF13B,ENSG00000197892,protein_coding -37128,NRAP,ENSG00000197893,protein_coding -14280,ADH5,ENSG00000197894,protein_coding -33404,SLC22A6,ENSG00000197901,protein_coding -19002,HIST1H2BK,ENSG00000197903,protein_coding -37559,TEAD4,ENSG00000197905,protein_coding -48954,SPG7,ENSG00000197912,protein_coding -3229,HRNR,ENSG00000197915,protein_coding -29765,IFNA1,ENSG00000197919,protein_coding -164,HES5,ENSG00000197921,protein_coding -57293,ZNF677,ENSG00000197928,protein_coding -42813,ERO1A,ENSG00000197930,protein_coding -55388,ZNF823,ENSG00000197933,protein_coding -19110,ZNF311,ENSG00000197935,protein_coding -57288,ZNF347,ENSG00000197937,protein_coding -11104,OR5H2,ENSG00000197938,protein_coding -48735,PLCG2,ENSG00000197943,protein_coding -17735,FCHSD1,ENSG00000197948,protein_coding -57597,ZNF71,ENSG00000197951,protein_coding 
-12036,AADACL2,ENSG00000197953,protein_coding -3296,S100A6,ENSG00000197956,protein_coding -31373,RPL12,ENSG00000197958,protein_coding -3929,DNM3,ENSG00000197959,protein_coding -55264,ZNF121,ENSG00000197961,protein_coding -3828,MPZL1,ENSG00000197965,protein_coding -30526,VPS13A,ENSG00000197969,protein_coding -53228,MBP,ENSG00000197971,protein_coding -24582,AKAP17A,ENSG00000197976,protein_coding -18701,ELOVL2,ENSG00000197977,protein_coding -45944,GOLGA6L9,ENSG00000197978,protein_coding -12118,LEKR1,ENSG00000197980,protein_coding -1220,C1orf122,ENSG00000197982,protein_coding -41316,AL592490.1,ENSG00000197991,protein_coding -37854,CLEC9A,ENSG00000197992,protein_coding -24216,KEL,ENSG00000197993,protein_coding -30750,NOL8,ENSG00000198000,protein_coding -38419,IRAK4,ENSG00000198001,protein_coding -55369,CCDC151,ENSG00000198003,protein_coding -27002,DLGAP2,ENSG00000198010,protein_coding -39552,MRPL42,ENSG00000198015,protein_coding -36883,ENTPD7,ENSG00000198018,protein_coding -2851,FCGR1B,ENSG00000198019,protein_coding -26652,SPANXA1,ENSG00000198021,protein_coding -54306,ZNF335,ENSG00000198026,protein_coding -55258,ZNF560,ENSG00000198028,protein_coding -40579,TUBA3C,ENSG00000198033,protein_coding -25646,RPS4X,ENSG00000198034,protein_coding -22733,AC092161.1,ENSG00000198039,protein_coding -40522,ZNF84,ENSG00000198040,protein_coding -27716,MAK16,ENSG00000198042,protein_coding -57586,ZNF667,ENSG00000198046,protein_coding -4529,AVPR1B,ENSG00000198049,protein_coding -53354,SIRPA,ENSG00000198053,protein_coding -18321,GRK6,ENSG00000198055,protein_coding -38970,PRIM1,ENSG00000198056,protein_coding -36731,MARCH5,ENSG00000198060,protein_coding -58291,POTEH,ENSG00000198062,protein_coding -47508,NPIPB13,ENSG00000198064,protein_coding -23963,AKR1B10,ENSG00000198074,protein_coding -7435,SULT1C4,ENSG00000198075,protein_coding -56559,CYP2A7,ENSG00000198077,protein_coding -52165,ZBTB14,ENSG00000198081,protein_coding -50589,KRTAP9-9,ENSG00000198083,protein_coding 
-19840,CD2AP,ENSG00000198087,protein_coding -26102,NUP62CL,ENSG00000198088,protein_coding -59058,SFI1,ENSG00000198089,protein_coding -50576,KRTAP4-6,ENSG00000198090,protein_coding -13808,TMPRSS11F,ENSG00000198092,protein_coding -57221,ZNF649,ENSG00000198093,protein_coding -14282,ADH4,ENSG00000198099,protein_coding -5437,OR2T6,ENSG00000198104,protein_coding -35792,ZNF248,ENSG00000198105,protein_coding -17341,CHSY3,ENSG00000198108,protein_coding -31758,TOR4A,ENSG00000198113,protein_coding -31095,LPAR1,ENSG00000198121,protein_coding -59147,MB,ENSG00000198125,protein_coding -5424,OR2L3,ENSG00000198128,protein_coding -27124,DEFB107B,ENSG00000198129,protein_coding -8667,HIBCH,ENSG00000198130,protein_coding -57693,ZNF544,ENSG00000198131,protein_coding -43092,TMEM229B,ENSG00000198133,protein_coding -7456,SOWAHC,ENSG00000198142,protein_coding -44738,ZNF770,ENSG00000198146,protein_coding -47368,NPIPB6,ENSG00000198156,protein_coding -25798,HMGN5,ENSG00000198157,protein_coding -1924,MIER1,ENSG00000198160,protein_coding -2774,MAN1A2,ENSG00000198162,protein_coding -32519,SVIP,ENSG00000198168,protein_coding -29445,ZNF251,ENSG00000198169,protein_coding -53398,DDRGK1,ENSG00000198171,protein_coding -24977,FAM47C,ENSG00000198173,protein_coding -41906,TFDP1,ENSG00000198176,protein_coding -37725,CLEC4C,ENSG00000198178,protein_coding -56390,ZNF607,ENSG00000198182,protein_coding -53948,BPIFA1,ENSG00000198183,protein_coding -54323,ZNF334,ENSG00000198185,protein_coding -14153,HSD17B11,ENSG00000198189,protein_coding -1404,SZT2,ENSG00000198198,protein_coding -7431,SULT1C2,ENSG00000198203,protein_coding -25478,ZXDA,ENSG00000198205,protein_coding -43294,RPS6KL1,ENSG00000198208,protein_coding -48974,AC092143.1,ENSG00000198211,protein_coding -4099,CACNA1E,ENSG00000198216,protein_coding -10485,QRICH1,ENSG00000198218,protein_coding -24573,CSF2RA,ENSG00000198223,protein_coding -20024,FKBP1C,ENSG00000198225,protein_coding -51408,DDX42,ENSG00000198231,protein_coding 
-50019,RPL23A,ENSG00000198242,protein_coding -36296,SLC29A3,ENSG00000198246,protein_coding -42816,STYX,ENSG00000198252,protein_coding -55280,UBL5,ENSG00000198258,protein_coding -51500,HELZ,ENSG00000198265,protein_coding -39940,TMEM116,ENSG00000198270,protein_coding -50577,KRTAP4-5,ENSG00000198271,protein_coding -54728,UCKL1,ENSG00000198276,protein_coding -33177,OR5B21,ENSG00000198283,protein_coding -21625,CARD11,ENSG00000198286,protein_coding -35875,ZNF485,ENSG00000198298,protein_coding -57605,PEG3,ENSG00000198300,protein_coding -13979,SDAD1,ENSG00000198301,protein_coding -19066,ZKSCAN8,ENSG00000198315,protein_coding -39921,PHETA1,ENSG00000198324,protein_coding -53385,TMEM239,ENSG00000198326,protein_coding -35000,HYLS1,ENSG00000198331,protein_coding -50916,MYL4,ENSG00000198336,protein_coding -55427,ZNF442,ENSG00000198342,protein_coding -57312,ZNF813,ENSG00000198346,protein_coding -38809,HOXC4,ENSG00000198353,protein_coding -26388,DCAF12L2,ENSG00000198354,protein_coding -59596,PIM3,ENSG00000198355,protein_coding -55467,ASNA1,ENSG00000198356,protein_coding -28161,ASPH,ENSG00000198363,protein_coding -6542,SPRED2,ENSG00000198369,protein_coding -48437,WWP2,ENSG00000198373,protein_coding -6607,GFPT1,ENSG00000198380,protein_coding -33990,UVRAG,ENSG00000198382,protein_coding -60056,KRTAP13-1,ENSG00000198390,protein_coding -40518,ZNF26,ENSG00000198393,protein_coding -12646,TMEM207,ENSG00000198398,protein_coding -5835,ITSN2,ENSG00000198399,protein_coding -3501,NTRK1,ENSG00000198400,protein_coding -36952,OGA,ENSG00000198408,protein_coding -48101,MT1F,ENSG00000198417,protein_coding -24261,TCAF1,ENSG00000198420,protein_coding -55398,ZNF69,ENSG00000198429,protein_coding -39771,TXNRD1,ENSG00000198431,protein_coding -31761,NRARP,ENSG00000198435,protein_coding -57585,ZNF583,ENSG00000198440,protein_coding -50581,KRTAP4-1,ENSG00000198443,protein_coding -58316,CCT8L2,ENSG00000198445,protein_coding -56359,ZNF568,ENSG00000198453,protein_coding -25475,ZXDB,ENSG00000198455,protein_coding 
-57247,ZNF480,ENSG00000198464,protein_coding -57667,ZNF587,ENSG00000198466,protein_coding -30018,TPM2,ENSG00000198467,protein_coding -12617,RTP2,ENSG00000198471,protein_coding -20214,SH3BGRL2,ENSG00000198478,protein_coding -57259,ZNF808,ENSG00000198482,protein_coding -2954,ANKRD35,ENSG00000198483,protein_coding -34020,B3GNT6,ENSG00000198488,protein_coding -982,YTHDF2,ENSG00000198492,protein_coding -15127,TMA16,ENSG00000198498,protein_coding -19415,HLA-DRB5,ENSG00000198502,protein_coding -42759,ATL1,ENSG00000198513,protein_coding -13558,CNGA1,ENSG00000198515,protein_coding -21593,MAFK,ENSG00000198517,protein_coding -1447,ARMH1,ENSG00000198520,protein_coding -55945,ZNF43,ENSG00000198521,protein_coding -5942,GPN1,ENSG00000198522,protein_coding -20728,PLN,ENSG00000198523,protein_coding -45385,C2CD4A,ENSG00000198535,protein_coding -57269,ZNF28,ENSG00000198538,protein_coding -41736,ITGBL1,ENSG00000198542,protein_coding -37438,ZNF511,ENSG00000198546,protein_coding -53928,C20orf203,ENSG00000198547,protein_coding -55381,ZNF627,ENSG00000198551,protein_coding -41173,KCNRG,ENSG00000198553,protein_coding -42851,WDHD1,ENSG00000198554,protein_coding -23312,ZNF789,ENSG00000198556,protein_coding -33141,CTNND1,ENSG00000198561,protein_coding -19311,DDX39B,ENSG00000198563,protein_coding -31752,SLC34A3,ENSG00000198569,protein_coding -4646,RD3,ENSG00000198570,protein_coding -26649,SPANXC,ENSG00000198573,protein_coding -3728,SH2D1B,ENSG00000198574,protein_coding -29303,ARC,ENSG00000198576,protein_coding -11698,NUDT16,ENSG00000198585,protein_coding -8370,TLK1,ENSG00000198586,protein_coding -14935,LRBA,ENSG00000198589,protein_coding -56114,ZNF536,ENSG00000198597,protein_coding -40459,MMP17,ENSG00000198598,protein_coding -5429,OR2M2,ENSG00000198601,protein_coding -42532,BAZ1A,ENSG00000198604,protein_coding -35236,AKR1C4,ENSG00000198610,protein_coding -9525,COPS8,ENSG00000198612,protein_coding -17917,CCDC69,ENSG00000198624,protein_coding -4478,MDM4,ENSG00000198625,protein_coding 
-5196,RYR2,ENSG00000198626,protein_coding -57255,ZNF534,ENSG00000198633,protein_coding -29755,KLHL9,ENSG00000198642,protein_coding -10756,FAM3D,ENSG00000198643,protein_coding -53997,NCOA6,ENSG00000198646,protein_coding -8320,STK39,ENSG00000198648,protein_coding -48500,TAT,ENSG00000198650,protein_coding -19592,C6orf89,ENSG00000198663,protein_coding -43521,CALM1,ENSG00000198668,protein_coding -21371,LPA,ENSG00000198670,protein_coding -39078,TAFA2,ENSG00000198673,protein_coding -34915,OR10G6,ENSG00000198674,protein_coding -16917,TTC37,ENSG00000198677,protein_coding -38535,OR5BS1P,ENSG00000198678,protein_coding -29809,TUSC1,ENSG00000198680,protein_coding -26840,MAGEA1,ENSG00000198681,protein_coding -36634,PAPSS2,ENSG00000198682,protein_coding -26573,SLC9A6,ENSG00000198689,protein_coding -44641,FAN1,ENSG00000198690,protein_coding -2336,ABCA4,ENSG00000198691,protein_coding -58054,EIF1AY,ENSG00000198692,protein_coding -60553,MT-ND6,ENSG00000198695,protein_coding -4380,IPO9,ENSG00000198700,protein_coding -19089,GPX6,ENSG00000198704,protein_coding -39466,CEP290,ENSG00000198707,protein_coding -60539,MT-CO2,ENSG00000198712,protein_coding -3467,GLMP,ENSG00000198715,protein_coding -42674,TOGARAM1,ENSG00000198718,protein_coding -21528,DLL1,ENSG00000198719,protein_coding -50074,ANKRD13B,ENSG00000198720,protein_coding -18579,ECI2,ENSG00000198721,protein_coding -29997,UNC13B,ENSG00000198722,protein_coding -55147,TEX45,ENSG00000198723,protein_coding -60555,MT-CYB,ENSG00000198727,protein_coding -36961,LDB1,ENSG00000198728,protein_coding -21191,PPP1R14C,ENSG00000198729,protein_coding -32299,CTR9,ENSG00000198730,protein_coding -43156,SMOC1,ENSG00000198732,protein_coding -3872,F5,ENSG00000198734,protein_coding -46599,MSRB1,ENSG00000198736,protein_coding -36192,LRRTM3,ENSG00000198739,protein_coding -51023,ZNF652,ENSG00000198740,protein_coding -23297,SMURF1,ENSG00000198742,protein_coding -60168,SLC5A3,ENSG00000198743,protein_coding -889,GPATCH3,ENSG00000198746,protein_coding 
-43904,CDC42BPB,ENSG00000198752,protein_coding -26867,PLXNB3,ENSG00000198753,protein_coding -1278,OXCT2,ENSG00000198754,protein_coding -19543,RPL10A,ENSG00000198755,protein_coding -4153,COLGALT2,ENSG00000198756,protein_coding -2567,EPS8L3,ENSG00000198758,protein_coding -24711,EGFL6,ENSG00000198759,protein_coding -60530,MT-ND2,ENSG00000198763,protein_coding -2716,SYCP1,ENSG00000198765,protein_coding -54550,APCDD1L,ENSG00000198768,protein_coding -3827,RCSD1,ENSG00000198771,protein_coding -39448,RASSF9,ENSG00000198774,protein_coding -16605,FAM169A,ENSG00000198780,protein_coding -50273,ZNF830,ENSG00000198783,protein_coding -30977,GRIN3A,ENSG00000198785,protein_coding -60552,MT-ND5,ENSG00000198786,protein_coding -31871,MUC2,ENSG00000198788,protein_coding -27387,CNOT7,ENSG00000198791,protein_coding -59258,TMEM184B,ENSG00000198792,protein_coding -380,MTOR,ENSG00000198793,protein_coding -45723,SCAMP5,ENSG00000198794,protein_coding -52526,ZNF521,ENSG00000198795,protein_coding -52967,ALPK2,ENSG00000198796,protein_coding -4017,BRINP2,ENSG00000198797,protein_coding -24920,MAGEB3,ENSG00000198798,protein_coding -2677,LRIG2,ENSG00000198799,protein_coding -60536,MT-CO1,ENSG00000198804,protein_coding -42055,PNP,ENSG00000198805,protein_coding -42588,PAX9,ENSG00000198807,protein_coding -39258,LRRC10,ENSG00000198812,protein_coding -24928,GK,ENSG00000198814,protein_coding -1353,FOXJ3,ENSG00000198815,protein_coding -55149,ZNF358,ENSG00000198816,protein_coding -21433,SFT2D1,ENSG00000198818,protein_coding -3819,CD247,ENSG00000198821,protein_coding -23099,GRM3,ENSG00000198822,protein_coding -41930,CHAMP1,ENSG00000198824,protein_coding -37232,INPP5F,ENSG00000198825,protein_coding -44683,ARHGAP11A,ENSG00000198826,protein_coding -12042,SUCNR1,ENSG00000198829,protein_coding -870,HMGN2,ENSG00000198830,protein_coding -59037,SELENOM,ENSG00000198832,protein_coding -20359,UBE2J1,ENSG00000198833,protein_coding -4960,GJC2,ENSG00000198835,protein_coding -12681,OPA1,ENSG00000198836,protein_coding 
-3326,DENND4B,ENSG00000198837,protein_coding -44696,RYR3,ENSG00000198838,protein_coding -23606,ZNF277,ENSG00000198839,protein_coding -60545,MT-ND3,ENSG00000198840,protein_coding -1637,KTI12,ENSG00000198841,protein_coding -3813,DUSP27,ENSG00000198842,protein_coding -12008,SELENOT,ENSG00000198843,protein_coding -49406,ARHGEF15,ENSG00000198844,protein_coding -28129,TOX,ENSG00000198846,protein_coding -48070,CES1,ENSG00000198848,protein_coding -34743,CD3E,ENSG00000198851,protein_coding -30001,RUSC2,ENSG00000198853,protein_coding -3248,C1orf68,ENSG00000198854,protein_coding -39838,FICD,ENSG00000198855,protein_coding -14405,OSTC,ENSG00000198856,protein_coding -54812,R3HDM4,ENSG00000198858,protein_coding -4154,TSEN15,ENSG00000198860,protein_coding -60019,LTN1,ENSG00000198862,protein_coding -50712,RUNDC1,ENSG00000198863,protein_coding -16155,CCDC152,ENSG00000198865,protein_coding -31587,STKLD1,ENSG00000198870,protein_coding -37220,GRK5,ENSG00000198873,protein_coding -22812,TYW1,ENSG00000198874,protein_coding -29937,DCAF12,ENSG00000198876,protein_coding -35295,SFMBT2,ENSG00000198879,protein_coding -25499,ASB12,ENSG00000198881,protein_coding -26833,PNMA5,ENSG00000198883,protein_coding -7195,ITPRIPL1,ENSG00000198885,protein_coding -60548,MT-ND4,ENSG00000198886,protein_coding -30445,SMC5,ENSG00000198887,protein_coding -60526,MT-ND1,ENSG00000198888,protein_coding -26393,DCAF12L1,ENSG00000198889,protein_coding -2495,PRMT6,ENSG00000198890,protein_coding -4382,SHISA4,ENSG00000198892,protein_coding -43362,CIPC,ENSG00000198894,protein_coding -23659,CAPZA2,ENSG00000198898,protein_coding -60542,MT-ATP6,ENSG00000198899,protein_coding -54160,TOP1,ENSG00000198900,protein_coding -46195,PRC1,ENSG00000198901,protein_coding -26016,BHLHB9,ENSG00000198908,protein_coding -51401,MAP3K3,ENSG00000198909,protein_coding -26873,L1CAM,ENSG00000198910,protein_coding -59398,SREBF2,ENSG00000198911,protein_coding -208,C1orf174,ENSG00000198912,protein_coding -7368,POU3F3,ENSG00000198914,protein_coding 
-35861,RASGEF1A,ENSG00000198915,protein_coding -31452,SPOUT1,ENSG00000198917,protein_coding -26285,RPL39,ENSG00000198918,protein_coding -11236,DZIP3,ENSG00000198919,protein_coding -49256,KIAA0753,ENSG00000198920,protein_coding -37134,DCLRE1A,ENSG00000198924,protein_coding -9193,ATG9A,ENSG00000198925,protein_coding -3719,NOS1AP,ENSG00000198929,protein_coding -26821,CSAG1,ENSG00000198930,protein_coding -48927,APRT,ENSG00000198931,protein_coding -26010,GPRASP1,ENSG00000198932,protein_coding -50930,TBKBP1,ENSG00000198933,protein_coding -25749,MAGEE1,ENSG00000198934,protein_coding -19608,CCDC167,ENSG00000198937,protein_coding -60543,MT-CO3,ENSG00000198938,protein_coding -18374,ZFP2,ENSG00000198939,protein_coding -17398,SOWAHA,ENSG00000198944,protein_coding -20849,L3MBTL3,ENSG00000198945,protein_coding -24937,DMD,ENSG00000198947,protein_coding -15215,MFAP3L,ENSG00000198948,protein_coding -59409,NAGA,ENSG00000198951,protein_coding -3465,SMG5,ENSG00000198952,protein_coding -36240,KIF1BP,ENSG00000198954,protein_coding -54105,TGM2,ENSG00000198959,protein_coding -25973,ARMCX6,ENSG00000198960,protein_coding -17075,PJA2,ENSG00000198961,protein_coding -30488,RORB,ENSG00000198963,protein_coding -36052,SGMS1,ENSG00000198964,protein_coding -3550,OR10R2,ENSG00000198965,protein_coding -3558,OR10Z1,ENSG00000198967,protein_coding -57303,ZNF525,ENSG00000203326,protein_coding -43972,INF2,ENSG00000203485,protein_coding -29372,IQANK1,ENSG00000203499,protein_coding -42484,AL139353.1,ENSG00000203546,protein_coding -58471,GP1BB,ENSG00000203618,protein_coding -5443,OR2T5,ENSG00000203661,protein_coding -5422,OR2L2,ENSG00000203663,protein_coding -5324,EFCAB2,ENSG00000203666,protein_coding -5318,COX20,ENSG00000203667,protein_coding -5259,CHML,ENSG00000203668,protein_coding -4910,STUM,ENSG00000203685,protein_coding -21466,TCP10,ENSG00000203690,protein_coding -4837,CAPN8,ENSG00000203697,protein_coding -4690,TATDN3,ENSG00000203705,protein_coding -4573,CR1,ENSG00000203710,protein_coding 
-21178,RAET1G,ENSG00000203722,protein_coding -4307,C1orf53,ENSG00000203724,protein_coding -21127,SAMD5,ENSG00000203727,protein_coding -4115,TEDDM1,ENSG00000203730,protein_coding -21053,GJE1,ENSG00000203733,protein_coding -21019,ECT2L,ENSG00000203734,protein_coding -3979,GPR52,ENSG00000203737,protein_coding -3886,METTL11B,ENSG00000203740,protein_coding -3700,FCGR3A,ENSG00000203747,protein_coding -20848,TMEM244,ENSG00000203756,protein_coding -3562,OR6K3,ENSG00000203757,protein_coding -20804,CENPW,ENSG00000203760,protein_coding -37451,SPRN,ENSG00000203772,protein_coding -20637,FAM229B,ENSG00000203778,protein_coding -37346,FANK1,ENSG00000203780,protein_coding -3281,LOR,ENSG00000203782,protein_coding -3279,PRR9,ENSG00000203783,protein_coding -3278,LELP1,ENSG00000203784,protein_coding -3273,SPRR2E,ENSG00000203785,protein_coding -3251,KPRP,ENSG00000203786,protein_coding -37316,EEF1AKMT2,ENSG00000203791,protein_coding -37285,FAM24A,ENSG00000203795,protein_coding -20596,DDO,ENSG00000203797,protein_coding -37243,PLPP4,ENSG00000203805,protein_coding -3090,HIST2H3C,ENSG00000203811,protein_coding -3091,HIST2H2AA3,ENSG00000203812,protein_coding -3086,HIST2H2BF,ENSG00000203814,protein_coding -37168,PNLIPRP3,ENSG00000203837,protein_coding -3095,HIST2H3A,ENSG00000203852,protein_coding -2810,HSD3B1,ENSG00000203857,protein_coding -2804,HSD3B2,ENSG00000203859,protein_coding -37086,RBM20,ENSG00000203867,protein_coding -26939,SMIM9,ENSG00000203870,protein_coding -20320,C6orf163,ENSG00000203872,protein_coding -20263,RIPPLY2,ENSG00000203877,protein_coding -26908,GDI1,ENSG00000203879,protein_coding -54746,PCMTD2,ENSG00000203880,protein_coding -54736,SOX18,ENSG00000203883,protein_coding -54711,LIME1,ENSG00000203896,protein_coding -20132,OOEP,ENSG00000203907,protein_coding -20131,KHDC3L,ENSG00000203908,protein_coding -20130,DPPA5,ENSG00000203909,protein_coding -2284,C1orf146,ENSG00000203910,protein_coding -26685,SPANXN1,ENSG00000203923,protein_coding 
-26653,SPANXA2,ENSG00000203926,protein_coding -26629,CXorf66,ENSG00000203933,protein_coding -36850,C10orf62,ENSG00000203942,protein_coding -2141,SAMD13,ENSG00000203943,protein_coding -26529,RTL8A,ENSG00000203950,protein_coding -26500,CCDC160,ENSG00000203952,protein_coding -1926,C1orf141,ENSG00000203963,protein_coding -1866,EFCAB7,ENSG00000203965,protein_coding -19878,DEFB110,ENSG00000203970,protein_coding -19864,GLYATL3,ENSG00000203972,protein_coding -1711,LDLRAD1,ENSG00000203985,protein_coding -26297,RHOXF2B,ENSG00000203989,protein_coding -1667,ZYG11A,ENSG00000203995,protein_coding -31694,LCN8,ENSG00000204001,protein_coding -31690,AL355987.1,ENSG00000204003,protein_coding -1606,C1orf185,ENSG00000204006,protein_coding -31647,GLT6D1,ENSG00000204007,protein_coding -36674,IFIT1B,ENSG00000204010,protein_coding -26233,CT83,ENSG00000204019,protein_coding -36653,LIPN,ENSG00000204020,protein_coding -36651,LIPK,ENSG00000204021,protein_coding -36647,LIPJ,ENSG00000204022,protein_coding -26170,TRPC5OS,ENSG00000204025,protein_coding -36561,LRIT2,ENSG00000204033,protein_coding -19762,LRRC73,ENSG00000204052,protein_coding -1340,FOXO6,ENSG00000204060,protein_coding -26040,TCEAL5,ENSG00000204065,protein_coding -54259,SYS1,ENSG00000204070,protein_coding -25992,TCEAL6,ENSG00000204071,protein_coding -1223,INPP5B,ENSG00000204084,protein_coding -25916,RPA4,ENSG00000204086,protein_coding -9651,NEU4,ENSG00000204099,protein_coding -54155,MAFB,ENSG00000204103,protein_coding -9558,TRAF3IP1,ENSG00000204104,protein_coding -25679,CHIC1,ENSG00000204116,protein_coding -9441,GIGYF2,ENSG00000204120,protein_coding -9378,C2orf72,ENSG00000204128,protein_coding -36217,RUFY2,ENSG00000204130,protein_coding -25638,NHSL2,ENSG00000204131,protein_coding -958,PHACTR4,ENSG00000204138,protein_coding -19558,CLPSL1,ENSG00000204140,protein_coding -36065,ASAH2B,ENSG00000204147,protein_coding -36044,AGAP6,ENSG00000204149,protein_coding -36040,TIMM23B,ENSG00000204152,protein_coding 
-885,ZDHHC18,ENSG00000204160,protein_coding -36025,TMEM273,ENSG00000204161,protein_coding -25603,CXorf65,ENSG00000204165,protein_coding -35983,AGAP9,ENSG00000204172,protein_coding -35957,NPY4R,ENSG00000204174,protein_coding -35958,GPRIN2,ENSG00000204175,protein_coding -35959,SYT15,ENSG00000204176,protein_coding -823,MACO1,ENSG00000204178,protein_coding -35974,PTPN20,ENSG00000204179,protein_coding -54023,GDF5OS,ENSG00000204183,protein_coding -8946,ZDBF2,ENSG00000204186,protein_coding -31089,TXNDC8,ENSG00000204193,protein_coding -25577,AWAT1,ENSG00000204195,protein_coding -19473,DAXX,ENSG00000204209,protein_coding -8873,BMPR2,ENSG00000204217,protein_coding -757,TCEA3,ENSG00000204219,protein_coding -19468,PFDN6,ENSG00000204220,protein_coding -19458,RING1,ENSG00000204227,protein_coding -19456,HSD17B8,ENSG00000204228,protein_coding -19453,RXRB,ENSG00000204231,protein_coding -51950,OXLD1,ENSG00000204237,protein_coding -31000,OR13C3,ENSG00000204246,protein_coding -19452,COL11A2,ENSG00000204248,protein_coding -19443,HLA-DOA,ENSG00000204252,protein_coding -19440,BRD2,ENSG00000204256,protein_coding -19439,HLA-DMA,ENSG00000204257,protein_coding -8644,COL5A2,ENSG00000204262,protein_coding -19431,PSMB8,ENSG00000204264,protein_coding -19430,TAP2,ENSG00000204267,protein_coding -25465,SPIN3,ENSG00000204271,protein_coding -25463,NBDY,ENSG00000204272,protein_coding -51826,TMEM235,ENSG00000204278,protein_coding -25444,PAGE3,ENSG00000204279,protein_coding -19413,HLA-DRA,ENSG00000204287,protein_coding -19412,BTNL2,ENSG00000204290,protein_coding -30925,COL15A1,ENSG00000204291,protein_coding -19409,TSBP1,ENSG00000204296,protein_coding -34910,TMEM225,ENSG00000204300,protein_coding -19407,NOTCH4,ENSG00000204301,protein_coding -19404,PBX2,ENSG00000204304,protein_coding -19403,AGER,ENSG00000204305,protein_coding -19401,RNF5,ENSG00000204308,protein_coding -19399,AGPAT1,ENSG00000204310,protein_coding -8527,PJVK,ENSG00000204311,protein_coding -19393,PRRT1,ENSG00000204314,protein_coding 
-19392,FKBPL,ENSG00000204315,protein_coding -51720,MRPL38,ENSG00000204316,protein_coding -51703,SMIM5,ENSG00000204323,protein_coding -8364,ERICH2,ENSG00000204334,protein_coding -8360,SP5,ENSG00000204335,protein_coding -19378,STK19,ENSG00000204344,protein_coding -51644,CD300LD,ENSG00000204345,protein_coding -51635,BTBD17,ENSG00000204347,protein_coding -19377,DXO,ENSG00000204348,protein_coding -19376,SKIV2L,ENSG00000204351,protein_coding -30779,C9orf129,ENSG00000204352,protein_coding -19374,NELFE,ENSG00000204356,protein_coding -34668,NXPE2,ENSG00000204361,protein_coding -25371,SPANXN5,ENSG00000204363,protein_coding -19370,ZBTB12,ENSG00000204366,protein_coding -34601,SDHD,ENSG00000204370,protein_coding -19368,EHMT2,ENSG00000204371,protein_coding -25348,XAGE1A,ENSG00000204379,protein_coding -34573,LAYN,ENSG00000204381,protein_coding -25350,XAGE1B,ENSG00000204382,protein_coding -19366,SLC44A4,ENSG00000204385,protein_coding -19365,NEU1,ENSG00000204386,protein_coding -19362,SNHG32,ENSG00000204387,protein_coding -19360,HSPA1B,ENSG00000204388,protein_coding -19359,HSPA1A,ENSG00000204389,protein_coding -19358,HSPA1L,ENSG00000204390,protein_coding -19357,LSM2,ENSG00000204392,protein_coding -19355,VARS,ENSG00000204394,protein_coding -19354,VWA7,ENSG00000204396,protein_coding -34473,CARD16,ENSG00000204397,protein_coding -8085,MBD5,ENSG00000204406,protein_coding -19349,MSH5,ENSG00000204410,protein_coding -51418,CSHL1,ENSG00000204414,protein_coding -19345,MPIG6B,ENSG00000204420,protein_coding -19346,LY6G6C,ENSG00000204421,protein_coding -19339,AL662899.1,ENSG00000204422,protein_coding -19341,LY6G6F,ENSG00000204424,protein_coding -19338,ABHD16A,ENSG00000204427,protein_coding -19337,LY6G5C,ENSG00000204428,protein_coding -19334,CSNK2B,ENSG00000204435,protein_coding -19332,GPANK1,ENSG00000204438,protein_coding -19330,C6orf47,ENSG00000204439,protein_coding -41797,FAM155A,ENSG00000204442,protein_coding -19329,APOM,ENSG00000204444,protein_coding 
-34246,TRIM49C,ENSG00000204449,protein_coding -34241,TRIM64,ENSG00000204450,protein_coding -19328,BAG6,ENSG00000204463,protein_coding -19325,PRRC2A,ENSG00000204469,protein_coding -19324,AIF1,ENSG00000204472,protein_coding -19322,NCR3,ENSG00000204475,protein_coding -480,PRAMEF20,ENSG00000204478,protein_coding -479,PRAMEF17,ENSG00000204479,protein_coding -478,PRAMEF19,ENSG00000204480,protein_coding -477,PRAMEF14,ENSG00000204481,protein_coding -19321,LST1,ENSG00000204482,protein_coding -19317,NFKBIL1,ENSG00000204498,protein_coding -475,PRAMEF15,ENSG00000204501,protein_coding -466,PRAMEF9,ENSG00000204505,protein_coding -451,PRAMEF7,ENSG00000204510,protein_coding -19310,MCCD1,ENSG00000204511,protein_coding -57666,ZNF814,ENSG00000204514,protein_coding -19305,MICB,ENSG00000204516,protein_coding -438,AADACL4,ENSG00000204518,protein_coding -57656,ZNF551,ENSG00000204519,protein_coding -19299,MICA,ENSG00000204520,protein_coding -57619,ZNF805,ENSG00000204524,protein_coding -19283,HLA-C,ENSG00000204525,protein_coding -19276,POU5F1,ENSG00000204531,protein_coding -57563,ZSCAN5C,ENSG00000204532,protein_coding -19274,CCHCR1,ENSG00000204536,protein_coding -19272,PSORS1C2,ENSG00000204538,protein_coding -19271,CDSN,ENSG00000204539,protein_coding -19270,PSORS1C1,ENSG00000204540,protein_coding -19269,C6orf15,ENSG00000204542,protein_coding -19265,MUC21,ENSG00000204544,protein_coding -53876,DEFB121,ENSG00000204548,protein_coding -19238,DHX16,ENSG00000204560,protein_coding -19237,C6orf136,ENSG00000204564,protein_coding -19234,MRPS18B,ENSG00000204568,protein_coding -19233,PPP1R10,ENSG00000204569,protein_coding -33812,KRTAP5-11,ENSG00000204571,protein_coding -33810,KRTAP5-10,ENSG00000204572,protein_coding -19231,ABCF1,ENSG00000204574,protein_coding -19230,PRR3,ENSG00000204576,protein_coding -57411,LILRB3,ENSG00000204577,protein_coding -19256,DDR1,ENSG00000204580,protein_coding -40498,LRCOL1,ENSG00000204583,protein_coding -19229,GNL1,ENSG00000204590,protein_coding 
-19227,HLA-E,ENSG00000204592,protein_coding -57321,DPRX,ENSG00000204595,protein_coding -19216,TRIM39,ENSG00000204599,protein_coding -57271,ZNF468,ENSG00000204604,protein_coding -19208,TRIM15,ENSG00000204610,protein_coding -57233,ZNF616,ENSG00000204611,protein_coding -30522,FOXB2,ENSG00000204612,protein_coding -19207,TRIM10,ENSG00000204613,protein_coding -19206,TRIM40,ENSG00000204614,protein_coding -19203,TRIM31,ENSG00000204616,protein_coding -19202,RNF39,ENSG00000204618,protein_coding -19201,PPP1R11,ENSG00000204619,protein_coding -390,DISP3,ENSG00000204624,protein_coding -18470,RACK1,ENSG00000204628,protein_coding -19174,HLA-G,ENSG00000204632,protein_coding -7303,TBC1D8,ENSG00000204634,protein_coding -7288,NMS,ENSG00000204640,protein_coding -19158,HLA-F,ENSG00000204642,protein_coding -19156,ZFP57,ENSG00000204644,protein_coding -57107,ASPDH,ENSG00000204653,protein_coding -19155,MOG,ENSG00000204655,protein_coding -19153,OR2H2,ENSG00000204657,protein_coding -18402,CBY3,ENSG00000204659,protein_coding -30462,C9orf57,ENSG00000204669,protein_coding -40196,IL31,ENSG00000204671,protein_coding -57070,AKT1S1,ENSG00000204673,protein_coding -18348,FAM153CP,ENSG00000204677,protein_coding -19148,GABBR1,ENSG00000204681,protein_coding -35507,MIR1915HG,ENSG00000204682,protein_coding -35497,C10orf113,ENSG00000204683,protein_coding -19141,MAS1L,ENSG00000204687,protein_coding -19138,OR2H1,ENSG00000204688,protein_coding -19135,OR11A1,ENSG00000204694,protein_coding -19129,OR14J1,ENSG00000204695,protein_coding -19122,OR2J2,ENSG00000204700,protein_coding -19118,OR2J3,ENSG00000204701,protein_coding -19117,OR2J1,ENSG00000204702,protein_coding -19116,OR2B3,ENSG00000204703,protein_coding -19113,OR2W1,ENSG00000204704,protein_coding -33526,SPDYC,ENSG00000204710,protein_coding -30438,C9orf135,ENSG00000204711,protein_coding -19103,TRIM27,ENSG00000204713,protein_coding -35474,MALRD1,ENSG00000204740,protein_coding -18163,RANBP17,ENSG00000204764,protein_coding 
-18147,INSYN2B,ENSG00000204767,protein_coding -30350,FOXD4L5,ENSG00000204779,protein_coding -30263,AL391987.2,ENSG00000204805,protein_coding -50638,TTC25,ENSG00000204815,protein_coding -6747,MRPL53,ENSG00000204822,protein_coding -29353,MROH6,ENSG00000204839,protein_coding -39925,ATXN2,ENSG00000204842,protein_coding -6733,DCTN1,ENSG00000204843,protein_coding -30128,SPATA31A1,ENSG00000204849,protein_coding -56856,PNMA8B,ENSG00000204851,protein_coding -39907,TCTN1,ENSG00000204852,protein_coding -39901,FAM216A,ENSG00000204856,protein_coding -255,ZBTB48,ENSG00000204859,protein_coding -56836,IGFL2,ENSG00000204866,protein_coding -56831,IGFL4,ENSG00000204869,protein_coding -50586,KRTAP9-3,ENSG00000204873,protein_coding -50571,KRTAP4-8,ENSG00000204880,protein_coding -29282,GPR20,ENSG00000204882,protein_coding -50561,KRTAP1-4,ENSG00000204887,protein_coding -50555,KRT40,ENSG00000204889,protein_coding -50542,KRT25,ENSG00000204897,protein_coding -41399,MZT1,ENSG00000204899,protein_coding -17839,SPINK9,ENSG00000204909,protein_coding -50499,LRRC3C,ENSG00000204913,protein_coding -41278,PRR20B,ENSG00000204918,protein_coding -41276,PRR20A,ENSG00000204919,protein_coding -56712,ZNF155,ENSG00000204920,protein_coding -33367,UQCC3,ENSG00000204922,protein_coding -6590,FBXO48,ENSG00000204923,protein_coding -17791,GRXCR2,ENSG00000204928,protein_coding -30033,FAM221B,ENSG00000204930,protein_coding -56678,CD177,ENSG00000204936,protein_coding -56671,PSG5,ENSG00000204941,protein_coding -24348,ZNF783,ENSG00000204946,protein_coding -24342,ZNF425,ENSG00000204947,protein_coding -33306,LRRC10B,ENSG00000204950,protein_coding -50458,FBXO47,ENSG00000204952,protein_coding -39762,C12orf73,ENSG00000204954,protein_coding -17701,PCDHGA1,ENSG00000204956,protein_coding -17653,PCDHA9,ENSG00000204961,protein_coding -17652,PCDHA8,ENSG00000204962,protein_coding -17651,PCDHA7,ENSG00000204963,protein_coding -17649,PCDHA5,ENSG00000204965,protein_coding -17648,PCDHA4,ENSG00000204967,protein_coding 
-17646,PCDHA2,ENSG00000204969,protein_coding -17645,PCDHA1,ENSG00000204970,protein_coding -41171,TRIM13,ENSG00000204977,protein_coding -56592,ERICH4,ENSG00000204978,protein_coding -33261,MS4A13,ENSG00000204979,protein_coding -24188,PRSS1,ENSG00000204983,protein_coding -48970,SPIRE2,ENSG00000204991,protein_coding -28967,AARD,ENSG00000205002,protein_coding -48930,PABPN1L,ENSG00000205022,protein_coding -33017,OR5D16,ENSG00000205029,protein_coding -33016,OR5L2,ENSG00000205030,protein_coding -28918,PKHD1L1,ENSG00000205038,protein_coding -50304,SLFN12L,ENSG00000205045,protein_coding -39528,CLLU1OS,ENSG00000205057,protein_coding -23959,SLC35B4,ENSG00000205060,protein_coding -56431,LGALS7,ENSG00000205076,protein_coding -48639,SYCE1L,ENSG00000205078,protein_coding -48608,TMEM231,ENSG00000205084,protein_coding -23835,FAM71F2,ENSG00000205085,protein_coding -6156,C2orf91,ENSG00000205086,protein_coding -17396,CCNI2,ENSG00000205089,protein_coding -120,TMEM240,ENSG00000205090,protein_coding -15486,FRG2,ENSG00000205097,protein_coding -29973,FAM205A,ENSG00000205108,protein_coding -6134,CDKL4,ENSG00000205111,protein_coding -113,TMEM88B,ENSG00000205116,protein_coding -32773,ACCSL,ENSG00000205126,protein_coding -15407,C4orf47,ENSG00000205129,protein_coding -28614,TRIQK,ENSG00000205133,protein_coding -56311,SDHAF1,ENSG00000205138,protein_coding -29962,ARID3C,ENSG00000205143,protein_coding -56288,PSENEN,ENSG00000205155,protein_coding -32651,C11orf91,ENSG00000205177,protein_coding -28465,FABP9,ENSG00000205186,protein_coding -28439,ZBTB10,ENSG00000205189,protein_coding -15091,C4orf46,ENSG00000205208,protein_coding -56204,SCGB2B2,ENSG00000205209,protein_coding -49855,CCDC144NL,ENSG00000205212,protein_coding -32554,LGR4,ENSG00000205213,protein_coding -48347,PSMB10,ENSG00000205220,protein_coding -6070,VIT,ENSG00000205221,protein_coding -23476,AC105052.1,ENSG00000205236,protein_coding -23472,SPDYE2,ENSG00000205238,protein_coding -48297,E2F4,ENSG00000205250,protein_coding 
-28220,PDE7A,ENSG00000205268,protein_coding -18714,TMEM170B,ENSG00000205269,protein_coding -23415,MUC12,ENSG00000205277,protein_coding -17321,CTXN3,ENSG00000205279,protein_coding -14799,MGAT4D,ENSG00000205301,protein_coding -17252,SNX2,ENSG00000205302,protein_coding -23388,SAP25,ENSG00000205307,protein_coding -49646,NT5M,ENSG00000205309,protein_coding -38899,SARNP,ENSG00000205323,protein_coding -38879,OR6C68,ENSG00000205327,protein_coding -38873,OR6C65,ENSG00000205328,protein_coding -38867,OR6C3,ENSG00000205329,protein_coding -38866,OR6C1,ENSG00000205330,protein_coding -48145,ADGRG1,ENSG00000205336,protein_coding -32267,IPO7,ENSG00000205339,protein_coding -38770,PRR13,ENSG00000205352,protein_coding -23283,TECPR1,ENSG00000205356,protein_coding -48103,MT1H,ENSG00000205358,protein_coding -17025,SLCO6A1,ENSG00000205359,protein_coding -48096,MT1A,ENSG00000205362,protein_coding -45668,INSYN1,ENSG00000205363,protein_coding -48094,MT1M,ENSG00000205364,protein_coding -14426,CFI,ENSG00000205403,protein_coding -32134,OR52E6,ENSG00000205409,protein_coding -23190,SAMD9,ENSG00000205413,protein_coding -38714,KRT6A,ENSG00000205420,protein_coding -47937,CNEP1R1,ENSG00000205423,protein_coding -38700,KRT81,ENSG00000205426,protein_coding -43909,EXOC3L4,ENSG00000205436,protein_coding -60443,KRTAP12-3,ENSG00000205439,protein_coding -29804,IZUMO3,ENSG00000205442,protein_coding -60432,KRTAP10-2,ENSG00000205445,protein_coding -47667,TP53TG3D,ENSG00000205456,protein_coding -47719,TP53TG3C,ENSG00000205457,protein_coding -16763,ATP6AP1L,ENSG00000205464,protein_coding -43706,CCDC85C,ENSG00000205476,protein_coding -32068,OR52J3,ENSG00000205495,protein_coding -32063,OR51A2,ENSG00000205496,protein_coding -32062,OR51A4,ENSG00000205497,protein_coding -45387,C2CD4B,ENSG00000205502,protein_coding -55367,RGL3,ENSG00000205517,protein_coding -31955,NAP1L4,ENSG00000205531,protein_coding -24702,TMSB4X,ENSG00000205542,protein_coding -49323,TMEM256,ENSG00000205544,protein_coding 
-29678,C9orf92,ENSG00000205549,protein_coding -59632,CPT1B,ENSG00000205560,protein_coding -16515,SMN2,ENSG00000205571,protein_coding -16514,SERF1B,ENSG00000205572,protein_coding -60285,HMGN1,ENSG00000205581,protein_coding -38371,MUC19,ENSG00000205592,protein_coding -59615,DENND6B,ENSG00000205593,protein_coding -47370,EIF3CL,ENSG00000205609,protein_coding -47317,LCMT1,ENSG00000205629,protein_coding -5823,MFSD2B,ENSG00000205639,protein_coding -24639,VCX3B,ENSG00000205642,protein_coding -59533,CDPF1,ENSG00000205643,protein_coding -13877,HTN3,ENSG00000205649,protein_coding -43267,LIN52,ENSG00000205659,protein_coding -24599,ARSH,ENSG00000205667,protein_coding -43242,ACOT6,ENSG00000205669,protein_coding -60175,SMIM11A,ENSG00000205670,protein_coding -13756,TECRL,ENSG00000205678,protein_coding -43209,DPF3,ENSG00000205683,protein_coding -38199,MANSC4,ENSG00000205693,protein_coding -59404,LINC00634,ENSG00000205704,protein_coding -38139,ETFRF1,ENSG00000205707,protein_coding -49200,C17orf107,ENSG00000205710,protein_coding -55137,MBD3L4,ENSG00000205718,protein_coding -60162,ITSN1,ENSG00000205726,protein_coding -47147,ITPRIPL2,ENSG00000205730,protein_coding -55107,DENND1C,ENSG00000205744,protein_coding -38079,SLCO1B7,ENSG00000205754,protein_coding -24572,CRLF2,ENSG00000205755,protein_coding -60161,CRYZL1,ENSG00000205758,protein_coding -16145,C5orf51,ENSG00000205765,protein_coding -25279,GAGE1,ENSG00000205777,protein_coding -55047,ARRDC5,ENSG00000205784,protein_coding -5624,CYS1,ENSG00000205795,protein_coding -29527,PLPP6,ENSG00000205808,protein_coding -37873,KLRC2,ENSG00000205809,protein_coding -37872,KLRC3,ENSG00000205810,protein_coding -46800,C16orf96,ENSG00000205832,protein_coding -12654,GMNC,ENSG00000205835,protein_coding -16031,TTC23L,ENSG00000205838,protein_coding -37777,CLEC6A,ENSG00000205846,protein_coding -59083,C22orf42,ENSG00000205856,protein_coding -37726,NANOGNB,ENSG00000205857,protein_coding -21817,LRRC72,ENSG00000205858,protein_coding 
-40691,C1QTNF9B,ENSG00000205863,protein_coding -31895,KRTAP5-6,ENSG00000205864,protein_coding -31887,KRTAP5-2,ENSG00000205867,protein_coding -31886,KRTAP5-1,ENSG00000205869,protein_coding -27288,DEFB134,ENSG00000205882,protein_coding -27287,DEFB135,ENSG00000205883,protein_coding -27286,DEFB136,ENSG00000205884,protein_coding -49031,BHLHA9,ENSG00000205899,protein_coding -21704,ZNF316,ENSG00000205903,protein_coding -58195,DAZ4,ENSG00000205916,protein_coding -54870,ONECUT3,ENSG00000205922,protein_coding -46671,CEMP1,ENSG00000205923,protein_coding -60138,OLIG2,ENSG00000205927,protein_coding -60132,C21orf62,ENSG00000205929,protein_coding -46647,RNPS1,ENSG00000205937,protein_coding -58132,DAZ2,ENSG00000205944,protein_coding -42396,NYNRIN,ENSG00000205978,protein_coding -12446,DNAJC19,ENSG00000205981,protein_coding -31809,IFITM5,ENSG00000206013,protein_coding -53190,SMIM21,ENSG00000206026,protein_coding -53170,C18orf63,ENSG00000206043,protein_coding -27084,DEFA1,ENSG00000206047,protein_coding -53120,DOK6,ENSG00000206052,protein_coding -46579,JPT2,ENSG00000206053,protein_coding -58835,TMEM211,ENSG00000206069,protein_coding -53063,SERPINB11,ENSG00000206072,protein_coding -53062,SERPINB4,ENSG00000206073,protein_coding -53057,SERPINB5,ENSG00000206075,protein_coding -15530,ZDHHC11B,ENSG00000206077,protein_coding -60091,KRTAP19-8,ENSG00000206102,protein_coding -60080,KRTAP20-3,ENSG00000206104,protein_coding -60078,KRTAP20-4,ENSG00000206105,protein_coding -60072,KRTAP22-2,ENSG00000206106,protein_coding -60051,KRTAP27-1,ENSG00000206107,protein_coding -12940,CFAP99,ENSG00000206113,protein_coding -44671,GOLGA8O,ENSG00000206127,protein_coding -58576,TMEM191C,ENSG00000206140,protein_coding -42096,RNASE13,ENSG00000206150,protein_coding -46455,HBA1,ENSG00000206172,protein_coding -46451,HBM,ENSG00000206177,protein_coding -52781,ELOA2,ENSG00000206181,protein_coding -44502,ATP10A,ENSG00000206190,protein_coding -11986,ANKUB1,ENSG00000206199,protein_coding 
-58445,TSSK2,ENSG00000206203,protein_coding -11815,PRR23A,ENSG00000206260,protein_coding -11814,FOXL2NB,ENSG00000206262,protein_coding -11681,COL6A6,ENSG00000206384,protein_coding -52218,RAB12,ENSG00000206418,protein_coding -52201,LRRC30,ENSG00000206422,protein_coding -52173,TMEM200C,ENSG00000206432,protein_coding -19136,OR10C1,ENSG00000206474,protein_coding -19186,HLA-A,ENSG00000206503,protein_coding -11484,HACD2,ENSG00000206527,protein_coding -11313,CFAP44,ENSG00000206530,protein_coding -11302,CD200R1L,ENSG00000206531,protein_coding -11146,LNP1,ENSG00000206535,protein_coding -11107,OR5K3,ENSG00000206536,protein_coding -11019,VGLL3,ENSG00000206538,protein_coding -10389,PRSS50,ENSG00000206549,protein_coding -10103,TRIM71,ENSG00000206557,protein_coding -10050,ZCWPW2,ENSG00000206559,protein_coding -9915,ANKRD28,ENSG00000206560,protein_coding -9909,COLQ,ENSG00000206561,protein_coding -9905,METTL6,ENSG00000206562,protein_coding -28065,XKR4,ENSG00000206579,protein_coding -17912,GPX3,ENSG00000211445,protein_coding -43407,DIO2,ENSG00000211448,protein_coding -33138,SELENOH,ENSG00000211450,protein_coding -1707,DIO1,ENSG00000211452,protein_coding -38183,STK38L,ENSG00000211455,protein_coding -10356,SACM1L,ENSG00000211456,protein_coding -7690,TSN,ENSG00000211460,protein_coding -38485,SLC48A1,ENSG00000211584,protein_coding -17133,TSSK1B,ENSG00000212122,protein_coding -55065,PRR22,ENSG00000212123,protein_coding -37900,TAS2R19,ENSG00000212124,protein_coding -37898,TAS2R50,ENSG00000212126,protein_coding -37896,TAS2R14,ENSG00000212127,protein_coding -37894,TAS2R13,ENSG00000212128,protein_coding -50595,KRTAP16-1,ENSG00000212657,protein_coding -50594,KRTAP29-1,ENSG00000212658,protein_coding -50590,KRTAP9-6,ENSG00000212659,protein_coding -52470,CTAGE1,ENSG00000212710,protein_coding -49876,C17orf51,ENSG00000212719,protein_coding -50574,KRTAP4-11,ENSG00000212721,protein_coding -50573,KRTAP4-9,ENSG00000212722,protein_coding -50566,KRTAP2-3,ENSG00000212724,protein_coding 
-50564,KRTAP2-1,ENSG00000212725,protein_coding -26526,RTL8B,ENSG00000212747,protein_coding -24281,OR2A42,ENSG00000212807,protein_coding -31749,RNF208,ENSG00000212864,protein_coding -50556,KRTAP3-3,ENSG00000212899,protein_coding -50557,KRTAP3-2,ENSG00000212900,protein_coding -50559,KRTAP3-1,ENSG00000212901,protein_coding -60547,MT-ND4L,ENSG00000212907,protein_coding -5090,MAP10,ENSG00000212916,protein_coding -60442,KRTAP12-4,ENSG00000212933,protein_coding -60433,KRTAP10-3,ENSG00000212935,protein_coding -60073,KRTAP6-3,ENSG00000212938,protein_coding -29127,POU5F1B,ENSG00000212993,protein_coding -57535,ZNF580,ENSG00000213015,protein_coding -57267,ZNF611,ENSG00000213020,protein_coding -57141,KLK9,ENSG00000213022,protein_coding -57110,SYT3,ENSG00000213023,protein_coding -57074,NUP62,ENSG00000213024,protein_coding -5017,SPHAR,ENSG00000213029,protein_coding -56995,CGB8,ENSG00000213030,protein_coding -4301,DENND1B,ENSG00000213047,protein_coding -3840,SFT2D2,ENSG00000213064,protein_coding -21451,FGFR1OP,ENSG00000213066,protein_coding -21266,SCAF8,ENSG00000213079,protein_coding -3608,CFAP45,ENSG00000213085,protein_coding -3579,ACKR1,ENSG00000213088,protein_coding -56046,ZNF254,ENSG00000213096,protein_coding -12774,TCTEX1D2,ENSG00000213123,protein_coding -12574,CRYGS,ENSG00000213139,protein_coding -44011,CRIP1,ENSG00000213145,protein_coding -8344,KLHL23,ENSG00000213160,protein_coding -3206,LINGO4,ENSG00000213171,protein_coding -37283,FAM24B,ENSG00000213185,protein_coding -12181,TRIM59,ENSG00000213186,protein_coding -3159,MLLT11,ENSG00000213190,protein_coding -24410,ASIC3,ENSG00000213199,protein_coding -24397,GIMAP1,ENSG00000213203,protein_coding -20323,AL049697.1,ENSG00000213204,protein_coding -31699,CCDC183,ENSG00000213213,protein_coding -24279,ARHGEF35,ENSG00000213214,protein_coding -24263,OR2F1,ENSG00000213215,protein_coding -51413,CSH2,ENSG00000213218,protein_coding -31665,DNLZ,ENSG00000213221,protein_coding -43646,TCL1B,ENSG00000213231,protein_coding 
-51227,SUPT4H1,ENSG00000213246,protein_coding -23910,TSGA13,ENSG00000213265,protein_coding -2711,NRAS,ENSG00000213281,protein_coding -55418,ZNF625-ZNF20,ENSG00000213297,protein_coding -18406,LTC4S,ENSG00000213316,protein_coding -7208,ANKRD39,ENSG00000213337,protein_coding -55332,QTRT1,ENSG00000213339,protein_coding -36896,CHUK,ENSG00000213341,protein_coding -18313,MXD3,ENSG00000213347,protein_coding -2561,GSTM2,ENSG00000213366,protein_coding -48417,COG8,ENSG00000213380,protein_coding -36831,ARHGAP19,ENSG00000213390,protein_coding -26852,HAUS7,ENSG00000213397,protein_coding -48348,LCAT,ENSG00000213398,protein_coding -26822,MAGEA12,ENSG00000213401,protein_coding -33680,PTPRCAP,ENSG00000213402,protein_coding -23364,PVRIG,ENSG00000213413,protein_coding -50575,KRTAP4-12,ENSG00000213416,protein_coding -50567,KRTAP2-4,ENSG00000213417,protein_coding -23361,GPC2,ENSG00000213420,protein_coding -50538,KRT222,ENSG00000213424,protein_coding -33566,SIPA1,ENSG00000213445,protein_coding -22739,ERV3-1,ENSG00000213462,protein_coding -43167,SYNJ2BP,ENSG00000213463,protein_coding -33503,ARL2,ENSG00000213465,protein_coding -46158,TTLL13P,ENSG00000213471,protein_coding -2226,GBP7,ENSG00000213512,protein_coding -2219,RBMXL1,ENSG00000213516,protein_coding -17619,SRA1,ENSG00000213523,protein_coding -10660,STIMATE,ENSG00000213533,protein_coding -36346,DNAJC9,ENSG00000213551,protein_coding -29438,C8orf82,ENSG00000213563,protein_coding -45715,CPLX3,ENSG00000213578,protein_coding -17431,VDAC1,ENSG00000213585,protein_coding -19486,ZBTB9,ENSG00000213588,protein_coding -33135,TMX2,ENSG00000213593,protein_coding -45632,HEXA,ENSG00000213614,protein_coding -32872,NDUFS3,ENSG00000213619,protein_coding -1908,LEPROT,ENSG00000213625,protein_coding -5990,LBH,ENSG00000213626,protein_coding -54883,ADAT3,ENSG00000213638,protein_coding -5966,PPP1CB,ENSG00000213639,protein_coding -47443,SULT1A4,ENSG00000213648,protein_coding -19406,GPSM3,ENSG00000213654,protein_coding -47418,LAT,ENSG00000213658,protein_coding 
-10466,NCKIPSD,ENSG00000213672,protein_coding -19391,ATF6B,ENSG00000213676,protein_coding -10451,TREX1,ENSG00000213689,protein_coding -30688,S1PR3,ENSG00000213694,protein_coding -5893,SLC35F6,ENSG00000213699,protein_coding -54506,FAM209B,ENSG00000213714,protein_coding -19348,CLIC1,ENSG00000213719,protein_coding -19347,DDAH2,ENSG00000213722,protein_coding -42712,RPS29,ENSG00000213741,protein_coding -13852,UGT2B11,ENSG00000213759,protein_coding -19316,ATP6V1G2,ENSG00000213760,protein_coding -57651,ZNF134,ENSG00000213762,protein_coding -19258,GTF2H4,ENSG00000213780,protein_coding -37955,DDX47,ENSG00000213782,protein_coding -57275,ZNF888,ENSG00000213793,protein_coding -57302,ZNF845,ENSG00000213799,protein_coding -37869,KLRK1,ENSG00000213809,protein_coding -57189,CEACAM18,ENSG00000213822,protein_coding -46908,EMP2,ENSG00000213853,protein_coding -49317,KCTD11,ENSG00000213859,protein_coding -19147,UBD,ENSG00000213886,protein_coding -56797,PPM1N,ENSG00000213889,protein_coding -56752,CEACAM16,ENSG00000213892,protein_coding -9186,SLC23A3,ENSG00000213901,protein_coding -42390,LTB4R,ENSG00000213903,protein_coding -42389,LTB4R2,ENSG00000213906,protein_coding -46770,DNASE1,ENSG00000213918,protein_coding -42377,MDP1,ENSG00000213920,protein_coding -56502,LEUTX,ENSG00000213921,protein_coding -59262,CSNK1E,ENSG00000213923,protein_coding -29967,CCL27,ENSG00000213927,protein_coding -42365,IRF9,ENSG00000213928,protein_coding -29964,GALT,ENSG00000213930,protein_coding -32092,HBE1,ENSG00000213931,protein_coding -32088,HBG1,ENSG00000213934,protein_coding -46716,CLDN9,ENSG00000213937,protein_coding -16240,ITGA1,ENSG00000213949,protein_coding -56149,NUDT19,ENSG00000213965,protein_coding -56040,ZNF726,ENSG00000213967,protein_coding -55993,ZNF99,ENSG00000213973,protein_coding -49143,TAX1BP3,ENSG00000213977,protein_coding -42332,AP1G2,ENSG00000213983,protein_coding -55865,ZNF90,ENSG00000213988,protein_coding -41839,NAXD,ENSG00000213995,protein_coding 
-55823,TM6SF2,ENSG00000213996,protein_coding -55813,MEF2B,ENSG00000213999,protein_coding -44923,GANC,ENSG00000214013,protein_coding -9772,TTLL3,ENSG00000214021,protein_coding -24380,REPIN1,ENSG00000214022,protein_coding -31914,MRPL23,ENSG00000214026,protein_coding -40526,ZNF891,ENSG00000214029,protein_coding -29745,IFNA7,ENSG00000214042,protein_coding -55681,SMIM7,ENSG00000214046,protein_coding -27620,FBXO16,ENSG00000214050,protein_coding -31864,TSPAN4,ENSG00000214063,protein_coding -54035,CPNE1,ENSG00000214078,protein_coding -51952,ARL16,ENSG00000214087,protein_coding -12789,SMCO1,ENSG00000214097,protein_coding -24100,WEE2,ENSG00000214102,protein_coding -24922,MAGEB1,ENSG00000214107,protein_coding -18603,LYRM4,ENSG00000214113,protein_coding -1248,MYCBP,ENSG00000214114,protein_coding -24036,TMEM213,ENSG00000214128,protein_coding -51752,PRCD,ENSG00000214140,protein_coding -12526,ALG3,ENSG00000214160,protein_coding -1177,SH3D21,ENSG00000214193,protein_coding -23624,SMIM30,ENSG00000214194,protein_coding -55339,C19orf38,ENSG00000214212,protein_coding -12162,IQCJ,ENSG00000214216,protein_coding -51167,C17orf67,ENSG00000214226,protein_coding -12017,MINDY4B,ENSG00000214237,protein_coding -23431,FIS1,ENSG00000214253,protein_coding -44404,AC124312.1,ENSG00000214265,protein_coding -42068,ANG,ENSG00000214274,protein_coding -37453,SCART1,ENSG00000214279,protein_coding -37361,NPS,ENSG00000214285,protein_coding -34560,COLCA2,ENSG00000214290,protein_coding -23365,SPDYE3,ENSG00000214300,protein_coding -23354,MBLAC1,ENSG00000214309,protein_coding -11573,C3orf56,ENSG00000214324,protein_coding -6985,FOXI3,ENSG00000214336,protein_coding -20821,SOGA3,ENSG00000214338,protein_coding -18190,NEURL1B,ENSG00000214357,protein_coding -18182,EFCAB9,ENSG00000214360,protein_coding -12932,HAUS3,ENSG00000214367,protein_coding -34326,VSTM5,ENSG00000214376,protein_coding -31720,LCNL1,ENSG00000214402,protein_coding -37093,BBIP1,ENSG00000214413,protein_coding -34215,TRIM77,ENSG00000214414,protein_coding 
-23062,GNAT3,ENSG00000214415,protein_coding -36993,AS3MT,ENSG00000214435,protein_coding -50825,FAM187A,ENSG00000214447,protein_coding -55028,PLIN5,ENSG00000214456,protein_coding -59012,SEC14L6,ENSG00000214491,protein_coding -17836,SPINK13,ENSG00000214510,protein_coding -38647,HIGD1C,ENSG00000214511,protein_coding -6687,NOTO,ENSG00000214513,protein_coding -33922,PPME1,ENSG00000214517,protein_coding -50565,KRTAP2-2,ENSG00000214518,protein_coding -33879,STARD10,ENSG00000214530,protein_coding -33827,ZNF705E,ENSG00000214534,protein_coding -36622,NUTM2D,ENSG00000214562,protein_coding -45955,CPEB1,ENSG00000214575,protein_coding -6354,EML6,ENSG00000214595,protein_coding -19877,DEFB113,ENSG00000214642,protein_coding -19875,DEFB133,ENSG00000214643,protein_coding -22686,ZNF727,ENSG00000214652,protein_coding -31232,B3GNT10,ENSG00000214654,protein_coding -36383,ZSWIM8,ENSG00000214655,protein_coding -10596,IQCF5,ENSG00000214681,protein_coding -10589,IQCF6,ENSG00000214686,protein_coding -36300,C10orf105,ENSG00000214688,protein_coding -6126,ARHGEF33,ENSG00000214694,protein_coding -38179,C12orf71,ENSG00000214700,protein_coding -10551,IFRD2,ENSG00000214706,protein_coding -6001,CAPN14,ENSG00000214711,protein_coding -24588,ZBED1,ENSG00000214717,protein_coding -19711,AL096814.1,ENSG00000214732,protein_coding -19695,TOMM6,ENSG00000214736,protein_coding -33373,HNRNPUL2,ENSG00000214753,protein_coding -33365,CSKMT,ENSG00000214756,protein_coding -33265,MS4A18,ENSG00000214782,protein_coding -33253,MS4A4E,ENSG00000214787,protein_coding -29065,FER1L6,ENSG00000214814,protein_coding -49837,CDRT15L2,ENSG00000214819,protein_coding -26948,MTCP1,ENSG00000214827,protein_coding -5734,RAD51AP2,ENSG00000214842,protein_coding -49692,EVPLL,ENSG00000214860,protein_coding -5539,DCDC2C,ENSG00000214866,protein_coding -33125,SMTNL1,ENSG00000214872,protein_coding -32923,TRIM64C,ENSG00000214891,protein_coding -26848,PNMA6E,ENSG00000214897,protein_coding -30573,SPATA31D1,ENSG00000214929,protein_coding 
-47114,NPIPA8,ENSG00000214940,protein_coding -49576,ZSWIM7,ENSG00000214941,protein_coding -42488,GPR33,ENSG00000214943,protein_coding -16589,ARHGEF28,ENSG00000214944,protein_coding -49561,TBC1D26,ENSG00000214946,protein_coding -28593,LRRC69,ENSG00000214954,protein_coding -21812,CRPPA,ENSG00000214960,protein_coding -47087,NPIPA7,ENSG00000214967,protein_coding -49445,GSG1L2,ENSG00000214978,protein_coding -37711,ACSM4,ENSG00000215009,protein_coding -58475,RTL10,ENSG00000215012,protein_coding -21728,COL28A1,ENSG00000215018,protein_coding -37693,PHB2,ENSG00000215021,protein_coding -26002,TCP11X2,ENSG00000215029,protein_coding -49314,NEURL4,ENSG00000215041,protein_coding -21699,GRID2IP,ENSG00000215045,protein_coding -25634,CXorf49B,ENSG00000215113,protein_coding -28122,UBXN2B,ENSG00000215114,protein_coding -25629,CXorf49,ENSG00000215115,protein_coding -30184,CBWD6,ENSG00000215126,protein_coding -46762,C16orf90,ENSG00000215131,protein_coding -46697,PRSS41,ENSG00000215148,protein_coding -25476,NLRP2B,ENSG00000215174,protein_coding -31872,MUC5AC,ENSG00000215182,protein_coding -30025,MSMP,ENSG00000215183,protein_coding -45647,GOLGA6B,ENSG00000215186,protein_coding -30004,FAM166B,ENSG00000215187,protein_coding -58385,PEX26,ENSG00000215193,protein_coding -13507,GRXCR1,ENSG00000215203,protein_coding -15656,C5orf49,ENSG00000215217,protein_coding -15628,UBE2QL1,ENSG00000215218,protein_coding -53396,FASTKD5,ENSG00000215251,protein_coding -44726,GOLGA8B,ENSG00000215252,protein_coding -27754,KCNU1,ENSG00000215262,protein_coding -25277,GAGE12G,ENSG00000215269,protein_coding -42311,HOMEZ,ENSG00000215271,protein_coding -25268,GAGE10,ENSG00000215274,protein_coding -42308,RNF212B,ENSG00000215277,protein_coding -25054,DDX3X,ENSG00000215301,protein_coding -53387,VPS16,ENSG00000215305,protein_coding -27298,ZNF705D,ENSG00000215343,protein_coding -27159,ZNF705B,ENSG00000215356,protein_coding -27115,ZNF705G,ENSG00000215372,protein_coding -12880,MYL5,ENSG00000215375,protein_coding 
-53315,SCRT2,ENSG00000215397,protein_coding -53180,ZNF407,ENSG00000215421,protein_coding -54556,NPEPL1,ENSG00000215440,protein_coding -60434,KRTAP10-4,ENSG00000215454,protein_coding -60431,KRTAP10-1,ENSG00000215455,protein_coding -52830,RPL17-C18orf32,ENSG00000215472,protein_coding -52789,SKOR2,ENSG00000215474,protein_coding -41091,SIAH3,ENSG00000215475,protein_coding -53936,EFCAB8,ENSG00000215529,protein_coding -53868,DEFB116,ENSG00000215545,protein_coding -53865,DEFB115,ENSG00000215547,protein_coding -58346,GAB4,ENSG00000215568,protein_coding -53326,C20orf202,ENSG00000215595,protein_coding -13071,HMX1,ENSG00000215612,protein_coding -51960,GCGR,ENSG00000215644,protein_coding -518,RSC1A1,ENSG00000215695,protein_coding -507,CELA2B,ENSG00000215704,protein_coding -21292,TMEM242,ENSG00000215712,protein_coding -2534,TMEM167B,ENSG00000215717,protein_coding -2926,FAM72D,ENSG00000215784,protein_coding -249,TNFRSF25,ENSG00000215788,protein_coding -137,SLC35E2A,ENSG00000215790,protein_coding -4759,ZC3H11B,ENSG00000215817,protein_coding -3662,TSTD1,ENSG00000215845,protein_coding -3226,RPTN,ENSG00000215853,protein_coding -1721,CYB5RL,ENSG00000215883,protein_coding -738,LACTBL1,ENSG00000215906,protein_coding -400,C1orf167,ENSG00000215910,protein_coding -173,TTC34,ENSG00000215912,protein_coding -116,ATAD3C,ENSG00000215915,protein_coding -55762,IFI30,ENSG00000216490,protein_coding -56746,IGSF23,ENSG00000216588,protein_coding -25276,GAGE12E,ENSG00000216649,protein_coding -9656,FAM240C,ENSG00000216921,protein_coding -35717,CCDC7,ENSG00000216937,protein_coding -17359,FNIP1,ENSG00000217128,protein_coding -8427,SP9,ENSG00000217236,protein_coding -59631,SYCE3,ENSG00000217442,protein_coding -48259,CKLF,ENSG00000217555,protein_coding -46786,PAM16,ENSG00000217930,protein_coding -22413,CDC14C,ENSG00000218305,protein_coding -15328,TENM3,ENSG00000218336,protein_coding -6080,CEBPZOS,ENSG00000218739,protein_coding -5792,TDRD15,ENSG00000218819,protein_coding 
-21645,PAPOLB,ENSG00000218823,protein_coding -57529,ZNF579,ENSG00000218891,protein_coding -708,CELA3B,ENSG00000219073,protein_coding -49280,RNASEK,ENSG00000219200,protein_coding -33465,CATSPERZ,ENSG00000219435,protein_coding -59564,TAFA5,ENSG00000219438,protein_coding -566,NBPF1,ENSG00000219481,protein_coding -21734,UMAD1,ENSG00000219545,protein_coding -18602,PPP1R3G,ENSG00000219607,protein_coding -5828,FAM228B,ENSG00000219626,protein_coding -54907,LINGO3,ENSG00000220008,protein_coding -55308,ZGLP1,ENSG00000220201,protein_coding -49390,VAMP2,ENSG00000220205,protein_coding -32919,TRIM51GP,ENSG00000220948,protein_coding -24267,OR6B1,ENSG00000221813,protein_coding -27565,EBF2,ENSG00000221818,protein_coding -19723,C6orf226,ENSG00000221821,protein_coding -6583,PPP3R1,ENSG00000221823,protein_coding -56656,PSG3,ENSG00000221826,protein_coding -29991,FANCG,ENSG00000221829,protein_coding -24268,OR2A5,ENSG00000221836,protein_coding -60439,KRTAP10-9,ENSG00000221837,protein_coding -23350,AP4M1,ENSG00000221838,protein_coding -32978,OR4A5,ENSG00000221840,protein_coding -5938,C2orf16,ENSG00000221843,protein_coding -50560,KRTAP1-5,ENSG00000221852,protein_coding -24243,TAS2R41,ENSG00000221855,protein_coding -24271,OR2A12,ENSG00000221858,protein_coding -60440,KRTAP10-10,ENSG00000221859,protein_coding -60444,KRTAP12-2,ENSG00000221864,protein_coding -23940,PLXNA4,ENSG00000221866,protein_coding -26818,MAGEA3,ENSG00000221867,protein_coding -27966,CEBPD,ENSG00000221869,protein_coding -57278,ZNF816-ZNF321P,ENSG00000221874,protein_coding -56663,PSG7,ENSG00000221878,protein_coding -50562,KRTAP1-3,ENSG00000221880,protein_coding -49122,AC087498.2,ENSG00000221882,protein_coding -10471,ARIH2OS,ENSG00000221883,protein_coding -18064,ZBED8,ENSG00000221886,protein_coding -53068,HMSD,ENSG00000221887,protein_coding -5402,OR1C1,ENSG00000221888,protein_coding -59291,NPTXR,ENSG00000221890,protein_coding -22454,POM121L12,ENSG00000221900,protein_coding -23315,FAM200A,ENSG00000221909,protein_coding 
-24262,OR2F2,ENSG00000221910,protein_coding -27568,PPP2R2A,ENSG00000221914,protein_coding -57007,C19orf73,ENSG00000221916,protein_coding -57250,ZNF880,ENSG00000221923,protein_coding -49555,TRIM16,ENSG00000221926,protein_coding -34905,OR6X1,ENSG00000221931,protein_coding -34975,HEPN1,ENSG00000221932,protein_coding -24269,OR2A25,ENSG00000221933,protein_coding -24224,TAS2R40,ENSG00000221937,protein_coding -24274,OR2A14,ENSG00000221938,protein_coding -9435,TIGD1,ENSG00000221944,protein_coding -56242,FXYD7,ENSG00000221946,protein_coding -28304,XKR9,ENSG00000221947,protein_coding -32954,OR4C12,ENSG00000221954,protein_coding -11510,SLC12A8,ENSG00000221955,protein_coding -59149,APOL6,ENSG00000221963,protein_coding -33322,FADS3,ENSG00000221968,protein_coding -24289,OR2A1,ENSG00000221970,protein_coding -42133,OR4E2,ENSG00000221977,protein_coding -106,CCNL2,ENSG00000221978,protein_coding -55791,UBA52,ENSG00000221983,protein_coding -2543,MYBPHL,ENSG00000221986,protein_coding -19396,PPT2,ENSG00000221988,protein_coding -24272,OR2A2,ENSG00000221989,protein_coding -25175,ZNF630,ENSG00000221994,protein_coding -50055,TIAF1,ENSG00000221995,protein_coding -32021,OR52B4,ENSG00000221996,protein_coding -1462,BTBD19,ENSG00000222009,protein_coding -23483,FAM185A,ENSG00000222011,protein_coding -7782,RAB6C,ENSG00000222014,protein_coding -60176,FAM243A,ENSG00000222018,protein_coding -42300,PSMB11,ENSG00000222028,protein_coding -41961,POTEM,ENSG00000222036,protein_coding -7838,POTEJ,ENSG00000222038,protein_coding -1062,DCDC2B,ENSG00000222046,protein_coding -36392,C10orf55,ENSG00000222047,protein_coding -34235,TRIM49D1,ENSG00000223417,protein_coding -27302,USP17L2,ENSG00000223443,protein_coding -48452,EXOSC6,ENSG00000223496,protein_coding -19463,VPS52,ENSG00000223501,protein_coding -49518,CDRT15,ENSG00000223510,protein_coding -55410,ZNF844,ENSG00000223547,protein_coding -13092,USP17L15,ENSG00000223569,protein_coding -44978,CKMT1A,ENSG00000223572,protein_coding 
-55056,TINCR,ENSG00000223573,protein_coding -25321,CENPVL1,ENSG00000223591,protein_coding -35521,EBLN1,ENSG00000223601,protein_coding -32085,HBD,ENSG00000223609,protein_coding -22703,ZNF735,ENSG00000223614,protein_coding -57547,RFPL4A,ENSG00000223638,protein_coding -6190,C1GALT1C1L,ENSG00000223658,protein_coding -55800,CERS1,ENSG00000223802,protein_coding -19445,HLA-DPB1,ENSG00000223865,protein_coding -34815,C1QTNF5,ENSG00000223953,protein_coding -99,CPTP,ENSG00000224051,protein_coding -26323,CT47A10,ENSG00000224089,protein_coding -26533,ETDB,ENSG00000224107,protein_coding -25319,CENPVL3,ENSG00000224109,protein_coding -51425,PRR29,ENSG00000224383,protein_coding -19384,C4B,ENSG00000224389,protein_coding -57052,ADM5,ENSG00000224420,protein_coding -26703,CXorf51A,ENSG00000224440,protein_coding -48513,ATXN1L,ENSG00000224470,protein_coding -18704,SMIM13,ENSG00000224531,protein_coding -47985,HNRNPA1P48,ENSG00000224578,protein_coding -19090,GPX5,ENSG00000224586,protein_coding -25269,GAGE12J,ENSG00000224659,protein_coding -47015,NPIPA3,ENSG00000224712,protein_coding -51925,NDUFAF8,ENSG00000224877,protein_coding -25278,GAGE12H,ENSG00000224902,protein_coding -56769,APOC4-APOC2,ENSG00000224916,protein_coding -23812,PRRT4,ENSG00000224940,protein_coding -24896,PPP4R3C,ENSG00000224960,protein_coding -40092,TMEM233,ENSG00000224982,protein_coding -26845,PNMA6F,ENSG00000225110,protein_coding -51920,PVALEF,ENSG00000225180,protein_coding -50855,PLEKHM1,ENSG00000225190,protein_coding -27164,USP17L3,ENSG00000225327,protein_coding -45609,CT62,ENSG00000225362,protein_coding -25661,FAM236D,ENSG00000225396,protein_coding -9844,MKRN2OS,ENSG00000225526,protein_coding -59332,Z82206.1,ENSG00000225528,protein_coding -3208,C2CD4D,ENSG00000225556,protein_coding -48906,ZNF469,ENSG00000225614,protein_coding -51961,MCRIP1,ENSG00000225663,protein_coding -10460,SLC26A6,ENSG00000225697,protein_coding -24219,OR6V1,ENSG00000225781,protein_coding -33832,DEFB131B,ENSG00000225805,protein_coding 
-1074,FAM229A,ENSG00000225828,protein_coding -36030,ERCC6,ENSG00000225830,protein_coding -37462,FRG2B,ENSG00000225899,protein_coding -18741,NOL7,ENSG00000225921,protein_coding -24278,CTAGE4,ENSG00000225932,protein_coding -16324,C5orf67,ENSG00000225940,protein_coding -57001,NTF4,ENSG00000225950,protein_coding -21599,ELFN1,ENSG00000225968,protein_coding -45233,PIGBOS1,ENSG00000225973,protein_coding -26331,CT47A6,ENSG00000226023,protein_coding -8777,FTCDNL1,ENSG00000226124,protein_coding -44006,TEX22,ENSG00000226174,protein_coding -32032,OR52I2,ENSG00000226288,protein_coding -9616,CROCC2,ENSG00000226321,protein_coding -24907,DCAF8L1,ENSG00000226372,protein_coding -27301,USP17L7,ENSG00000226430,protein_coding -7665,TMEM185B,ENSG00000226479,protein_coding -29290,AC138647.1,ENSG00000226490,protein_coding -26325,CT47A9,ENSG00000226600,protein_coding -17979,KIF4B,ENSG00000226650,protein_coding -26319,CT47A12,ENSG00000226685,protein_coding -21780,AC013470.2,ENSG00000226690,protein_coding -53283,HSBP1L1,ENSG00000226742,protein_coding -37903,TAS2R46,ENSG00000226761,protein_coding -56693,SRRM5,ENSG00000226763,protein_coding -25767,PGAM4,ENSG00000226784,protein_coding -41196,C13orf42,ENSG00000226792,protein_coding -13615,ERVMER34-1,ENSG00000226887,protein_coding -26321,CT47A11,ENSG00000226929,protein_coding -58109,RBMY1J,ENSG00000226941,protein_coding -19318,LTA,ENSG00000226979,protein_coding -43652,C14orf132,ENSG00000227051,protein_coding -19466,WDR46,ENSG00000227057,protein_coding -40533,ANHX,ENSG00000227059,protein_coding -10959,ZNF717,ENSG00000227124,protein_coding -13104,USP17L5,ENSG00000227140,protein_coding -41279,PRR20D,ENSG00000227151,protein_coding -26644,SPANXB1,ENSG00000227234,protein_coding -36638,KLLN,ENSG00000227268,protein_coding -36038,PARG,ENSG00000227345,protein_coding -23964,AKR1B15,ENSG00000227471,protein_coding -25274,GAGE12D,ENSG00000227488,protein_coding -54882,SCAMP4,ENSG00000227500,protein_coding -19320,LTB,ENSG00000227507,protein_coding 
-13089,USP17L12,ENSG00000227551,protein_coding -43954,RD3L,ENSG00000227729,protein_coding -739,TEX46,ENSG00000227868,protein_coding -36130,MRLN,ENSG00000227877,protein_coding -23480,POLR2J2,ENSG00000228049,protein_coding -52942,BOD1L2,ENSG00000228075,protein_coding -29751,IFNA14,ENSG00000228083,protein_coding -39180,AC078927.1,ENSG00000228144,protein_coding -5430,OR2M3,ENSG00000228198,protein_coding -60541,MT-ATP8,ENSG00000228253,protein_coding -31174,ORM2,ENSG00000228278,protein_coding -54838,C19orf24,ENSG00000228300,protein_coding -5906,OST4,ENSG00000228474,protein_coding -7246,C2orf92,ENSG00000228486,protein_coding -26329,CT47A7,ENSG00000228517,protein_coding -57296,VN1R4,ENSG00000228567,protein_coding -36498,NUTM2E,ENSG00000228570,protein_coding -124,FNDC10,ENSG00000228594,protein_coding -34642,CLDN25,ENSG00000228607,protein_coding -17578,PROB1,ENSG00000228672,protein_coding -50889,ARL17B,ENSG00000228696,protein_coding -16736,DHFR,ENSG00000228716,protein_coding -19352,SAPCD1,ENSG00000228727,protein_coding -26563,CT45A5,ENSG00000228836,protein_coding -13109,USP17L30,ENSG00000228856,protein_coding -57855,TSPY3,ENSG00000228927,protein_coding -38925,RPL41,ENSG00000229117,protein_coding -33288,PGA4,ENSG00000229183,protein_coding -57548,RFPL4AL1,ENSG00000229292,protein_coding -31173,ORM1,ENSG00000229314,protein_coding -42580,SFTA3,ENSG00000229415,protein_coding -10442,SPINK8,ENSG00000229453,protein_coding -45008,PATL2,ENSG00000229474,protein_coding -37308,NKX1-2,ENSG00000229544,protein_coding -57849,TSPY8,ENSG00000229549,protein_coding -458,PRAMEF25,ENSG00000229571,protein_coding -13103,USP17L26,ENSG00000229579,protein_coding -50989,PRAC2,ENSG00000229637,protein_coding -41277,PRR20C,ENSG00000229665,protein_coding -24996,AL121578.2,ENSG00000229674,protein_coding -55987,ZNF492,ENSG00000229676,protein_coding -47538,ZNF688,ENSG00000229809,protein_coding -55157,PET100,ENSG00000229833,protein_coding -33287,PGA3,ENSG00000229859,protein_coding 
-15162,GK3P,ENSG00000229894,protein_coding -13086,FAM90A26,ENSG00000229924,protein_coding -21844,PRPS1L1,ENSG00000229937,protein_coding -10592,IQCF3,ENSG00000229972,protein_coding -44260,POTEB2,ENSG00000230031,protein_coding -31183,TEX53,ENSG00000230054,protein_coding -19830,ANKRD66,ENSG00000230062,protein_coding -4080,ACBD6,ENSG00000230124,protein_coding -18483,OR4F3,ENSG00000230178,protein_coding -23489,NFE4,ENSG00000230257,protein_coding -23765,SSU72P8,ENSG00000230268,protein_coding -11103,OR5H6,ENSG00000230301,protein_coding -26327,CT47A8,ENSG00000230347,protein_coding -22815,SPDYE21P,ENSG00000230358,protein_coding -13102,USP17L25,ENSG00000230430,protein_coding -29905,ANKRD18B,ENSG00000230453,protein_coding -56855,PPP5D1,ENSG00000230510,protein_coding -55138,MBD3L2,ENSG00000230522,protein_coding -27112,USP17L1,ENSG00000230549,protein_coding -17323,CCDC192,ENSG00000230561,protein_coding -26335,CT47A4,ENSG00000230594,protein_coding -31184,TEX48,ENSG00000230601,protein_coding -23861,AC011005.1,ENSG00000230626,protein_coding -37915,PRB4,ENSG00000230657,protein_coding -2278,SETSIP,ENSG00000230667,protein_coding -44829,ANKRD63,ENSG00000230778,protein_coding -24825,YY2,ENSG00000230797,protein_coding -18790,STMND1,ENSG00000230873,protein_coding -48760,HSBP1,ENSG00000230989,protein_coding -13106,USP17L28,ENSG00000231051,protein_coding -60081,KRTAP21-3,ENSG00000231068,protein_coding -11095,OR5H1,ENSG00000231192,protein_coding -11931,PLSCR5,ENSG00000231213,protein_coding -50760,CFAP97D1,ENSG00000231256,protein_coding -57528,SBK3,ENSG00000231274,protein_coding -19444,HLA-DPA1,ENSG00000231389,protein_coding -13087,USP17L10,ENSG00000231396,protein_coding -19464,RPS18,ENSG00000231500,protein_coding -13107,USP17L29,ENSG00000231637,protein_coding -39442,TSPAN19,ENSG00000231738,protein_coding -4255,AL136454.1,ENSG00000231767,protein_coding -52158,AKAIN1,ENSG00000231824,protein_coding -19386,CYP21A2,ENSG00000231852,protein_coding -11110,OR5K2,ENSG00000231861,protein_coding 
-37887,PRH1,ENSG00000231887,protein_coding -56661,PSG1,ENSG00000231924,protein_coding -19471,TAPBP,ENSG00000231925,protein_coding -17990,PPP1R2B,ENSG00000231989,protein_coding -24877,MAGEB6B,ENSG00000232030,protein_coding -19091,ZBED9,ENSG00000232040,protein_coding -42101,TMEM253,ENSG00000232070,protein_coding -10448,TMA7,ENSG00000232112,protein_coding -3959,TEX50,ENSG00000232113,protein_coding -26311,MCTS1,ENSG00000232119,protein_coding -8953,DYTN,ENSG00000232125,protein_coding -46754,MTRNR2L4,ENSG00000232196,protein_coding -4357,ASCL5,ENSG00000232237,protein_coding -46865,TMEM114,ENSG00000232258,protein_coding -60049,KRTAP25-1,ENSG00000232263,protein_coding -13101,USP17L24,ENSG00000232264,protein_coding -32033,OR52I1,ENSG00000232268,protein_coding -11109,OR5K1,ENSG00000232382,protein_coding -53651,SMIM26,ENSG00000232388,protein_coding -13090,USP17L13,ENSG00000232399,protein_coding -454,PRAMEF6,ENSG00000232423,protein_coding -31706,AJM1,ENSG00000232434,protein_coding -25384,KANTR,ENSG00000232593,protein_coding -19427,HLA-DQB2,ENSG00000232629,protein_coding -44680,GOLGA8N,ENSG00000232653,protein_coding -19319,TNF,ENSG00000232810,protein_coding -53631,PET117,ENSG00000232838,protein_coding -49959,LYRM9,ENSG00000232859,protein_coding -27310,DEFB130A,ENSG00000232948,protein_coding -47119,NPIPA9,ENSG00000233024,protein_coding -44837,PHGR1,ENSG00000233041,protein_coding -27295,DEFB130B,ENSG00000233050,protein_coding -7868,RAB6D,ENSG00000233087,protein_coding -13088,USP17L11,ENSG00000233136,protein_coding -31751,RNF224,ENSG00000233198,protein_coding -47376,NPIPB7,ENSG00000233232,protein_coding -11097,OR5H15,ENSG00000233412,protein_coding -33140,BTBD18,ENSG00000233436,protein_coding -57515,TMEM238,ENSG00000233493,protein_coding -9569,TWIST2,ENSG00000233608,protein_coding -49477,PIRT,ENSG00000233670,protein_coding -11818,PRR23C,ENSG00000233701,protein_coding -7144,AC092835.1,ENSG00000233757,protein_coding -34236,TRIM49D2,ENSG00000233802,protein_coding 
-57847,TSPY4,ENSG00000233803,protein_coding -29757,IFNA13,ENSG00000233816,protein_coding -19037,HIST1H2BN,ENSG00000233822,protein_coding -44317,POTEB,ENSG00000233917,protein_coding -55202,RPS28,ENSG00000233927,protein_coding -45091,CTXN2,ENSG00000233932,protein_coding -527,UQCRHL,ENSG00000233954,protein_coding -25434,PAGE2,ENSG00000234068,protein_coding -19209,TRIM26,ENSG00000234127,protein_coding -23761,TMEM229A,ENSG00000234224,protein_coding -41280,PRR20E,ENSG00000234278,protein_coding -18382,ZNF879,ENSG00000234284,protein_coding -60381,H2BFS,ENSG00000234289,protein_coding -58495,CCDC188,ENSG00000234409,protein_coding -58075,RBMY1A1,ENSG00000234414,protein_coding -45465,KBTBD13,ENSG00000234438,protein_coding -22706,ZNF736,ENSG00000234444,protein_coding -56690,PINLYP,ENSG00000234465,protein_coding -24669,CLDN34,ENSG00000234469,protein_coding -18152,C5orf58,ENSG00000234511,protein_coding -23183,FAM133B,ENSG00000234545,protein_coding -34919,OR10G8,ENSG00000234560,protein_coding -16288,MCIDAS,ENSG00000234602,protein_coding -29306,JRK,ENSG00000234616,protein_coding -46965,NPIPB2,ENSG00000234719,protein_coding -19284,HLA-B,ENSG00000234745,protein_coding -32815,C11orf94,ENSG00000234776,protein_coding -14925,IQCM,ENSG00000234828,protein_coding -29749,IFNA17,ENSG00000234829,protein_coding -33371,HNRNPUL2-BSCL2,ENSG00000234857,protein_coding -56770,APOC2,ENSG00000234906,protein_coding -59400,SHISA8,ENSG00000234965,protein_coding -57111,C19orf81,ENSG00000235034,protein_coding -111,ANKRD65,ENSG00000235098,protein_coding -31600,BRD3OS,ENSG00000235106,protein_coding -19080,ZSCAN31,ENSG00000235109,protein_coding -8952,FAM237A,ENSG00000235118,protein_coding -39787,C12orf75,ENSG00000235162,protein_coding -202,SMIM1,ENSG00000235169,protein_coding -29400,HGH1,ENSG00000235173,protein_coding -42313,PPP1R3E,ENSG00000235194,protein_coding -34354,KDM4E,ENSG00000235268,protein_coding -21442,RAMACL,ENSG00000235272,protein_coding -37003,RPEL1,ENSG00000235376,protein_coding 
-30038,SPAAR,ENSG00000235387,protein_coding -29870,SMIM27,ENSG00000235453,protein_coding -59427,NFAM1,ENSG00000235568,protein_coding -12912,NKX1-1,ENSG00000235608,protein_coding -23740,RNF148,ENSG00000235631,protein_coding -26702,CXorf51B,ENSG00000235699,protein_coding -5063,AL109810.2,ENSG00000235710,protein_coding -45866,ANKRD34C,ENSG00000235711,protein_coding -34814,MFRP,ENSG00000235718,protein_coding -3993,KIAA0040,ENSG00000235750,protein_coding -13105,USP17L27,ENSG00000235780,protein_coding -19465,B3GALT4,ENSG00000235863,protein_coding -3258,LCE6A,ENSG00000235942,protein_coding -26837,PNMA6A,ENSG00000235961,protein_coding -34997,PATE3,ENSG00000236027,protein_coding -11096,OR5H14,ENSG00000236032,protein_coding -19472,ZBTB22,ENSG00000236104,protein_coding -27113,USP17L4,ENSG00000236125,protein_coding -26337,CT47A3,ENSG00000236126,protein_coding -24056,CLEC2L,ENSG00000236279,protein_coding -32303,ZBED5,ENSG00000236287,protein_coding -50314,SLFN14,ENSG00000236320,protein_coding -3051,PPIAL4G,ENSG00000236334,protein_coding -25275,GAGE12F,ENSG00000236362,protein_coding -26341,CT47A1,ENSG00000236371,protein_coding -50727,CCDC200,ENSG00000236383,protein_coding -52289,SLC35G4,ENSG00000236396,protein_coding -24222,TAS2R39,ENSG00000236398,protein_coding -57863,TSPY10,ENSG00000236424,protein_coding -40836,UBE2L5,ENSG00000236444,protein_coding -26316,CT47B1,ENSG00000236446,protein_coding -31646,AL354761.1,ENSG00000236543,protein_coding -21703,ZNF853,ENSG00000236609,protein_coding -29743,IFNA4,ENSG00000236637,protein_coding -14372,ARHGEF38,ENSG00000236699,protein_coding -25272,GAGE12B,ENSG00000236737,protein_coding -20865,CTAGE9,ENSG00000236761,protein_coding -852,AL391650.1,ENSG00000236782,protein_coding -10494,C3orf84,ENSG00000236980,protein_coding -34918,OR10G9,ENSG00000236981,protein_coding -27163,USP17L8,ENSG00000237038,protein_coding -20884,TAAR9,ENSG00000237110,protein_coding -14875,C4orf51,ENSG00000237136,protein_coding -48289,B3GNT9,ENSG00000237172,protein_coding 
-17446,CDKN2AIPNL,ENSG00000237190,protein_coding -55136,MBD3L5,ENSG00000237247,protein_coding -44969,CKMT1B,ENSG00000237289,protein_coding -74,RNF223,ENSG00000237330,protein_coding -34998,PATE4,ENSG00000237353,protein_coding -41335,AL445989.1,ENSG00000237378,protein_coding -32904,OR4A47,ENSG00000237388,protein_coding -9432,PRSS56,ENSG00000237412,protein_coding -55891,AC008554.1,ENSG00000237440,protein_coding -19470,RGL2,ENSG00000237441,protein_coding -56812,BHMG1,ENSG00000237452,protein_coding -37388,C10orf143,ENSG00000237489,protein_coding -46987,SHISA9,ENSG00000237515,protein_coding -55245,OR7E24,ENSG00000237521,protein_coding -7721,TEX51,ENSG00000237524,protein_coding -19425,HLA-DQA2,ENSG00000237541,protein_coding -19479,KIFC1,ENSG00000237649,protein_coding -6440,C2orf74,ENSG00000237651,protein_coding -25273,GAGE12C,ENSG00000237671,protein_coding -17907,IRGM,ENSG00000237693,protein_coding -474,PRAMEF33,ENSG00000237700,protein_coding -2473,AMY1A,ENSG00000237763,protein_coding -13208,FAM200B,ENSG00000237765,protein_coding -26333,CT47A5,ENSG00000237957,protein_coding -19146,OR2I1P,ENSG00000237988,protein_coding -57859,TSPY9P,ENSG00000238074,protein_coding -50894,LRRC37A2,ENSG00000238083,protein_coding -25036,MPC1L,ENSG00000238205,protein_coding -26541,ETDA,ENSG00000238210,protein_coding -31656,TMEM250,ENSG00000238227,protein_coding -5409,OR2W3,ENSG00000238243,protein_coding -25433,PAGE2B,ENSG00000238269,protein_coding -18654,TXNDC5,ENSG00000239264,protein_coding -58994,CASTOR1,ENSG00000239282,protein_coding -6936,RNF103,ENSG00000239305,protein_coding -33641,RBM14,ENSG00000239306,protein_coding -56314,ALKBH6,ENSG00000239382,protein_coding -10716,ASB14,ENSG00000239388,protein_coding -17662,PCDHA13,ENSG00000239389,protein_coding -5399,AC118470.1,ENSG00000239395,protein_coding -8337,KLHL41,ENSG00000239474,protein_coding -23363,CASTOR3,ENSG00000239521,protein_coding -31268,OR1J4,ENSG00000239590,protein_coding -6259,STPG4,ENSG00000239605,protein_coding 
-11578,PRR20G,ENSG00000239620,protein_coding -17362,MEIKIN,ENSG00000239642,protein_coding -51115,NME1,ENSG00000239672,protein_coding -49340,TNFSF12,ENSG00000239697,protein_coding -49545,CDRT4,ENSG00000239704,protein_coding -59303,APOBEC3G,ENSG00000239713,protein_coding -10617,TLR9,ENSG00000239732,protein_coding -6744,WBP1,ENSG00000239779,protein_coding -22512,MRPS17,ENSG00000239789,protein_coding -445,PRAMEF11,ENSG00000239810,protein_coding -27088,DEFA3,ENSG00000239839,protein_coding -21571,GET4,ENSG00000239857,protein_coding -50585,KRTAP9-2,ENSG00000239886,protein_coding -3727,C1orf226,ENSG00000239887,protein_coding -59337,ADSL,ENSG00000239900,protein_coding -32091,AC104389.4,ENSG00000239920,protein_coding -57429,LILRA4,ENSG00000239961,protein_coding -57446,LILRA2,ENSG00000239998,protein_coding -4032,TEX35,ENSG00000240021,protein_coding -2470,AMY2B,ENSG00000240038,protein_coding -12083,STRIT1,ENSG00000240045,protein_coding -19336,LY6G5B,ENSG00000240053,protein_coding -19433,PSMB9,ENSG00000240065,protein_coding -17725,PCDHGC3,ENSG00000240184,protein_coding -23872,SMKR1,ENSG00000240204,protein_coding -9471,UGT1A5,ENSG00000240224,protein_coding -21574,COX19,ENSG00000240230,protein_coding -27086,DEFA1B,ENSG00000240247,protein_coding -11713,ACAD11,ENSG00000240303,protein_coding -8803,PPIL3,ENSG00000240344,protein_coding -3252,LCE1F,ENSG00000240386,protein_coding -57469,KIR3DL2,ENSG00000240403,protein_coding -60057,KRTAP13-3,ENSG00000240432,protein_coding -49719,FOXO3B,ENSG00000240445,protein_coding -49628,TNFRSF13B,ENSG00000240505,protein_coding -50583,KRTAP9-1,ENSG00000240542,protein_coding -1834,L1TD1,ENSG00000240563,protein_coding -22097,AQP1,ENSG00000240583,protein_coding -40700,C1QTNF9,ENSG00000240654,protein_coding -11634,ISY1,ENSG00000240682,protein_coding -27574,PNMA2,ENSG00000240694,protein_coding -23169,LRRD1,ENSG00000240720,protein_coding -10293,KRBOX1,ENSG00000240747,protein_coding -17728,PCDHGC5,ENSG00000240764,protein_coding 
-39015,ARHGEF25,ENSG00000240771,protein_coding -54408,TMEM189,ENSG00000240849,protein_coding -5744,RDH14,ENSG00000240857,protein_coding -50569,KRTAP4-7,ENSG00000240871,protein_coding -11275,PLCXD2,ENSG00000240891,protein_coding -58785,MIF,ENSG00000240972,protein_coding -35469,NSUN6,ENSG00000241058,protein_coding -19428,HLA-DOB,ENSG00000241106,protein_coding -9467,UGT1A9,ENSG00000241119,protein_coding -60435,KRTAP10-5,ENSG00000241123,protein_coding -22247,YAE1,ENSG00000241127,protein_coding -5400,OR14A2,ENSG00000241128,protein_coding -22682,AC115220.1,ENSG00000241149,protein_coding -10382,TDGF1,ENSG00000241186,protein_coding -11244,C3orf85,ENSG00000241224,protein_coding -33807,KRTAP5-8,ENSG00000241233,protein_coding -50572,KRTAP4-16,ENSG00000241241,protein_coding -22776,CRCP,ENSG00000241258,protein_coding -49551,CDRT1,ENSG00000241322,protein_coding -25964,RPL36A,ENSG00000241343,protein_coding -59225,PDXP,ENSG00000241360,protein_coding -19218,RPP21,ENSG00000241370,protein_coding -8233,CD302,ENSG00000241399,protein_coding -19398,EGFL8,ENSG00000241404,protein_coding -23311,ATP5MF,ENSG00000241468,protein_coding -25364,SSX2,ENSG00000241476,protein_coding -59491,ARHGAP8,ENSG00000241484,protein_coding -26746,AC244197.3,ENSG00000241489,protein_coding -9770,ARPC4,ENSG00000241553,protein_coding -361,CORT,ENSG00000241563,protein_coding -50588,KRTAP9-4,ENSG00000241595,protein_coding -31889,KRTAP5-4,ENSG00000241598,protein_coding -9480,UGT1A1,ENSG00000241635,protein_coding -22091,INMT,ENSG00000241644,protein_coding -23303,ARPC1A,ENSG00000241685,protein_coding -53383,AL035460.1,ENSG00000241690,protein_coding -30955,TMEFF1,ENSG00000241697,protein_coding -3271,SPRR2A,ENSG00000241794,protein_coding -60163,ATP5PO,ENSG00000241837,protein_coding -45453,PLEKHO2,ENSG00000241839,protein_coding -27493,C8orf58,ENSG00000241852,protein_coding -59063,PISD,ENSG00000241878,protein_coding -36848,HOGA1,ENSG00000241935,protein_coding -60397,PWP2,ENSG00000241945,protein_coding 
-7268,AC079447.1,ENSG00000241962,protein_coding -58528,PI4KA,ENSG00000241973,protein_coding -31086,AKAP2,ENSG00000241978,protein_coding -57460,KIR3DL3,ENSG00000242019,protein_coding -44989,HYPK,ENSG00000242028,protein_coding -16016,AMACR,ENSG00000242110,protein_coding -59005,MTFP1,ENSG00000242114,protein_coding -10867,MDFIC2,ENSG00000242120,protein_coding -46464,ARHGDIG,ENSG00000242173,protein_coding -60122,TCP10L,ENSG00000242220,protein_coding -56668,PSG2,ENSG00000242221,protein_coding -59445,ARFGAP3,ENSG00000242247,protein_coding -3463,BGLAP,ENSG00000242252,protein_coding -58456,C22orf39,ENSG00000242259,protein_coding -23217,PEG10,ENSG00000242265,protein_coding -26339,CT47A2,ENSG00000242362,protein_coding -9464,UGT1A8,ENSG00000242366,protein_coding -54019,EIF6,ENSG00000242372,protein_coding -58088,RBMY1E,ENSG00000242389,protein_coding -17727,PCDHGC4,ENSG00000242419,protein_coding -6293,GTF2A1L,ENSG00000242441,protein_coding -108,MRPL20,ENSG00000242485,protein_coding -46141,ARPIN,ENSG00000242498,protein_coding -9465,UGT1A10,ENSG00000242515,protein_coding -53067,SERPINB10,ENSG00000242550,protein_coding -19437,HLA-DMB,ENSG00000242574,protein_coding -46471,DECR2,ENSG00000242612,protein_coding -31110,GNG10,ENSG00000242616,protein_coding -33182,CNTF,ENSG00000242689,protein_coding -40914,CCDC169,ENSG00000242715,protein_coding -25641,RTL5,ENSG00000242732,protein_coding -21640,AP5Z1,ENSG00000242802,protein_coding -55436,ZNF709,ENSG00000242852,protein_coding -44970,STRC,ENSG00000242866,protein_coding -58074,RBMY1B,ENSG00000242875,protein_coding -23179,ERVW-1,ENSG00000242950,protein_coding -17620,EIF4EBP3,ENSG00000243056,protein_coding -449,PRAMEF4,ENSG00000243073,protein_coding -56666,PSG11,ENSG00000243130,protein_coding -9474,UGT1A3,ENSG00000243135,protein_coding -56672,PSG4,ENSG00000243137,protein_coding -5947,MRPL33,ENSG00000243147,protein_coding -58376,MICAL3,ENSG00000243156,protein_coding -55292,PPAN-P2RY11,ENSG00000243207,protein_coding 
-17665,PCDHAC2,ENSG00000243232,protein_coding -6291,STON1,ENSG00000243244,protein_coding -25247,PRAF2,ENSG00000243279,protein_coding -3606,VSIG8,ENSG00000243284,protein_coding -23986,STMP1,ENSG00000243317,protein_coding -22796,KCTD7,ENSG00000243335,protein_coding -3391,EFNA4,ENSG00000243364,protein_coding -17158,TICAM2,ENSG00000243414,protein_coding -31082,PALM2,ENSG00000243444,protein_coding -12927,C4orf48,ENSG00000243449,protein_coding -10553,NAA80,ENSG00000243477,protein_coding -2472,AMY2A,ENSG00000243480,protein_coding -60441,KRTAP10-11,ENSG00000243489,protein_coding -20122,AL365232.1,ENSG00000243501,protein_coding -54706,TNFRSF6B,ENSG00000243509,protein_coding -54271,WFDC6,ENSG00000243543,protein_coding -22994,UPK3B,ENSG00000243566,protein_coding -60178,SMIM34A,ENSG00000243627,protein_coding -60147,IL10RB,ENSG00000243646,protein_coding -19373,CFB,ENSG00000243649,protein_coding -35869,ZNF487,ENSG00000243660,protein_coding -6579,WDR92,ENSG00000243667,protein_coding -51117,NME2,ENSG00000243678,protein_coding -10656,AC006254.1,ENSG00000243696,protein_coding -44906,PLA2G4B,ENSG00000243708,protein_coding -4885,LEFTY1,ENSG00000243709,protein_coding -1391,CFAP57,ENSG00000243710,protein_coding -47255,NPIPB5,ENSG00000243716,protein_coding -1736,TTC4,ENSG00000243725,protein_coding -19131,OR5V1,ENSG00000243729,protein_coding -1138,TMEM35B,ENSG00000243749,protein_coding -57462,KIR2DL3,ENSG00000243772,protein_coding -44904,JMJD7,ENSG00000243789,protein_coding -59300,APOBEC3D,ENSG00000243811,protein_coding -24284,OR2A7,ENSG00000243896,protein_coding -9198,TUBA4B,ENSG00000243910,protein_coding -60167,MRPS6,ENSG00000243927,protein_coding -5939,ZNF512,ENSG00000243943,protein_coding -19915,GSTA1,ENSG00000243955,protein_coding -26150,RTL9,ENSG00000243978,protein_coding -10608,ACY1,ENSG00000243989,protein_coding -54040,NFS1,ENSG00000244005,protein_coding -5194,MT1HL1,ENSG00000244020,protein_coding -60063,KRTAP19-3,ENSG00000244025,protein_coding 
-675,DDOST,ENSG00000244038,protein_coding -49984,TMEM199,ENSG00000244045,protein_coding -3238,LCE3C,ENSG00000244057,protein_coding -19913,GSTA2,ENSG00000244067,protein_coding -3274,SPRR2F,ENSG00000244094,protein_coding -31109,DNAJC25-GNG10,ENSG00000244115,protein_coding -9468,UGT1A7,ENSG00000244122,protein_coding -55296,P2RY11,ENSG00000244165,protein_coding -31697,TMEM141,ENSG00000244187,protein_coding -23318,TMEM225B,ENSG00000244219,protein_coding -18354,GMCL2,ENSG00000244234,protein_coding -31896,IFITM10,ENSG00000244242,protein_coding -19371,AL645922.1,ENSG00000244255,protein_coding -54263,DBNDD2,ENSG00000244274,protein_coding -19344,LY6G6D,ENSG00000244355,protein_coding -60071,KRTAP19-7,ENSG00000244362,protein_coding -58086,RBMY1D,ENSG00000244395,protein_coding -12567,ETV5,ENSG00000244405,protein_coding -33806,KRTAP5-7,ENSG00000244411,protein_coding -4287,CFHR1,ENSG00000244414,protein_coding -54039,RBM12,ENSG00000244462,protein_coding -9472,UGT1A4,ENSG00000244474,protein_coding -18705,ERVFRD-1,ENSG00000244476,protein_coding -57414,LILRA6,ENSG00000244482,protein_coding -58510,SCARF2,ENSG00000244486,protein_coding -59298,APOBEC3C,ENSG00000244509,protein_coding -50580,KRTAP4-2,ENSG00000244537,protein_coding -53327,RAD21L1,ENSG00000244588,protein_coding -10284,CCDC13,ENSG00000244607,protein_coding -6622,ASPRV1,ENSG00000244617,protein_coding -23331,OR2AE1,ENSG00000244623,protein_coding -60077,KRTAP20-1,ENSG00000244624,protein_coding -54406,UBE2V1,ENSG00000244687,protein_coding -24285,CTAGE8,ENSG00000244693,protein_coding -19850,PTCHD4,ENSG00000244694,protein_coding -19379,C4A,ENSG00000244731,protein_coding -32084,HBB,ENSG00000244734,protein_coding -58845,CRYBB2,ENSG00000244752,protein_coding -40872,N4BP2L2,ENSG00000244754,protein_coding -56367,ZNF585B,ENSG00000245680,protein_coding -56170,CEBPA,ENSG00000245848,protein_coding -37997,H2AFJ,ENSG00000246705,protein_coding -45467,UBAP1L,ENSG00000246922,protein_coding -40504,PGAM5,ENSG00000247077,protein_coding 
-53302,ZCCHC3,ENSG00000247315,protein_coding -32453,SPTY2D1OS,ENSG00000247595,protein_coding -10619,TWF2,ENSG00000247596,protein_coding -8762,MARS2,ENSG00000247626,protein_coding -25449,USP51,ENSG00000247746,protein_coding -56586,BCKDHA,ENSG00000248098,protein_coding -55743,INSL3,ENSG00000248099,protein_coding -17835,MARCOL,ENSG00000248109,protein_coding -14287,ADH1C,ENSG00000248144,protein_coding -19217,TRIM39-RPP21,ENSG00000248167,protein_coding -27492,AC037459.1,ENSG00000248235,protein_coding -15139,APELA,ENSG00000248329,protein_coding -129,CDK11B,ENSG00000248333,protein_coding -17663,PCDHAC1,ENSG00000248383,protein_coding -57397,TARM1,ENSG00000248385,protein_coding -59490,PRR5-ARHGAP8,ENSG00000248405,protein_coding -16898,POU5F2,ENSG00000248483,protein_coding -3684,PCP4L1,ENSG00000248485,protein_coding -10606,ABHD14A,ENSG00000248487,protein_coding -10659,STIMATE-MUSTN1,ENSG00000248592,protein_coding -33642,RBM14-RBM4,ENSG00000248643,protein_coding -8234,LY75-CD302,ENSG00000248672,protein_coding -12175,AC079594.2,ENSG00000248710,protein_coding -34806,CCDC153,ENSG00000248712,protein_coding -14294,C4orf54,ENSG00000248713,protein_coding -33635,ACTN3,ENSG00000248746,protein_coding -58995,AC004997.1,ENSG00000248751,protein_coding -21558,AC187653.1,ENSG00000248767,protein_coding -15137,SMIM31,ENSG00000248771,protein_coding -49341,TNFSF12-TNFSF13,ENSG00000248871,protein_coding -44689,FMN1,ENSG00000248905,protein_coding -23308,ATP5MF-PTCD1,ENSG00000248919,protein_coding -13096,USP17L19,ENSG00000248920,protein_coding -13099,USP17L22,ENSG00000248933,protein_coding -19438,AL645941.2,ENSG00000248993,protein_coding -13094,USP17L17,ENSG00000249104,protein_coding -56276,HAUS5,ENSG00000249115,protein_coding -54272,EPPIN-WFDC6,ENSG00000249139,protein_coding -21445,AL159163.1,ENSG00000249141,protein_coding -15830,TAF11L12,ENSG00000249156,protein_coding -17658,PCDHA11,ENSG00000249158,protein_coding -60160,AP000311.1,ENSG00000249209,protein_coding 
-59439,ATP5MGL,ENSG00000249222,protein_coding -45454,AC069368.1,ENSG00000249240,protein_coding -14080,TMEM150C,ENSG00000249242,protein_coding -22775,AC068533.4,ENSG00000249319,protein_coding -16531,NAIP,ENSG00000249437,protein_coding -49718,ZNF286B,ENSG00000249459,protein_coding -57723,ZNF324B,ENSG00000249471,protein_coding -19801,SPATS1,ENSG00000249481,protein_coding -13234,CLRN2,ENSG00000249581,protein_coding -59001,AC004832.3,ENSG00000249590,protein_coding -60145,AP000295.1,ENSG00000249624,protein_coding -15812,LINC02218,ENSG00000249662,protein_coding -13692,THEGL,ENSG00000249693,protein_coding -55443,ZNF564,ENSG00000249709,protein_coding -7201,FER1L5,ENSG00000249715,protein_coding -17583,ECSCR,ENSG00000249751,protein_coding -22509,AC092647.5,ENSG00000249773,protein_coding -13098,USP17L21,ENSG00000249811,protein_coding -20671,HS3ST5,ENSG00000249853,protein_coding -36099,MTRNR2L5,ENSG00000249860,protein_coding -56494,LGALS16,ENSG00000249861,protein_coding -6933,RNF103-CHMP3,ENSG00000249884,protein_coding -15513,PDCD6,ENSG00000249915,protein_coding -44665,GOLGA8K,ENSG00000249931,protein_coding -48272,TERB1,ENSG00000249961,protein_coding -36849,AL355315.1,ENSG00000249967,protein_coding -10350,TMEM158,ENSG00000249992,protein_coding -46136,ARPIN-AP3S2,ENSG00000250021,protein_coding -55832,YJEFN3,ENSG00000250067,protein_coding -17655,PCDHA10,ENSG00000250120,protein_coding -9771,ARPC4-TTLL3,ENSG00000250151,protein_coding -13400,PTTG2,ENSG00000250254,protein_coding -19429,AL669918.1,ENSG00000250264,protein_coding -14383,GIMD1,ENSG00000250298,protein_coding -27343,TRMT9B,ENSG00000250305,protein_coding -12855,ZNF718,ENSG00000250312,protein_coding -13313,SMIM20,ENSG00000250317,protein_coding -24982,AF241726.2,ENSG00000250349,protein_coding -14845,GYPB,ENSG00000250361,protein_coding -43650,TUNAR,ENSG00000250366,protein_coding -15158,TRIM75P,ENSG00000250374,protein_coding -26262,KIAA1210,ENSG00000250423,protein_coding -22096,AC004691.2,ENSG00000250424,protein_coding 
-58776,CHCHD10,ENSG00000250479,protein_coding -15149,FAM218A,ENSG00000250486,protein_coding -51727,CDK3,ENSG00000250506,protein_coding -37672,GPR162,ENSG00000250510,protein_coding -6243,ATP6V1E2,ENSG00000250565,protein_coding -29337,GLI4,ENSG00000250571,protein_coding -19342,LY6G6F-LY6G6D,ENSG00000250641,protein_coding -31897,AC068580.4,ENSG00000250644,protein_coding -14884,REELD1,ENSG00000250673,protein_coding -40913,CCDC169-SOHLH2,ENSG00000250709,protein_coding -1507,P3R3URF,ENSG00000250719,protein_coding -16156,SELENOP,ENSG00000250722,protein_coding -5745,NT5C1B-RDH14,ENSG00000250741,protein_coding -13097,USP17L20,ENSG00000250745,protein_coding -15835,TAF11L14,ENSG00000250782,protein_coding -56298,PRODH2,ENSG00000250799,protein_coding -17242,AC010255.3,ENSG00000250803,protein_coding -13672,EXOC1L,ENSG00000250821,protein_coding -13095,USP17L18,ENSG00000250844,protein_coding -13100,USP17L23,ENSG00000250913,protein_coding -11384,AC083800.1,ENSG00000251012,protein_coding -31457,AL672142.1,ENSG00000251184,protein_coding -25125,ZNF674,ENSG00000251192,protein_coding -17157,TMED7-TICAM2,ENSG00000251201,protein_coding -3392,AL691442.1,ENSG00000251246,protein_coding -56355,ZNF345,ENSG00000251247,protein_coding -20646,RFPL4B,ENSG00000251258,protein_coding -11671,ALG1L2,ENSG00000251287,protein_coding -59641,SHANK3,ENSG00000251322,protein_coding -30954,MSANTD3-TMEFF1,ENSG00000251349,protein_coding -58782,AP000350.4,ENSG00000251357,protein_coding -57647,ZNF550,ENSG00000251369,protein_coding -17485,DCANP1,ENSG00000251380,protein_coding -16580,FOXD1,ENSG00000251493,protein_coding -359,CENPS-CORT,ENSG00000251503,protein_coding -49552,AC005324.3,ENSG00000251537,protein_coding -8336,AC093899.2,ENSG00000251569,protein_coding -37916,PRB1,ENSG00000251655,protein_coding -17659,PCDHA12,ENSG00000251664,protein_coding -46566,PTX4,ENSG00000251692,protein_coding -29180,OC90,ENSG00000253117,protein_coding -4251,RGS21,ENSG00000253148,protein_coding 
-17722,PCDHGA12,ENSG00000253159,protein_coding -28587,C8orf88,ENSG00000253250,protein_coding -16433,SHLD3,ENSG00000253251,protein_coding -23548,CCDC71L,ENSG00000253276,protein_coding -22002,HOXA10,ENSG00000253293,protein_coding -990,TMEM200B,ENSG00000253304,protein_coding -17716,PCDHGB6,ENSG00000253305,protein_coding -41201,SERPINE3,ENSG00000253309,protein_coding -1394,C1orf210,ENSG00000253313,protein_coding -896,TRNP1,ENSG00000253368,protein_coding -27679,SMIM18,ENSG00000253457,protein_coding -17708,PCDHGA5,ENSG00000253485,protein_coding -51339,NACA2,ENSG00000253506,protein_coding -17711,PCDHGA7,ENSG00000253537,protein_coding -12660,PYDC2,ENSG00000253548,protein_coding -28475,SLC10A5,ENSG00000253598,protein_coding -36482,EIF5AL1,ENSG00000253626,protein_coding -27232,PRSS51,ENSG00000253649,protein_coding -41223,ALG11,ENSG00000253710,protein_coding -39319,ATXN7L3B,ENSG00000253719,protein_coding -27967,PRKDC,ENSG00000253729,protein_coding -17710,PCDHGA6,ENSG00000253731,protein_coding -17713,PCDHGA8,ENSG00000253767,protein_coding -41224,UTP14C,ENSG00000253797,protein_coding -3510,ETV3L,ENSG00000253831,protein_coding -17717,PCDHGA10,ENSG00000253846,protein_coding -17719,PCDHGA11,ENSG00000253873,protein_coding -17707,PCDHGB2,ENSG00000253910,protein_coding -17712,PCDHGB4,ENSG00000253953,protein_coding -27191,CLDN23,ENSG00000253958,protein_coding -56341,ZNF260,ENSG00000254004,protein_coding -28073,LYN,ENSG00000254087,protein_coding -27242,PINX1,ENSG00000254093,protein_coding -17718,PCDHGB7,ENSG00000254122,protein_coding -7402,CD8B2,ENSG00000254126,protein_coding -47436,NPIPB11,ENSG00000254206,protein_coding -17704,PCDHGB1,ENSG00000254221,protein_coding -17703,PCDHGA3,ENSG00000254245,protein_coding -29437,LRRC24,ENSG00000254402,protein_coding -59633,CHKB-CPT1B,ENSG00000254413,protein_coding -57204,SIGLEC14,ENSG00000254415,protein_coding -21007,PBOV1,ENSG00000254440,protein_coding -34588,HSPB2-C11orf52,ENSG00000254445,protein_coding 
-33136,TMX2-CTNND1,ENSG00000254462,protein_coding -33217,OR4D10,ENSG00000254466,protein_coding -33831,AP002495.1,ENSG00000254469,protein_coding -33575,AP5B1,ENSG00000254470,protein_coding -42375,CHMP4A,ENSG00000254505,protein_coding -57190,SIGLEC12,ENSG00000254521,protein_coding -14716,PABPC4L,ENSG00000254535,protein_coding -37449,AL360181.3,ENSG00000254536,protein_coding -34022,OMP,ENSG00000254550,protein_coding -1080,AL033529.1,ENSG00000254553,protein_coding -44387,MAGEL2,ENSG00000254585,protein_coding -32312,CSNK2A3,ENSG00000254598,protein_coding -37274,ARMS2,ENSG00000254636,protein_coding -31927,INS,ENSG00000254647,protein_coding -43758,RTL1,ENSG00000254656,protein_coding -27902,AC110275.1,ENSG00000254673,protein_coding -2015,FPGT,ENSG00000254685,protein_coding -42371,AL136295.1,ENSG00000254692,protein_coding -3725,AL512785.2,ENSG00000254706,protein_coding -58724,IGLL5,ENSG00000254709,protein_coding -3456,MEX3A,ENSG00000254726,protein_coding -33139,AP001931.1,ENSG00000254732,protein_coding -34917,OR10G4,ENSG00000254737,protein_coding -33351,EEF1G,ENSG00000254772,protein_coding -48260,CKLF-CMTM1,ENSG00000254788,protein_coding -54260,SYS1-DBNDD2,ENSG00000254806,protein_coding -31952,SLC22A18AS,ENSG00000254827,protein_coding -33078,OR5M10,ENSG00000254834,protein_coding -47018,NPIPA2,ENSG00000254852,protein_coding -55763,MPV17L2,ENSG00000254858,protein_coding -19312,ATP6V1G2-DDX39B,ENSG00000254870,protein_coding -55815,BORCS8,ENSG00000254901,protein_coding -22093,INMT-MINDY4,ENSG00000254959,protein_coding -33116,AP000781.2,ENSG00000254979,protein_coding -33630,DPP3,ENSG00000254986,protein_coding -54555,STX16-NPEPL1,ENSG00000254995,protein_coding -17617,ANKHD1-EIF4EBP3,ENSG00000254996,protein_coding -33808,KRTAP5-9,ENSG00000254997,protein_coding -9794,BRK1,ENSG00000254999,protein_coding -34252,UBTFL1,ENSG00000255009,protein_coding -33080,OR5M1,ENSG00000255012,protein_coding -829,AL020996.2,ENSG00000255054,protein_coding 
-32433,SAA2-SAA4,ENSG00000255071,protein_coding -14187,PIGY,ENSG00000255072,protein_coding -33180,ZFP91-CNTF,ENSG00000255073,protein_coding -49559,AC005324.4,ENSG00000255104,protein_coding -52300,CHMP1B,ENSG00000255112,protein_coding -39774,EID3,ENSG00000255150,protein_coding -33197,GLYATL1B,ENSG00000255151,protein_coding -19350,MSH5-SAPCD1,ENSG00000255152,protein_coding -10743,HTD2,ENSG00000255154,protein_coding -29366,CCDC166,ENSG00000255181,protein_coding -44744,NANOGP8,ENSG00000255192,protein_coding -34475,CARD17,ENSG00000255221,protein_coding -33077,OR5M11,ENSG00000255223,protein_coding -34728,FXYD6-FXYD2,ENSG00000255245,protein_coding -27126,PRR23D1,ENSG00000255251,protein_coding -34733,SMIM35,ENSG00000255274,protein_coding -34602,AP002884.3,ENSG00000255292,protein_coding -34932,OR8G5,ENSG00000255298,protein_coding -45109,EID1,ENSG00000255302,protein_coding -32154,OR52B2,ENSG00000255307,protein_coding -20820,AL096711.2,ENSG00000255330,protein_coding -36911,AL133352.1,ENSG00000255339,protein_coding -45563,NOX5,ENSG00000255346,protein_coding -32521,CCDC179,ENSG00000255359,protein_coding -37905,TAS2R43,ENSG00000255374,protein_coding -27149,PRR23D2,ENSG00000255378,protein_coding -33245,OOSP4B,ENSG00000255393,protein_coding -40214,HCAR3,ENSG00000255398,protein_coding -17647,PCDHA3,ENSG00000255408,protein_coding -10912,EBLN2,ENSG00000255423,protein_coding -33362,AP001458.2,ENSG00000255432,protein_coding -47588,AC135050.2,ENSG00000255439,protein_coding -34477,CARD18,ENSG00000255501,protein_coding -33352,AP002990.1,ENSG00000255508,protein_coding -47389,NPIPB8,ENSG00000255524,protein_coding -42378,NEDD8-MDP1,ENSG00000255526,protein_coding -45286,POLR2M,ENSG00000255529,protein_coding -19343,LY6G6E,ENSG00000255552,protein_coding -34583,FDXACB1,ENSG00000255561,protein_coding -42130,OR10G2,ENSG00000255582,protein_coding -19586,RAB44,ENSG00000255587,protein_coding -37601,AC005833.1,ENSG00000255639,protein_coding -37871,AC068775.1,ENSG00000255641,protein_coding 
-34659,AP002373.1,ENSG00000255663,protein_coding -22041,TRIL,ENSG00000255690,protein_coding -51214,OR4D2,ENSG00000255713,protein_coding -56581,AC011462.1,ENSG00000255730,protein_coding -42273,OR6J1,ENSG00000255804,protein_coding -37868,KLRC4-KLRK1,ENSG00000255819,protein_coding -32292,MTRNR2L8,ENSG00000255823,protein_coding -17486,TIFAB,ENSG00000255833,protein_coding -4886,AL117348.2,ENSG00000255835,protein_coding -37899,TAS2R20,ENSG00000255837,protein_coding -34358,KDM4F,ENSG00000255855,protein_coding -30085,AL138752.2,ENSG00000255872,protein_coding -56558,CYP2A6,ENSG00000255974,protein_coding -3603,AL590560.1,ENSG00000256029,protein_coding -15050,CTSO,ENSG00000256043,protein_coding -25442,MTRNR2L10,ENSG00000256045,protein_coding -43935,APOPT1,ENSG00000256053,protein_coding -57630,TRAPPC2B,ENSG00000256060,protein_coding -45241,DNAAF4,ENSG00000256061,protein_coding -57228,ZNF432,ENSG00000256087,protein_coding -33440,AP000721.1,ENSG00000256100,protein_coding -20855,SMLR1,ENSG00000256162,protein_coding -37907,TAS2R30,ENSG00000256188,protein_coding -32369,AC018523.2,ENSG00000256206,protein_coding -54529,MTRNR2L3,ENSG00000256222,protein_coding -40529,ZNF10,ENSG00000256223,protein_coding -55872,ZNF486,ENSG00000256229,protein_coding -17906,SMIM3,ENSG00000256235,protein_coding -34798,HMBS,ENSG00000256269,protein_coding -56725,ZNF225,ENSG00000256294,protein_coding -33631,AP002748.4,ENSG00000256349,protein_coding -2932,PPIAL4D,ENSG00000256374,protein_coding -15048,ASIC5,ENSG00000256394,protein_coding -1718,AL357673.1,ENSG00000256407,protein_coding -37901,TAS2R31,ENSG00000256436,protein_coding -17635,DND1,ENSG00000256453,protein_coding -53255,SALL3,ENSG00000256463,protein_coding -43936,AL139300.1,ENSG00000256500,protein_coding -33668,AP003419.1,ENSG00000256514,protein_coding -51439,POLG2,ENSG00000256525,protein_coding -37910,SMIM10L1,ENSG00000256537,protein_coding -53368,AL049650.1,ENSG00000256566,protein_coding -35927,OR13A1,ENSG00000256574,protein_coding 
-33300,AP003108.2,ENSG00000256591,protein_coding -49907,MTRNR2L1,ENSG00000256618,protein_coding -22284,AC010132.3,ENSG00000256646,protein_coding -37852,CLEC12B,ENSG00000256660,protein_coding -7492,LIMS4,ENSG00000256671,protein_coding -57225,ZNF350,ENSG00000256683,protein_coding -33290,PGA5,ENSG00000256713,protein_coding -50881,STH,ENSG00000256762,protein_coding -55855,ZNF253,ENSG00000256771,protein_coding -37846,KLRF2,ENSG00000256797,protein_coding -49261,C17orf100,ENSG00000256806,protein_coding -48065,CAPNS2,ENSG00000256812,protein_coding -40530,AC026786.1,ENSG00000256825,protein_coding -40200,AC048338.1,ENSG00000256861,protein_coding -39697,SLC5A8,ENSG00000256870,protein_coding -35790,MTRNR2L7,ENSG00000256892,protein_coding -40187,AC069503.2,ENSG00000256950,protein_coding -30083,AL513165.2,ENSG00000256966,protein_coding -7463,LIMS3,ENSG00000256977,protein_coding -20123,KHDC1L,ENSG00000256980,protein_coding -51636,GPR142,ENSG00000257008,protein_coding -48524,HP,ENSG00000257017,protein_coding -38077,AC011604.2,ENSG00000257046,protein_coding -34341,C11orf97,ENSG00000257057,protein_coding -38080,AC022335.1,ENSG00000257062,protein_coding -21080,AL049844.1,ENSG00000257065,protein_coding -24097,KIAA1147,ENSG00000257093,protein_coding -56190,LSM14A,ENSG00000257103,protein_coding -46483,NHLRC4,ENSG00000257108,protein_coding -41942,OR11H12,ENSG00000257115,protein_coding -24118,TAS2R38,ENSG00000257138,protein_coding -21999,AC004080.3,ENSG00000257184,protein_coding -7491,AC112229.3,ENSG00000257207,protein_coding -40124,GATC,ENSG00000257218,protein_coding -4452,ZBED6,ENSG00000257315,protein_coding -24114,MGAM,ENSG00000257335,protein_coding -44012,AL928654.3,ENSG00000257341,protein_coding -55407,AC008770.1,ENSG00000257355,protein_coding -43052,FNTB,ENSG00000257365,protein_coding -38900,AC023055.1,ENSG00000257390,protein_coding -38922,AC034102.3,ENSG00000257411,protein_coding -55409,ZNF878,ENSG00000257446,protein_coding -31396,AL157935.2,ENSG00000257524,protein_coding 
-25965,RPL36A-HNRNPH2,ENSG00000257529,protein_coding -55420,ZNF625,ENSG00000257591,protein_coding -39487,GALNT4,ENSG00000257594,protein_coding -56892,INAFM1,ENSG00000257704,protein_coding -38945,CNPY2,ENSG00000257727,protein_coding -24119,MGAM2,ENSG00000257743,protein_coding -39932,AC002996.1,ENSG00000257767,protein_coding -39032,AC025165.3,ENSG00000257921,protein_coding -23442,CUX1,ENSG00000257923,protein_coding -51726,TEN1,ENSG00000257949,protein_coding -49144,P2RX5-TAX1BP3,ENSG00000257950,protein_coding -38544,TEX49,ENSG00000257987,protein_coding -39263,AC025263.2,ENSG00000258052,protein_coding -39290,AC073612.1,ENSG00000258064,protein_coding -24116,OR9A4,ENSG00000258083,protein_coding -40035,MAP1LC3B2,ENSG00000258102,protein_coding -24121,PRSS58,ENSG00000258223,protein_coding -24117,CLEC5A,ENSG00000258227,protein_coding -43048,CHURC1,ENSG00000258289,protein_coding -38891,AC009779.3,ENSG00000258311,protein_coding -49282,C17orf49,ENSG00000258315,protein_coding -54704,RTEL1,ENSG00000258366,protein_coding -19397,PPT2-EGFL8,ENSG00000258388,protein_coding -57256,ZNF578,ENSG00000258405,protein_coding -29181,AC100868.1,ENSG00000258417,protein_coding -48418,PDF,ENSG00000258429,protein_coding -42064,RNASE12,ENSG00000258436,protein_coding -42002,OR11H2,ENSG00000258453,protein_coding -44925,AC012651.1,ENSG00000258461,protein_coding -3629,AL139011.2,ENSG00000258465,protein_coding -43104,AL049779.1,ENSG00000258466,protein_coding -49998,AC005726.1,ENSG00000258472,protein_coding -45559,SPESP1,ENSG00000258484,protein_coding -34580,AP001781.2,ENSG00000258529,protein_coding -37312,AC068896.1,ENSG00000258539,protein_coding -58811,SPECC1L-ADORA2A,ENSG00000258555,protein_coding -32121,TRIM6-TRIM34,ENSG00000258588,protein_coding -42315,BCL2L2-PABPN1,ENSG00000258643,protein_coding -43165,SYNJ2BP-COX16,ENSG00000258644,protein_coding -43254,AC005520.1,ENSG00000258653,protein_coding -32122,TRIM34,ENSG00000258659,protein_coding -55831,AC011448.1,ENSG00000258674,protein_coding 
-28348,AC022826.2,ENSG00000258677,protein_coding -43654,AL355102.2,ENSG00000258691,protein_coding -53384,C20orf141,ENSG00000258713,protein_coding -27240,PINX1,ENSG00000258724,protein_coding -29965,AL162231.3,ENSG00000258728,protein_coding -42543,AL121594.1,ENSG00000258790,protein_coding -32953,OR4C13,ENSG00000258817,protein_coding -42069,RNASE4,ENSG00000258818,protein_coding -38996,AC137834.1,ENSG00000258830,protein_coding -48973,MC1R,ENSG00000258839,protein_coding -17125,AC008575.1,ENSG00000258864,protein_coding -57615,DUXA,ENSG00000258873,protein_coding -6653,AC007040.2,ENSG00000258881,protein_coding -51444,CEP95,ENSG00000258890,protein_coding -48976,TUBB3,ENSG00000258947,protein_coding -9544,UBE2F-SCLY,ENSG00000258984,protein_coding -43965,TMEM179,ENSG00000258986,protein_coding -42981,AL355916.3,ENSG00000258989,protein_coding -57857,TSPY1,ENSG00000258992,protein_coding -49546,TVP23C-CDRT4,ENSG00000259024,protein_coding -2016,FPGT-TNNI3K,ENSG00000259030,protein_coding -18655,BLOC1S5-TXNDC5,ENSG00000259040,protein_coding -42061,AL163195.3,ENSG00000259060,protein_coding -43576,AL110118.2,ENSG00000259066,protein_coding -39486,POC1B-GALNT4,ENSG00000259075,protein_coding -34046,NDUFC2-KCTD14,ENSG00000259112,protein_coding -51704,SMIM6,ENSG00000259120,protein_coding -42292,AL132780.3,ENSG00000259132,protein_coding -43363,AC007375.1,ENSG00000259164,protein_coding -42070,AL163636.2,ENSG00000259171,protein_coding -50917,ITGB3,ENSG00000259207,protein_coding -49337,SLC35G6,ENSG00000259224,protein_coding -44822,BUB1B-PAK6,ENSG00000259288,protein_coding -29045,ZHX1-C8orf76,ENSG00000259305,protein_coding -45434,AC087632.1,ENSG00000259316,protein_coding -44834,INAFM2,ENSG00000259330,protein_coding -45879,ST20-MTHFS,ENSG00000259332,protein_coding -42355,AL136295.3,ENSG00000259371,protein_coding -51419,GH1,ENSG00000259384,protein_coding -54067,TGIF2-RAB5IF,ENSG00000259399,protein_coding -45894,CTXND1,ENSG00000259417,protein_coding -42331,THTPA,ENSG00000259431,protein_coding 
-46083,MRPL46,ENSG00000259494,protein_coding -45996,UBE2Q2L,ENSG00000259511,protein_coding -42368,AL136295.4,ENSG00000259522,protein_coding -42363,AL136295.5,ENSG00000259529,protein_coding -34865,BLID,ENSG00000259571,protein_coding -47742,AC136428.1,ENSG00000259680,protein_coding -50918,AC068234.1,ENSG00000259753,protein_coding -46668,AC093525.1,ENSG00000259784,protein_coding -48942,SLC22A31,ENSG00000259803,protein_coding -5461,LYPD8,ENSG00000259823,protein_coding -48421,AC026464.1,ENSG00000259900,protein_coding -7113,AL845331.2,ENSG00000259916,protein_coding -10581,RBM15B,ENSG00000259956,protein_coding -55184,TGFBR3L,ENSG00000260001,protein_coding -45547,AC107871.1,ENSG00000260007,protein_coding -50978,HOXB7,ENSG00000260027,protein_coding -48606,AC009163.2,ENSG00000260092,protein_coding -23456,SPDYE6,ENSG00000260097,protein_coding -45061,AC090527.2,ENSG00000260170,protein_coding -31660,CCDC187,ENSG00000260220,protein_coding -31074,FRRS1L,ENSG00000260230,protein_coding -12018,AC020636.2,ENSG00000260234,protein_coding -3461,PMF1-BGLAP,ENSG00000260238,protein_coding -46666,AC093525.2,ENSG00000260272,protein_coding -18887,ARMH2,ENSG00000260286,protein_coding -50367,TBC1D3G,ENSG00000260287,protein_coding -48762,AC009119.2,ENSG00000260300,protein_coding -35460,MRC1,ENSG00000260314,protein_coding -50647,HSPB9,ENSG00000260325,protein_coding -47135,AC138811.2,ENSG00000260342,protein_coding -48420,AC026464.3,ENSG00000260371,protein_coding -29405,SCX,ENSG00000260428,protein_coding -48865,C16orf95,ENSG00000260456,protein_coding -49891,KCNJ18,ENSG00000260458,protein_coding -48456,AC012184.2,ENSG00000260537,protein_coding -26128,AL035425.2,ENSG00000260548,protein_coding -15500,DUX4,ENSG00000260596,protein_coding -48711,AC092718.3,ENSG00000260643,protein_coding -30404,ANKRD20A1,ENSG00000260691,protein_coding -45631,AC009690.1,ENSG00000260729,protein_coding -48489,TLE7,ENSG00000260734,protein_coding -26162,SERTM2,ENSG00000260802,protein_coding 
-45953,AC245033.1,ENSG00000260836,protein_coding -48254,AC010542.3,ENSG00000260851,protein_coding -53341,AL049634.2,ENSG00000260861,protein_coding -53903,XKR7,ENSG00000260903,protein_coding -48415,AC026464.4,ENSG00000260914,protein_coding -45235,CCPG1,ENSG00000260916,protein_coding -47503,SULT1A3,ENSG00000261052,protein_coding -24087,TMEM178B,ENSG00000261115,protein_coding -47050,AC140504.1,ENSG00000261130,protein_coding -46159,AC091167.2,ENSG00000261147,protein_coding -29382,EPPK1,ENSG00000261150,protein_coding -47152,CLEC19A,ENSG00000261210,protein_coding -57532,ZNF865,ENSG00000261221,protein_coding -29403,BOP1,ENSG00000261236,protein_coding -44598,GOLGA8T,ENSG00000261247,protein_coding -19266,MUC22,ENSG00000261272,protein_coding -38673,FIGNL2,ENSG00000261308,protein_coding -57116,AC010325.1,ENSG00000261341,protein_coding -51435,PECAM1,ENSG00000261371,protein_coding -51725,TEN1-CDK3,ENSG00000261408,protein_coding -35152,TUBB8,ENSG00000261456,protein_coding -47532,AC002310.4,ENSG00000261459,protein_coding -47724,TP53TG3B,ENSG00000261509,protein_coding -54018,AL121753.1,ENSG00000261582,protein_coding -29411,TMEM249,ENSG00000261587,protein_coding -33965,TPBGL,ENSG00000261594,protein_coding -48722,GAN,ENSG00000261609,protein_coding -48492,AC010547.4,ENSG00000261611,protein_coding -44571,GOLGA6L7,ENSG00000261649,protein_coding -45240,C15orf65,ENSG00000261652,protein_coding -29332,LY6L,ENSG00000261667,protein_coding -29410,SCRT1,ENSG00000261678,protein_coding -48525,HPR,ENSG00000261701,protein_coding -48598,AC009163.4,ENSG00000261717,protein_coding -46578,AL031708.1,ENSG00000261732,protein_coding -44379,GOLGA8S,ENSG00000261739,protein_coding -47439,BOLA2-SMG1P6,ENSG00000261740,protein_coding -28244,TCF24,ENSG00000261787,protein_coding -31740,AL929554.1,ENSG00000261793,protein_coding -44626,GOLGA8H,ENSG00000261794,protein_coding -11632,ISY1-RAB43,ENSG00000261796,protein_coding -47375,AC138894.1,ENSG00000261832,protein_coding -56547,MIA,ENSG00000261857,protein_coding 
-51155,SMIM36,ENSG00000261873,protein_coding -48346,AC040162.1,ENSG00000261884,protein_coding -49313,AC026954.2,ENSG00000261915,protein_coding -17715,PCDHGA9,ENSG00000261934,protein_coding -57023,GFY,ENSG00000261949,protein_coding -46713,LINC00514,ENSG00000262152,protein_coding -49194,AC233723.1,ENSG00000262165,protein_coding -19791,MYMX,ENSG00000262179,protein_coding -4200,OCLM,ENSG00000262180,protein_coding -17709,PCDHGB3,ENSG00000262209,protein_coding -46790,CORO7,ENSG00000262246,protein_coding -49304,AC003688.1,ENSG00000262302,protein_coding -49137,AC027796.3,ENSG00000262304,protein_coding -34441,MMP12,ENSG00000262406,protein_coding -22879,SPDYE9P,ENSG00000262461,protein_coding -49322,TMEM256-PLSCR3,ENSG00000262481,protein_coding -56444,CCER2,ENSG00000262484,protein_coding -49301,AC120057.2,ENSG00000262526,protein_coding -21008,SMIM28,ENSG00000262543,protein_coding -44985,AC018512.1,ENSG00000262560,protein_coding -17706,PCDHGA4,ENSG00000262576,protein_coding -46756,AC025283.2,ENSG00000262621,protein_coding -49112,OR1D5,ENSG00000262628,protein_coding -50906,AC005670.2,ENSG00000262633,protein_coding -32362,SPON1,ENSG00000262655,protein_coding -51958,AC139530.3,ENSG00000262660,protein_coding -49063,OVCA2,ENSG00000262664,protein_coding -49373,AC104581.2,ENSG00000262730,protein_coding -51957,MRPL12,ENSG00000262814,protein_coding -57179,C19orf84,ENSG00000262874,protein_coding -26856,CCNQ,ENSG00000262919,protein_coding -22927,GTF2I,ENSG00000263001,protein_coding -56726,ZNF234,ENSG00000263002,protein_coding -19335,AL662899.2,ENSG00000263020,protein_coding -45284,MYZAP,ENSG00000263155,protein_coding -48354,DPEP2NB,ENSG00000263201,protein_coding -55142,AC119396.1,ENSG00000263264,protein_coding -2843,PPIAL4A,ENSG00000263353,protein_coding -49475,TMEM238L,ENSG00000263429,protein_coding -3076,PPIAL4C,ENSG00000263464,protein_coding -34360,SRSF8,ENSG00000263465,protein_coding -2897,FAM72C,ENSG00000263513,protein_coding -4537,IKBKE,ENSG00000263528,protein_coding 
-49389,AC129492.4,ENSG00000263620,protein_coding -35945,MSMB,ENSG00000263639,protein_coding -50871,LINC02210-CRHR1,ENSG00000263715,protein_coding -35976,GDF2,ENSG00000263761,protein_coding -49413,AC135178.2,ENSG00000263809,protein_coding -50457,LINC00672,ENSG00000263874,protein_coding -3023,NBPF11,ENSG00000263956,protein_coding -4527,RHEX,ENSG00000263961,protein_coding -35235,AKR1C8P,ENSG00000264006,protein_coding -50536,AC073508.2,ENSG00000264058,protein_coding -49641,AC055811.2,ENSG00000264187,protein_coding -35953,ANXA8L1,ENSG00000264230,protein_coding -6727,AC006030.1,ENSG00000264324,protein_coding -2972,NOTCH2NLA,ENSG00000264343,protein_coding -51210,DYNLL2,ENSG00000264364,protein_coding -49458,MYH4,ENSG00000264424,protein_coding -3105,OTUD7B,ENSG00000264522,protein_coding -29773,AL359922.1,ENSG00000264545,protein_coding -29336,AC138696.1,ENSG00000264668,protein_coding -35996,NPY4R2,ENSG00000264717,protein_coding -51394,AC113554.1,ENSG00000264813,protein_coding -3011,GJA5,ENSG00000265107,protein_coding -50161,AC134669.1,ENSG00000265118,protein_coding -35979,ANXA8,ENSG00000265190,protein_coding -35977,RBP3,ENSG00000265203,protein_coding -2958,RBM8A,ENSG00000265241,protein_coding -51260,AC099850.2,ENSG00000265303,protein_coding -35942,TIMM23,ENSG00000265354,protein_coding -2949,RNF115,ENSG00000265491,protein_coding -60123,C21orf59-TCP10L,ENSG00000265590,protein_coding -52834,RPL17,ENSG00000265681,protein_coding -48292,AC074143.2,ENSG00000265690,protein_coding -35978,ZNF488,ENSG00000265763,protein_coding -2829,SEC22B,ENSG00000265808,protein_coding -28646,FSBP,ENSG00000265817,protein_coding -18659,EEF1E1-BLOC1S5,ENSG00000265818,protein_coding -2966,TXNIP,ENSG00000265972,protein_coding -4535,SRGAP2,ENSG00000266028,protein_coding -51936,BAHCC1,ENSG00000266074,protein_coding -51479,AC004805.1,ENSG00000266076,protein_coding -51204,AC015813.2,ENSG00000266086,protein_coding -4541,RASSF5,ENSG00000266094,protein_coding -51403,STRADA,ENSG00000266173,protein_coding 
-49957,AC005697.1,ENSG00000266202,protein_coding -23912,KLF14,ENSG00000266265,protein_coding -49610,AC098850.3,ENSG00000266302,protein_coding -2911,NBPF15,ENSG00000266338,protein_coding -35944,NCOA4,ENSG00000266412,protein_coding -3119,MRPS21,ENSG00000266472,protein_coding -35975,GDF10,ENSG00000266524,protein_coding -51701,MYO15B,ENSG00000266714,protein_coding -49944,AC015688.4,ENSG00000266728,protein_coding -51238,AC011195.2,ENSG00000266826,protein_coding -56195,AC092073.1,ENSG00000266953,protein_coding -56240,FXYD1,ENSG00000266964,protein_coding -50709,AARSD1,ENSG00000266967,protein_coding -52844,AC090227.1,ENSG00000266997,protein_coding -54927,AC006538.3,ENSG00000267001,protein_coding -56719,AC067968.1,ENSG00000267022,protein_coding -56350,ZNF850,ENSG00000267041,protein_coding -54864,AC005943.1,ENSG00000267059,protein_coding -50711,PTGES3L,ENSG00000267060,protein_coding -57489,AC010327.2,ENSG00000267110,protein_coding -56286,AD000671.2,ENSG00000267120,protein_coding -53285,AC090360.1,ENSG00000267127,protein_coding -52665,AC007998.2,ENSG00000267140,protein_coding -55066,AC011499.1,ENSG00000267157,protein_coding -51774,AC005837.2,ENSG00000267168,protein_coding -56736,AC245748.1,ENSG00000267173,protein_coding -55400,AC008770.2,ENSG00000267179,protein_coding -31692,LCN6,ENSG00000267206,protein_coding -50643,C17orf113,ENSG00000267221,protein_coding -52786,AC012254.2,ENSG00000267228,protein_coding -50646,AC099811.2,ENSG00000267261,protein_coding -38778,ATF7-NPFF,ENSG00000267281,protein_coding -55311,AC011511.4,ENSG00000267303,protein_coding -55076,AC104532.1,ENSG00000267314,protein_coding -51284,AC005702.1,ENSG00000267318,protein_coding -56987,AC008687.1,ENSG00000267335,protein_coding -56364,AC012309.1,ENSG00000267360,protein_coding -23477,UPK3BL1,ENSG00000267368,protein_coding -55029,AC011498.4,ENSG00000267385,protein_coding -51721,AC087289.4,ENSG00000267426,protein_coding -56768,APOC4,ENSG00000267467,protein_coding -55372,AC008481.3,ENSG00000267477,protein_coding 
-56737,ZNF285,ENSG00000267508,protein_coding -55299,S1PR2,ENSG00000267534,protein_coding -56388,AC093227.2,ENSG00000267552,protein_coding -2197,AC093155.3,ENSG00000267561,protein_coding -50276,AC004223.3,ENSG00000267618,protein_coding -56991,CGB1,ENSG00000267631,protein_coding -23478,AC105052.3,ENSG00000267645,protein_coding -55310,FDX2,ENSG00000267673,protein_coding -56723,ZNF224,ENSG00000267680,protein_coding -23993,LUZP6,ENSG00000267697,protein_coding -52874,AC091551.1,ENSG00000267699,protein_coding -57510,AC020922.1,ENSG00000267706,protein_coding -57567,EDDM13,ENSG00000267710,protein_coding -55073,AC024592.3,ENSG00000267740,protein_coding -56407,AC011479.1,ENSG00000267748,protein_coding -46817,SMIM22,ENSG00000267795,protein_coding -56291,LIN37,ENSG00000267796,protein_coding -55201,NDUFA7,ENSG00000267855,protein_coding -56609,AC243967.1,ENSG00000267881,protein_coding -43149,CCDC177,ENSG00000267909,protein_coding -55148,AC008878.1,ENSG00000267952,protein_coding -26752,MAGEA9B,ENSG00000267978,protein_coding -25204,SSX4,ENSG00000268009,protein_coding -56621,AC010616.1,ENSG00000268041,protein_coding -2985,NBPF12,ENSG00000268043,protein_coding -56436,AC008982.1,ENSG00000268083,protein_coding -26816,GABRQ,ENSG00000268089,protein_coding -26232,SLC6A14,ENSG00000268104,protein_coding -57644,AC003005.1,ENSG00000268107,protein_coding -57631,AC003002.1,ENSG00000268133,protein_coding -57637,AC004076.1,ENSG00000268163,protein_coding -55760,AC007192.1,ENSG00000268173,protein_coding -57599,SMIM17,ENSG00000268182,protein_coding -55804,AC002985.1,ENSG00000268193,protein_coding -26890,OPN1MW,ENSG00000268221,protein_coding -17171,ARL14EPL,ENSG00000268223,protein_coding -25996,TCP11X1,ENSG00000268235,protein_coding -9874,AC090004.1,ENSG00000268279,protein_coding -48995,SCGB1C2,ENSG00000268320,protein_coding -25379,FAM156A,ENSG00000268350,protein_coding -56688,L34079.1,ENSG00000268361,protein_coding -55158,AC008763.2,ENSG00000268400,protein_coding 
-56817,AC011530.1,ENSG00000268434,protein_coding -25368,SSX2B,ENSG00000268447,protein_coding -56946,AC008403.1,ENSG00000268465,protein_coding -57201,AC018755.2,ENSG00000268500,protein_coding -57634,AC003002.2,ENSG00000268533,protein_coding -26825,MAGEA2,ENSG00000268606,protein_coding -55152,AC008878.2,ENSG00000268614,protein_coding -26081,TEX13A,ENSG00000268629,protein_coding -56634,AC006486.1,ENSG00000268643,protein_coding -26922,CTAG1A,ENSG00000268651,protein_coding -56985,AC008687.4,ENSG00000268655,protein_coding -60616,AC213203.1,ENSG00000268674,protein_coding -55997,ZNF723,ENSG00000268696,protein_coding -26753,HSFX2,ENSG00000268738,protein_coding -57663,AC010522.1,ENSG00000268750,protein_coding -55675,AC008764.4,ENSG00000268790,protein_coding -56553,AC008537.1,ENSG00000268797,protein_coding -15813,AC106774.4,ENSG00000268799,protein_coding -55143,AC008878.3,ENSG00000268861,protein_coding -55432,AC008758.5,ENSG00000268870,protein_coding -26819,CSAG2,ENSG00000268902,protein_coding -26826,CSAG3,ENSG00000268916,protein_coding -26559,CT45A1,ENSG00000268940,protein_coding -57285,ERVV-2,ENSG00000268964,protein_coding -56548,MIA-RAB4B,ENSG00000268975,protein_coding -26675,SPANXN2,ENSG00000268988,protein_coding -25660,FAM236B,ENSG00000268994,protein_coding -57657,AC003006.1,ENSG00000269026,protein_coding -11074,MTRNR2L12,ENSG00000269028,protein_coding -55726,AC010319.2,ENSG00000269035,protein_coding -55664,CALR3,ENSG00000269058,protein_coding -56002,ZNF728,ENSG00000269067,protein_coding -55705,AC010646.1,ENSG00000269095,protein_coding -26562,CT45A3,ENSG00000269096,protein_coding -1558,TRABD2B,ENSG00000269113,protein_coding -57075,AC011452.1,ENSG00000269179,protein_coding -56449,FBXO17,ENSG00000269190,protein_coding -26073,TMSB15B,ENSG00000269226,protein_coding -55927,AC010615.4,ENSG00000269237,protein_coding -55451,AC010422.3,ENSG00000269242,protein_coding -55708,AC010463.1,ENSG00000269307,protein_coding -25254,MAGIX,ENSG00000269313,protein_coding 
-26919,IKBKG,ENSG00000269335,protein_coding -57664,ZNF587B,ENSG00000269343,protein_coding -57174,AC008750.8,ENSG00000269403,protein_coding -57100,SPIB,ENSG00000269404,protein_coding -25997,NXF2,ENSG00000269405,protein_coding -26894,OPN1MW3,ENSG00000269433,protein_coding -26000,NXF2B,ENSG00000269437,protein_coding -15840,H3.Y,ENSG00000269466,protein_coding -57027,AC010619.1,ENSG00000269469,protein_coding -57670,AC010326.2,ENSG00000269476,protein_coding -25662,DMRTC1,ENSG00000269502,protein_coding -57284,ERVV-1,ENSG00000269526,protein_coding -57633,AC003002.3,ENSG00000269533,protein_coding -56446,AC011455.2,ENSG00000269547,protein_coding -26754,TMEM185A,ENSG00000269556,protein_coding -26569,CT45A10,ENSG00000269586,protein_coding -55453,AC010422.5,ENSG00000269590,protein_coding -55444,AC010422.6,ENSG00000269693,protein_coding -57603,ZIM2,ENSG00000269699,protein_coding -55165,AC008763.3,ENSG00000269711,protein_coding -3065,NBPF9,ENSG00000269713,protein_coding -55718,CCDC194,ENSG00000269720,protein_coding -57140,AC011473.4,ENSG00000269741,protein_coding -26074,SLC25A53,ENSG00000269743,protein_coding -55434,AC008758.6,ENSG00000269755,protein_coding -25205,SSX4B,ENSG00000269791,protein_coding -57715,RNF225,ENSG00000269855,protein_coding -56552,EGLN2,ENSG00000269858,protein_coding -36829,ARHGAP19-SLIT1,ENSG00000269891,protein_coding -35529,COMMD3-BMI1,ENSG00000269897,protein_coding -24051,FMC1-LUC7L2,ENSG00000269955,protein_coding -20192,MEI4,ENSG00000269964,protein_coding -55252,ZNF559-ZNF177,ENSG00000270011,protein_coding -36803,AL365273.2,ENSG00000270099,protein_coding -5075,TSNAX-DISC1,ENSG00000270106,protein_coding -642,MICOS10-NBL1,ENSG00000270136,protein_coding -3660,AL591806.3,ENSG00000270149,protein_coding -12807,NCBP2AS2,ENSG00000270170,protein_coding -41759,BIVM-ERCC5,ENSG00000270181,protein_coding -5206,MTRNR2L11,ENSG00000270188,protein_coding -23471,AC093668.1,ENSG00000270249,protein_coding -3096,HIST2H4B,ENSG00000270276,protein_coding 
-53314,AL121758.1,ENSG00000270299,protein_coding -36991,BORCS7-ASMT,ENSG00000270316,protein_coding -2977,PPIAL4H,ENSG00000270339,protein_coding -50332,HEATR9,ENSG00000270379,protein_coding -14517,MTRNR2L13,ENSG00000270394,protein_coding -470,PRAMEF5,ENSG00000270601,protein_coding -22302,URGCP-MRPS24,ENSG00000270617,protein_coding -3053,NBPF14,ENSG00000270629,protein_coding -50323,TAF15,ENSG00000270647,protein_coding -24178,MTRNR2L6,ENSG00000270672,protein_coding -8756,HSPE1-MOB4,ENSG00000270757,protein_coding -50325,GAS2L2,ENSG00000270765,protein_coding -19513,RPS10-NUDT3,ENSG00000270800,protein_coding -50328,C17orf50,ENSG00000270806,protein_coding -3089,HIST2H4A,ENSG00000270882,protein_coding -50324,RASL10B,ENSG00000270885,protein_coding -26568,CT45A9,ENSG00000270946,protein_coding -24248,CTAGE15,ENSG00000271079,protein_coding -2357,TMEM56-RWDD3,ENSG00000271092,protein_coding -60612,AC240274.1,ENSG00000271254,protein_coding -13868,UGT2A2,ENSG00000271271,protein_coding -53313,SRXN1,ENSG00000271303,protein_coding -24256,CTAGE6,ENSG00000271321,protein_coding -3075,NBPF19,ENSG00000271383,protein_coding -2971,NBPF10,ENSG00000271425,protein_coding -50326,MMP28,ENSG00000271447,protein_coding -26565,CT45A2,ENSG00000271449,protein_coding -50335,CCL5,ENSG00000271503,protein_coding -2906,PPIAL4E,ENSG00000271567,protein_coding -2962,LIX1L,ENSG00000271601,protein_coding -51438,MILR1,ENSG00000271605,protein_coding -29412,AC233992.2,ENSG00000271698,protein_coding -1735,MROH7-TTC4,ENSG00000271723,protein_coding -1137,AC114490.2,ENSG00000271741,protein_coding -20286,AL589666.1,ENSG00000271793,protein_coding -2664,AL603832.3,ENSG00000271810,protein_coding -17510,SMIM32,ENSG00000271824,protein_coding -2242,AC093423.3,ENSG00000271949,protein_coding -2963,ANKRD34A,ENSG00000272031,protein_coding -21310,GTF2H5,ENSG00000272047,protein_coding -10563,Z84492.2,ENSG00000272104,protein_coding -18695,AL024498.2,ENSG00000272162,protein_coding -3094,HIST2H2AA4,ENSG00000272196,protein_coding 
-15430,AC018709.1,ENSG00000272297,protein_coding -10664,AC096887.1,ENSG00000272305,protein_coding -19512,NUDT3,ENSG00000272325,protein_coding -56284,KMT2B,ENSG00000272333,protein_coding -22960,POM121C,ENSG00000272391,protein_coding -20522,CD24,ENSG00000272398,protein_coding -9799,AC022384.1,ENSG00000272410,protein_coding -13991,FAM47E-STBD1,ENSG00000272414,protein_coding -19798,AL353588.1,ENSG00000272442,protein_coding -20322,CFAP206,ENSG00000272514,protein_coding -10658,MUSTN1,ENSG00000272573,protein_coding -12853,ZNF595,ENSG00000272602,protein_coding -48419,AC026464.6,ENSG00000272617,protein_coding -48996,DOC2B,ENSG00000272636,protein_coding -23317,AC005020.2,ENSG00000272647,protein_coding -17682,PCDHB16,ENSG00000272674,protein_coding -12772,AC069257.3,ENSG00000272741,protein_coding -17436,AC104109.3,ENSG00000272772,protein_coding -60437,KRTAP10-7,ENSG00000272804,protein_coding -38556,AC073610.2,ENSG00000272822,protein_coding -10671,DCP1A,ENSG00000272886,protein_coding -31698,AL355987.3,ENSG00000272896,protein_coding -54036,AL109827.1,ENSG00000272897,protein_coding -23849,ATP6V1FNB,ENSG00000272899,protein_coding -36385,AC022400.7,ENSG00000272916,protein_coding -37596,AC005832.4,ENSG00000272921,protein_coding -21649,RBAK-RBAKDN,ENSG00000272968,protein_coding -33504,ARL2-SNX15,ENSG00000273003,protein_coding -45628,AC009690.3,ENSG00000273025,protein_coding -7269,C2orf15,ENSG00000273045,protein_coding -54712,AL121845.2,ENSG00000273047,protein_coding -38802,AC012531.3,ENSG00000273049,protein_coding -37974,GRIN2B,ENSG00000273079,protein_coding -3400,AL713999.1,ENSG00000273088,protein_coding -56615,LYPD4,ENSG00000273111,protein_coding -2840,NBPF26,ENSG00000273136,protein_coding -54709,AL121845.3,ENSG00000273154,protein_coding -7271,AC092587.1,ENSG00000273155,protein_coding -40694,AL359736.1,ENSG00000273167,protein_coding -49989,AC002094.3,ENSG00000273171,protein_coding -44403,SNURF,ENSG00000273173,protein_coding -2894,HIST2H3PS2,ENSG00000273213,protein_coding 
-17357,AC008695.1,ENSG00000273217,protein_coding -12874,TMEM271,ENSG00000273238,protein_coding -43613,AL049839.2,ENSG00000273259,protein_coding -6260,AC073283.3,ENSG00000273269,protein_coding -1081,ZBTB8B,ENSG00000273274,protein_coding -10292,AC092042.3,ENSG00000273291,protein_coding -16017,C1QTNF3-AMACR,ENSG00000273294,protein_coding -12776,TM4SF19-TCTEX1D2,ENSG00000273331,protein_coding -6581,AC017083.3,ENSG00000273398,protein_coding -60581,AC011841.1,ENSG00000273496,protein_coding -50412,TBC1D3K,ENSG00000273513,protein_coding -30183,FOXD4L6,ENSG00000273514,protein_coding -22871,SPDYE8P,ENSG00000273520,protein_coding -46067,AGBL1,ENSG00000273540,protein_coding -19036,HIST1H4K,ENSG00000273542,protein_coding -60562,AC136616.1,ENSG00000273554,protein_coding -50446,CWC25,ENSG00000273559,protein_coding -59707,SMIM11B,ENSG00000273590,protein_coding -50431,EPOP,ENSG00000273604,protein_coding -50372,ZNHIT3,ENSG00000273611,protein_coding -26566,CT45A7,ENSG00000273696,protein_coding -19034,HIST1H2BM,ENSG00000273703,protein_coding -50384,LHX1,ENSG00000273706,protein_coding -54905,AC005258.1,ENSG00000273734,protein_coding -60589,AL592183.1,ENSG00000273748,protein_coding -44364,CYFIP1,ENSG00000273749,protein_coding -56742,CEACAM20,ENSG00000273777,protein_coding -18955,HIST1H2BG,ENSG00000273802,protein_coding -25286,USP27X,ENSG00000273820,protein_coding -16498,TAF9,ENSG00000273841,protein_coding -59228,NOL12,ENSG00000273899,protein_coding -44367,GOLGA6L1,ENSG00000273976,protein_coding -18965,HIST1H3G,ENSG00000273983,protein_coding -6743,INO80B-WBP1,ENSG00000274049,protein_coding -22936,CASTOR2,ENSG00000274070,protein_coding -44330,OR4M2,ENSG00000274102,protein_coding -60558,AC133551.1,ENSG00000274175,protein_coding -49872,NATD1,ENSG00000274180,protein_coding -26941,H2AFB1,ENSG00000274183,protein_coding -35949,ANTXRL,ENSG00000274209,protein_coding -50426,SOCS7,ENSG00000274211,protein_coding -50368,TBC1D3H,ENSG00000274226,protein_coding 
-58400,GGTLC3,ENSG00000274252,protein_coding -25270,GAGE13,ENSG00000274274,protein_coding -59676,CBSL,ENSG00000274276,protein_coding -7185,ADRA2B,ENSG00000274286,protein_coding -18946,HIST1H2BE,ENSG00000274290,protein_coding -53330,AL136531.2,ENSG00000274322,protein_coding -30382,ZNF658,ENSG00000274349,protein_coding -1373,TMEM269,ENSG00000274386,protein_coding -59783,TPTE,ENSG00000274391,protein_coding -50415,TBC1D3D,ENSG00000274419,protein_coding -28335,C8orf89,ENSG00000274443,protein_coding -50414,TBC1D3L,ENSG00000274512,protein_coding -22937,RCC1L,ENSG00000274523,protein_coding -49988,SEBOX,ENSG00000274529,protein_coding -59663,CU639417.1,ENSG00000274559,protein_coding -25302,DGKK,ENSG00000274588,protein_coding -58570,RIMBP3B,ENSG00000274600,protein_coding -50421,TBC1D3,ENSG00000274611,protein_coding -18961,HIST1H4F,ENSG00000274618,protein_coding -19046,HIST1H2BO,ENSG00000274641,protein_coding -50349,CCL23,ENSG00000274736,protein_coding -52780,ELOA3D,ENSG00000274744,protein_coding -60089,KRTAP7-1,ENSG00000274749,protein_coding -18958,HIST1H3E,ENSG00000274750,protein_coding -456,PRAMEF27,ENSG00000274764,protein_coding -26962,F8A2,ENSG00000274791,protein_coding -60572,AC171558.1,ENSG00000274792,protein_coding -50357,TBC1D3B,ENSG00000274808,protein_coding -11714,NPHP3-ACAD11,ENSG00000274810,protein_coding -60579,MAFIP,ENSG00000274847,protein_coding -31855,PANO1,ENSG00000274897,protein_coding -50364,TBC1D3I,ENSG00000274933,protein_coding -1249,AL139260.3,ENSG00000274944,protein_coding -19003,HIST1H2AH,ENSG00000274997,protein_coding -58670,ZNF280B,ENSG00000275004,protein_coding -50435,MLLT6,ENSG00000275023,protein_coding -47722,TP53TG3E,ENSG00000275034,protein_coding -6374,PPP4R3B,ENSG00000275052,protein_coding -60611,AC233755.1,ENSG00000275063,protein_coding -50402,SYNRG,ENSG00000275066,protein_coding -27469,NUDT18,ENSG00000275074,protein_coding -7142,ZNF2,ENSG00000275111,protein_coding -25271,GAGE2E,ENSG00000275113,protein_coding 
-19043,HIST1H4L,ENSG00000275126,protein_coding -50342,CCL16,ENSG00000275152,protein_coding -12393,AC117457.1,ENSG00000275163,protein_coding -57440,LENG9,ENSG00000275183,protein_coding -19038,HIST1H2AK,ENSG00000275221,protein_coding -60571,AC171558.3,ENSG00000275249,protein_coding -50353,CCL4,ENSG00000275302,protein_coding -27183,PRAG1,ENSG00000275342,protein_coding -23770,C7orf77,ENSG00000275356,protein_coding -19042,HIST1H3I,ENSG00000275379,protein_coding -50350,CCL18,ENSG00000275385,protein_coding -56507,FCGBP,ENSG00000275395,protein_coding -50408,HNF1B,ENSG00000275410,protein_coding -59654,FP565260.1,ENSG00000275464,protein_coding -50448,C17orf98,ENSG00000275489,protein_coding -30400,AL627230.1,ENSG00000275493,protein_coding -25664,FAM236A,ENSG00000275520,protein_coding -21616,GRIFIN,ENSG00000275572,protein_coding -27074,XKR5,ENSG00000275591,protein_coding -18962,HIST1H4G,ENSG00000275663,protein_coding -46161,AC091167.6,ENSG00000275674,protein_coding -50345,CCL15-CCL14,ENSG00000275688,protein_coding -50386,AATF,ENSG00000275700,protein_coding -18964,HIST1H2BH,ENSG00000275713,protein_coding -18928,HIST1H3A,ENSG00000275714,protein_coding -50346,CCL15,ENSG00000275718,protein_coding -50340,LYZL6,ENSG00000275722,protein_coding -17799,AC091959.3,ENSG00000275740,protein_coding -461,HNRNPCL2,ENSG00000275774,protein_coding -37891,AC018630.2,ENSG00000275778,protein_coding -58407,RIMBP3,ENSG00000275793,protein_coding -50427,ARHGAP23,ENSG00000275832,protein_coding -44365,TUBGCP5,ENSG00000275835,protein_coding -60560,AC136612.1,ENSG00000275869,protein_coding -59677,U2AF1L5,ENSG00000275895,protein_coding -24189,PRSS2,ENSG00000275896,protein_coding -50370,TBC1D3F,ENSG00000275954,protein_coding -30389,SPATA31A3,ENSG00000275969,protein_coding -59669,SIK1B,ENSG00000275993,protein_coding -60593,AC007325.1,ENSG00000276017,protein_coding -50401,DUSP14,ENSG00000276023,protein_coding -30249,SPATA31A7,ENSG00000276040,protein_coding -55048,UHRF1,ENSG00000276043,protein_coding 
-40173,ORAI1,ENSG00000276045,protein_coding -50360,CCL4L2,ENSG00000276070,protein_coding -59681,CRYAA2,ENSG00000276076,protein_coding -50359,CCL3L1,ENSG00000276085,protein_coding -5831,AC008073.3,ENSG00000276087,protein_coding -31004,OR13C2,ENSG00000276119,protein_coding -19001,HIST1H4I,ENSG00000276180,protein_coding -30372,ANKRD20A3,ENSG00000276203,protein_coding -49424,PIK3R6,ENSG00000276231,protein_coding -50398,TADA2A,ENSG00000276234,protein_coding -42134,OR4E1,ENSG00000276240,protein_coding -60583,AC011043.1,ENSG00000276256,protein_coding -59710,KCNE1B,ENSG00000276289,protein_coding -50445,PIP4K2B,ENSG00000276293,protein_coding -19076,AL021997.3,ENSG00000276302,protein_coding -60606,AC004556.1,ENSG00000276345,protein_coding -19033,HIST1H2AJ,ENSG00000276368,protein_coding -50343,CCL14,ENSG00000276409,protein_coding -18933,HIST1H2BB,ENSG00000276410,protein_coding -28424,AC036214.3,ENSG00000276418,protein_coding -35998,FAM25C,ENSG00000276430,protein_coding -36779,AL583836.1,ENSG00000276490,protein_coding -17714,PCDHGB5,ENSG00000276547,protein_coding -30243,SPATA31A5,ENSG00000276581,protein_coding -4524,RAB7B,ENSG00000276600,protein_coding -59653,FP565260.2,ENSG00000276612,protein_coding -41387,DACH1,ENSG00000276644,protein_coding -603,PADI6,ENSG00000276747,protein_coding -60569,AC136352.2,ENSG00000276760,protein_coding -19040,HIST1H2AL,ENSG00000276903,protein_coding -58800,GSTT4,ENSG00000276950,protein_coding -18954,HIST1H4E,ENSG00000276966,protein_coding -457,HNRNPCL3,ENSG00000277058,protein_coding -18956,HIST1H2AE,ENSG00000277075,protein_coding -59650,FP565260.3,ENSG00000277117,protein_coding -22853,TYW1B,ENSG00000277149,protein_coding -26964,F8A3,ENSG00000277150,protein_coding -18947,HIST1H4D,ENSG00000277157,protein_coding -50375,PIGW,ENSG00000277161,protein_coding -60595,AC007325.2,ENSG00000277196,protein_coding -26942,F8A1,ENSG00000277203,protein_coding -18952,HIST1H2BF,ENSG00000277224,protein_coding -50441,PCGF2,ENSG00000277258,protein_coding 
-59708,FAM243B,ENSG00000277277,protein_coding -35894,C10orf142,ENSG00000277288,protein_coding -44241,GOLGA6L6,ENSG00000277322,protein_coding -50429,SRCIN1,ENSG00000277363,protein_coding -50425,GPR179,ENSG00000277399,protein_coding -60578,AC145212.1,ENSG00000277400,protein_coding -20665,MARCKS,ENSG00000277443,protein_coding -5362,ZNF670,ENSG00000277462,protein_coding -60615,AC213203.2,ENSG00000277475,protein_coding -48518,PKD1L3,ENSG00000277481,protein_coding -29334,GPIHBP1,ENSG00000277494,protein_coding -56853,PNMA8C,ENSG00000277531,protein_coding -26261,AL772284.2,ENSG00000277535,protein_coding -31003,OR13C5,ENSG00000277556,protein_coding -27547,NEFL,ENSG00000277586,protein_coding -54199,Z98752.3,ENSG00000277611,protein_coding -60596,BX072566.1,ENSG00000277630,protein_coding -50352,CCL3,ENSG00000277632,protein_coding -48007,AC007906.2,ENSG00000277639,protein_coding -60568,AC136352.3,ENSG00000277666,protein_coding -373,AL109811.3,ENSG00000277726,protein_coding -26966,H2AFB3,ENSG00000277745,protein_coding -35994,FO681492.1,ENSG00000277758,protein_coding -60564,AC136616.2,ENSG00000277761,protein_coding -18963,HIST1H3F,ENSG00000277775,protein_coding -50443,PSMB3,ENSG00000277791,protein_coding -60565,AC141272.1,ENSG00000277836,protein_coding -60610,AC233755.2,ENSG00000277856,protein_coding -26961,H2AFB2,ENSG00000277858,protein_coding -44349,GOLGA6L22,ENSG00000277865,protein_coding -6006,SRD5A2,ENSG00000277893,protein_coding -32138,OR52E5,ENSG00000277932,protein_coding -49345,SENP3-EIF4A1,ENSG00000277957,protein_coding -58511,AC007731.4,ENSG00000277971,protein_coding -50439,CISD3,ENSG00000277972,protein_coding -50339,RDM1,ENSG00000278023,protein_coding -50404,DDX52,ENSG00000278053,protein_coding -26895,TEX28,ENSG00000278057,protein_coding -26567,CT45A8,ENSG00000278085,protein_coding -57697,ZNF8,ENSG00000278129,protein_coding -1502,AL358075.4,ENSG00000278139,protein_coding -59207,SSTR3,ENSG00000278195,protein_coding -60584,AC011043.2,ENSG00000278198,protein_coding 
-19694,PRICKLE4,ENSG00000278224,protein_coding -50374,MYO19,ENSG00000278259,protein_coding -26564,CT45A6,ENSG00000278289,protein_coding -50416,TBC1D3C,ENSG00000278299,protein_coding -50376,GGNBP2,ENSG00000278311,protein_coding -56739,ZNF229,ENSG00000278318,protein_coding -60602,AL354822.1,ENSG00000278384,protein_coding -18932,HIST1H2AB,ENSG00000278463,protein_coding -50396,C17orf78,ENSG00000278505,protein_coding -44294,POTEB3,ENSG00000278522,protein_coding -50378,DHRS11,ENSG00000278535,protein_coding -50392,ACACA,ENSG00000278540,protein_coding -58403,TMEM191B,ENSG00000278558,protein_coding -45615,NR2E3,ENSG00000278570,protein_coding -18967,HIST1H2BI,ENSG00000278588,protein_coding -50419,TBC1D3E,ENSG00000278599,protein_coding -33363,C11orf98,ENSG00000278615,protein_coding -50379,MRM1,ENSG00000278619,protein_coding -60603,AC023491.2,ENSG00000278633,protein_coding -18929,HIST1H4A,ENSG00000278637,protein_coding -26318,AC008162.2,ENSG00000278646,protein_coding -45940,GOLGA6L10,ENSG00000278662,protein_coding -52779,ELOA3B,ENSG00000278674,protein_coding -19045,HIST1H2AM,ENSG00000278677,protein_coding -24419,IQCA1L,ENSG00000278685,protein_coding -60577,BX004987.1,ENSG00000278704,protein_coding -18930,HIST1H4B,ENSG00000278705,protein_coding -60563,AC136616.3,ENSG00000278782,protein_coding -26844,AC236972.4,ENSG00000278803,protein_coding -60594,AC007325.4,ENSG00000278817,protein_coding -19032,HIST1H3H,ENSG00000278828,protein_coding -50424,MRPL45,ENSG00000278845,protein_coding -47727,TP53TG3F,ENSG00000278848,protein_coding -32060,OR51G1,ENSG00000278870,protein_coding -30041,OR2S2,ENSG00000278889,protein_coding -59709,SMIM34B,ENSG00000278961,protein_coding -32225,OR10A6,ENSG00000279000,protein_coding -32097,OR51B2,ENSG00000279012,protein_coding -33149,OR6Q1,ENSG00000279051,protein_coding -31726,AL807752.6,ENSG00000279073,protein_coding -3557,OR10X1,ENSG00000279111,protein_coding -34935,OR8D2,ENSG00000279116,protein_coding -467,PRAMEF13,ENSG00000279169,protein_coding 
-20452,TSTD3,ENSG00000279170,protein_coding -5414,OR2L8,ENSG00000279263,protein_coding -32050,OR52R1,ENSG00000279270,protein_coding -5453,OR2T11,ENSG00000279301,protein_coding -33014,OR5L1,ENSG00000279395,protein_coding -44308,AC135068.3,ENSG00000279408,protein_coding -32190,OR2AG1,ENSG00000279486,protein_coding -59649,FP565260.4,ENSG00000279493,protein_coding -33000,OR4C16,ENSG00000279514,protein_coding -17581,AC142391.1,ENSG00000279686,protein_coding -33011,OR5D13,ENSG00000279761,protein_coding -46240,AC013394.1,ENSG00000279765,protein_coding -2917,PPIAL4F,ENSG00000279782,protein_coding -468,PRAMEF18,ENSG00000279804,protein_coding -6294,AC073082.1,ENSG00000279956,protein_coding -33065,OR5R1,ENSG00000279961,protein_coding -21017,GVQW2,ENSG00000279968,protein_coding -17692,AC244517.10,ENSG00000279983,protein_coding -32047,OR51F1,ENSG00000280021,protein_coding -59652,GATD3B,ENSG00000280071,protein_coding -34945,OR8B4,ENSG00000280090,protein_coding -31274,OR1B1,ENSG00000280094,protein_coding -21082,AL049844.3,ENSG00000280148,protein_coding -41315,PCDH20,ENSG00000280165,protein_coding -33160,OR1S1,ENSG00000280204,protein_coding -19133,OR12D2,ENSG00000280236,protein_coding -464,PRAMEF26,ENSG00000280267,protein_coding -33055,OR8K3,ENSG00000280314,protein_coding -59655,FP565260.6,ENSG00000280433,protein_coding -9184,AC068946.1,ENSG00000280537,protein_coding -10283,AC006059.2,ENSG00000280571,protein_coding -1481,CCDC163,ENSG00000280670,protein_coding -44301,LINC02203,ENSG00000280709,protein_coding -2961,AC243547.3,ENSG00000280778,protein_coding -47469,PAGR1,ENSG00000280789,protein_coding -47468,AC009133.6,ENSG00000280893,protein_coding -58057,RPS4Y2,ENSG00000280969,protein_coding -17567,MATR3,ENSG00000280987,protein_coding -22083,AC005154.5,ENSG00000281039,protein_coding -41217,TMEM272,ENSG00000281106,protein_coding -47470,AC120114.4,ENSG00000281348,protein_coding -4504,BLACAT1,ENSG00000281406,protein_coding -22077,AC006978.2,ENSG00000281593,protein_coding 
-20644,AL365214.3,ENSG00000281613,protein_coding -41040,AL512506.3,ENSG00000281883,protein_coding -24398,GIMAP1-GIMAP5,ENSG00000281887,protein_coding -17363,AC034228.4,ENSG00000281938,protein_coding -47555,TMEM265,ENSG00000281991,protein_coding -47553,AC106886.5,ENSG00000282034,protein_coding -20715,AL132671.2,ENSG00000282218,protein_coding -35391,AL157392.5,ENSG00000282246,protein_coding -13623,AC058822.1,ENSG00000282278,protein_coding -23324,CYP3A7-CYP3A51P,ENSG00000282301,protein_coding -26378,TEX13D,ENSG00000282419,protein_coding -2637,ADORA3,ENSG00000282608,protein_coding -48618,DUXB,ENSG00000282757,protein_coding -18888,AL512428.1,ENSG00000282804,protein_coding -26384,TEX13C,ENSG00000282815,protein_coding -851,C1orf232,ENSG00000282872,protein_coding -1519,TMEM275,ENSG00000282881,protein_coding -49258,AC004706.3,ENSG00000282936,protein_coding -18949,AL031777.3,ENSG00000282988,protein_coding -1434,KLF18,ENSG00000283039,protein_coding -43907,LBHD2,ENSG00000283071,protein_coding -25320,CENPVL2,ENSG00000283093,protein_coding -12578,AC068631.2,ENSG00000283149,protein_coding -12161,IQCJ-SCHIP1,ENSG00000283154,protein_coding -10507,AC104452.1,ENSG00000283189,protein_coding -41919,C13orf46,ENSG00000283199,protein_coding -56014,AC092329.3,ENSG00000283201,protein_coding -30665,AL353572.3,ENSG00000283205,protein_coding -41454,AC001226.2,ENSG00000283208,protein_coding -3263,SPRR5,ENSG00000283227,protein_coding -8131,AC068547.1,ENSG00000283228,protein_coding -27017,AC019257.8,ENSG00000283239,protein_coding -22375,CCDC201,ENSG00000283247,protein_coding -23146,FAM237B,ENSG00000283267,protein_coding -33389,TEX54,ENSG00000283268,protein_coding -17584,SMIM33,ENSG00000283288,protein_coding -37553,TEX52,ENSG00000283297,protein_coding -21835,AC019117.3,ENSG00000283321,protein_coding -3146,CTXND2,ENSG00000283324,protein_coding -30119,FAM240B,ENSG00000283329,protein_coding -41922,CFAP97D2,ENSG00000283361,protein_coding -30254,CNTNAP3C,ENSG00000283378,protein_coding 
-9288,CCDC195,ENSG00000283428,protein_coding -11037,CSNKA2IP,ENSG00000283434,protein_coding -49328,SPEM3,ENSG00000283439,protein_coding -26769,HSFX4,ENSG00000283463,protein_coding -10384,FAM240A,ENSG00000283473,protein_coding -30110,FAM95C,ENSG00000283486,protein_coding -37439,ZNF511-PRAP1,ENSG00000283496,protein_coding -57694,AC020915.5,ENSG00000283515,protein_coding -46880,LITAFD,ENSG00000283516,protein_coding -31539,PRRT1B,ENSG00000283526,protein_coding -24254,TCAF2C,ENSG00000283528,protein_coding -38760,AC021072.1,ENSG00000283536,protein_coding -10051,AC098650.1,ENSG00000283563,protein_coding -57521,C19orf85,ENSG00000283567,protein_coding -1371,AC098484.3,ENSG00000283580,protein_coding -25663,FAM236C,ENSG00000283594,protein_coding -46333,FAM169B,ENSG00000283597,protein_coding -25626,BX276092.9,ENSG00000283599,protein_coding -56786,EXOC3L2,ENSG00000283632,protein_coding -26543,ETDC,ENSG00000283644,protein_coding -42304,LMLN2,ENSG00000283654,protein_coding -56998,AC008687.8,ENSG00000283663,protein_coding -3917,MYOCOS,ENSG00000283683,protein_coding -26751,HSFX3,ENSG00000283697,protein_coding -35005,VSIG10L2,ENSG00000283703,protein_coding -10390,PRSS50,ENSG00000283706,protein_coding -15829,TAF11L11,ENSG00000283740,protein_coding -48621,CPHXL,ENSG00000283755,protein_coding -56273,PMIS2,ENSG00000283758,protein_coding -2413,AC118553.2,ENSG00000283761,protein_coding -12501,AC131160.1,ENSG00000283765,protein_coding -15834,TAF11L13,ENSG00000283776,protein_coding -17384,AC116366.3,ENSG00000283782,protein_coding -31911,PRR33,ENSG00000283787,protein_coding -58423,AC007326.4,ENSG00000283809,protein_coding -32018,SSU72P4,ENSG00000283873,protein_coding -10383,AC104304.1,ENSG00000283877,protein_coding -59263,TPTEP2-CSNK1E,ENSG00000283900,protein_coding -35808,AL117339.5,ENSG00000283930,protein_coding -4635,AC092017.3,ENSG00000283952,protein_coding -15824,TAF11L8,ENSG00000283967,protein_coding -32170,AC084337.2,ENSG00000283977,protein_coding 
-55460,GNG14,ENSG00000283980,protein_coding -15825,TAF11L9,ENSG00000283988,protein_coding -29316,SLURP2,ENSG00000283992,protein_coding -32015,SSU72P5,ENSG00000284018,protein_coding -35410,HSPA14,ENSG00000284024,protein_coding -15821,TAF11L6,ENSG00000284042,protein_coding -34324,AP001273.2,ENSG00000284057,protein_coding -5315,AL451007.3,ENSG00000284188,protein_coding -59624,SCO2,ENSG00000284194,protein_coding -15820,TAF11L5,ENSG00000284234,protein_coding -15819,TAF11L4,ENSG00000284283,protein_coding -23302,AC004922.1,ENSG00000284292,protein_coding -4633,AL590132.1,ENSG00000284299,protein_coding -32016,SSU72P2,ENSG00000284306,protein_coding -6737,C2orf81,ENSG00000284308,protein_coding -7464,AC013271.1,ENSG00000284337,protein_coding -31719,AL807752.7,ENSG00000284341,protein_coding -15826,TAF11L10,ENSG00000284356,protein_coding -15815,TAF11L2,ENSG00000284373,protein_coding -37752,AC092111.3,ENSG00000284393,protein_coding -46558,AL032819.3,ENSG00000284395,protein_coding -59338,AL022238.4,ENSG00000284431,protein_coding -32020,SSU72P7,ENSG00000284438,protein_coding -15818,TAF11L3,ENSG00000284439,protein_coding -22797,AC027644.4,ENSG00000284461,protein_coding -15822,TAF11L7,ENSG00000284465,protein_coding -7852,SMIM39,ENSG00000284479,protein_coding -48617,AC025287.4,ENSG00000284484,protein_coding -55475,THSD8,ENSG00000284491,protein_coding -29315,LYNX1-SLURP2,ENSG00000284505,protein_coding -48707,AC092718.8,ENSG00000284512,protein_coding -51760,AC015802.6,ENSG00000284526,protein_coding -32019,SSU72P3,ENSG00000284546,protein_coding -59299,AL022318.4,ENSG00000284554,protein_coding -34942,OR8B3,ENSG00000284609,protein_coding -46157,AC091167.7,ENSG00000284626,protein_coding -9308,SCYGR1,ENSG00000284629,protein_coding -9316,SCYGR4,ENSG00000284631,protein_coding -9322,SCYGR8,ENSG00000284635,protein_coding -54962,AC005551.1,ENSG00000284638,protein_coding -9312,SCYGR2,ENSG00000284643,protein_coding -42,OR4F16,ENSG00000284662,protein_coding 
-9318,SCYGR5,ENSG00000284667,protein_coding -34941,OR8B2,ENSG00000284680,protein_coding -13016,AC092442.1,ENSG00000284684,protein_coding -1765,AC119674.2,ENSG00000284686,protein_coding -24384,AC073111.4,ENSG00000284691,protein_coding -13871,AC108941.2,ENSG00000284695,protein_coding -6242,TMEM247,ENSG00000284701,protein_coding -9315,SCYGR3,ENSG00000284704,protein_coding -33753,SMIM38,ENSG00000284713,protein_coding -9321,SCYGR7,ENSG00000284718,protein_coding -38534,OR8S1,ENSG00000284723,protein_coding -9320,SCYGR6,ENSG00000284725,protein_coding -38671,C12orf81,ENSG00000284730,protein_coding -33067,AP002512.3,ENSG00000284732,protein_coding -27,OR4F29,ENSG00000284733,protein_coding -8514,PDE11A,ENSG00000284741,protein_coding -12527,EEF1AKMT4,ENSG00000284753,protein_coding -16666,AC022414.1,ENSG00000284762,protein_coding -5155,TBCE,ENSG00000284770,protein_coding -44972,AC011330.3,ENSG00000284772,protein_coding -1135,AC114490.3,ENSG00000284773,protein_coding -53652,AL121900.2,ENSG00000284776,protein_coding -31921,AC132217.2,ENSG00000284779,protein_coding -38686,SMIM41,ENSG00000284791,protein_coding -55768,AC008397.1,ENSG00000284797,protein_coding -25999,AC235565.2,ENSG00000284800,protein_coding -9191,AC068946.2,ENSG00000284820,protein_coding -5389,GCSAML-AS1,ENSG00000284824,protein_coding -37892,AC006518.7,ENSG00000284826,protein_coding -33845,AP000812.4,ENSG00000284844,protein_coding -12436,CCDC39,ENSG00000284862,protein_coding -33242,OOSP1,ENSG00000284873,protein_coding -58470,AC000093.1,ENSG00000284874,protein_coding -1347,AC119676.1,ENSG00000284895,protein_coding -44632,ARHGAP11B,ENSG00000284906,protein_coding -12528,ECE2,ENSG00000284917,protein_coding -33843,AP000812.5,ENSG00000284922,protein_coding -32089,AC104389.5,ENSG00000284931,protein_coding -40198,DIABLO,ENSG00000284934,protein_coding -46196,AC068831.7,ENSG00000284946,protein_coding -12097,AC104472.3,ENSG00000284952,protein_coding -27511,AC107959.5,ENSG00000284956,protein_coding 
-32650,AL049629.2,ENSG00000284969,protein_coding -31748,BX255925.3,ENSG00000284976,protein_coding -23470,AC093668.2,ENSG00000284981,protein_coding -26874,U52112.2,ENSG00000284987,protein_coding -1411,AL451062.3,ENSG00000284989,protein_coding -16661,AC008581.2,ENSG00000285000,protein_coding -33244,OOSP4A,ENSG00000285010,protein_coding -59323,AL022312.1,ENSG00000285025,protein_coding -47487,AC093512.2,ENSG00000285043,protein_coding -5151,TBCE,ENSG00000285053,protein_coding -19474,SMIM40,ENSG00000285064,protein_coding -44631,AC091057.6,ENSG00000285077,protein_coding -31214,AL160272.2,ENSG00000285082,protein_coding -19394,AL662884.4,ENSG00000285085,protein_coding -30427,AL358113.1,ENSG00000285130,protein_coding -39008,AC022506.1,ENSG00000285133,protein_coding -22048,AC004593.3,ENSG00000285162,protein_coding -25604,AL590764.2,ENSG00000285171,protein_coding -55766,AC008397.2,ENSG00000285188,protein_coding -12296,AC026316.4,ENSG00000285218,protein_coding -33240,OOSP3,ENSG00000285231,protein_coding -37653,AC006064.6,ENSG00000285238,protein_coding -31568,AL162417.1,ENSG00000285245,protein_coding -45265,AC090517.4,ENSG00000285253,protein_coding -10797,ATXN7,ENSG00000285258,protein_coding -30853,AL160269.1,ENSG00000285269,protein_coding -32610,AL035078.4,ENSG00000285283,protein_coding -24421,ABCF2,ENSG00000285292,protein_coding -59223,Z83844.3,ENSG00000285304,protein_coding -46751,AC025283.3,ENSG00000285329,protein_coding -14425,AC126283.2,ENSG00000285330,protein_coding -53929,FO393400.1,ENSG00000285382,protein_coding -59067,Z82190.2,ENSG00000285404,protein_coding -23473,POLR2J3,ENSG00000285437,protein_coding -21348,SOD2,ENSG00000285441,protein_coding -41214,AL162377.3,ENSG00000285444,protein_coding -20692,Z84488.2,ENSG00000285446,protein_coding -31139,ZNF883,ENSG00000285447,protein_coding -14141,AC093827.5,ENSG00000285458,protein_coding -49245,AC007846.2,ENSG00000285471,protein_coding -44326,AC134980.3,ENSG00000285472,protein_coding 
-18270,AC139491.7,ENSG00000285476,protein_coding -24420,AC021097.2,ENSG00000285480,protein_coding -56623,AC010616.2,ENSG00000285505,protein_coding -53530,AL034430.1,ENSG00000285508,protein_coding -34850,AP000646.1,ENSG00000285509,protein_coding -56235,AC020907.6,ENSG00000285526,protein_coding -38957,AC097104.1,ENSG00000285528,protein_coding -6204,AC013717.1,ENSG00000285542,protein_coding -25649,AL133500.1,ENSG00000285547,protein_coding -36156,AC067752.1,ENSG00000285551,protein_coding -11866,AC112504.2,ENSG00000285558,protein_coding -41340,AL445238.1,ENSG00000285566,protein_coding -11405,AC069444.2,ENSG00000285585,protein_coding -55455,AC010422.8,ENSG00000285589,protein_coding -24737,AC097625.2,ENSG00000285602,protein_coding -27146,AC084121.5,ENSG00000285607,protein_coding -27142,AC084121.6,ENSG00000285620,protein_coding -38969,AC117378.1,ENSG00000285625,protein_coding -235,AL031847.2,ENSG00000285629,protein_coding -11112,AC021660.3,ENSG00000285635,protein_coding -3335,AL358472.6,ENSG00000285641,protein_coding -30031,AL133410.3,ENSG00000285645,protein_coding -28230,AC009879.2,ENSG00000285655,protein_coding -27145,AC084121.7,ENSG00000285657,protein_coding -27131,AC134684.8,ENSG00000285687,protein_coding -10874,AC097634.4,ENSG00000285708,protein_coding -14849,AC098588.1,ENSG00000285713,protein_coding -27144,AC084121.8,ENSG00000285720,protein_coding -53531,AL034430.2,ENSG00000285723,protein_coding -21507,AL031315.1,ENSG00000285733,protein_coding -10590,AC097636.2,ENSG00000285749,protein_coding -58796,AC253536.7,ENSG00000285762,protein_coding -27132,AC134684.9,ENSG00000285765,protein_coding -23166,AC000120.2,ENSG00000285772,protein_coding -3957,AL139142.2,ENSG00000285777,protein_coding -3329,AL358472.7,ENSG00000285779,protein_coding -28231,AC009879.3,ENSG00000285791,protein_coding -27140,AC084121.9,ENSG00000285814,protein_coding -60289,WRB-SH3BGR,ENSG00000285815,protein_coding -33533,AP000944.2,ENSG00000285816,protein_coding 
-34749,AP001267.5,ENSG00000285827,protein_coding -1636,AL445685.3,ENSG00000285839,protein_coding -38090,AC010197.2,ENSG00000285854,protein_coding -18002,AC008676.3,ENSG00000285868,protein_coding -27782,AC144573.1,ENSG00000285880,protein_coding -18365,AC113348.1,ENSG00000285891,protein_coding -51228,AC004687.2,ENSG00000285897,protein_coding -37586,AC008012.1,ENSG00000285901,protein_coding -27141,AC084121.11,ENSG00000285913,protein_coding -44885,AC087721.2,ENSG00000285920,protein_coding -36881,AL133353.2,ENSG00000285932,protein_coding -27143,AC084121.12,ENSG00000285937,protein_coding -12620,AC072022.2,ENSG00000285938,protein_coding -44931,AC018362.3,ENSG00000285942,protein_coding -11315,AC112128.1,ENSG00000285943,protein_coding -3259,AL162596.1,ENSG00000285946,protein_coding -51420,AC127029.3,ENSG00000285947,protein_coding -27147,AC084121.13,ENSG00000285950,protein_coding -23167,AC000120.3,ENSG00000285953,protein_coding -27129,AC134684.11,ENSG00000285975,protein_coding -20035,AL135905.2,ENSG00000285976,protein_coding -18366,AC113348.2,ENSG00000285978,protein_coding -28855,AC012213.5,ENSG00000285982,protein_coding -21170,AL355312.6,ENSG00000285991,protein_coding -15511,AC021087.5,ENSG00000286001,protein_coding -49329,AC113189.9,ENSG00000286007,protein_coding -22952,AC211486.6,ENSG00000286014,protein_coding -5456,CR589904.2,ENSG00000286015,protein_coding -3054,AC239811.1,ENSG00000286019,protein_coding -53363,AL121899.2,ENSG00000286022,protein_coding -22946,AC211486.7,ENSG00000286038,protein_coding -8654,ASDURF,ENSG00000286053,protein_coding -58818,GGT1,ENSG00000286070,protein_coding -21693,AC009412.1,ENSG00000286075,protein_coding -30398,AL627230.3,ENSG00000286079,protein_coding -37282,AC073585.2,ENSG00000286088,protein_coding -15531,AC026740.3,ENSG00000286094,protein_coding -9213,SPEGNB,ENSG00000286095,protein_coding -55404,AC008770.4,ENSG00000286098,protein_coding -58567,AP000552.3,ENSG00000286102,protein_coding -19601,AL353579.1,ENSG00000286105,protein_coding 
-2839,AC253572.1,ENSG00000286106,protein_coding -31453,AL441992.2,ENSG00000286112,protein_coding -27704,AC083977.1,ENSG00000286131,protein_coding -55412,AC022415.2,ENSG00000286132,protein_coding -37278,AL603764.2,ENSG00000286135,protein_coding -22949,AC211486.8,ENSG00000286137,protein_coding -48408,DERPC,ENSG00000286140,protein_coding -9212,AC053503.7,ENSG00000286143,protein_coding -8655,AC012488.2,ENSG00000286165,protein_coding -15514,AHRR,ENSG00000286169,protein_coding -58410,AC023490.4,ENSG00000286175,protein_coding -3074,AC242842.3,ENSG00000286185,protein_coding -49240,AC055839.2,ENSG00000286190,protein_coding -21604,AC069288.1,ENSG00000286192,protein_coding -3073,AC242843.1,ENSG00000286219,protein_coding -48702,AC009070.1,ENSG00000286221,protein_coding -60512,AP000471.1,ENSG00000286224,protein_coding -22876,SPDYE17,ENSG00000286228,protein_coding -4790,AL445423.2,ENSG00000286231,protein_coding -53475,AL035461.3,ENSG00000286235,protein_coding -26015,ARMCX5-GPRASP2,ENSG00000286237,protein_coding -9277,AC093884.1,ENSG00000286239,protein_coding -57309,AC022137.3,ENSG00000286261,protein_coding -33455,AP001453.5,ENSG00000286264,protein_coding -58039,AC007244.1,ENSG00000286265,protein_coding -25215,AF196969.1,ENSG00000286268,protein_coding -18931,HIST1H3B,ENSG00000286522,protein_coding -5528,AC108488.2,ENSG00000286905,protein_coding -19475,AL662820.1,ENSG00000286920,protein_coding -18935,HIST1H3C,ENSG00000287080,protein_coding -19710,AL096814.2,ENSG00000287363,protein_coding -14188,AC098582.1,ENSG00000287542,protein_coding -26744,AC231656.1,ENSG00000287585,protein_coding -48629,AC106741.1,ENSG00000287694,protein_coding -33749,AP003071.5,ENSG00000287725,protein_coding -5071,AL445524.2,ENSG00000287856,protein_coding -39017,AC025165.6,ENSG00000287908,protein_coding -54195,AL031681.2,ENSG00000288000,protein_coding -25248,AC231657.3,ENSG00000288053,protein_coding diff --git a/sfaira/versions/genome_versions/human/__init__.py 
b/sfaira/versions/genome_versions/human/__init__.py deleted file mode 100644 index 90be3cbe7..000000000 --- a/sfaira/versions/genome_versions/human/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .genome_sizes import GENOME_SIZE_DICT -from .genome_container import GenomeContainer diff --git a/sfaira/versions/genome_versions/human/genome_container.py b/sfaira/versions/genome_versions/human/genome_container.py deleted file mode 100644 index 80e2bd8a3..000000000 --- a/sfaira/versions/genome_versions/human/genome_container.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import pandas - -from .genome_sizes import GENOME_SIZE_DICT - - -class GenomeContainer: - available_genomes = ["Homo_sapiens_GRCh38_97"] - - def __init__(self): - self.genomes = { - "Homo_sapiens_GRCh38_97": "Homo_sapiens_GRCh38_97.csv" - } - self.genome_sizes = { - "Homo_sapiens_GRCh38_97": GENOME_SIZE_DICT["Homo_sapiens_GRCh38_97"] - } - - def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/human/genome_sizes.py b/sfaira/versions/genome_versions/human/genome_sizes.py deleted file mode 100644 index 5d898aeaa..000000000 --- a/sfaira/versions/genome_versions/human/genome_sizes.py +++ /dev/null @@ -1,3 +0,0 @@ -GENOME_SIZE_DICT = { - "Homo_sapiens_GRCh38_97": (19986, ) -} diff --git a/sfaira/versions/genome_versions/mouse/Mus_musculus_GRCm38_97.csv b/sfaira/versions/genome_versions/mouse/Mus_musculus_GRCm38_97.csv deleted file mode 100644 index 9d74abdef..000000000 --- a/sfaira/versions/genome_versions/mouse/Mus_musculus_GRCm38_97.csv +++ /dev/null @@ -1,21901 +0,0 @@ -,name,ensg,type -11915,Gnai3,ENSMUSG00000000001,protein_coding -8485,Pbsn,ENSMUSG00000000003,protein_coding -47507,Cdc45,ENSMUSG00000000028,protein_coding -9749,Scml2,ENSMUSG00000000037,protein_coding -39914,Apoh,ENSMUSG00000000049,protein_coding -40286,Narf,ENSMUSG00000000056,protein_coding 
-19056,Cav2,ENSMUSG00000000058,protein_coding -40367,Klf6,ENSMUSG00000000078,protein_coding -14547,Scmh1,ENSMUSG00000000085,protein_coding -35704,Cox5a,ENSMUSG00000000088,protein_coding -39152,Tbx2,ENSMUSG00000000093,protein_coding -39154,Tbx4,ENSMUSG00000000094,protein_coding -51115,Zfy2,ENSMUSG00000000103,protein_coding -39346,Ngfr,ENSMUSG00000000120,protein_coding -39797,Wnt3,ENSMUSG00000000125,protein_coding -38283,Wnt9a,ENSMUSG00000000126,protein_coding -50617,Fer,ENSMUSG00000000127,protein_coding -26110,Xpo6,ENSMUSG00000000131,protein_coding -7359,Tfe3,ENSMUSG00000000134,protein_coding -39922,Axin2,ENSMUSG00000000142,protein_coding -18586,Brat1,ENSMUSG00000000148,protein_coding -18588,Gna12,ENSMUSG00000000149,protein_coding -26686,Slc22a18,ENSMUSG00000000154,protein_coding -48729,Itgb2l,ENSMUSG00000000157,protein_coding -48727,Igsf5,ENSMUSG00000000159,protein_coding -35552,Pih1d2,ENSMUSG00000000167,protein_coding -35553,Dlat,ENSMUSG00000000168,protein_coding -35549,Sdhd,ENSMUSG00000000171,protein_coding -21437,Fgf23,ENSMUSG00000000182,protein_coding -21436,Fgf6,ENSMUSG00000000183,protein_coding -21441,Ccnd2,ENSMUSG00000000184,protein_coding -4180,Gpr107,ENSMUSG00000000194,protein_coding -34426,Nalcn,ENSMUSG00000000197,protein_coding -39992,Btbd17,ENSMUSG00000000202,protein_coding -39074,Slfn4,ENSMUSG00000000204,protein_coding -26662,Th,ENSMUSG00000000214,protein_coding -26658,Ins2,ENSMUSG00000000215,protein_coding -26038,Scnn1g,ENSMUSG00000000216,protein_coding -9223,Drp2,ENSMUSG00000000223,protein_coding -26665,Tspan32,ENSMUSG00000000244,protein_coding -4406,Lhx2,ENSMUSG00000000247,protein_coding -21523,Clec2g,ENSMUSG00000000248,protein_coding -41327,Gmpr,ENSMUSG00000000253,protein_coding -38178,Glra1,ENSMUSG00000000263,protein_coding -9393,Mid2,ENSMUSG00000000266,protein_coding -39249,Trim25,ENSMUSG00000000275,protein_coding -39250,Dgke,ENSMUSG00000000276,protein_coding -39244,Scpep1,ENSMUSG00000000278,protein_coding 
-38815,Mnt,ENSMUSG00000000282,protein_coding -28000,Itgb2,ENSMUSG00000000290,protein_coding -27210,Hddc2,ENSMUSG00000000295,protein_coding -27213,Tpd52l1,ENSMUSG00000000296,protein_coding -38314,Pemt,ENSMUSG00000000301,protein_coding -31422,Cdh1,ENSMUSG00000000303,protein_coding -7158,Cdh4,ENSMUSG00000000305,protein_coding -5905,Ckmt1,ENSMUSG00000000308,protein_coding -38620,Bcl6b,ENSMUSG00000000317,protein_coding -38617,Clec10a,ENSMUSG00000000318,protein_coding -38627,Alox12,ENSMUSG00000000320,protein_coding -47491,Arvcf,ENSMUSG00000000325,protein_coding -47494,Comt,ENSMUSG00000000326,protein_coding -12036,Rtca,ENSMUSG00000000339,protein_coding -12038,Dbt,ENSMUSG00000000340,protein_coding -46978,Dazap2,ENSMUSG00000000346,protein_coding -7845,Mcts1,ENSMUSG00000000355,protein_coding -6493,Rem1,ENSMUSG00000000359,protein_coding -33539,Rnf17,ENSMUSG00000000365,protein_coding -28048,Trappc10,ENSMUSG00000000374,protein_coding -37434,Ccm2,ENSMUSG00000000378,protein_coding -37440,Wap,ENSMUSG00000000381,protein_coding -37437,Tbrg4,ENSMUSG00000000384,protein_coding -48744,Tmprss2,ENSMUSG00000000385,protein_coding -4654,Fap,ENSMUSG00000000392,protein_coding -4653,Gcg,ENSMUSG00000000394,protein_coding -21427,Ndufa9,ENSMUSG00000000399,protein_coding -9826,Egfl6,ENSMUSG00000000402,protein_coding -14786,Lck,ENSMUSG00000000409,protein_coding -14781,Tssk3,ENSMUSG00000000411,protein_coding -19085,Cttnbp2,ENSMUSG00000000416,protein_coding -52935,Galnt1,ENSMUSG00000000420,protein_coding -28802,Myf5,ENSMUSG00000000435,protein_coding -21101,Mkrn2,ENSMUSG00000000439,protein_coding -21096,Pparg,ENSMUSG00000000440,protein_coding -21103,Raf1,ENSMUSG00000000441,protein_coding -26196,Sept1,ENSMUSG00000000486,protein_coding -46433,Pdgfb,ENSMUSG00000000489,protein_coding -46992,Acvrl1,ENSMUSG00000000530,protein_coding -46998,Grasp,ENSMUSG00000000531,protein_coding -46994,Acvr1b,ENSMUSG00000000532,protein_coding -38321,Tom1l2,ENSMUSG00000000538,protein_coding 
-2806,Gpa33,ENSMUSG00000000544,protein_coding -47125,Zfp385a,ENSMUSG00000000552,protein_coding -47126,Itga5,ENSMUSG00000000555,protein_coding -16875,Gabra2,ENSMUSG00000000560,protein_coding -11837,Wdr77,ENSMUSG00000000561,protein_coding -11833,Adora3,ENSMUSG00000000562,protein_coding -11836,Atp5f1,ENSMUSG00000000563,protein_coding -39964,Sox9,ENSMUSG00000000567,protein_coding -17389,Hnrnpd,ENSMUSG00000000568,protein_coding -48819,Dynlt1c,ENSMUSG00000000579,protein_coding -37532,C1d,ENSMUSG00000000581,protein_coding -38162,Gm2a,ENSMUSG00000000594,protein_coding -15662,Krit1,ENSMUSG00000000600,protein_coding -22252,Clcn4,ENSMUSG00000000605,protein_coding -33107,Vmn2r88,ENSMUSG00000000606,protein_coding -38028,Grm6,ENSMUSG00000000617,protein_coding -20465,Sema4f,ENSMUSG00000000627,protein_coding -20458,Hk2,ENSMUSG00000000628,protein_coding -38913,Myo18a,ENSMUSG00000000631,protein_coding -38917,Sez6,ENSMUSG00000000632,protein_coding -50913,Haao,ENSMUSG00000000673,protein_coding -14944,Cd52,ENSMUSG00000000682,protein_coding -38903,Abhd15,ENSMUSG00000000686,protein_coding -39385,Hoxb6,ENSMUSG00000000690,protein_coding -20471,Loxl3,ENSMUSG00000000693,protein_coding -44659,Tcl1b5,ENSMUSG00000000701,protein_coding -40831,Btn1a1,ENSMUSG00000000706,protein_coding -50445,Kat2b,ENSMUSG00000000708,protein_coding -29255,Rab5b,ENSMUSG00000000711,protein_coding -38911,Cryba1,ENSMUSG00000000724,protein_coding -28043,Dnmt3l,ENSMUSG00000000730,protein_coding -28042,Aire,ENSMUSG00000000731,protein_coding -28044,Icosl,ENSMUSG00000000732,protein_coding -31733,Spg7,ENSMUSG00000000738,protein_coding -31739,Sult5a1,ENSMUSG00000000739,protein_coding -31734,Rpl13,ENSMUSG00000000740,protein_coding -31742,Chmp1a,ENSMUSG00000000743,protein_coding -38835,Rpa1,ENSMUSG00000000751,protein_coding -38838,Serpinf1,ENSMUSG00000000753,protein_coding -29589,Tubgcp3,ENSMUSG00000000759,protein_coding -26782,Oprm1,ENSMUSG00000000766,protein_coding -33864,Polr3d,ENSMUSG00000000776,protein_coding 
-38084,Tcf7,ENSMUSG00000000782,protein_coding -7504,Ddx3x,ENSMUSG00000000787,protein_coding -30627,Il12rb1,ENSMUSG00000000791,protein_coding -30632,Slc5a5,ENSMUSG00000000792,protein_coding -11197,Kcnn3,ENSMUSG00000000794,protein_coding -39134,Usp32,ENSMUSG00000000804,protein_coding -39133,Car4,ENSMUSG00000000805,protein_coding -20722,Txnrd3,ENSMUSG00000000811,protein_coding -2689,Fasl,ENSMUSG00000000817,protein_coding -7234,Zfp512b,ENSMUSG00000000823,protein_coding -7231,Dnajc5,ENSMUSG00000000826,protein_coding -7229,Tpd52l2,ENSMUSG00000000827,protein_coding -8236,Fmr1,ENSMUSG00000000838,protein_coding -37655,Bcl11a,ENSMUSG00000000861,protein_coding -38113,Il4,ENSMUSG00000000869,protein_coding -6556,Pxmp4,ENSMUSG00000000876,protein_coding -8757,Dlg3,ENSMUSG00000000881,protein_coding -47498,Gnb1l,ENSMUSG00000000884,protein_coding -4062,Dbh,ENSMUSG00000000889,protein_coding -27951,Mmp11,ENSMUSG00000000901,protein_coding -27950,Smarcb1,ENSMUSG00000000902,protein_coding -27954,Vpreb3,ENSMUSG00000000903,protein_coding -18111,Hip1r,ENSMUSG00000000915,protein_coding -18361,Nsun5,ENSMUSG00000000916,protein_coding -46200,Top1mt,ENSMUSG00000000934,protein_coding -19775,Hoxa10,ENSMUSG00000000938,protein_coding -19769,Hoxa4,ENSMUSG00000000942,protein_coding -24497,Gm38393,ENSMUSG00000000948,protein_coding -33404,Mmp14,ENSMUSG00000000957,protein_coding -33402,Slc7a7,ENSMUSG00000000958,protein_coding -33401,Oxa1l,ENSMUSG00000000959,protein_coding -39107,Heatr6,ENSMUSG00000000976,protein_coding -39097,Ccl3,ENSMUSG00000000982,protein_coding -39105,Wfdc18,ENSMUSG00000000983,protein_coding -34763,Icam4,ENSMUSG00000001014,protein_coding -11239,Ilf2,ENSMUSG00000001016,protein_coding -11241,Chtop,ENSMUSG00000001017,protein_coding -11240,Snapin,ENSMUSG00000001018,protein_coding -11251,S100a4,ENSMUSG00000001020,protein_coding -11249,S100a3,ENSMUSG00000001021,protein_coding -11252,S100a5,ENSMUSG00000001023,protein_coding -11253,S100a6,ENSMUSG00000001025,protein_coding 
-39863,Scn4a,ENSMUSG00000001027,protein_coding -39865,Icam2,ENSMUSG00000001029,protein_coding -38381,Mapk7,ENSMUSG00000001034,protein_coding -38384,Epn2,ENSMUSG00000001036,protein_coding -38383,B9d1,ENSMUSG00000001039,protein_coding -12289,Sec24b,ENSMUSG00000001052,protein_coding -38059,N4bp3,ENSMUSG00000001053,protein_coding -38058,Rmnd5b,ENSMUSG00000001054,protein_coding -38057,Nhp2,ENSMUSG00000001056,protein_coding -31749,Vps9d1,ENSMUSG00000001062,protein_coding -31750,Zfp276,ENSMUSG00000001065,protein_coding -46901,C1ql4,ENSMUSG00000001076,protein_coding -16301,Mfsd10,ENSMUSG00000001082,protein_coding -15032,Luzp1,ENSMUSG00000001089,protein_coding -38950,Slc13a2,ENSMUSG00000001095,protein_coding -17784,Kctd10,ENSMUSG00000001098,protein_coding -38957,Poldip2,ENSMUSG00000001100,protein_coding -38955,Sebox,ENSMUSG00000001103,protein_coding -38959,Ift20,ENSMUSG00000001105,protein_coding -27984,Col6a1,ENSMUSG00000001119,protein_coding -27986,Pcbp3,ENSMUSG00000001120,protein_coding -38972,Lgals9,ENSMUSG00000001123,protein_coding -7581,Araf,ENSMUSG00000001127,protein_coding -7589,Cfp,ENSMUSG00000001128,protein_coding -7587,Timp1,ENSMUSG00000001131,protein_coding -7591,Uxt,ENSMUSG00000001134,protein_coding -515,Cnnm3,ENSMUSG00000001138,protein_coding -511,Lman2l,ENSMUSG00000001143,protein_coding -27976,Mcm3ap,ENSMUSG00000001150,protein_coding -27971,Pcnt,ENSMUSG00000001151,protein_coding -27981,Ftcd,ENSMUSG00000001155,protein_coding -20620,Mxd1,ENSMUSG00000001156,protein_coding -20624,Gmcl1,ENSMUSG00000001157,protein_coding -20622,Snrnp27,ENSMUSG00000001158,protein_coding -17975,Oas1c,ENSMUSG00000001166,protein_coding -17979,Oas1h,ENSMUSG00000001168,protein_coding -7925,Ocrl,ENSMUSG00000001173,protein_coding -44510,Calm1,ENSMUSG00000001175,protein_coding -28051,Agpat3,ENSMUSG00000001211,protein_coding -43341,Slc26a3,ENSMUSG00000001225,protein_coding -50507,Sema6b,ENSMUSG00000001227,protein_coding -50519,Uhrf1,ENSMUSG00000001228,protein_coding 
-50511,Dpp9,ENSMUSG00000001229,protein_coding -39664,Ramp2,ENSMUSG00000001240,protein_coding -23418,Lsr,ENSMUSG00000001247,protein_coding -23428,Gramd1a,ENSMUSG00000001248,protein_coding -23426,Hpn,ENSMUSG00000001249,protein_coding -16874,Gabrg1,ENSMUSG00000001260,protein_coding -44902,Ckb,ENSMUSG00000001270,protein_coding -47074,Sp1,ENSMUSG00000001280,protein_coding -47061,Itgb7,ENSMUSG00000001281,protein_coding -47069,Myg1,ENSMUSG00000001285,protein_coding -47062,Rarg,ENSMUSG00000001288,protein_coding -47068,Pfdn5,ENSMUSG00000001289,protein_coding -29488,Efnb2,ENSMUSG00000001300,protein_coding -3287,Rrp15,ENSMUSG00000001305,protein_coding -39690,Rnd2,ENSMUSG00000001313,protein_coding -38821,Srr,ENSMUSG00000001323,protein_coding -14772,Sync,ENSMUSG00000001333,protein_coding -14766,Fndc5,ENSMUSG00000001334,protein_coding -34830,Acp5,ENSMUSG00000001348,protein_coding -34826,Cnn1,ENSMUSG00000001349,protein_coding -36165,Fbxo9,ENSMUSG00000001366,protein_coding -18901,Vps50,ENSMUSG00000001376,protein_coding -53140,Hars,ENSMUSG00000001380,protein_coding -53142,Zmat2,ENSMUSG00000001383,protein_coding -6839,Ube2c,ENSMUSG00000001403,protein_coding -11111,Smg5,ENSMUSG00000001415,protein_coding -11108,Cct3,ENSMUSG00000001416,protein_coding -11109,Glmp,ENSMUSG00000001418,protein_coding -11096,Mef2d,ENSMUSG00000001419,protein_coding -11110,Tmem79,ENSMUSG00000001420,protein_coding -19185,Snd1,ENSMUSG00000001424,protein_coding -27992,Col18a1,ENSMUSG00000001435,protein_coding -27991,Slc19a1,ENSMUSG00000001436,protein_coding -39427,Kpnb1,ENSMUSG00000001440,protein_coding -39429,Npepps,ENSMUSG00000001441,protein_coding -39423,Tbx21,ENSMUSG00000001444,protein_coding -39418,Mrpl10,ENSMUSG00000001445,protein_coding -15669,Cyp51,ENSMUSG00000001467,protein_coding -31753,Tcf25,ENSMUSG00000001472,protein_coding -53723,Tubb6,ENSMUSG00000001473,protein_coding -31757,Def8,ENSMUSG00000001482,protein_coding -39706,Meox1,ENSMUSG00000001493,protein_coding 
-39710,Sost,ENSMUSG00000001494,protein_coding -43678,Nkx2-1,ENSMUSG00000001496,protein_coding -43687,Pax9,ENSMUSG00000001497,protein_coding -42066,Irx2,ENSMUSG00000001504,protein_coding -39322,Col1a1,ENSMUSG00000001506,protein_coding -39330,Itga3,ENSMUSG00000001507,protein_coding -39324,Sgca,ENSMUSG00000001508,protein_coding -39331,Dlx3,ENSMUSG00000001510,protein_coding -21485,Foxm1,ENSMUSG00000001517,protein_coding -21491,Itfg2,ENSMUSG00000001518,protein_coding -21490,Nrip2,ENSMUSG00000001520,protein_coding -21480,Tulp3,ENSMUSG00000001521,protein_coding -49935,Gtf2h4,ENSMUSG00000001524,protein_coding -49944,Tubb5,ENSMUSG00000001525,protein_coding -42134,Ell2,ENSMUSG00000001542,protein_coding -39620,Jup,ENSMUSG00000001552,protein_coding -39623,Fkbp10,ENSMUSG00000001555,protein_coding -39625,Klhl10,ENSMUSG00000001558,protein_coding -16112,Mnx1,ENSMUSG00000001566,protein_coding -16111,Nom1,ENSMUSG00000001569,protein_coding -49434,Ergic1,ENSMUSG00000001576,protein_coding -38591,Tnk1,ENSMUSG00000001583,protein_coding -38594,Acap1,ENSMUSG00000001588,protein_coding -15025,Tcea3,ENSMUSG00000001604,protein_coding -17139,Csn3,ENSMUSG00000001622,protein_coding -43475,Ifrd1,ENSMUSG00000001627,protein_coding -21943,Stk38l,ENSMUSG00000001630,protein_coding -21034,Brpf1,ENSMUSG00000001632,protein_coding -19303,Akr1b3,ENSMUSG00000001642,protein_coding -47095,Hoxc13,ENSMUSG00000001655,protein_coding -47102,Hoxc11,ENSMUSG00000001656,protein_coding -47107,Hoxc8,ENSMUSG00000001657,protein_coding -47108,Hoxc6,ENSMUSG00000001661,protein_coding -27944,Gstt1,ENSMUSG00000001663,protein_coding -27943,Gstt3,ENSMUSG00000001665,protein_coding -27941,Ddt,ENSMUSG00000001666,protein_coding -31482,Tat,ENSMUSG00000001670,protein_coding -31480,Marveld3,ENSMUSG00000001672,protein_coding -1979,Ddx18,ENSMUSG00000001674,protein_coding -18786,Ubl3,ENSMUSG00000001687,protein_coding -53493,Gramd3,ENSMUSG00000001700,protein_coding -41194,Eef1e1,ENSMUSG00000001707,protein_coding 
-44935,Akt1,ENSMUSG00000001729,protein_coding -18415,Cldn15,ENSMUSG00000001739,protein_coding -24969,Il16,ENSMUSG00000001741,protein_coding -54088,Tcirg1,ENSMUSG00000001750,protein_coding -39651,Naglu,ENSMUSG00000001751,protein_coding -39653,Coasy,ENSMUSG00000001755,protein_coding -19218,Smo,ENSMUSG00000001761,protein_coding -19217,Tspan33,ENSMUSG00000001763,protein_coding -6323,Crnkl1,ENSMUSG00000001767,protein_coding -6318,Rin2,ENSMUSG00000001768,protein_coding -25070,Folh1,ENSMUSG00000001773,protein_coding -34651,Chordc1,ENSMUSG00000001774,protein_coding -28388,Rtcb,ENSMUSG00000001783,protein_coding -28383,Pwp1,ENSMUSG00000001785,protein_coding -28392,Fbxo7,ENSMUSG00000001786,protein_coding -23342,Capns1,ENSMUSG00000001794,protein_coding -23598,Lrp3,ENSMUSG00000001802,protein_coding -52892,Dsg4,ENSMUSG00000001804,protein_coding -4844,Evx2,ENSMUSG00000001815,protein_coding -4845,Hoxd13,ENSMUSG00000001819,protein_coding -4846,Hoxd12,ENSMUSG00000001823,protein_coding -25368,Folr1,ENSMUSG00000001827,protein_coding -25361,Clpb,ENSMUSG00000001829,protein_coding -34886,Sept7,ENSMUSG00000001833,protein_coding -18633,Zdhhc4,ENSMUSG00000001844,protein_coding -18645,Rac1,ENSMUSG00000001847,protein_coding -4201,Nup214,ENSMUSG00000001855,protein_coding -4199,Aif1l,ENSMUSG00000001864,protein_coding -10078,Cpa3,ENSMUSG00000001865,protein_coding -50806,Ltbp1,ENSMUSG00000001870,protein_coding -37595,Ugp2,ENSMUSG00000001891,protein_coding -39843,Kcnh6,ENSMUSG00000001901,protein_coding -30938,Trmt1,ENSMUSG00000001909,protein_coding -30937,Nacc1,ENSMUSG00000001910,protein_coding -30939,Nfix,ENSMUSG00000001911,protein_coding -22684,Slc1a5,ENSMUSG00000001918,protein_coding -7578,Uba1,ENSMUSG00000001924,protein_coding -21412,Vwf,ENSMUSG00000001930,protein_coding -35097,Siae,ENSMUSG00000001942,protein_coding -35095,Vsig2,ENSMUSG00000001943,protein_coding -35094,Esam,ENSMUSG00000001946,protein_coding -35098,Spa17,ENSMUSG00000001948,protein_coding 
-8380,Fam50a,ENSMUSG00000001962,protein_coding -8370,Emd,ENSMUSG00000001964,protein_coding -39847,Taco1,ENSMUSG00000001983,protein_coding -14680,Grik3,ENSMUSG00000001985,protein_coding -7874,Gria3,ENSMUSG00000001986,protein_coding -22668,Npas1,ENSMUSG00000001988,protein_coding -31862,Sipa1l2,ENSMUSG00000001995,protein_coding -6002,Ap4e1,ENSMUSG00000001998,protein_coding -6004,Blvra,ENSMUSG00000001999,protein_coding -8350,Pdzd4,ENSMUSG00000002006,protein_coding -8347,Srpk3,ENSMUSG00000002007,protein_coding -8348,Idh3g,ENSMUSG00000002010,protein_coding -8341,Pnck,ENSMUSG00000002012,protein_coding -8349,Ssr4,ENSMUSG00000002014,protein_coding -8343,Bcap31,ENSMUSG00000002015,protein_coding -50809,Fam98a,ENSMUSG00000002017,protein_coding -44194,Ltbp2,ENSMUSG00000002020,protein_coding -35420,Kmt2a,ENSMUSG00000002028,protein_coding -35416,Ift46,ENSMUSG00000002031,protein_coding -35417,Tmem25,ENSMUSG00000002032,protein_coding -35428,Cd3g,ENSMUSG00000002033,protein_coding -22815,Trappc6a,ENSMUSG00000002043,protein_coding -38938,Supt6,ENSMUSG00000002052,protein_coding -38944,Spag5,ENSMUSG00000002055,protein_coding -38948,Foxn1,ENSMUSG00000002057,protein_coding -38947,Unc119,ENSMUSG00000002058,protein_coding -38936,Rab34,ENSMUSG00000002059,protein_coding -38939,Sdf2,ENSMUSG00000002064,protein_coding -23648,Ccne1,ENSMUSG00000002068,protein_coding -49666,Hsf2bp,ENSMUSG00000002076,protein_coding -22661,Bbc3,ENSMUSG00000002083,protein_coding -5385,Mybpc3,ENSMUSG00000002100,protein_coding -5382,Psmc3,ENSMUSG00000002102,protein_coding -5390,Acp2,ENSMUSG00000002103,protein_coding -5380,Rapsn,ENSMUSG00000002104,protein_coding -5383,Slc39a13,ENSMUSG00000002105,protein_coding -3523,Celf2,ENSMUSG00000002107,protein_coding -5387,Nr1h3,ENSMUSG00000002108,protein_coding -5391,Ddb2,ENSMUSG00000002109,protein_coding -5384,Spi1,ENSMUSG00000002111,protein_coding -37352,Sf3a1,ENSMUSG00000002129,protein_coding -29173,Stat6,ENSMUSG00000002147,protein_coding 
-30872,Clgn,ENSMUSG00000002190,protein_coding -23930,Napsa,ENSMUSG00000002204,protein_coding -23939,Vrk3,ENSMUSG00000002205,protein_coding -23053,Smg9,ENSMUSG00000002210,protein_coding -16086,Paxip1,ENSMUSG00000002221,protein_coding -20305,Rmnd5a,ENSMUSG00000002222,protein_coding -11810,Mov10,ENSMUSG00000002227,protein_coding -11808,Ppm1j,ENSMUSG00000002228,protein_coding -11809,Rhoc,ENSMUSG00000002233,protein_coding -17133,Prr27,ENSMUSG00000002240,protein_coding -49520,Tead3,ENSMUSG00000002249,protein_coding -49515,Ppard,ENSMUSG00000002250,protein_coding -49513,Def6,ENSMUSG00000002257,protein_coding -22226,Peg3,ENSMUSG00000002265,protein_coding -22225,Zim1,ENSMUSG00000002266,protein_coding -49382,Metrn,ENSMUSG00000002274,protein_coding -49364,Lmf1,ENSMUSG00000002279,protein_coding -49376,Narfl,ENSMUSG00000002280,protein_coding -49743,Angptl4,ENSMUSG00000002289,protein_coding -15728,Dbf4,ENSMUSG00000002297,protein_coding -49755,Daxx,ENSMUSG00000002307,protein_coding -49748,Cd320,ENSMUSG00000002308,protein_coding -33482,Ipo4,ENSMUSG00000002319,protein_coding -33484,Tm9sf1,ENSMUSG00000002320,protein_coding -33481,Rec8,ENSMUSG00000002324,protein_coding -33480,Irf9,ENSMUSG00000002325,protein_coding -33490,Gmpr2,ENSMUSG00000002326,protein_coding -33488,Mdp1,ENSMUSG00000002329,protein_coding -33496,Dhrs1,ENSMUSG00000002332,protein_coding -30579,Ncan,ENSMUSG00000002341,protein_coding -30586,Tmem161a,ENSMUSG00000002342,protein_coding -30588,Armc6,ENSMUSG00000002343,protein_coding -30584,Borcs8,ENSMUSG00000002345,protein_coding -30587,Slc25a42,ENSMUSG00000002346,protein_coding -48796,Snx9,ENSMUSG00000002365,protein_coding -50531,Ranbp3,ENSMUSG00000002372,protein_coding -50533,Ndufa11,ENSMUSG00000002379,protein_coding -14608,Bmp8b,ENSMUSG00000002384,protein_coding -30647,Nr2f6,ENSMUSG00000002393,protein_coding -30645,Use1,ENSMUSG00000002395,protein_coding -30646,Ocel1,ENSMUSG00000002396,protein_coding -23231,Dyrk1b,ENSMUSG00000002409,protein_coding 
-19399,Braf,ENSMUSG00000002413,protein_coding -19398,Ndufb2,ENSMUSG00000002416,protein_coding -10076,Hltf,ENSMUSG00000002428,protein_coding -7236,Prpf6,ENSMUSG00000002455,protein_coding -7242,Rgs19,ENSMUSG00000002458,protein_coding -38,Rgs20,ENSMUSG00000002459,protein_coding -52785,Abhd3,ENSMUSG00000002475,protein_coding -52784,Snrpd1,ENSMUSG00000002477,protein_coding -17793,Tchp,ENSMUSG00000002486,protein_coding -48560,Tiam1,ENSMUSG00000002489,protein_coding -49300,Tsc2,ENSMUSG00000002496,protein_coding -49315,Rpl3l,ENSMUSG00000002500,protein_coding -49302,Slc9a3r2,ENSMUSG00000002504,protein_coding -46224,Puf60,ENSMUSG00000002524,protein_coding -4217,Golga2,ENSMUSG00000002546,protein_coding -4214,Uck1,ENSMUSG00000002550,protein_coding -43472,Scin,ENSMUSG00000002565,protein_coding -29251,Ikzf4,ENSMUSG00000002578,protein_coding -39489,Mien1,ENSMUSG00000002580,protein_coding -18936,Pon1,ENSMUSG00000002588,protein_coding -23131,Axl,ENSMUSG00000002602,protein_coding -23128,Tgfb1,ENSMUSG00000002603,protein_coding -23129,Ccdc97,ENSMUSG00000002608,protein_coding -49205,Zfp40,ENSMUSG00000002617,protein_coding -49679,Akap8l,ENSMUSG00000002625,protein_coding -16100,Shh,ENSMUSG00000002633,protein_coding -23563,Pdcd2l,ENSMUSG00000002635,protein_coding -50547,Gtf2f1,ENSMUSG00000002658,protein_coding -50544,Clpp,ENSMUSG00000002660,protein_coding -50545,Alkbh7,ENSMUSG00000002661,protein_coding -50546,Pspn,ENSMUSG00000002664,protein_coding -50554,Dennd1c,ENSMUSG00000002668,protein_coding -44116,Med6,ENSMUSG00000002679,protein_coding -43566,Prkd1,ENSMUSG00000002688,protein_coding -37799,Lcp2,ENSMUSG00000002699,protein_coding -6895,Cse1l,ENSMUSG00000002718,protein_coding -6322,Naa20,ENSMUSG00000002728,protein_coding -4897,Prkra,ENSMUSG00000002731,protein_coding -4899,Fkbp7,ENSMUSG00000002732,protein_coding -4900,Plekha3,ENSMUSG00000002733,protein_coding -37408,Ykt6,ENSMUSG00000002741,protein_coding -18355,Baz1b,ENSMUSG00000002748,protein_coding 
-50315,Pex6,ENSMUSG00000002763,protein_coding -50309,Mrpl2,ENSMUSG00000002767,protein_coding -50313,Mea1,ENSMUSG00000002768,protein_coding -50316,Gnmt,ENSMUSG00000002769,protein_coding -24049,Grin2d,ENSMUSG00000002771,protein_coding -24050,Kdelr1,ENSMUSG00000002778,protein_coding -24053,Tmem143,ENSMUSG00000002781,protein_coding -19839,Ggct,ENSMUSG00000002797,protein_coding -44949,Jag2,ENSMUSG00000002799,protein_coding -44955,Btbd6,ENSMUSG00000002803,protein_coding -44951,Nudt14,ENSMUSG00000002804,protein_coding -40631,Epdr1,ENSMUSG00000002808,protein_coding -38351,Flii,ENSMUSG00000002812,protein_coding -38355,Top3a,ENSMUSG00000002814,protein_coding -34781,Atg4d,ENSMUSG00000002820,protein_coding -34790,Qtrt1,ENSMUSG00000002825,protein_coding -50504,Plin4,ENSMUSG00000002831,protein_coding -50503,Hdgfl2,ENSMUSG00000002833,protein_coding -50501,Chaf1a,ENSMUSG00000002835,protein_coding -47975,Adprh,ENSMUSG00000002844,protein_coding -47981,Tmem39a,ENSMUSG00000002845,protein_coding -47978,Timmdc1,ENSMUSG00000002846,protein_coding -47974,Pla1a,ENSMUSG00000002847,protein_coding -20706,Mcm2,ENSMUSG00000002870,protein_coding -20707,Tpra1,ENSMUSG00000002871,protein_coding -780,Nab1,ENSMUSG00000002881,protein_coding -30888,Adgre5,ENSMUSG00000002885,protein_coding -21209,Il17ra,ENSMUSG00000002897,protein_coding -43339,Lamb1,ENSMUSG00000002900,protein_coding -30629,Kcnn1,ENSMUSG00000002908,protein_coding -30628,Arrdc2,ENSMUSG00000002910,protein_coding -19855,Ppp1r17,ENSMUSG00000002930,protein_coding -15892,Cd36,ENSMUSG00000002944,protein_coding -29422,Map2k7,ENSMUSG00000002948,protein_coding -29427,Timm44,ENSMUSG00000002949,protein_coding -26600,Ap2a2,ENSMUSG00000002957,protein_coding -23947,Pnkp,ENSMUSG00000002963,protein_coding -23949,Med25,ENSMUSG00000002968,protein_coding -22832,Bcam,ENSMUSG00000002980,protein_coding -22823,Clptm1,ENSMUSG00000002981,protein_coding -22822,Relb,ENSMUSG00000002983,protein_coding -22829,Tomm40,ENSMUSG00000002984,protein_coding 
-22828,Apoe,ENSMUSG00000002985,protein_coding -22824,Apoc2,ENSMUSG00000002992,protein_coding -43356,Hbp1,ENSMUSG00000002996,protein_coding -43357,Prkar2b,ENSMUSG00000002997,protein_coding -21742,Cdkn1b,ENSMUSG00000003031,protein_coding -13537,Klf4,ENSMUSG00000003032,protein_coding -30704,Ap1m1,ENSMUSG00000003033,protein_coding -30699,Rab8a,ENSMUSG00000003037,protein_coding -14938,Hmgn2,ENSMUSG00000003038,protein_coding -30703,Fam32a,ENSMUSG00000003039,protein_coding -2238,Elf3,ENSMUSG00000003051,protein_coding -55033,Cyp2c29,ENSMUSG00000003053,protein_coding -40626,Stard3nl,ENSMUSG00000003062,protein_coding -28150,Stk11,ENSMUSG00000003068,protein_coding -28156,Efna2,ENSMUSG00000003070,protein_coding -28152,Atp5d,ENSMUSG00000003072,protein_coding -22699,Ppp5c,ENSMUSG00000003099,protein_coding -39475,Cdk12,ENSMUSG00000003119,protein_coding -23111,Lipe,ENSMUSG00000003123,protein_coding -35471,Pafah1b2,ENSMUSG00000003131,protein_coding -588,Tbc1d8,ENSMUSG00000003134,protein_coding -592,Cnot11,ENSMUSG00000003135,protein_coding -21278,Slc2a3,ENSMUSG00000003153,protein_coding -21282,Foxj2,ENSMUSG00000003154,protein_coding -15721,Sri,ENSMUSG00000003161,protein_coding -47465,Dgcr2,ENSMUSG00000003166,protein_coding -23959,Irf3,ENSMUSG00000003184,protein_coding -23958,Bcl2l12,ENSMUSG00000003190,protein_coding -50489,Zfp959,ENSMUSG00000003198,protein_coding -50498,Mpnd,ENSMUSG00000003199,protein_coding -50500,Sh3gl1,ENSMUSG00000003200,protein_coding -50492,Ebi3,ENSMUSG00000003206,protein_coding -50494,Yju2,ENSMUSG00000003208,protein_coding -27650,Ranbp2,ENSMUSG00000003226,protein_coding -27653,Edar,ENSMUSG00000003227,protein_coding -55436,Grk5,ENSMUSG00000003228,protein_coding -47567,Dvl3,ENSMUSG00000003233,protein_coding -47570,Abcf3,ENSMUSG00000003234,protein_coding -47565,Eif2b5,ENSMUSG00000003235,protein_coding -24044,Cyth2,ENSMUSG00000003269,protein_coding -24041,Sult2b1,ENSMUSG00000003271,protein_coding -24035,Car11,ENSMUSG00000003273,protein_coding 
-50708,Dlgap1,ENSMUSG00000003279,protein_coding -12785,Plag1,ENSMUSG00000003282,protein_coding -6513,Hck,ENSMUSG00000003283,protein_coding -34760,Mrpl4,ENSMUSG00000003299,protein_coding -34778,Keap1,ENSMUSG00000003308,protein_coding -34784,Ap1m2,ENSMUSG00000003309,protein_coding -31519,Glg1,ENSMUSG00000003316,protein_coding -28181,Atp8b3,ENSMUSG00000003341,protein_coding -28192,Btbd2,ENSMUSG00000003344,protein_coding -28191,Csnk1g2,ENSMUSG00000003345,protein_coding -28185,Abhd17a,ENSMUSG00000003346,protein_coding -28195,Mob3a,ENSMUSG00000003348,protein_coding -46870,Cacnb3,ENSMUSG00000003352,protein_coding -46874,Ccdc65,ENSMUSG00000003354,protein_coding -46875,Fkbp11,ENSMUSG00000003355,protein_coding -46871,Ddx23,ENSMUSG00000003360,protein_coding -23205,Pld3,ENSMUSG00000003363,protein_coding -23092,Grik5,ENSMUSG00000003378,protein_coding -23084,Cd79a,ENSMUSG00000003379,protein_coding -23088,Rabac1,ENSMUSG00000003380,protein_coding -11071,Etv3,ENSMUSG00000003382,protein_coding -34819,Prkcsh,ENSMUSG00000003402,protein_coding -34820,Elavl3,ENSMUSG00000003410,protein_coding -14268,Rab3b,ENSMUSG00000003411,protein_coding -3737,St8sia6,ENSMUSG00000003418,protein_coding -23968,Fcgrt,ENSMUSG00000003420,protein_coding -23965,Nosip,ENSMUSG00000003421,protein_coding -23982,Pih1d1,ENSMUSG00000003423,protein_coding -23971,Rps11,ENSMUSG00000003429,protein_coding -23239,Supt5,ENSMUSG00000003435,protein_coding -23237,Dll3,ENSMUSG00000003436,protein_coding -23247,Paf1,ENSMUSG00000003437,protein_coding -23238,Timm50,ENSMUSG00000003438,protein_coding -23246,Med29,ENSMUSG00000003444,protein_coding -22011,Bicd1,ENSMUSG00000003452,protein_coding -2949,Ncstn,ENSMUSG00000003458,protein_coding -2953,Pex19,ENSMUSG00000003464,protein_coding -33866,Phyhip,ENSMUSG00000003469,protein_coding -19841,Crhr2,ENSMUSG00000003476,protein_coding -19843,Inmt,ENSMUSG00000003477,protein_coding -30685,Cyp4f18,ENSMUSG00000003484,protein_coding -19197,Impdh1,ENSMUSG00000003500,protein_coding 
-22731,Psg18,ENSMUSG00000003505,protein_coding -39712,Dusp3,ENSMUSG00000003518,protein_coding -47475,Prodh,ENSMUSG00000003526,protein_coding -47468,Ess2,ENSMUSG00000003527,protein_coding -47471,Slc25a1,ENSMUSG00000003528,protein_coding -47474,Dgcr6,ENSMUSG00000003531,protein_coding -49936,Ddr1,ENSMUSG00000003534,protein_coding -49942,Ier3,ENSMUSG00000003541,protein_coding -22798,Fosb,ENSMUSG00000003545,protein_coding -50308,Klc4,ENSMUSG00000003546,protein_coding -22801,Ercc1,ENSMUSG00000003549,protein_coding -55227,Cyp17a1,ENSMUSG00000003555,protein_coding -55230,As3mt,ENSMUSG00000003559,protein_coding -30590,Homer3,ENSMUSG00000003573,protein_coding -30597,Crtc1,ENSMUSG00000003575,protein_coding -37349,Rnf215,ENSMUSG00000003581,protein_coding -37346,Sec14l2,ENSMUSG00000003585,protein_coding -5727,Aven,ENSMUSG00000003604,protein_coding -10074,Cp,ENSMUSG00000003617,protein_coding -15740,Crot,ENSMUSG00000003623,protein_coding -14934,Rps6ka1,ENSMUSG00000003644,protein_coding -31489,Calb2,ENSMUSG00000003657,protein_coding -6008,Snrnp200,ENSMUSG00000003660,protein_coding -6009,Ciao1,ENSMUSG00000003662,protein_coding -49046,Has1,ENSMUSG00000003665,protein_coding -54386,Taf6l,ENSMUSG00000003680,protein_coding -1971,Insig2,ENSMUSG00000003721,protein_coding -14795,Kpna6,ENSMUSG00000003731,protein_coding -27606,Man1a,ENSMUSG00000003746,protein_coding -23187,Itpkc,ENSMUSG00000003752,protein_coding -23188,Coq8b,ENSMUSG00000003762,protein_coding -53058,Brd8,ENSMUSG00000003778,protein_coding -53059,Kif20a,ENSMUSG00000003779,protein_coding -30947,Farsa,ENSMUSG00000003808,protein_coding -30950,Gcdh,ENSMUSG00000003809,protein_coding -14389,Mast2,ENSMUSG00000003810,protein_coding -30952,Dnase2a,ENSMUSG00000003812,protein_coding -30944,Rad23a,ENSMUSG00000003813,protein_coding -30945,Calr,ENSMUSG00000003814,protein_coding -30949,Syce2,ENSMUSG00000003824,protein_coding -31443,Nfat5,ENSMUSG00000003847,protein_coding -31445,Nob1,ENSMUSG00000003848,protein_coding 
-31444,Nqo1,ENSMUSG00000003849,protein_coding -23997,Ppfia3,ENSMUSG00000003863,protein_coding -24008,Gys1,ENSMUSG00000003865,protein_coding -24007,Ruvbl2,ENSMUSG00000003868,protein_coding -23999,Lin7b,ENSMUSG00000003872,protein_coding -24012,Bax,ENSMUSG00000003873,protein_coding -45388,Il7r,ENSMUSG00000003882,protein_coding -27880,Tfam,ENSMUSG00000003923,protein_coding -49729,Zfp81,ENSMUSG00000003929,protein_coding -38555,Efnb3,ENSMUSG00000003934,protein_coding -39265,Mmd,ENSMUSG00000003948,protein_coding -39266,Hlf,ENSMUSG00000003949,protein_coding -47890,Fam162a,ENSMUSG00000003955,protein_coding -46286,Rpl8,ENSMUSG00000003970,protein_coding -15754,Grm3,ENSMUSG00000003974,protein_coding -42301,Ssbp2,ENSMUSG00000003992,protein_coding -37674,Fancl,ENSMUSG00000004018,protein_coding -2604,Brinp2,ENSMUSG00000004031,protein_coding -11896,Gstm5,ENSMUSG00000004032,protein_coding -11899,Gstm7,ENSMUSG00000004035,protein_coding -11902,Gstm3,ENSMUSG00000004038,protein_coding -39647,Stat3,ENSMUSG00000004040,protein_coding -39646,Stat5a,ENSMUSG00000004043,protein_coding -39648,Cavin1,ENSMUSG00000004044,protein_coding -54224,Map3k11,ENSMUSG00000004054,protein_coding -23207,Akt2,ENSMUSG00000004056,protein_coding -47202,Dnaja3,ENSMUSG00000004069,protein_coding -47204,Hmox2,ENSMUSG00000004070,protein_coding -47206,Cdip1,ENSMUSG00000004071,protein_coding -4795,Map3k20,ENSMUSG00000004085,protein_coding -34582,Cwc15,ENSMUSG00000004096,protein_coding -34747,Col5a3,ENSMUSG00000004098,protein_coding -34757,Dnmt1,ENSMUSG00000004099,protein_coding -34752,Ppan,ENSMUSG00000004100,protein_coding -4267,Angptl2,ENSMUSG00000004105,protein_coding -2535,Cacna1e,ENSMUSG00000004110,protein_coding -3911,Cacna1b,ENSMUSG00000004113,protein_coding -12442,Trmt10a,ENSMUSG00000004127,protein_coding -43453,Etv1,ENSMUSG00000004151,protein_coding -46271,Kifc2,ENSMUSG00000004187,protein_coding -27684,Psap,ENSMUSG00000004207,protein_coding -8387,Ikbkg,ENSMUSG00000004221,protein_coding 
-55164,Pax2,ENSMUSG00000004231,protein_coding -11650,Wars2,ENSMUSG00000004233,protein_coding -21359,Atn1,ENSMUSG00000004263,protein_coding -21348,Phb2,ENSMUSG00000004264,protein_coding -21354,Ptpn6,ENSMUSG00000004266,protein_coding -21360,Eno2,ENSMUSG00000004267,protein_coding -21347,Emg1,ENSMUSG00000004268,protein_coding -21346,Lpcat3,ENSMUSG00000004270,protein_coding -19209,Atp6v1f,ENSMUSG00000004285,protein_coding -37881,Il12b,ENSMUSG00000004296,protein_coding -7327,Clcn5,ENSMUSG00000004317,protein_coding -30424,Clcn3,ENSMUSG00000004319,protein_coding -22700,Hif3a,ENSMUSG00000004328,protein_coding -40658,Gpx6,ENSMUSG00000004341,protein_coding -40656,Gpx5,ENSMUSG00000004344,protein_coding -19856,Pde1c,ENSMUSG00000004347,protein_coding -28475,Utp20,ENSMUSG00000004356,protein_coding -28469,Spic,ENSMUSG00000004359,protein_coding -27170,9330159F19Rik,ENSMUSG00000004360,protein_coding -1324,Cul3,ENSMUSG00000004364,protein_coding -47684,Sst,ENSMUSG00000004366,protein_coding -22130,Il11,ENSMUSG00000004371,protein_coding -30731,Large1,ENSMUSG00000004383,protein_coding -37416,Ddx56,ENSMUSG00000004393,protein_coding -37417,Tmed4,ENSMUSG00000004394,protein_coding -18409,Col26a1,ENSMUSG00000004415,protein_coding -21222,Bid,ENSMUSG00000004446,protein_coding -1933,Ralb,ENSMUSG00000004451,protein_coding -18024,Ppp1cc,ENSMUSG00000004455,protein_coding -47646,Dnajb11,ENSMUSG00000004460,protein_coding -47645,Tbccd1,ENSMUSG00000004462,protein_coding -23909,Clec11a,ENSMUSG00000004473,protein_coding -22500,Zfp324,ENSMUSG00000004500,protein_coding -25217,Gab2,ENSMUSG00000004508,protein_coding -17766,Coro1c,ENSMUSG00000004530,protein_coding -19803,Tax1bp1,ENSMUSG00000004535,protein_coding -22763,Psg17,ENSMUSG00000004540,protein_coding -22761,Psg19,ENSMUSG00000004542,protein_coding -2120,Ctse,ENSMUSG00000004552,protein_coding -33133,Ndrg2,ENSMUSG00000004558,protein_coding -33131,Mettl17,ENSMUSG00000004561,protein_coding -33137,Arhgef40,ENSMUSG00000004562,protein_coding 
-29380,Pnpla6,ENSMUSG00000004565,protein_coding -29379,Mcoln1,ENSMUSG00000004567,protein_coding -29374,Arhgef18,ENSMUSG00000004568,protein_coding -12506,Pkn2,ENSMUSG00000004591,protein_coding -23827,Cd33,ENSMUSG00000004609,protein_coding -23818,Etfb,ENSMUSG00000004610,protein_coding -23816,Nkg7,ENSMUSG00000004612,protein_coding -29388,Stxbp2,ENSMUSG00000004626,protein_coding -29387,Pcp2,ENSMUSG00000004630,protein_coding -18926,Sgce,ENSMUSG00000004631,protein_coding -19821,Chn2,ENSMUSG00000004633,protein_coding -31562,Wwox,ENSMUSG00000004637,protein_coding -16258,Slbp,ENSMUSG00000004642,protein_coding -25079,Tyr,ENSMUSG00000004651,protein_coding -19848,Ghrhr,ENSMUSG00000004654,protein_coding -19847,Aqp1,ENSMUSG00000004655,protein_coding -35718,Arid3b,ENSMUSG00000004661,protein_coding -28144,Cnn2,ENSMUSG00000004665,protein_coding -28147,Polr2e,ENSMUSG00000004667,protein_coding -37470,Abca13,ENSMUSG00000004668,protein_coding -30644,Myo9b,ENSMUSG00000004677,protein_coding -43394,Hdac9,ENSMUSG00000004698,protein_coding -2929,Ly9,ENSMUSG00000004707,protein_coding -2928,Cd244a,ENSMUSG00000004709,protein_coding -50568,Adgre1,ENSMUSG00000004730,protein_coding -37345,Mtfp1,ENSMUSG00000004748,protein_coding -440,Rab23,ENSMUSG00000004768,protein_coding -35863,Rab11a,ENSMUSG00000004771,protein_coding -44209,Eif2b2,ENSMUSG00000004788,protein_coding -44205,Dlst,ENSMUSG00000004789,protein_coding -44207,Pgf,ENSMUSG00000004791,protein_coding -38390,Ulk2,ENSMUSG00000004798,protein_coding -18367,Ccl24,ENSMUSG00000004814,protein_coding -17600,Dgkq,ENSMUSG00000004815,protein_coding -17606,Tmed11,ENSMUSG00000004821,protein_coding -38386,Grap,ENSMUSG00000004837,protein_coding -48298,Pou1f1,ENSMUSG00000004842,protein_coding -48299,Chmp2b,ENSMUSG00000004843,protein_coding -18417,Plod3,ENSMUSG00000004846,protein_coding -18422,Ap1s1,ENSMUSG00000004849,protein_coding -49541,Mapk13,ENSMUSG00000004864,protein_coding -49537,Srpk1,ENSMUSG00000004865,protein_coding 
-1287,Pax3,ENSMUSG00000004872,protein_coding -3189,Lbr,ENSMUSG00000004880,protein_coding -11086,Crabp2,ENSMUSG00000004885,protein_coding -11088,Nes,ENSMUSG00000004891,protein_coding -11089,Bcan,ENSMUSG00000004892,protein_coding -11090,Hapln2,ENSMUSG00000004894,protein_coding -11081,Prcc,ENSMUSG00000004895,protein_coding -11084,Rrnad1,ENSMUSG00000004896,protein_coding -11082,Hdgf,ENSMUSG00000004897,protein_coding -21218,Slc25a18,ENSMUSG00000004902,protein_coding -28219,Thop1,ENSMUSG00000004929,protein_coding -28236,Apba3,ENSMUSG00000004931,protein_coding -28234,Matk,ENSMUSG00000004933,protein_coding -28225,Pias4,ENSMUSG00000004934,protein_coding -35855,Map2k1,ENSMUSG00000004936,protein_coding -28218,Sgta,ENSMUSG00000004937,protein_coding -28231,Nmrk2,ENSMUSG00000004939,protein_coding -48787,Tmem242,ENSMUSG00000004945,protein_coding -18388,Dtx2,ENSMUSG00000004947,protein_coding -18387,Zp3,ENSMUSG00000004948,protein_coding -18383,Hspb1,ENSMUSG00000004951,protein_coding -18391,Rasa4,ENSMUSG00000004952,protein_coding -22113,Syt5,ENSMUSG00000004961,protein_coding -19745,Hnrnpa2b1,ENSMUSG00000004980,protein_coding -21170,Fxyd4,ENSMUSG00000004988,protein_coding -30929,Ccdc130,ENSMUSG00000004994,protein_coding -30928,Mri1,ENSMUSG00000004996,protein_coding -12593,Prkacb,ENSMUSG00000005034,protein_coding -40183,Sgsh,ENSMUSG00000005043,protein_coding -15512,Chd5,ENSMUSG00000005045,protein_coding -28060,Cstb,ENSMUSG00000005054,protein_coding -18402,Sh2b2,ENSMUSG00000005057,protein_coding -21335,Pex5,ENSMUSG00000005069,protein_coding -43926,Jkamp,ENSMUSG00000005078,protein_coding -5522,Cd44,ENSMUSG00000005087,protein_coding -5520,Slc1a2,ENSMUSG00000005089,protein_coding -5803,Eif2ak4,ENSMUSG00000005102,protein_coding -16384,Wdr1,ENSMUSG00000005103,protein_coding -16382,Slc2a9,ENSMUSG00000005107,protein_coding -46068,Ccn4,ENSMUSG00000005124,protein_coding -46069,Ndrg1,ENSMUSG00000005125,protein_coding -35603,4930550C14Rik,ENSMUSG00000005131,protein_coding 
-30973,Man2b1,ENSMUSG00000005142,protein_coding -34174,Klf5,ENSMUSG00000005148,protein_coding -30971,Wdr83,ENSMUSG00000005150,protein_coding -30958,Prdx2,ENSMUSG00000005161,protein_coding -38579,Polr2a,ENSMUSG00000005198,protein_coding -38559,Shbg,ENSMUSG00000005202,protein_coding -38573,Senp3,ENSMUSG00000005204,protein_coding -16885,Corin,ENSMUSG00000005220,protein_coding -19831,Plekha8,ENSMUSG00000005225,protein_coding -4731,G6pc2,ENSMUSG00000005232,protein_coding -4730,Spc25,ENSMUSG00000005233,protein_coding -38552,Dnah2,ENSMUSG00000005237,protein_coding -48745,Ripk4,ENSMUSG00000005251,protein_coding -47508,Ufd1,ENSMUSG00000005262,protein_coding -38421,Zfp287,ENSMUSG00000005267,protein_coding -45391,Prlr,ENSMUSG00000005268,protein_coding -16265,Letm1,ENSMUSG00000005299,protein_coding -41599,Ubqln1,ENSMUSG00000005312,protein_coding -41529,Fgfr4,ENSMUSG00000005320,protein_coding -3000,Cadm3,ENSMUSG00000005338,protein_coding -2996,Fcer1a,ENSMUSG00000005339,protein_coding -46328,Txn2,ENSMUSG00000005354,protein_coding -28083,Casp14,ENSMUSG00000005355,protein_coding -28086,Slc1a6,ENSMUSG00000005357,protein_coding -45374,Slc1a3,ENSMUSG00000005360,protein_coding -20961,Crbn,ENSMUSG00000005362,protein_coding -20959,Il5ra,ENSMUSG00000005364,protein_coding -51007,Msh6,ENSMUSG00000005370,protein_coding -51008,Fbxo11,ENSMUSG00000005371,protein_coding -18351,Mlxipl,ENSMUSG00000005373,protein_coding -18353,Tbl2,ENSMUSG00000005374,protein_coding -18347,Bud23,ENSMUSG00000005378,protein_coding -40510,Nid1,ENSMUSG00000005397,protein_coding -30748,Mcm5,ENSMUSG00000005410,protein_coding -30747,Hmox1,ENSMUSG00000005413,protein_coding -38300,Mprip,ENSMUSG00000005417,protein_coding -23104,Cic,ENSMUSG00000005442,protein_coding -23105,Pafah1b3,ENSMUSG00000005447,protein_coding -30905,Il27ra,ENSMUSG00000005465,protein_coding -30898,Prkaca,ENSMUSG00000005469,protein_coding -30897,Asf1b,ENSMUSG00000005470,protein_coding -18408,Myl10,ENSMUSG00000005474,protein_coding 
-30887,Ddx39,ENSMUSG00000005481,protein_coding -30883,Dnajb1,ENSMUSG00000005483,protein_coding -12678,Msh4,ENSMUSG00000005493,protein_coding -1523,Usp40,ENSMUSG00000005501,protein_coding -19793,Evx1,ENSMUSG00000005503,protein_coding -5375,Kbtbd4,ENSMUSG00000005505,protein_coding -5378,Celf1,ENSMUSG00000005506,protein_coding -5374,Ndufs3,ENSMUSG00000005510,protein_coding -18373,Por,ENSMUSG00000005514,protein_coding -24652,Igf1r,ENSMUSG00000005533,protein_coding -29370,Insr,ENSMUSG00000005534,protein_coding -29396,Fcer2a,ENSMUSG00000005540,protein_coding -23167,Cyp2a5,ENSMUSG00000005547,protein_coding -23395,Atp4a,ENSMUSG00000005553,protein_coding -22503,Trim28,ENSMUSG00000005566,protein_coding -22506,Ube2m,ENSMUSG00000005575,protein_coding -47193,Adcy9,ENSMUSG00000005580,protein_coding -42224,Mef2c,ENSMUSG00000005583,protein_coding -25843,Ctr9,ENSMUSG00000005609,protein_coding -25844,Eif4g2,ENSMUSG00000005610,protein_coding -25841,Mrvi1,ENSMUSG00000005611,protein_coding -47817,Pcyt1a,ENSMUSG00000005615,protein_coding -24896,Zfp592,ENSMUSG00000005621,protein_coding -11450,Psmd4,ENSMUSG00000005625,protein_coding -11455,Tmod4,ENSMUSG00000005628,protein_coding -11079,Insrr,ENSMUSG00000005640,protein_coding -22528,Cabp5,ENSMUSG00000005649,protein_coding -43632,Snx6,ENSMUSG00000005656,protein_coding -20495,Mthfd2,ENSMUSG00000005667,protein_coding -16976,Kit,ENSMUSG00000005672,protein_coding -2898,Tomm40l,ENSMUSG00000005674,protein_coding -2897,Nr1i3,ENSMUSG00000005677,protein_coding -2899,Apoa2,ENSMUSG00000005681,protein_coding -29218,Pan2,ENSMUSG00000005682,protein_coding -29220,Cs,ENSMUSG00000005683,protein_coding -25837,Ampd3,ENSMUSG00000005686,protein_coding -11742,Bcas2,ENSMUSG00000005687,protein_coding -7888,Sh2d1a,ENSMUSG00000005696,protein_coding -31375,Ctcf,ENSMUSG00000005698,protein_coding -31379,Pard6a,ENSMUSG00000005699,protein_coding -31369,Agrp,ENSMUSG00000005705,protein_coding -46336,Pvalb,ENSMUSG00000005716,protein_coding 
-47197,Tfap4,ENSMUSG00000005718,protein_coding -47482,Ranbp1,ENSMUSG00000005732,protein_coding -2795,Cd247,ENSMUSG00000005763,protein_coding -11442,Rfx5,ENSMUSG00000005774,protein_coding -11439,Psmb4,ENSMUSG00000005779,protein_coding -34496,Mmp8,ENSMUSG00000005800,protein_coding -5954,Slc30a4,ENSMUSG00000005802,protein_coding -5958,Sqor,ENSMUSG00000005803,protein_coding -5957,Bloc1s6,ENSMUSG00000005804,protein_coding -12454,Metap1,ENSMUSG00000005813,protein_coding -50564,Gpr108,ENSMUSG00000005823,protein_coding -50561,Tnfsf14,ENSMUSG00000005824,protein_coding -52796,Gata6,ENSMUSG00000005836,protein_coding -47310,Rsl1d1,ENSMUSG00000005846,protein_coding -8290,Cnga2,ENSMUSG00000005864,protein_coding -53049,Apc,ENSMUSG00000005871,protein_coding -53051,Reep5,ENSMUSG00000005873,protein_coding -6608,Ergic3,ENSMUSG00000005881,protein_coding -6603,Uqcc1,ENSMUSG00000005882,protein_coding -7012,Spo11,ENSMUSG00000005883,protein_coding -157,Ncoa2,ENSMUSG00000005886,protein_coding -40962,Prl4a1,ENSMUSG00000005891,protein_coding -20809,Trh,ENSMUSG00000005892,protein_coding -20805,Nr2c2,ENSMUSG00000005893,protein_coding -28570,Nr2c1,ENSMUSG00000005897,protein_coding -47450,Smpd4,ENSMUSG00000005899,protein_coding -15653,Pex1,ENSMUSG00000005907,protein_coding -37608,Otx1,ENSMUSG00000005917,protein_coding -49553,Kctd20,ENSMUSG00000005936,protein_coding -38735,Itgae,ENSMUSG00000005947,protein_coding -38740,Ctns,ENSMUSG00000005949,protein_coding -38737,P2rx5,ENSMUSG00000005950,protein_coding -38741,Shpk,ENSMUSG00000005951,protein_coding -38743,Trpv1,ENSMUSG00000005952,protein_coding -47605,Ephb3,ENSMUSG00000005958,protein_coding -11429,Tuft1,ENSMUSG00000005968,protein_coding -5590,Rcn1,ENSMUSG00000005973,protein_coding -47186,Dnase1,ENSMUSG00000005980,protein_coding -47187,Trap1,ENSMUSG00000005981,protein_coding -47174,Naa60,ENSMUSG00000005982,protein_coding -47176,1700037C18Rik,ENSMUSG00000005983,protein_coding -54133,Ankrd13d,ENSMUSG00000005986,protein_coding 
-13818,Tyrp1,ENSMUSG00000005994,protein_coding -2447,Tpr,ENSMUSG00000006005,protein_coding -2443,Pdc,ENSMUSG00000006007,protein_coding -2446,Odr4,ENSMUSG00000006010,protein_coding -2448,Prg4,ENSMUSG00000006014,protein_coding -22654,Dhx34,ENSMUSG00000006019,protein_coding -22651,Kptn,ENSMUSG00000006021,protein_coding -22650,Napa,ENSMUSG00000006024,protein_coding -53130,Sra1,ENSMUSG00000006050,protein_coding -39372,Calcoco2,ENSMUSG00000006056,protein_coding -39370,Atp5g1,ENSMUSG00000006057,protein_coding -39368,Snf8,ENSMUSG00000006058,protein_coding -23345,Tbcb,ENSMUSG00000006095,protein_coding -38855,Inpp5k,ENSMUSG00000006127,protein_coding -47441,Crkl,ENSMUSG00000006134,protein_coding -18390,Upk3bl,ENSMUSG00000006143,protein_coding -22107,Eps8l1,ENSMUSG00000006154,protein_coding -37901,Clint1,ENSMUSG00000006169,protein_coding -40720,Prss16,ENSMUSG00000006179,protein_coding -40978,Cdkal1,ENSMUSG00000006191,protein_coding -7820,Rhox6,ENSMUSG00000006200,protein_coding -26338,5430419D17Rik,ENSMUSG00000006204,protein_coding -26334,Htra1,ENSMUSG00000006205,protein_coding -15180,Zbtb17,ENSMUSG00000006215,protein_coding -15177,Clcnkb,ENSMUSG00000006216,protein_coding -15174,Fam131c,ENSMUSG00000006218,protein_coding -15183,Fblim1,ENSMUSG00000006219,protein_coding -15178,Hspb7,ENSMUSG00000006221,protein_coding -34816,Epor,ENSMUSG00000006235,protein_coding -34813,Ccdc159,ENSMUSG00000006241,protein_coding -17166,Mob1b,ENSMUSG00000006262,protein_coding -20518,Atp6v1b1,ENSMUSG00000006269,protein_coding -55399,Vax1,ENSMUSG00000006270,protein_coding -30543,Atp6v1b2,ENSMUSG00000006273,protein_coding -30708,Eps15l1,ENSMUSG00000006276,protein_coding -33069,Tep1,ENSMUSG00000006281,protein_coding -33063,Ttc5,ENSMUSG00000006288,protein_coding -33072,Osgep,ENSMUSG00000006289,protein_coding -1197,Aamp,ENSMUSG00000006299,protein_coding -1201,Tmbim1,ENSMUSG00000006301,protein_coding -1195,Arpc2,ENSMUSG00000006304,protein_coding -23382,Kmt2b,ENSMUSG00000006307,protein_coding 
-23384,Zbtb32,ENSMUSG00000006310,protein_coding -23387,Etv2,ENSMUSG00000006311,protein_coding -23385,Upk1a,ENSMUSG00000006313,protein_coding -23396,Tmem147,ENSMUSG00000006315,protein_coding -22067,Rps9,ENSMUSG00000006333,protein_coding -22057,Tfpt,ENSMUSG00000006335,protein_coding -27938,Susd2,ENSMUSG00000006342,protein_coding -27934,Ggt5,ENSMUSG00000006344,protein_coding -27933,Ggt1,ENSMUSG00000006345,protein_coding -44961,Crip2,ENSMUSG00000006356,protein_coding -44962,Crip1,ENSMUSG00000006360,protein_coding -31718,Cbfa2t3,ENSMUSG00000006362,protein_coding -46634,Fbln1,ENSMUSG00000006369,protein_coding -7761,Pgrmc1,ENSMUSG00000006373,protein_coding -46380,Gcat,ENSMUSG00000006378,protein_coding -14012,Tek,ENSMUSG00000006386,protein_coding -14476,Mpl,ENSMUSG00000006389,protein_coding -14473,Elovl1,ENSMUSG00000006390,protein_coding -14472,Med8,ENSMUSG00000006392,protein_coding -14474,Cdc20,ENSMUSG00000006398,protein_coding -2902,Adamts4,ENSMUSG00000006403,protein_coding -2916,Nectin4,ENSMUSG00000006411,protein_coding -2912,Pfdn2,ENSMUSG00000006412,protein_coding -6916,Rnf114,ENSMUSG00000006418,protein_coding -7769,C330007P06Rik,ENSMUSG00000006423,protein_coding -55245,Neurl1a,ENSMUSG00000006435,protein_coding -15415,Srm,ENSMUSG00000006442,protein_coding -15170,Epha2,ENSMUSG00000006445,protein_coding -54152,Rbm14,ENSMUSG00000006456,protein_coding -54157,Actn3,ENSMUSG00000006457,protein_coding -54158,Zdhhc24,ENSMUSG00000006463,protein_coding -54159,Bbs1,ENSMUSG00000006464,protein_coding -3940,Slc34a3,ENSMUSG00000006469,protein_coding -3945,Ndor1,ENSMUSG00000006471,protein_coding -3926,Nsmf,ENSMUSG00000006476,protein_coding -40951,Prl7a1,ENSMUSG00000006488,protein_coding -40948,Prl8a9,ENSMUSG00000006490,protein_coding -4788,Pdk1,ENSMUSG00000006494,protein_coding -28126,Ptbp1,ENSMUSG00000006498,protein_coding -31703,Mvd,ENSMUSG00000006517,protein_coding -31702,Cyba,ENSMUSG00000006519,protein_coding -32477,Itih3,ENSMUSG00000006522,protein_coding 
-32474,Stimate,ENSMUSG00000006526,protein_coding -32471,Sfmbt1,ENSMUSG00000006527,protein_coding -32478,Itih1,ENSMUSG00000006529,protein_coding -1232,Ihh,ENSMUSG00000006538,protein_coding -1220,Prkag3,ENSMUSG00000006542,protein_coding -1228,Cryba2,ENSMUSG00000006546,protein_coding -29833,Atp7b,ENSMUSG00000006567,protein_coding -29822,Defb2,ENSMUSG00000006570,protein_coding -39734,Slc4a1,ENSMUSG00000006574,protein_coding -39736,Rundc3a,ENSMUSG00000006575,protein_coding -1269,Slc4a3,ENSMUSG00000006576,protein_coding -31711,Cdt1,ENSMUSG00000006585,protein_coding -12903,Runx1t1,ENSMUSG00000006586,protein_coding -31705,Snai3,ENSMUSG00000006587,protein_coding -31712,Aprt,ENSMUSG00000006589,protein_coding -24087,Gtf2h1,ENSMUSG00000006599,protein_coding -40862,Hfe,ENSMUSG00000006611,protein_coding -16166,Slc5a6,ENSMUSG00000006641,protein_coding -16165,Tcf23,ENSMUSG00000006642,protein_coding -23367,Nphs1,ENSMUSG00000006649,protein_coding -23365,Aplp1,ENSMUSG00000006651,protein_coding -36863,Qrich1,ENSMUSG00000006673,protein_coding -36874,P4htm,ENSMUSG00000006675,protein_coding -36861,Usp19,ENSMUSG00000006676,protein_coding -8642,Pola1,ENSMUSG00000006678,protein_coding -15055,Cdc42,ENSMUSG00000006699,protein_coding -49652,Pknox1,ENSMUSG00000006705,protein_coding -40907,D130043K22Rik,ENSMUSG00000006711,protein_coding -40902,Gmnn,ENSMUSG00000006715,protein_coding -40905,Acot13,ENSMUSG00000006717,protein_coding -40716,Zfp184,ENSMUSG00000006720,protein_coding -29135,Cyp27b1,ENSMUSG00000006724,protein_coding -29137,Cdk4,ENSMUSG00000006728,protein_coding -29144,B4galnt1,ENSMUSG00000006731,protein_coding -29134,Mettl1,ENSMUSG00000006732,protein_coding -29138,Tspan31,ENSMUSG00000006736,protein_coding -52706,Kif5b,ENSMUSG00000006740,protein_coding -24079,Saal1,ENSMUSG00000006763,protein_coding -28899,Tph2,ENSMUSG00000006764,protein_coding -39537,Krt23,ENSMUSG00000006777,protein_coding -39631,Cnp,ENSMUSG00000006782,protein_coding -39630,Ttc25,ENSMUSG00000006784,protein_coding 
-6881,Sulf2,ENSMUSG00000006800,protein_coding -48934,Sod2,ENSMUSG00000006818,protein_coding -53135,Tmco6,ENSMUSG00000006850,protein_coding -20505,Stambp,ENSMUSG00000006906,protein_coding -39663,Ezh1,ENSMUSG00000006920,protein_coding -39617,Hap1,ENSMUSG00000006930,protein_coding -39622,P3h4,ENSMUSG00000006931,protein_coding -37169,Ctnnb1,ENSMUSG00000006932,protein_coding -37147,Eif1b,ENSMUSG00000006941,protein_coding -23851,Klk4,ENSMUSG00000006948,protein_coding -47591,Chrd,ENSMUSG00000006958,protein_coding -47583,Psmd2,ENSMUSG00000006998,protein_coding -49305,Syngr3,ENSMUSG00000007021,protein_coding -49867,Vars,ENSMUSG00000007029,protein_coding -49868,Vwa7,ENSMUSG00000007030,protein_coding -49864,Hspa1l,ENSMUSG00000007033,protein_coding -49855,Slc44a4,ENSMUSG00000007034,protein_coding -49870,Msh5,ENSMUSG00000007035,protein_coding -49879,Abhd16a,ENSMUSG00000007036,protein_coding -49856,Neu1,ENSMUSG00000007038,protein_coding -49873,Ddah2,ENSMUSG00000007039,protein_coding -49872,Clic1,ENSMUSG00000007041,protein_coding -49866,Lsm2,ENSMUSG00000007050,protein_coding -17660,Pole,ENSMUSG00000007080,protein_coding -2962,Atp1a2,ENSMUSG00000007097,protein_coding -2959,Atp1a4,ENSMUSG00000007107,protein_coding -2957,Casq1,ENSMUSG00000007122,protein_coding -18346,Stx1a,ENSMUSG00000007207,protein_coding -22682,Ceacam9,ENSMUSG00000007209,protein_coding -19667,Zfp775,ENSMUSG00000007216,protein_coding -25812,Scube2,ENSMUSG00000007279,protein_coding -54257,Mrpl49,ENSMUSG00000007338,protein_coding -11741,Dennd2c,ENSMUSG00000007379,protein_coding -44900,Mark3,ENSMUSG00000007411,protein_coding -15656,Gatad1,ENSMUSG00000007415,protein_coding -53162,Pcdha11,ENSMUSG00000007440,protein_coding -17141,2310003L06Rik,ENSMUSG00000007457,protein_coding -21259,M6pr,ENSMUSG00000007458,protein_coding -4146,Lrrc8a,ENSMUSG00000007476,protein_coding -53743,Mc5r,ENSMUSG00000007480,protein_coding -49118,Ppp2r1a,ENSMUSG00000007564,protein_coding -49516,Fance,ENSMUSG00000007570,protein_coding 
-33491,Tinf2,ENSMUSG00000007589,protein_coding -33486,Tssk4,ENSMUSG00000007591,protein_coding -30578,Hapln4,ENSMUSG00000007594,protein_coding -50536,Dus3l,ENSMUSG00000007603,protein_coding -30655,Gtpbp3,ENSMUSG00000007610,protein_coding -13420,Tgfbr1,ENSMUSG00000007613,protein_coding -42329,Homer1,ENSMUSG00000007617,protein_coding -39187,Rad51c,ENSMUSG00000007646,protein_coding -37855,Gabrb2,ENSMUSG00000007653,protein_coding -19057,Cav1,ENSMUSG00000007655,protein_coding -36092,Arpp19,ENSMUSG00000007656,protein_coding -6502,Bcl2l1,ENSMUSG00000007659,protein_coding -50549,Khsrp,ENSMUSG00000007670,protein_coding -44388,Dio2,ENSMUSG00000007682,protein_coding -30631,Ccdc124,ENSMUSG00000007721,protein_coding -37629,Cct4,ENSMUSG00000007739,protein_coding -38061,0610009B22Rik,ENSMUSG00000007777,protein_coding -23954,Cpt1c,ENSMUSG00000007783,protein_coding -1620,Twist2,ENSMUSG00000007805,protein_coding -18698,Zfp655,ENSMUSG00000007812,protein_coding -36846,Rhoa,ENSMUSG00000007815,protein_coding -32369,Zmiz1,ENSMUSG00000007817,protein_coding -21185,Ankrd26,ENSMUSG00000007827,protein_coding -23981,Aldh16a1,ENSMUSG00000007833,protein_coding -41595,Hnrnpa0,ENSMUSG00000007836,protein_coding -23964,Prrg2,ENSMUSG00000007837,protein_coding -38018,Hnrnph1,ENSMUSG00000007850,protein_coding -44237,Ift43,ENSMUSG00000007867,protein_coding -15022,Id3,ENSMUSG00000007872,protein_coding -39484,Tcap,ENSMUSG00000007877,protein_coding -14930,Arid1a,ENSMUSG00000007880,protein_coding -30601,Crlf1,ENSMUSG00000007888,protein_coding -26640,Ctsd,ENSMUSG00000007891,protein_coding -35802,Rplp1,ENSMUSG00000007892,protein_coding -17143,Cabs1,ENSMUSG00000007907,protein_coding -36122,Hmgcll1,ENSMUSG00000007908,protein_coding -23209,Ttc9b,ENSMUSG00000007944,protein_coding -25363,Phox2a,ENSMUSG00000007946,protein_coding -30651,Abhd8,ENSMUSG00000007950,protein_coding -18412,Ift22,ENSMUSG00000007987,protein_coding -33741,Fzd3,ENSMUSG00000007989,protein_coding 
-23912,1700008O03Rik,ENSMUSG00000008028,protein_coding -7459,Mid1ip1,ENSMUSG00000008035,protein_coding -22683,Ap2s1,ENSMUSG00000008036,protein_coding -17603,Fgfrl1,ENSMUSG00000008090,protein_coding -30914,4930432K21Rik,ENSMUSG00000008129,protein_coding -648,Fhl2,ENSMUSG00000008136,protein_coding -23921,Emc10,ENSMUSG00000008140,protein_coding -21336,Clstn3,ENSMUSG00000008153,protein_coding -30967,Fbxw9,ENSMUSG00000008167,protein_coding -23925,Spib,ENSMUSG00000008193,protein_coding -5367,Fnbp4,ENSMUSG00000008200,protein_coding -29436,Cers4,ENSMUSG00000008206,protein_coding -4816,Scrn3,ENSMUSG00000008226,protein_coding -53497,Phax,ENSMUSG00000008301,protein_coding -13752,Tle1,ENSMUSG00000008305,protein_coding -15650,1700109H08Rik,ENSMUSG00000008307,protein_coding -25336,Relt,ENSMUSG00000008318,protein_coding -6270,Snrpb2,ENSMUSG00000008333,protein_coding -18160,Ubc,ENSMUSG00000008348,protein_coding -22058,Prpf31,ENSMUSG00000008373,protein_coding -23201,Sertad1,ENSMUSG00000008384,protein_coding -47252,Carhsp1,ENSMUSG00000008393,protein_coding -28540,Elk3,ENSMUSG00000008398,protein_coding -34883,Herpud2,ENSMUSG00000008429,protein_coding -22104,Rdh13,ENSMUSG00000008435,protein_coding -44115,Adam21,ENSMUSG00000008438,protein_coding -31388,Nutf2,ENSMUSG00000008450,protein_coding -24026,Fut1,ENSMUSG00000008461,protein_coding -2491,Arpc5,ENSMUSG00000008475,protein_coding -49309,Rnf151,ENSMUSG00000008482,protein_coding -13974,Elavl2,ENSMUSG00000008489,protein_coding -23094,Pou2f2,ENSMUSG00000008496,protein_coding -21807,Mgst1,ENSMUSG00000008540,protein_coding -13831,Nfib,ENSMUSG00000008575,protein_coding -35523,Htr3b,ENSMUSG00000008590,protein_coding -11128,Rab25,ENSMUSG00000008601,protein_coding -11130,Ubqln4,ENSMUSG00000008604,protein_coding -47234,Rbfox1,ENSMUSG00000008658,protein_coding -49764,Rps18,ENSMUSG00000008668,protein_coding -8372,Rpl10,ENSMUSG00000008682,protein_coding -25933,Rps15a,ENSMUSG00000008683,protein_coding -46710,Ncaph2,ENSMUSG00000008690,protein_coding 
-11758,Hipk1,ENSMUSG00000008730,protein_coding -25961,Gprc5b,ENSMUSG00000008734,protein_coding -11674,Man1a2,ENSMUSG00000008763,protein_coding -22716,Ceacam5,ENSMUSG00000008789,protein_coding -33134,Tppp2,ENSMUSG00000008813,protein_coding -44213,Acyp1,ENSMUSG00000008822,protein_coding -18333,Cldn13,ENSMUSG00000008843,protein_coding -21330,Cd163,ENSMUSG00000008845,protein_coding -39727,Hdac5,ENSMUSG00000008855,protein_coding -40586,Rala,ENSMUSG00000008859,protein_coding -31561,Clec3a,ENSMUSG00000008874,protein_coding -29851,Vdac3,ENSMUSG00000008892,protein_coding -14235,Slc1a7,ENSMUSG00000008932,protein_coding -11454,Vps72,ENSMUSG00000008958,protein_coding -48469,Gabpa,ENSMUSG00000008976,protein_coding -7011,Bmp7,ENSMUSG00000008999,protein_coding -17829,Dynll1,ENSMUSG00000009013,protein_coding -4387,Pdcl,ENSMUSG00000009030,protein_coding -46401,Tmem184b,ENSMUSG00000009035,protein_coding -48524,Gm5965,ENSMUSG00000009047,protein_coding -27917,Rsph14,ENSMUSG00000009070,protein_coding -37370,Nf2,ENSMUSG00000009073,protein_coding -37369,Cabp7,ENSMUSG00000009075,protein_coding -37368,Zmat5,ENSMUSG00000009076,protein_coding -37381,Ewsr1,ENSMUSG00000009079,protein_coding -37376,Ap1b1,ENSMUSG00000009090,protein_coding -27949,Derl3,ENSMUSG00000009092,protein_coding -27945,Gstt4,ENSMUSG00000009093,protein_coding -47500,Tbx1,ENSMUSG00000009097,protein_coding -11914,Gnat2,ENSMUSG00000009108,protein_coding -21221,Bcl2l13,ENSMUSG00000009112,protein_coding -27974,2610028H24Rik,ENSMUSG00000009114,protein_coding -27979,Spatc1l,ENSMUSG00000009115,protein_coding -20475,Dqx1,ENSMUSG00000009145,protein_coding -39035,Ccl8,ENSMUSG00000009185,protein_coding -4839,Lnpk,ENSMUSG00000009207,protein_coding -39864,Prr29,ENSMUSG00000009210,protein_coding -4058,Mymk,ENSMUSG00000009214,protein_coding -4060,Fam163b,ENSMUSG00000009216,protein_coding -26668,Trpm5,ENSMUSG00000009246,protein_coding -26663,Ascl2,ENSMUSG00000009248,protein_coding -19662,Rarres2,ENSMUSG00000009281,protein_coding 
-28001,Pttg1ip,ENSMUSG00000009291,protein_coding -28037,Trpm2,ENSMUSG00000009292,protein_coding -28004,Ube2g2,ENSMUSG00000009293,protein_coding -39213,Mpo,ENSMUSG00000009350,protein_coding -39215,Lpo,ENSMUSG00000009356,protein_coding -19060,Met,ENSMUSG00000009376,protein_coding -54945,Slc16a12,ENSMUSG00000009378,protein_coding -21092,Syn2,ENSMUSG00000009394,protein_coding -7590,Elk1,ENSMUSG00000009406,protein_coding -2246,Nav1,ENSMUSG00000009418,protein_coding -42463,Tnpo1,ENSMUSG00000009470,protein_coding -24070,Myod1,ENSMUSG00000009471,protein_coding -24068,Otog,ENSMUSG00000009487,protein_coding -53742,Rnmt,ENSMUSG00000009535,protein_coding -26670,Kcnq1,ENSMUSG00000009545,protein_coding -5804,Srp14,ENSMUSG00000009549,protein_coding -4248,6330409D20Rik,ENSMUSG00000009551,protein_coding -4244,Cdk9,ENSMUSG00000009555,protein_coding -4250,Tor2a,ENSMUSG00000009563,protein_coding -4242,Fpgs,ENSMUSG00000009566,protein_coding -47331,Mrtfb,ENSMUSG00000009569,protein_coding -47118,Cbx5,ENSMUSG00000009575,protein_coding -17136,Odam,ENSMUSG00000009580,protein_coding -46431,Apobec3,ENSMUSG00000009585,protein_coding -40104,St6galnac1,ENSMUSG00000009588,protein_coding -9224,Taf7l,ENSMUSG00000009596,protein_coding -4063,Sardh,ENSMUSG00000009614,protein_coding -4065,Vav2,ENSMUSG00000009621,protein_coding -30037,Tex15,ENSMUSG00000009628,protein_coding -30038,Ppp2cb,ENSMUSG00000009630,protein_coding -3409,G0s2,ENSMUSG00000009633,protein_coding -14449,Dmap1,ENSMUSG00000009640,protein_coding -27666,Pla2g12b,ENSMUSG00000009646,protein_coding -27668,Mcu,ENSMUSG00000009647,protein_coding -27667,Oit3,ENSMUSG00000009654,protein_coding -8758,Tex11,ENSMUSG00000009670,protein_coding -27921,Bcr,ENSMUSG00000009681,protein_coding -23420,Fxyd5,ENSMUSG00000009687,protein_coding -7362,Kcnd1,ENSMUSG00000009731,protein_coding -46972,Tfcp2,ENSMUSG00000009733,protein_coding -40600,Pou6f2,ENSMUSG00000009734,protein_coding -46973,Pou6f1,ENSMUSG00000009739,protein_coding 
-37022,Ubp1,ENSMUSG00000009741,protein_coding -2147,Nuak2,ENSMUSG00000009772,protein_coding -36166,Ick,ENSMUSG00000009828,protein_coding -15155,Sdhb,ENSMUSG00000009863,protein_coding -6500,Cox4i2,ENSMUSG00000009876,protein_coding -38287,Snap47,ENSMUSG00000009894,protein_coding -38282,Wnt3a,ENSMUSG00000009900,protein_coding -1822,Kdsr,ENSMUSG00000009905,protein_coding -1823,Vps4b,ENSMUSG00000009907,protein_coding -35388,Rps25,ENSMUSG00000009927,protein_coding -9246,Nxf2,ENSMUSG00000009941,protein_coding -8377,Taz,ENSMUSG00000009995,protein_coding -39991,Kif19a,ENSMUSG00000010021,protein_coding -38373,Aldh3a2,ENSMUSG00000010025,protein_coding -36788,Zmynd10,ENSMUSG00000010044,protein_coding -36785,Tmem115,ENSMUSG00000010045,protein_coding -36793,Hyal2,ENSMUSG00000010047,protein_coding -36798,Ifrd2,ENSMUSG00000010048,protein_coding -36795,Hyal1,ENSMUSG00000010051,protein_coding -36792,Tusc2,ENSMUSG00000010054,protein_coding -36787,Nprl2,ENSMUSG00000010057,protein_coding -36804,Slc38a3,ENSMUSG00000010064,protein_coding -36782,Cacna2d2,ENSMUSG00000010066,protein_coding -36790,Rassf1,ENSMUSG00000010067,protein_coding -39301,Epn3,ENSMUSG00000010080,protein_coding -38379,Rnf112,ENSMUSG00000010086,protein_coding -54369,Slc3a2,ENSMUSG00000010095,protein_coding -54383,Nxf1,ENSMUSG00000010097,protein_coding -54381,Stx5a,ENSMUSG00000010110,protein_coding -38376,Slc47a1,ENSMUSG00000010122,protein_coding -11840,Pifo,ENSMUSG00000010136,protein_coding -38368,Tnfrsf13b,ENSMUSG00000010142,protein_coding -31752,Spire2,ENSMUSG00000010154,protein_coding -3330,Prox1,ENSMUSG00000010175,protein_coding -34771,Raver1,ENSMUSG00000010205,protein_coding -38940,2610507B11Rik,ENSMUSG00000010277,protein_coding -647,AI597479,ENSMUSG00000010290,protein_coding -24103,Tmem86a,ENSMUSG00000010307,protein_coding -2191,Optc,ENSMUSG00000010311,protein_coding -39188,Tex14,ENSMUSG00000010342,protein_coding -39687,Ifi35,ENSMUSG00000010358,protein_coding -39698,Rdm1,ENSMUSG00000010362,protein_coding 
-33489,Nedd8,ENSMUSG00000010376,protein_coding -38881,Gosr1,ENSMUSG00000010392,protein_coding -33403,Mrpl52,ENSMUSG00000010406,protein_coding -29588,Spaca7,ENSMUSG00000010435,protein_coding -508,Kansl3,ENSMUSG00000010453,protein_coding -27061,Eya4,ENSMUSG00000010461,protein_coding -26443,Ebf3,ENSMUSG00000010476,protein_coding -7246,Myt1,ENSMUSG00000010505,protein_coding -14288,Faf1,ENSMUSG00000010517,protein_coding -44895,Gm266,ENSMUSG00000010529,protein_coding -11106,Tsacc,ENSMUSG00000010538,protein_coding -38814,Mettl16,ENSMUSG00000010554,protein_coding -50407,Dazl,ENSMUSG00000010592,protein_coding -46302,Apol7a,ENSMUSG00000010601,protein_coding -34831,Pigyl,ENSMUSG00000010607,protein_coding -44146,Rbm25,ENSMUSG00000010608,protein_coding -3136,Psen2,ENSMUSG00000010609,protein_coding -37101,Acaa1b,ENSMUSG00000010651,protein_coding -37099,Plcd1,ENSMUSG00000010660,protein_coding -54444,Fads1,ENSMUSG00000010663,protein_coding -16107,Lmbr1,ENSMUSG00000010721,protein_coding -26696,Tnfrsf22,ENSMUSG00000010751,protein_coding -26690,Cars,ENSMUSG00000010755,protein_coding -26687,Phlda2,ENSMUSG00000010760,protein_coding -19078,Asz1,ENSMUSG00000010796,protein_coding -19077,Wnt2,ENSMUSG00000010797,protein_coding -37853,Gabra1,ENSMUSG00000010803,protein_coding -18636,Grid2ip,ENSMUSG00000010825,protein_coding -46409,Kdelr3,ENSMUSG00000010830,protein_coding -39713,Cfap97d1,ENSMUSG00000010841,protein_coding -5525,Apip,ENSMUSG00000010911,protein_coding -5524,Pdhx,ENSMUSG00000010914,protein_coding -31497,Vac14,ENSMUSG00000010936,protein_coding -12575,Mcoln2,ENSMUSG00000011008,protein_coding -16245,Slc5a1,ENSMUSG00000011034,protein_coding -23945,Akt1s1,ENSMUSG00000011096,protein_coding -35099,Tbrg1,ENSMUSG00000011114,protein_coding -35100,Panx3,ENSMUSG00000011118,protein_coding -44933,Adssl1,ENSMUSG00000011148,protein_coding -24973,Cfap161,ENSMUSG00000011154,protein_coding -44953,Brf1,ENSMUSG00000011158,protein_coding -45239,Vipr2,ENSMUSG00000011171,protein_coding 
-43047,Odc1,ENSMUSG00000011179,protein_coding -37903,Thg1l,ENSMUSG00000011254,protein_coding -37907,Adam19,ENSMUSG00000011256,protein_coding -14616,Pabpc4,ENSMUSG00000011257,protein_coding -22812,Exoc3l2,ENSMUSG00000011263,protein_coding -22819,Zfp296,ENSMUSG00000011267,protein_coding -50505,Plin5,ENSMUSG00000011305,protein_coding -30576,Sugp1,ENSMUSG00000011306,protein_coding -23082,Dmrtc2,ENSMUSG00000011349,protein_coding -24014,Dhdh,ENSMUSG00000011382,protein_coding -23317,Zfp790,ENSMUSG00000011427,protein_coding -10080,Cpb1,ENSMUSG00000011463,protein_coding -50550,Slc25a41,ENSMUSG00000011486,protein_coding -50496,Fsd1,ENSMUSG00000011589,protein_coding -23061,Pinlyp,ENSMUSG00000011632,protein_coding -23951,Fuz,ENSMUSG00000011658,protein_coding -23196,Sptbn4,ENSMUSG00000011751,protein_coding -55081,Pgam1,ENSMUSG00000011752,protein_coding -17565,Evi5,ENSMUSG00000011831,protein_coding -29419,Evi5l,ENSMUSG00000011832,protein_coding -29425,Snapc2,ENSMUSG00000011837,protein_coding -38901,Git1,ENSMUSG00000011877,protein_coding -17792,Gltp,ENSMUSG00000011884,protein_coding -35966,Bnip2,ENSMUSG00000011958,protein_coding -46865,Ccnt1,ENSMUSG00000011960,protein_coding -47460,Scarf2,ENSMUSG00000012017,protein_coding -12443,4930579F01Rik,ENSMUSG00000012042,protein_coding -43664,Brms1l,ENSMUSG00000012076,protein_coding -47452,Med15,ENSMUSG00000012114,protein_coding -14939,Dhdds,ENSMUSG00000012117,protein_coding -14943,Crybg2,ENSMUSG00000012123,protein_coding -14945,Ubxn11,ENSMUSG00000012126,protein_coding -1296,Mogat1,ENSMUSG00000012187,protein_coding -44959,Tex22,ENSMUSG00000012211,protein_coding -53055,Wnt8a,ENSMUSG00000012282,protein_coding -50290,Tjap1,ENSMUSG00000012296,protein_coding -5530,Ehf,ENSMUSG00000012350,protein_coding -21277,Nanog,ENSMUSG00000012396,protein_coding -32229,Rpl15,ENSMUSG00000012405,protein_coding -42281,Tmem167,ENSMUSG00000012422,protein_coding -15719,Steap4,ENSMUSG00000012428,protein_coding -40578,Mplkip,ENSMUSG00000012429,protein_coding 
-54991,Kif11,ENSMUSG00000012443,protein_coding -18984,Rpa3,ENSMUSG00000012483,protein_coding -31522,Mlkl,ENSMUSG00000012519,protein_coding -16818,Phox2b,ENSMUSG00000012520,protein_coding -19215,Tnpo3,ENSMUSG00000012535,protein_coding -44232,Ttll5,ENSMUSG00000012609,protein_coding -23809,Zfp715,ENSMUSG00000012640,protein_coding -29389,Retn,ENSMUSG00000012705,protein_coding -23904,Acp4,ENSMUSG00000012777,protein_coding -27685,Cdh23,ENSMUSG00000012819,protein_coding -22495,Rps5,ENSMUSG00000012848,protein_coding -30912,Podnl1,ENSMUSG00000012889,protein_coding -30893,Adgrl1,ENSMUSG00000013033,protein_coding -34584,Amotl1,ENSMUSG00000013076,protein_coding -23393,2200002J24Rik,ENSMUSG00000013083,protein_coding -47633,Etv5,ENSMUSG00000013089,protein_coding -22131,Tmem190,ENSMUSG00000013091,protein_coding -31382,Gfod2,ENSMUSG00000013150,protein_coding -31380,Enkd1,ENSMUSG00000013155,protein_coding -31381,4933405L10Rik,ENSMUSG00000013158,protein_coding -31367,Atp6v0d1,ENSMUSG00000013160,protein_coding -50521,Ptprs,ENSMUSG00000013236,protein_coding -2124,Slc41a1,ENSMUSG00000013275,protein_coding -6609,Fer1l4,ENSMUSG00000013338,protein_coding -23824,4931406B18Rik,ENSMUSG00000013353,protein_coding -23823,Iglon5,ENSMUSG00000013367,protein_coding -39362,Igf2bp1,ENSMUSG00000013415,protein_coding -39359,B4galnt2,ENSMUSG00000013418,protein_coding -37189,Zfp651,ENSMUSG00000013419,protein_coding -3935,Nelfb,ENSMUSG00000013465,protein_coding -40182,Card14,ENSMUSG00000013483,protein_coding -17599,Tmem175,ENSMUSG00000013495,protein_coding -6974,Bcas1,ENSMUSG00000013523,protein_coding -47489,Tango2,ENSMUSG00000013539,protein_coding -35993,Aldh1a2,ENSMUSG00000013584,protein_coding -2901,Ndufs2,ENSMUSG00000013593,protein_coding -45693,Snx31,ENSMUSG00000013611,protein_coding -16167,Atraid,ENSMUSG00000013622,protein_coding -16168,Cad,ENSMUSG00000013629,protein_coding -38220,Lypd8,ENSMUSG00000013643,protein_coding -38219,Sh3bp5l,ENSMUSG00000013646,protein_coding 
-38223,1810065E05Rik,ENSMUSG00000013653,protein_coding -54896,Atad1,ENSMUSG00000013662,protein_coding -54897,Pten,ENSMUSG00000013663,protein_coding -26336,4933402N03Rik,ENSMUSG00000013668,protein_coding -2956,Pea15a,ENSMUSG00000013698,protein_coding -32526,Timm23,ENSMUSG00000013701,protein_coding -11459,Tnfaip8l2,ENSMUSG00000013707,protein_coding -20960,Trnt1,ENSMUSG00000013736,protein_coding -49877,Ly6g6e,ENSMUSG00000013766,protein_coding -49854,Ehmt2,ENSMUSG00000013787,protein_coding -34827,Elof1,ENSMUSG00000013822,protein_coding -28134,Med16,ENSMUSG00000013833,protein_coding -46073,St3gal1,ENSMUSG00000013846,protein_coding -28142,Tmem259,ENSMUSG00000013858,protein_coding -29941,Rnf170,ENSMUSG00000013878,protein_coding -23351,Clip3,ENSMUSG00000013921,protein_coding -23350,Thap8,ENSMUSG00000013928,protein_coding -18022,Myl2,ENSMUSG00000013936,protein_coding -2910,Dedd,ENSMUSG00000013973,protein_coding -29391,Mcemp1,ENSMUSG00000013974,protein_coding -2911,Nit1,ENSMUSG00000013997,protein_coding -13350,Pax5,ENSMUSG00000014030,protein_coding -48746,Prdm15,ENSMUSG00000014039,protein_coding -47811,Rnf168,ENSMUSG00000014074,protein_coding -47816,Tctex1d2,ENSMUSG00000014075,protein_coding -5844,Chp1,ENSMUSG00000014077,protein_coding -19517,Sval2,ENSMUSG00000014104,protein_coding -17791,Trpv4,ENSMUSG00000014158,protein_coding -41593,Klhl3,ENSMUSG00000014164,protein_coding -38430,Tvp23b,ENSMUSG00000014177,protein_coding -39632,Dnajc7,ENSMUSG00000014195,protein_coding -39634,Zfp385c,ENSMUSG00000014198,protein_coding -2633,Cacybp,ENSMUSG00000014226,protein_coding -47177,Cluap1,ENSMUSG00000014232,protein_coding -38399,Zswim7,ENSMUSG00000014243,protein_coding -38404,Pigl,ENSMUSG00000014245,protein_coding -53136,Ndufa2,ENSMUSG00000014294,protein_coding -47199,Pam16,ENSMUSG00000014301,protein_coding -47198,Glis2,ENSMUSG00000014303,protein_coding -45673,Cox6c,ENSMUSG00000014313,protein_coding -27872,Bicc1,ENSMUSG00000014329,protein_coding 
-39369,Ube2z,ENSMUSG00000014349,protein_coding -39366,Gip,ENSMUSG00000014351,protein_coding -6050,Tmem87b,ENSMUSG00000014353,protein_coding -6046,Anapc1,ENSMUSG00000014355,protein_coding -6047,Mertk,ENSMUSG00000014361,protein_coding -24092,Tsg101,ENSMUSG00000014402,protein_coding -24085,Hps5,ENSMUSG00000014418,protein_coding -48903,Map3k4,ENSMUSG00000014426,protein_coding -31709,Piezo1,ENSMUSG00000014444,protein_coding -33705,Blk,ENSMUSG00000014453,protein_coding -31706,Rnf166,ENSMUSG00000014470,protein_coding -32512,Ankrd28,ENSMUSG00000014496,protein_coding -29223,Ankrd52,ENSMUSG00000014498,protein_coding -53052,Pkd2l2,ENSMUSG00000014503,protein_coding -53050,Srp19,ENSMUSG00000014504,protein_coding -15657,Tmbim7,ENSMUSG00000014529,protein_coding -20510,Clec4f,ENSMUSG00000014542,protein_coding -21569,Klra17,ENSMUSG00000014543,protein_coding -33686,Wdfy2,ENSMUSG00000014547,protein_coding -20808,Rbsn,ENSMUSG00000014550,protein_coding -20807,Mrps25,ENSMUSG00000014551,protein_coding -20503,Dguok,ENSMUSG00000014554,protein_coding -15483,Camta1,ENSMUSG00000014592,protein_coding -11888,Csf1,ENSMUSG00000014599,protein_coding -11885,Strip1,ENSMUSG00000014601,protein_coding -1660,Kif1a,ENSMUSG00000014602,protein_coding -11884,Alx3,ENSMUSG00000014603,protein_coding -38653,Slc25a11,ENSMUSG00000014606,protein_coding -38649,Chrne,ENSMUSG00000014609,protein_coding -31578,Cmc2,ENSMUSG00000014633,protein_coding -17649,Chfr,ENSMUSG00000014668,protein_coding -22841,Ceacam16,ENSMUSG00000014686,protein_coding -19761,Hoxa2,ENSMUSG00000014704,protein_coding -33807,Adam28,ENSMUSG00000014725,protein_coding -20520,Ankrd53,ENSMUSG00000014747,protein_coding -20521,Tex261,ENSMUSG00000014748,protein_coding -48996,Fam120b,ENSMUSG00000014763,protein_coding -49000,Tbp,ENSMUSG00000014767,protein_coding -48999,Psmb1,ENSMUSG00000014769,protein_coding -49001,Pdcd2,ENSMUSG00000014771,protein_coding -48994,Dll1,ENSMUSG00000014773,protein_coding -31348,Nol3,ENSMUSG00000014776,protein_coding 
-31358,Fhod1,ENSMUSG00000014778,protein_coding -31360,Plekhg4,ENSMUSG00000014782,protein_coding -31359,Slc9a5,ENSMUSG00000014786,protein_coding -31353,Elmo3,ENSMUSG00000014791,protein_coding -33811,Stc1,ENSMUSG00000014813,protein_coding -31349,4931428F04Rik,ENSMUSG00000014837,protein_coding -31363,Tppp3,ENSMUSG00000014846,protein_coding -42311,Msh3,ENSMUSG00000014850,protein_coding -4053,Adamts13,ENSMUSG00000014852,protein_coding -31357,Tmem208,ENSMUSG00000014856,protein_coding -31352,E2f4,ENSMUSG00000014859,protein_coding -4050,Surf4,ENSMUSG00000014867,protein_coding -4049,Surf2,ENSMUSG00000014873,protein_coding -43511,Dnajb9,ENSMUSG00000014905,protein_coding -30510,Naf1,ENSMUSG00000014907,protein_coding -16228,Yes1,ENSMUSG00000014932,protein_coding -16220,Ppp1cb,ENSMUSG00000014956,protein_coding -4763,Gorasp2,ENSMUSG00000014959,protein_coding -2483,Tsen15,ENSMUSG00000014980,protein_coding -46054,Oc90,ENSMUSG00000015001,protein_coding -46052,Efr3a,ENSMUSG00000015002,protein_coding -31715,Trappc2l,ENSMUSG00000015013,protein_coding -31724,Acsf3,ENSMUSG00000015016,protein_coding -31510,Ddx19a,ENSMUSG00000015023,protein_coding -31713,Galns,ENSMUSG00000015027,protein_coding -20683,Gata2,ENSMUSG00000015053,protein_coding -3970,C8g,ENSMUSG00000015083,protein_coding -3961,Entpd2,ENSMUSG00000015085,protein_coding -3980,Rabl6,ENSMUSG00000015087,protein_coding -3968,Ptgds,ENSMUSG00000015090,protein_coding -3975,Edf1,ENSMUSG00000015092,protein_coding -3967,Clic3,ENSMUSG00000015093,protein_coding -3962,Npdc1,ENSMUSG00000015094,protein_coding -3971,Fbxw5,ENSMUSG00000015095,protein_coding -18945,Slc25a13,ENSMUSG00000015112,protein_coding -49344,Ube2i,ENSMUSG00000015120,protein_coding -49341,Tsr3,ENSMUSG00000015126,protein_coding -49339,Unkl,ENSMUSG00000015127,protein_coding -24612,Lrrk1,ENSMUSG00000015133,protein_coding -24617,Aldh1a3,ENSMUSG00000015134,protein_coding -44067,Actn1,ENSMUSG00000015143,protein_coding -23269,Sirt2,ENSMUSG00000015149,protein_coding 
-23271,Hnrnpl,ENSMUSG00000015165,protein_coding -55203,Nolc1,ENSMUSG00000015176,protein_coding -18924,Casd1,ENSMUSG00000015189,protein_coding -26785,Cnksr3,ENSMUSG00000015202,protein_coding -8277,Mtmr1,ENSMUSG00000015214,protein_coding -8282,Hmgb3,ENSMUSG00000015217,protein_coding -1078,Map2,ENSMUSG00000015222,protein_coding -14045,Cyp2j9,ENSMUSG00000015224,protein_coding -13492,Nipsnap3a,ENSMUSG00000015242,protein_coding -13494,Abca1,ENSMUSG00000015243,protein_coding -13493,Nipsnap3b,ENSMUSG00000015247,protein_coding -8383,Lage3,ENSMUSG00000015289,protein_coding -8384,Ubl4a,ENSMUSG00000015290,protein_coding -8379,Gdi1,ENSMUSG00000015291,protein_coding -26810,Sash1,ENSMUSG00000015305,protein_coding -28212,Gadd45b,ENSMUSG00000015312,protein_coding -2946,Slamf6,ENSMUSG00000015314,protein_coding -2942,Slamf1,ENSMUSG00000015316,protein_coding -4138,Zdhhc12,ENSMUSG00000015335,protein_coding -4142,Endog,ENSMUSG00000015337,protein_coding -7441,Cybb,ENSMUSG00000015340,protein_coding -29874,Golga7,ENSMUSG00000015341,protein_coding -7435,Xk,ENSMUSG00000015342,protein_coding -36482,Pcolce2,ENSMUSG00000015354,protein_coding -2935,Cd48,ENSMUSG00000015355,protein_coding -35879,Clpx,ENSMUSG00000015357,protein_coding -46693,Trabd,ENSMUSG00000015363,protein_coding -46689,Mov10l1,ENSMUSG00000015365,protein_coding -46701,Dennd6b,ENSMUSG00000015377,protein_coding -41293,Cd83,ENSMUSG00000015396,protein_coding -9796,Cltrn,ENSMUSG00000015401,protein_coding -9798,Ace2,ENSMUSG00000015405,protein_coding -33535,Gzmb,ENSMUSG00000015437,protein_coding -33531,Gzmf,ENSMUSG00000015441,protein_coding -33527,Gzmn,ENSMUSG00000015443,protein_coding -49822,Ager,ENSMUSG00000015452,protein_coding -49832,Atf6b,ENSMUSG00000015461,protein_coding -49826,Egfl8,ENSMUSG00000015467,protein_coding -49818,Notch4,ENSMUSG00000015468,protein_coding -49828,Ppt2,ENSMUSG00000015474,protein_coding -49829,Prrt1,ENSMUSG00000015476,protein_coding -49823,Rnf5,ENSMUSG00000015478,protein_coding 
-2563,Fam163a,ENSMUSG00000015484,protein_coding -4054,Cacfd1,ENSMUSG00000015488,protein_coding -26903,Hivep2,ENSMUSG00000015501,protein_coding -27163,2310057J18Rik,ENSMUSG00000015519,protein_coding -11480,Arnt,ENSMUSG00000015522,protein_coding -42722,Itga2,ENSMUSG00000015533,protein_coding -42721,Mocs2,ENSMUSG00000015536,protein_coding -40010,Nat9,ENSMUSG00000015542,protein_coding -30538,Lpl,ENSMUSG00000015568,protein_coding -49437,Atp6v0e,ENSMUSG00000015575,protein_coding -49440,Nkx2-5,ENSMUSG00000015579,protein_coding -50294,Zfp318,ENSMUSG00000015597,protein_coding -50300,Ttbk1,ENSMUSG00000015599,protein_coding -50305,Srf,ENSMUSG00000015605,protein_coding -3545,Gata3,ENSMUSG00000015619,protein_coding -7177,Gata5,ENSMUSG00000015627,protein_coding -7171,Lama5,ENSMUSG00000015647,protein_coding -15695,Steap1,ENSMUSG00000015652,protein_coding -15693,Steap2,ENSMUSG00000015653,protein_coding -35276,Hspa8,ENSMUSG00000015656,protein_coding -48799,Serac1,ENSMUSG00000015659,protein_coding -8747,Awat1,ENSMUSG00000015665,protein_coding -8751,Pdzd11,ENSMUSG00000015668,protein_coding -40537,Psma2,ENSMUSG00000015671,protein_coding -40536,Mrpl32,ENSMUSG00000015672,protein_coding -11473,Setdb1,ENSMUSG00000015697,protein_coding -11470,Anxa9,ENSMUSG00000015702,protein_coding -24988,Arnt2,ENSMUSG00000015709,protein_coding -11468,Prune1,ENSMUSG00000015711,protein_coding -11472,Cers2,ENSMUSG00000015714,protein_coding -23000,Nlrp5,ENSMUSG00000015721,protein_coding -19061,Capza2,ENSMUSG00000015733,protein_coding -11511,Plekho1,ENSMUSG00000015745,protein_coding -11512,Vps45,ENSMUSG00000015747,protein_coding -11500,Prpf3,ENSMUSG00000015748,protein_coding -11508,Anp32e,ENSMUSG00000015749,protein_coding -11506,Aph1a,ENSMUSG00000015750,protein_coding -26804,Tab2,ENSMUSG00000015755,protein_coding -26799,Ppil4,ENSMUSG00000015757,protein_coding -32928,Cnih1,ENSMUSG00000015759,protein_coding -21801,Eps8,ENSMUSG00000015766,protein_coding -4043,Med22,ENSMUSG00000015776,protein_coding 
-4041,Abo,ENSMUSG00000015787,protein_coding -4048,Surf1,ENSMUSG00000015790,protein_coding -16488,Med28,ENSMUSG00000015804,protein_coding -16484,Qdpr,ENSMUSG00000015806,protein_coding -33795,Gnrh1,ENSMUSG00000015812,protein_coding -2623,Tnr,ENSMUSG00000015829,protein_coding -38008,Sqstm1,ENSMUSG00000015837,protein_coding -4880,Nfe2l2,ENSMUSG00000015839,protein_coding -2838,Rxrg,ENSMUSG00000015843,protein_coding -4074,Rxra,ENSMUSG00000015846,protein_coding -11492,Adamtsl4,ENSMUSG00000015850,protein_coding -11067,Fcrls,ENSMUSG00000015852,protein_coding -11068,Cd5l,ENSMUSG00000015854,protein_coding -40086,Prpsap1,ENSMUSG00000015869,protein_coding -16489,Fam184b,ENSMUSG00000015879,protein_coding -16496,Ncapg,ENSMUSG00000015880,protein_coding -16497,Lcorl,ENSMUSG00000015882,protein_coding -28545,Lta4h,ENSMUSG00000015889,protein_coding -28547,Amdhd1,ENSMUSG00000015890,protein_coding -6283,Dstn,ENSMUSG00000015932,protein_coding -41565,H2afy,ENSMUSG00000015937,protein_coding -18307,Gtf2ird2,ENSMUSG00000015942,protein_coding -11520,Bola1,ENSMUSG00000015943,protein_coding -18304,Castor2,ENSMUSG00000015944,protein_coding -11542,Fcgr1,ENSMUSG00000015947,protein_coding -18309,Ncf1,ENSMUSG00000015950,protein_coding -25260,Wnt11,ENSMUSG00000015957,protein_coding -3099,Adss,ENSMUSG00000015961,protein_coding -3098,1700016C15Rik,ENSMUSG00000015962,protein_coding -32455,Il17rb,ENSMUSG00000015966,protein_coding -32458,Cacna1d,ENSMUSG00000015968,protein_coding -32456,Chdh,ENSMUSG00000015970,protein_coding -32453,Actr8,ENSMUSG00000015971,protein_coding -26473,Lrrc27,ENSMUSG00000015980,protein_coding -26470,Stk32c,ENSMUSG00000015981,protein_coding -29936,Fnta,ENSMUSG00000015994,protein_coding -42674,Mtrex,ENSMUSG00000016018,protein_coding -6690,Lbp,ENSMUSG00000016024,protein_coding -46652,Celsr1,ENSMUSG00000016028,protein_coding -34975,Fli1,ENSMUSG00000016087,protein_coding -18869,Stard13,ENSMUSG00000016128,protein_coding -7889,Tenm1,ENSMUSG00000016150,protein_coding 
-3412,Camk1g,ENSMUSG00000016179,protein_coding -3400,Diexf,ENSMUSG00000016181,protein_coding -3407,Hsd11b1,ENSMUSG00000016194,protein_coding -3396,Syt14,ENSMUSG00000016200,protein_coding -50071,H2-M3,ENSMUSG00000016206,protein_coding -7754,Lonrf3,ENSMUSG00000016239,protein_coding -7052,Atp5e,ENSMUSG00000016252,protein_coding -7048,Nelfcd,ENSMUSG00000016253,protein_coding -7051,Tubb1,ENSMUSG00000016255,protein_coding -7049,Ctsz,ENSMUSG00000016256,protein_coding -7053,Prelid3b,ENSMUSG00000016257,protein_coding -3393,Sertad4,ENSMUSG00000016262,protein_coding -50089,H2-M2,ENSMUSG00000016283,protein_coding -7771,Ube2a,ENSMUSG00000016308,protein_coding -7766,Slc25a5,ENSMUSG00000016319,protein_coding -7839,Atp1b4,ENSMUSG00000016327,protein_coding -7212,Ppdpf,ENSMUSG00000016344,protein_coding -7209,Kcnq2,ENSMUSG00000016346,protein_coding -7210,Eef1a2,ENSMUSG00000016349,protein_coding -7207,Col20a1,ENSMUSG00000016356,protein_coding -8450,Pls3,ENSMUSG00000016382,protein_coding -5610,Mpped2,ENSMUSG00000016386,protein_coding -7782,Nkap,ENSMUSG00000016409,protein_coding -7784,Ndufa1,ENSMUSG00000016427,protein_coding -5588,Wt1,ENSMUSG00000016458,protein_coding -40980,E2f3,ENSMUSG00000016477,protein_coding -3440,Cr1l,ENSMUSG00000016481,protein_coding -21949,Ppfibp1,ENSMUSG00000016487,protein_coding -3435,Cd46,ENSMUSG00000016493,protein_coding -3428,Cd34,ENSMUSG00000016494,protein_coding -54853,Plgrkt,ENSMUSG00000016495,protein_coding -54854,Cd274,ENSMUSG00000016496,protein_coding -54856,Pdcd1lg2,ENSMUSG00000016498,protein_coding -18751,Gtf3a,ENSMUSG00000016503,protein_coding -18752,Mtif3,ENSMUSG00000016510,protein_coding -18753,Lnx2,ENSMUSG00000016520,protein_coding -2098,Il19,ENSMUSG00000016524,protein_coding -2104,Dyrk3,ENSMUSG00000016526,protein_coding -2100,Mapkapk2,ENSMUSG00000016528,protein_coding -2099,Il10,ENSMUSG00000016529,protein_coding -7842,Lamp2,ENSMUSG00000016534,protein_coding -46635,Atxn10,ENSMUSG00000016541,protein_coding 
-46329,Foxred2,ENSMUSG00000016552,protein_coding -46330,Eif3d,ENSMUSG00000016554,protein_coding -40061,H3f3b,ENSMUSG00000016559,protein_coding -46623,Nup50,ENSMUSG00000016619,protein_coding -46619,Phf21b,ENSMUSG00000016624,protein_coding -25702,Nlrp14,ENSMUSG00000016626,protein_coding -46334,Ift27,ENSMUSG00000016637,protein_coding -46587,Pacsin2,ENSMUSG00000016664,protein_coding -40894,Cmah,ENSMUSG00000016756,protein_coding -46595,Ttll12,ENSMUSG00000016757,protein_coding -46591,Bik,ENSMUSG00000016758,protein_coding -46596,Scube1,ENSMUSG00000016763,protein_coding -33156,Tox4,ENSMUSG00000016831,protein_coding -17417,Mrps18c,ENSMUSG00000016833,protein_coding -151,Sulf1,ENSMUSG00000016918,protein_coding -6758,Srsf6,ENSMUSG00000016921,protein_coding -6730,Plcg1,ENSMUSG00000016933,protein_coding -40024,Kctd2,ENSMUSG00000016940,protein_coding -46349,Tmprss6,ENSMUSG00000016942,protein_coding -49266,Kctd5,ENSMUSG00000016946,protein_coding -40719,Pom121l2,ENSMUSG00000016982,protein_coding -37536,Etaa1,ENSMUSG00000016984,protein_coding -6809,Matn4,ENSMUSG00000016995,protein_coding -6803,Svs4,ENSMUSG00000016998,protein_coding -6805,Svs6,ENSMUSG00000017000,protein_coding -6807,Slpi,ENSMUSG00000017002,protein_coding -6804,Svs3a,ENSMUSG00000017003,protein_coding -6806,Svs5,ENSMUSG00000017004,protein_coding -6810,Rbpjl,ENSMUSG00000017007,protein_coding -6811,Sdc4,ENSMUSG00000017009,protein_coding -29832,Ccdc70,ENSMUSG00000017049,protein_coding -7749,Il13ra1,ENSMUSG00000017057,protein_coding -40964,Prl5a1,ENSMUSG00000017064,protein_coding -39693,Nbr1,ENSMUSG00000017119,protein_coding -40151,Cyth1,ENSMUSG00000017132,protein_coding -4525,Rnd3,ENSMUSG00000017144,protein_coding -39692,Brca1,ENSMUSG00000017146,protein_coding -39616,Gast,ENSMUSG00000017165,protein_coding -39661,Cntnap1,ENSMUSG00000017167,protein_coding -39624,Nt5c3b,ENSMUSG00000017176,protein_coding -39668,Coa3,ENSMUSG00000017188,protein_coding -39494,Zpbp2,ENSMUSG00000017195,protein_coding 
-39500,Gsdma,ENSMUSG00000017204,protein_coding -39504,Med24,ENSMUSG00000017210,protein_coding -39499,Gsdma2,ENSMUSG00000017211,protein_coding -39501,Psmd3,ENSMUSG00000017221,protein_coding -15412,Exosc10,ENSMUSG00000017264,protein_coding -38867,Glod4,ENSMUSG00000017286,protein_coding -38866,Vps53,ENSMUSG00000017288,protein_coding -38905,Taok1,ENSMUSG00000017291,protein_coding -6836,Dnttip1,ENSMUSG00000017299,protein_coding -6840,Tnnc2,ENSMUSG00000017300,protein_coding -6842,Acot8,ENSMUSG00000017307,protein_coding -39716,Cd300lg,ENSMUSG00000017309,protein_coding -6833,Spint4,ENSMUSG00000017310,protein_coding -39719,Pyy,ENSMUSG00000017311,protein_coding -39717,Mpp2,ENSMUSG00000017314,protein_coding -39718,Ppy,ENSMUSG00000017316,protein_coding -38954,Vtn,ENSMUSG00000017344,protein_coding -38963,Nlk,ENSMUSG00000017376,protein_coding -38928,Traf4,ENSMUSG00000017386,protein_coding -38945,Aldoc,ENSMUSG00000017390,protein_coding -39469,Stac2,ENSMUSG00000017400,protein_coding -39467,Rpl19,ENSMUSG00000017404,protein_coding -38929,Nek8,ENSMUSG00000017405,protein_coding -4549,Cacnb4,ENSMUSG00000017412,protein_coding -39461,Plxdc1,ENSMUSG00000017417,protein_coding -3770,Arl5b,ENSMUSG00000017418,protein_coding -39012,Zfp207,ENSMUSG00000017421,protein_coding -39013,Psmd11,ENSMUSG00000017428,protein_coding -40159,C1qtnf1,ENSMUSG00000017446,protein_coding -38915,Pipox,ENSMUSG00000017453,protein_coding -40155,Timp2,ENSMUSG00000017466,protein_coding -31700,Zc3h18,ENSMUSG00000017478,protein_coding -32211,Top2b,ENSMUSG00000017485,protein_coding -32212,Rarb,ENSMUSG00000017491,protein_coding -39521,Igfbp4,ENSMUSG00000017493,protein_coding -39514,Cdc6,ENSMUSG00000017499,protein_coding -39001,Suz12,ENSMUSG00000017548,protein_coding -39004,Atad5,ENSMUSG00000017550,protein_coding -39003,Crlf3,ENSMUSG00000017561,protein_coding -39530,Krt27,ENSMUSG00000017588,protein_coding -39522,Tns4,ENSMUSG00000017607,protein_coding -38958,Tnfaip1,ENSMUSG00000017615,protein_coding 
-38875,Abr,ENSMUSG00000017631,protein_coding -38987,Rab11fip4,ENSMUSG00000017639,protein_coding -6855,Cd40,ENSMUSG00000017652,protein_coding -6860,Slc35c2,ENSMUSG00000017664,protein_coding -6863,Zfp334,ENSMUSG00000017667,protein_coding -6861,Elmo2,ENSMUSG00000017670,protein_coding -38976,Wsb1,ENSMUSG00000017677,protein_coding -6780,Ttpal,ENSMUSG00000017679,protein_coding -39009,Rhot1,ENSMUSG00000017686,protein_coding -9877,Hnf4g,ENSMUSG00000017688,protein_coding -39010,Rhbdl3,ENSMUSG00000017692,protein_coding -6785,Ada,ENSMUSG00000017697,protein_coding -6781,Serinc3,ENSMUSG00000017707,protein_coding -40143,Tha1,ENSMUSG00000017713,protein_coding -40147,Pgs1,ENSMUSG00000017715,protein_coding -40141,Birc5,ENSMUSG00000017716,protein_coding -40140,Afmid,ENSMUSG00000017718,protein_coding -6815,Trp53tg5,ENSMUSG00000017720,protein_coding -6817,Pigt,ENSMUSG00000017721,protein_coding -6819,Wfdc2,ENSMUSG00000017723,protein_coding -39705,Etv4,ENSMUSG00000017724,protein_coding -6822,Eppin,ENSMUSG00000017733,protein_coding -6816,Dbndd2,ENSMUSG00000017734,protein_coding -6852,Mmp9,ENSMUSG00000017737,protein_coding -6853,Slc12a5,ENSMUSG00000017740,protein_coding -39643,Ghdc,ENSMUSG00000017747,protein_coding -6848,Pltp,ENSMUSG00000017754,protein_coding -42084,Slc12a7,ENSMUSG00000017756,protein_coding -6847,Ctsa,ENSMUSG00000017760,protein_coding -6844,Zswim1,ENSMUSG00000017764,protein_coding -31396,Slc12a4,ENSMUSG00000017765,protein_coding -6845,Spata25,ENSMUSG00000017767,protein_coding -38856,Myo1c,ENSMUSG00000017774,protein_coding -38857,Crk,ENSMUSG00000017776,protein_coding -42256,Cox7c,ENSMUSG00000017778,protein_coding -38852,Pitpna,ENSMUSG00000017781,protein_coding -39654,Mlx,ENSMUSG00000017801,protein_coding -39656,Retreg3,ENSMUSG00000017802,protein_coding -6770,Jph2,ENSMUSG00000017817,protein_coding -39637,Dhx58,ENSMUSG00000017830,protein_coding -50442,Rab5a,ENSMUSG00000017831,protein_coding -39639,Hspb9,ENSMUSG00000017832,protein_coding 
-39633,Nkiras2,ENSMUSG00000017837,protein_coding -44857,Ppp2r5c,ENSMUSG00000017843,protein_coding -6762,Ift52,ENSMUSG00000017858,protein_coding -6763,Mybl2,ENSMUSG00000017861,protein_coding -6761,Sgk2,ENSMUSG00000017868,protein_coding -6869,Eya2,ENSMUSG00000017897,protein_coding -40936,Prl3c1,ENSMUSG00000017922,protein_coding -6913,B4galt5,ENSMUSG00000017929,protein_coding -6773,Gdap1l1,ENSMUSG00000017943,protein_coding -6778,Hnf4a,ENSMUSG00000017950,protein_coding -6910,Ptgis,ENSMUSG00000017969,protein_coding -19138,Cadps2,ENSMUSG00000017978,protein_coding -6899,Ddx27,ENSMUSG00000017999,protein_coding -18649,Cyth3,ENSMUSG00000018001,protein_coding -46357,Cyth4,ENSMUSG00000018008,protein_coding -40256,Rac3,ENSMUSG00000018012,protein_coding -46577,Rrp7a,ENSMUSG00000018040,protein_coding -46580,Cyb5r3,ENSMUSG00000018042,protein_coding -39159,Ints2,ENSMUSG00000018068,protein_coding -17920,Med13l,ENSMUSG00000018076,protein_coding -40859,Hist1h2bc,ENSMUSG00000018102,protein_coding -46397,Baiap2l2,ENSMUSG00000018126,protein_coding -18556,Mafk,ENSMUSG00000018143,protein_coding -39473,Med1,ENSMUSG00000018160,protein_coding -29246,Erbb3,ENSMUSG00000018166,protein_coding -39483,Stard3,ENSMUSG00000018167,protein_coding -39492,Ikzf3,ENSMUSG00000018168,protein_coding -46360,Mfng,ENSMUSG00000018169,protein_coding -39167,Vmp1,ENSMUSG00000018171,protein_coding -2384,Uchl5,ENSMUSG00000018189,protein_coding -2382,Glrx2,ENSMUSG00000018196,protein_coding -2383,Ro60,ENSMUSG00000018199,protein_coding -6792,Stk4,ENSMUSG00000018209,protein_coding -6800,Wfdc15b,ENSMUSG00000018211,protein_coding -38436,Pmp22,ENSMUSG00000018217,protein_coding -38102,Gdf9,ENSMUSG00000018238,protein_coding -38095,Zcchc10,ENSMUSG00000018239,protein_coding -40944,Prl8a2,ENSMUSG00000018259,protein_coding -17943,Tbx5,ENSMUSG00000018263,protein_coding -38643,Psmb6,ENSMUSG00000018286,protein_coding -38658,Spag7,ENSMUSG00000018287,protein_coding -38655,Pfn1,ENSMUSG00000018293,protein_coding 
-6791,Tomm34,ENSMUSG00000018322,protein_coding -6789,Ywhab,ENSMUSG00000018326,protein_coding -38973,Ksr1,ENSMUSG00000018334,protein_coding -38157,Gpx3,ENSMUSG00000018339,protein_coding -38160,Anxa6,ENSMUSG00000018340,protein_coding -20053,Il12rb2,ENSMUSG00000018341,protein_coding -38465,Zkscan6,ENSMUSG00000018347,protein_coding -39883,Kpna2,ENSMUSG00000018362,protein_coding -39880,Smurf2,ENSMUSG00000018363,protein_coding -39878,Cep95,ENSMUSG00000018372,protein_coding -39228,Vezf1,ENSMUSG00000018377,protein_coding -39229,Cuedc1,ENSMUSG00000018378,protein_coding -39226,Srsf1,ENSMUSG00000018379,protein_coding -39355,Abi3,ENSMUSG00000018381,protein_coding -38104,Shroom1,ENSMUSG00000018387,protein_coding -38112,Kif3a,ENSMUSG00000018395,protein_coding -38110,Sept8,ENSMUSG00000018398,protein_coding -39203,Mtmr4,ENSMUSG00000018401,protein_coding -39125,Mrm1,ENSMUSG00000018405,protein_coding -39803,Mapt,ENSMUSG00000018411,protein_coding -39807,Kansl1,ENSMUSG00000018412,protein_coding -38327,Gid4,ENSMUSG00000018415,protein_coding -765,Myo1b,ENSMUSG00000018417,protein_coding -39172,Dhx40,ENSMUSG00000018425,protein_coding -39173,Ypel2,ENSMUSG00000018427,protein_coding -39240,Akap1,ENSMUSG00000018428,protein_coding -39888,Nol11,ENSMUSG00000018433,protein_coding -38677,Derl2,ENSMUSG00000018442,protein_coding -38674,C1qbp,ENSMUSG00000018446,protein_coding -38673,Rpain,ENSMUSG00000018449,protein_coding -38679,6330403K07Rik,ENSMUSG00000018451,protein_coding -6865,Slc13a3,ENSMUSG00000018459,protein_coding -38542,Kcnab3,ENSMUSG00000018470,protein_coding -38545,Chd3,ENSMUSG00000018474,protein_coding -38550,Kdm6b,ENSMUSG00000018476,protein_coding -39139,1700125H20Rik,ENSMUSG00000018479,protein_coding -39140,Appbp2,ENSMUSG00000018481,protein_coding -39796,Wnt9b,ENSMUSG00000018486,protein_coding -38398,Adora2b,ENSMUSG00000018500,protein_coding -38402,Ncor1,ENSMUSG00000018501,protein_coding -38411,Trpv2,ENSMUSG00000018507,protein_coding -38407,Cenpv,ENSMUSG00000018509,protein_coding 
-39446,Pcgf2,ENSMUSG00000018537,protein_coding -39451,Cwc25,ENSMUSG00000018541,protein_coding -39453,1700001P01Rik,ENSMUSG00000018543,protein_coding -39448,Pip4k2b,ENSMUSG00000018547,protein_coding -39183,Trim37,ENSMUSG00000018548,protein_coding -38600,Ybx2,ENSMUSG00000018554,protein_coding -38606,Ctdnep1,ENSMUSG00000018559,protein_coding -38604,Elp5,ENSMUSG00000018565,protein_coding -38601,Slc2a4,ENSMUSG00000018566,protein_coding -38607,Gabarap,ENSMUSG00000018567,protein_coding -38603,Cldn7,ENSMUSG00000018569,protein_coding -38596,2810408A11Rik,ENSMUSG00000018570,protein_coding -38608,Phf23,ENSMUSG00000018572,protein_coding -38610,Acadvl,ENSMUSG00000018574,protein_coding -45267,Dnah11,ENSMUSG00000018581,protein_coding -38174,G3bp1,ENSMUSG00000018583,protein_coding -38172,Atox1,ENSMUSG00000018585,protein_coding -9812,Glra2,ENSMUSG00000018589,protein_coding -38171,Sparc,ENSMUSG00000018593,protein_coding -9322,Glra4,ENSMUSG00000018595,protein_coding -38352,Mief2,ENSMUSG00000018599,protein_coding -17939,Tbx3,ENSMUSG00000018604,protein_coding -34499,Mmp20,ENSMUSG00000018620,protein_coding -34502,Mmp7,ENSMUSG00000018623,protein_coding -39801,Crhr1,ENSMUSG00000018634,protein_coding -39115,Dusp14,ENSMUSG00000018648,protein_coding -39116,Tada2a,ENSMUSG00000018651,protein_coding -37485,Ikzf1,ENSMUSG00000018654,protein_coding -19544,Tcaf3,ENSMUSG00000018656,protein_coding -39411,Pnpo,ENSMUSG00000018659,protein_coding -39972,Cog1,ENSMUSG00000018661,protein_coding -39400,Cbx1,ENSMUSG00000018666,protein_coding -39407,Cdk5rap3,ENSMUSG00000018669,protein_coding -39403,Copz2,ENSMUSG00000018672,protein_coding -39737,Slc25a39,ENSMUSG00000018677,protein_coding -39412,Sp2,ENSMUSG00000018678,protein_coding -39120,Aatf,ENSMUSG00000018697,protein_coding -39121,Lhx1,ENSMUSG00000018698,protein_coding -44863,Dync1h1,ENSMUSG00000018707,protein_coding -39976,Cpsf4l,ENSMUSG00000018727,protein_coding -39082,Pex12,ENSMUSG00000018733,protein_coding -38511,Ndel1,ENSMUSG00000018736,protein_coding 
-38517,Slc25a35,ENSMUSG00000018740,protein_coding -38581,Zbtb4,ENSMUSG00000018750,protein_coding -38575,Tnfsfm13,ENSMUSG00000018752,protein_coding -38566,Mpdu1,ENSMUSG00000018761,protein_coding -38563,Fxr2,ENSMUSG00000018765,protein_coding -4833,Atp5g3,ENSMUSG00000018770,protein_coding -38568,Cd68,ENSMUSG00000018774,protein_coding -38580,Slc35g3,ENSMUSG00000018776,protein_coding -30254,Acsl1,ENSMUSG00000018796,protein_coding -39952,Abca5,ENSMUSG00000018800,protein_coding -38837,Smyd4,ENSMUSG00000018809,protein_coding -26644,Lsp1,ENSMUSG00000018819,protein_coding -55095,Zfyve27,ENSMUSG00000018820,protein_coding -55092,Avpi1,ENSMUSG00000018821,protein_coding -55096,Sfrp5,ENSMUSG00000018822,protein_coding -47361,Myh11,ENSMUSG00000018830,protein_coding -39049,Rad51d,ENSMUSG00000018841,protein_coding -39050,Fndc8,ENSMUSG00000018844,protein_coding -39052,Unc45b,ENSMUSG00000018845,protein_coding -37814,Pank3,ENSMUSG00000018846,protein_coding -37818,Rars,ENSMUSG00000018848,protein_coding -37819,Wwc1,ENSMUSG00000018849,protein_coding -40021,Mrpl58,ENSMUSG00000018858,protein_coding -40014,Fdxr,ENSMUSG00000018861,protein_coding -40018,Otop3,ENSMUSG00000018862,protein_coding -46600,Sult4a1,ENSMUSG00000018865,protein_coding -46602,Pnpla5,ENSMUSG00000018868,protein_coding -39431,Mrpl45,ENSMUSG00000018882,protein_coding -46292,Mb,ENSMUSG00000018893,protein_coding -38122,Irf1,ENSMUSG00000018899,protein_coding -38125,Slc22a5,ENSMUSG00000018900,protein_coding -38134,P4ha2,ENSMUSG00000018906,protein_coding -38628,Alox12e,ENSMUSG00000018907,protein_coding -25283,Arrb1,ENSMUSG00000018909,protein_coding -38140,Il3,ENSMUSG00000018914,protein_coding -38138,Csf2,ENSMUSG00000018916,protein_coding -38639,Tm4sf5,ENSMUSG00000018919,protein_coding -38637,Cxcl16,ENSMUSG00000018920,protein_coding -38631,Pelp1,ENSMUSG00000018921,protein_coding -38636,Med11,ENSMUSG00000018923,protein_coding -38629,Alox15,ENSMUSG00000018924,protein_coding -39090,Heatr9,ENSMUSG00000018925,protein_coding 
-39095,Ccl6,ENSMUSG00000018927,protein_coding -39098,Ccl4,ENSMUSG00000018930,protein_coding -38364,Natd1,ENSMUSG00000018931,protein_coding -38366,Map2k3,ENSMUSG00000018932,protein_coding -16242,Ywhah,ENSMUSG00000018965,protein_coding -39393,Hoxb1,ENSMUSG00000018973,protein_coding -17760,Sart3,ENSMUSG00000018974,protein_coding -15023,E2f2,ENSMUSG00000018983,protein_coding -39075,Slfn3,ENSMUSG00000018986,protein_coding -25216,Nars2,ENSMUSG00000018995,protein_coding -19300,Slc35b4,ENSMUSG00000018999,protein_coding -32497,Dnah1,ENSMUSG00000019027,protein_coding -36872,Dalrd3,ENSMUSG00000019039,protein_coding -18414,Fis1,ENSMUSG00000019054,protein_coding -15386,Plod1,ENSMUSG00000019055,protein_coding -34811,Rab3d,ENSMUSG00000019066,protein_coding -46275,Mfsd3,ENSMUSG00000019080,protein_coding -26588,Slc25a22,ENSMUSG00000019082,protein_coding -8378,Atp6ap1,ENSMUSG00000019087,protein_coding -8374,Dnase1l1,ENSMUSG00000019088,protein_coding -38372,Aldh3a1,ENSMUSG00000019102,protein_coding -39093,Ccl9,ENSMUSG00000019122,protein_coding -19829,Scrn1,ENSMUSG00000019124,protein_coding -40903,BC005537,ENSMUSG00000019132,protein_coding -30607,Isyna1,ENSMUSG00000019139,protein_coding -53141,Hars2,ENSMUSG00000019143,protein_coding -46332,Cacng2,ENSMUSG00000019146,protein_coding -22667,Tmem160,ENSMUSG00000019158,protein_coding -39640,Rab5c,ENSMUSG00000019173,protein_coding -18377,Styxl1,ENSMUSG00000019178,protein_coding -18378,Mdh2,ENSMUSG00000019179,protein_coding -6495,H13,ENSMUSG00000019188,protein_coding -37885,Rnf145,ENSMUSG00000019189,protein_coding -23427,Scn1b,ENSMUSG00000019194,protein_coding -21219,Atp6v1e1,ENSMUSG00000019210,protein_coding -49370,Chtf18,ENSMUSG00000019214,protein_coding -2329,Lhx9,ENSMUSG00000019230,protein_coding -12298,Etnppl,ENSMUSG00000019232,protein_coding -44206,Rps6kl1,ENSMUSG00000019235,protein_coding -22108,Ppp1r12c,ENSMUSG00000019254,protein_coding -43407,Ahr,ENSMUSG00000019256,protein_coding -30636,Map1s,ENSMUSG00000019261,protein_coding 
-31741,Dpep1,ENSMUSG00000019278,protein_coding -16260,Tmem129,ENSMUSG00000019295,protein_coding -33497,Nop9,ENSMUSG00000019297,protein_coding -39652,Hsd17b1,ENSMUSG00000019301,protein_coding -39649,Atp6v0a1,ENSMUSG00000019302,protein_coding -39655,Psmc3ip,ENSMUSG00000019303,protein_coding -39491,Grb7,ENSMUSG00000019312,protein_coding -49307,Noxo1,ENSMUSG00000019320,protein_coding -39674,Aoc3,ENSMUSG00000019326,protein_coding -11448,Zfp687,ENSMUSG00000019338,protein_coding -8753,Gdpd2,ENSMUSG00000019359,protein_coding -30927,D8Ertd738e,ENSMUSG00000019362,protein_coding -37343,Sec14l4,ENSMUSG00000019368,protein_coding -22693,Calm3,ENSMUSG00000019370,protein_coding -38307,Cops3,ENSMUSG00000019373,protein_coding -30605,Fkbp8,ENSMUSG00000019428,protein_coding -23405,Ffar3,ENSMUSG00000019429,protein_coding -49902,Ddx39b,ENSMUSG00000019432,protein_coding -30884,Gipc1,ENSMUSG00000019433,protein_coding -38930,Tlcd1,ENSMUSG00000019437,protein_coding -38590,Plscr3,ENSMUSG00000019461,protein_coding -30886,Ptger1,ENSMUSG00000019464,protein_coding -29146,Arhgef25,ENSMUSG00000019467,protein_coding -29384,Xab2,ENSMUSG00000019470,protein_coding -34775,Cdc37,ENSMUSG00000019471,protein_coding -31820,Rab4a,ENSMUSG00000019478,protein_coding -50565,Trip10,ENSMUSG00000019487,protein_coding -50560,Cd70,ENSMUSG00000019489,protein_coding -18492,Cops6,ENSMUSG00000019494,protein_coding -38409,Ubb,ENSMUSG00000019505,protein_coding -18497,Ap4m1,ENSMUSG00000019518,protein_coding -10077,Gyg,ENSMUSG00000019528,protein_coding -23966,Rcn3,ENSMUSG00000019539,protein_coding -8342,Slc6a8,ENSMUSG00000019558,protein_coding -28139,Arid3a,ENSMUSG00000019564,protein_coding -18942,Pdk4,ENSMUSG00000019577,protein_coding -50502,Ubxn6,ENSMUSG00000019578,protein_coding -50510,Mydgf,ENSMUSG00000019579,protein_coding -39838,Cyb561,ENSMUSG00000019590,protein_coding -53403,Sema6a,ENSMUSG00000019647,protein_coding -36964,Ccdc12,ENSMUSG00000019659,protein_coding -19370,Fmc1,ENSMUSG00000019689,protein_coding 
-3083,Akt3,ENSMUSG00000019699,protein_coding -11083,Mrpl24,ENSMUSG00000019710,protein_coding -4132,Gle1,ENSMUSG00000019715,protein_coding -43925,L3hypdh,ENSMUSG00000019718,protein_coding -40513,Lyst,ENSMUSG00000019726,protein_coding -30715,Slc35e1,ENSMUSG00000019731,protein_coding -30710,Calr3,ENSMUSG00000019732,protein_coding -22063,Tmc4,ENSMUSG00000019734,protein_coding -23354,Syne4,ENSMUSG00000019737,protein_coding -23346,Polr2i,ENSMUSG00000019738,protein_coding -40949,Prl8a1,ENSMUSG00000019756,protein_coding -39532,Krt10,ENSMUSG00000019761,protein_coding -26747,Iyd,ENSMUSG00000019762,protein_coding -26760,Rmnd1,ENSMUSG00000019763,protein_coding -26763,Ccdc170,ENSMUSG00000019767,protein_coding -26766,Esr1,ENSMUSG00000019768,protein_coding -26773,Vip,ENSMUSG00000019772,protein_coding -26775,Fbxo5,ENSMUSG00000019773,protein_coding -26776,Mtrf1l,ENSMUSG00000019774,protein_coding -26777,Rgs17,ENSMUSG00000019775,protein_coding -27290,Hdac2,ENSMUSG00000019777,protein_coding -27275,Frk,ENSMUSG00000019779,protein_coding -27258,Rwdd1,ENSMUSG00000019782,protein_coding -27242,Clvs2,ENSMUSG00000019785,protein_coding -27232,Trdn,ENSMUSG00000019787,protein_coding -27203,Hey2,ENSMUSG00000019789,protein_coding -26832,Stxbp5,ENSMUSG00000019790,protein_coding -27197,Hint3,ENSMUSG00000019791,protein_coding -27195,Trmt11,ENSMUSG00000019792,protein_coding -26795,Katna1,ENSMUSG00000019794,protein_coding -26792,Pcmt1,ENSMUSG00000019795,protein_coding -26790,Lrp11,ENSMUSG00000019796,protein_coding -27423,1700021F05Rik,ENSMUSG00000019797,protein_coding -27402,Sec63,ENSMUSG00000019802,protein_coding -27398,Nr2e1,ENSMUSG00000019803,protein_coding -27397,Snx3,ENSMUSG00000019804,protein_coding -26897,Aig1,ENSMUSG00000019806,protein_coding -26895,Adat2,ENSMUSG00000019808,protein_coding -26894,Pex3,ENSMUSG00000019809,protein_coding -26893,Fuca2,ENSMUSG00000019810,protein_coding -27384,Cep57l1,ENSMUSG00000019813,protein_coding -26890,Ltv1,ENSMUSG00000019814,protein_coding 
-26889,Zc2hc1b,ENSMUSG00000019815,protein_coding -26887,Plagl1,ENSMUSG00000019817,protein_coding -27378,Cd164,ENSMUSG00000019818,protein_coding -26879,Utrn,ENSMUSG00000019820,protein_coding -27376,Smpd2,ENSMUSG00000019822,protein_coding -27375,Mical1,ENSMUSG00000019823,protein_coding -27374,Zbtb24,ENSMUSG00000019826,protein_coding -26853,Grm1,ENSMUSG00000019828,protein_coding -27360,Wasf1,ENSMUSG00000019831,protein_coding -26849,Rab32,ENSMUSG00000019832,protein_coding -27355,Slc22a16,ENSMUSG00000019834,protein_coding -27344,Gtf3c6,ENSMUSG00000019837,protein_coding -27338,Slc16a10,ENSMUSG00000019838,protein_coding -27324,Rev3l,ENSMUSG00000019841,protein_coding -27320,Traf3ip2,ENSMUSG00000019842,protein_coding -27316,Fyn,ENSMUSG00000019843,protein_coding -27310,Tube1,ENSMUSG00000019845,protein_coding -27308,Lama4,ENSMUSG00000019846,protein_coding -27465,Popdc3,ENSMUSG00000019848,protein_coding -27463,Prep,ENSMUSG00000019849,protein_coding -26972,Tnfaip3,ENSMUSG00000019850,protein_coding -26970,Perp,ENSMUSG00000019851,protein_coding -26966,Arfgef3,ENSMUSG00000019852,protein_coding -26964,Hebp2,ENSMUSG00000019853,protein_coding -26957,Reps1,ENSMUSG00000019854,protein_coding -27604,Fam184a,ENSMUSG00000019856,protein_coding -27602,Asf1a,ENSMUSG00000019857,protein_coding -27563,Gopc,ENSMUSG00000019861,protein_coding -27434,Qrsl1,ENSMUSG00000019863,protein_coding -27435,Rtn4ip1,ENSMUSG00000019864,protein_coding -26919,Nmbr,ENSMUSG00000019865,protein_coding -27436,Crybg1,ENSMUSG00000019866,protein_coding -26920,Gje1,ENSMUSG00000019867,protein_coding -26917,Vta1,ENSMUSG00000019868,protein_coding -27641,Smpdl3a,ENSMUSG00000019872,protein_coding -27808,Reep3,ENSMUSG00000019873,protein_coding -27640,Fabp7,ENSMUSG00000019874,protein_coding -27638,Pkib,ENSMUSG00000019876,protein_coding -27636,Serinc1,ENSMUSG00000019877,protein_coding -27635,Hsf2,ENSMUSG00000019878,protein_coding -27180,Rspo3,ENSMUSG00000019880,protein_coding -27172,Echdc1,ENSMUSG00000019883,protein_coding 
-28721,Mgat4c,ENSMUSG00000019888,protein_coding -27156,Ptprk,ENSMUSG00000019889,protein_coding -28729,Nts,ENSMUSG00000019890,protein_coding -27562,Dcbld1,ENSMUSG00000019891,protein_coding -28736,Lrriq1,ENSMUSG00000019892,protein_coding -27559,Ros1,ENSMUSG00000019893,protein_coding -28739,Slc6a15,ENSMUSG00000019894,protein_coding -28779,Ccdc59,ENSMUSG00000019897,protein_coding -27143,Lama2,ENSMUSG00000019899,protein_coding -27550,Rfx6,ENSMUSG00000019900,protein_coding -27548,Gprc6a,ENSMUSG00000019905,protein_coding -28799,Lin7a,ENSMUSG00000019906,protein_coding -28808,Ppp1r12a,ENSMUSG00000019907,protein_coding -27547,Fam162b,ENSMUSG00000019909,protein_coding -27534,Sim1,ENSMUSG00000019913,protein_coding -27664,P4ha1,ENSMUSG00000019916,protein_coding -27661,Sept10,ENSMUSG00000019917,protein_coding -27648,Lims1,ENSMUSG00000019920,protein_coding -27903,Zwint,ENSMUSG00000019923,protein_coding -27881,Ube2d1,ENSMUSG00000019927,protein_coding -28646,Dcn,ENSMUSG00000019929,protein_coding -28649,Kera,ENSMUSG00000019932,protein_coding -27853,Mrln,ENSMUSG00000019933,protein_coding -28481,Slc17a8,ENSMUSG00000019935,protein_coding -28650,Epyc,ENSMUSG00000019936,protein_coding -27847,Cdk1,ENSMUSG00000019942,protein_coding -28661,Atp2b1,ENSMUSG00000019943,protein_coding -27844,Rhobtb1,ENSMUSG00000019944,protein_coding -27835,Cabcoco1,ENSMUSG00000019945,protein_coding -27832,Arid5b,ENSMUSG00000019947,protein_coding -28484,Actr6,ENSMUSG00000019948,protein_coding -28485,Uhrf1bp1l,ENSMUSG00000019951,protein_coding -28664,Poc1b,ENSMUSG00000019952,protein_coding -28671,Dusp6,ENSMUSG00000019960,protein_coding -28500,Tmpo,ENSMUSG00000019961,protein_coding -28687,Kitl,ENSMUSG00000019966,protein_coding -44148,Psen1,ENSMUSG00000019969,protein_coding -27030,Sgk1,ENSMUSG00000019970,protein_coding -28713,Cep290,ENSMUSG00000019971,protein_coding -28497,Ikbip,ENSMUSG00000019975,protein_coding -27018,Hbs1l,ENSMUSG00000019977,protein_coding -27113,Epb41l2,ENSMUSG00000019978,protein_coding 
-28495,Apaf1,ENSMUSG00000019979,protein_coding -27012,Myb,ENSMUSG00000019982,protein_coding -27100,Med23,ENSMUSG00000019984,protein_coding -27008,Ahi1,ENSMUSG00000019986,protein_coding -27103,Arg1,ENSMUSG00000019987,protein_coding -28532,Nedd1,ENSMUSG00000019988,protein_coding -27099,Enpp3,ENSMUSG00000019989,protein_coding -27003,Pde7b,ENSMUSG00000019990,protein_coding -27001,Mtfr2,ENSMUSG00000019992,protein_coding -26993,Map7,ENSMUSG00000019996,protein_coding -27097,Ccn2,ENSMUSG00000019997,protein_coding -27091,Stx7,ENSMUSG00000019998,protein_coding -27092,Moxd1,ENSMUSG00000020000,protein_coding -26990,Pex7,ENSMUSG00000020003,protein_coding -26986,Il20ra,ENSMUSG00000020007,protein_coding -26983,Ifngr1,ENSMUSG00000020009,protein_coding -27070,Vnn3,ENSMUSG00000020010,protein_coding -28536,Cfap54,ENSMUSG00000020014,protein_coding -28538,Cdk17,ENSMUSG00000020015,protein_coding -28546,Hal,ENSMUSG00000020017,protein_coding -28549,Snrpf,ENSMUSG00000020018,protein_coding -28552,Ntn4,ENSMUSG00000020019,protein_coding -28558,Usp44,ENSMUSG00000020020,protein_coding -28567,Fgd6,ENSMUSG00000020021,protein_coding -28572,Ndufa12,ENSMUSG00000020022,protein_coding -28574,Tmcc3,ENSMUSG00000020023,protein_coding -28582,Cep83,ENSMUSG00000020024,protein_coding -28597,Socs2,ENSMUSG00000020027,protein_coding -28604,Nudt4,ENSMUSG00000020029,protein_coding -28346,Nuak1,ENSMUSG00000020032,protein_coding -28354,Tcp11l2,ENSMUSG00000020034,protein_coding -28358,Rfx4,ENSMUSG00000020037,protein_coding -28364,Cry1,ENSMUSG00000020038,protein_coding -28369,Btbd11,ENSMUSG00000020042,protein_coding -28397,Timp3,ENSMUSG00000020044,protein_coding -28421,Hsp90b1,ENSMUSG00000020048,protein_coding -28440,Pah,ENSMUSG00000020051,protein_coding -28438,Ascl1,ENSMUSG00000020052,protein_coding -28443,Igf1,ENSMUSG00000020053,protein_coding -28454,Washc3,ENSMUSG00000020056,protein_coding -28457,Dram1,ENSMUSG00000020057,protein_coding -28462,Sycp3,ENSMUSG00000020059,protein_coding 
-28463,Mybpc1,ENSMUSG00000020061,protein_coding -28476,Slc5a8,ENSMUSG00000020062,protein_coding -27778,Sirt1,ENSMUSG00000020063,protein_coding -27774,Herc4,ENSMUSG00000020064,protein_coding -27772,Mypn,ENSMUSG00000020067,protein_coding -27765,Hnrnph3,ENSMUSG00000020069,protein_coding -27763,Rufy2,ENSMUSG00000020070,protein_coding -27766,Pbld2,ENSMUSG00000020072,protein_coding -27751,Ccar1,ENSMUSG00000020074,protein_coding -27744,Ddx21,ENSMUSG00000020075,protein_coding -27746,Ddx50,ENSMUSG00000020076,protein_coding -27741,Srgn,ENSMUSG00000020077,protein_coding -27740,Vps26a,ENSMUSG00000020078,protein_coding -27738,Supv3l1,ENSMUSG00000020079,protein_coding -27737,Hkdc1,ENSMUSG00000020080,protein_coding -27734,Tacr2,ENSMUSG00000020081,protein_coding -27730,Fam241b,ENSMUSG00000020083,protein_coding -27723,Aifm2,ENSMUSG00000020085,protein_coding -27724,H2afy2,ENSMUSG00000020086,protein_coding -27722,Tysnd1,ENSMUSG00000020087,protein_coding -27721,Sar1a,ENSMUSG00000020088,protein_coding -27719,Ppa1,ENSMUSG00000020089,protein_coding -27718,Npffr1,ENSMUSG00000020090,protein_coding -27709,Eif4ebp2,ENSMUSG00000020091,protein_coding -27705,Pald1,ENSMUSG00000020092,protein_coding -27701,Tbata,ENSMUSG00000020096,protein_coding -27699,Sgpl1,ENSMUSG00000020097,protein_coding -27698,Pcbd1,ENSMUSG00000020098,protein_coding -27689,Unc5b,ENSMUSG00000020099,protein_coding -27688,Slc29a3,ENSMUSG00000020100,protein_coding -27686,Vsir,ENSMUSG00000020101,protein_coding -29108,Slc16a7,ENSMUSG00000020102,protein_coding -29112,Lrig3,ENSMUSG00000020105,protein_coding -27678,Anapc16,ENSMUSG00000020107,protein_coding -27677,Ddit4,ENSMUSG00000020108,protein_coding -27676,Dnajb12,ENSMUSG00000020109,protein_coding -27670,Micu1,ENSMUSG00000020111,protein_coding -29005,Cand1,ENSMUSG00000020114,protein_coding -29064,Tbk1,ENSMUSG00000020115,protein_coding -37531,Pno1,ENSMUSG00000020116,protein_coding -37525,Plek,ENSMUSG00000020120,protein_coding -29070,Srgap1,ENSMUSG00000020121,protein_coding 
-37521,Egfr,ENSMUSG00000020122,protein_coding -29081,Avpr1a,ENSMUSG00000020123,protein_coding -29093,Usp15,ENSMUSG00000020124,protein_coding -28131,Elane,ENSMUSG00000020125,protein_coding -37591,Vps54,ENSMUSG00000020128,protein_coding -28900,Tbc1d15,ENSMUSG00000020130,protein_coding -28166,Pcsk4,ENSMUSG00000020131,protein_coding -28903,Rab21,ENSMUSG00000020132,protein_coding -28165,2310011J03Rik,ENSMUSG00000020133,protein_coding -37586,Peli1,ENSMUSG00000020134,protein_coding -28164,Apc2,ENSMUSG00000020135,protein_coding -28906,Thap2,ENSMUSG00000020137,protein_coding -28909,Lgr5,ENSMUSG00000020140,protein_coding -37574,Slc1a4,ENSMUSG00000020142,protein_coding -37802,Dock2,ENSMUSG00000020143,protein_coding -37571,Rab1a,ENSMUSG00000020149,protein_coding -28159,Gamt,ENSMUSG00000020150,protein_coding -28914,Ptprr,ENSMUSG00000020151,protein_coding -37567,Actr2,ENSMUSG00000020152,protein_coding -28158,Ndufs7,ENSMUSG00000020153,protein_coding -28916,Ptprb,ENSMUSG00000020154,protein_coding -37797,Kcnmb1,ENSMUSG00000020155,protein_coding -28157,Pwwp3a,ENSMUSG00000020156,protein_coding -37792,Gabrp,ENSMUSG00000020159,protein_coding -37551,Meis1,ENSMUSG00000020160,protein_coding -28176,Uqcr11,ENSMUSG00000020163,protein_coding -28922,Cnot2,ENSMUSG00000020166,protein_coding -28177,Tcf3,ENSMUSG00000020167,protein_coding -25061,Olfr299,ENSMUSG00000020168,protein_coding -28937,Best3,ENSMUSG00000020169,protein_coding -28941,Frs2,ENSMUSG00000020170,protein_coding -28944,Yeats4,ENSMUSG00000020171,protein_coding -37494,Cobl,ENSMUSG00000020173,protein_coding -27920,Rab36,ENSMUSG00000020175,protein_coding -37491,Grb10,ENSMUSG00000020176,protein_coding -28946,9530003J23Rik,ENSMUSG00000020177,protein_coding -27924,Adora2a,ENSMUSG00000020178,protein_coding -27931,Snrpd3,ENSMUSG00000020180,protein_coding -28823,Nav3,ENSMUSG00000020181,protein_coding -37488,Ddc,ENSMUSG00000020182,protein_coding -28959,Cpm,ENSMUSG00000020183,protein_coding -28961,Mdm2,ENSMUSG00000020184,protein_coding 
-28832,E2f7,ENSMUSG00000020185,protein_coding -28837,Csrp2,ENSMUSG00000020186,protein_coding -28843,Osbpl8,ENSMUSG00000020189,protein_coding -28193,Mknk2,ENSMUSG00000020190,protein_coding -37481,Spata48,ENSMUSG00000020191,protein_coding -37477,Zpbp,ENSMUSG00000020193,protein_coding -27939,Cabin1,ENSMUSG00000020196,protein_coding -28197,Ap3d1,ENSMUSG00000020198,protein_coding -28850,Phlda1,ENSMUSG00000020205,protein_coding -28201,Sf3a2,ENSMUSG00000020211,protein_coding -28971,Mdm1,ENSMUSG00000020212,protein_coding -28866,Glipr1l1,ENSMUSG00000020213,protein_coding -28867,Glipr1l2,ENSMUSG00000020214,protein_coding -28203,Jsrp1,ENSMUSG00000020216,protein_coding -29047,Wif1,ENSMUSG00000020218,protein_coding -28210,Timm13,ENSMUSG00000020219,protein_coding -15285,Vps13d,ENSMUSG00000020220,protein_coding -29024,Llph,ENSMUSG00000020224,protein_coding -29023,Tmbim4,ENSMUSG00000020225,protein_coding -27961,Slc5a4b,ENSMUSG00000020226,protein_coding -29021,Irak3,ENSMUSG00000020227,protein_coding -29020,Helb,ENSMUSG00000020228,protein_coding -27964,Slc5a4a,ENSMUSG00000020229,protein_coding -27966,Prmt2,ENSMUSG00000020230,protein_coding -27969,Dip2a,ENSMUSG00000020231,protein_coding -28244,Hmg20b,ENSMUSG00000020232,protein_coding -28246,4930404N11Rik,ENSMUSG00000020234,protein_coding -28247,Fzr1,ENSMUSG00000020235,protein_coding -28258,Ncln,ENSMUSG00000020238,protein_coding -27982,Col6a2,ENSMUSG00000020241,protein_coding -28315,Hcfc2,ENSMUSG00000020246,protein_coding -28316,Nfyb,ENSMUSG00000020248,protein_coding -28320,Txnrd1,ENSMUSG00000020250,protein_coding -28314,Glt8d2,ENSMUSG00000020251,protein_coding -36733,Ppm1m,ENSMUSG00000020253,protein_coding -28332,D10Wsu102e,ENSMUSG00000020255,protein_coding -28333,Aldh1l2,ENSMUSG00000020256,protein_coding -36731,Wdr82,ENSMUSG00000020257,protein_coding -36729,Glyctk,ENSMUSG00000020258,protein_coding -27995,Pofut2,ENSMUSG00000020260,protein_coding -38169,Slc36a1,ENSMUSG00000020261,protein_coding 
-27996,Adarb1,ENSMUSG00000020262,protein_coding -28335,Appl2,ENSMUSG00000020263,protein_coding -38167,Slc36a2,ENSMUSG00000020264,protein_coding -28002,Sumo3,ENSMUSG00000020265,protein_coding -38156,Hint1,ENSMUSG00000020267,protein_coding -38154,Lyrm7,ENSMUSG00000020268,protein_coding -37776,Smim23,ENSMUSG00000020270,protein_coding -37773,Fbxw11,ENSMUSG00000020271,protein_coding -37772,Stk10,ENSMUSG00000020272,protein_coding -37649,Papolg,ENSMUSG00000020273,protein_coding -37647,Rel,ENSMUSG00000020275,protein_coding -28041,Pfkl,ENSMUSG00000020277,protein_coding -37759,Il9r,ENSMUSG00000020279,protein_coding -37646,Pus10,ENSMUSG00000020280,protein_coding -37761,Rhbdf1,ENSMUSG00000020282,protein_coding -37645,Pex13,ENSMUSG00000020283,protein_coding -28040,1810043G02Rik,ENSMUSG00000020284,protein_coding -37641,1700093K21Rik,ENSMUSG00000020286,protein_coding -37763,Mpg,ENSMUSG00000020287,protein_coding -37638,Ahsa2,ENSMUSG00000020288,protein_coding -37762,Nprl3,ENSMUSG00000020289,protein_coding -37635,Xpo1,ENSMUSG00000020290,protein_coding -37768,Hbq1a,ENSMUSG00000020295,protein_coding -37756,Nsg2,ENSMUSG00000020297,protein_coding -37755,4930524B15Rik,ENSMUSG00000020299,protein_coding -37754,Cpeb4,ENSMUSG00000020300,protein_coding -37749,Stc2,ENSMUSG00000020303,protein_coding -37741,Asb3,ENSMUSG00000020305,protein_coding -28112,Cdc34,ENSMUSG00000020307,protein_coding -28111,Tpgs1,ENSMUSG00000020308,protein_coding -37744,Chac2,ENSMUSG00000020309,protein_coding -28110,Madcam1,ENSMUSG00000020310,protein_coding -37743,Erlec1,ENSMUSG00000020311,protein_coding -28107,Shc2,ENSMUSG00000020312,protein_coding -37727,Sptbn1,ENSMUSG00000020315,protein_coding -28105,Theg,ENSMUSG00000020317,protein_coding -37604,Wdpcp,ENSMUSG00000020319,protein_coding -37602,Mdh1,ENSMUSG00000020321,protein_coding -28121,Prss57,ENSMUSG00000020323,protein_coding -28120,Fstl3,ENSMUSG00000020325,protein_coding -37843,Ccng1,ENSMUSG00000020326,protein_coding -28118,Fgf22,ENSMUSG00000020327,protein_coding 
-37842,Nudcd2,ENSMUSG00000020328,protein_coding -28117,Polrmt,ENSMUSG00000020329,protein_coding -37841,Hmmr,ENSMUSG00000020330,protein_coding -28116,Hcn2,ENSMUSG00000020331,protein_coding -38143,Meikin,ENSMUSG00000020332,protein_coding -38141,Acsl6,ENSMUSG00000020333,protein_coding -38132,Slc22a4,ENSMUSG00000020334,protein_coding -38032,Zfp354b,ENSMUSG00000020335,protein_coding -37913,Cyfip2,ENSMUSG00000020340,protein_coding -37972,Mgat1,ENSMUSG00000020346,protein_coding -38074,Ppp2ca,ENSMUSG00000020349,protein_coding -37936,Sgcd,ENSMUSG00000020354,protein_coding -37991,Flt4,ENSMUSG00000020357,protein_coding -38056,Hnrnpab,ENSMUSG00000020358,protein_coding -38055,Phykpl,ENSMUSG00000020359,protein_coding -38094,Hspa4,ENSMUSG00000020361,protein_coding -37993,Cnot6,ENSMUSG00000020362,protein_coding -37998,Gfpt2,ENSMUSG00000020363,protein_coding -38043,Zfp354a,ENSMUSG00000020364,protein_coding -37999,Mapk9,ENSMUSG00000020366,protein_coding -38014,Canx,ENSMUSG00000020368,protein_coding -37946,Rack1,ENSMUSG00000020372,protein_coding -38001,Rasgef1c,ENSMUSG00000020374,protein_coding -38019,Rufy1,ENSMUSG00000020375,protein_coding -38002,Rnf130,ENSMUSG00000020376,protein_coding -38011,Ltc4s,ENSMUSG00000020377,protein_coding -38116,Rad50,ENSMUSG00000020380,protein_coding -38007,Mrnip,ENSMUSG00000020381,protein_coding -38114,Il13,ENSMUSG00000020383,protein_coding -38052,Clk4,ENSMUSG00000020385,protein_coding -38064,Sar1b,ENSMUSG00000020386,protein_coding -38065,Jade2,ENSMUSG00000020387,protein_coding -38133,Pdlim4,ENSMUSG00000020388,protein_coding -38072,Cdkl3,ENSMUSG00000020389,protein_coding -38070,Ube2b,ENSMUSG00000020390,protein_coding -38069,Cdkn2aipnl,ENSMUSG00000020392,protein_coding -37384,Kremen1,ENSMUSG00000020393,protein_coding -37916,Itk,ENSMUSG00000020395,protein_coding -37375,Nefh,ENSMUSG00000020396,protein_coding -37919,Med7,ENSMUSG00000020397,protein_coding -37922,Havcr2,ENSMUSG00000020399,protein_coding -38158,Tnip1,ENSMUSG00000020400,protein_coding 
-37918,Fam71b,ENSMUSG00000020401,protein_coding -38086,Vdac1,ENSMUSG00000020402,protein_coding -37872,Fabp6,ENSMUSG00000020405,protein_coding -37468,Upp1,ENSMUSG00000020407,protein_coding -37866,Slu7,ENSMUSG00000020409,protein_coding -37911,Nipal4,ENSMUSG00000020411,protein_coding -37366,Ascc2,ENSMUSG00000020412,protein_coding -37465,Hus1,ENSMUSG00000020413,protein_coding -37865,Pttg1,ENSMUSG00000020415,protein_coding -37361,Hormad2,ENSMUSG00000020419,protein_coding -23217,Zfp607a,ENSMUSG00000020420,protein_coding -37461,Tns3,ENSMUSG00000020422,protein_coding -2197,Btg2,ENSMUSG00000020423,protein_coding -37354,Castor1,ENSMUSG00000020424,protein_coding -37454,Igfbp3,ENSMUSG00000020427,protein_coding -37854,Gabra6,ENSMUSG00000020428,protein_coding -37453,Igfbp1,ENSMUSG00000020429,protein_coding -37340,Pes1,ENSMUSG00000020430,protein_coding -37451,Adcy1,ENSMUSG00000020431,protein_coding -37337,Tcn2,ENSMUSG00000020432,protein_coding -37334,4921536K21Rik,ENSMUSG00000020434,protein_coding -37331,Osbp2,ENSMUSG00000020435,protein_coding -37852,Gabrg2,ENSMUSG00000020436,protein_coding -37430,Myo1g,ENSMUSG00000020437,protein_coding -37317,Smtn,ENSMUSG00000020439,protein_coding -19181,Arf5,ENSMUSG00000020440,protein_coding -38280,2310033P09Rik,ENSMUSG00000020441,protein_coding -38276,Guk1,ENSMUSG00000020444,protein_coding -37414,Npc1l1,ENSMUSG00000020447,protein_coding -37310,Rnf185,ENSMUSG00000020448,protein_coding -37308,Limk2,ENSMUSG00000020451,protein_coding -37304,Patz1,ENSMUSG00000020453,protein_coding -37303,Eif4enif1,ENSMUSG00000020454,protein_coding -38271,Trim11,ENSMUSG00000020455,protein_coding -37419,Ogdh,ENSMUSG00000020456,protein_coding -37300,Drg1,ENSMUSG00000020457,protein_coding -37721,Rtn4,ENSMUSG00000020458,protein_coding -37713,Mtif2,ENSMUSG00000020459,protein_coding -37714,Rps27a,ENSMUSG00000020460,protein_coding -37715,Clhc1,ENSMUSG00000020461,protein_coding -37705,Cfap36,ENSMUSG00000020462,protein_coding -37704,Ppp4r3b,ENSMUSG00000020463,protein_coding 
-37702,Pnpt1,ENSMUSG00000020464,protein_coding -37700,Efemp1,ENSMUSG00000020467,protein_coding -37405,Myl7,ENSMUSG00000020469,protein_coding -37404,Pold2,ENSMUSG00000020471,protein_coding -38292,Zkscan17,ENSMUSG00000020472,protein_coding -37403,Aebp1,ENSMUSG00000020473,protein_coding -37402,Polm,ENSMUSG00000020474,protein_coding -37400,Pgam2,ENSMUSG00000020475,protein_coding -37399,Dbnl,ENSMUSG00000020476,protein_coding -37396,Mrps24,ENSMUSG00000020477,protein_coding -37393,Ankrd36,ENSMUSG00000020481,protein_coding -37391,Ccdc117,ENSMUSG00000020482,protein_coding -39223,Dynll2,ENSMUSG00000020483,protein_coding -37390,Xbp1,ENSMUSG00000020484,protein_coding -39208,Supt4a,ENSMUSG00000020485,protein_coding -39202,Sept4,ENSMUSG00000020486,protein_coding -38262,Btnl10,ENSMUSG00000020490,protein_coding -38258,2810021J22Rik,ENSMUSG00000020491,protein_coding -39180,Ska2,ENSMUSG00000020492,protein_coding -39179,Prr11,ENSMUSG00000020493,protein_coding -39177,Smg8,ENSMUSG00000020495,protein_coding -38263,Rnf187,ENSMUSG00000020496,protein_coding -39166,Tubd1,ENSMUSG00000020513,protein_coding -38210,Mrpl22,ENSMUSG00000020514,protein_coding -38207,Cnot8,ENSMUSG00000020515,protein_coding -39165,Rps6kb1,ENSMUSG00000020516,protein_coding -38198,Sap30l,ENSMUSG00000020519,protein_coding -38193,Galnt10,ENSMUSG00000020520,protein_coding -39164,Rnft1,ENSMUSG00000020521,protein_coding -38190,Mfap3,ENSMUSG00000020522,protein_coding -38189,Fam114a2,ENSMUSG00000020523,protein_coding -38187,Gria1,ENSMUSG00000020524,protein_coding -39144,Ppm1d,ENSMUSG00000020525,protein_coding -39132,Znhit3,ENSMUSG00000020526,protein_coding -39130,Myo19,ENSMUSG00000020527,protein_coding -38389,Prpsap2,ENSMUSG00000020528,protein_coding -39128,Ggnbp2,ENSMUSG00000020530,protein_coding -39117,Acaca,ENSMUSG00000020532,protein_coding -38357,Shmt1,ENSMUSG00000020534,protein_coding -38350,Llgl1,ENSMUSG00000020536,protein_coding -38330,Drg2,ENSMUSG00000020537,protein_coding 
-38318,Srebf1,ENSMUSG00000020538,protein_coding -39271,Tom1l1,ENSMUSG00000020541,protein_coding -38460,Myocd,ENSMUSG00000020542,protein_coding -39270,Cox11,ENSMUSG00000020544,protein_coding -43425,Lrrc72,ENSMUSG00000020545,protein_coding -39267,Stxbp4,ENSMUSG00000020546,protein_coding -43420,Bzw2,ENSMUSG00000020547,protein_coding -38458,1700086D15Rik,ENSMUSG00000020548,protein_coding -38454,Elac2,ENSMUSG00000020549,protein_coding -39259,Pctp,ENSMUSG00000020553,protein_coding -43387,Twistnb,ENSMUSG00000020561,protein_coding -43385,Efcab10,ENSMUSG00000020562,protein_coding -43382,Atxn7l1,ENSMUSG00000020564,protein_coding -43042,Atp6v1c2,ENSMUSG00000020566,protein_coding -43376,Sypl,ENSMUSG00000020570,protein_coding -43041,Pdia6,ENSMUSG00000020571,protein_coding -43369,Nampt,ENSMUSG00000020572,protein_coding -43360,Pik3cg,ENSMUSG00000020573,protein_coding -42981,Nbas,ENSMUSG00000020576,protein_coding -43418,Tspan13,ENSMUSG00000020577,protein_coding -43033,Rock2,ENSMUSG00000020580,protein_coding -43417,Agr2,ENSMUSG00000020581,protein_coding -42911,Matn3,ENSMUSG00000020583,protein_coding -42910,Laptm4a,ENSMUSG00000020585,protein_coding -42962,Fam49a,ENSMUSG00000020589,protein_coding -43401,Snx13,ENSMUSG00000020590,protein_coding -43026,Ntsr2,ENSMUSG00000020591,protein_coding -42907,Sdc1,ENSMUSG00000020592,protein_coding -43025,Lpin1,ENSMUSG00000020593,protein_coding -42905,Pum2,ENSMUSG00000020594,protein_coding -43516,Nrcam,ENSMUSG00000020598,protein_coding -39928,Rgs9,ENSMUSG00000020599,protein_coding -42904,Slc7a15,ENSMUSG00000020600,protein_coding -43013,Trib2,ENSMUSG00000020601,protein_coding -39937,Arsg,ENSMUSG00000020604,protein_coding -42897,Hs1bp3,ENSMUSG00000020605,protein_coding -42997,Fam84a,ENSMUSG00000020607,protein_coding -42944,Smc6,ENSMUSG00000020608,protein_coding -42892,Apob,ENSMUSG00000020609,protein_coding -39933,Amz2,ENSMUSG00000020610,protein_coding -39931,Gna13,ENSMUSG00000020611,protein_coding -39941,Prkar1a,ENSMUSG00000020612,protein_coding 
-39942,Fam20a,ENSMUSG00000020614,protein_coding -39944,1700012B07Rik,ENSMUSG00000020617,protein_coding -39947,Abca8b,ENSMUSG00000020620,protein_coding -42931,Rdh14,ENSMUSG00000020621,protein_coding -42930,Nt5c1b,ENSMUSG00000020622,protein_coding -39953,Map2k6,ENSMUSG00000020623,protein_coding -42873,Klhl29,ENSMUSG00000020627,protein_coding -43300,Trappc12,ENSMUSG00000020628,protein_coding -43298,Adi1,ENSMUSG00000020629,protein_coding -43297,Rnaseh1,ENSMUSG00000020630,protein_coding -43292,Dcdc2c,ENSMUSG00000020633,protein_coding -42869,Ubxn2a,ENSMUSG00000020634,protein_coding -42866,Fkbp1b,ENSMUSG00000020635,protein_coding -43294,Allc,ENSMUSG00000020636,protein_coding -43276,Cmpk2,ENSMUSG00000020638,protein_coding -42863,Pfn4,ENSMUSG00000020639,protein_coding -42856,Itsn2,ENSMUSG00000020640,protein_coding -43275,Rsad2,ENSMUSG00000020641,protein_coding -43271,Rnf144a,ENSMUSG00000020642,protein_coding -43257,Id2,ENSMUSG00000020644,protein_coding -43253,Mboat2,ENSMUSG00000020646,protein_coding -42848,Ncoa1,ENSMUSG00000020647,protein_coding -43349,Dus4l,ENSMUSG00000020648,protein_coding -43246,Rrm2,ENSMUSG00000020649,protein_coding -43348,Bcap29,ENSMUSG00000020650,protein_coding -43346,Slc26a4,ENSMUSG00000020651,protein_coding -42842,Cenpo,ENSMUSG00000020652,protein_coding -43244,Klf11,ENSMUSG00000020653,protein_coding -42841,Adcy3,ENSMUSG00000020654,protein_coding -43242,Grhl1,ENSMUSG00000020656,protein_coding -42840,Dnajc27,ENSMUSG00000020657,protein_coding -42837,Efr3b,ENSMUSG00000020658,protein_coding -43343,Cbll1,ENSMUSG00000020659,protein_coding -42836,Pomc,ENSMUSG00000020660,protein_coding -42833,Dnmt3a,ENSMUSG00000020661,protein_coding -43340,Dld,ENSMUSG00000020664,protein_coding -42824,Kif3c,ENSMUSG00000020668,protein_coding -43325,Sh3yl1,ENSMUSG00000020669,protein_coding -42822,Rab10,ENSMUSG00000020671,protein_coding -43316,Sntg2,ENSMUSG00000020672,protein_coding -43314,Tpo,ENSMUSG00000020673,protein_coding -43313,Pxdn,ENSMUSG00000020674,protein_coding 
-39033,Ccl11,ENSMUSG00000020676,protein_coding -39113,Ddx52,ENSMUSG00000020677,protein_coding -39112,Hnf1b,ENSMUSG00000020679,protein_coding -39089,Taf15,ENSMUSG00000020680,protein_coding -39841,Ace,ENSMUSG00000020681,protein_coding -39088,Mmp28,ENSMUSG00000020682,protein_coding -39085,Rasl10b,ENSMUSG00000020684,protein_coding -39086,Gas2l2,ENSMUSG00000020686,protein_coding -39810,Cdc27,ENSMUSG00000020687,protein_coding -39816,Itgb3,ENSMUSG00000020689,protein_coding -39823,Efcab3,ENSMUSG00000020690,protein_coding -39825,Mettl2,ENSMUSG00000020691,protein_coding -39051,Nle1,ENSMUSG00000020692,protein_coding -39829,Tlk2,ENSMUSG00000020694,protein_coding -39830,Mrc2,ENSMUSG00000020695,protein_coding -39048,Rffl,ENSMUSG00000020696,protein_coding -39047,Lig3,ENSMUSG00000020697,protein_coding -39041,Cct6b,ENSMUSG00000020698,protein_coding -39848,Map3k3,ENSMUSG00000020700,protein_coding -39038,Tmem132e,ENSMUSG00000020701,protein_coding -39036,Ccl1,ENSMUSG00000020702,protein_coding -39027,5530401A14Rik,ENSMUSG00000020703,protein_coding -39021,Asic2,ENSMUSG00000020704,protein_coding -39855,Ddx42,ENSMUSG00000020705,protein_coding -39856,Ftsj3,ENSMUSG00000020706,protein_coding -39008,Rnf135,ENSMUSG00000020707,protein_coding -39857,Psmc5,ENSMUSG00000020708,protein_coding -39006,Adap2,ENSMUSG00000020709,protein_coding -39860,Tcam1,ENSMUSG00000020712,protein_coding -39861,Gh,ENSMUSG00000020713,protein_coding -39866,Ern1,ENSMUSG00000020715,protein_coding -38979,Nf1,ENSMUSG00000020716,protein_coding -39871,Pecam1,ENSMUSG00000020717,protein_coding -39874,Polg2,ENSMUSG00000020718,protein_coding -39875,Ddx5,ENSMUSG00000020719,protein_coding -39897,Psmd12,ENSMUSG00000020720,protein_coding -39902,Helz,ENSMUSG00000020721,protein_coding -39904,Cacng1,ENSMUSG00000020722,protein_coding -39905,Cacng4,ENSMUSG00000020723,protein_coding -39917,Cep112,ENSMUSG00000020728,protein_coding -40007,Rab37,ENSMUSG00000020732,protein_coding -40009,Slc9a3r1,ENSMUSG00000020733,protein_coding 
-40012,Grin2c,ENSMUSG00000020734,protein_coding -40030,Nt5c,ENSMUSG00000020736,protein_coding -40032,Jpt1,ENSMUSG00000020737,protein_coding -40033,Sumo2,ENSMUSG00000020738,protein_coding -40034,Nup85,ENSMUSG00000020739,protein_coding -40035,Gga3,ENSMUSG00000020740,protein_coding -38810,Cluh,ENSMUSG00000020741,protein_coding -40038,Mif4gd,ENSMUSG00000020743,protein_coding -40039,Slc25a19,ENSMUSG00000020744,protein_coding -38811,Pafah1b1,ENSMUSG00000020745,protein_coding -40046,Tmem94,ENSMUSG00000020747,protein_coding -40052,Recql5,ENSMUSG00000020752,protein_coding -40056,Sap30bp,ENSMUSG00000020755,protein_coding -40058,Itgb4,ENSMUSG00000020758,protein_coding -40060,Galk1,ENSMUSG00000020766,protein_coding -40062,Unk,ENSMUSG00000020770,protein_coding -40065,Trim47,ENSMUSG00000020773,protein_coding -38745,Aspa,ENSMUSG00000020774,protein_coding -40067,Mrpl38,ENSMUSG00000020775,protein_coding -40068,Fbf1,ENSMUSG00000020776,protein_coding -40071,Acox1,ENSMUSG00000020777,protein_coding -40072,Ten1,ENSMUSG00000020778,protein_coding -40075,Srp68,ENSMUSG00000020780,protein_coding -40049,Tsen54,ENSMUSG00000020781,protein_coding -40050,Llgl2,ENSMUSG00000020782,protein_coding -38734,Ncbp3,ENSMUSG00000020783,protein_coding -38733,Camkk1,ENSMUSG00000020785,protein_coding -38731,P2rx1,ENSMUSG00000020787,protein_coding -38730,Atp2a3,ENSMUSG00000020788,protein_coding -38723,Ankfy1,ENSMUSG00000020790,protein_coding -40077,Exoc7,ENSMUSG00000020792,protein_coding -40076,Galr2,ENSMUSG00000020793,protein_coding -38720,Ube2g1,ENSMUSG00000020794,protein_coding -38718,Spns3,ENSMUSG00000020798,protein_coding -38712,Tekt1,ENSMUSG00000020799,protein_coding -38704,Med31,ENSMUSG00000020801,protein_coding -40090,Ube2o,ENSMUSG00000020802,protein_coding -38703,Txndc17,ENSMUSG00000020803,protein_coding -40092,Aanat,ENSMUSG00000020804,protein_coding -38706,Slc13a5,ENSMUSG00000020805,protein_coding -40093,Rhbdf2,ENSMUSG00000020806,protein_coding -38701,4933427D14Rik,ENSMUSG00000020807,protein_coding 
-38699,Pimreg,ENSMUSG00000020808,protein_coding -40094,Cygb,ENSMUSG00000020810,protein_coding -38694,Wscd1,ENSMUSG00000020811,protein_coding -40105,Mxra7,ENSMUSG00000020814,protein_coding -38668,Rabep1,ENSMUSG00000020817,protein_coding -40110,Mfsd11,ENSMUSG00000020818,protein_coding -38661,Kif1c,ENSMUSG00000020821,protein_coding -40119,Sec14l1,ENSMUSG00000020823,protein_coding -38971,Nos2,ENSMUSG00000020826,protein_coding -38647,Mink1,ENSMUSG00000020827,protein_coding -38646,Pld2,ENSMUSG00000020828,protein_coding -38952,Slc46a1,ENSMUSG00000020829,protein_coding -38640,Vmo1,ENSMUSG00000020830,protein_coding -38625,0610010K14Rik,ENSMUSG00000020831,protein_coding -38925,Eral1,ENSMUSG00000020832,protein_coding -38920,Dhrs13,ENSMUSG00000020834,protein_coding -38898,Coro6,ENSMUSG00000020836,protein_coding -38886,Slc6a4,ENSMUSG00000020838,protein_coding -38883,Tmigd1,ENSMUSG00000020839,protein_coding -38884,Blmh,ENSMUSG00000020840,protein_coding -38882,Cpd,ENSMUSG00000020841,protein_coding -38874,Timm22,ENSMUSG00000020843,protein_coding -38873,Nxn,ENSMUSG00000020844,protein_coding -38865,Rflnb,ENSMUSG00000020846,protein_coding -38862,Rph3al,ENSMUSG00000020847,protein_coding -38860,Doc2b,ENSMUSG00000020848,protein_coding -38858,Ywhae,ENSMUSG00000020849,protein_coding -38846,Prpf8,ENSMUSG00000020850,protein_coding -39287,Nme2,ENSMUSG00000020857,protein_coding -39290,Spag9,ENSMUSG00000020859,protein_coding -39296,Luc7l3,ENSMUSG00000020863,protein_coding -39297,Ankrd40,ENSMUSG00000020864,protein_coding -39298,Abcc3,ENSMUSG00000020865,protein_coding -39299,Cacna1g,ENSMUSG00000020866,protein_coding -39300,Spata20,ENSMUSG00000020867,protein_coding -39310,Xylt2,ENSMUSG00000020868,protein_coding -39307,Lrrc59,ENSMUSG00000020869,protein_coding -39313,Cdc34b,ENSMUSG00000020870,protein_coding -39332,Dlx4,ENSMUSG00000020871,protein_coding -39336,Tac4,ENSMUSG00000020872,protein_coding -39342,Slc35b1,ENSMUSG00000020873,protein_coding -39381,Hoxb9,ENSMUSG00000020875,protein_coding 
-39399,Snx11,ENSMUSG00000020876,protein_coding -39415,Scrn2,ENSMUSG00000020877,protein_coding -39416,Lrrc46,ENSMUSG00000020878,protein_coding -39465,Cacnb1,ENSMUSG00000020882,protein_coding -39471,Fbxl20,ENSMUSG00000020883,protein_coding -38613,Asgr1,ENSMUSG00000020884,protein_coding -38612,Dlg4,ENSMUSG00000020886,protein_coding -38609,Dvl2,ENSMUSG00000020888,protein_coding -39507,Nr1d1,ENSMUSG00000020889,protein_coding -38537,Gucy2e,ENSMUSG00000020890,protein_coding -38536,Alox8,ENSMUSG00000020891,protein_coding -38534,Aloxe3,ENSMUSG00000020892,protein_coding -38531,Per1,ENSMUSG00000020893,protein_coding -38530,Vamp2,ENSMUSG00000020894,protein_coding -38526,Tmem107,ENSMUSG00000020895,protein_coding -38523,Aurkb,ENSMUSG00000020897,protein_coding -38522,Ctc1,ENSMUSG00000020898,protein_coding -38519,Pfas,ENSMUSG00000020899,protein_coding -38510,Myh10,ENSMUSG00000020900,protein_coding -38505,Pik3r5,ENSMUSG00000020901,protein_coding -38502,Ntn1,ENSMUSG00000020902,protein_coding -38497,Stx8,ENSMUSG00000020903,protein_coding -38496,Cfap52,ENSMUSG00000020904,protein_coding -38494,Usp43,ENSMUSG00000020905,protein_coding -38491,Rcvrn,ENSMUSG00000020907,protein_coding -38477,Myh3,ENSMUSG00000020908,protein_coding -38475,Adprm,ENSMUSG00000020910,protein_coding -39605,Krt19,ENSMUSG00000020911,protein_coding -39534,Krt12,ENSMUSG00000020912,protein_coding -39526,Krt24,ENSMUSG00000020913,protein_coding -39518,Top2a,ENSMUSG00000020914,protein_coding -39601,Krt36,ENSMUSG00000020916,protein_coding -39628,Acly,ENSMUSG00000020917,protein_coding -39638,Kat2a,ENSMUSG00000020918,protein_coding -39645,Stat5b,ENSMUSG00000020919,protein_coding -39724,Tmem101,ENSMUSG00000020921,protein_coding -39725,Lsm12,ENSMUSG00000020922,protein_coding -39733,Ubtf,ENSMUSG00000020923,protein_coding -39754,Ccdc43,ENSMUSG00000020925,protein_coding -39757,Adam11,ENSMUSG00000020926,protein_coding -39760,Higd1b,ENSMUSG00000020928,protein_coding -39761,Eftud2,ENSMUSG00000020929,protein_coding 
-39763,Ccdc103,ENSMUSG00000020930,protein_coding -39765,Gfap,ENSMUSG00000020932,protein_coding -39769,Dcakd,ENSMUSG00000020935,protein_coding -39771,Nmt1,ENSMUSG00000020936,protein_coding -39772,Plcd3,ENSMUSG00000020937,protein_coding -39778,1700023F06Rik,ENSMUSG00000020940,protein_coding -39780,Map3k14,ENSMUSG00000020941,protein_coding -39791,Lyzl6,ENSMUSG00000020945,protein_coding -39793,Gosr2,ENSMUSG00000020946,protein_coding -43781,Klhl28,ENSMUSG00000020948,protein_coding -43786,Fkbp3,ENSMUSG00000020949,protein_coding -43556,Foxg1,ENSMUSG00000020950,protein_coding -43575,Scfd1,ENSMUSG00000020952,protein_coding -43577,Coch,ENSMUSG00000020953,protein_coding -43578,Strn3,ENSMUSG00000020954,protein_coding -43579,Ap4s1,ENSMUSG00000020955,protein_coding -43585,Dtd2,ENSMUSG00000020956,protein_coding -44407,Ston2,ENSMUSG00000020961,protein_coding -44404,Gtf2a1,ENSMUSG00000020962,protein_coding -44402,Tshr,ENSMUSG00000020963,protein_coding -44411,Sel1l,ENSMUSG00000020964,protein_coding -43846,Dnaaf2,ENSMUSG00000020973,protein_coding -43849,Pole2,ENSMUSG00000020974,protein_coding -43853,Klhdc2,ENSMUSG00000020978,protein_coding -43854,Nemf,ENSMUSG00000020982,protein_coding -43709,Sec23a,ENSMUSG00000020986,protein_coding -43866,L2hgdh,ENSMUSG00000020988,protein_coding -43868,Cdkl1,ENSMUSG00000020990,protein_coding -43712,Trappc6b,ENSMUSG00000020993,protein_coding -43713,Pnn,ENSMUSG00000020994,protein_coding -43716,Mia2,ENSMUSG00000021000,protein_coding -44456,Galc,ENSMUSG00000021003,protein_coding -44466,Spata7,ENSMUSG00000021007,protein_coding -44469,Ptpn21,ENSMUSG00000021009,protein_coding -43601,Npas3,ENSMUSG00000021010,protein_coding -44470,Zc3h14,ENSMUSG00000021012,protein_coding -44477,Ttc8,ENSMUSG00000021013,protein_coding -47589,Polr2h,ENSMUSG00000021018,protein_coding -43652,Ppp2r3c,ENSMUSG00000021022,protein_coding -43653,1110008L16Rik,ENSMUSG00000021023,protein_coding -43654,Psma6,ENSMUSG00000021024,protein_coding -43656,Nfkbia,ENSMUSG00000021025,protein_coding 
-43660,Ralgapa1,ENSMUSG00000021027,protein_coding -43673,Mbip,ENSMUSG00000021028,protein_coding -44260,Ngb,ENSMUSG00000021032,protein_coding -44262,Gstz1,ENSMUSG00000021033,protein_coding -44271,Sptlc2,ENSMUSG00000021036,protein_coding -44268,Ahsa1,ENSMUSG00000021037,protein_coding -44267,Vipas39,ENSMUSG00000021038,protein_coding -44281,Snw1,ENSMUSG00000021039,protein_coding -44280,Slirp,ENSMUSG00000021040,protein_coding -44369,Adck1,ENSMUSG00000021044,protein_coding -43538,Nova1,ENSMUSG00000021047,protein_coding -43990,Mthfd1,ENSMUSG00000021048,protein_coding -43979,Ppp2r5e,ENSMUSG00000021051,protein_coding -43983,Sgpp1,ENSMUSG00000021054,protein_coding -43986,Esr2,ENSMUSG00000021055,protein_coding -43988,Tex21,ENSMUSG00000021056,protein_coding -43993,Akap5,ENSMUSG00000021057,protein_coding -44006,Sptb,ENSMUSG00000021061,protein_coding -44011,Rab15,ENSMUSG00000021062,protein_coding -44018,Fut8,ENSMUSG00000021065,protein_coding -43874,Atl1,ENSMUSG00000021066,protein_coding -43878,Sav1,ENSMUSG00000021067,protein_coding -43882,Nin,ENSMUSG00000021068,protein_coding -43885,Pygl,ENSMUSG00000021069,protein_coding -44669,Bdkrb2,ENSMUSG00000021070,protein_coding -43887,Trim9,ENSMUSG00000021071,protein_coding -43890,Tmx1,ENSMUSG00000021072,protein_coding -43902,Actr10,ENSMUSG00000021076,protein_coding -43906,Tomm20l,ENSMUSG00000021078,protein_coding -43907,Timm9,ENSMUSG00000021079,protein_coding -44603,Serpina1f,ENSMUSG00000021081,protein_coding -43927,Ccdc175,ENSMUSG00000021086,protein_coding -43930,Rtn1,ENSMUSG00000021087,protein_coding -43931,Lrrc9,ENSMUSG00000021090,protein_coding -44640,Serpina3n,ENSMUSG00000021091,protein_coding -43936,Dhrs7,ENSMUSG00000021094,protein_coding -44642,Gsc,ENSMUSG00000021095,protein_coding -43938,Ppm1a,ENSMUSG00000021096,protein_coding -44646,Clmn,ENSMUSG00000021097,protein_coding -43940,4930447C04Rik,ENSMUSG00000021098,protein_coding -43941,Six6,ENSMUSG00000021099,protein_coding -44655,Glrx5,ENSMUSG00000021102,protein_coding 
-43946,Mnat1,ENSMUSG00000021103,protein_coding -43957,Prkch,ENSMUSG00000021108,protein_coding -43961,Hif1a,ENSMUSG00000021109,protein_coding -44675,Papola,ENSMUSG00000021111,protein_coding -44038,Mpp5,ENSMUSG00000021112,protein_coding -43964,Snapc1,ENSMUSG00000021113,protein_coding -44040,Atp6v1d,ENSMUSG00000021114,protein_coding -44679,Vrk1,ENSMUSG00000021115,protein_coding -44041,Eif2s1,ENSMUSG00000021116,protein_coding -44042,Plek2,ENSMUSG00000021118,protein_coding -44045,Pigh,ENSMUSG00000021120,protein_coding -44055,Rdh12,ENSMUSG00000021123,protein_coding -44051,Vti1b,ENSMUSG00000021124,protein_coding -44049,Arg2,ENSMUSG00000021125,protein_coding -44063,Zfp36l1,ENSMUSG00000021127,protein_coding -44079,Galnt16,ENSMUSG00000021130,protein_coding -44084,Erh,ENSMUSG00000021131,protein_coding -44093,Susd6,ENSMUSG00000021133,protein_coding -44098,Srsf5,ENSMUSG00000021134,protein_coding -44099,Slc10a1,ENSMUSG00000021135,protein_coding -44100,Smoc1,ENSMUSG00000021136,protein_coding -44104,Gm20498,ENSMUSG00000021139,protein_coding -44124,Pcnx,ENSMUSG00000021140,protein_coding -44957,Pacs2,ENSMUSG00000021143,protein_coding -44960,Mta1,ENSMUSG00000021144,protein_coding -40400,Wdr37,ENSMUSG00000021147,protein_coding -40407,Gm9745,ENSMUSG00000021148,protein_coding -40412,Gtpbp4,ENSMUSG00000021149,protein_coding -40434,Zmynd11,ENSMUSG00000021156,protein_coding -45249,Esyt2,ENSMUSG00000021171,protein_coding -45265,Cdca7l,ENSMUSG00000021175,protein_coding -44495,Efcab11,ENSMUSG00000021176,protein_coding -44500,Tdp1,ENSMUSG00000021177,protein_coding -44506,Psmc1,ENSMUSG00000021178,protein_coding -44507,Nrde2,ENSMUSG00000021179,protein_coding -44518,Rps6ka5,ENSMUSG00000021180,protein_coding -44528,Ccdc88c,ENSMUSG00000021182,protein_coding -44524,Dglucy,ENSMUSG00000021185,protein_coding -44545,Fbln5,ENSMUSG00000021186,protein_coding -44543,Tc2n,ENSMUSG00000021187,protein_coding -44546,Trip11,ENSMUSG00000021188,protein_coding -44550,Atxn3,ENSMUSG00000021189,protein_coding 
-44558,Lgmn,ENSMUSG00000021190,protein_coding -44560,Golga5,ENSMUSG00000021192,protein_coding -40377,Pitrm1,ENSMUSG00000021193,protein_coding -44561,Chga,ENSMUSG00000021194,protein_coding -40378,Pfkp,ENSMUSG00000021196,protein_coding -44580,Unc79,ENSMUSG00000021198,protein_coding -44587,Asb2,ENSMUSG00000021200,protein_coding -44590,Otub2,ENSMUSG00000021203,protein_coding -40355,Akr1c21,ENSMUSG00000021207,protein_coding -44596,Ifi27l2b,ENSMUSG00000021208,protein_coding -44598,Ppp4r4,ENSMUSG00000021209,protein_coding -40352,Akr1c6,ENSMUSG00000021210,protein_coding -40349,Akr1c12,ENSMUSG00000021211,protein_coding -40346,Akr1c13,ENSMUSG00000021213,protein_coding -40343,Akr1c18,ENSMUSG00000021214,protein_coding -40333,Net1,ENSMUSG00000021215,protein_coding -40335,Tubal3,ENSMUSG00000021216,protein_coding -23627,Tshz3,ENSMUSG00000021217,protein_coding -40322,Gdi2,ENSMUSG00000021218,protein_coding -44132,Rgs6,ENSMUSG00000021219,protein_coding -44140,Dpf3,ENSMUSG00000021221,protein_coding -44142,Dcaf4,ENSMUSG00000021222,protein_coding -44152,Papln,ENSMUSG00000021223,protein_coding -44153,Numb,ENSMUSG00000021224,protein_coding -44160,Acot2,ENSMUSG00000021226,protein_coding -44166,Acot3,ENSMUSG00000021228,protein_coding -44180,Fam161b,ENSMUSG00000021234,protein_coding -44181,Coq6,ENSMUSG00000021235,protein_coding -44182,Entpd5,ENSMUSG00000021236,protein_coding -44185,Aldh6a1,ENSMUSG00000021238,protein_coding -44187,Vsx2,ENSMUSG00000021239,protein_coding -44188,Abcd4,ENSMUSG00000021240,protein_coding -44193,Isca2,ENSMUSG00000021241,protein_coding -44192,Npc2,ENSMUSG00000021242,protein_coding -44200,Fcf1,ENSMUSG00000021243,protein_coding -44201,Ylpm1,ENSMUSG00000021244,protein_coding -44210,Mlh3,ENSMUSG00000021245,protein_coding -44216,Tmed10,ENSMUSG00000021248,protein_coding -44221,Fos,ENSMUSG00000021250,protein_coding -44231,Erg28,ENSMUSG00000021252,protein_coding -44234,Tgfb3,ENSMUSG00000021253,protein_coding -44239,Gpatch2l,ENSMUSG00000021254,protein_coding 
-44242,Esrrb,ENSMUSG00000021255,protein_coding -44246,Vash1,ENSMUSG00000021256,protein_coding -44247,Angel1,ENSMUSG00000021257,protein_coding -44707,Ccnk,ENSMUSG00000021258,protein_coding -44711,Cyp46a1,ENSMUSG00000021259,protein_coding -44709,Hhipl1,ENSMUSG00000021260,protein_coding -44715,Evl,ENSMUSG00000021262,protein_coding -44717,Degs2,ENSMUSG00000021263,protein_coding -44723,Yy1,ENSMUSG00000021264,protein_coding -44724,Slc25a29,ENSMUSG00000021265,protein_coding -44727,Wars,ENSMUSG00000021266,protein_coding -44867,Hsp90aa1,ENSMUSG00000021270,protein_coding -44872,Zfp839,ENSMUSG00000021271,protein_coding -33700,Fdft1,ENSMUSG00000021273,protein_coding -44876,Tecpr2,ENSMUSG00000021275,protein_coding -44874,Cinp,ENSMUSG00000021276,protein_coding -44886,Traf3,ENSMUSG00000021277,protein_coding -44887,Amn,ENSMUSG00000021278,protein_coding -44889,Cdc42bpb,ENSMUSG00000021279,protein_coding -44891,Exoc3l4,ENSMUSG00000021280,protein_coding -44893,Tnfaip2,ENSMUSG00000021281,protein_coding -44896,Eif5,ENSMUSG00000021282,protein_coding -44912,Ppp1r13b,ENSMUSG00000021285,protein_coding -44911,Zfyve21,ENSMUSG00000021286,protein_coding -44910,Xrcc3,ENSMUSG00000021287,protein_coding -44909,Klc1,ENSMUSG00000021288,protein_coding -44917,Atp5mpl,ENSMUSG00000021290,protein_coding -44924,Kif26a,ENSMUSG00000021294,protein_coding -44946,Gpr132,ENSMUSG00000021298,protein_coding -40531,Hecw1,ENSMUSG00000021301,protein_coding -40525,Ggps1,ENSMUSG00000021302,protein_coding -40514,Gng4,ENSMUSG00000021303,protein_coding -40505,Gpr137b,ENSMUSG00000021306,protein_coding -40461,Mtr,ENSMUSG00000021311,protein_coding -40454,Ryr2,ENSMUSG00000021313,protein_coding -40604,Amph,ENSMUSG00000021314,protein_coding -40547,Gli3,ENSMUSG00000021318,protein_coding -40632,Sfrp4,ENSMUSG00000021319,protein_coding -40647,Aoah,ENSMUSG00000021322,protein_coding -40655,Trim27,ENSMUSG00000021326,protein_coding -40662,Zkscan3,ENSMUSG00000021327,protein_coding -40881,Slc17a1,ENSMUSG00000021335,protein_coding 
-40882,Slc17a4,ENSMUSG00000021336,protein_coding -40887,Scgn,ENSMUSG00000021337,protein_coding -40889,Carmil1,ENSMUSG00000021338,protein_coding -40912,Mrs2,ENSMUSG00000021339,protein_coding -40911,Gpld1,ENSMUSG00000021340,protein_coding -40932,Prl,ENSMUSG00000021342,protein_coding -40946,Prl8a6,ENSMUSG00000021345,protein_coding -40947,Prl8a8,ENSMUSG00000021346,protein_coding -40950,Prl7b1,ENSMUSG00000021347,protein_coding -40954,Prl7d1,ENSMUSG00000021348,protein_coding -40995,Irf4,ENSMUSG00000021356,protein_coding -40997,Exoc2,ENSMUSG00000021357,protein_coding -41223,Tfap2a,ENSMUSG00000021359,protein_coding -41231,Gcnt2,ENSMUSG00000021360,protein_coding -41239,Tmem14c,ENSMUSG00000021361,protein_coding -41242,Gcm2,ENSMUSG00000021362,protein_coding -41240,Mak,ENSMUSG00000021363,protein_coding -41245,Elovl2,ENSMUSG00000021364,protein_coding -41250,Nedd9,ENSMUSG00000021365,protein_coding -41267,Hivep1,ENSMUSG00000021366,protein_coding -41268,Edn1,ENSMUSG00000021367,protein_coding -41279,Tbc1d7,ENSMUSG00000021368,protein_coding -41289,Mcur1,ENSMUSG00000021371,protein_coding -41337,Cap2,ENSMUSG00000021373,protein_coding -41341,Nup153,ENSMUSG00000021374,protein_coding -41342,Kif13a,ENSMUSG00000021375,protein_coding -41351,Tpmt,ENSMUSG00000021376,protein_coding -41354,Dek,ENSMUSG00000021377,protein_coding -41368,Id4,ENSMUSG00000021379,protein_coding -41383,Barx1,ENSMUSG00000021381,protein_coding -41392,Susd3,ENSMUSG00000021384,protein_coding -41395,Ippk,ENSMUSG00000021385,protein_coding -41399,Aspn,ENSMUSG00000021388,protein_coding -41401,Ogn,ENSMUSG00000021390,protein_coding -41396,Cenpp,ENSMUSG00000021391,protein_coding -41402,Nol8,ENSMUSG00000021392,protein_coding -41446,Spin1,ENSMUSG00000021395,protein_coding -41447,Nxnl2,ENSMUSG00000021396,protein_coding -41030,Wrnip1,ENSMUSG00000021400,protein_coding -41038,Serpinb9b,ENSMUSG00000021403,protein_coding -41043,Serpinb9c,ENSMUSG00000021404,protein_coding -41071,Ripk1,ENSMUSG00000021408,protein_coding 
-41090,Pxdc1,ENSMUSG00000021411,protein_coding -41100,Prpf4b,ENSMUSG00000021413,protein_coding -41101,Fam217a,ENSMUSG00000021414,protein_coding -41102,4933417A18Rik,ENSMUSG00000021415,protein_coding -41104,Eci3,ENSMUSG00000021416,protein_coding -41107,Eci2,ENSMUSG00000021417,protein_coding -41123,Rpp40,ENSMUSG00000021418,protein_coding -41133,Fars2,ENSMUSG00000021420,protein_coding -41153,Ly86,ENSMUSG00000021423,protein_coding -41175,Ssr1,ENSMUSG00000021427,protein_coding -41177,Riok1,ENSMUSG00000021428,protein_coding -41181,Snrnp48,ENSMUSG00000021431,protein_coding -41202,Slc35b3,ENSMUSG00000021432,protein_coding -41686,Ctsq,ENSMUSG00000021439,protein_coding -41695,Cts7,ENSMUSG00000021440,protein_coding -41689,Cts6,ENSMUSG00000021441,protein_coding -41455,Shc3,ENSMUSG00000021448,protein_coding -41459,Sema4d,ENSMUSG00000021451,protein_coding -41461,Gadd45g,ENSMUSG00000021453,protein_coding -41757,Fbp2,ENSMUSG00000021456,protein_coding -41468,Syk,ENSMUSG00000021457,protein_coding -41760,Aopep,ENSMUSG00000021458,protein_coding -41470,Auh,ENSMUSG00000021460,protein_coding -41769,Fancc,ENSMUSG00000021461,protein_coding -41475,Ror2,ENSMUSG00000021464,protein_coding -41779,Ptch1,ENSMUSG00000021466,protein_coding -41476,Sptlc1,ENSMUSG00000021468,protein_coding -41479,Msx2,ENSMUSG00000021469,protein_coding -41788,Ercc6l2,ENSMUSG00000021470,protein_coding -41491,Sfxn1,ENSMUSG00000021474,protein_coding -41795,Habp4,ENSMUSG00000021476,protein_coding -41803,Ctsl,ENSMUSG00000021477,protein_coding -41490,Drd1,ENSMUSG00000021478,protein_coding -41526,Zfp346,ENSMUSG00000021481,protein_coding -41800,Prxl2c,ENSMUSG00000021482,protein_coding -41807,Cdk20,ENSMUSG00000021483,protein_coding -41535,Lman2,ENSMUSG00000021484,protein_coding -41534,Mxd3,ENSMUSG00000021485,protein_coding -41533,Prelid1,ENSMUSG00000021486,protein_coding -41530,Nsd1,ENSMUSG00000021488,protein_coding -41537,Slc34a1,ENSMUSG00000021490,protein_coding -41539,F12,ENSMUSG00000021492,protein_coding 
-41545,Pdlim7,ENSMUSG00000021493,protein_coding -41548,Ddx41,ENSMUSG00000021494,protein_coding -41550,Fam193b,ENSMUSG00000021495,protein_coding -41558,Pcbd2,ENSMUSG00000021496,protein_coding -41557,Txndc15,ENSMUSG00000021497,protein_coding -41560,Catsper3,ENSMUSG00000021499,protein_coding -41555,Ddx46,ENSMUSG00000021500,protein_coding -41554,Caml,ENSMUSG00000021501,protein_coding -41552,B4galt7,ENSMUSG00000021504,protein_coding -41561,Pitx1,ENSMUSG00000021506,protein_coding -41571,Cxcl14,ENSMUSG00000021508,protein_coding -41576,Slc25a48,ENSMUSG00000021509,protein_coding -41980,Zfp729a,ENSMUSG00000021510,protein_coding -41838,Zfp369,ENSMUSG00000021514,protein_coding -41938,Ptdss1,ENSMUSG00000021518,protein_coding -41937,Mterf3,ENSMUSG00000021519,protein_coding -41935,Uqcrb,ENSMUSG00000021520,protein_coding -42005,Fastkd3,ENSMUSG00000021532,protein_coding -42007,1700001L19Rik,ENSMUSG00000021534,protein_coding -42008,Adcy2,ENSMUSG00000021536,protein_coding -42216,Cetn3,ENSMUSG00000021537,protein_coding -41577,Il9,ENSMUSG00000021538,protein_coding -41579,Lect2,ENSMUSG00000021539,protein_coding -41583,Smad5,ENSMUSG00000021540,protein_coding -41588,Trpc7,ENSMUSG00000021541,protein_coding -50362,1700067P10Rik,ENSMUSG00000021545,protein_coding -41609,Hnrnpk,ENSMUSG00000021546,protein_coding -42249,Ccnh,ENSMUSG00000021548,protein_coding -42250,Rasa1,ENSMUSG00000021549,protein_coding -41608,2210016F16Rik,ENSMUSG00000021550,protein_coding -41603,Gkap1,ENSMUSG00000021552,protein_coding -41613,Slc28a3,ENSMUSG00000021553,protein_coding -41636,Naa35,ENSMUSG00000021555,protein_coding -41638,Golm1,ENSMUSG00000021556,protein_coding -41634,Agtpbp1,ENSMUSG00000021557,protein_coding -41668,Dapk1,ENSMUSG00000021559,protein_coding -42082,Slc6a19,ENSMUSG00000021565,protein_coding -42085,Nkd2,ENSMUSG00000021567,protein_coding -42087,Trip13,ENSMUSG00000021569,protein_coding -42096,Cep72,ENSMUSG00000021572,protein_coding -42094,Tppp,ENSMUSG00000021573,protein_coding 
-42099,Ahrr,ENSMUSG00000021575,protein_coding -42100,Pdcd6,ENSMUSG00000021576,protein_coding -42101,Sdha,ENSMUSG00000021577,protein_coding -42103,Ccdc127,ENSMUSG00000021578,protein_coding -42104,Lrrc14b,ENSMUSG00000021579,protein_coding -42120,Erap1,ENSMUSG00000021583,protein_coding -42121,Cast,ENSMUSG00000021585,protein_coding -42124,Pcsk1,ENSMUSG00000021587,protein_coding -42139,Rhobtb3,ENSMUSG00000021589,protein_coding -42144,Spata9,ENSMUSG00000021590,protein_coding -42138,Glrx,ENSMUSG00000021591,protein_coding -42148,Arsk,ENSMUSG00000021592,protein_coding -42022,Srd5a1,ENSMUSG00000021594,protein_coding -42020,Nsun2,ENSMUSG00000021595,protein_coding -42154,Mctp1,ENSMUSG00000021596,protein_coding -42160,Slf1,ENSMUSG00000021597,protein_coding -42033,Med10,ENSMUSG00000021598,protein_coding -42071,Irx4,ENSMUSG00000021604,protein_coding -42074,Ndufs6,ENSMUSG00000021606,protein_coding -42075,Mrpl36,ENSMUSG00000021607,protein_coding -42077,Lpcat1,ENSMUSG00000021608,protein_coding -42078,Slc6a3,ENSMUSG00000021609,protein_coding -42079,Clptm1l,ENSMUSG00000021610,protein_coding -42080,Tert,ENSMUSG00000021611,protein_coding -42081,Slc6a18,ENSMUSG00000021612,protein_coding -42274,Hapln1,ENSMUSG00000021613,protein_coding -42276,Vcan,ENSMUSG00000021614,protein_coding -42279,Xrcc4,ENSMUSG00000021615,protein_coding -42295,Atg10,ENSMUSG00000021619,protein_coding -42304,Acot12,ENSMUSG00000021620,protein_coding -42307,Zcchc9,ENSMUSG00000021621,protein_coding -42308,Ckmt2,ENSMUSG00000021622,protein_coding -42541,Cd180,ENSMUSG00000021624,protein_coding -42515,Slc30a5,ENSMUSG00000021629,protein_coding -42506,Rad17,ENSMUSG00000021635,protein_coding -42504,Marveld2,ENSMUSG00000021636,protein_coding -42501,Ocln,ENSMUSG00000021638,protein_coding -42499,Gtf2h2,ENSMUSG00000021639,protein_coding -42497,Naip1,ENSMUSG00000021640,protein_coding -42488,Serf1,ENSMUSG00000021643,protein_coding -42489,Smn1,ENSMUSG00000021645,protein_coding -42485,Mccc2,ENSMUSG00000021646,protein_coding 
-42484,Cartpt,ENSMUSG00000021647,protein_coding -42475,Ptcd2,ENSMUSG00000021650,protein_coding -42443,Btf3,ENSMUSG00000021660,protein_coding -42441,Ankra2,ENSMUSG00000021661,protein_coding -42439,Arhgef28,ENSMUSG00000021662,protein_coding -42421,Hexb,ENSMUSG00000021665,protein_coding -42420,Gfm2,ENSMUSG00000021666,protein_coding -42400,Polk,ENSMUSG00000021668,protein_coding -42402,Col4a3bp,ENSMUSG00000021669,protein_coding -42407,Hmgcr,ENSMUSG00000021670,protein_coding -42398,Poc5,ENSMUSG00000021671,protein_coding -42387,F2rl2,ENSMUSG00000021675,protein_coding -42385,Iqgap2,ENSMUSG00000021676,protein_coding -42381,F2rl1,ENSMUSG00000021678,protein_coding -42380,S100z,ENSMUSG00000021679,protein_coding -42379,Crhbp,ENSMUSG00000021680,protein_coding -42377,Aggf1,ENSMUSG00000021681,protein_coding -42372,Pde8b,ENSMUSG00000021684,protein_coding -42367,Otp,ENSMUSG00000021685,protein_coding -42358,Ap3b1,ENSMUSG00000021686,protein_coding -42353,Scamp1,ENSMUSG00000021687,protein_coding -42334,Jmy,ENSMUSG00000021690,protein_coding -42589,Dimt1,ENSMUSG00000021692,protein_coding -42590,Kif2a,ENSMUSG00000021693,protein_coding -42607,Ercc8,ENSMUSG00000021694,protein_coding -42608,Elovl7,ENSMUSG00000021696,protein_coding -42613,Depdc1b,ENSMUSG00000021697,protein_coding -42614,Pde4d,ENSMUSG00000021699,protein_coding -42620,Rab3c,ENSMUSG00000021700,protein_coding -42623,Plk2,ENSMUSG00000021701,protein_coding -42321,Thbs4,ENSMUSG00000021702,protein_coding -42319,Serinc5,ENSMUSG00000021703,protein_coding -42322,Mtx3,ENSMUSG00000021704,protein_coding -42317,Zfyve16,ENSMUSG00000021706,protein_coding -42313,Dhfr,ENSMUSG00000021707,protein_coding -42310,Rasgrf2,ENSMUSG00000021708,protein_coding -42553,Erbin,ENSMUSG00000021709,protein_coding -42556,Nln,ENSMUSG00000021710,protein_coding -42558,Trappc13,ENSMUSG00000021711,protein_coding -42559,Trim23,ENSMUSG00000021712,protein_coding -42560,Ppwd1,ENSMUSG00000021713,protein_coding -42561,Cenpk,ENSMUSG00000021714,protein_coding 
-42563,Cwc27,ENSMUSG00000021715,protein_coding -42565,Srek1ip1,ENSMUSG00000021716,protein_coding -42569,4933425L06Rik,ENSMUSG00000021718,protein_coding -42568,Rgs7bp,ENSMUSG00000021719,protein_coding -42570,Rnf180,ENSMUSG00000021720,protein_coding -42572,Htr1a,ENSMUSG00000021721,protein_coding -42743,Parp8,ENSMUSG00000021725,protein_coding -42748,Emb,ENSMUSG00000021728,protein_coding -42752,Hcn1,ENSMUSG00000021730,protein_coding -42756,Mrps30,ENSMUSG00000021731,protein_coding -42764,Fgf10,ENSMUSG00000021732,protein_coding -32182,Slc4a7,ENSMUSG00000021733,protein_coding -32162,Psmd6,ENSMUSG00000021737,protein_coding -32161,Atxn7,ENSMUSG00000021738,protein_coding -32139,Fezf2,ENSMUSG00000021743,protein_coding -32129,Ptprg,ENSMUSG00000021745,protein_coding -32102,4930452B06Rik,ENSMUSG00000021747,protein_coding -32095,Pdhb,ENSMUSG00000021748,protein_coding -32099,Oit1,ENSMUSG00000021749,protein_coding -32098,Fam107a,ENSMUSG00000021750,protein_coding -32097,Acox2,ENSMUSG00000021751,protein_coding -32096,Kctd6,ENSMUSG00000021752,protein_coding -42647,Map3k1,ENSMUSG00000021754,protein_coding -42665,Il6st,ENSMUSG00000021756,protein_coding -42668,Ddx4,ENSMUSG00000021758,protein_coding -42673,Plpp1,ENSMUSG00000021759,protein_coding -42685,Gpx8,ENSMUSG00000021760,protein_coding -42693,BC067074,ENSMUSG00000021763,protein_coding -42713,Ndufs4,ENSMUSG00000021764,protein_coding -42716,Fst,ENSMUSG00000021765,protein_coding -32316,Kat6b,ENSMUSG00000021767,protein_coding -32318,Dusp13,ENSMUSG00000021768,protein_coding -32320,Samd8,ENSMUSG00000021770,protein_coding -32322,Vdac2,ENSMUSG00000021771,protein_coding -32230,Nkiras1,ENSMUSG00000021772,protein_coding -32323,Comtd1,ENSMUSG00000021773,protein_coding -32231,Ube2e1,ENSMUSG00000021774,protein_coding -32227,Nr1d2,ENSMUSG00000021775,protein_coding -32218,Thrb,ENSMUSG00000021779,protein_coding -32345,Dlg5,ENSMUSG00000021782,protein_coding -32206,Ngly1,ENSMUSG00000021785,protein_coding -32204,Oxsm,ENSMUSG00000021786,protein_coding 
-32665,Sftpa1,ENSMUSG00000021789,protein_coding -32663,Dydc1,ENSMUSG00000021790,protein_coding -32661,Dydc2,ENSMUSG00000021791,protein_coding -32660,Prxl2a,ENSMUSG00000021792,protein_coding -32588,Glud1,ENSMUSG00000021794,protein_coding -32668,Sftpd,ENSMUSG00000021795,protein_coding -32593,Bmpr1a,ENSMUSG00000021796,protein_coding -32595,9230112D13Rik,ENSMUSG00000021797,protein_coding -32597,Ldb3,ENSMUSG00000021798,protein_coding -32598,Opn4,ENSMUSG00000021799,protein_coding -32628,Cdhr1,ENSMUSG00000021803,protein_coding -32625,Rgr,ENSMUSG00000021804,protein_coding -32269,Nid2,ENSMUSG00000021806,protein_coding -32270,Rtraf,ENSMUSG00000021807,protein_coding -32282,Nudt13,ENSMUSG00000021809,protein_coding -32283,Ecd,ENSMUSG00000021810,protein_coding -32286,Dnajc9,ENSMUSG00000021811,protein_coding -32290,Anxa7,ENSMUSG00000021814,protein_coding -32291,Mss51,ENSMUSG00000021815,protein_coding -32292,Ppp3cb,ENSMUSG00000021816,protein_coding -32304,Zswim8,ENSMUSG00000021819,protein_coding -32306,Camk2g,ENSMUSG00000021820,protein_coding -32308,Plau,ENSMUSG00000021822,protein_coding -32310,Vcl,ENSMUSG00000021823,protein_coding -32311,Ap3m1,ENSMUSG00000021824,protein_coding -32878,Txndc16,ENSMUSG00000021830,protein_coding -32882,Ero1l,ENSMUSG00000021831,protein_coding -32883,Psmc6,ENSMUSG00000021832,protein_coding -32912,Bmp4,ENSMUSG00000021835,protein_coding -32936,Samd4,ENSMUSG00000021838,protein_coding -32954,Mapk1ip1l,ENSMUSG00000021840,protein_coding -32967,Ktn1,ENSMUSG00000021843,protein_coding -32978,Peli2,ENSMUSG00000021846,protein_coding -32995,Otx2,ENSMUSG00000021848,protein_coding -33015,ccdc198,ENSMUSG00000021850,protein_coding -33017,Slc35f4,ENSMUSG00000021852,protein_coding -32380,Anxa11,ENSMUSG00000021866,protein_coding -32396,Tmem254b,ENSMUSG00000021867,protein_coding -32374,Ppif,ENSMUSG00000021868,protein_coding -32404,Slmap,ENSMUSG00000021870,protein_coding -33077,Gm49342,ENSMUSG00000021871,protein_coding -33082,Rnase10,ENSMUSG00000021872,protein_coding 
-33087,Rnase4,ENSMUSG00000021876,protein_coding -32411,Arf4,ENSMUSG00000021877,protein_coding -32415,Dnah12,ENSMUSG00000021879,protein_coding -33090,Rnase6,ENSMUSG00000021880,protein_coding -32510,Hacl1,ENSMUSG00000021884,protein_coding -44458,Gpr65,ENSMUSG00000021886,protein_coding -32505,Eaf1,ENSMUSG00000021890,protein_coding -32504,Mettl6,ENSMUSG00000021891,protein_coding -32502,Sh3bp5,ENSMUSG00000021892,protein_coding -32501,Capn7,ENSMUSG00000021893,protein_coding -32421,Arhgef3,ENSMUSG00000021895,protein_coding -32416,Asb14,ENSMUSG00000021898,protein_coding -32511,Btd,ENSMUSG00000021900,protein_coding -32496,Bap1,ENSMUSG00000021901,protein_coding -32495,Phf7,ENSMUSG00000021902,protein_coding -32519,Galnt15,ENSMUSG00000021903,protein_coding -32494,Sema3g,ENSMUSG00000021904,protein_coding -32521,Dph3,ENSMUSG00000021905,protein_coding -32522,Oxnad1,ENSMUSG00000021906,protein_coding -32523,Msmb,ENSMUSG00000021907,protein_coding -32492,Nisch,ENSMUSG00000021910,protein_coding -32528,Parg,ENSMUSG00000021911,protein_coding -32534,Ogdhl,ENSMUSG00000021913,protein_coding -32481,Glt8d1,ENSMUSG00000021916,protein_coding -32480,Spcs1,ENSMUSG00000021917,protein_coding -32479,Nek4,ENSMUSG00000021918,protein_coding -32536,Chat,ENSMUSG00000021919,protein_coding -32476,Itih4,ENSMUSG00000021922,protein_coding -33641,Ebpl,ENSMUSG00000021928,protein_coding -33643,Kpna3,ENSMUSG00000021929,protein_coding -33647,Spryd7,ENSMUSG00000021930,protein_coding -33670,Rnaseh2b,ENSMUSG00000021932,protein_coding -33671,Gucy1b2,ENSMUSG00000021933,protein_coding -32556,Mapk8,ENSMUSG00000021936,protein_coding -33548,Pspc1,ENSMUSG00000021938,protein_coding -33699,Ctsb,ENSMUSG00000021939,protein_coding -32560,Ptpn20,ENSMUSG00000021940,protein_coding -32564,Gdf10,ENSMUSG00000021943,protein_coding -33702,Gata4,ENSMUSG00000021944,protein_coding -33557,Zmym2,ENSMUSG00000021945,protein_coding -33565,Cryl1,ENSMUSG00000021947,protein_coding -32468,Prkcd,ENSMUSG00000021948,protein_coding 
-32573,Anxa8,ENSMUSG00000021950,protein_coding -33573,Eef1akmt1,ENSMUSG00000021951,protein_coding -33575,Xpo4,ENSMUSG00000021952,protein_coding -33709,Tdh,ENSMUSG00000021953,protein_coding -32466,Tkt,ENSMUSG00000021957,protein_coding -33715,Pinx1,ENSMUSG00000021958,protein_coding -33577,Lats2,ENSMUSG00000021959,protein_coding -33717,4930578I06Rik,ENSMUSG00000021961,protein_coding -32465,Dcp1a,ENSMUSG00000021962,protein_coding -33580,Sap18,ENSMUSG00000021963,protein_coding -33582,Ska3,ENSMUSG00000021965,protein_coding -33722,Prss52,ENSMUSG00000021966,protein_coding -33583,Mrpl57,ENSMUSG00000021967,protein_coding -33585,Zdhhc20,ENSMUSG00000021969,protein_coding -33732,Hmbox1,ENSMUSG00000021972,protein_coding -33586,Micu2,ENSMUSG00000021973,protein_coding -33589,Fgf9,ENSMUSG00000021974,protein_coding -33736,Ints9,ENSMUSG00000021975,protein_coding -33600,1700129C05Rik,ENSMUSG00000021977,protein_coding -33737,Extl3,ENSMUSG00000021978,protein_coding -33610,Cab39l,ENSMUSG00000021981,protein_coding -33611,Cdadc1,ENSMUSG00000021982,protein_coding -33614,Atp8a2,ENSMUSG00000021983,protein_coding -33625,Amer2,ENSMUSG00000021986,protein_coding -33624,Mtmr6,ENSMUSG00000021987,protein_coding -33629,Spata13,ENSMUSG00000021990,protein_coding -32439,Cacna2d3,ENSMUSG00000021991,protein_coding -33632,Mipep,ENSMUSG00000021993,protein_coding -32434,Wnt5a,ENSMUSG00000021994,protein_coding -33927,Esd,ENSMUSG00000021996,protein_coding -33932,Lrrc63,ENSMUSG00000021997,protein_coding -33933,Lcp1,ENSMUSG00000021998,protein_coding -33936,Cpb2,ENSMUSG00000021999,protein_coding -33939,Zc3h13,ENSMUSG00000022000,protein_coding -33943,Erich6b,ENSMUSG00000022002,protein_coding -33947,Slc25a30,ENSMUSG00000022003,protein_coding -33955,Gpalpp1,ENSMUSG00000022008,protein_coding -33956,Nufip1,ENSMUSG00000022009,protein_coding -33959,Tsc22d1,ENSMUSG00000022010,protein_coding -33972,Enox1,ENSMUSG00000022012,protein_coding -33982,Dnajc15,ENSMUSG00000022013,protein_coding 
-33983,Epsti1,ENSMUSG00000022014,protein_coding -33991,Tnfsf11,ENSMUSG00000022015,protein_coding -33994,Akap11,ENSMUSG00000022016,protein_coding -34011,Rgcc,ENSMUSG00000022018,protein_coding -34082,Tdrd3,ENSMUSG00000022019,protein_coding -34012,Naa16,ENSMUSG00000022020,protein_coding -34076,Diaph3,ENSMUSG00000022021,protein_coding -34013,Mtrf1,ENSMUSG00000022022,protein_coding -34016,Wbp4,ENSMUSG00000022023,protein_coding -34019,Sugt1,ENSMUSG00000022024,protein_coding -34020,Cnmd,ENSMUSG00000022025,protein_coding -34030,Olfm4,ENSMUSG00000022026,protein_coding -33749,Elp3,ENSMUSG00000022031,protein_coding -33751,Scara5,ENSMUSG00000022032,protein_coding -33752,Pbk,ENSMUSG00000022033,protein_coding -33753,Esco2,ENSMUSG00000022034,protein_coding -33754,Ccdc25,ENSMUSG00000022035,protein_coding -33757,Clu,ENSMUSG00000022037,protein_coding -33759,Adam2,ENSMUSG00000022039,protein_coding -33761,Ephx2,ENSMUSG00000022040,protein_coding -33762,Chrna2,ENSMUSG00000022041,protein_coding -33765,Trim35,ENSMUSG00000022043,protein_coding -33767,Stmn4,ENSMUSG00000022044,protein_coding -33772,Dpysl2,ENSMUSG00000022048,protein_coding -33778,Bnip3l,ENSMUSG00000022051,protein_coding -33779,Ppp2r2a,ENSMUSG00000022052,protein_coding -33783,Ebf2,ENSMUSG00000022053,protein_coding -33799,Nefm,ENSMUSG00000022054,protein_coding -33800,Nefl,ENSMUSG00000022055,protein_coding -33805,Adam7,ENSMUSG00000022056,protein_coding -33806,Adamdec1,ENSMUSG00000022057,protein_coding -33815,Nkx3-1,ENSMUSG00000022061,protein_coding -34171,Pibf1,ENSMUSG00000022064,protein_coding -33834,Entpd4b,ENSMUSG00000022066,protein_coding -34169,Bora,ENSMUSG00000022070,protein_coding -33844,Tnfrsf10b,ENSMUSG00000022074,protein_coding -33845,Rhobtb2,ENSMUSG00000022075,protein_coding -34143,Klhl1,ENSMUSG00000022076,protein_coding -33846,Pebp4,ENSMUSG00000022085,protein_coding -33849,Bin3,ENSMUSG00000022089,protein_coding -33852,Pdlim2,ENSMUSG00000022090,protein_coding -33853,Sorbs3,ENSMUSG00000022091,protein_coding 
-33855,Ppp3cc,ENSMUSG00000022092,protein_coding -33859,Slc39a14,ENSMUSG00000022094,protein_coding -33873,Fam160b2,ENSMUSG00000022095,protein_coding -33871,Hr,ENSMUSG00000022096,protein_coding -33868,Sftpc,ENSMUSG00000022097,protein_coding -33867,Bmp1,ENSMUSG00000022098,protein_coding -33874,Dmtn,ENSMUSG00000022099,protein_coding -33877,Xpo7,ENSMUSG00000022100,protein_coding -33875,Fgf17,ENSMUSG00000022101,protein_coding -33879,Dok2,ENSMUSG00000022102,protein_coding -33881,Gfra2,ENSMUSG00000022103,protein_coding -33902,Rb1,ENSMUSG00000022105,protein_coding -33900,Rcbtb2,ENSMUSG00000022106,protein_coding -33906,Itm2b,ENSMUSG00000022108,protein_coding -33908,Med4,ENSMUSG00000022109,protein_coding -33910,Sucla2,ENSMUSG00000022110,protein_coding -34205,Uchl3,ENSMUSG00000022111,protein_coding -34329,Gpc5,ENSMUSG00000022112,protein_coding -34264,Trim52,ENSMUSG00000022113,protein_coding -34261,Spry2,ENSMUSG00000022114,protein_coding -34244,Rbm26,ENSMUSG00000022119,protein_coding -34237,Rnf219,ENSMUSG00000022120,protein_coding -34232,Ednrb,ENSMUSG00000022122,protein_coding -34228,Scel,ENSMUSG00000022123,protein_coding -34223,Fbxl3,ENSMUSG00000022124,protein_coding -34222,Cln5,ENSMUSG00000022125,protein_coding -34221,Acod1,ENSMUSG00000022126,protein_coding -34336,Dct,ENSMUSG00000022129,protein_coding -34339,Tgds,ENSMUSG00000022130,protein_coding -34340,Gpr180,ENSMUSG00000022131,protein_coding -34357,Cldn10,ENSMUSG00000022132,protein_coding -34361,Dnajc3,ENSMUSG00000022136,protein_coding -34375,Mbnl2,ENSMUSG00000022139,protein_coding -45367,Nipbl,ENSMUSG00000022141,protein_coding -45364,Nup155,ENSMUSG00000022142,protein_coding -45360,Gdnf,ENSMUSG00000022144,protein_coding -45350,Osmr,ENSMUSG00000022146,protein_coding -45346,Fyb,ENSMUSG00000022148,protein_coding -45344,C9,ENSMUSG00000022149,protein_coding -45341,Dab2,ENSMUSG00000022150,protein_coding -45327,Ttc33,ENSMUSG00000022151,protein_coding -45317,Mroh2b,ENSMUSG00000022155,protein_coding 
-33523,Gzme,ENSMUSG00000022156,protein_coding -33520,Mcpt8,ENSMUSG00000022157,protein_coding -33154,Rab2b,ENSMUSG00000022159,protein_coding -33157,Mettl3,ENSMUSG00000022160,protein_coding -33394,Dad1,ENSMUSG00000022174,protein_coding -33408,Lrp10,ENSMUSG00000022175,protein_coding -33409,Rem2,ENSMUSG00000022176,protein_coding -33411,Haus4,ENSMUSG00000022177,protein_coding -33412,Ajuba,ENSMUSG00000022178,protein_coding -33414,4931414P19Rik,ENSMUSG00000022179,protein_coding -33426,Slc7a8,ENSMUSG00000022180,protein_coding -45313,C6,ENSMUSG00000022181,protein_coding -45304,Fbxo4,ENSMUSG00000022184,protein_coding -33420,Acin1,ENSMUSG00000022185,protein_coding -45309,Oxct1,ENSMUSG00000022186,protein_coding -45442,Drosha,ENSMUSG00000022191,protein_coding -33415,Psmb5,ENSMUSG00000022193,protein_coding -33434,Pabpn1,ENSMUSG00000022194,protein_coding -45441,6030458C11Rik,ENSMUSG00000022195,protein_coding -45433,Pdzd2,ENSMUSG00000022197,protein_coding -33435,Slc22a17,ENSMUSG00000022199,protein_coding -45432,Golph3,ENSMUSG00000022200,protein_coding -45423,Zfr,ENSMUSG00000022201,protein_coding -33436,Efs,ENSMUSG00000022203,protein_coding -33448,Ngdn,ENSMUSG00000022204,protein_coding -45420,Sub1,ENSMUSG00000022205,protein_coding -45416,Npr3,ENSMUSG00000022206,protein_coding -33455,Jph4,ENSMUSG00000022208,protein_coding -33459,Dhrs2,ENSMUSG00000022209,protein_coding -33465,Dhrs4,ENSMUSG00000022210,protein_coding -33466,Carmil3,ENSMUSG00000022211,protein_coding -33467,Cpne6,ENSMUSG00000022212,protein_coding -33473,Dcaf11,ENSMUSG00000022214,protein_coding -33475,Fitm1,ENSMUSG00000022215,protein_coding -33476,Psme1,ENSMUSG00000022216,protein_coding -33477,Emc9,ENSMUSG00000022217,protein_coding -33494,Tgm1,ENSMUSG00000022218,protein_coding -33498,Cideb,ENSMUSG00000022219,protein_coding -33501,Adcy4,ENSMUSG00000022220,protein_coding -33502,Ripk3,ENSMUSG00000022221,protein_coding -33508,Sdr39u1,ENSMUSG00000022223,protein_coding -33510,Cma1,ENSMUSG00000022225,protein_coding 
-33518,Mcpt2,ENSMUSG00000022226,protein_coding -33516,Mcpt1,ENSMUSG00000022227,protein_coding -40667,Zscan26,ENSMUSG00000022228,protein_coding -33537,Atp12a,ENSMUSG00000022229,protein_coding -45621,Sema5a,ENSMUSG00000022231,protein_coding -45611,Cct5,ENSMUSG00000022234,protein_coding -45610,Cmbl,ENSMUSG00000022235,protein_coding -45603,Ropn1l,ENSMUSG00000022236,protein_coding -45601,Ankrd33b,ENSMUSG00000022237,protein_coding -45590,Ctnnd2,ENSMUSG00000022240,protein_coding -45412,Tars,ENSMUSG00000022241,protein_coding -45408,Slc45a2,ENSMUSG00000022243,protein_coding -45407,Amacr,ENSMUSG00000022244,protein_coding -35829,Skor1,ENSMUSG00000022245,protein_coding -45400,Rai14,ENSMUSG00000022246,protein_coding -45395,Brix1,ENSMUSG00000022247,protein_coding -45396,Rad1,ENSMUSG00000022248,protein_coding -45397,Ttc23l,ENSMUSG00000022249,protein_coding -45379,Nadk2,ENSMUSG00000022253,protein_coding -45637,Mtdh,ENSMUSG00000022255,protein_coding -45640,Laptm4b,ENSMUSG00000022257,protein_coding -45625,Sdc2,ENSMUSG00000022261,protein_coding -45578,Dnah5,ENSMUSG00000022262,protein_coding -45570,Trio,ENSMUSG00000022263,protein_coding -45563,Ank,ENSMUSG00000022265,protein_coding -45551,March11,ENSMUSG00000022269,protein_coding -45547,Retreg1,ENSMUSG00000022270,protein_coding -45543,Myo10,ENSMUSG00000022272,protein_coding -45683,Rnf19a,ENSMUSG00000022280,protein_coding -45695,Pabpc1,ENSMUSG00000022283,protein_coding -45701,Ywhaz,ENSMUSG00000022285,protein_coding -45712,Grhl2,ENSMUSG00000022286,protein_coding -45720,4930447A16Rik,ENSMUSG00000022288,protein_coding -45729,Rrm2b,ENSMUSG00000022292,protein_coding -45755,Atp6v1c1,ENSMUSG00000022295,protein_coding -45758,Baalc,ENSMUSG00000022296,protein_coding -45761,Fzd6,ENSMUSG00000022297,protein_coding -45766,Slc25a32,ENSMUSG00000022299,protein_coding -45767,Dcaf13,ENSMUSG00000022300,protein_coding -45772,Dcstamp,ENSMUSG00000022303,protein_coding -45773,Dpys,ENSMUSG00000022304,protein_coding -45775,Lrp12,ENSMUSG00000022305,protein_coding 
-45787,Zfpm2,ENSMUSG00000022306,protein_coding -45793,Oxr1,ENSMUSG00000022307,protein_coding -45807,Angpt1,ENSMUSG00000022309,protein_coding -45848,Csmd3,ENSMUSG00000022311,protein_coding -45866,Eif3h,ENSMUSG00000022312,protein_coding -45867,Utp23,ENSMUSG00000022313,protein_coding -45870,Rad21,ENSMUSG00000022314,protein_coding -45876,Slc30a8,ENSMUSG00000022315,protein_coding -45482,Cdh10,ENSMUSG00000022321,protein_coding -29446,Shcbp1,ENSMUSG00000022322,protein_coding -45650,Rida,ENSMUSG00000022323,protein_coding -45643,Matn2,ENSMUSG00000022324,protein_coding -45651,Pop1,ENSMUSG00000022325,protein_coding -45658,Stk3,ENSMUSG00000022329,protein_coding -45660,Osr2,ENSMUSG00000022330,protein_coding -46091,Khdrbs3,ENSMUSG00000022332,protein_coding -46077,Zfat,ENSMUSG00000022335,protein_coding -45812,Eif3e,ENSMUSG00000022336,protein_coding -45815,Emc2,ENSMUSG00000022337,protein_coding -45824,Eny2,ENSMUSG00000022338,protein_coding -45827,Ebag9,ENSMUSG00000022339,protein_coding -45828,Sybu,ENSMUSG00000022340,protein_coding -45834,Kcnv1,ENSMUSG00000022342,protein_coding -46005,Myc,ENSMUSG00000022346,protein_coding -45994,A1bg,ENSMUSG00000022347,protein_coding -45972,Washc5,ENSMUSG00000022350,protein_coding -45971,Sqle,ENSMUSG00000022351,protein_coding -45965,Mtss1,ENSMUSG00000022353,protein_coding -45964,Ndufb9,ENSMUSG00000022354,protein_coding -45950,Klhl38,ENSMUSG00000022357,protein_coding -45948,Fbxo32,ENSMUSG00000022358,protein_coding -45945,Wdyhv1,ENSMUSG00000022359,protein_coding -45944,Atad2,ENSMUSG00000022360,protein_coding -45943,Zhx1,ENSMUSG00000022361,protein_coding -45941,Gm29394,ENSMUSG00000022362,protein_coding -45938,Tbc1d31,ENSMUSG00000022364,protein_coding -45937,Derl1,ENSMUSG00000022365,protein_coding -45930,Slc22a22,ENSMUSG00000022366,protein_coding -45925,Has2,ENSMUSG00000022367,protein_coding -45915,Mtbp,ENSMUSG00000022369,protein_coding -45914,Mrpl13,ENSMUSG00000022370,protein_coding -45910,Col14a1,ENSMUSG00000022371,protein_coding 
-46067,Sla,ENSMUSG00000022372,protein_coding -46062,Lrrc6,ENSMUSG00000022375,protein_coding -46043,Adcy8,ENSMUSG00000022376,protein_coding -46037,Asap1,ENSMUSG00000022377,protein_coding -46035,Fam49b,ENSMUSG00000022378,protein_coding -46639,Wnt7b,ENSMUSG00000022382,protein_coding -46645,Ppara,ENSMUSG00000022383,protein_coding -46649,Gtse1,ENSMUSG00000022385,protein_coding -46651,Trmu,ENSMUSG00000022386,protein_coding -46679,Brd1,ENSMUSG00000022387,protein_coding -46686,Ttll8,ENSMUSG00000022388,protein_coding -46493,Tef,ENSMUSG00000022389,protein_coding -46491,Zc3h7b,ENSMUSG00000022390,protein_coding -46490,Rangap1,ENSMUSG00000022391,protein_coding -46487,L3mbtl2,ENSMUSG00000022394,protein_coding -46479,Rbx1,ENSMUSG00000022400,protein_coding -46476,Xpnpep3,ENSMUSG00000022401,protein_coding -46475,St13,ENSMUSG00000022403,protein_coding -46474,Slc25a17,ENSMUSG00000022404,protein_coding -46463,Adsl,ENSMUSG00000022407,protein_coding -46458,Fam83f,ENSMUSG00000022408,protein_coding -46446,Mief1,ENSMUSG00000022412,protein_coding -46442,Tab1,ENSMUSG00000022414,protein_coding -46441,Syngr1,ENSMUSG00000022415,protein_coding -46450,Cacna1i,ENSMUSG00000022416,protein_coding -45908,Deptor,ENSMUSG00000022419,protein_coding -46424,Dnal4,ENSMUSG00000022420,protein_coding -46425,Nptxr,ENSMUSG00000022421,protein_coding -45905,Dscc1,ENSMUSG00000022422,protein_coding -45900,Enpp2,ENSMUSG00000022425,protein_coding -46416,Josd1,ENSMUSG00000022426,protein_coding -46415,Tomm22,ENSMUSG00000022427,protein_coding -46414,Cby1,ENSMUSG00000022428,protein_coding -46411,Dmc1,ENSMUSG00000022429,protein_coding -46632,Ribc2,ENSMUSG00000022431,protein_coding -46630,Smc1b,ENSMUSG00000022432,protein_coding -46403,Csnk1e,ENSMUSG00000022433,protein_coding -46629,Fam118a,ENSMUSG00000022434,protein_coding -46628,Upk3a,ENSMUSG00000022435,protein_coding -46369,Sh3bp1,ENSMUSG00000022436,protein_coding -46608,Samm50,ENSMUSG00000022437,protein_coding -46609,Parvb,ENSMUSG00000022438,protein_coding 
-46611,Parvg,ENSMUSG00000022439,protein_coding -46351,C1qtnf6,ENSMUSG00000022440,protein_coding -46598,Efcab6,ENSMUSG00000022441,protein_coding -46589,Ttll1,ENSMUSG00000022442,protein_coding -46326,Myh9,ENSMUSG00000022443,protein_coding -46567,Cyp2d26,ENSMUSG00000022445,protein_coding -46785,Adamts20,ENSMUSG00000022449,protein_coding -46528,Ndufa6,ENSMUSG00000022450,protein_coding -46788,Twf1,ENSMUSG00000022451,protein_coding -46526,Smdt1,ENSMUSG00000022452,protein_coding -46524,Naga,ENSMUSG00000022453,protein_coding -46792,Nell2,ENSMUSG00000022454,protein_coding -46523,Wbp2nl,ENSMUSG00000022455,protein_coding -46522,Sept3,ENSMUSG00000022456,protein_coding -46816,Slc38a2,ENSMUSG00000022462,protein_coding -46514,Srebf2,ENSMUSG00000022463,protein_coding -46820,Slc38a4,ENSMUSG00000022464,protein_coding -46826,Rpap3,ENSMUSG00000022466,protein_coding -46827,Endou,ENSMUSG00000022468,protein_coding -46828,Rapgef3,ENSMUSG00000022469,protein_coding -46505,Xrcc6,ENSMUSG00000022471,protein_coding -46506,Desi1,ENSMUSG00000022472,protein_coding -46501,Pmm1,ENSMUSG00000022474,protein_coding -46832,Hdac7,ENSMUSG00000022475,protein_coding -46499,Polr3h,ENSMUSG00000022476,protein_coding -46498,Aco2,ENSMUSG00000022477,protein_coding -46833,Vdr,ENSMUSG00000022479,protein_coding -46836,Col2a1,ENSMUSG00000022483,protein_coding -47103,Hoxc10,ENSMUSG00000022484,protein_coding -47104,Hoxc5,ENSMUSG00000022485,protein_coding -47129,Gtsf1,ENSMUSG00000022487,protein_coding -47131,Nckap1l,ENSMUSG00000022488,protein_coding -47132,Pde1b,ENSMUSG00000022489,protein_coding -47133,Ppp1r1a,ENSMUSG00000022490,protein_coding -47134,Glycam1,ENSMUSG00000022491,protein_coding -47321,Shisa9,ENSMUSG00000022494,protein_coding -47314,Tnfrsf17,ENSMUSG00000022496,protein_coding -47303,Txndc11,ENSMUSG00000022498,protein_coding -47295,Litaf,ENSMUSG00000022500,protein_coding -47287,Prm1,ENSMUSG00000022501,protein_coding -47277,Nubp1,ENSMUSG00000022503,protein_coding -47279,Ciita,ENSMUSG00000022504,protein_coding 
-47274,Emp2,ENSMUSG00000022505,protein_coding -47256,1810013L24Rik,ENSMUSG00000022507,protein_coding -47686,Bcl6,ENSMUSG00000022508,protein_coding -47704,Trp63,ENSMUSG00000022510,protein_coding -47710,Cldn1,ENSMUSG00000022512,protein_coding -47713,Il1rap,ENSMUSG00000022514,protein_coding -47213,Anks3,ENSMUSG00000022515,protein_coding -47212,Nudt16l1,ENSMUSG00000022516,protein_coding -47210,Mgrn1,ENSMUSG00000022517,protein_coding -47208,4930562C15Rik,ENSMUSG00000022518,protein_coding -47195,Srl,ENSMUSG00000022519,protein_coding -47190,Crebbp,ENSMUSG00000022521,protein_coding -47724,Fgf12,ENSMUSG00000022523,protein_coding -47728,Hrasls,ENSMUSG00000022525,protein_coding -46282,Zfp251,ENSMUSG00000022526,protein_coding -47740,Hes1,ENSMUSG00000022528,protein_coding -47167,Zfp263,ENSMUSG00000022529,protein_coding -47750,Atp13a3,ENSMUSG00000022533,protein_coding -47165,Mefv,ENSMUSG00000022534,protein_coding -47219,Glyr1,ENSMUSG00000022536,protein_coding -47752,Tmem44,ENSMUSG00000022537,protein_coding -47753,Lsg1,ENSMUSG00000022538,protein_coding -47218,Rogdi,ENSMUSG00000022540,protein_coding -47215,Sept12,ENSMUSG00000022542,protein_coding -47230,Eef2kmt,ENSMUSG00000022544,protein_coding -47330,Ercc4,ENSMUSG00000022545,protein_coding -46274,Gpt,ENSMUSG00000022546,protein_coding -47771,Apod,ENSMUSG00000022548,protein_coding -46263,Adck5,ENSMUSG00000022550,protein_coding -46246,Cyc1,ENSMUSG00000022551,protein_coding -46247,Sharpin,ENSMUSG00000022552,protein_coding -46248,Maf1,ENSMUSG00000022553,protein_coding -46250,Hgh1,ENSMUSG00000022554,protein_coding -46257,Dgat1,ENSMUSG00000022555,protein_coding -46256,Hsf1,ENSMUSG00000022556,protein_coding -46254,Bop1,ENSMUSG00000022557,protein_coding -46252,Mroh1,ENSMUSG00000022558,protein_coding -46261,Fbxl6,ENSMUSG00000022559,protein_coding -46262,Slc52a2,ENSMUSG00000022560,protein_coding -46244,Gpaa1,ENSMUSG00000022561,protein_coding -46242,Oplah,ENSMUSG00000022562,protein_coding -46235,Grina,ENSMUSG00000022564,protein_coding 
-46231,Plec,ENSMUSG00000022565,protein_coding -46222,Scrib,ENSMUSG00000022568,protein_coding -46213,Tsta3,ENSMUSG00000022570,protein_coding -46212,Pycrl,ENSMUSG00000022571,protein_coding -46209,Naprt,ENSMUSG00000022574,protein_coding -46207,Gsdmd,ENSMUSG00000022575,protein_coding -46195,Ly6h,ENSMUSG00000022577,protein_coding -46196,Gpihbp1,ENSMUSG00000022579,protein_coding -46202,Rhpn1,ENSMUSG00000022580,protein_coding -46178,Ly6g,ENSMUSG00000022582,protein_coding -46183,Ly6f,ENSMUSG00000022583,protein_coding -46176,Ly6c2,ENSMUSG00000022584,protein_coding -46167,Ly6i,ENSMUSG00000022586,protein_coding -46166,Ly6e,ENSMUSG00000022587,protein_coding -46161,Cyp11b2,ENSMUSG00000022589,protein_coding -1345,Gm9747,ENSMUSG00000022591,protein_coding -46153,Lynx1,ENSMUSG00000022594,protein_coding -46151,Lypd2,ENSMUSG00000022595,protein_coding -46150,Slurp1,ENSMUSG00000022596,protein_coding -46147,Psca,ENSMUSG00000022598,protein_coding -48163,Zbtb11,ENSMUSG00000022601,protein_coding -46144,Arc,ENSMUSG00000022602,protein_coding -46143,Mroh4,ENSMUSG00000022603,protein_coding -48158,Cep97,ENSMUSG00000022604,protein_coding -46121,Ptk2,ENSMUSG00000022607,protein_coding -46697,Mapk12,ENSMUSG00000022610,protein_coding -46708,Miox,ENSMUSG00000022613,protein_coding -46709,Lmf2,ENSMUSG00000022614,protein_coding -46712,Tymp,ENSMUSG00000022615,protein_coding -46718,Chkb,ENSMUSG00000022617,protein_coding -46720,Mapk8ip2,ENSMUSG00000022619,protein_coding -46721,Arsa,ENSMUSG00000022620,protein_coding -46727,Rabl2,ENSMUSG00000022621,protein_coding -46726,Acr,ENSMUSG00000022622,protein_coding -46724,Shank3,ENSMUSG00000022623,protein_coding -46744,Kif21a,ENSMUSG00000022629,protein_coding -46774,Yaf2,ENSMUSG00000022634,protein_coding -46776,Zcrb1,ENSMUSG00000022635,protein_coding -48134,Alcam,ENSMUSG00000022636,protein_coding -48132,Cblb,ENSMUSG00000022637,protein_coding -48119,Bbx,ENSMUSG00000022641,protein_coding -48103,Retnlb,ENSMUSG00000022650,protein_coding 
-48105,Retnlg,ENSMUSG00000022651,protein_coding -48100,Morc1,ENSMUSG00000022652,protein_coding -48082,Nectin3,ENSMUSG00000022656,protein_coding -48078,Cd96,ENSMUSG00000022657,protein_coding -48070,Tagln3,ENSMUSG00000022658,protein_coding -48066,Gcsam,ENSMUSG00000022659,protein_coding -48059,Cd200,ENSMUSG00000022661,protein_coding -48056,Atg3,ENSMUSG00000022663,protein_coding -48055,Slc35a5,ENSMUSG00000022664,protein_coding -48054,Ccdc80,ENSMUSG00000022665,protein_coding -48046,Cd200r1,ENSMUSG00000022667,protein_coding -48045,Gtpbp8,ENSMUSG00000022668,protein_coding -47387,Mzt2,ENSMUSG00000022671,protein_coding -47385,Prkdc,ENSMUSG00000022672,protein_coding -47384,Mcm4,ENSMUSG00000022673,protein_coding -47381,Ube2v2,ENSMUSG00000022674,protein_coding -47370,Snai2,ENSMUSG00000022676,protein_coding -47362,Fopnl,ENSMUSG00000022677,protein_coding -47360,Nde1,ENSMUSG00000022678,protein_coding -47349,Mpv17l,ENSMUSG00000022679,protein_coding -47348,Pdxdc1,ENSMUSG00000022680,protein_coding -47346,Ntan1,ENSMUSG00000022681,protein_coding -47345,Rrn3,ENSMUSG00000022682,protein_coding -47342,Pla2g10,ENSMUSG00000022683,protein_coding -47340,Bfar,ENSMUSG00000022684,protein_coding -47339,Parn,ENSMUSG00000022685,protein_coding -47548,B3gnt5,ENSMUSG00000022686,protein_coding -48042,Boc,ENSMUSG00000022687,protein_coding -48038,Sidt1,ENSMUSG00000022696,protein_coding -48036,Naa50,ENSMUSG00000022698,protein_coding -48031,Ccdc191,ENSMUSG00000022701,protein_coding -47512,Hira,ENSMUSG00000022702,protein_coding -48029,Qtrt2,ENSMUSG00000022704,protein_coding -48026,Drd3,ENSMUSG00000022705,protein_coding -47511,Mrpl40,ENSMUSG00000022706,protein_coding -48327,Gbe1,ENSMUSG00000022707,protein_coding -48013,Zbtb20,ENSMUSG00000022708,protein_coding -47254,Usp7,ENSMUSG00000022710,protein_coding -47251,Pmm2,ENSMUSG00000022711,protein_coding -47242,Tmem114,ENSMUSG00000022715,protein_coding -47486,Dgcr8,ENSMUSG00000022718,protein_coding -47485,Trmt2a,ENSMUSG00000022721,protein_coding 
-48253,Arl6,ENSMUSG00000022722,protein_coding -48252,Crybg3,ENSMUSG00000022723,protein_coding -48251,Riox2,ENSMUSG00000022724,protein_coding -47469,Gsc2,ENSMUSG00000022738,protein_coding -48211,Cpox,ENSMUSG00000022742,protein_coding -48213,Cldnd1,ENSMUSG00000022744,protein_coding -48206,St3gal6,ENSMUSG00000022747,protein_coding -48190,Cmss1,ENSMUSG00000022748,protein_coding -48186,Tbc1d23,ENSMUSG00000022749,protein_coding -47459,Klhl22,ENSMUSG00000022750,protein_coding -48184,Nit2,ENSMUSG00000022751,protein_coding -48183,Tomm70a,ENSMUSG00000022752,protein_coding -48188,Tmem30c,ENSMUSG00000022753,protein_coding -48177,Tmem45a,ENSMUSG00000022754,protein_coding -48175,Adgrg7,ENSMUSG00000022755,protein_coding -47449,Slc7a4,ENSMUSG00000022756,protein_coding -48174,Tfg,ENSMUSG00000022757,protein_coding -47448,P2rx6,ENSMUSG00000022758,protein_coding -47447,Lrrc74b,ENSMUSG00000022759,protein_coding -47445,Thap7,ENSMUSG00000022760,protein_coding -47444,Lztr1,ENSMUSG00000022761,protein_coding -48436,Ncam2,ENSMUSG00000022762,protein_coding -47443,Aifm3,ENSMUSG00000022763,protein_coding -47440,Snap29,ENSMUSG00000022765,protein_coding -47438,Serpind1,ENSMUSG00000022766,protein_coding -47428,Ccdc116,ENSMUSG00000022768,protein_coding -47426,Sdf2l1,ENSMUSG00000022769,protein_coding -47782,Dlg1,ENSMUSG00000022770,protein_coding -47421,Ppil2,ENSMUSG00000022771,protein_coding -47793,Senp5,ENSMUSG00000022772,protein_coding -47419,Ypel1,ENSMUSG00000022773,protein_coding -47792,Ncbp2,ENSMUSG00000022774,protein_coding -47411,Top3b,ENSMUSG00000022779,protein_coding -47787,Meltf,ENSMUSG00000022780,protein_coding -47799,Pak2,ENSMUSG00000022781,protein_coding -47405,Spag6l,ENSMUSG00000022783,protein_coding -47808,Wdr53,ENSMUSG00000022787,protein_coding -47400,Fgd4,ENSMUSG00000022788,protein_coding -47396,Dnm1l,ENSMUSG00000022789,protein_coding -47991,Igsf11,ENSMUSG00000022790,protein_coding -47824,Tnk2,ENSMUSG00000022791,protein_coding -47395,Yars2,ENSMUSG00000022792,protein_coding 
-47985,B4galt4,ENSMUSG00000022793,protein_coding -47823,Tfrc,ENSMUSG00000022797,protein_coding -47988,Tex55,ENSMUSG00000022798,protein_coding -47983,Arhgap31,ENSMUSG00000022799,protein_coding -47832,Fyttd1,ENSMUSG00000022800,protein_coding -47833,Lrch3,ENSMUSG00000022801,protein_coding -47839,Lmln,ENSMUSG00000022802,protein_coding -47973,Popdc2,ENSMUSG00000022803,protein_coding -47969,Maats1,ENSMUSG00000022805,protein_coding -47843,Osbpl11,ENSMUSG00000022807,protein_coding -47844,Snx4,ENSMUSG00000022808,protein_coding -47968,Nr1i2,ENSMUSG00000022809,protein_coding -47846,Zfp148,ENSMUSG00000022811,protein_coding -47964,Gsk3b,ENSMUSG00000022812,protein_coding -47853,Umps,ENSMUSG00000022814,protein_coding -47957,Fstl1,ENSMUSG00000022816,protein_coding -47852,Itgb5,ENSMUSG00000022817,protein_coding -47560,Cyp2ab1,ENSMUSG00000022818,protein_coding -47955,Ndufb4,ENSMUSG00000022820,protein_coding -47953,Hgd,ENSMUSG00000022821,protein_coding -47561,Abcc5,ENSMUSG00000022822,protein_coding -47850,Muc13,ENSMUSG00000022824,protein_coding -47951,Rabl3,ENSMUSG00000022827,protein_coding -47950,Gtf2e1,ENSMUSG00000022828,protein_coding -47947,Stxbp5l,ENSMUSG00000022829,protein_coding -47943,Hcls1,ENSMUSG00000022831,protein_coding -47857,Ropn1,ENSMUSG00000022832,protein_coding -47858,Ccdc14,ENSMUSG00000022833,protein_coding -47859,Mylk,ENSMUSG00000022836,protein_coding -47937,Iqcb1,ENSMUSG00000022837,protein_coding -47936,Eaf2,ENSMUSG00000022838,protein_coding -47864,Adcy5,ENSMUSG00000022840,protein_coding -47568,Ap2m1,ENSMUSG00000022841,protein_coding -47582,Ece2,ENSMUSG00000022842,protein_coding -47588,Clcn2,ENSMUSG00000022843,protein_coding -47871,Pdia5,ENSMUSG00000022844,protein_coding -47590,Thpo,ENSMUSG00000022847,protein_coding -47876,Slc49a4,ENSMUSG00000022848,protein_coding -47878,Hspbap1,ENSMUSG00000022849,protein_coding -47612,Ehhadh,ENSMUSG00000022853,protein_coding -47623,Senp2,ENSMUSG00000022855,protein_coding -47621,Tmem41a,ENSMUSG00000022856,protein_coding 
-48426,Tmprss15,ENSMUSG00000022857,protein_coding -47630,Tra2b,ENSMUSG00000022858,protein_coding -48425,Chodl,ENSMUSG00000022860,protein_coding -47634,Dgkg,ENSMUSG00000022861,protein_coding -48415,Btg3,ENSMUSG00000022863,protein_coding -48420,D16Ertd472e,ENSMUSG00000022864,protein_coding -48414,Cxadr,ENSMUSG00000022865,protein_coding -48387,Usp25,ENSMUSG00000022867,protein_coding -47648,Ahsg,ENSMUSG00000022868,protein_coding -47649,Fetub,ENSMUSG00000022871,protein_coding -47656,Kng1,ENSMUSG00000022875,protein_coding -48372,Samsn1,ENSMUSG00000022876,protein_coding -47650,Hrg,ENSMUSG00000022877,protein_coding -47668,Adipoq,ENSMUSG00000022878,protein_coding -47665,Rfc4,ENSMUSG00000022881,protein_coding -48342,Robo1,ENSMUSG00000022883,protein_coding -47658,Eif4a2,ENSMUSG00000022884,protein_coding -47673,St6gal1,ENSMUSG00000022885,protein_coding -40956,Prl2a1,ENSMUSG00000022886,protein_coding -47677,Masp1,ENSMUSG00000022887,protein_coding -48464,Mrpl39,ENSMUSG00000022889,protein_coding -48468,Atp5j,ENSMUSG00000022890,protein_coding -48471,App,ENSMUSG00000022892,protein_coding -48480,Adamts1,ENSMUSG00000022893,protein_coding -48482,Adamts5,ENSMUSG00000022894,protein_coding -48709,Ets2,ENSMUSG00000022895,protein_coding -48696,Dyrk1a,ENSMUSG00000022897,protein_coding -48695,Vps26c,ENSMUSG00000022898,protein_coding -47933,Slc15a2,ENSMUSG00000022899,protein_coding -47931,Ildr1,ENSMUSG00000022900,protein_coding -47930,Cd86,ENSMUSG00000022901,protein_coding -47919,Stfa2,ENSMUSG00000022902,protein_coding -47888,Kpna1,ENSMUSG00000022905,protein_coding -47886,Parp9,ENSMUSG00000022906,protein_coding -48274,Arl13b,ENSMUSG00000022911,protein_coding -48277,Pros1,ENSMUSG00000022912,protein_coding -48715,Psmg1,ENSMUSG00000022913,protein_coding -48716,Brwd1,ENSMUSG00000022914,protein_coding -48528,Krtap15,ENSMUSG00000022931,protein_coding -48506,Grik1,ENSMUSG00000022935,protein_coding -48740,Fam3b,ENSMUSG00000022938,protein_coding -48690,Pigp,ENSMUSG00000022940,protein_coding 
-48689,Ripply3,ENSMUSG00000022941,protein_coding -48675,Chaf1b,ENSMUSG00000022945,protein_coding -48671,Dop1b,ENSMUSG00000022946,protein_coding -48670,Cbr3,ENSMUSG00000022947,protein_coding -48664,Setd4,ENSMUSG00000022948,protein_coding -48652,Clic6,ENSMUSG00000022949,protein_coding -48649,Rcan1,ENSMUSG00000022951,protein_coding -48654,Runx1,ENSMUSG00000022952,protein_coding -48629,Atp5o,ENSMUSG00000022956,protein_coding -48626,Itsn1,ENSMUSG00000022957,protein_coding -48622,Donson,ENSMUSG00000022960,protein_coding -48621,Son,ENSMUSG00000022961,protein_coding -48620,Gart,ENSMUSG00000022962,protein_coding -48617,Tmem50b,ENSMUSG00000022964,protein_coding -48616,Ifngr2,ENSMUSG00000022965,protein_coding -48612,Ifnar1,ENSMUSG00000022967,protein_coding -48607,Il10rb,ENSMUSG00000022969,protein_coding -48605,Ifnar2,ENSMUSG00000022971,protein_coding -48585,Cfap298,ENSMUSG00000022972,protein_coding -48587,Synj1,ENSMUSG00000022973,protein_coding -48591,Paxbp1,ENSMUSG00000022974,protein_coding -48577,Mis18a,ENSMUSG00000022978,protein_coding -48564,Sod1,ENSMUSG00000022982,protein_coding -48565,Scaf4,ENSMUSG00000022983,protein_coding -47022,Krt75,ENSMUSG00000022986,protein_coding -46848,Zfp641,ENSMUSG00000022987,protein_coding -46858,Lalba,ENSMUSG00000022991,protein_coding -46861,Kansl2,ENSMUSG00000022992,protein_coding -46867,4930415O20Rik,ENSMUSG00000022993,protein_coding -46868,Adcy6,ENSMUSG00000022994,protein_coding -3192,Enah,ENSMUSG00000022995,protein_coding -46877,Wnt10b,ENSMUSG00000022996,protein_coding -46878,Wnt1,ENSMUSG00000022997,protein_coding -46888,Lmbr1l,ENSMUSG00000022999,protein_coding -46886,Dhh,ENSMUSG00000023000,protein_coding -46889,Tuba1b,ENSMUSG00000023004,protein_coding -46912,Prpf40b,ENSMUSG00000023007,protein_coding -46913,Fmnl3,ENSMUSG00000023008,protein_coding -46918,Nckap5l,ENSMUSG00000023009,protein_coding -46916,Tmbim6,ENSMUSG00000023010,protein_coding -46923,Faim2,ENSMUSG00000023011,protein_coding -46925,Aqp2,ENSMUSG00000023013,protein_coding 
-46928,Racgap1,ENSMUSG00000023015,protein_coding -46929,Asic1,ENSMUSG00000023017,protein_coding -46930,Smarcd1,ENSMUSG00000023018,protein_coding -46935,Gpd1,ENSMUSG00000023019,protein_coding -46936,Cox14,ENSMUSG00000023020,protein_coding -46937,Cers5,ENSMUSG00000023021,protein_coding -46939,Lima1,ENSMUSG00000023022,protein_coding -46952,Larp4,ENSMUSG00000023025,protein_coding -46954,Dip2b,ENSMUSG00000023026,protein_coding -46958,Atf1,ENSMUSG00000023027,protein_coding -46966,Slc11a2,ENSMUSG00000023030,protein_coding -46982,Cela1,ENSMUSG00000023031,protein_coding -46986,Slc4a8,ENSMUSG00000023032,protein_coding -46988,Scn8a,ENSMUSG00000023033,protein_coding -46999,Nr4a1,ENSMUSG00000023034,protein_coding -53227,Pcdhgc4,ENSMUSG00000023036,protein_coding -47008,Krt7,ENSMUSG00000023039,protein_coding -47031,Krt6b,ENSMUSG00000023041,protein_coding -47053,Krt18,ENSMUSG00000023043,protein_coding -47059,Csad,ENSMUSG00000023044,protein_coding -47058,Soat2,ENSMUSG00000023045,protein_coding -47057,Igfbp6,ENSMUSG00000023046,protein_coding -47075,Amhr2,ENSMUSG00000023047,protein_coding -47078,Prr13,ENSMUSG00000023048,protein_coding -47081,Map3k12,ENSMUSG00000023050,protein_coding -47083,Tarbp2,ENSMUSG00000023051,protein_coding -47084,Npff,ENSMUSG00000023052,protein_coding -47092,Calcoco1,ENSMUSG00000023055,protein_coding -12153,Fabp2,ENSMUSG00000023057,protein_coding -32591,Sncg,ENSMUSG00000023064,protein_coding -54036,Rttn,ENSMUSG00000023066,protein_coding -49562,Cdkn1a,ENSMUSG00000023067,protein_coding -27569,Nus1,ENSMUSG00000023068,protein_coding -7573,Rgn,ENSMUSG00000023070,protein_coding -23605,Cep89,ENSMUSG00000023072,protein_coding -29454,Slc10a2,ENSMUSG00000023073,protein_coding -8019,Mospd1,ENSMUSG00000023074,protein_coding -14630,Akirin1,ENSMUSG00000023075,protein_coding -17324,Cxcl13,ENSMUSG00000023078,protein_coding -18314,Gtf2ird1,ENSMUSG00000023079,protein_coding -50005,H2-M10.2,ENSMUSG00000023083,protein_coding -11076,Lrrc71,ENSMUSG00000023084,protein_coding 
-10488,Noct,ENSMUSG00000023087,protein_coding -47365,Abcc1,ENSMUSG00000023088,protein_coding -19148,Ndufa5,ENSMUSG00000023089,protein_coding -8087,Fhl1,ENSMUSG00000023092,protein_coding -35053,Pate12,ENSMUSG00000023093,protein_coding -3818,Msrb2,ENSMUSG00000023094,protein_coding -18325,Rfc2,ENSMUSG00000023104,protein_coding -18108,Denr,ENSMUSG00000023106,protein_coding -33410,Prmt5,ENSMUSG00000023110,protein_coding -22777,Sympk,ENSMUSG00000023118,protein_coding -14814,Gm853,ENSMUSG00000023120,protein_coding -50456,Sult1c2,ENSMUSG00000023122,protein_coding -42687,Gzma,ENSMUSG00000023132,protein_coding -20414,Reg2,ENSMUSG00000023140,protein_coding -47224,Nagpa,ENSMUSG00000023143,protein_coding -48722,Wrb,ENSMUSG00000023147,protein_coding -2465,Ivns1abp,ENSMUSG00000023150,protein_coding -12911,Lrrc69,ENSMUSG00000023151,protein_coding -15570,Tmem52,ENSMUSG00000023153,protein_coding -32093,Rpp14,ENSMUSG00000023156,protein_coding -22707,Psg29,ENSMUSG00000023159,protein_coding -7397,Ssxb2,ENSMUSG00000023165,protein_coding -46814,Slc38a1,ENSMUSG00000023169,protein_coding -38598,Gps2,ENSMUSG00000023170,protein_coding -28115,Bsg,ENSMUSG00000023175,protein_coding -47747,Cpn2,ENSMUSG00000023176,protein_coding -22719,Ceacam14,ENSMUSG00000023185,protein_coding -35164,Vwa5a,ENSMUSG00000023186,protein_coding -21367,P3h3,ENSMUSG00000023191,protein_coding -36758,Grm2,ENSMUSG00000023192,protein_coding -3708,Il15ra,ENSMUSG00000023206,protein_coding -3999,Lcn9,ENSMUSG00000023210,protein_coding -5892,Epb42,ENSMUSG00000023216,protein_coding -5007,Serping1,ENSMUSG00000023224,protein_coding -14815,Serinc2,ENSMUSG00000023232,protein_coding -29430,Ccl25,ENSMUSG00000023235,protein_coding -5742,Scg5,ENSMUSG00000023236,protein_coding -32276,Kcnk5,ENSMUSG00000023243,protein_coding -14540,Guca2a,ENSMUSG00000023247,protein_coding -36748,Parp3,ENSMUSG00000023249,protein_coding -7531,Cypt1,ENSMUSG00000023257,protein_coding -36889,Slc26a6,ENSMUSG00000023259,protein_coding 
-36742,Acy1,ENSMUSG00000023262,protein_coding -14597,9530002B09Rik,ENSMUSG00000023263,protein_coding -50348,Frs3,ENSMUSG00000023266,protein_coding -13094,Gabrr2,ENSMUSG00000023267,protein_coding -46682,Creld2,ENSMUSG00000023272,protein_coding -21369,Cd4,ENSMUSG00000023274,protein_coding -36734,Twf2,ENSMUSG00000023277,protein_coding -7321,Bmp15,ENSMUSG00000023279,protein_coding -17647,Zfp605,ENSMUSG00000023284,protein_coding -15605,Ube2j2,ENSMUSG00000023286,protein_coding -19520,Sva,ENSMUSG00000023289,protein_coding -54987,March5,ENSMUSG00000023307,protein_coding -18431,Ache,ENSMUSG00000023328,protein_coding -5985,Dtwd1,ENSMUSG00000023330,protein_coding -36163,Gcm1,ENSMUSG00000023333,protein_coding -31622,Wfdc1,ENSMUSG00000023336,protein_coding -36738,Poc1a,ENSMUSG00000023345,protein_coding -18437,Trip6,ENSMUSG00000023348,protein_coding -21297,Clec4n,ENSMUSG00000023349,protein_coding -16016,Agap3,ENSMUSG00000023353,protein_coding -19686,Tmem176a,ENSMUSG00000023367,protein_coding -32281,Kcnk16,ENSMUSG00000023387,protein_coding -4780,Dlx2,ENSMUSG00000023391,protein_coding -7191,Slc17a9,ENSMUSG00000023393,protein_coding -19711,Stk31,ENSMUSG00000023403,protein_coding -33503,Nfatc4,ENSMUSG00000023411,protein_coding -15060,Cela3b,ENSMUSG00000023433,protein_coding -21366,Gnb3,ENSMUSG00000023439,protein_coding -9345,Esx1,ENSMUSG00000023443,protein_coding -16233,Pisd,ENSMUSG00000023452,protein_coding -21363,Tpi1,ENSMUSG00000023456,protein_coding -50663,Rab12,ENSMUSG00000023460,protein_coding -24015,Tulp2,ENSMUSG00000023467,protein_coding -36887,Celsr3,ENSMUSG00000023473,protein_coding -46897,Prph,ENSMUSG00000023484,protein_coding -36746,Pcbp4,ENSMUSG00000023495,protein_coding -21365,Cdca3,ENSMUSG00000023505,protein_coding -15606,C1qtnf12,ENSMUSG00000023571,protein_coding -5891,Ccndbp1,ENSMUSG00000023572,protein_coding -36753,Iqcf3,ENSMUSG00000023577,protein_coding -18119,Ogfod2,ENSMUSG00000023707,protein_coding -39230,Mrps23,ENSMUSG00000023723,protein_coding 
-46885,Rhebl1,ENSMUSG00000023755,protein_coding -37297,Sfi1,ENSMUSG00000023764,protein_coding -38533,Hes7,ENSMUSG00000023781,protein_coding -47801,Pigx,ENSMUSG00000023791,protein_coding -48769,Tiam2,ENSMUSG00000023800,protein_coding -48775,Nox3,ENSMUSG00000023802,protein_coding -48798,Synj2,ENSMUSG00000023805,protein_coding -48829,Rsph3b,ENSMUSG00000023806,protein_coding -48836,Rps6ka2,ENSMUSG00000023809,protein_coding -48894,Prkn,ENSMUSG00000023826,protein_coding -48901,Agpat4,ENSMUSG00000023827,protein_coding -48910,Slc22a3,ENSMUSG00000023828,protein_coding -48913,Slc22a1,ENSMUSG00000023829,protein_coding -48914,Igf2r,ENSMUSG00000023830,protein_coding -48931,Acat2,ENSMUSG00000023832,protein_coding -49035,Lnpep,ENSMUSG00000023845,protein_coding -49009,Chd1,ENSMUSG00000023852,protein_coding -48857,Mpc1,ENSMUSG00000023861,protein_coding -48867,Pde10a,ENSMUSG00000023868,protein_coding -48871,1700010I14Rik,ENSMUSG00000023873,protein_coding -49143,Zfp54,ENSMUSG00000023882,protein_coding -48983,Phf10,ENSMUSG00000023883,protein_coding -48977,Thbs2,ENSMUSG00000023885,protein_coding -48973,Smoc2,ENSMUSG00000023886,protein_coding -49144,Zfp51,ENSMUSG00000023892,protein_coding -49221,Zscan10,ENSMUSG00000023902,protein_coding -49223,Mmp25,ENSMUSG00000023903,protein_coding -49228,Hcfc1r1,ENSMUSG00000023904,protein_coding -49229,Tnfrsf12a,ENSMUSG00000023905,protein_coding -49230,Cldn6,ENSMUSG00000023906,protein_coding -49233,Pkmyt1,ENSMUSG00000023908,protein_coding -49234,Paqr4,ENSMUSG00000023909,protein_coding -49240,Flywch2,ENSMUSG00000023911,protein_coding -50233,Slc25a27,ENSMUSG00000023912,protein_coding -50230,Pla2g7,ENSMUSG00000023913,protein_coding -50228,Mep1a,ENSMUSG00000023914,protein_coding -50223,Tnfrsf21,ENSMUSG00000023915,protein_coding -50219,Adgrf4,ENSMUSG00000023918,protein_coding -50209,Cenpq,ENSMUSG00000023919,protein_coding -50210,Mmut,ENSMUSG00000023921,protein_coding -50411,Tbc1d5,ENSMUSG00000023923,protein_coding 
-50204,Rhag,ENSMUSG00000023926,protein_coding -50417,Satb1,ENSMUSG00000023927,protein_coding -50203,Crisp2,ENSMUSG00000023930,protein_coding -50440,Efhb,ENSMUSG00000023931,protein_coding -50252,Cdc5l,ENSMUSG00000023932,protein_coding -50254,Spats1,ENSMUSG00000023935,protein_coding -50256,Aars2,ENSMUSG00000023938,protein_coding -50269,Mrpl14,ENSMUSG00000023939,protein_coding -50447,Sgo1,ENSMUSG00000023940,protein_coding -50262,Slc29a1,ENSMUSG00000023942,protein_coding -50463,Sult1c1,ENSMUSG00000023943,protein_coding -50261,Hsp90ab1,ENSMUSG00000023944,protein_coding -50466,Slc5a7,ENSMUSG00000023945,protein_coding -50259,Nfkbie,ENSMUSG00000023947,protein_coding -50257,Tcte1,ENSMUSG00000023949,protein_coding -50276,Vegfa,ENSMUSG00000023951,protein_coding -50282,Gtpbp2,ENSMUSG00000023952,protein_coding -50283,Polh,ENSMUSG00000023953,protein_coding -50240,Clic5,ENSMUSG00000023959,protein_coding -50238,Enpp5,ENSMUSG00000023960,protein_coding -50239,Enpp4,ENSMUSG00000023961,protein_coding -50234,Cyp39a1,ENSMUSG00000023963,protein_coding -18903,Calcr,ENSMUSG00000023964,protein_coding -50607,Fbxl17,ENSMUSG00000023965,protein_coding -50280,Rsph9,ENSMUSG00000023966,protein_coding -50279,Mrps18a,ENSMUSG00000023967,protein_coding -50297,Crip3,ENSMUSG00000023968,protein_coding -50311,Rrp36,ENSMUSG00000023971,protein_coding -50307,Ptk7,ENSMUSG00000023972,protein_coding -50317,Cnpy3,ENSMUSG00000023973,protein_coding -50324,Ubr2,ENSMUSG00000023977,protein_coding -50323,Prph2,ENSMUSG00000023978,protein_coding -50331,Guca1b,ENSMUSG00000023979,protein_coding -50337,Taf8,ENSMUSG00000023980,protein_coding -50332,Guca1a,ENSMUSG00000023982,protein_coding -50341,Gm20517,ENSMUSG00000023984,protein_coding -50350,Pgc,ENSMUSG00000023987,protein_coding -50339,Bysl,ENSMUSG00000023988,protein_coding -50351,Tfeb,ENSMUSG00000023990,protein_coding -50355,Foxp4,ENSMUSG00000023991,protein_coding -50373,Trem2,ENSMUSG00000023992,protein_coding -50374,Treml1,ENSMUSG00000023993,protein_coding 
-50375,Nfya,ENSMUSG00000023994,protein_coding -50379,Tspo2,ENSMUSG00000023995,protein_coding -50395,Kif6,ENSMUSG00000023999,protein_coding -49675,Brd4,ENSMUSG00000024002,protein_coding -49554,Stk38,ENSMUSG00000024006,protein_coding -49569,Ppil1,ENSMUSG00000024007,protein_coding -49567,Cpne5,ENSMUSG00000024008,protein_coding -49573,Pi16,ENSMUSG00000024011,protein_coding -49575,Mtch1,ENSMUSG00000024012,protein_coding -49576,Fgd2,ENSMUSG00000024013,protein_coding -49580,Pim1,ENSMUSG00000024014,protein_coding -49592,Ccdc167,ENSMUSG00000024018,protein_coding -49591,Cmtr1,ENSMUSG00000024019,protein_coding -49611,Glo1,ENSMUSG00000024026,protein_coding -49622,Glp1r,ENSMUSG00000024027,protein_coding -49631,Tff2,ENSMUSG00000024028,protein_coding -49630,Tff3,ENSMUSG00000024029,protein_coding -49628,Abcg1,ENSMUSG00000024030,protein_coding -49633,Tff1,ENSMUSG00000024032,protein_coding -49637,Rsph1,ENSMUSG00000024033,protein_coding -49634,Tmprss3,ENSMUSG00000024034,protein_coding -49638,Slc37a1,ENSMUSG00000024036,protein_coding -49644,Wdr4,ENSMUSG00000024037,protein_coding -49646,Ndufv3,ENSMUSG00000024038,protein_coding -49655,Cbs,ENSMUSG00000024039,protein_coding -49659,Cryaa,ENSMUSG00000024041,protein_coding -49662,Sik1,ENSMUSG00000024042,protein_coding -50682,Arhgap28,ENSMUSG00000024043,protein_coding -50693,Epb41l3,ENSMUSG00000024044,protein_coding -49678,Akap8,ENSMUSG00000024045,protein_coding -50724,Myl12a,ENSMUSG00000024048,protein_coding -50725,Myom1,ENSMUSG00000024049,protein_coding -49680,Wiz,ENSMUSG00000024050,protein_coding -50728,Lpin2,ENSMUSG00000024052,protein_coding -50729,Emilin2,ENSMUSG00000024053,protein_coding -50732,Smchd1,ENSMUSG00000024054,protein_coding -49713,Cyp4f13,ENSMUSG00000024055,protein_coding -50738,Ndc80,ENSMUSG00000024056,protein_coding -50753,Clip4,ENSMUSG00000024059,protein_coding -50761,Lbh,ENSMUSG00000024063,protein_coding -50770,Galnt14,ENSMUSG00000024064,protein_coding -50776,Ehd3,ENSMUSG00000024065,protein_coding 
-50778,Xdh,ENSMUSG00000024066,protein_coding -50789,Dpy30,ENSMUSG00000024067,protein_coding -50793,Spast,ENSMUSG00000024068,protein_coding -50794,Slc30a6,ENSMUSG00000024069,protein_coding -50848,Prkd3,ENSMUSG00000024070,protein_coding -50797,Yipf4,ENSMUSG00000024072,protein_coding -50799,Birc6,ENSMUSG00000024073,protein_coding -50827,Crim1,ENSMUSG00000024074,protein_coding -50833,Vit,ENSMUSG00000024076,protein_coding -50835,Strn,ENSMUSG00000024077,protein_coding -50801,Ttc27,ENSMUSG00000024078,protein_coding -50841,Eif2ak2,ENSMUSG00000024079,protein_coding -50846,Cebpz,ENSMUSG00000024081,protein_coding -50847,Ndufaf7,ENSMUSG00000024082,protein_coding -50619,Pja2,ENSMUSG00000024083,protein_coding -50850,Qpct,ENSMUSG00000024084,protein_coding -50623,Man2a1,ENSMUSG00000024085,protein_coding -50856,Cyp1b1,ENSMUSG00000024087,protein_coding -50626,4930583I09Rik,ENSMUSG00000024088,protein_coding -50637,Vapa,ENSMUSG00000024091,protein_coding -50865,Hnrnpll,ENSMUSG00000024095,protein_coding -50642,Ralbp1,ENSMUSG00000024096,protein_coding -50871,Srsf7,ENSMUSG00000024097,protein_coding -50646,Twsg1,ENSMUSG00000024098,protein_coding -50649,Ndufv2,ENSMUSG00000024099,protein_coding -50652,Washc1,ENSMUSG00000024101,protein_coding -21127,Washc2,ENSMUSG00000024104,protein_coding -50665,Themis3,ENSMUSG00000024105,protein_coding -51023,Lhcgr,ENSMUSG00000024107,protein_coding -51031,Nrxn1,ENSMUSG00000024109,protein_coding -49354,Cacna1h,ENSMUSG00000024112,protein_coding -49246,Prss41,ENSMUSG00000024114,protein_coding -49249,Prss21,ENSMUSG00000024116,protein_coding -49275,Tedc2,ENSMUSG00000024118,protein_coding -50938,Lrpprc,ENSMUSG00000024120,protein_coding -49271,Atp6v0c,ENSMUSG00000024121,protein_coding -49267,Pdpk1,ENSMUSG00000024122,protein_coding -49259,Prss30,ENSMUSG00000024124,protein_coding -49258,Sbpl,ENSMUSG00000024125,protein_coding -50946,Prepl,ENSMUSG00000024127,protein_coding -49257,Sbp,ENSMUSG00000024128,protein_coding -49281,Abca3,ENSMUSG00000024130,protein_coding 
-50945,Slc3a1,ENSMUSG00000024131,protein_coding -49285,Eci1,ENSMUSG00000024132,protein_coding -50961,Six2,ENSMUSG00000024134,protein_coding -50965,Srbd1,ENSMUSG00000024135,protein_coding -49286,Dnase1l2,ENSMUSG00000024136,protein_coding -49287,E4f1,ENSMUSG00000024137,protein_coding -50972,Epas1,ENSMUSG00000024140,protein_coding -49289,Mlst8,ENSMUSG00000024142,protein_coding -50978,Rhoq,ENSMUSG00000024143,protein_coding -50980,Pigf,ENSMUSG00000024145,protein_coding -50981,Cript,ENSMUSG00000024146,protein_coding -50987,Mcfd2,ENSMUSG00000024150,protein_coding -51001,Msh2,ENSMUSG00000024151,protein_coding -51022,Gtf2a1l,ENSMUSG00000024154,protein_coding -49319,Meiob,ENSMUSG00000024155,protein_coding -49320,Hagh,ENSMUSG00000024158,protein_coding -49324,Spsb3,ENSMUSG00000024160,protein_coding -49326,Mapk8ip3,ENSMUSG00000024163,protein_coding -50562,C3,ENSMUSG00000024164,protein_coding -49329,Jpt2,ENSMUSG00000024165,protein_coding -49332,Tmem204,ENSMUSG00000024168,protein_coding -49331,Ift140,ENSMUSG00000024169,protein_coding -49333,Telo2,ENSMUSG00000024170,protein_coding -49347,Prss28,ENSMUSG00000024171,protein_coding -50479,St6gal2,ENSMUSG00000024172,protein_coding -49351,Tpsab1,ENSMUSG00000024173,protein_coding -50482,Pot1b,ENSMUSG00000024174,protein_coding -49355,Tekt4,ENSMUSG00000024175,protein_coding -49362,Sox8,ENSMUSG00000024176,protein_coding -49408,Nme4,ENSMUSG00000024177,protein_coding -49410,Tmem8,ENSMUSG00000024180,protein_coding -49411,Mrpl28,ENSMUSG00000024181,protein_coding -49412,Axin1,ENSMUSG00000024182,protein_coding -49414,Pdia2,ENSMUSG00000024184,protein_coding -49416,Rgs11,ENSMUSG00000024186,protein_coding -49417,Fam234a,ENSMUSG00000024187,protein_coding -49422,Luc7l,ENSMUSG00000024188,protein_coding -49430,Dusp1,ENSMUSG00000024190,protein_coding -49439,Bnip1,ENSMUSG00000024191,protein_coding -49447,Phf1,ENSMUSG00000024193,protein_coding -49448,Cuta,ENSMUSG00000024194,protein_coding -50517,Plin3,ENSMUSG00000024197,protein_coding 
-50520,Kdm4b,ENSMUSG00000024201,protein_coding -50538,Rfx2,ENSMUSG00000024206,protein_coding -50539,Acsbg2,ENSMUSG00000024207,protein_coding -49461,Uqcc2,ENSMUSG00000024208,protein_coding -50540,1700061G19Rik,ENSMUSG00000024209,protein_coding -49463,Ip6k3,ENSMUSG00000024210,protein_coding -19170,Grm8,ENSMUSG00000024211,protein_coding -50541,Mllt1,ENSMUSG00000024212,protein_coding -49487,Nudt3,ENSMUSG00000024213,protein_coding -49493,Spdef,ENSMUSG00000024215,protein_coding -49497,Snrpc,ENSMUSG00000024217,protein_coding -49500,Taf11,ENSMUSG00000024218,protein_coding -49502,Anks1,ENSMUSG00000024219,protein_coding -49512,Zfp523,ENSMUSG00000024220,protein_coding -49522,Fkbp5,ENSMUSG00000024222,protein_coding -49532,Armc12,ENSMUSG00000024223,protein_coding -49533,Clpsl2,ENSMUSG00000024224,protein_coding -49534,Clps,ENSMUSG00000024225,protein_coding -50583,Pdzph1,ENSMUSG00000024227,protein_coding -50585,Nudt12,ENSMUSG00000024228,protein_coding -52665,Cul2,ENSMUSG00000024231,protein_coding -52670,Bambi,ENSMUSG00000024232,protein_coding -52678,Lyzl1,ENSMUSG00000024233,protein_coding -52685,Mtpap,ENSMUSG00000024234,protein_coding -52681,Map3k8,ENSMUSG00000024235,protein_coding -52692,Svil,ENSMUSG00000024236,protein_coding -52700,Zeb1,ENSMUSG00000024238,protein_coding -52712,Epc1,ENSMUSG00000024240,protein_coding -50879,Sos1,ENSMUSG00000024241,protein_coding -50882,Map4k3,ENSMUSG00000024242,protein_coding -50886,Tmem178,ENSMUSG00000024245,protein_coding -50887,Thumpd2,ENSMUSG00000024246,protein_coding -50906,Pkdcc,ENSMUSG00000024247,protein_coding -50910,Cox7a2l,ENSMUSG00000024248,protein_coding -50928,Thada,ENSMUSG00000024251,protein_coding -50935,Dync2li1,ENSMUSG00000024253,protein_coding -50937,Abcg8,ENSMUSG00000024254,protein_coding -51063,Adcyap1,ENSMUSG00000024256,protein_coding -53000,Polr2d,ENSMUSG00000024258,protein_coding -52994,Slc25a46,ENSMUSG00000024259,protein_coding -52996,Sap130,ENSMUSG00000024260,protein_coding -52990,Syt4,ENSMUSG00000024261,protein_coding 
-31619,Adad2,ENSMUSG00000024266,protein_coding -52952,Celf4,ENSMUSG00000024268,protein_coding -52949,Tpgs2,ENSMUSG00000024269,protein_coding -52942,Slc39a6,ENSMUSG00000024270,protein_coding -52943,Elp2,ENSMUSG00000024271,protein_coding -52939,2700062C07Rik,ENSMUSG00000024273,protein_coding -52929,Zfp397,ENSMUSG00000024276,protein_coding -52924,Mapre2,ENSMUSG00000024277,protein_coding -52735,Wac,ENSMUSG00000024283,protein_coding -52750,Ccny,ENSMUSG00000024286,protein_coding -52769,Thoc1,ENSMUSG00000024287,protein_coding -52771,Rock1,ENSMUSG00000024290,protein_coding -49712,Cyp4f14,ENSMUSG00000024292,protein_coding -52783,Esco1,ENSMUSG00000024293,protein_coding -52787,Mib1,ENSMUSG00000024294,protein_coding -49699,Zfp871,ENSMUSG00000024298,protein_coding -49733,Adamts10,ENSMUSG00000024299,protein_coding -49734,Myo1f,ENSMUSG00000024300,protein_coding -49445,Kifc5b,ENSMUSG00000024301,protein_coding -52921,Dtna,ENSMUSG00000024302,protein_coding -52868,Cdh2,ENSMUSG00000024304,protein_coding -52916,Ccdc178,ENSMUSG00000024306,protein_coding -49756,Tapbp,ENSMUSG00000024308,protein_coding -49760,Pfdn6,ENSMUSG00000024309,protein_coding -49762,Wdr46,ENSMUSG00000024312,protein_coding -52905,Mep1b,ENSMUSG00000024313,protein_coding -52904,Rnf138,ENSMUSG00000024317,protein_coding -49765,Vps52,ENSMUSG00000024319,protein_coding -49772,Ring1,ENSMUSG00000024325,protein_coding -49778,Slc39a7,ENSMUSG00000024327,protein_coding -49780,Col11a2,ENSMUSG00000024330,protein_coding -52887,Dsc2,ENSMUSG00000024331,protein_coding -49784,H2-Oa,ENSMUSG00000024334,protein_coding -49786,Brd2,ENSMUSG00000024335,protein_coding -49795,Psmb8,ENSMUSG00000024338,protein_coding -49797,Tap2,ENSMUSG00000024339,protein_coding -49806,Btnl2,ENSMUSG00000024340,protein_coding -53122,Pfdn1,ENSMUSG00000024346,protein_coding -53116,Psd2,ENSMUSG00000024347,protein_coding -53104,Tmem173,ENSMUSG00000024349,protein_coding -53101,Dnajc18,ENSMUSG00000024350,protein_coding -53100,Spata24,ENSMUSG00000024352,protein_coding 
-53097,Mzb1,ENSMUSG00000024353,protein_coding -53096,Slc23a1,ENSMUSG00000024354,protein_coding -53081,Sil1,ENSMUSG00000024357,protein_coding -53072,Hspa9,ENSMUSG00000024359,protein_coding -53071,Etf1,ENSMUSG00000024360,protein_coding -49841,Cyp21a1,ENSMUSG00000024365,protein_coding -53062,Gfra3,ENSMUSG00000024366,protein_coding -49848,Nelfe,ENSMUSG00000024369,protein_coding -53060,Cdc23,ENSMUSG00000024370,protein_coding -49852,C2,ENSMUSG00000024371,protein_coding -53041,Epb41l4a,ENSMUSG00000024376,protein_coding -53034,Stard4,ENSMUSG00000024378,protein_coding -53027,Tslp,ENSMUSG00000024379,protein_coding -53020,Bin1,ENSMUSG00000024381,protein_coding -53017,Ercc3,ENSMUSG00000024382,protein_coding -53016,Map3k2,ENSMUSG00000024383,protein_coding -53010,Iws1,ENSMUSG00000024384,protein_coding -53013,Proc,ENSMUSG00000024386,protein_coding -49882,Csnk2b,ENSMUSG00000024387,protein_coding -53007,Myo7b,ENSMUSG00000024388,protein_coding -49886,Apom,ENSMUSG00000024391,protein_coding -49887,Bag6,ENSMUSG00000024392,protein_coding -49888,Prrc2a,ENSMUSG00000024393,protein_coding -53005,Lims2,ENSMUSG00000024395,protein_coding -49891,Aif1,ENSMUSG00000024397,protein_coding -49894,Ltb,ENSMUSG00000024399,protein_coding -53001,Wdr33,ENSMUSG00000024400,protein_coding -49896,Tnf,ENSMUSG00000024401,protein_coding -49897,Lta,ENSMUSG00000024402,protein_coding -49901,Atp6v1g2,ENSMUSG00000024403,protein_coding -52806,Riok3,ENSMUSG00000024404,protein_coding -49922,Pou5f1,ENSMUSG00000024406,protein_coding -49926,Psors1c2,ENSMUSG00000024409,protein_coding -52808,Rmc1,ENSMUSG00000024410,protein_coding -52858,Aqp4,ENSMUSG00000024411,protein_coding -52809,Npc1,ENSMUSG00000024413,protein_coding -39309,Mrpl27,ENSMUSG00000024414,protein_coding -52836,Zfp521,ENSMUSG00000024420,protein_coding -52813,Lama3,ENSMUSG00000024421,protein_coding -49950,Dhx16,ENSMUSG00000024422,protein_coding -52827,Impact,ENSMUSG00000024423,protein_coding -52818,Ttc39c,ENSMUSG00000024424,protein_coding 
-53252,Ndfip1,ENSMUSG00000024425,protein_coding -49953,Atat1,ENSMUSG00000024426,protein_coding -53256,Spry4,ENSMUSG00000024427,protein_coding -49962,Gnl1,ENSMUSG00000024429,protein_coding -52822,Cabyr,ENSMUSG00000024430,protein_coding -53271,Nr3c1,ENSMUSG00000024431,protein_coding -49954,Mrps18b,ENSMUSG00000024436,protein_coding -53246,Pcdh12,ENSMUSG00000024440,protein_coding -53245,Dele1,ENSMUSG00000024442,protein_coding -50003,Rpp21,ENSMUSG00000024446,protein_coding -50006,H2-M10.1,ENSMUSG00000024448,protein_coding -53237,Arap3,ENSMUSG00000024451,protein_coding -53234,Hdac3,ENSMUSG00000024454,protein_coding -53231,Diaph1,ENSMUSG00000024456,protein_coding -50034,Trim26,ENSMUSG00000024457,protein_coding -50048,H2-M5,ENSMUSG00000024459,protein_coding -50052,Gabbr1,ENSMUSG00000024462,protein_coding -53348,Myot,ENSMUSG00000024471,protein_coding -53349,Dcp2,ENSMUSG00000024472,protein_coding -53137,Ik,ENSMUSG00000024474,protein_coding -53371,Pggt1b,ENSMUSG00000024477,protein_coding -45897,Mal2,ENSMUSG00000024479,protein_coding -53387,Ap3s1,ENSMUSG00000024480,protein_coding -53391,Lvrn,ENSMUSG00000024481,protein_coding -53126,Ankhd1,ENSMUSG00000024483,protein_coding -53125,Slc4a9,ENSMUSG00000024485,protein_coding -53123,Hbegf,ENSMUSG00000024486,protein_coding -53279,Yipf5,ENSMUSG00000024487,protein_coding -53306,Rbm27,ENSMUSG00000024491,protein_coding -53303,Lars,ENSMUSG00000024493,protein_coding -53308,Pou4f3,ENSMUSG00000024497,protein_coding -53311,Tcerg1,ENSMUSG00000024498,protein_coding -53317,Ppp2r2b,ENSMUSG00000024500,protein_coding -53323,Dpysl3,ENSMUSG00000024501,protein_coding -53328,Jakmip2,ENSMUSG00000024502,protein_coding -53329,Spink1,ENSMUSG00000024503,protein_coding -53422,Dtwd2,ENSMUSG00000024505,protein_coding -53428,Hsd17b4,ENSMUSG00000024507,protein_coding -53443,Ftmt,ENSMUSG00000024510,protein_coding -53765,Rab27b,ENSMUSG00000024511,protein_coding -53767,Dynap,ENSMUSG00000024512,protein_coding -53773,Mbd2,ENSMUSG00000024513,protein_coding 
-53788,Smad4,ENSMUSG00000024515,protein_coding -53671,Sec11c,ENSMUSG00000024516,protein_coding -53676,Grp,ENSMUSG00000024517,protein_coding -53681,Rax,ENSMUSG00000024518,protein_coding -53683,Cplx4,ENSMUSG00000024519,protein_coding -53694,Pmaip1,ENSMUSG00000024521,protein_coding -53713,Gnal,ENSMUSG00000024524,protein_coding -53720,Impa2,ENSMUSG00000024525,protein_coding -53722,Cidea,ENSMUSG00000024526,protein_coding -53724,Afg3l2,ENSMUSG00000024527,protein_coding -53444,Srfbp1,ENSMUSG00000024528,protein_coding -53446,Lox,ENSMUSG00000024529,protein_coding -53726,Prelid3a,ENSMUSG00000024530,protein_coding -53449,1700034E13Rik,ENSMUSG00000024532,protein_coding -53727,Spire1,ENSMUSG00000024533,protein_coding -53451,Sncaip,ENSMUSG00000024534,protein_coding -53456,Snx24,ENSMUSG00000024535,protein_coding -53730,Psmg2,ENSMUSG00000024537,protein_coding -53457,Ppic,ENSMUSG00000024538,protein_coding -53731,Ptpn2,ENSMUSG00000024539,protein_coding -53735,Cep192,ENSMUSG00000024542,protein_coding -53737,Ldlrad4,ENSMUSG00000024544,protein_coding -53872,Setbp1,ENSMUSG00000024548,protein_coding -53867,Slc14a2,ENSMUSG00000024552,protein_coding -53947,Galr1,ENSMUSG00000024553,protein_coding -53793,Me2,ENSMUSG00000024556,protein_coding -53795,Mapk4,ENSMUSG00000024558,protein_coding -53799,Cxxc1,ENSMUSG00000024560,protein_coding -53801,Mbd1,ENSMUSG00000024561,protein_coding -53839,Smad2,ENSMUSG00000024563,protein_coding -53920,Sall3,ENSMUSG00000024565,protein_coding -53916,Atp9b,ENSMUSG00000024566,protein_coding -53901,Rbfa,ENSMUSG00000024570,protein_coding -53902,Gm16286,ENSMUSG00000024571,protein_coding -53573,Pde6a,ENSMUSG00000024575,protein_coding -53583,Csnk1a1,ENSMUSG00000024576,protein_coding -53588,Il17b,ENSMUSG00000024578,protein_coding -53590,Pcyox1l,ENSMUSG00000024579,protein_coding -53591,Grpel2,ENSMUSG00000024580,protein_coding -53610,Napg,ENSMUSG00000024581,protein_coding -53620,Txnl1,ENSMUSG00000024583,protein_coding -53637,Nars,ENSMUSG00000024587,protein_coding 
-53636,Fech,ENSMUSG00000024588,protein_coding -53648,Nedd4l,ENSMUSG00000024589,protein_coding -53502,Lmnb1,ENSMUSG00000024590,protein_coding -53506,C330018D20Rik,ENSMUSG00000024592,protein_coding -53509,Megf10,ENSMUSG00000024593,protein_coding -53513,Prrc1,ENSMUSG00000024594,protein_coding -53523,Slc12a2,ENSMUSG00000024597,protein_coding -53524,Fbn2,ENSMUSG00000024598,protein_coding -53527,Slc27a6,ENSMUSG00000024600,protein_coding -53529,Isoc1,ENSMUSG00000024601,protein_coding -53551,Dctn4,ENSMUSG00000024603,protein_coding -53552,Rbm22,ENSMUSG00000024604,protein_coding -53556,Rps14,ENSMUSG00000024608,protein_coding -53558,Cd74,ENSMUSG00000024610,protein_coding -53560,Tcof1,ENSMUSG00000024613,protein_coding -54052,Tmx3,ENSMUSG00000024614,protein_coding -53562,Camk2a,ENSMUSG00000024617,protein_coding -53566,Cdx1,ENSMUSG00000024619,protein_coding -53567,Pdgfrb,ENSMUSG00000024620,protein_coding -53568,Csf1r,ENSMUSG00000024621,protein_coding -53570,Hmgxb3,ENSMUSG00000024622,protein_coding -54666,Gnaq,ENSMUSG00000024639,protein_coding -54661,Psat1,ENSMUSG00000024640,protein_coding -54655,Tle4,ENSMUSG00000024642,protein_coding -53994,Cndp2,ENSMUSG00000024644,protein_coding -54006,Timm21,ENSMUSG00000024645,protein_coding -54003,Cyb5a,ENSMUSG00000024646,protein_coding -54017,Cbln2,ENSMUSG00000024647,protein_coding -54365,Slc22a6,ENSMUSG00000024650,protein_coding -54415,Scgb1a1,ENSMUSG00000024653,protein_coding -54416,Asrgl1,ENSMUSG00000024654,protein_coding -54717,Anxa1,ENSMUSG00000024659,protein_coding -54430,Incenp,ENSMUSG00000024660,protein_coding -54438,Fth1,ENSMUSG00000024661,protein_coding -54440,Rab3il1,ENSMUSG00000024663,protein_coding -54441,Fads3,ENSMUSG00000024664,protein_coding -54442,Fads2,ENSMUSG00000024665,protein_coding -54460,Tmem138,ENSMUSG00000024666,protein_coding -54459,Tmem216,ENSMUSG00000024667,protein_coding -54457,Sdhaf2,ENSMUSG00000024668,protein_coding -54470,Cd5,ENSMUSG00000024669,protein_coding -54472,Cd6,ENSMUSG00000024670,protein_coding 
-54497,Ms4a7,ENSMUSG00000024672,protein_coding -54494,Ms4a1,ENSMUSG00000024673,protein_coding -54501,Ms4a4c,ENSMUSG00000024675,protein_coding -54508,Ms4a6b,ENSMUSG00000024677,protein_coding -54509,Ms4a4d,ENSMUSG00000024678,protein_coding -54512,Ms4a6d,ENSMUSG00000024679,protein_coding -54514,Ms4a2,ENSMUSG00000024680,protein_coding -54515,Ms4a3,ENSMUSG00000024681,protein_coding -54522,Cblif,ENSMUSG00000024682,protein_coding -54523,Mrpl16,ENSMUSG00000024683,protein_coding -54534,Osbp,ENSMUSG00000024687,protein_coding -54571,Fam111a,ENSMUSG00000024691,protein_coding -54579,Keg1,ENSMUSG00000024694,protein_coding -54581,Zfp91,ENSMUSG00000024695,protein_coding -54583,Lpxn,ENSMUSG00000024696,protein_coding -54673,Gna14,ENSMUSG00000024697,protein_coding -54688,Rfk,ENSMUSG00000024712,protein_coding -54689,Pcsk5,ENSMUSG00000024713,protein_coding -54699,Ostf1,ENSMUSG00000024725,protein_coding -54702,Carnmt1,ENSMUSG00000024726,protein_coding -54706,Trpm6,ENSMUSG00000024727,protein_coding -54490,1700025F22Rik,ENSMUSG00000024728,protein_coding -54488,1700017D01Rik,ENSMUSG00000024729,protein_coding -54486,Ms4a8a,ENSMUSG00000024730,protein_coding -54481,Ms4a10,ENSMUSG00000024731,protein_coding -54480,Ccdc86,ENSMUSG00000024732,protein_coding -54478,Zp1,ENSMUSG00000024734,protein_coding -54477,Prpf19,ENSMUSG00000024735,protein_coding -54475,Tmem132a,ENSMUSG00000024736,protein_coding -54473,Slc15a3,ENSMUSG00000024737,protein_coding -54467,Pga5,ENSMUSG00000024738,protein_coding -54464,Ddb1,ENSMUSG00000024740,protein_coding -54447,Fen1,ENSMUSG00000024742,protein_coding -54452,Syt7,ENSMUSG00000024743,protein_coding -54721,Aldh1a7,ENSMUSG00000024747,protein_coding -54722,Tmc1,ENSMUSG00000024749,protein_coding -54728,Zfand5,ENSMUSG00000024750,protein_coding -54735,Cemip2,ENSMUSG00000024754,protein_coding -54346,Slc22a19,ENSMUSG00000024757,protein_coding -54340,Rtn3,ENSMUSG00000024758,protein_coding -54341,Atl3,ENSMUSG00000024759,protein_coding 
-54327,Naa40,ENSMUSG00000024764,protein_coding -54909,Lipo3,ENSMUSG00000024766,protein_coding -54325,Otub1,ENSMUSG00000024767,protein_coding -54921,Lipf,ENSMUSG00000024768,protein_coding -54282,Cdc42bpg,ENSMUSG00000024769,protein_coding -54924,Lipn,ENSMUSG00000024770,protein_coding -54922,Lipk,ENSMUSG00000024771,protein_coding -54281,Ehd1,ENSMUSG00000024772,protein_coding -54277,Atg2a,ENSMUSG00000024773,protein_coding -54927,Ankrd22,ENSMUSG00000024774,protein_coding -54928,Stambpl1,ENSMUSG00000024776,protein_coding -54276,Ppp2r5b,ENSMUSG00000024777,protein_coding -54931,Fas,ENSMUSG00000024778,protein_coding -54841,Cdc37l1,ENSMUSG00000024780,protein_coding -54936,Lipa,ENSMUSG00000024781,protein_coding -54843,Ak3,ENSMUSG00000024782,protein_coding -54275,Gpha2,ENSMUSG00000024784,protein_coding -54847,Rcl1,ENSMUSG00000024785,protein_coding -54274,Majin,ENSMUSG00000024786,protein_coding -54269,Snx15,ENSMUSG00000024787,protein_coding -54850,Jak2,ENSMUSG00000024789,protein_coding -54268,Sac3d1,ENSMUSG00000024790,protein_coding -54265,Cdca5,ENSMUSG00000024791,protein_coding -54264,Zfpl1,ENSMUSG00000024792,protein_coding -15500,Tnfrsf25,ENSMUSG00000024793,protein_coding -54953,Kif20b,ENSMUSG00000024795,protein_coding -54261,Vps51,ENSMUSG00000024797,protein_coding -54964,Htr7,ENSMUSG00000024798,protein_coding -54260,Tm7sf2,ENSMUSG00000024799,protein_coding -54965,Rpp30,ENSMUSG00000024800,protein_coding -54966,Ankrd1,ENSMUSG00000024803,protein_coding -54971,Pcgf5,ENSMUSG00000024805,protein_coding -54860,Mlana,ENSMUSG00000024806,protein_coding -54255,Syvn1,ENSMUSG00000024807,protein_coding -54865,Il33,ENSMUSG00000024810,protein_coding -54980,Tnks2,ENSMUSG00000024811,protein_coding -54771,Tjp2,ENSMUSG00000024812,protein_coding -54867,Trpd52l3,ENSMUSG00000024815,protein_coding -54243,Frmd8,ENSMUSG00000024816,protein_coding -54868,Uhrf2,ENSMUSG00000024817,protein_coding -54244,Slc25a45,ENSMUSG00000024818,protein_coding -54124,Rad9a,ENSMUSG00000024824,protein_coding 
-54246,Dpf2,ENSMUSG00000024826,protein_coding -54869,Gldc,ENSMUSG00000024827,protein_coding -54066,Mrpl21,ENSMUSG00000024829,protein_coding -54117,Rps6kb2,ENSMUSG00000024830,protein_coding -54065,Ighmbp2,ENSMUSG00000024831,protein_coding -54249,Pola2,ENSMUSG00000024833,protein_coding -54115,Coro1b,ENSMUSG00000024835,protein_coding -54795,Dmrt1,ENSMUSG00000024837,protein_coding -54193,Eif1ad,ENSMUSG00000024841,protein_coding -54113,Cabp4,ENSMUSG00000024842,protein_coding -54086,Chka,ENSMUSG00000024843,protein_coding -54192,Banf1,ENSMUSG00000024844,protein_coding -54191,Cst6,ENSMUSG00000024846,protein_coding -54111,Aip,ENSMUSG00000024847,protein_coding -54110,Pitpnm1,ENSMUSG00000024851,protein_coding -54184,Sf3b2,ENSMUSG00000024853,protein_coding -54129,Pold4,ENSMUSG00000024854,protein_coding -54181,Pacs1,ENSMUSG00000024855,protein_coding -54109,Cdk2ap2,ENSMUSG00000024856,protein_coding -54108,Cabp2,ENSMUSG00000024857,protein_coding -54135,Grk2,ENSMUSG00000024858,protein_coding -54178,Klc2,ENSMUSG00000024862,protein_coding -54871,Mbl2,ENSMUSG00000024863,protein_coding -54095,Acy3,ENSMUSG00000024866,protein_coding -54776,Pip5k1b,ENSMUSG00000024867,protein_coding -54877,Dkk1,ENSMUSG00000024868,protein_coding -54099,Gm49405,ENSMUSG00000024869,protein_coding -54177,Rab1b,ENSMUSG00000024870,protein_coding -54101,Doc2g,ENSMUSG00000024871,protein_coding -54176,Cnih2,ENSMUSG00000024873,protein_coding -54175,Yif1a,ENSMUSG00000024875,protein_coding -54786,Cbwd1,ENSMUSG00000024878,protein_coding -54171,Rin1,ENSMUSG00000024883,protein_coding -54090,Aldh3b1,ENSMUSG00000024885,protein_coding -54885,Asah2,ENSMUSG00000024887,protein_coding -54143,Rce1,ENSMUSG00000024889,protein_coding -54167,Slc29a2,ENSMUSG00000024891,protein_coding -54141,Pcx,ENSMUSG00000024892,protein_coding -54892,Minpp1,ENSMUSG00000024896,protein_coding -54765,Apba1,ENSMUSG00000024897,protein_coding -54895,Papss2,ENSMUSG00000024899,protein_coding -54069,Cpt1a,ENSMUSG00000024900,protein_coding 
-54162,Peli3,ENSMUSG00000024901,protein_coding -54163,Mrpl11,ENSMUSG00000024902,protein_coding -14503,Lao1,ENSMUSG00000024903,protein_coding -54070,Tesmin,ENSMUSG00000024905,protein_coding -54209,Mus81,ENSMUSG00000024906,protein_coding -54071,Gal,ENSMUSG00000024907,protein_coding -54072,Ppp6r3,ENSMUSG00000024908,protein_coding -54208,Efemp2,ENSMUSG00000024909,protein_coding -54207,Ctsw,ENSMUSG00000024910,protein_coding -54206,Fibp,ENSMUSG00000024911,protein_coding -54204,Fosl1,ENSMUSG00000024912,protein_coding -54076,Lrp5,ENSMUSG00000024913,protein_coding -54199,Drap1,ENSMUSG00000024914,protein_coding -54808,Smarca2,ENSMUSG00000024921,protein_coding -54215,Ovol1,ENSMUSG00000024922,protein_coding -54816,Vldlr,ENSMUSG00000024924,protein_coding -54218,Rnaseh2c,ENSMUSG00000024925,protein_coding -54219,Kat5,ENSMUSG00000024926,protein_coding -54220,Rela,ENSMUSG00000024927,protein_coding -54836,Slc1a1,ENSMUSG00000024935,protein_coding -54226,Kcnk7,ENSMUSG00000024936,protein_coding -54227,Ehbp1l1,ENSMUSG00000024937,protein_coding -54229,Fam89b,ENSMUSG00000024939,protein_coding -54232,Ltbp3,ENSMUSG00000024940,protein_coding -54233,Scyl1,ENSMUSG00000024941,protein_coding -54251,Capn1,ENSMUSG00000024942,protein_coding -54751,Smc5,ENSMUSG00000024943,protein_coding -54270,Arl2,ENSMUSG00000024944,protein_coding -54284,Men1,ENSMUSG00000024947,protein_coding -54285,Map4k2,ENSMUSG00000024948,protein_coding -54288,Sf1,ENSMUSG00000024949,protein_coding -54302,Rps6ka4,ENSMUSG00000024952,protein_coding -54305,Prdx5,ENSMUSG00000024953,protein_coding -54307,Esrra,ENSMUSG00000024955,protein_coding -54310,Kcnk4,ENSMUSG00000024957,protein_coding -54311,Gpr137,ENSMUSG00000024958,protein_coding -54312,Bad,ENSMUSG00000024959,protein_coding -54313,Plcb3,ENSMUSG00000024960,protein_coding -54316,Vegfb,ENSMUSG00000024962,protein_coding -54317,Dnajc4,ENSMUSG00000024963,protein_coding -54320,Fermt3,ENSMUSG00000024965,protein_coding -54321,Stip1,ENSMUSG00000024966,protein_coding 
-54330,Rcor2,ENSMUSG00000024968,protein_coding -54331,Mark2,ENSMUSG00000024969,protein_coding -54333,Spindoc,ENSMUSG00000024970,protein_coding -54343,Lgals12,ENSMUSG00000024972,protein_coding -54344,Hrasls5,ENSMUSG00000024973,protein_coding -55319,Smc3,ENSMUSG00000024974,protein_coding -55324,Pdcd4,ENSMUSG00000024975,protein_coding -55326,Shoc2,ENSMUSG00000024976,protein_coding -55334,Gpam,ENSMUSG00000024978,protein_coding -55336,Tectb,ENSMUSG00000024979,protein_coding -55339,Acsl5,ENSMUSG00000024981,protein_coding -55340,Zdhhc6,ENSMUSG00000024982,protein_coding -55341,Vti1a,ENSMUSG00000024983,protein_coding -55343,Tcf7l2,ENSMUSG00000024985,protein_coding -54995,Hhex,ENSMUSG00000024986,protein_coding -55000,Cyp26a1,ENSMUSG00000024987,protein_coding -55006,Cep55,ENSMUSG00000024989,protein_coding -55009,Rbp4,ENSMUSG00000024990,protein_coding -55428,Eif3a,ENSMUSG00000024991,protein_coding -55010,Pde6c,ENSMUSG00000024992,protein_coding -55432,Fam45a,ENSMUSG00000024993,protein_coding -55435,Prdx3,ENSMUSG00000024997,protein_coding -55020,Plce1,ENSMUSG00000024998,protein_coding -55023,Noc3l,ENSMUSG00000024999,protein_coding -55026,Hells,ENSMUSG00000025001,protein_coding -55028,Cyp2c55,ENSMUSG00000025002,protein_coding -55036,Cyp2c39,ENSMUSG00000025003,protein_coding -55039,Cyp2c40,ENSMUSG00000025004,protein_coding -55050,Sorbs1,ENSMUSG00000025006,protein_coding -55052,Aldh18a1,ENSMUSG00000025007,protein_coding -55055,Tctn3,ENSMUSG00000025008,protein_coding -55061,Ccnj,ENSMUSG00000025010,protein_coding -55068,Tll2,ENSMUSG00000025013,protein_coding -55066,Dntt,ENSMUSG00000025014,protein_coding -55069,Tm9sf3,ENSMUSG00000025016,protein_coding -55070,Pik3ap1,ENSMUSG00000025017,protein_coding -55072,Lcor,ENSMUSG00000025019,protein_coding -55074,Slit1,ENSMUSG00000025020,protein_coding -55310,Smndc1,ENSMUSG00000025024,protein_coding -55307,Mxi1,ENSMUSG00000025025,protein_coding -55303,Add3,ENSMUSG00000025026,protein_coding -55298,Xpnpep1,ENSMUSG00000025027,protein_coding 
-55222,Trim8,ENSMUSG00000025034,protein_coding -55223,Arl3,ENSMUSG00000025035,protein_coding -55224,Sfxn2,ENSMUSG00000025036,protein_coding -7533,Maoa,ENSMUSG00000025037,protein_coding -7536,Efhc2,ENSMUSG00000025038,protein_coding -7539,Fundc1,ENSMUSG00000025040,protein_coding -55233,Nt5c2,ENSMUSG00000025041,protein_coding -7544,Dusp21,ENSMUSG00000025043,protein_coding -30142,Msr1,ENSMUSG00000025044,protein_coding -55240,Pdcd11,ENSMUSG00000025047,protein_coding -55238,Taf5,ENSMUSG00000025049,protein_coding -55235,Pcgf6,ENSMUSG00000025050,protein_coding -9652,Samt4,ENSMUSG00000025051,protein_coding -8559,Nr0b1,ENSMUSG00000025056,protein_coding -8556,5430427O19Rik,ENSMUSG00000025058,protein_coding -8549,Gk,ENSMUSG00000025059,protein_coding -55253,Slk,ENSMUSG00000025060,protein_coding -55256,Col17a1,ENSMUSG00000025064,protein_coding -55261,Sfr1,ENSMUSG00000025066,protein_coding -55264,Gsto1,ENSMUSG00000025068,protein_coding -55265,Gsto2,ENSMUSG00000025069,protein_coding -55349,Habp2,ENSMUSG00000025075,protein_coding -55352,Casp7,ENSMUSG00000025076,protein_coding -55354,Dclre1a,ENSMUSG00000025077,protein_coding -55355,Nhlrc2,ENSMUSG00000025078,protein_coding -55362,Tdrd1,ENSMUSG00000025081,protein_coding -55363,Vwa2,ENSMUSG00000025082,protein_coding -55364,Afap1l2,ENSMUSG00000025083,protein_coding -55365,Ablim1,ENSMUSG00000025085,protein_coding -55371,Trub1,ENSMUSG00000025086,protein_coding -55382,Gfra1,ENSMUSG00000025089,protein_coding -55384,Ccdc172,ENSMUSG00000025090,protein_coding -55390,Pnliprp2,ENSMUSG00000025091,protein_coding -55394,Hspa12a,ENSMUSG00000025092,protein_coding -55403,Slc18a2,ENSMUSG00000025094,protein_coding -24923,3110040N11Rik,ENSMUSG00000025102,protein_coding -24926,Btbd1,ENSMUSG00000025103,protein_coding -24931,Hdgfl3,ENSMUSG00000025104,protein_coding -24934,Bnc1,ENSMUSG00000025105,protein_coding -40233,Gcgr,ENSMUSG00000025127,protein_coding -10044,Bhlhe22,ENSMUSG00000025128,protein_coding -40235,Ppp1r27,ENSMUSG00000025129,protein_coding 
-40236,P4hb,ENSMUSG00000025130,protein_coding -40237,Arhgdia,ENSMUSG00000025132,protein_coding -25225,Ints4,ENSMUSG00000025133,protein_coding -40239,Alyref,ENSMUSG00000025134,protein_coding -40240,Anapc11,ENSMUSG00000025135,protein_coding -40242,Pcyt2,ENSMUSG00000025137,protein_coding -40243,Sirt7,ENSMUSG00000025138,protein_coding -26607,Tollip,ENSMUSG00000025139,protein_coding -40247,Pycr1,ENSMUSG00000025140,protein_coding -40248,Myadml2,ENSMUSG00000025141,protein_coding -40252,Aspscr1,ENSMUSG00000025142,protein_coding -40254,Cenpx,ENSMUSG00000025144,protein_coding -40255,Lrrc45,ENSMUSG00000025145,protein_coding -26611,Mob2,ENSMUSG00000025147,protein_coding -40258,Cbr2,ENSMUSG00000025150,protein_coding -8668,Maged1,ENSMUSG00000025151,protein_coding -40264,Fasn,ENSMUSG00000025153,protein_coding -55076,Arhgap19,ENSMUSG00000025154,protein_coding -40263,Dus1l,ENSMUSG00000025155,protein_coding -40262,Gps1,ENSMUSG00000025156,protein_coding -55083,Zdhhc16,ENSMUSG00000025157,protein_coding -40261,Rfng,ENSMUSG00000025158,protein_coding -55084,Mms19,ENSMUSG00000025159,protein_coding -40267,Slc16a3,ENSMUSG00000025161,protein_coding -40268,Csnk1d,ENSMUSG00000025162,protein_coding -40273,Cd7,ENSMUSG00000025163,protein_coding -40275,Sectm1a,ENSMUSG00000025165,protein_coding -40283,Ogfod3,ENSMUSG00000025169,protein_coding -40289,Rab40b,ENSMUSG00000025170,protein_coding -55086,Ubtd1,ENSMUSG00000025171,protein_coding -55087,Ankrd2,ENSMUSG00000025172,protein_coding -40288,Wdr45b,ENSMUSG00000025173,protein_coding -40293,Fn3k,ENSMUSG00000025175,protein_coding -55088,Hoga1,ENSMUSG00000025176,protein_coding -55091,Pi4k2a,ENSMUSG00000025178,protein_coding -55103,R3hcc1l,ENSMUSG00000025184,protein_coding -55105,Loxl4,ENSMUSG00000025185,protein_coding -55109,Hps1,ENSMUSG00000025188,protein_coding -55117,Cnnm1,ENSMUSG00000025189,protein_coding -55119,Got1,ENSMUSG00000025190,protein_coding -55129,Entpd7,ENSMUSG00000025192,protein_coding -55131,Cutc,ENSMUSG00000025193,protein_coding 
-55132,Abcc2,ENSMUSG00000025194,protein_coding -55133,Dnmbp,ENSMUSG00000025195,protein_coding -55137,Cpn1,ENSMUSG00000025196,protein_coding -55138,Cyp2c23,ENSMUSG00000025197,protein_coding -55139,Erlin1,ENSMUSG00000025198,protein_coding -55140,Chuk,ENSMUSG00000025199,protein_coding -55141,Cwf19l1,ENSMUSG00000025200,protein_coding -55145,Scd3,ENSMUSG00000025202,protein_coding -55149,Scd2,ENSMUSG00000025203,protein_coding -55159,Ndufb8,ENSMUSG00000025204,protein_coding -55172,Sema4g,ENSMUSG00000025207,protein_coding -55173,Mrpl43,ENSMUSG00000025208,protein_coding -55174,Twnk,ENSMUSG00000025209,protein_coding -55177,Sfxn3,ENSMUSG00000025212,protein_coding -55179,Kazald1,ENSMUSG00000025213,protein_coding -55180,Tlx1,ENSMUSG00000025215,protein_coding -55182,Lbx1,ENSMUSG00000025216,protein_coding -55186,Btrc,ENSMUSG00000025217,protein_coding -55188,Poll,ENSMUSG00000025218,protein_coding -55191,Fgf8,ENSMUSG00000025219,protein_coding -55194,Oga,ENSMUSG00000025220,protein_coding -55195,Kcnip2,ENSMUSG00000025221,protein_coding -55201,Ldb1,ENSMUSG00000025223,protein_coding -55207,Gbf1,ENSMUSG00000025224,protein_coding -55211,Nfkb2,ENSMUSG00000025225,protein_coding -55213,Fbxl15,ENSMUSG00000025226,protein_coding -55216,Mfsd13a,ENSMUSG00000025227,protein_coding -55220,Actr1a,ENSMUSG00000025228,protein_coding -55206,Pitx3,ENSMUSG00000025229,protein_coding -55221,Sufu,ENSMUSG00000025231,protein_coding -35755,Hexa,ENSMUSG00000025232,protein_coding -35751,Arih1,ENSMUSG00000025234,protein_coding -35750,Bbs4,ENSMUSG00000025235,protein_coding -35749,Adpgk,ENSMUSG00000025236,protein_coding -35757,Parp6,ENSMUSG00000025237,protein_coding -37263,Limd1,ENSMUSG00000025239,protein_coding -37264,Sacm1l,ENSMUSG00000025240,protein_coding -37276,Fyco1,ENSMUSG00000025241,protein_coding -37265,Slc6a20b,ENSMUSG00000025243,protein_coding -37273,Lztfl1,ENSMUSG00000025245,protein_coding -8477,Tbl1x,ENSMUSG00000025246,protein_coding -9897,Zfhx4,ENSMUSG00000025255,protein_coding 
-9594,Ribc1,ENSMUSG00000025257,protein_coding -9593,Hsd17b10,ENSMUSG00000025260,protein_coding -9587,Huwe1,ENSMUSG00000025261,protein_coding -9576,Fam120c,ENSMUSG00000025262,protein_coding -9568,Tsr2,ENSMUSG00000025264,protein_coding -9567,Fgd1,ENSMUSG00000025265,protein_coding -9565,Gnl3l,ENSMUSG00000025266,protein_coding -9560,Maged2,ENSMUSG00000025268,protein_coding -9553,Apex2,ENSMUSG00000025269,protein_coding -9554,Alas2,ENSMUSG00000025270,protein_coding -9556,Pfkfb1,ENSMUSG00000025271,protein_coding -9557,Tro,ENSMUSG00000025272,protein_coding -32092,Abhd6,ENSMUSG00000025277,protein_coding -32089,Flnb,ENSMUSG00000025278,protein_coding -32091,Dnase1l3,ENSMUSG00000025279,protein_coding -32354,Polr3a,ENSMUSG00000025280,protein_coding -9673,Sat1,ENSMUSG00000025283,protein_coding -9675,Acot9,ENSMUSG00000025287,protein_coding -8222,4933436I01Rik,ENSMUSG00000025288,protein_coding -9677,Prdx4,ENSMUSG00000025289,protein_coding -32355,Rps24,ENSMUSG00000025290,protein_coding -5364,Ptprj,ENSMUSG00000025314,protein_coding -31692,Banp,ENSMUSG00000025316,protein_coding -31689,Car5a,ENSMUSG00000025317,protein_coding -31680,Jph3,ENSMUSG00000025318,protein_coding -45280,Itgb8,ENSMUSG00000025321,protein_coding -45269,Sp4,ENSMUSG00000025323,protein_coding -24267,Atp10a,ENSMUSG00000025324,protein_coding -24280,Ube3a,ENSMUSG00000025326,protein_coding -15148,Padi3,ENSMUSG00000025328,protein_coding -15150,Padi1,ENSMUSG00000025329,protein_coding -15147,Padi4,ENSMUSG00000025330,protein_coding -9599,Kdm5c,ENSMUSG00000025332,protein_coding -9611,Gpr143,ENSMUSG00000025333,protein_coding -18252,Sbds,ENSMUSG00000025337,protein_coding -18246,Rabgef1,ENSMUSG00000025340,protein_coding -29273,Mettl7b,ENSMUSG00000025347,protein_coding -29271,Itga7,ENSMUSG00000025348,protein_coding -29269,Rdh5,ENSMUSG00000025350,protein_coding -29268,Cd63,ENSMUSG00000025351,protein_coding -29267,Gdf11,ENSMUSG00000025352,protein_coding -29266,Ormdl2,ENSMUSG00000025353,protein_coding 
-29264,Dnajc14,ENSMUSG00000025354,protein_coding -29262,Mmp19,ENSMUSG00000025355,protein_coding -29258,Dgka,ENSMUSG00000025357,protein_coding -29256,Cdk2,ENSMUSG00000025358,protein_coding -29257,Pmel,ENSMUSG00000025359,protein_coding -29250,Rps26,ENSMUSG00000025362,protein_coding -29244,Pa2g4,ENSMUSG00000025364,protein_coding -29238,Esyt1,ENSMUSG00000025366,protein_coding -29234,Smarcc2,ENSMUSG00000025369,protein_coding -45467,Cdh9,ENSMUSG00000025370,protein_coding -40195,Chmp6,ENSMUSG00000025371,protein_coding -40198,Baiap2,ENSMUSG00000025372,protein_coding -29231,Rnf41,ENSMUSG00000025373,protein_coding -29229,Nabp2,ENSMUSG00000025374,protein_coding -40199,Aatk,ENSMUSG00000025375,protein_coding -40205,Tepsin,ENSMUSG00000025377,protein_coding -40219,Fscn2,ENSMUSG00000025380,protein_coding -29219,Cnpy2,ENSMUSG00000025381,protein_coding -29216,Il23a,ENSMUSG00000025383,protein_coding -40220,Faap100,ENSMUSG00000025384,protein_coding -40223,Pde6g,ENSMUSG00000025386,protein_coding -29210,Mip,ENSMUSG00000025389,protein_coding -29201,Atp5b,ENSMUSG00000025393,protein_coding -29197,Prim1,ENSMUSG00000025395,protein_coding -29195,Hsd17b6,ENSMUSG00000025396,protein_coding -29179,Tac2,ENSMUSG00000025400,protein_coding -29178,Myo1a,ENSMUSG00000025401,protein_coding -29174,Nab2,ENSMUSG00000025402,protein_coding -29168,Shmt2,ENSMUSG00000025403,protein_coding -29164,R3hdm2,ENSMUSG00000025404,protein_coding -29163,Inhbc,ENSMUSG00000025405,protein_coding -29160,Gli1,ENSMUSG00000025407,protein_coding -29155,Ddit3,ENSMUSG00000025408,protein_coding -29152,Mbd6,ENSMUSG00000025409,protein_coding -29151,Dctn2,ENSMUSG00000025410,protein_coding -14195,Ttc4,ENSMUSG00000025413,protein_coding -29149,Pip4k2c,ENSMUSG00000025417,protein_coding -14187,Bsnd,ENSMUSG00000025418,protein_coding -53846,Katnal2,ENSMUSG00000025420,protein_coding -53845,Hdhd2,ENSMUSG00000025421,protein_coding -29140,Agap2,ENSMUSG00000025422,protein_coding -53847,Pias2,ENSMUSG00000025423,protein_coding 
-53849,St8sia5,ENSMUSG00000025425,protein_coding -53853,Rnf165,ENSMUSG00000025427,protein_coding -53860,Atp5a1,ENSMUSG00000025428,protein_coding -53861,Pstpip2,ENSMUSG00000025429,protein_coding -50183,Crisp1,ENSMUSG00000025431,protein_coding -29131,Avil,ENSMUSG00000025432,protein_coding -50182,Crisp3,ENSMUSG00000025433,protein_coding -29124,Atp23,ENSMUSG00000025436,protein_coding -12648,Usp33,ENSMUSG00000025437,protein_coding -25234,Clns1a,ENSMUSG00000025439,protein_coding -42771,Paip1,ENSMUSG00000025451,protein_coding -42769,Nnt,ENSMUSG00000025453,protein_coding -26507,Cd163l1,ENSMUSG00000025461,protein_coding -26501,Paox,ENSMUSG00000025464,protein_coding -26500,Echs1,ENSMUSG00000025465,protein_coding -26499,Fuom,ENSMUSG00000025466,protein_coding -26498,Prap1,ENSMUSG00000025467,protein_coding -26497,Caly,ENSMUSG00000025468,protein_coding -26496,Msx3,ENSMUSG00000025469,protein_coding -26495,Zfp511,ENSMUSG00000025470,protein_coding -26492,Adam8,ENSMUSG00000025473,protein_coding -26494,Tubgcp2,ENSMUSG00000025474,protein_coding -26483,Adgra1,ENSMUSG00000025475,protein_coding -26476,Inpp5a,ENSMUSG00000025477,protein_coding -26469,Dpysl4,ENSMUSG00000025478,protein_coding -26533,Cyp2e1,ENSMUSG00000025479,protein_coding -26534,Syce1,ENSMUSG00000025480,protein_coding -26538,Urah,ENSMUSG00000025481,protein_coding -26541,Odf3,ENSMUSG00000025482,protein_coding -26542,Bet1l,ENSMUSG00000025484,protein_coding -26543,Ric8a,ENSMUSG00000025485,protein_coding -26544,Sirt3,ENSMUSG00000025486,protein_coding -26545,Psmd13,ENSMUSG00000025487,protein_coding -26547,Cox8b,ENSMUSG00000025488,protein_coding -26551,Ifitm5,ENSMUSG00000025489,protein_coding -26554,Ifitm1,ENSMUSG00000025491,protein_coding -26556,Ifitm3,ENSMUSG00000025492,protein_coding -26562,Sigirr,ENSMUSG00000025494,protein_coding -26565,Ptdss2,ENSMUSG00000025495,protein_coding -26578,Drd4,ENSMUSG00000025496,protein_coding -26576,Cdhr5,ENSMUSG00000025497,protein_coding -26575,Irf7,ENSMUSG00000025498,protein_coding 
-26569,Hras,ENSMUSG00000025499,protein_coding -26571,Lmntd2,ENSMUSG00000025500,protein_coding -26584,Taldo1,ENSMUSG00000025503,protein_coding -26581,Eps8l2,ENSMUSG00000025504,protein_coding -26580,Tmem80,ENSMUSG00000025505,protein_coding -26589,Pidd1,ENSMUSG00000025507,protein_coding -26590,Rplp2,ENSMUSG00000025508,protein_coding -26592,Pnpla2,ENSMUSG00000025509,protein_coding -26595,Cd151,ENSMUSG00000025510,protein_coding -26597,Tspan4,ENSMUSG00000025511,protein_coding -26598,Chid1,ENSMUSG00000025512,protein_coding -26604,Muc2,ENSMUSG00000025515,protein_coding -30504,Tktl2,ENSMUSG00000025519,protein_coding -30479,Tmem192,ENSMUSG00000025521,protein_coding -9008,Apool,ENSMUSG00000025525,protein_coding -9009,Satl1,ENSMUSG00000025527,protein_coding -9012,2010106E10Rik,ENSMUSG00000025528,protein_coding -9013,Zfp711,ENSMUSG00000025529,protein_coding -9024,Chm,ENSMUSG00000025531,protein_coding -18242,Crcp,ENSMUSG00000025532,protein_coding -18241,Asl,ENSMUSG00000025533,protein_coding -18240,Gusb,ENSMUSG00000025534,protein_coding -18232,Phkg1,ENSMUSG00000025537,protein_coding -18231,Sumf2,ENSMUSG00000025538,protein_coding -34403,Tm9sf2,ENSMUSG00000025544,protein_coding -34406,Clybl,ENSMUSG00000025545,protein_coding -34431,Fgf14,ENSMUSG00000025551,protein_coding -34386,Farp1,ENSMUSG00000025555,protein_coding -34391,Slc15a1,ENSMUSG00000025557,protein_coding -34394,Dock9,ENSMUSG00000025558,protein_coding -40131,Tnrc6c,ENSMUSG00000025571,protein_coding -40132,Tmc6,ENSMUSG00000025572,protein_coding -40136,6030468B19Rik,ENSMUSG00000025573,protein_coding -40139,Tk1,ENSMUSG00000025574,protein_coding -40158,Cant1,ENSMUSG00000025575,protein_coding -40163,Rbfox3,ENSMUSG00000025576,protein_coding -40168,Cbx2,ENSMUSG00000025577,protein_coding -40169,Cbx8,ENSMUSG00000025578,protein_coding -40180,Gaa,ENSMUSG00000025579,protein_coding -40181,Eif4a3,ENSMUSG00000025580,protein_coding -40189,Nptx1,ENSMUSG00000025582,protein_coding -40191,Rptor,ENSMUSG00000025583,protein_coding 
-24903,Pde8a,ENSMUSG00000025584,protein_coding -24906,Cpeb1,ENSMUSG00000025586,protein_coding -30519,Nat1,ENSMUSG00000025588,protein_coding -30502,Tma16,ENSMUSG00000025591,protein_coding -9027,Dach2,ENSMUSG00000025592,protein_coding -9034,Klhl4,ENSMUSG00000025597,protein_coding -35255,Zfp202,ENSMUSG00000025602,protein_coding -19256,Copg2,ENSMUSG00000025607,protein_coding -19274,Podxl,ENSMUSG00000025608,protein_coding -19273,Mkln1,ENSMUSG00000025609,protein_coding -48501,Map3k7cl,ENSMUSG00000025610,protein_coding -48503,Bach1,ENSMUSG00000025612,protein_coding -48497,Cct8,ENSMUSG00000025613,protein_coding -48496,Usp16,ENSMUSG00000025616,protein_coding -8003,Phf6,ENSMUSG00000025626,protein_coding -8004,Hprt,ENSMUSG00000025630,protein_coding -36902,Ccdc51,ENSMUSG00000025645,protein_coding -36899,Atrip,ENSMUSG00000025646,protein_coding -36898,Shisa5,ENSMUSG00000025647,protein_coding -36897,Pfkfb4,ENSMUSG00000025648,protein_coding -36893,Col7a1,ENSMUSG00000025650,protein_coding -36891,Uqcrc1,ENSMUSG00000025651,protein_coding -36890,Tmem89,ENSMUSG00000025652,protein_coding -8684,Arhgef9,ENSMUSG00000025656,protein_coding -9711,Cnksr2,ENSMUSG00000025658,protein_coding -8997,Rps6ka6,ENSMUSG00000025665,protein_coding -8524,Tmem47,ENSMUSG00000025666,protein_coding -21134,Alox5,ENSMUSG00000025701,protein_coding -21131,March8,ENSMUSG00000025702,protein_coding -3859,Myo3a,ENSMUSG00000025716,protein_coding -24889,Wdr73,ENSMUSG00000025722,protein_coding -24890,Nmb,ENSMUSG00000025723,protein_coding -24891,Sec11a,ENSMUSG00000025724,protein_coding -24900,Slc28a1,ENSMUSG00000025726,protein_coding -49401,A930017K11Rik,ENSMUSG00000025727,protein_coding -49399,Pigq,ENSMUSG00000025728,protein_coding -49396,Rab40c,ENSMUSG00000025730,protein_coding -49394,Mettl26,ENSMUSG00000025731,protein_coding -49393,Mcrip2,ENSMUSG00000025732,protein_coding -49391,Rhot2,ENSMUSG00000025733,protein_coding -49389,Rhbdl1,ENSMUSG00000025735,protein_coding -49386,Jmjd8,ENSMUSG00000025736,protein_coding 
-49385,Wdr24,ENSMUSG00000025737,protein_coding -49384,Fbxl16,ENSMUSG00000025738,protein_coding -49369,Gng13,ENSMUSG00000025739,protein_coding -9841,Prps2,ENSMUSG00000025742,protein_coding -14828,Sdc3,ENSMUSG00000025743,protein_coding -16132,Hadha,ENSMUSG00000025745,protein_coding -16127,Il6,ENSMUSG00000025746,protein_coding -16128,Tyms,ENSMUSG00000025747,protein_coding -24789,Agbl1,ENSMUSG00000025754,protein_coding -10381,Hspa4l,ENSMUSG00000025757,protein_coding -10382,Plk4,ENSMUSG00000025758,protein_coding -10383,Mfsd8,ENSMUSG00000025759,protein_coding -10391,Larp1b,ENSMUSG00000025762,protein_coding -10404,Jade1,ENSMUSG00000025764,protein_coding -10408,D3Ertd751e,ENSMUSG00000025766,protein_coding -238,Crisp4,ENSMUSG00000025774,protein_coding -230,Crispld1,ENSMUSG00000025776,protein_coding -220,Gdap1,ENSMUSG00000025777,protein_coding -213,Ly96,ENSMUSG00000025779,protein_coding -3556,Itih5,ENSMUSG00000025780,protein_coding -3552,Atp5c1,ENSMUSG00000025781,protein_coding -3550,Taf3,ENSMUSG00000025782,protein_coding -3547,4930412O13Rik,ENSMUSG00000025783,protein_coding -37256,Clec3b,ENSMUSG00000025784,protein_coding -37254,Exosc7,ENSMUSG00000025785,protein_coding -37253,Zdhhc3,ENSMUSG00000025786,protein_coding -37252,Tgm4,ENSMUSG00000025787,protein_coding -24758,St8sia2,ENSMUSG00000025789,protein_coding -24763,Slco3a1,ENSMUSG00000025790,protein_coding -14098,Pgm1,ENSMUSG00000025791,protein_coding -40231,Slc25a10,ENSMUSG00000025792,protein_coding -40227,Hgs,ENSMUSG00000025793,protein_coding -37150,Rpl14,ENSMUSG00000025794,protein_coding -29058,Rassf3,ENSMUSG00000025795,protein_coding -37280,Ccr1,ENSMUSG00000025804,protein_coding -31897,Ccdc7a,ENSMUSG00000025808,protein_coding -31896,Itgb1,ENSMUSG00000025809,protein_coding -31891,Nrp1,ENSMUSG00000025810,protein_coding -31887,Pard3,ENSMUSG00000025812,protein_coding -24918,Homer2,ENSMUSG00000025813,protein_coding -3511,Dhtkd1,ENSMUSG00000025815,protein_coding -3509,Sec61a2,ENSMUSG00000025816,protein_coding 
-3507,Nudt5,ENSMUSG00000025817,protein_coding -19639,Zfp282,ENSMUSG00000025821,protein_coding -19633,Pdia4,ENSMUSG00000025823,protein_coding -17762,Iscu,ENSMUSG00000025825,protein_coding -5192,Pramel6,ENSMUSG00000025838,protein_coding -5191,Pramel7,ENSMUSG00000025839,protein_coding -18529,Fam20c,ENSMUSG00000025854,protein_coding -18533,Prkar1b,ENSMUSG00000025855,protein_coding -18531,Pdgfa,ENSMUSG00000025856,protein_coding -18535,Dnaaf5,ENSMUSG00000025857,protein_coding -18538,Get4,ENSMUSG00000025858,protein_coding -7882,Xiap,ENSMUSG00000025860,protein_coding -7885,Stag2,ENSMUSG00000025862,protein_coding -41498,Cplx2,ENSMUSG00000025867,protein_coding -41508,Higd2a,ENSMUSG00000025868,protein_coding -41507,Nop16,ENSMUSG00000025869,protein_coding -41505,Arl10,ENSMUSG00000025870,protein_coding -41504,4833439L19Rik,ENSMUSG00000025871,protein_coding -41501,Thoc3,ENSMUSG00000025872,protein_coding -41510,Faf2,ENSMUSG00000025873,protein_coding -41517,Tspan17,ENSMUSG00000025875,protein_coding -41522,Unc5a,ENSMUSG00000025876,protein_coding -41523,Hk3,ENSMUSG00000025877,protein_coding -41524,Uimc1,ENSMUSG00000025878,protein_coding -53828,Smad7,ENSMUSG00000025880,protein_coding -53806,Myo5b,ENSMUSG00000025885,protein_coding -34476,Casp12,ENSMUSG00000025887,protein_coding -34474,Casp1,ENSMUSG00000025888,protein_coding -19958,Snca,ENSMUSG00000025889,protein_coding -34470,Gria4,ENSMUSG00000025892,protein_coding -34467,Kbtbd3,ENSMUSG00000025893,protein_coding -34466,Aasdhppt,ENSMUSG00000025894,protein_coding -34460,Cwf19l2,ENSMUSG00000025898,protein_coding -34457,Alkbh8,ENSMUSG00000025899,protein_coding -16,Rp1,ENSMUSG00000025900,protein_coding -19,Sox17,ENSMUSG00000025902,protein_coding -33,Lypla1,ENSMUSG00000025903,protein_coding -47,Oprk1,ENSMUSG00000025905,protein_coding -53,Rb1cc1,ENSMUSG00000025907,protein_coding -81,Sntg1,ENSMUSG00000025909,protein_coding -102,Adhfe1,ENSMUSG00000025911,protein_coding -107,Mybl1,ENSMUSG00000025912,protein_coding 
-110,Sgk3,ENSMUSG00000025915,protein_coding -118,Ppp1r42,ENSMUSG00000025916,protein_coding -121,Cops5,ENSMUSG00000025917,protein_coding -205,Stau2,ENSMUSG00000025920,protein_coding -202,Rdh10,ENSMUSG00000025921,protein_coding -195,Terf1,ENSMUSG00000025925,protein_coding -250,Tfap2b,ENSMUSG00000025927,protein_coding -264,Il17a,ENSMUSG00000025929,protein_coding -181,Msc,ENSMUSG00000025930,protein_coding -271,Paqr8,ENSMUSG00000025931,protein_coding -177,Eya1,ENSMUSG00000025932,protein_coding -278,Tmem14a,ENSMUSG00000025933,protein_coding -280,Gsta3,ENSMUSG00000025934,protein_coding -168,Tram1,ENSMUSG00000025935,protein_coding -169,Lactb2,ENSMUSG00000025937,protein_coding -152,Slco5a1,ENSMUSG00000025938,protein_coding -207,Ube2w,ENSMUSG00000025939,protein_coding -212,Tmem70,ENSMUSG00000025940,protein_coding -1074,Crygf,ENSMUSG00000025945,protein_coding -1065,Pth2r,ENSMUSG00000025946,protein_coding -1064,Pikfyve,ENSMUSG00000025949,protein_coding -1063,Idh1,ENSMUSG00000025950,protein_coding -1057,Crygc,ENSMUSG00000025952,protein_coding -1053,Akr1cl,ENSMUSG00000025955,protein_coding -1042,Mettl21a,ENSMUSG00000025956,protein_coding -1041,Creb1,ENSMUSG00000025958,protein_coding -1033,Klf7,ENSMUSG00000025959,protein_coding -1029,4933402D24Rik,ENSMUSG00000025961,protein_coding -1028,Fastkd2,ENSMUSG00000025962,protein_coding -1027,Mdh1b,ENSMUSG00000025963,protein_coding -1022,Adam23,ENSMUSG00000025964,protein_coding -1007,Eef1b2,ENSMUSG00000025967,protein_coding -1005,Ndufs1,ENSMUSG00000025968,protein_coding -994,Nrp2,ENSMUSG00000025969,protein_coding -867,Maip1,ENSMUSG00000025971,protein_coding -843,Boll,ENSMUSG00000025977,protein_coding -840,Rftn2,ENSMUSG00000025978,protein_coding -838,Mob4,ENSMUSG00000025979,protein_coding -834,Hspd1,ENSMUSG00000025980,protein_coding -832,Coq10b,ENSMUSG00000025981,protein_coding -830,Sf3b1,ENSMUSG00000025982,protein_coding -819,Ccdc150,ENSMUSG00000025983,protein_coding -715,Slc39a10,ENSMUSG00000025986,protein_coding 
-1102,Cps1,ENSMUSG00000025991,protein_coding -705,Slc40a1,ENSMUSG00000025993,protein_coding -700,Wdr75,ENSMUSG00000025995,protein_coding -1125,Ikzf2,ENSMUSG00000025997,protein_coding -1101,Lancl1,ENSMUSG00000026000,protein_coding -1094,Acadl,ENSMUSG00000026003,protein_coding -1085,Kansl1l,ENSMUSG00000026004,protein_coding -1084,Rpe,ENSMUSG00000026005,protein_coding -966,Icos,ENSMUSG00000026009,protein_coding -963,Ctla4,ENSMUSG00000026011,protein_coding -956,Cd28,ENSMUSG00000026012,protein_coding -948,Raph1,ENSMUSG00000026014,protein_coding -941,Carf,ENSMUSG00000026017,protein_coding -939,Ica1l,ENSMUSG00000026018,protein_coding -940,Wdr12,ENSMUSG00000026019,protein_coding -928,Nop58,ENSMUSG00000026020,protein_coding -924,Sumo1,ENSMUSG00000026021,protein_coding -916,Cdk15,ENSMUSG00000026023,protein_coding -915,Als2,ENSMUSG00000026024,protein_coding -905,Stradb,ENSMUSG00000026027,protein_coding -904,Trak2,ENSMUSG00000026028,protein_coding -901,Casp8,ENSMUSG00000026029,protein_coding -899,Cflar,ENSMUSG00000026031,protein_coding -892,Ndufb3,ENSMUSG00000026032,protein_coding -885,Clk1,ENSMUSG00000026034,protein_coding -886,Ppil3,ENSMUSG00000026035,protein_coding -887,Nif3l1,ENSMUSG00000026036,protein_coding -888,Orc2,ENSMUSG00000026037,protein_coding -876,Sgo2a,ENSMUSG00000026039,protein_coding -691,Col5a2,ENSMUSG00000026042,protein_coding -689,Col3a1,ENSMUSG00000026043,protein_coding -672,Kdelc1,ENSMUSG00000026047,protein_coding -674,Ercc5,ENSMUSG00000026048,protein_coding -670,Tex30,ENSMUSG00000026049,protein_coding -656,1500015O10Rik,ENSMUSG00000026051,protein_coding -411,Khdrbs2,ENSMUSG00000026058,protein_coding -618,Slc9a2,ENSMUSG00000026062,protein_coding -402,Pih1d3,ENSMUSG00000026063,protein_coding -393,Ptp4a1,ENSMUSG00000026064,protein_coding -616,Slc9a4,ENSMUSG00000026065,protein_coding -614,Il18rap,ENSMUSG00000026068,protein_coding -612,Il1rl1,ENSMUSG00000026069,protein_coding -613,Il18r1,ENSMUSG00000026070,protein_coding 
-608,Il1r1,ENSMUSG00000026072,protein_coding -605,Il1r2,ENSMUSG00000026073,protein_coding -603,Map4k4,ENSMUSG00000026074,protein_coding -584,Npas2,ENSMUSG00000026077,protein_coding -579,Pdcl3,ENSMUSG00000026078,protein_coding -575,Chst10,ENSMUSG00000026080,protein_coding -564,Rev1,ENSMUSG00000026082,protein_coding -563,Eif5b,ENSMUSG00000026083,protein_coding -560,Lyg1,ENSMUSG00000026085,protein_coding -557,Mrpl30,ENSMUSG00000026087,protein_coding -556,Mitd1,ENSMUSG00000026088,protein_coding -549,2010300C02Rik,ENSMUSG00000026090,protein_coding -812,Stk17b,ENSMUSG00000026094,protein_coding -805,Asnsd1,ENSMUSG00000026095,protein_coding -802,Osgepl1,ENSMUSG00000026096,protein_coding -800,Ormdl1,ENSMUSG00000026097,protein_coding -798,Pms1,ENSMUSG00000026098,protein_coding -795,Mstn,ENSMUSG00000026100,protein_coding -791,Inpp1,ENSMUSG00000026102,protein_coding -777,Gls,ENSMUSG00000026103,protein_coding -775,Stat1,ENSMUSG00000026104,protein_coding -761,Nabp1,ENSMUSG00000026107,protein_coding -751,Tmeff2,ENSMUSG00000026109,protein_coding -546,Mgat4a,ENSMUSG00000026110,protein_coding -545,Unc50,ENSMUSG00000026111,protein_coding -544,Coa5,ENSMUSG00000026112,protein_coding -541,Inpp4a,ENSMUSG00000026113,protein_coding -539,Cnga3,ENSMUSG00000026114,protein_coding -531,Tmem131,ENSMUSG00000026116,protein_coding -529,Zap70,ENSMUSG00000026117,protein_coding -520,Sema4c,ENSMUSG00000026121,protein_coding -485,Plekhb2,ENSMUSG00000026123,protein_coding -469,Cfc1,ENSMUSG00000026124,protein_coding -467,Prss39,ENSMUSG00000026125,protein_coding -464,Ptpn18,ENSMUSG00000026126,protein_coding -463,Imp4,ENSMUSG00000026127,protein_coding -447,Dst,ENSMUSG00000026131,protein_coding -434,Prim2,ENSMUSG00000026134,protein_coding -1213,Zfp142,ENSMUSG00000026135,protein_coding -323,Col19a1,ENSMUSG00000026141,protein_coding -1346,Rhbdd1,ENSMUSG00000026142,protein_coding -322,Col9a1,ENSMUSG00000026147,protein_coding -1356,Tm4sf20,ENSMUSG00000026149,protein_coding 
-1354,Mff,ENSMUSG00000026150,protein_coding -319,Fam135a,ENSMUSG00000026153,protein_coding -317,Sdhaf4,ENSMUSG00000026154,protein_coding -316,Smap1,ENSMUSG00000026155,protein_coding -315,B3gat2,ENSMUSG00000026156,protein_coding -312,Ogfrl1,ENSMUSG00000026158,protein_coding -1363,Agfg1,ENSMUSG00000026159,protein_coding -1234,Nhej1,ENSMUSG00000026162,protein_coding -1384,Sphkap,ENSMUSG00000026163,protein_coding -1379,Ccl20,ENSMUSG00000026166,protein_coding -1224,Wnt10a,ENSMUSG00000026167,protein_coding -1219,Cyp27a1,ENSMUSG00000026170,protein_coding -1216,Rnf25,ENSMUSG00000026171,protein_coding -1215,Bcs1l,ENSMUSG00000026172,protein_coding -1212,Plcd4,ENSMUSG00000026173,protein_coding -1211,Cnot9,ENSMUSG00000026174,protein_coding -1208,Vil1,ENSMUSG00000026175,protein_coding -1206,Ctdsp1,ENSMUSG00000026176,protein_coding -1205,Slc11a1,ENSMUSG00000026177,protein_coding -1199,Pnkd,ENSMUSG00000026179,protein_coding -1191,Cxcr2,ENSMUSG00000026180,protein_coding -47412,Ppm1f,ENSMUSG00000026181,protein_coding -1175,Tnp1,ENSMUSG00000026182,protein_coding -1173,Igfbp5,ENSMUSG00000026185,protein_coding -1163,Xrcc5,ENSMUSG00000026187,protein_coding -1162,Tmem169,ENSMUSG00000026188,protein_coding -1161,Pecr,ENSMUSG00000026189,protein_coding -1142,Atic,ENSMUSG00000026192,protein_coding -1143,Fn1,ENSMUSG00000026193,protein_coding -1137,Bard1,ENSMUSG00000026196,protein_coding -1238,Zfand2b,ENSMUSG00000026197,protein_coding -1239,Abcb6,ENSMUSG00000026198,protein_coding -1242,Ankzf1,ENSMUSG00000026199,protein_coding -1243,Glb1l,ENSMUSG00000026200,protein_coding -1245,Stk16,ENSMUSG00000026201,protein_coding -1246,Tuba4a,ENSMUSG00000026202,protein_coding -1249,Dnajb2,ENSMUSG00000026203,protein_coding -1250,Ptprn,ENSMUSG00000026204,protein_coding -1235,Slc23a3,ENSMUSG00000026205,protein_coding -1257,Speg,ENSMUSG00000026207,protein_coding -1255,Des,ENSMUSG00000026208,protein_coding -1254,Dnpep,ENSMUSG00000026209,protein_coding -1263,Obsl1,ENSMUSG00000026211,protein_coding 
-1267,Stk11ip,ENSMUSG00000026213,protein_coding -1399,Trip12,ENSMUSG00000026219,protein_coding -1403,Slc16a14,ENSMUSG00000026220,protein_coding -1433,Sp100,ENSMUSG00000026222,protein_coding -1441,Itm2c,ENSMUSG00000026223,protein_coding -1442,4933407L21Rik,ENSMUSG00000026224,protein_coding -1447,Spata3,ENSMUSG00000026226,protein_coding -1448,2810459M11Rik,ENSMUSG00000026227,protein_coding -1451,Htr2b,ENSMUSG00000026228,protein_coding -1449,Psmd1,ENSMUSG00000026229,protein_coding -1460,Ncl,ENSMUSG00000026234,protein_coding -1281,Epha4,ENSMUSG00000026235,protein_coding -1467,Nmur1,ENSMUSG00000026237,protein_coding -1470,Ptma,ENSMUSG00000026238,protein_coding -1471,Pde6d,ENSMUSG00000026239,protein_coding -1473,Cops7b,ENSMUSG00000026240,protein_coding -1476,Nppc,ENSMUSG00000026241,protein_coding -1292,Farsb,ENSMUSG00000026245,protein_coding -1492,Alppl2,ENSMUSG00000026246,protein_coding -1497,Ecel1,ENSMUSG00000026247,protein_coding -1313,Mrpl44,ENSMUSG00000026248,protein_coding -1314,Serpine2,ENSMUSG00000026249,protein_coding -1499,Chrnd,ENSMUSG00000026251,protein_coding -1500,Chrng,ENSMUSG00000026253,protein_coding -1501,Eif4e2,ENSMUSG00000026254,protein_coding -1503,Efhd1,ENSMUSG00000026255,protein_coding -1509,Snorc,ENSMUSG00000026258,protein_coding -1510,Ngef,ENSMUSG00000026259,protein_coding -1629,Ndufa10,ENSMUSG00000026260,protein_coding -1654,Rnpepl1,ENSMUSG00000026269,protein_coding -1656,Capn10,ENSMUSG00000026270,protein_coding -1657,Gpr35,ENSMUSG00000026271,protein_coding -1662,Agxt,ENSMUSG00000026272,protein_coding -1667,Mterf4,ENSMUSG00000026273,protein_coding -1670,Pask,ENSMUSG00000026274,protein_coding -1671,Ppp1r7,ENSMUSG00000026275,protein_coding -1678,Sept2,ENSMUSG00000026276,protein_coding -1682,Stk25,ENSMUSG00000026277,protein_coding -1684,Bok,ENSMUSG00000026278,protein_coding -1685,Thap4,ENSMUSG00000026279,protein_coding -1687,Atg4b,ENSMUSG00000026280,protein_coding -1688,Dtymk,ENSMUSG00000026281,protein_coding 
-1689,Ing5,ENSMUSG00000026283,protein_coding -1698,Pdcd1,ENSMUSG00000026285,protein_coding -1514,Inpp5d,ENSMUSG00000026288,protein_coding -1515,Atg16l1,ENSMUSG00000026289,protein_coding -1550,Spp2,ENSMUSG00000026295,protein_coding -1579,Iqca,ENSMUSG00000026301,protein_coding -1593,Mlph,ENSMUSG00000026303,protein_coding -1595,Rab17,ENSMUSG00000026304,protein_coding -1597,Lrrfip1,ENSMUSG00000026305,protein_coding -1604,Scly,ENSMUSG00000026307,protein_coding -1607,Klhl30,ENSMUSG00000026308,protein_coding -1609,Ilkap,ENSMUSG00000026309,protein_coding -1617,Asb1,ENSMUSG00000026311,protein_coding -1859,Cdh7,ENSMUSG00000026312,protein_coding -1623,Hdac4,ENSMUSG00000026313,protein_coding -1843,Serpinb8,ENSMUSG00000026315,protein_coding -29650,Cln8,ENSMUSG00000026317,protein_coding -1802,Relch,ENSMUSG00000026319,protein_coding -1805,Tnfrsf11a,ENSMUSG00000026321,protein_coding -53600,Htr4,ENSMUSG00000026322,protein_coding -1837,Serpinb11,ENSMUSG00000026327,protein_coding -1728,Slco6c1,ENSMUSG00000026331,protein_coding -1734,Gin1,ENSMUSG00000026333,protein_coding -1735,Pam,ENSMUSG00000026335,protein_coding -1745,Slco6d1,ENSMUSG00000026336,protein_coding -1974,Ccdc93,ENSMUSG00000026339,protein_coding -2012,Actr3,ENSMUSG00000026341,protein_coding -2014,Slc35f5,ENSMUSG00000026342,protein_coding -2016,Gpr39,ENSMUSG00000026343,protein_coding -2017,Lypd1,ENSMUSG00000026344,protein_coding -2032,Tmem163,ENSMUSG00000026347,protein_coding -2033,Acmsd,ENSMUSG00000026348,protein_coding -2035,Ccnt2,ENSMUSG00000026349,protein_coding -2050,Ubxn4,ENSMUSG00000026353,protein_coding -2051,Lct,ENSMUSG00000026354,protein_coding -2052,Mcm6,ENSMUSG00000026355,protein_coding -2053,Dars,ENSMUSG00000026356,protein_coding -2394,Rgs18,ENSMUSG00000026357,protein_coding -2390,Rgs1,ENSMUSG00000026358,protein_coding -2387,Rgs2,ENSMUSG00000026360,protein_coding -2379,Cdc73,ENSMUSG00000026361,protein_coding -2363,Cfh,ENSMUSG00000026365,protein_coding -2350,F13b,ENSMUSG00000026368,protein_coding 
-1912,Tsn,ENSMUSG00000026374,protein_coding -1913,Nifk,ENSMUSG00000026377,protein_coding -1920,Tfcp2l1,ENSMUSG00000026380,protein_coding -1939,Epb41l5,ENSMUSG00000026383,protein_coding -1941,Ptpn4,ENSMUSG00000026384,protein_coding -1954,Dbi,ENSMUSG00000026385,protein_coding -1952,Sctr,ENSMUSG00000026387,protein_coding -1955,3110009E18Rik,ENSMUSG00000026388,protein_coding -1956,Steap3,ENSMUSG00000026389,protein_coding -1959,Marco,ENSMUSG00000026390,protein_coding -2323,Nek7,ENSMUSG00000026393,protein_coding -2321,Atp6v1g3,ENSMUSG00000026394,protein_coding -2316,Ptprc,ENSMUSG00000026395,protein_coding -2292,Nr5a2,ENSMUSG00000026398,protein_coding -2077,Cd55,ENSMUSG00000026399,protein_coding -2074,Cd55b,ENSMUSG00000026401,protein_coding -2276,Ddx59,ENSMUSG00000026404,protein_coding -2083,C4bp,ENSMUSG00000026405,protein_coding -2262,Cacna1s,ENSMUSG00000026407,protein_coding -2086,Pfkfb2,ENSMUSG00000026409,protein_coding -2260,Tmem9,ENSMUSG00000026411,protein_coding -2257,Pkp1,ENSMUSG00000026413,protein_coding -2255,Tnnt2,ENSMUSG00000026414,protein_coding -2092,Fcamr,ENSMUSG00000026415,protein_coding -2097,Il20,ENSMUSG00000026416,protein_coding -2094,Pigr,ENSMUSG00000026417,protein_coding -2253,Tnni1,ENSMUSG00000026418,protein_coding -2096,Il24,ENSMUSG00000026420,protein_coding -2251,Csrp1,ENSMUSG00000026421,protein_coding -2235,Gpr37l1,ENSMUSG00000026424,protein_coding -2111,Srgap2,ENSMUSG00000026425,protein_coding -2232,Arl8a,ENSMUSG00000026426,protein_coding -2105,Eif2d,ENSMUSG00000026427,protein_coding -2227,Ube2t,ENSMUSG00000026429,protein_coding -2106,Rassf5,ENSMUSG00000026430,protein_coding -2118,Avpr1b,ENSMUSG00000026432,protein_coding -2125,Rab29,ENSMUSG00000026433,protein_coding -2127,Nucks1,ENSMUSG00000026434,protein_coding -2131,Slc45a3,ENSMUSG00000026435,protein_coding -2132,Elk4,ENSMUSG00000026436,protein_coding -2137,Cdk18,ENSMUSG00000026437,protein_coding -2153,Rbbp5,ENSMUSG00000026439,protein_coding -2156,Nfasc,ENSMUSG00000026442,protein_coding 
-2159,Lrrn2,ENSMUSG00000026443,protein_coding -2164,Pik3c2b,ENSMUSG00000026447,protein_coding -2200,Chit1,ENSMUSG00000026450,protein_coding -2221,Syt2,ENSMUSG00000026452,protein_coding -2213,Klhl12,ENSMUSG00000026455,protein_coding -2209,Cyb5r1,ENSMUSG00000026456,protein_coding -2210,Adipor1,ENSMUSG00000026457,protein_coding -2206,Ppfia4,ENSMUSG00000026458,protein_coding -2205,Myog,ENSMUSG00000026459,protein_coding -2186,Atp2b4,ENSMUSG00000026463,protein_coding -2560,Tor1aip1,ENSMUSG00000026466,protein_coding -2549,Lhx4,ENSMUSG00000026468,protein_coding -2545,Xpr1,ENSMUSG00000026469,protein_coding -2541,Stx6,ENSMUSG00000026470,protein_coding -2540,Mr1,ENSMUSG00000026471,protein_coding -2524,Glul,ENSMUSG00000026473,protein_coding -2514,Rgs16,ENSMUSG00000026475,protein_coding -2500,Lamc1,ENSMUSG00000026478,protein_coding -2499,Lamc2,ENSMUSG00000026479,protein_coding -2492,Ncf2,ENSMUSG00000026480,protein_coding -2486,Rgl1,ENSMUSG00000026482,protein_coding -2475,Fam129a,ENSMUSG00000026483,protein_coding -2470,Rnf2,ENSMUSG00000026484,protein_coding -3134,Coq8a,ENSMUSG00000026489,protein_coding -3131,Cdc42bpa,ENSMUSG00000026490,protein_coding -3128,Ahctf1,ENSMUSG00000026491,protein_coding -3123,Tfb2m,ENSMUSG00000026492,protein_coding -3117,Kif26b,ENSMUSG00000026494,protein_coding -3116,Efcab2,ENSMUSG00000026495,protein_coding -3146,Parp1,ENSMUSG00000026496,protein_coding -3149,Mixl1,ENSMUSG00000026497,protein_coding -3151,Acbd3,ENSMUSG00000026499,protein_coding -3111,Cox20,ENSMUSG00000026500,protein_coding -3106,Desi2,ENSMUSG00000026502,protein_coding -3075,Sdccag8,ENSMUSG00000026504,protein_coding -3207,Capn2,ENSMUSG00000026509,protein_coding -3206,Trp53bp2,ENSMUSG00000026510,protein_coding -3198,Srp9,ENSMUSG00000026511,protein_coding -3181,Cnih3,ENSMUSG00000026514,protein_coding -3174,Nvl,ENSMUSG00000026516,protein_coding -3164,Tmem63a,ENSMUSG00000026519,protein_coding -3162,Pycr2,ENSMUSG00000026520,protein_coding -3053,Wdr64,ENSMUSG00000026523,protein_coding 
-3051,Opn3,ENSMUSG00000026525,protein_coding -3049,Fh1,ENSMUSG00000026526,protein_coding -3046,Rgs7,ENSMUSG00000026527,protein_coding -3038,Mptx1,ENSMUSG00000026531,protein_coding -3036,Spta1,ENSMUSG00000026532,protein_coding -3019,Ifi202b,ENSMUSG00000026535,protein_coding -3017,Ifi211,ENSMUSG00000026536,protein_coding -2985,Apcs,ENSMUSG00000026542,protein_coding -2978,Dusp23,ENSMUSG00000026544,protein_coding -2973,Cfap45,ENSMUSG00000026546,protein_coding -2971,Tagln2,ENSMUSG00000026547,protein_coding -2969,Slamf9,ENSMUSG00000026548,protein_coding -2950,Copa,ENSMUSG00000026553,protein_coding -2955,Dcaf8,ENSMUSG00000026554,protein_coding -2947,Vangl2,ENSMUSG00000026556,protein_coding -2827,Uck2,ENSMUSG00000026558,protein_coding -2818,Fmo9,ENSMUSG00000026560,protein_coding -2811,Tada1,ENSMUSG00000026563,protein_coding -2805,Dusp27,ENSMUSG00000026564,protein_coding -2800,Pou2f1,ENSMUSG00000026565,protein_coding -2784,Mpzl1,ENSMUSG00000026566,protein_coding -2778,Adcy10,ENSMUSG00000026567,protein_coding -2776,Mpc2,ENSMUSG00000026568,protein_coding -2772,Dcaf6,ENSMUSG00000026571,protein_coding -2766,Tbx19,ENSMUSG00000026572,protein_coding -2762,Xcl1,ENSMUSG00000026573,protein_coding -2758,Dpt,ENSMUSG00000026574,protein_coding -2747,Nme7,ENSMUSG00000026575,protein_coding -2749,Atp1b1,ENSMUSG00000026576,protein_coding -2746,Blzf1,ENSMUSG00000026577,protein_coding -2745,Ccdc181,ENSMUSG00000026578,protein_coding -2742,F5,ENSMUSG00000026579,protein_coding -2739,Selp,ENSMUSG00000026580,protein_coding -2738,Sell,ENSMUSG00000026581,protein_coding -2737,Sele,ENSMUSG00000026582,protein_coding -2733,Scyl3,ENSMUSG00000026584,protein_coding -2732,Kifap3,ENSMUSG00000026585,protein_coding -2722,Prrx1,ENSMUSG00000026586,protein_coding -2605,Astn1,ENSMUSG00000026587,protein_coding -2598,Sec16b,ENSMUSG00000026589,protein_coding -2588,Tex35,ENSMUSG00000026592,protein_coding -2580,Ralgps2,ENSMUSG00000026594,protein_coding -2574,Abl2,ENSMUSG00000026596,protein_coding 
-2571,Soat1,ENSMUSG00000026600,protein_coding -2569,Axdnd1,ENSMUSG00000026601,protein_coding -2568,Nphs2,ENSMUSG00000026602,protein_coding -3328,Smyd2,ENSMUSG00000026603,protein_coding -3324,Ptpn14,ENSMUSG00000026604,protein_coding -3322,Cenpf,ENSMUSG00000026605,protein_coding -3313,Kctd3,ENSMUSG00000026608,protein_coding -3310,Ush2a,ENSMUSG00000026609,protein_coding -3304,Esrrg,ENSMUSG00000026610,protein_coding -3294,Spata17,ENSMUSG00000026611,protein_coding -3272,Slc30a10,ENSMUSG00000026614,protein_coding -3269,Eprs,ENSMUSG00000026615,protein_coding -3441,Cr2,ENSMUSG00000026616,protein_coding -3267,Bpnt1,ENSMUSG00000026617,protein_coding -3262,Iars2,ENSMUSG00000026618,protein_coding -3253,Mark1,ENSMUSG00000026620,protein_coding -3248,Marc1,ENSMUSG00000026621,protein_coding -3374,Nek2,ENSMUSG00000026622,protein_coding -3369,Lpgat1,ENSMUSG00000026623,protein_coding -3360,Ppp2r5a,ENSMUSG00000026626,protein_coding -3359,Tmem206,ENSMUSG00000026627,protein_coding -3353,Atf3,ENSMUSG00000026628,protein_coding -3350,Batf3,ENSMUSG00000026630,protein_coding -3347,Tatdn3,ENSMUSG00000026632,protein_coding -3339,Angel2,ENSMUSG00000026634,protein_coding -3380,Traf5,ENSMUSG00000026637,protein_coding -3401,Irf6,ENSMUSG00000026638,protein_coding -3406,Lamb3,ENSMUSG00000026639,protein_coding -3420,Plxna2,ENSMUSG00000026640,protein_coding -2918,Usf1,ENSMUSG00000026641,protein_coding -3450,Nmt2,ENSMUSG00000026643,protein_coding -3453,Acbd7,ENSMUSG00000026644,protein_coding -3454,Olah,ENSMUSG00000026645,protein_coding -3458,Suv39h2,ENSMUSG00000026646,protein_coding -3457,Dclre1c,ENSMUSG00000026648,protein_coding -2893,Cfap126,ENSMUSG00000026649,protein_coding -3456,Meig1,ENSMUSG00000026650,protein_coding -3465,Fam107b,ENSMUSG00000026655,protein_coding -2884,Fcgr2b,ENSMUSG00000026656,protein_coding -3471,Frmd4a,ENSMUSG00000026657,protein_coding -2878,Dusp12,ENSMUSG00000026659,protein_coding -3490,Sephs1,ENSMUSG00000026662,protein_coding -2874,Atf6,ENSMUSG00000026663,protein_coding 
-3491,Phyh,ENSMUSG00000026664,protein_coding -2864,Uhmk1,ENSMUSG00000026667,protein_coding -3495,Ucma,ENSMUSG00000026668,protein_coding -3496,Mcm10,ENSMUSG00000026669,protein_coding -2861,Uap1,ENSMUSG00000026670,protein_coding -3497,Optn,ENSMUSG00000026672,protein_coding -2860,Ddr2,ENSMUSG00000026674,protein_coding -2858,Hsd17b7,ENSMUSG00000026675,protein_coding -3500,Ccdc3,ENSMUSG00000026676,protein_coding -2852,Rgs5,ENSMUSG00000026678,protein_coding -3850,Enkur,ENSMUSG00000026679,protein_coding -2849,Nuf2,ENSMUSG00000026683,protein_coding -2839,Lmx1a,ENSMUSG00000026686,protein_coding -2835,Aldh9a1,ENSMUSG00000026687,protein_coding -2836,Mgst3,ENSMUSG00000026688,protein_coding -2716,Fmo3,ENSMUSG00000026691,protein_coding -2712,Fmo4,ENSMUSG00000026692,protein_coding -2705,Eef1aknmt,ENSMUSG00000026694,protein_coding -2706,Vamp4,ENSMUSG00000026696,protein_coding -2708,Myoc,ENSMUSG00000026697,protein_coding -2695,Pigc,ENSMUSG00000026698,protein_coding -2683,Tnfsf4,ENSMUSG00000026700,protein_coding -2681,Prdx6,ENSMUSG00000026701,protein_coding -2677,Klhl20,ENSMUSG00000026705,protein_coding -3768,Nsun6,ENSMUSG00000026707,protein_coding -2676,Cenpl,ENSMUSG00000026708,protein_coding -2674,Dars2,ENSMUSG00000026709,protein_coding -3747,Mrc1,ENSMUSG00000026712,protein_coding -2658,Serpinc1,ENSMUSG00000026715,protein_coding -3743,Stam,ENSMUSG00000026718,protein_coding -2635,Rabgap1l,ENSMUSG00000026721,protein_coding -3735,Trdmt1,ENSMUSG00000026723,protein_coding -2631,Tnn,ENSMUSG00000026725,protein_coding -3734,Cubn,ENSMUSG00000026726,protein_coding -3728,Rsu1,ENSMUSG00000026727,protein_coding -3736,Vim,ENSMUSG00000026728,protein_coding -3722,Pter,ENSMUSG00000026730,protein_coding -3824,4921504E06Rik,ENSMUSG00000026734,protein_coding -3822,Ptf1a,ENSMUSG00000026735,protein_coding -3813,Pip4k2a,ENSMUSG00000026737,protein_coding -3808,Bmi1,ENSMUSG00000026739,protein_coding -3801,Dnajc1,ENSMUSG00000026740,protein_coding -3796,Mllt10,ENSMUSG00000026743,protein_coding 
-3781,Plxdc2,ENSMUSG00000026748,protein_coding -4413,Nek6,ENSMUSG00000026749,protein_coding -4416,Psmb7,ENSMUSG00000026750,protein_coding -4421,Nr5a1,ENSMUSG00000026751,protein_coding -4444,Ppp6c,ENSMUSG00000026753,protein_coding -4438,Golga1,ENSMUSG00000026754,protein_coding -4437,Arpc5l,ENSMUSG00000026755,protein_coding -4503,Orc4,ENSMUSG00000026761,protein_coding -4510,Kif5c,ENSMUSG00000026764,protein_coding -4512,Lypd6b,ENSMUSG00000026765,protein_coding -4516,Mmadhc,ENSMUSG00000026766,protein_coding -3716,Mindy3,ENSMUSG00000026767,protein_coding -3713,Itga8,ENSMUSG00000026768,protein_coding -3706,Il2ra,ENSMUSG00000026770,protein_coding -3885,Spopl,ENSMUSG00000026771,protein_coding -3700,Pfkfb3,ENSMUSG00000026773,protein_coding -3881,4931423N10Rik,ENSMUSG00000026774,protein_coding -3879,Yme1l1,ENSMUSG00000026775,protein_coding -3687,Prkcq,ENSMUSG00000026778,protein_coding -3878,Mastl,ENSMUSG00000026779,protein_coding -3877,Acbd5,ENSMUSG00000026781,protein_coding -947,Abi2,ENSMUSG00000026782,protein_coding -3872,Pdss1,ENSMUSG00000026784,protein_coding -4137,Pkn3,ENSMUSG00000026785,protein_coding -3870,Apbb1ip,ENSMUSG00000026786,protein_coding -3864,Gad2,ENSMUSG00000026787,protein_coding -4272,Zbtb43,ENSMUSG00000026788,protein_coding -4130,Odf2,ENSMUSG00000026790,protein_coding -4261,Slc2a8,ENSMUSG00000026791,protein_coding -4258,Lrsam1,ENSMUSG00000026792,protein_coding -4257,Fam129b,ENSMUSG00000026796,protein_coding -4253,Stxbp1,ENSMUSG00000026797,protein_coding -4123,Coq4,ENSMUSG00000026798,protein_coding -4113,Med27,ENSMUSG00000026799,protein_coding -4107,Ttf1,ENSMUSG00000026803,protein_coding -4104,Barhl1,ENSMUSG00000026805,protein_coding -4103,Ddx31,ENSMUSG00000026806,protein_coding -4098,Ak8,ENSMUSG00000026807,protein_coding -4097,Spaca9,ENSMUSG00000026809,protein_coding -4231,Dpm2,ENSMUSG00000026810,protein_coding -4236,St6galnac6,ENSMUSG00000026811,protein_coding -4096,Tsc1,ENSMUSG00000026812,protein_coding -4238,Eng,ENSMUSG00000026814,protein_coding 
-4095,Gfi1b,ENSMUSG00000026815,protein_coding -4092,Gtf3c5,ENSMUSG00000026816,protein_coding -4237,Ak1,ENSMUSG00000026817,protein_coding -4090,Cel,ENSMUSG00000026818,protein_coding -4226,Slc25a25,ENSMUSG00000026819,protein_coding -4224,Ptges2,ENSMUSG00000026820,protein_coding -4089,Ralgds,ENSMUSG00000026821,protein_coding -4223,Lcn2,ENSMUSG00000026822,protein_coding -4574,Kcnj3,ENSMUSG00000026824,protein_coding -4218,Dnm1,ENSMUSG00000026825,protein_coding -4580,Nr4a2,ENSMUSG00000026826,protein_coding -4585,Gpd2,ENSMUSG00000026827,protein_coding -4592,Galnt5,ENSMUSG00000026828,protein_coding -4087,Gbgt1,ENSMUSG00000026829,protein_coding -4593,Ermn,ENSMUSG00000026830,protein_coding -4084,1700007K13Rik,ENSMUSG00000026831,protein_coding -4595,Cytip,ENSMUSG00000026832,protein_coding -4081,Olfm1,ENSMUSG00000026833,protein_coding -4597,Acvr1c,ENSMUSG00000026834,protein_coding -4078,Fcnb,ENSMUSG00000026835,protein_coding -4600,Acvr1,ENSMUSG00000026836,protein_coding -4076,Col5a1,ENSMUSG00000026837,protein_coding -4602,Upp2,ENSMUSG00000026839,protein_coding -4198,Lamc3,ENSMUSG00000026840,protein_coding -4196,Fibcd1,ENSMUSG00000026841,protein_coding -4194,Abl1,ENSMUSG00000026842,protein_coding -4187,Fubp3,ENSMUSG00000026843,protein_coding -4174,Tor1b,ENSMUSG00000026848,protein_coding -4175,Tor1a,ENSMUSG00000026849,protein_coding -4176,BC005624,ENSMUSG00000026851,protein_coding -4157,Crat,ENSMUSG00000026853,protein_coding -4177,Usp20,ENSMUSG00000026854,protein_coding -4156,Dolpp1,ENSMUSG00000026856,protein_coding -4169,Ntmt1,ENSMUSG00000026857,protein_coding -4154,Miga2,ENSMUSG00000026858,protein_coding -4153,Sh3glb2,ENSMUSG00000026860,protein_coding -4294,Hspa5,ENSMUSG00000026864,protein_coding -4470,Kynu,ENSMUSG00000026866,protein_coding -4293,Gapvd1,ENSMUSG00000026867,protein_coding -4299,Psmd5,ENSMUSG00000026869,protein_coding -4300,Cutal,ENSMUSG00000026870,protein_coding -4476,Zeb2,ENSMUSG00000026872,protein_coding -4301,Phf19,ENSMUSG00000026873,protein_coding 
-4306,Hc,ENSMUSG00000026874,protein_coding -4304,Traf1,ENSMUSG00000026875,protein_coding -4310,Rab14,ENSMUSG00000026878,protein_coding -4312,Gsn,ENSMUSG00000026879,protein_coding -4314,Stom,ENSMUSG00000026880,protein_coding -4316,4930568D16Rik,ENSMUSG00000026882,protein_coding -4324,Dab2ip,ENSMUSG00000026883,protein_coding -4325,Ttll11,ENSMUSG00000026885,protein_coding -4335,Mrrf,ENSMUSG00000026887,protein_coding -4667,Grb14,ENSMUSG00000026888,protein_coding -4334,Rbm18,ENSMUSG00000026889,protein_coding -4332,Lhx6,ENSMUSG00000026890,protein_coding -4656,Gca,ENSMUSG00000026893,protein_coding -4330,Morn5,ENSMUSG00000026894,protein_coding -4329,Ndufa8,ENSMUSG00000026895,protein_coding -4655,Ifih1,ENSMUSG00000026896,protein_coding -4648,Slc4a10,ENSMUSG00000026904,protein_coding -4643,Psmd14,ENSMUSG00000026914,protein_coding -4393,Strbp,ENSMUSG00000026915,protein_coding -4071,Wdr5,ENSMUSG00000026917,protein_coding -4069,Brd3,ENSMUSG00000026918,protein_coding -4039,Lcn4,ENSMUSG00000026919,protein_coding -4029,Egfl7,ENSMUSG00000026921,protein_coding -4033,Agpat2,ENSMUSG00000026922,protein_coding -4026,Notch1,ENSMUSG00000026923,protein_coding -4023,Sec16a,ENSMUSG00000026924,protein_coding -4022,Inpp5e,ENSMUSG00000026925,protein_coding -4021,Pmpca,ENSMUSG00000026926,protein_coding -4020,Entr1,ENSMUSG00000026927,protein_coding -4016,Card9,ENSMUSG00000026928,protein_coding -4014,Gpsm1,ENSMUSG00000026930,protein_coding -55391,1700019N19Rik,ENSMUSG00000026931,protein_coding -4005,Nacc2,ENSMUSG00000026932,protein_coding -4002,Camsap1,ENSMUSG00000026933,protein_coding -4010,Lhx3,ENSMUSG00000026934,protein_coding -3994,Lcn3,ENSMUSG00000026936,protein_coding -3987,Lcn5,ENSMUSG00000026937,protein_coding -3984,Fcna,ENSMUSG00000026938,protein_coding -3983,Tmem141,ENSMUSG00000026939,protein_coding -3982,Ccdc183,ENSMUSG00000026940,protein_coding -3976,Mamdc4,ENSMUSG00000026941,protein_coding -3972,Traf2,ENSMUSG00000026942,protein_coding -3969,Lcn12,ENSMUSG00000026943,protein_coding 
-3964,Abca2,ENSMUSG00000026944,protein_coding -4540,Nmi,ENSMUSG00000026946,protein_coding -4543,Neb,ENSMUSG00000026950,protein_coding -3960,Sapcd2,ENSMUSG00000026955,protein_coding -3959,Uap1l1,ENSMUSG00000026956,protein_coding -3958,Dpp7,ENSMUSG00000026958,protein_coding -3953,Grin1,ENSMUSG00000026959,protein_coding -4557,Arl6ip6,ENSMUSG00000026960,protein_coding -3952,Lrrc26,ENSMUSG00000026961,protein_coding -3951,Tmem210,ENSMUSG00000026963,protein_coding -3950,Anapc2,ENSMUSG00000026965,protein_coding -3949,Ssna1,ENSMUSG00000026966,protein_coding -3938,Fam166a,ENSMUSG00000026969,protein_coding -4635,Rbms1,ENSMUSG00000026970,protein_coding -4634,Itgb6,ENSMUSG00000026971,protein_coding -3916,Arrdc1,ENSMUSG00000026972,protein_coding -3918,Zmynd19,ENSMUSG00000026974,protein_coding -3919,Dph7,ENSMUSG00000026975,protein_coding -3903,Pax8,ENSMUSG00000026976,protein_coding -4624,March7,ENSMUSG00000026977,protein_coding -3902,Psd4,ENSMUSG00000026979,protein_coding -4627,Ly75,ENSMUSG00000026980,protein_coding -3901,Il1rn,ENSMUSG00000026981,protein_coding -3898,Il1f5,ENSMUSG00000026983,protein_coding -3895,Il1f6,ENSMUSG00000026984,protein_coding -3892,Il1f8,ENSMUSG00000026985,protein_coding -3890,Hnmt,ENSMUSG00000026986,protein_coding -4618,Baz2b,ENSMUSG00000026987,protein_coding -4617,Wdsub1,ENSMUSG00000026988,protein_coding -4614,Dapl1,ENSMUSG00000026989,protein_coding -4610,Pkp4,ENSMUSG00000026991,protein_coding -4681,Galnt3,ENSMUSG00000026994,protein_coding -4948,Nup35,ENSMUSG00000026999,protein_coding -4947,Dusp19,ENSMUSG00000027001,protein_coding -4943,Nckap1,ENSMUSG00000027002,protein_coding -4941,Frzb,ENSMUSG00000027004,protein_coding -4939,Dnajc10,ENSMUSG00000027006,protein_coding -4934,Itprid2,ENSMUSG00000027007,protein_coding -4928,Itga4,ENSMUSG00000027009,protein_coding -4771,Slc25a12,ENSMUSG00000027010,protein_coding -4923,Ube2e3,ENSMUSG00000027011,protein_coding -4770,Dync1i2,ENSMUSG00000027012,protein_coding -4914,Cwc22,ENSMUSG00000027014,protein_coding 
-4769,Cybrd1,ENSMUSG00000027015,protein_coding -4910,Zfp385b,ENSMUSG00000027016,protein_coding -4773,Hat1,ENSMUSG00000027018,protein_coding -4701,Xirp2,ENSMUSG00000027022,protein_coding -4711,Stk39,ENSMUSG00000027030,protein_coding -4722,Cers6,ENSMUSG00000027035,protein_coding -4732,Abcb11,ENSMUSG00000027048,protein_coding -5018,Ssrp1,ENSMUSG00000027067,protein_coding -4735,Dhrs9,ENSMUSG00000027068,protein_coding -4736,Lrp2,ENSMUSG00000027070,protein_coding -5017,P2rx3,ENSMUSG00000027071,protein_coding -5016,Prg3,ENSMUSG00000027072,protein_coding -5015,Prg2,ENSMUSG00000027073,protein_coding -5014,Slc43a3,ENSMUSG00000027074,protein_coding -5011,Slc43a1,ENSMUSG00000027075,protein_coding -5010,Timm10,ENSMUSG00000027076,protein_coding -5009,Smtnl1,ENSMUSG00000027077,protein_coding -5008,Ube2l6,ENSMUSG00000027078,protein_coding -5003,Clp1,ENSMUSG00000027079,protein_coding -4999,Med19,ENSMUSG00000027080,protein_coding -4992,Tfpi,ENSMUSG00000027082,protein_coding -4739,Fastkd1,ENSMUSG00000027086,protein_coding -4975,Itgav,ENSMUSG00000027087,protein_coding -4744,Phospho2,ENSMUSG00000027088,protein_coding -4973,Zc3h15,ENSMUSG00000027091,protein_coding -4864,Mtx2,ENSMUSG00000027099,protein_coding -4855,Hoxd8,ENSMUSG00000027102,protein_coding -4830,Atf2,ENSMUSG00000027104,protein_coding -4824,Chrna1,ENSMUSG00000027107,protein_coding -4810,Ola1,ENSMUSG00000027108,protein_coding -4805,Sp3,ENSMUSG00000027109,protein_coding -4783,Itga6,ENSMUSG00000027111,protein_coding -5635,Kif18a,ENSMUSG00000027115,protein_coding -5618,Fshb,ENSMUSG00000027120,protein_coding -5615,Arl14ep,ENSMUSG00000027122,protein_coding -5719,Slc12a6,ENSMUSG00000027130,protein_coding -5721,Emc4,ENSMUSG00000027131,protein_coding -5722,Katnbl1,ENSMUSG00000027132,protein_coding -5718,Nop10,ENSMUSG00000027133,protein_coding -5716,Lpcat4,ENSMUSG00000027134,protein_coding -5660,4930430A15Rik,ENSMUSG00000027157,protein_coding -5645,Ccdc34,ENSMUSG00000027160,protein_coding -5642,Lin7c,ENSMUSG00000027162,protein_coding 
-5510,Commd9,ENSMUSG00000027163,protein_coding -5507,Traf6,ENSMUSG00000027164,protein_coding -5503,B230118H07Rik,ENSMUSG00000027165,protein_coding -5600,Dnajc24,ENSMUSG00000027166,protein_coding -5597,Elp4,ENSMUSG00000027167,protein_coding -5596,Pax6,ENSMUSG00000027168,protein_coding -5580,Eif3m,ENSMUSG00000027170,protein_coding -5577,Prrg4,ENSMUSG00000027171,protein_coding -5574,Depdc7,ENSMUSG00000027173,protein_coding -5572,Tcp11l1,ENSMUSG00000027175,protein_coding -5571,Cstf3,ENSMUSG00000027176,protein_coding -5564,Hipk3,ENSMUSG00000027177,protein_coding -5552,Fbxo3,ENSMUSG00000027180,protein_coding -5539,Caprin1,ENSMUSG00000027184,protein_coding -5538,Nat10,ENSMUSG00000027185,protein_coding -5534,Elf5,ENSMUSG00000027186,protein_coding -5535,Cat,ENSMUSG00000027187,protein_coding -5519,Pamr1,ENSMUSG00000027188,protein_coding -5516,Trim44,ENSMUSG00000027189,protein_coding -5471,Api5,ENSMUSG00000027193,protein_coding -5468,Ttc17,ENSMUSG00000027194,protein_coding -5462,Hsd17b12,ENSMUSG00000027195,protein_coding -5453,Ext2,ENSMUSG00000027198,protein_coding -5950,Gatm,ENSMUSG00000027199,protein_coding -5960,Sema6d,ENSMUSG00000027200,protein_coding -5965,Myef2,ENSMUSG00000027201,protein_coding -5969,Slc12a1,ENSMUSG00000027202,protein_coding -5971,Dut,ENSMUSG00000027203,protein_coding -5974,Fbn1,ENSMUSG00000027204,protein_coding -5980,Cops2,ENSMUSG00000027206,protein_coding -5981,Galk2,ENSMUSG00000027207,protein_coding -5983,Fgf7,ENSMUSG00000027208,protein_coding -5982,Fam227b,ENSMUSG00000027209,protein_coding -5769,Meis2,ENSMUSG00000027210,protein_coding -5449,Cd82,ENSMUSG00000027215,protein_coding -5446,Tspan18,ENSMUSG00000027217,protein_coding -5946,Slc28a2,ENSMUSG00000027219,protein_coding -5441,Syt13,ENSMUSG00000027220,protein_coding -5437,Chst1,ENSMUSG00000027221,protein_coding -5425,Pex16,ENSMUSG00000027222,protein_coding -5427,Mapk8ip1,ENSMUSG00000027223,protein_coding -5942,Duoxa1,ENSMUSG00000027224,protein_coding -5941,Duoxa2,ENSMUSG00000027225,protein_coding 
-5939,Sord,ENSMUSG00000027227,protein_coding -5935,Terb2,ENSMUSG00000027229,protein_coding -5417,Creb3l1,ENSMUSG00000027230,protein_coding -5932,Patl2,ENSMUSG00000027233,protein_coding -5929,Eif3j1,ENSMUSG00000027236,protein_coding -5921,Frmd5,ENSMUSG00000027238,protein_coding -5413,Mdk,ENSMUSG00000027239,protein_coding -5920,Wdr76,ENSMUSG00000027242,protein_coding -5408,Harbi1,ENSMUSG00000027243,protein_coding -5407,Atg13,ENSMUSG00000027244,protein_coding -5915,Hypk,ENSMUSG00000027245,protein_coding -5911,Ell3,ENSMUSG00000027246,protein_coding -5406,Arhgap1,ENSMUSG00000027247,protein_coding -5910,Pdia3,ENSMUSG00000027248,protein_coding -5403,F2,ENSMUSG00000027249,protein_coding -5399,Lrp4,ENSMUSG00000027253,protein_coding -5902,Map1a,ENSMUSG00000027254,protein_coding -5395,Arfgap2,ENSMUSG00000027255,protein_coding -5394,Pacsin3,ENSMUSG00000027257,protein_coding -5898,Adal,ENSMUSG00000027259,protein_coding -6204,Hao1,ENSMUSG00000027261,protein_coding -5900,Tubgcp4,ENSMUSG00000027263,protein_coding -6218,Lamp5,ENSMUSG00000027270,protein_coding -5887,Ubr1,ENSMUSG00000027272,protein_coding -6225,Snap25,ENSMUSG00000027273,protein_coding -6227,Mkks,ENSMUSG00000027274,protein_coding -6231,Jag1,ENSMUSG00000027276,protein_coding -6230,Slx4ip,ENSMUSG00000027281,protein_coding -5371,Mtch2,ENSMUSG00000027282,protein_coding -5884,Cdan1,ENSMUSG00000027284,protein_coding -5881,Haus2,ENSMUSG00000027285,protein_coding -5880,Lrrc57,ENSMUSG00000027286,protein_coding -5879,Snap23,ENSMUSG00000027287,protein_coding -5877,Zfp106,ENSMUSG00000027288,protein_coding -5873,Vps39,ENSMUSG00000027291,protein_coding -5868,Ehd4,ENSMUSG00000027293,protein_coding -5854,Itpka,ENSMUSG00000027296,protein_coding -5855,Ltk,ENSMUSG00000027297,protein_coding -5858,Tyro3,ENSMUSG00000027298,protein_coding -6120,Ubox5,ENSMUSG00000027300,protein_coding -6118,Oxt,ENSMUSG00000027301,protein_coding -6113,Ptpra,ENSMUSG00000027303,protein_coding -5852,Rtf1,ENSMUSG00000027304,protein_coding 
-5849,Ndufaf1,ENSMUSG00000027305,protein_coding -5847,Nusap1,ENSMUSG00000027306,protein_coding -6127,4930402H24Rik,ENSMUSG00000027309,protein_coding -6131,Atrn,ENSMUSG00000027312,protein_coding -5840,Chac1,ENSMUSG00000027313,protein_coding -5839,Dll4,ENSMUSG00000027314,protein_coding -5834,Spint1,ENSMUSG00000027315,protein_coding -6133,Gfra4,ENSMUSG00000027316,protein_coding -5833,Ppp1r14d,ENSMUSG00000027317,protein_coding -6135,Adam33,ENSMUSG00000027318,protein_coding -6136,Siglec1,ENSMUSG00000027322,protein_coding -5826,Rad51,ENSMUSG00000027323,protein_coding -5824,Rpusd2,ENSMUSG00000027324,protein_coding -5825,Knl1,ENSMUSG00000027326,protein_coding -6139,1700037H04Rik,ENSMUSG00000027327,protein_coding -6141,Spef1,ENSMUSG00000027329,protein_coding -6143,Cdc25b,ENSMUSG00000027330,protein_coding -5816,Knstrn,ENSMUSG00000027331,protein_coding -5818,Ivd,ENSMUSG00000027332,protein_coding -6154,Smox,ENSMUSG00000027333,protein_coding -6155,Adra1d,ENSMUSG00000027335,protein_coding -6165,Prnd,ENSMUSG00000027338,protein_coding -6166,Rassf2,ENSMUSG00000027339,protein_coding -6167,Slc23a2,ENSMUSG00000027340,protein_coding -6170,Tmem230,ENSMUSG00000027341,protein_coding -6171,Pcna,ENSMUSG00000027342,protein_coding -5796,Fsip1,ENSMUSG00000027344,protein_coding -6178,Gpcpd1,ENSMUSG00000027346,protein_coding -5786,Rasgrp1,ENSMUSG00000027347,protein_coding -5785,Fam98b,ENSMUSG00000027349,protein_coding -6186,Chgb,ENSMUSG00000027350,protein_coding -5781,Spred1,ENSMUSG00000027351,protein_coding -6188,Mcm8,ENSMUSG00000027353,protein_coding -5775,Tmco5,ENSMUSG00000027355,protein_coding -6192,Fermt1,ENSMUSG00000027356,protein_coding -6190,Crls1,ENSMUSG00000027357,protein_coding -6201,Bmp2,ENSMUSG00000027358,protein_coding -5990,Slc27a2,ENSMUSG00000027359,protein_coding -5991,Hdc,ENSMUSG00000027360,protein_coding -5992,Gabpb1,ENSMUSG00000027361,protein_coding -5995,Usp8,ENSMUSG00000027363,protein_coding -5996,Usp50,ENSMUSG00000027364,protein_coding 
-5998,Trpm7,ENSMUSG00000027365,protein_coding -6000,Sppl2a,ENSMUSG00000027366,protein_coding -6012,Stard7,ENSMUSG00000027367,protein_coding -6013,Dusp2,ENSMUSG00000027368,protein_coding -6017,Fahd2a,ENSMUSG00000027371,protein_coding -6023,Mrps5,ENSMUSG00000027374,protein_coding -6024,Mal,ENSMUSG00000027375,protein_coding -6020,Prom2,ENSMUSG00000027376,protein_coding -6028,Mall,ENSMUSG00000027377,protein_coding -6029,Nphp1,ENSMUSG00000027378,protein_coding -6031,Bub1,ENSMUSG00000027379,protein_coding -6032,Acoxl,ENSMUSG00000027380,protein_coding -6035,Bcl2l11,ENSMUSG00000027381,protein_coding -6253,Ndufaf5,ENSMUSG00000027384,protein_coding -6051,Fbln7,ENSMUSG00000027386,protein_coding -6053,Zc3h8,ENSMUSG00000027387,protein_coding -6061,Ttl,ENSMUSG00000027394,protein_coding -6062,Polr1b,ENSMUSG00000027395,protein_coding -6069,Slc20a1,ENSMUSG00000027397,protein_coding -6078,Il1b,ENSMUSG00000027398,protein_coding -6075,Il1a,ENSMUSG00000027399,protein_coding -6085,Pdyn,ENSMUSG00000027400,protein_coding -6089,Tgm3,ENSMUSG00000027401,protein_coding -6094,Tgm6,ENSMUSG00000027403,protein_coding -6095,Snrpb,ENSMUSG00000027404,protein_coding -6097,Nop56,ENSMUSG00000027405,protein_coding -6103,Idh3b,ENSMUSG00000027406,protein_coding -6106,Cpxm1,ENSMUSG00000027408,protein_coding -6109,1700020A23Rik,ENSMUSG00000027409,protein_coding -6112,Vps16,ENSMUSG00000027411,protein_coding -6735,Lpin3,ENSMUSG00000027412,protein_coding -6271,Otor,ENSMUSG00000027416,protein_coding -6277,Pcsk2,ENSMUSG00000027419,protein_coding -6279,Bfsp1,ENSMUSG00000027420,protein_coding -6284,Rrbp1,ENSMUSG00000027422,protein_coding -6290,Snx5,ENSMUSG00000027423,protein_coding -6294,Mgme1,ENSMUSG00000027424,protein_coding -6299,Kat14,ENSMUSG00000027425,protein_coding -6303,Polr3f,ENSMUSG00000027427,protein_coding -6304,Rbbp9,ENSMUSG00000027428,protein_coding -6305,Sec23b,ENSMUSG00000027429,protein_coding -6307,Dtd1,ENSMUSG00000027430,protein_coding -6310,Scp2d1,ENSMUSG00000027431,protein_coding 
-6333,Xrn2,ENSMUSG00000027433,protein_coding -6336,Nkx2-2,ENSMUSG00000027434,protein_coding -6359,Cd93,ENSMUSG00000027435,protein_coding -6364,Napb,ENSMUSG00000027438,protein_coding -6363,Gzf1,ENSMUSG00000027439,protein_coding -6369,Cst8,ENSMUSG00000027442,protein_coding -6368,Cst12,ENSMUSG00000027443,protein_coding -6367,Cstdc1,ENSMUSG00000027444,protein_coding -6371,Cst9,ENSMUSG00000027445,protein_coding -6372,Cstdc2,ENSMUSG00000027446,protein_coding -6373,Cst3,ENSMUSG00000027447,protein_coding -6407,Acss1,ENSMUSG00000027452,protein_coding -6417,Gins1,ENSMUSG00000027454,protein_coding -6441,Nsfl1c,ENSMUSG00000027455,protein_coding -6444,Sdcbp2,ENSMUSG00000027456,protein_coding -6445,Snph,ENSMUSG00000027457,protein_coding -6459,Fam110a,ENSMUSG00000027459,protein_coding -6456,Angpt4,ENSMUSG00000027460,protein_coding -6460,Slc52a3,ENSMUSG00000027463,protein_coding -6467,Tbc1d20,ENSMUSG00000027465,protein_coding -6468,Rbck1,ENSMUSG00000027466,protein_coding -6481,Defb22,ENSMUSG00000027468,protein_coding -6503,Tpx2,ENSMUSG00000027469,protein_coding -6505,Mylk2,ENSMUSG00000027470,protein_coding -6510,Pdrg1,ENSMUSG00000027472,protein_coding -6512,Ccm2l,ENSMUSG00000027474,protein_coding -6519,Kif3b,ENSMUSG00000027475,protein_coding -6527,Dnmt3b,ENSMUSG00000027478,protein_coding -6529,Mapre1,ENSMUSG00000027479,protein_coding -6532,Sun5,ENSMUSG00000027480,protein_coding -6533,Bpifb2,ENSMUSG00000027481,protein_coding -6541,Bpifa3,ENSMUSG00000027482,protein_coding -6542,Bpifa1,ENSMUSG00000027483,protein_coding -6543,Bpifa5,ENSMUSG00000027484,protein_coding -6544,Bpifb1,ENSMUSG00000027485,protein_coding -6549,Cdk5rap1,ENSMUSG00000027487,protein_coding -6550,Snta1,ENSMUSG00000027488,protein_coding -6552,Necab3,ENSMUSG00000027489,protein_coding -6555,E2f1,ENSMUSG00000027490,protein_coding -7000,Fam210b,ENSMUSG00000027495,protein_coding -7001,Aurka,ENSMUSG00000027496,protein_coding -7002,Cstf1,ENSMUSG00000027498,protein_coding -9915,Pkia,ENSMUSG00000027499,protein_coding 
-9928,Stmn2,ENSMUSG00000027500,protein_coding -7005,Rtf2,ENSMUSG00000027502,protein_coding -7007,Fam209,ENSMUSG00000027505,protein_coding -9939,Tpd52,ENSMUSG00000027506,protein_coding -9949,Pag1,ENSMUSG00000027508,protein_coding -7013,Rae1,ENSMUSG00000027509,protein_coding -7014,Rbm38,ENSMUSG00000027510,protein_coding -7019,Pck1,ENSMUSG00000027513,protein_coding -7020,Zbp1,ENSMUSG00000027514,protein_coding -7025,Ankrd60,ENSMUSG00000027517,protein_coding -7024,1700021F07Rik,ENSMUSG00000027518,protein_coding -7027,Rab22a,ENSMUSG00000027519,protein_coding -1018,Zdbf2,ENSMUSG00000027520,protein_coding -7036,Stx16,ENSMUSG00000027522,protein_coding -7043,Gnas,ENSMUSG00000027523,protein_coding -7057,Edn3,ENSMUSG00000027524,protein_coding -7148,Phactr3,ENSMUSG00000027525,protein_coding -9960,Fabp9,ENSMUSG00000027528,protein_coding -9965,Fabp12,ENSMUSG00000027530,protein_coding -9968,Impa1,ENSMUSG00000027531,protein_coding -9954,Fabp5,ENSMUSG00000027533,protein_coding -9973,Snx16,ENSMUSG00000027534,protein_coding -9972,Chmp4c,ENSMUSG00000027536,protein_coding -6932,Ptpn1,ENSMUSG00000027540,protein_coding -6949,Nfatc2,ENSMUSG00000027544,protein_coding -6950,Atp9a,ENSMUSG00000027546,protein_coding -6952,Sall4,ENSMUSG00000027547,protein_coding -10005,Lrrcc1,ENSMUSG00000027550,protein_coding -6955,Zfp64,ENSMUSG00000027551,protein_coding -10007,E2f5,ENSMUSG00000027552,protein_coding -10010,Car13,ENSMUSG00000027555,protein_coding -10012,Car1,ENSMUSG00000027556,protein_coding -10014,Car3,ENSMUSG00000027559,protein_coding -6984,Dok5,ENSMUSG00000027560,protein_coding -10015,Car2,ENSMUSG00000027562,protein_coding -10042,Cypt12,ENSMUSG00000027564,protein_coding -7164,Psma7,ENSMUSG00000027566,protein_coding -7183,Ntsr1,ENSMUSG00000027568,protein_coding -7184,Mrgbp,ENSMUSG00000027569,protein_coding -7186,Col9a3,ENSMUSG00000027570,protein_coding -7190,Gid8,ENSMUSG00000027573,protein_coding -7203,Nkain4,ENSMUSG00000027574,protein_coding -7205,Arfgap1,ENSMUSG00000027575,protein_coding 
-7208,Chrna4,ENSMUSG00000027577,protein_coding -7214,Srms,ENSMUSG00000027579,protein_coding -7217,Helz2,ENSMUSG00000027580,protein_coding -7220,Stmn3,ENSMUSG00000027581,protein_coding -7223,Zgpat,ENSMUSG00000027582,protein_coding -7226,Zbtb46,ENSMUSG00000027583,protein_coding -7244,Oprl1,ENSMUSG00000027584,protein_coding -7247,Pcmtd2,ENSMUSG00000027589,protein_coding -6565,Raly,ENSMUSG00000027593,protein_coding -6566,a,ENSMUSG00000027596,protein_coding -6569,Ahcy,ENSMUSG00000027597,protein_coding -6572,Itch,ENSMUSG00000027598,protein_coding -10056,Armc1,ENSMUSG00000027599,protein_coding -10058,Mtfr1,ENSMUSG00000027601,protein_coding -6575,Map1lc3a,ENSMUSG00000027602,protein_coding -6582,Ggt7,ENSMUSG00000027603,protein_coding -6583,Acss2,ENSMUSG00000027605,protein_coding -10062,Dnajc5b,ENSMUSG00000027606,protein_coding -6587,Gss,ENSMUSG00000027610,protein_coding -6597,Procr,ENSMUSG00000027611,protein_coding -6598,Mmp24,ENSMUSG00000027612,protein_coding -6600,Eif6,ENSMUSG00000027613,protein_coding -10075,Hps3,ENSMUSG00000027615,protein_coding -6616,Nfs1,ENSMUSG00000027618,protein_coding -6618,Rbm39,ENSMUSG00000027620,protein_coding -6632,Epb41l1,ENSMUSG00000027624,protein_coding -6635,Aar2,ENSMUSG00000027628,protein_coding -10100,Tbl1xr1,ENSMUSG00000027630,protein_coding -6655,Ndrg3,ENSMUSG00000027634,protein_coding -6658,Dsn1,ENSMUSG00000027635,protein_coding -6651,Sla2,ENSMUSG00000027636,protein_coding -6650,Rab5if,ENSMUSG00000027637,protein_coding -6661,Samhd1,ENSMUSG00000027639,protein_coding -6662,Rbl1,ENSMUSG00000027641,protein_coding -6665,Rpn2,ENSMUSG00000027642,protein_coding -6666,Ghrh,ENSMUSG00000027643,protein_coding -6668,Src,ENSMUSG00000027646,protein_coding -6678,Ctnnbl1,ENSMUSG00000027649,protein_coding -6681,Tti1,ENSMUSG00000027650,protein_coding -6682,Rprd1b,ENSMUSG00000027651,protein_coding -6701,Ralgapb,ENSMUSG00000027652,protein_coding -6713,Fam83d,ENSMUSG00000027654,protein_coding -6714,Dhx35,ENSMUSG00000027655,protein_coding 
-6786,Ccn5,ENSMUSG00000027656,protein_coding -10197,Skil,ENSMUSG00000027660,protein_coding -6868,Slc2a10,ENSMUSG00000027661,protein_coding -10206,Zmat3,ENSMUSG00000027663,protein_coding -10208,Pik3ca,ENSMUSG00000027665,protein_coding -10211,Zfp639,ENSMUSG00000027667,protein_coding -10213,Mfn1,ENSMUSG00000027668,protein_coding -10214,Gnb4,ENSMUSG00000027669,protein_coding -6864,Ocstamp,ENSMUSG00000027670,protein_coding -10216,Actl6a,ENSMUSG00000027671,protein_coding -10219,Ndufb5,ENSMUSG00000027673,protein_coding -10223,Pex5l,ENSMUSG00000027674,protein_coding -10238,Ccdc39,ENSMUSG00000027676,protein_coding -10237,Ttc14,ENSMUSG00000027677,protein_coding -6880,Ncoa3,ENSMUSG00000027678,protein_coding -10247,Dnajc19,ENSMUSG00000027679,protein_coding -10245,Fxr1,ENSMUSG00000027680,protein_coding -10169,Mecom,ENSMUSG00000027684,protein_coding -10150,Slc2a2,ENSMUSG00000027690,protein_coding -10147,Tnik,ENSMUSG00000027692,protein_coding -10144,Pld1,ENSMUSG00000027695,protein_coding -10134,Nceh1,ENSMUSG00000027698,protein_coding -10131,Ect2,ENSMUSG00000027699,protein_coding -10183,Lrrc34,ENSMUSG00000027702,protein_coding -10184,Lrriq4,ENSMUSG00000027703,protein_coding -10189,Sec62,ENSMUSG00000027706,protein_coding -10279,Dcun1d1,ENSMUSG00000027708,protein_coding -10282,Mccc1,ENSMUSG00000027709,protein_coding -10287,Acad9,ENSMUSG00000027710,protein_coding -10297,Anxa5,ENSMUSG00000027712,protein_coding -10299,1810062G17Rik,ENSMUSG00000027713,protein_coding -10304,Exosc9,ENSMUSG00000027714,protein_coding -10305,Ccna2,ENSMUSG00000027715,protein_coding -10307,Trpc3,ENSMUSG00000027716,protein_coding -10318,Il21,ENSMUSG00000027718,protein_coding -10312,Adad1,ENSMUSG00000027719,protein_coding -10316,Il2,ENSMUSG00000027720,protein_coding -10331,Spata5,ENSMUSG00000027722,protein_coding -10471,Slc7a11,ENSMUSG00000027737,protein_coding -10502,Rab33b,ENSMUSG00000027739,protein_coding -10544,Cog6,ENSMUSG00000027742,protein_coding -10558,Stoml3,ENSMUSG00000027744,protein_coding 
-10569,Ufm1,ENSMUSG00000027746,protein_coding -10573,Trpc4,ENSMUSG00000027748,protein_coding -10574,Postn,ENSMUSG00000027750,protein_coding -10578,Supt20,ENSMUSG00000027751,protein_coding -10580,Exosc8,ENSMUSG00000027752,protein_coding -10691,Aadac,ENSMUSG00000027761,protein_coding -10692,Sucnr1,ENSMUSG00000027762,protein_coding -10695,Mbnl1,ENSMUSG00000027763,protein_coding -10704,P2ry1,ENSMUSG00000027765,protein_coding -10715,Dhx36,ENSMUSG00000027770,protein_coding -10804,Gfm1,ENSMUSG00000027774,protein_coding -10811,Mfsd1,ENSMUSG00000027775,protein_coding -10822,Il12a,ENSMUSG00000027776,protein_coding -10818,Schip1,ENSMUSG00000027777,protein_coding -10830,Ift80,ENSMUSG00000027778,protein_coding -10841,Kpna4,ENSMUSG00000027782,protein_coding -10846,Ppm1l,ENSMUSG00000027784,protein_coding -10855,Nmd3,ENSMUSG00000027787,protein_coding -10860,Otol1,ENSMUSG00000027788,protein_coding -10878,Sis,ENSMUSG00000027790,protein_coding -10887,Bche,ENSMUSG00000027792,protein_coding -10586,Ccna1,ENSMUSG00000027793,protein_coding -10592,Sohlh2,ENSMUSG00000027794,protein_coding -10582,Smad9,ENSMUSG00000027796,protein_coding -10593,Dclk1,ENSMUSG00000027797,protein_coding -10603,Nbea,ENSMUSG00000027799,protein_coding -10618,Tm4sf1,ENSMUSG00000027800,protein_coding -10620,Tm4sf4,ENSMUSG00000027801,protein_coding -10621,Wwtr1,ENSMUSG00000027803,protein_coding -10940,Ppid,ENSMUSG00000027804,protein_coding -10634,Pfn2,ENSMUSG00000027805,protein_coding -10646,Tsc22d2,ENSMUSG00000027806,protein_coding -10647,Serp1,ENSMUSG00000027808,protein_coding -10941,Etfdh,ENSMUSG00000027809,protein_coding -10648,Eif2a,ENSMUSG00000027810,protein_coding -10943,4930579G24Rik,ENSMUSG00000027811,protein_coding -10721,Mme,ENSMUSG00000027820,protein_coding -10732,Slc33a1,ENSMUSG00000027822,protein_coding -10734,Gmps,ENSMUSG00000027823,protein_coding -10736,Vmn2r1,ENSMUSG00000027824,protein_coding -10762,Kcnab1,ENSMUSG00000027827,protein_coding -10765,Ssr3,ENSMUSG00000027828,protein_coding 
-10781,Ccnl1,ENSMUSG00000027829,protein_coding -10787,Veph1,ENSMUSG00000027831,protein_coding -10793,Ptx3,ENSMUSG00000027832,protein_coding -10799,Shox2,ENSMUSG00000027833,protein_coding -10899,Serpini1,ENSMUSG00000027834,protein_coding -10898,Pdcd10,ENSMUSG00000027835,protein_coding -11816,Wnt2b,ENSMUSG00000027840,protein_coding -11767,Ptpn22,ENSMUSG00000027843,protein_coding -11762,Dclre1b,ENSMUSG00000027845,protein_coding -11756,Olfml3,ENSMUSG00000027848,protein_coding -11751,Syt6,ENSMUSG00000027849,protein_coding -11736,Nras,ENSMUSG00000027852,protein_coding -11734,Sike1,ENSMUSG00000027854,protein_coding -11728,Sycp1,ENSMUSG00000027855,protein_coding -11726,Tshb,ENSMUSG00000027857,protein_coding -11725,Tspan2,ENSMUSG00000027858,protein_coding -11718,Ngf,ENSMUSG00000027859,protein_coding -11712,Vangl1,ENSMUSG00000027860,protein_coding -11709,Casq2,ENSMUSG00000027861,protein_coding -11692,Cd2,ENSMUSG00000027863,protein_coding -11687,Ptgfrn,ENSMUSG00000027864,protein_coding -11666,Gdap2,ENSMUSG00000027865,protein_coding -11661,Spag17,ENSMUSG00000027867,protein_coding -11655,Tbx15,ENSMUSG00000027868,protein_coding -11637,Hsd3b6,ENSMUSG00000027869,protein_coding -11644,Hao2,ENSMUSG00000027870,protein_coding -11643,Hsd3b1,ENSMUSG00000027871,protein_coding -11620,Hmgcs2,ENSMUSG00000027875,protein_coding -11618,Reg4,ENSMUSG00000027876,protein_coding -11613,Notch2,ENSMUSG00000027878,protein_coding -11611,Sec22b,ENSMUSG00000027879,protein_coding -11962,Slc25a54,ENSMUSG00000027880,protein_coding -11955,Prpf38b,ENSMUSG00000027881,protein_coding -11952,Stxbp3,ENSMUSG00000027882,protein_coding -11949,Gpsm2,ENSMUSG00000027883,protein_coding -11948,Clcc1,ENSMUSG00000027884,protein_coding -11940,1700013F07Rik,ENSMUSG00000027886,protein_coding -11921,Sypl2,ENSMUSG00000027887,protein_coding -11912,Ampd2,ENSMUSG00000027889,protein_coding -11911,Gstm4,ENSMUSG00000027890,protein_coding -11887,Ahcyl1,ENSMUSG00000027893,protein_coding -11882,Slc6a17,ENSMUSG00000027894,protein_coding 
-11881,Kcnc4,ENSMUSG00000027895,protein_coding -11877,Slc16a4,ENSMUSG00000027896,protein_coding -11858,Dram2,ENSMUSG00000027900,protein_coding -11856,Dennd2d,ENSMUSG00000027901,protein_coding -11853,Chil6,ENSMUSG00000027902,protein_coding -11824,Ddx20,ENSMUSG00000027905,protein_coding -11360,S100a11,ENSMUSG00000027907,protein_coding -11358,Tchhl1,ENSMUSG00000027908,protein_coding -11337,Lce1m,ENSMUSG00000027912,protein_coding -11336,Crct1,ENSMUSG00000027913,protein_coding -11318,Lce1g,ENSMUSG00000027919,protein_coding -11309,Lce1b,ENSMUSG00000027923,protein_coding -11295,Sprr2j-ps,ENSMUSG00000027925,protein_coding -11277,Lelp1,ENSMUSG00000027927,protein_coding -11236,Npr1,ENSMUSG00000027931,protein_coding -11234,Slc27a3,ENSMUSG00000027932,protein_coding -11235,Ints3,ENSMUSG00000027933,protein_coding -11223,Rab13,ENSMUSG00000027935,protein_coding -11227,Crtc2,ENSMUSG00000027936,protein_coding -11224,Jtb,ENSMUSG00000027937,protein_coding -11225,Creb3l4,ENSMUSG00000027938,protein_coding -11218,Nup210l,ENSMUSG00000027939,protein_coding -11217,Tpm3,ENSMUSG00000027940,protein_coding -11214,4933434E20Rik,ENSMUSG00000027942,protein_coding -11209,Hax1,ENSMUSG00000027944,protein_coding -11204,Il6ra,ENSMUSG00000027947,protein_coding -11199,Chrnb2,ENSMUSG00000027950,protein_coding -11198,Adar,ENSMUSG00000027951,protein_coding -11196,Pmvk,ENSMUSG00000027952,protein_coding -11177,Slc50a1,ENSMUSG00000027953,protein_coding -11178,Efna1,ENSMUSG00000027954,protein_coding -10949,Gask1b,ENSMUSG00000027955,protein_coding -10946,Tmem144,ENSMUSG00000027956,protein_coding -12051,Slc35a3,ENSMUSG00000027957,protein_coding -12045,Sass6,ENSMUSG00000027959,protein_coding -12042,Lrrc39,ENSMUSG00000027961,protein_coding -12026,Vcam1,ENSMUSG00000027962,protein_coding -12024,Extl2,ENSMUSG00000027963,protein_coding -12008,Olfm3,ENSMUSG00000027965,protein_coding -12004,Col11a1,ENSMUSG00000027966,protein_coding -12241,Neurog2,ENSMUSG00000027967,protein_coding 
-12233,Larp7,ENSMUSG00000027968,protein_coding -12196,Ndst4,ENSMUSG00000027971,protein_coding -12186,1700006A11Rik,ENSMUSG00000027973,protein_coding -12175,Ndst3,ENSMUSG00000027977,protein_coding -12170,Prss12,ENSMUSG00000027978,protein_coding -12000,Rnpc3,ENSMUSG00000027981,protein_coding -12320,Cyp2u1,ENSMUSG00000027983,protein_coding -12318,Hadh,ENSMUSG00000027984,protein_coding -12316,Lef1,ENSMUSG00000027985,protein_coding -11007,Trim2,ENSMUSG00000027993,protein_coding -12288,Mcub,ENSMUSG00000027994,protein_coding -11003,Tlr2,ENSMUSG00000027995,protein_coding -11000,Sfrp2,ENSMUSG00000027996,protein_coding -12287,Casp6,ENSMUSG00000027997,protein_coding -10992,Plrg1,ENSMUSG00000027998,protein_coding -12286,Pla2g12a,ENSMUSG00000027999,protein_coding -10990,Fga,ENSMUSG00000028001,protein_coding -10987,Lrat,ENSMUSG00000028003,protein_coding -10978,Npy2r,ENSMUSG00000028004,protein_coding -10974,Gucy1b1,ENSMUSG00000028005,protein_coding -12070,Snx7,ENSMUSG00000028007,protein_coding -10973,Asic5,ENSMUSG00000028008,protein_coding -12284,Gar1,ENSMUSG00000028010,protein_coding -10972,Tdo2,ENSMUSG00000028011,protein_coding -12283,Rrh,ENSMUSG00000028012,protein_coding -12351,Ppa2,ENSMUSG00000028013,protein_coding -10971,Ctso,ENSMUSG00000028015,protein_coding -12348,Ints12,ENSMUSG00000028016,protein_coding -12277,Egf,ENSMUSG00000028017,protein_coding -12344,Gstcd,ENSMUSG00000028018,protein_coding -10963,Pdgfc,ENSMUSG00000028019,protein_coding -10959,Glrb,ENSMUSG00000028020,protein_coding -12265,Pitx2,ENSMUSG00000028023,protein_coding -12268,Enpep,ENSMUSG00000028024,protein_coding -12242,Alpk1,ENSMUSG00000028028,protein_coding -12335,Aimp1,ENSMUSG00000028029,protein_coding -12336,Tbck,ENSMUSG00000028030,protein_coding -12330,Dkk2,ENSMUSG00000028031,protein_coding -12324,Papss1,ENSMUSG00000028032,protein_coding -293,Kcnq5,ENSMUSG00000028033,protein_coding -12644,Fubp1,ENSMUSG00000028034,protein_coding -12643,Dnajb4,ENSMUSG00000028035,protein_coding 
-12631,Ptgfr,ENSMUSG00000028036,protein_coding -12629,Ifi44,ENSMUSG00000028037,protein_coding -11180,Efna3,ENSMUSG00000028039,protein_coding -11182,Efna4,ENSMUSG00000028040,protein_coding -11184,Adam15,ENSMUSG00000028041,protein_coding -11188,Zbtb7b,ENSMUSG00000028042,protein_coding -11192,Cks1b,ENSMUSG00000028044,protein_coding -11171,Thbs3,ENSMUSG00000028047,protein_coding -11168,Gba,ENSMUSG00000028048,protein_coding -11164,Scamp3,ENSMUSG00000028049,protein_coding -11162,Hcn3,ENSMUSG00000028051,protein_coding -11153,Ash1l,ENSMUSG00000028053,protein_coding -11141,Rit1,ENSMUSG00000028057,protein_coding -11133,Arhgef2,ENSMUSG00000028059,protein_coding -11137,Khdc4,ENSMUSG00000028060,protein_coding -11129,Lamtor2,ENSMUSG00000028062,protein_coding -11124,Lmna,ENSMUSG00000028063,protein_coding -11121,Sema4a,ENSMUSG00000028064,protein_coding -11118,Pmf1,ENSMUSG00000028066,protein_coding -11095,Iqgap3,ENSMUSG00000028068,protein_coding -11092,Gpatch4,ENSMUSG00000028069,protein_coding -11093,Naxe,ENSMUSG00000028070,protein_coding -11080,Sh2d2a,ENSMUSG00000028071,protein_coding -11078,Ntrk1,ENSMUSG00000028072,protein_coding -11077,Pear1,ENSMUSG00000028073,protein_coding -11059,Cd1d1,ENSMUSG00000028076,protein_coding -11054,Dclk2,ENSMUSG00000028078,protein_coding -11046,Lrba,ENSMUSG00000028080,protein_coding -11040,Rps3a1,ENSMUSG00000028081,protein_coding -11037,Sh3d19,ENSMUSG00000028082,protein_coding -11027,Gatb,ENSMUSG00000028085,protein_coding -11017,Fbxw7,ENSMUSG00000028086,protein_coding -11604,Fmo5,ENSMUSG00000028088,protein_coding -11602,Chd1l,ENSMUSG00000028089,protein_coding -11597,Acp6,ENSMUSG00000028093,protein_coding -11590,Gpr89,ENSMUSG00000028096,protein_coding -11585,Rnf115,ENSMUSG00000028098,protein_coding -11582,Polr3c,ENSMUSG00000028099,protein_coding -11581,Nudt17,ENSMUSG00000028100,protein_coding -11580,Pias3,ENSMUSG00000028101,protein_coding -11575,Pex11b,ENSMUSG00000028102,protein_coding -11570,Polr3gl,ENSMUSG00000028104,protein_coding 
-11496,Rprd2,ENSMUSG00000028106,protein_coding -11495,Tars2,ENSMUSG00000028107,protein_coding -11493,Ecm1,ENSMUSG00000028108,protein_coding -11485,Hormad1,ENSMUSG00000028109,protein_coding -11482,Ctsk,ENSMUSG00000028111,protein_coding -12167,Mettl14,ENSMUSG00000028114,protein_coding -11467,Bnipl,ENSMUSG00000028115,protein_coding -12158,Myoz2,ENSMUSG00000028116,protein_coding -12138,Bcar3,ENSMUSG00000028121,protein_coding -12132,Gclm,ENSMUSG00000028124,protein_coding -12126,Abca4,ENSMUSG00000028125,protein_coding -11451,Pip5k1a,ENSMUSG00000028126,protein_coding -12119,Abcd3,ENSMUSG00000028127,protein_coding -12117,F3,ENSMUSG00000028128,protein_coding -12101,Tmem56,ENSMUSG00000028132,protein_coding -12098,Rwdd3,ENSMUSG00000028133,protein_coding -12085,Ptbp2,ENSMUSG00000028134,protein_coding -11423,Snx27,ENSMUSG00000028136,protein_coding -11422,Celf3,ENSMUSG00000028137,protein_coding -12453,Adh5,ENSMUSG00000028138,protein_coding -11421,Riiad1,ENSMUSG00000028139,protein_coding -11419,Mrpl9,ENSMUSG00000028140,protein_coding -11418,Oaz3,ENSMUSG00000028141,protein_coding -11405,Them4,ENSMUSG00000028145,protein_coding -11407,Them5,ENSMUSG00000028148,protein_coding -12466,Rap1gds1,ENSMUSG00000028149,protein_coding -11411,Rorc,ENSMUSG00000028150,protein_coding -12464,Tspan5,ENSMUSG00000028152,protein_coding -12457,Eif4e,ENSMUSG00000028156,protein_coding -12440,Mttp,ENSMUSG00000028158,protein_coding -12436,Dapp1,ENSMUSG00000028159,protein_coding -12408,Ppp3ca,ENSMUSG00000028161,protein_coding -12394,Nfkb1,ENSMUSG00000028163,protein_coding -12391,Manba,ENSMUSG00000028164,protein_coding -12385,Cisd2,ENSMUSG00000028165,protein_coding -12382,Bdh2,ENSMUSG00000028167,protein_coding -12373,Tacr3,ENSMUSG00000028172,protein_coding -12757,Wls,ENSMUSG00000028173,protein_coding -12756,Rpe65,ENSMUSG00000028174,protein_coding -12755,Depdc1a,ENSMUSG00000028175,protein_coding -12747,Lrrc7,ENSMUSG00000028176,protein_coding -12738,Cth,ENSMUSG00000028179,protein_coding 
-12732,Zranb2,ENSMUSG00000028180,protein_coding -12702,Lrriq3,ENSMUSG00000028182,protein_coding -12608,Adgrl2,ENSMUSG00000028184,protein_coding -12588,Dnase2b,ENSMUSG00000028185,protein_coding -12587,Uox,ENSMUSG00000028186,protein_coding -12585,Rpf1,ENSMUSG00000028187,protein_coding -12583,Spata1,ENSMUSG00000028188,protein_coding -12582,Ctbs,ENSMUSG00000028189,protein_coding -12568,Bcl10,ENSMUSG00000028191,protein_coding -12566,Ddah1,ENSMUSG00000028194,protein_coding -12563,Ccn1,ENSMUSG00000028195,protein_coding -12559,Col24a1,ENSMUSG00000028197,protein_coding -12693,Cryz,ENSMUSG00000028199,protein_coding -12836,Asph,ENSMUSG00000028207,protein_coding -12861,Trp53inp1,ENSMUSG00000028211,protein_coding -12862,Ccne2,ENSMUSG00000028212,protein_coding -12876,Gem,ENSMUSG00000028214,protein_coding -12877,Cdh17,ENSMUSG00000028217,protein_coding -12891,Fam92a,ENSMUSG00000028218,protein_coding -12914,Pip4p2,ENSMUSG00000028221,protein_coding -12927,Calb1,ENSMUSG00000028222,protein_coding -12929,Decr1,ENSMUSG00000028223,protein_coding -12931,Nbn,ENSMUSG00000028224,protein_coding -12951,Mmp16,ENSMUSG00000028226,protein_coding -12966,Cpne3,ENSMUSG00000028228,protein_coding -12967,Rmdn1,ENSMUSG00000028229,protein_coding -12772,Tmem68,ENSMUSG00000028232,protein_coding -12773,Tgs1,ENSMUSG00000028233,protein_coding -12781,Rps20,ENSMUSG00000028234,protein_coding -12789,Sdr16c5,ENSMUSG00000028236,protein_coding -12972,Atp6v0d2,ENSMUSG00000028238,protein_coding -12806,Cyp7a1,ENSMUSG00000028240,protein_coding -12804,Ubxn2b,ENSMUSG00000028243,protein_coding -12810,Nsmaf,ENSMUSG00000028245,protein_coding -12991,Faxc,ENSMUSG00000028246,protein_coding -12990,Coq3,ENSMUSG00000028247,protein_coding -12989,Pnisr,ENSMUSG00000028248,protein_coding -12808,Sdcbp,ENSMUSG00000028249,protein_coding -12987,Tstd3,ENSMUSG00000028251,protein_coding -12986,Ccnc,ENSMUSG00000028252,protein_coding -12556,Clca1,ENSMUSG00000028255,protein_coding -12558,Odf2l,ENSMUSG00000028256,protein_coding 
-13021,Fhl5,ENSMUSG00000028259,protein_coding -13017,Ndufaf4,ENSMUSG00000028261,protein_coding -12547,Clca3a2,ENSMUSG00000028262,protein_coding -13108,Spaca1,ENSMUSG00000028264,protein_coding -12531,Lmo4,ENSMUSG00000028266,protein_coding -12496,Gbp3,ENSMUSG00000028268,protein_coding -12498,Gbp2,ENSMUSG00000028270,protein_coding -12504,Gtf2b,ENSMUSG00000028271,protein_coding -12492,Pdlim5,ENSMUSG00000028273,protein_coding -13101,Rngtt,ENSMUSG00000028274,protein_coding -13092,Ube2j1,ENSMUSG00000028277,protein_coding -13089,Rragd,ENSMUSG00000028278,protein_coding -13095,Gabrr1,ENSMUSG00000028280,protein_coding -13082,Casp8ap2,ENSMUSG00000028282,protein_coding -13070,Map3k7,ENSMUSG00000028284,protein_coding -13161,1700009N14Rik,ENSMUSG00000028287,protein_coding -13056,Epha7,ENSMUSG00000028289,protein_coding -13117,Akirin2,ENSMUSG00000028291,protein_coding -13119,Rars2,ENSMUSG00000028292,protein_coding -13120,Slc35a1,ENSMUSG00000028293,protein_coding -13122,Cfap206,ENSMUSG00000028294,protein_coding -13124,Smim8,ENSMUSG00000028295,protein_coding -13127,Cga,ENSMUSG00000028298,protein_coding -13134,C9orf72,ENSMUSG00000028300,protein_coding -13457,Aldob,ENSMUSG00000028307,protein_coding -13459,Rnf20,ENSMUSG00000028309,protein_coding -13461,Ppp3r2,ENSMUSG00000028310,protein_coding -13479,Smc2,ENSMUSG00000028312,protein_coding -13482,Toporsl,ENSMUSG00000028314,protein_coding -13364,Polr1e,ENSMUSG00000028318,protein_coding -13371,Exosc3,ENSMUSG00000028322,protein_coding -13387,Stra6l,ENSMUSG00000028327,protein_coding -13390,Tmod1,ENSMUSG00000028328,protein_coding -13394,Xpa,ENSMUSG00000028329,protein_coding -13393,Ncbp1,ENSMUSG00000028330,protein_coding -13400,Trmo,ENSMUSG00000028331,protein_coding -13401,Hemgn,ENSMUSG00000028332,protein_coding -13403,Anp32b,ENSMUSG00000028333,protein_coding -13404,Nans,ENSMUSG00000028334,protein_coding -13408,Coro2a,ENSMUSG00000028337,protein_coding -13419,Col15a1,ENSMUSG00000028339,protein_coding 
-13429,Nr4a3,ENSMUSG00000028341,protein_coding -13431,Erp44,ENSMUSG00000028343,protein_coding -13433,Invs,ENSMUSG00000028344,protein_coding -13435,Tex10,ENSMUSG00000028345,protein_coding -13437,Tmeff1,ENSMUSG00000028347,protein_coding -13438,Cavin4,ENSMUSG00000028348,protein_coding -13732,Brinp1,ENSMUSG00000028351,protein_coding -3044,Fmn2,ENSMUSG00000028354,protein_coding -13687,Ambp,ENSMUSG00000028356,protein_coding -13688,Kif12,ENSMUSG00000028357,protein_coding -13686,Zfp618,ENSMUSG00000028358,protein_coding -13693,Orm3,ENSMUSG00000028359,protein_coding -12686,Slc44a5,ENSMUSG00000028360,protein_coding -13708,Tnfsf8,ENSMUSG00000028362,protein_coding -13710,Tnc,ENSMUSG00000028364,protein_coding -13573,Txn1,ENSMUSG00000028367,protein_coding -13575,Svep1,ENSMUSG00000028369,protein_coding -13717,Pappa,ENSMUSG00000028370,protein_coding -13718,Astn2,ENSMUSG00000028373,protein_coding -13588,Ptgr1,ENSMUSG00000028378,protein_coding -13596,Ugcg,ENSMUSG00000028381,protein_coding -13600,Ptbp3,ENSMUSG00000028382,protein_coding -13603,Hsdl2,ENSMUSG00000028383,protein_coding -13606,Snx30,ENSMUSG00000028385,protein_coding -13609,Slc46a2,ENSMUSG00000028386,protein_coding -13664,Zfp37,ENSMUSG00000028389,protein_coding -13672,Wdr31,ENSMUSG00000028391,protein_coding -13673,Bspry,ENSMUSG00000028392,protein_coding -13675,Alad,ENSMUSG00000028393,protein_coding -13676,Pole3,ENSMUSG00000028394,protein_coding -13778,2310002L09Rik,ENSMUSG00000028396,protein_coding -13782,Kdm4c,ENSMUSG00000028397,protein_coding -13791,Dmac1,ENSMUSG00000028398,protein_coding -13797,Ptprd,ENSMUSG00000028399,protein_coding -13821,Mpdz,ENSMUSG00000028402,protein_coding -13837,Zdhhc21,ENSMUSG00000028403,protein_coding -13170,Aco1,ENSMUSG00000028405,protein_coding -13173,Smim27,ENSMUSG00000028407,protein_coding -13185,Smu1,ENSMUSG00000028409,protein_coding -13183,Dnaja1,ENSMUSG00000028410,protein_coding -13180,Aptx,ENSMUSG00000028411,protein_coding -13500,Slc44a1,ENSMUSG00000028412,protein_coding 
-13187,B4galt1,ENSMUSG00000028413,protein_coding -13504,Fktn,ENSMUSG00000028414,protein_coding -13192,Spink4,ENSMUSG00000028415,protein_coding -13193,Bag1,ENSMUSG00000028416,protein_coding -13506,Tal2,ENSMUSG00000028417,protein_coding -13194,Chmp5,ENSMUSG00000028419,protein_coding -13508,Tmem38b,ENSMUSG00000028420,protein_coding -13195,Nfx1,ENSMUSG00000028423,protein_coding -13530,Rad23b,ENSMUSG00000028426,protein_coding -13197,Aqp7,ENSMUSG00000028427,protein_coding -13200,Nol6,ENSMUSG00000028430,protein_coding -13548,Elp1,ENSMUSG00000028431,protein_coding -13205,Ubap2,ENSMUSG00000028433,protein_coding -13558,Epb41l4b,ENSMUSG00000028434,protein_coding -13199,Aqp3,ENSMUSG00000028435,protein_coding -13210,Dcaf12,ENSMUSG00000028436,protein_coding -13214,Ubap1,ENSMUSG00000028437,protein_coding -13215,Kif24,ENSMUSG00000028438,protein_coding -13220,Fam219a,ENSMUSG00000028439,protein_coding -13218,1110017D15Rik,ENSMUSG00000028441,protein_coding -13216,Nudt2,ENSMUSG00000028443,protein_coding -13226,Cntfr,ENSMUSG00000028444,protein_coding -13223,Enho,ENSMUSG00000028445,protein_coding -13229,Dctn3,ENSMUSG00000028447,protein_coding -13285,1700022I11Rik,ENSMUSG00000028451,protein_coding -13286,Vcp,ENSMUSG00000028452,protein_coding -13287,Fancg,ENSMUSG00000028453,protein_coding -13288,Pigo,ENSMUSG00000028454,protein_coding -13289,Stoml2,ENSMUSG00000028455,protein_coding -13293,Unc13b,ENSMUSG00000028456,protein_coding -13296,Atp8b5,ENSMUSG00000028457,protein_coding -13299,Tesk1,ENSMUSG00000028458,protein_coding -13300,Cd72,ENSMUSG00000028459,protein_coding -13301,Sit1,ENSMUSG00000028460,protein_coding -13304,Ccdc107,ENSMUSG00000028461,protein_coding -13307,Car9,ENSMUSG00000028463,protein_coding -13308,Tpm2,ENSMUSG00000028464,protein_coding -13309,Tln1,ENSMUSG00000028465,protein_coding -13310,Creb3,ENSMUSG00000028466,protein_coding -13311,Gba2,ENSMUSG00000028467,protein_coding -13312,Rgp1,ENSMUSG00000028468,protein_coding -13317,Npr2,ENSMUSG00000028469,protein_coding 
-13319,Hint2,ENSMUSG00000028470,protein_coding -13327,Spaar,ENSMUSG00000028475,protein_coding -13335,Reck,ENSMUSG00000028476,protein_coding -13339,Clta,ENSMUSG00000028478,protein_coding -13340,Gne,ENSMUSG00000028479,protein_coding -13337,Glipr2,ENSMUSG00000028480,protein_coding -13847,Snapc3,ENSMUSG00000028483,protein_coding -13848,Psip1,ENSMUSG00000028484,protein_coding -13860,Bnc2,ENSMUSG00000028487,protein_coding -13868,Sh3gl2,ENSMUSG00000028488,protein_coding -13874,Saxo1,ENSMUSG00000028492,protein_coding -13881,Plin2,ENSMUSG00000028494,protein_coding -13884,Rps6,ENSMUSG00000028495,protein_coding -13890,Mllt3,ENSMUSG00000028496,protein_coding -13896,Hacd4,ENSMUSG00000028497,protein_coding -14185,Usp24,ENSMUSG00000028514,protein_coding -14170,Plpp3,ENSMUSG00000028517,protein_coding -14169,Prkaa2,ENSMUSG00000028518,protein_coding -14161,Dab1,ENSMUSG00000028519,protein_coding -14151,4921539E11Rik,ENSMUSG00000028520,protein_coding -14149,Slc35d1,ENSMUSG00000028521,protein_coding -14148,Mier1,ENSMUSG00000028522,protein_coding -14143,Tctex1d1,ENSMUSG00000028523,protein_coding -14141,Sgip1,ENSMUSG00000028524,protein_coding -14139,Pde4b,ENSMUSG00000028525,protein_coding -14124,Ak4,ENSMUSG00000028527,protein_coding -14125,Dnajc6,ENSMUSG00000028528,protein_coding -14112,Jak1,ENSMUSG00000028530,protein_coding -14107,Cachd1,ENSMUSG00000028532,protein_coding -13983,Izumo3,ENSMUSG00000028533,protein_coding -14480,2610528J11Rik,ENSMUSG00000028536,protein_coding -14464,St3gal3,ENSMUSG00000028538,protein_coding -14463,Artn,ENSMUSG00000028539,protein_coding -14461,Dph2,ENSMUSG00000028540,protein_coding -14458,B4galt2,ENSMUSG00000028541,protein_coding -14455,Slc6a9,ENSMUSG00000028542,protein_coding -14303,Slc5a9,ENSMUSG00000028544,protein_coding -14298,Bend5,ENSMUSG00000028545,protein_coding -14292,Elavl4,ENSMUSG00000028546,protein_coding -14095,Itgb3bp,ENSMUSG00000028549,protein_coding -14079,Atg4c,ENSMUSG00000028550,protein_coding -14287,Cdkn2c,ENSMUSG00000028551,protein_coding 
-14277,Eps15,ENSMUSG00000028552,protein_coding -14075,Angptl3,ENSMUSG00000028553,protein_coding -14280,Ttc39a,ENSMUSG00000028555,protein_coding -14074,Dock7,ENSMUSG00000028556,protein_coding -14281,Rnf11,ENSMUSG00000028557,protein_coding -14276,Calr4,ENSMUSG00000028558,protein_coding -14274,Osbpl9,ENSMUSG00000028559,protein_coding -14073,Usp1,ENSMUSG00000028560,protein_coding -14061,Tm2d1,ENSMUSG00000028563,protein_coding -14054,Nfia,ENSMUSG00000028565,protein_coding -14266,Txndc12,ENSMUSG00000028567,protein_coding -14265,Btf3l4,ENSMUSG00000028568,protein_coding -14033,Cyp2j13,ENSMUSG00000028571,protein_coding -14032,Hook1,ENSMUSG00000028572,protein_coding -14029,Fggy,ENSMUSG00000028573,protein_coding -14013,Eqtn,ENSMUSG00000028575,protein_coding -14009,Ift74,ENSMUSG00000028576,protein_coding -14007,Plaa,ENSMUSG00000028577,protein_coding -14006,Caap1,ENSMUSG00000028578,protein_coding -14821,Pum1,ENSMUSG00000028580,protein_coding -14832,Laptm5,ENSMUSG00000028581,protein_coding -14259,Cc2d1b,ENSMUSG00000028582,protein_coding -15217,Pdpn,ENSMUSG00000028583,protein_coding -15219,Lrrc38,ENSMUSG00000028584,protein_coding -14257,Orc1,ENSMUSG00000028587,protein_coding -15268,1700012P22Rik,ENSMUSG00000028589,protein_coding -15266,Pramef12,ENSMUSG00000028591,protein_coding -15272,9430007A20Rik,ENSMUSG00000028593,protein_coding -14247,Gpx7,ENSMUSG00000028597,protein_coding -15286,Tnfrsf1b,ENSMUSG00000028599,protein_coding -14236,Podn,ENSMUSG00000028600,protein_coding -14240,Echdc2,ENSMUSG00000028601,protein_coding -15287,Tnfrsf8,ENSMUSG00000028602,protein_coding -14238,Scp2,ENSMUSG00000028603,protein_coding -14233,Cpt2,ENSMUSG00000028607,protein_coding -14232,Czib,ENSMUSG00000028608,protein_coding -14231,Magoh,ENSMUSG00000028609,protein_coding -14220,Dmrtb1,ENSMUSG00000028610,protein_coding -14228,Lrp8,ENSMUSG00000028613,protein_coding -14217,Ndc1,ENSMUSG00000028614,protein_coding -14210,Lrrc42,ENSMUSG00000028617,protein_coding -14208,Tmem59,ENSMUSG00000028618,protein_coding 
-14207,Tceanc2,ENSMUSG00000028619,protein_coding -14204,Cyb5rl,ENSMUSG00000028621,protein_coding -14203,Mrpl37,ENSMUSG00000028622,protein_coding -14567,Col9a2,ENSMUSG00000028626,protein_coding -14563,Exo5,ENSMUSG00000028629,protein_coding -28994,Dyrk2,ENSMUSG00000028630,protein_coding -14554,Kcnq4,ENSMUSG00000028631,protein_coding -14549,Ctps,ENSMUSG00000028633,protein_coding -14542,Hivep3,ENSMUSG00000028634,protein_coding -14544,Edn2,ENSMUSG00000028635,protein_coding -14529,Ppcs,ENSMUSG00000028636,protein_coding -14526,Ccdc30,ENSMUSG00000028637,protein_coding -14523,Ybx1,ENSMUSG00000028639,protein_coding -7009,Tfap2c,ENSMUSG00000028640,protein_coding -14520,P3h1,ENSMUSG00000028641,protein_coding -14518,Tmem269,ENSMUSG00000028642,protein_coding -14516,Svbp,ENSMUSG00000028643,protein_coding -14515,Ermap,ENSMUSG00000028644,protein_coding -14510,Slc2a1,ENSMUSG00000028645,protein_coding -14640,Rragc,ENSMUSG00000028646,protein_coding -14638,Mycbp,ENSMUSG00000028647,protein_coding -14629,Ndufs5,ENSMUSG00000028648,protein_coding -14622,Macf1,ENSMUSG00000028649,protein_coding -14611,Ppie,ENSMUSG00000028651,protein_coding -14606,Trit1,ENSMUSG00000028653,protein_coding -14605,Mycl,ENSMUSG00000028654,protein_coding -14603,Mfsd2a,ENSMUSG00000028655,protein_coding -14601,Cap1,ENSMUSG00000028656,protein_coding -14600,Ppt1,ENSMUSG00000028657,protein_coding -15044,Epha8,ENSMUSG00000028661,protein_coding -15040,Ephb2,ENSMUSG00000028664,protein_coding -15016,Eloa,ENSMUSG00000028668,protein_coding -15014,Pithd1,ENSMUSG00000028669,protein_coding -15013,Lypla2,ENSMUSG00000028670,protein_coding -15012,Gale,ENSMUSG00000028671,protein_coding -15011,Hmgcl,ENSMUSG00000028672,protein_coding -15010,Fuca1,ENSMUSG00000028673,protein_coding -15007,Pnrc2,ENSMUSG00000028675,protein_coding -15006,Srsf10,ENSMUSG00000028676,protein_coding -14442,Rnf220,ENSMUSG00000028677,protein_coding -14435,Kif2c,ENSMUSG00000028678,protein_coding -14426,Plk3,ENSMUSG00000028680,protein_coding 
-14423,Ptch2,ENSMUSG00000028681,protein_coding -14421,Eif2b3,ENSMUSG00000028683,protein_coding -14417,Urod,ENSMUSG00000028684,protein_coding -14406,Mutyh,ENSMUSG00000028687,protein_coding -14405,Toe1,ENSMUSG00000028688,protein_coding -14402,Ccdc163,ENSMUSG00000028689,protein_coding -14401,Mmachc,ENSMUSG00000028690,protein_coding -14400,Prdx1,ENSMUSG00000028691,protein_coding -14398,Akr1a1,ENSMUSG00000028692,protein_coding -14397,Nasp,ENSMUSG00000028693,protein_coding -14391,Ipp,ENSMUSG00000028696,protein_coding -14388,Pik3r3,ENSMUSG00000028698,protein_coding -14383,Tspan1,ENSMUSG00000028699,protein_coding -14381,Pomgnt1,ENSMUSG00000028700,protein_coding -14382,Lurap1,ENSMUSG00000028701,protein_coding -14380,Rad54l,ENSMUSG00000028702,protein_coding -14379,Lrrc41,ENSMUSG00000028703,protein_coding -14376,Nsun4,ENSMUSG00000028706,protein_coding -14374,Dmbx1,ENSMUSG00000028707,protein_coding -14370,Mknk1,ENSMUSG00000028708,protein_coding -14369,Mob3c,ENSMUSG00000028709,protein_coding -14367,Atpaf1,ENSMUSG00000028710,protein_coding -14359,Cyp4a31,ENSMUSG00000028712,protein_coding -14362,Cyp4b1,ENSMUSG00000028713,protein_coding -14356,Cyp4a14,ENSMUSG00000028715,protein_coding -14342,Pdzk1ip1,ENSMUSG00000028716,protein_coding -14341,Tal1,ENSMUSG00000028717,protein_coding -14340,Stil,ENSMUSG00000028718,protein_coding -14338,Cmpk1,ENSMUSG00000028719,protein_coding -14483,Ebna1bp2,ENSMUSG00000028729,protein_coding -14482,Cfap57,ENSMUSG00000028730,protein_coding -15126,Pax7,ENSMUSG00000028736,protein_coding -15123,Aldh4a1,ENSMUSG00000028737,protein_coding -15125,Tas1r2,ENSMUSG00000028738,protein_coding -15116,Mrto4,ENSMUSG00000028741,protein_coding -15111,Akr7a5,ENSMUSG00000028743,protein_coding -15110,Pqlc2,ENSMUSG00000028744,protein_coding -15109,Capzb,ENSMUSG00000028745,protein_coding -15103,Htr6,ENSMUSG00000028747,protein_coding -15093,Pla2g2f,ENSMUSG00000028749,protein_coding -15092,Pla2g2c,ENSMUSG00000028750,protein_coding -15099,Pla2g2e,ENSMUSG00000028751,protein_coding 
-15089,Vwa5b1,ENSMUSG00000028753,protein_coding -15083,Cda,ENSMUSG00000028755,protein_coding -15080,Pink1,ENSMUSG00000028756,protein_coding -15079,Ddost,ENSMUSG00000028757,protein_coding -15078,Kif17,ENSMUSG00000028758,protein_coding -15076,Hp1bp3,ENSMUSG00000028759,protein_coding -15072,Eif4g3,ENSMUSG00000028760,protein_coding -15063,Hspg2,ENSMUSG00000028763,protein_coding -15069,Alpl,ENSMUSG00000028766,protein_coding -15917,Ptpn12,ENSMUSG00000028771,protein_coding -14818,Zcchc17,ENSMUSG00000028772,protein_coding -14817,Fabp3,ENSMUSG00000028773,protein_coding -14813,Tinagl1,ENSMUSG00000028776,protein_coding -15893,Gnat3,ENSMUSG00000028777,protein_coding -14812,Hcrtr1,ENSMUSG00000028778,protein_coding -14811,Pef1,ENSMUSG00000028779,protein_coding -15891,Sema3c,ENSMUSG00000028780,protein_coding -14808,Adgrb2,ENSMUSG00000028782,protein_coding -14806,Spocd1,ENSMUSG00000028784,protein_coding -14764,Hpca,ENSMUSG00000028785,protein_coding -14763,Tmem54,ENSMUSG00000028786,protein_coding -14801,Ptp4a2,ENSMUSG00000028788,protein_coding -14758,Azin2,ENSMUSG00000028789,protein_coding -14798,Khdrbs1,ENSMUSG00000028790,protein_coding -14760,Ak2,ENSMUSG00000028792,protein_coding -14762,Rnf19b,ENSMUSG00000028793,protein_coding -14753,A3galt2,ENSMUSG00000028794,protein_coding -14793,Ccdc28b,ENSMUSG00000028795,protein_coding -14752,Phc2,ENSMUSG00000028796,protein_coding -14790,Tmem234,ENSMUSG00000028797,protein_coding -14789,Eif3i,ENSMUSG00000028798,protein_coding -14754,Zfp362,ENSMUSG00000028799,protein_coding -14785,Hdac1,ENSMUSG00000028800,protein_coding -14995,Stpg1,ENSMUSG00000028801,protein_coding -14993,Nipal3,ENSMUSG00000028803,protein_coding -14745,Csmd2,ENSMUSG00000028804,protein_coding -14776,Zbtb8a,ENSMUSG00000028807,protein_coding -14986,Srrm1,ENSMUSG00000028809,protein_coding -14768,Yars,ENSMUSG00000028811,protein_coding -14744,CK137956,ENSMUSG00000028813,protein_coding -14722,Sfpq,ENSMUSG00000028820,protein_coding -14978,Syf2,ENSMUSG00000028821,protein_coding 
-14976,Tmem50a,ENSMUSG00000028822,protein_coding -14975,Rhd,ENSMUSG00000028825,protein_coding -14974,Maco1,ENSMUSG00000028826,protein_coding -14716,AU040320,ENSMUSG00000028830,protein_coding -14964,Stmn1,ENSMUSG00000028832,protein_coding -14715,Ncdn,ENSMUSG00000028833,protein_coding -14958,Trim63,ENSMUSG00000028834,protein_coding -14960,Slc30a2,ENSMUSG00000028836,protein_coding -14713,Psmb2,ENSMUSG00000028837,protein_coding -14961,Extl1,ENSMUSG00000028838,protein_coding -14953,Zfp593,ENSMUSG00000028840,protein_coding -14952,Cnksr1,ENSMUSG00000028841,protein_coding -14701,Ago3,ENSMUSG00000028842,protein_coding -14946,Sh3bgrl3,ENSMUSG00000028843,protein_coding -14699,Tekt2,ENSMUSG00000028845,protein_coding -14696,Trappc3,ENSMUSG00000028847,protein_coding -14924,Gpn2,ENSMUSG00000028848,protein_coding -14695,Map7d1,ENSMUSG00000028849,protein_coding -14923,Gpatch3,ENSMUSG00000028850,protein_coding -14920,Nudc,ENSMUSG00000028851,protein_coding -14914,Slc9a1,ENSMUSG00000028854,protein_coding -14911,Tmem222,ENSMUSG00000028857,protein_coding -14684,Csf3r,ENSMUSG00000028859,protein_coding -14910,Sytl1,ENSMUSG00000028860,protein_coding -14685,Mrps15,ENSMUSG00000028861,protein_coding -14909,Map3k6,ENSMUSG00000028862,protein_coding -14677,Meaf6,ENSMUSG00000028863,protein_coding -15879,Hgf,ENSMUSG00000028864,protein_coding -14908,Cd164l2,ENSMUSG00000028865,protein_coding -14904,Wasf2,ENSMUSG00000028868,protein_coding -14672,Gnl2,ENSMUSG00000028869,protein_coding -14671,Rspo1,ENSMUSG00000028871,protein_coding -14668,Cdca8,ENSMUSG00000028873,protein_coding -14901,Fgr,ENSMUSG00000028874,protein_coding -14666,Epha10,ENSMUSG00000028876,protein_coding -14899,Fam76a,ENSMUSG00000028878,protein_coding -14897,Stx12,ENSMUSG00000028879,protein_coding -14895,Ppp1r8,ENSMUSG00000028882,protein_coding -15823,Sema3a,ENSMUSG00000028883,protein_coding -14892,Rpa2,ENSMUSG00000028884,protein_coding -14891,Smpdl3b,ENSMUSG00000028885,protein_coding -14889,Eya3,ENSMUSG00000028886,protein_coding 
-14664,Yrdc,ENSMUSG00000028889,protein_coding -14661,Mtf1,ENSMUSG00000028890,protein_coding -14882,Sesn2,ENSMUSG00000028893,protein_coding -14659,Inpp5b,ENSMUSG00000028894,protein_coding -14873,Rcc1,ENSMUSG00000028896,protein_coding -14872,Trnau1ap,ENSMUSG00000028898,protein_coding -14864,Taf12,ENSMUSG00000028899,protein_coding -14859,Gmeb1,ENSMUSG00000028901,protein_coding -14654,Sf3a3,ENSMUSG00000028902,protein_coding -14848,Epb41,ENSMUSG00000028906,protein_coding -14651,Utp11,ENSMUSG00000028907,protein_coding -14840,Ptpru,ENSMUSG00000028909,protein_coding -14841,Mecr,ENSMUSG00000028910,protein_coding -14842,Srsf4,ENSMUSG00000028911,protein_coding -15192,Casp9,ENSMUSG00000028914,protein_coding -15187,Plekhm2,ENSMUSG00000028917,protein_coding -15167,Arhgef19,ENSMUSG00000028919,protein_coding -15164,Fbxo42,ENSMUSG00000028920,protein_coding -15159,Necap2,ENSMUSG00000028923,protein_coding -15675,Cdk14,ENSMUSG00000028926,protein_coding -15152,Padi2,ENSMUSG00000028927,protein_coding -15514,Kcnab2,ENSMUSG00000028931,protein_coding -15939,Psmc2,ENSMUSG00000028932,protein_coding -16056,Xrcc2,ENSMUSG00000028933,protein_coding -15511,Rpl22,ENSMUSG00000028936,protein_coding -15504,Acot7,ENSMUSG00000028937,protein_coding -16039,Galntl5,ENSMUSG00000028938,protein_coding -15503,Hes2,ENSMUSG00000028940,protein_coding -15501,Espn,ENSMUSG00000028943,protein_coding -16032,Prkag2,ENSMUSG00000028944,protein_coding -16030,Rheb,ENSMUSG00000028945,protein_coding -15508,Hes3,ENSMUSG00000028946,protein_coding -15498,Nol9,ENSMUSG00000028948,protein_coding -16023,Smarcd3,ENSMUSG00000028949,protein_coding -15497,Tas1r1,ENSMUSG00000028950,protein_coding -15496,Zbtb48,ENSMUSG00000028952,protein_coding -16020,Abcf2,ENSMUSG00000028953,protein_coding -16026,Nub1,ENSMUSG00000028954,protein_coding -15487,Vamp3,ENSMUSG00000028955,protein_coding -15486,Per3,ENSMUSG00000028957,protein_coding -16013,Tmub1,ENSMUSG00000028958,protein_coding -16011,Fastk,ENSMUSG00000028959,protein_coding 
-15432,Ube4b,ENSMUSG00000028960,protein_coding -15430,Pgd,ENSMUSG00000028961,protein_coding -16010,Slc4a2,ENSMUSG00000028962,protein_coding -15485,Uts2,ENSMUSG00000028963,protein_coding -15481,Park7,ENSMUSG00000028964,protein_coding -15482,Tnfrsf9,ENSMUSG00000028965,protein_coding -15480,Errfi1,ENSMUSG00000028967,protein_coding -16009,Cdk5,ENSMUSG00000028969,protein_coding -15736,Abcb1b,ENSMUSG00000028970,protein_coding -15427,Cort,ENSMUSG00000028971,protein_coding -15468,Car6,ENSMUSG00000028972,protein_coding -16007,Abcb8,ENSMUSG00000028973,protein_coding -15426,Dffa,ENSMUSG00000028974,protein_coding -15424,Pex14,ENSMUSG00000028975,protein_coding -15466,Slc2a5,ENSMUSG00000028976,protein_coding -15420,Casz1,ENSMUSG00000028977,protein_coding -16004,Nos3,ENSMUSG00000028978,protein_coding -15416,Masp2,ENSMUSG00000028979,protein_coding -15460,H6pd,ENSMUSG00000028980,protein_coding -15450,Slc25a33,ENSMUSG00000028982,protein_coding -15994,Klhl7,ENSMUSG00000028986,protein_coding -15443,Ctnnbip1,ENSMUSG00000028988,protein_coding -15409,Angptl7,ENSMUSG00000028989,protein_coding -15440,Lzic,ENSMUSG00000028990,protein_coding -15408,Mtor,ENSMUSG00000028991,protein_coding -15438,Nmnat1,ENSMUSG00000028992,protein_coding -15991,Fam126a,ENSMUSG00000028995,protein_coding -15436,Rbp7,ENSMUSG00000028996,protein_coding -15988,Tomm7,ENSMUSG00000028998,protein_coding -15984,Rint1,ENSMUSG00000028999,protein_coding -15401,Fbxo44,ENSMUSG00000029001,protein_coding -15398,Mad2l2,ENSMUSG00000029003,protein_coding -15976,Kmt2e,ENSMUSG00000029004,protein_coding -15397,Draxin,ENSMUSG00000029005,protein_coding -15396,Agtrap,ENSMUSG00000029007,protein_coding -15393,Mthfr,ENSMUSG00000029009,protein_coding -15953,Orc5,ENSMUSG00000029012,protein_coding -15938,Dnajc2,ENSMUSG00000029014,protein_coding -15940,Slc26a5,ENSMUSG00000029015,protein_coding -15391,Clcn6,ENSMUSG00000029016,protein_coding -15936,Pmpcb,ENSMUSG00000029017,protein_coding -15388,Nppb,ENSMUSG00000029019,protein_coding 
-15384,Mfn2,ENSMUSG00000029020,protein_coding -15382,Miip,ENSMUSG00000029022,protein_coding -15534,Trp73,ENSMUSG00000029026,protein_coding -15527,Dffb,ENSMUSG00000029027,protein_coding -15529,Lrrc47,ENSMUSG00000029028,protein_coding -15537,Wrap73,ENSMUSG00000029029,protein_coding -15538,Tprgl,ENSMUSG00000029030,protein_coding -15541,Arhgef16,ENSMUSG00000029032,protein_coding -15603,Acap3,ENSMUSG00000029033,protein_coding -15601,Ints11,ENSMUSG00000029034,protein_coding -15586,Atad3a,ENSMUSG00000029036,protein_coding -15582,Ssu72,ENSMUSG00000029038,protein_coding -15560,Pex10,ENSMUSG00000029047,protein_coding -15561,Rer1,ENSMUSG00000029048,protein_coding -15562,Morn1,ENSMUSG00000029049,protein_coding -15563,Ski,ENSMUSG00000029050,protein_coding -15565,Prkcz,ENSMUSG00000029053,protein_coding -15567,Gabrd,ENSMUSG00000029054,protein_coding -15558,Plch2,ENSMUSG00000029055,protein_coding -15557,Pank4,ENSMUSG00000029056,protein_coding -15553,Prxl2b,ENSMUSG00000029059,protein_coding -15580,Mib2,ENSMUSG00000029060,protein_coding -15579,Mmp23,ENSMUSG00000029061,protein_coding -15578,Cdk11b,ENSMUSG00000029062,protein_coding -15573,Nadk,ENSMUSG00000029063,protein_coding -15571,Gnb1,ENSMUSG00000029064,protein_coding -15593,Mrpl20,ENSMUSG00000029066,protein_coding -15594,Ccnl2,ENSMUSG00000029068,protein_coding -15597,Mxra8,ENSMUSG00000029070,protein_coding -15598,Dvl1,ENSMUSG00000029071,protein_coding -15599,Tas1r3,ENSMUSG00000029072,protein_coding -15600,Cptp,ENSMUSG00000029073,protein_coding -15614,Ttll10,ENSMUSG00000029074,protein_coding -15609,Tnfrsf4,ENSMUSG00000029075,protein_coding -15608,Sdf4,ENSMUSG00000029076,protein_coding -16440,Bst1,ENSMUSG00000029082,protein_coding -16441,Cd38,ENSMUSG00000029084,protein_coding -16450,Prom1,ENSMUSG00000029086,protein_coding -16521,Kcnip4,ENSMUSG00000029088,protein_coding -16520,5730480H06Rik,ENSMUSG00000029089,protein_coding -16555,Adgra3,ENSMUSG00000029090,protein_coding -16333,Sorcs2,ENSMUSG00000029093,protein_coding 
-16331,Afap1,ENSMUSG00000029094,protein_coding -16330,Ablim2,ENSMUSG00000029095,protein_coding -16327,Htra3,ENSMUSG00000029096,protein_coding -16324,Trmt44,ENSMUSG00000029097,protein_coding -16326,Acox3,ENSMUSG00000029098,protein_coding -16309,Rgs12,ENSMUSG00000029101,protein_coding -16312,Hgfac,ENSMUSG00000029102,protein_coding -16314,Lrpap1,ENSMUSG00000029103,protein_coding -16307,Htt,ENSMUSG00000029104,protein_coding -16298,Add1,ENSMUSG00000029106,protein_coding -16651,Pcdh7,ENSMUSG00000029108,protein_coding -16290,Rnf4,ENSMUSG00000029110,protein_coding -16269,Nelfa,ENSMUSG00000029111,protein_coding -16254,Nkx1-1,ENSMUSG00000029112,protein_coding -16350,Man2b2,ENSMUSG00000029119,protein_coding -16351,Ppp2r2c,ENSMUSG00000029120,protein_coding -16360,Crmp1,ENSMUSG00000029121,protein_coding -16361,Evc,ENSMUSG00000029122,protein_coding -16365,Stk32b,ENSMUSG00000029123,protein_coding -16372,Stx18,ENSMUSG00000029125,protein_coding -16374,Nsg1,ENSMUSG00000029126,protein_coding -16375,Zbtb49,ENSMUSG00000029127,protein_coding -16410,Rab28,ENSMUSG00000029128,protein_coding -16105,Rnf32,ENSMUSG00000029130,protein_coding -16120,Dnajb6,ENSMUSG00000029131,protein_coding -16215,Plb1,ENSMUSG00000029134,protein_coding -16212,Fosl2,ENSMUSG00000029135,protein_coding -16204,Rbks,ENSMUSG00000029136,protein_coding -16194,4930548H24Rik,ENSMUSG00000029138,protein_coding -16198,Slc4a1ap,ENSMUSG00000029141,protein_coding -16178,Eif2b4,ENSMUSG00000029145,protein_coding -16179,Snx17,ENSMUSG00000029146,protein_coding -16181,Ppm1g,ENSMUSG00000029147,protein_coding -16184,Nrbp1,ENSMUSG00000029148,protein_coding -16185,Krtcap3,ENSMUSG00000029149,protein_coding -16169,Slc30a3,ENSMUSG00000029151,protein_coding -16919,Ociad1,ENSMUSG00000029152,protein_coding -16922,Ociad2,ENSMUSG00000029153,protein_coding -16925,Cwh43,ENSMUSG00000029154,protein_coding -16932,Spata18,ENSMUSG00000029155,protein_coding -16930,Sgcb,ENSMUSG00000029156,protein_coding -16863,Yipf7,ENSMUSG00000029158,protein_coding 
-16162,Cgref1,ENSMUSG00000029161,protein_coding -16161,Khk,ENSMUSG00000029162,protein_coding -16160,Emilin1,ENSMUSG00000029163,protein_coding -16157,Agbl5,ENSMUSG00000029165,protein_coding -16152,Mapre3,ENSMUSG00000029166,protein_coding -16569,Ppargc1a,ENSMUSG00000029167,protein_coding -16150,Dpysl5,ENSMUSG00000029168,protein_coding -16582,Dhx15,ENSMUSG00000029169,protein_coding -16714,Pgm2,ENSMUSG00000029171,protein_coding -16597,Sepsecs,ENSMUSG00000029173,protein_coding -16718,Tbc1d1,ENSMUSG00000029174,protein_coding -16147,Slc35f6,ENSMUSG00000029175,protein_coding -16600,Anapc4,ENSMUSG00000029176,protein_coding -16149,Cenpa,ENSMUSG00000029177,protein_coding -16738,Klf3,ENSMUSG00000029178,protein_coding -16599,Zcchc4,ENSMUSG00000029179,protein_coding -16141,1700001C02Rik,ENSMUSG00000029182,protein_coding -50088,Olfr109,ENSMUSG00000029184,protein_coding -16742,Fam114a1,ENSMUSG00000029185,protein_coding -16598,Pi4k2b,ENSMUSG00000029186,protein_coding -16605,Slc34a2,ENSMUSG00000029188,protein_coding -16607,Sel1l3,ENSMUSG00000029189,protein_coding -16342,D5Ertd579e,ENSMUSG00000029190,protein_coding -16749,Rfc1,ENSMUSG00000029191,protein_coding -16340,Tbc1d14,ENSMUSG00000029192,protein_coding -16617,Cckar,ENSMUSG00000029193,protein_coding -16751,Klb,ENSMUSG00000029195,protein_coding -16338,Tada2b,ENSMUSG00000029196,protein_coding -16337,Grpel1,ENSMUSG00000029198,protein_coding -16753,Lias,ENSMUSG00000029199,protein_coding -16756,Ugdh,ENSMUSG00000029201,protein_coding -16766,Pds5a,ENSMUSG00000029202,protein_coding -16763,Ube2k,ENSMUSG00000029203,protein_coding -16779,Rhoh,ENSMUSG00000029204,protein_coding -16781,Chrna9,ENSMUSG00000029205,protein_coding -16795,Nsun7,ENSMUSG00000029206,protein_coding -16796,Apbb2,ENSMUSG00000029207,protein_coding -16865,Guf1,ENSMUSG00000029208,protein_coding -16866,Gnpda2,ENSMUSG00000029209,protein_coding -16877,Gabra4,ENSMUSG00000029211,protein_coding -16878,Gabrb1,ENSMUSG00000029212,protein_coding 
-16882,Commd8,ENSMUSG00000029213,protein_coding -16902,Tec,ENSMUSG00000029217,protein_coding -16908,Slc10a4,ENSMUSG00000029219,protein_coding -16822,Slc30a9,ENSMUSG00000029221,protein_coding -16807,Uchl1,ENSMUSG00000029223,protein_coding -16951,Fip1l1,ENSMUSG00000029227,protein_coding -16953,Lnx1,ENSMUSG00000029228,protein_coding -16960,Chic2,ENSMUSG00000029229,protein_coding -16967,Pdgfra,ENSMUSG00000029231,protein_coding -16988,Srd5a3,ENSMUSG00000029233,protein_coding -16989,Tmem165,ENSMUSG00000029234,protein_coding -16995,Pdcl2,ENSMUSG00000029235,protein_coding -16997,Nmu,ENSMUSG00000029236,protein_coding -16991,Clock,ENSMUSG00000029238,protein_coding -17063,Epha5,ENSMUSG00000029245,protein_coding -17007,Ppat,ENSMUSG00000029246,protein_coding -17009,Paics,ENSMUSG00000029247,protein_coding -17013,Thegl,ENSMUSG00000029248,protein_coding -17020,Rest,ENSMUSG00000029249,protein_coding -17023,Polr2b,ENSMUSG00000029250,protein_coding -17071,Cenpc1,ENSMUSG00000029253,protein_coding -17073,Stap1,ENSMUSG00000029254,protein_coding -17076,Gnrhr,ENSMUSG00000029255,protein_coding -17091,Ugt2b34,ENSMUSG00000029260,protein_coding -17585,Pigg,ENSMUSG00000029263,protein_coding -17583,Dr1,ENSMUSG00000029265,protein_coding -17577,Mtf2,ENSMUSG00000029267,protein_coding -17117,Ugt2a2,ENSMUSG00000029268,protein_coding -17120,Sult1b1,ENSMUSG00000029269,protein_coding -17574,Dipk1a,ENSMUSG00000029270,protein_coding -17122,Sult1e1,ENSMUSG00000029272,protein_coding -17121,Sult1d1,ENSMUSG00000029273,protein_coding -17563,Gfi1,ENSMUSG00000029275,protein_coding -17557,Glmn,ENSMUSG00000029276,protein_coding -17546,Brdt,ENSMUSG00000029279,protein_coding -17144,Smr3a,ENSMUSG00000029280,protein_coding -17147,Smr2,ENSMUSG00000029281,protein_coding -17156,Amtn,ENSMUSG00000029282,protein_coding -17538,Cdc7,ENSMUSG00000029283,protein_coding -17160,Enam,ENSMUSG00000029286,protein_coding -17540,Tgfbr3,ENSMUSG00000029287,protein_coding -17159,Ambn,ENSMUSG00000029288,protein_coding 
-17512,Zfp326,ENSMUSG00000029290,protein_coding -17164,Rufy3,ENSMUSG00000029291,protein_coding -17495,Gbp9,ENSMUSG00000029298,protein_coding -17489,Abcg3,ENSMUSG00000029299,protein_coding -17482,Spp1,ENSMUSG00000029304,protein_coding -17480,Ibsp,ENSMUSG00000029306,protein_coding -17477,Dmp1,ENSMUSG00000029307,protein_coding -17474,Sparcl1,ENSMUSG00000029309,protein_coding -17470,Nudt9,ENSMUSG00000029310,protein_coding -17468,Hsd17b11,ENSMUSG00000029311,protein_coding -17463,Klhl8,ENSMUSG00000029312,protein_coding -17462,Aff1,ENSMUSG00000029313,protein_coding -17424,Gpat3,ENSMUSG00000029314,protein_coding -17413,Coq2,ENSMUSG00000029319,protein_coding -17460,1700016H13Rik,ENSMUSG00000029320,protein_coding -17457,Slc10a6,ENSMUSG00000029321,protein_coding -17411,Plac8,ENSMUSG00000029322,protein_coding -17392,Enoph1,ENSMUSG00000029326,protein_coding -17391,Hnrnpdl,ENSMUSG00000029328,protein_coding -17433,Cds1,ENSMUSG00000029330,protein_coding -17372,Prkg2,ENSMUSG00000029334,protein_coding -17371,Bmp3,ENSMUSG00000029335,protein_coding -17363,Fgf5,ENSMUSG00000029337,protein_coding -17355,Antxr2,ENSMUSG00000029338,protein_coding -17718,Crybb1,ENSMUSG00000029343,protein_coding -17719,Tpst2,ENSMUSG00000029344,protein_coding -17720,Tfip11,ENSMUSG00000029345,protein_coding -17721,Srrd,ENSMUSG00000029346,protein_coding -17727,Asphd2,ENSMUSG00000029348,protein_coding -17741,Crybb3,ENSMUSG00000029352,protein_coding -17908,Tesc,ENSMUSG00000029359,protein_coding -17902,Nos1,ENSMUSG00000029361,protein_coding -17898,Rfc5,ENSMUSG00000029363,protein_coding -17897,Wsb2,ENSMUSG00000029364,protein_coding -17168,Dck,ENSMUSG00000029366,protein_coding -17185,Alb,ENSMUSG00000029368,protein_coding -17188,Afm,ENSMUSG00000029369,protein_coding -17192,Rassf6,ENSMUSG00000029370,protein_coding -17194,Cxcl5,ENSMUSG00000029371,protein_coding -17195,Ppbp,ENSMUSG00000029372,protein_coding -17196,Pf4,ENSMUSG00000029373,protein_coding -17198,Cxcl15,ENSMUSG00000029375,protein_coding 
-17206,Mthfd2l,ENSMUSG00000029376,protein_coding -17209,Ereg,ENSMUSG00000029377,protein_coding -17210,Areg,ENSMUSG00000029378,protein_coding -17197,Cxcl3,ENSMUSG00000029379,protein_coding -17202,Cxcl1,ENSMUSG00000029380,protein_coding -17255,Shroom3,ENSMUSG00000029381,protein_coding -17270,2010109A12Rik,ENSMUSG00000029384,protein_coding -17271,Ccng2,ENSMUSG00000029385,protein_coding -18143,Gtf2h3,ENSMUSG00000029387,protein_coding -18142,Eif2b1,ENSMUSG00000029388,protein_coding -18141,Ddx55,ENSMUSG00000029389,protein_coding -18140,Tmed2,ENSMUSG00000029390,protein_coding -18138,Rilpl1,ENSMUSG00000029392,protein_coding -18130,Cdk2ap1,ENSMUSG00000029394,protein_coding -17223,Rchy1,ENSMUSG00000029397,protein_coding -18135,Rilpl2,ENSMUSG00000029401,protein_coding -18137,Snrnp35,ENSMUSG00000029402,protein_coding -17228,Cdkl2,ENSMUSG00000029403,protein_coding -18120,Arl6ip4,ENSMUSG00000029404,protein_coding -17230,G3bp2,ENSMUSG00000029405,protein_coding -18121,Pitpnm2,ENSMUSG00000029406,protein_coding -17233,Uso1,ENSMUSG00000029407,protein_coding -18117,Abcb9,ENSMUSG00000029408,protein_coding -17238,Ppef2,ENSMUSG00000029410,protein_coding -17239,Naaa,ENSMUSG00000029413,protein_coding -18105,Kntc1,ENSMUSG00000029414,protein_coding -17240,Sdad1,ENSMUSG00000029415,protein_coding -18191,Slc15a4,ENSMUSG00000029416,protein_coding -17243,Cxcl9,ENSMUSG00000029417,protein_coding -3978,Ajm1,ENSMUSG00000029419,protein_coding -18209,Rimbp2,ENSMUSG00000029420,protein_coding -18104,Rsrc2,ENSMUSG00000029422,protein_coding -18207,Piwil1,ENSMUSG00000029423,protein_coding -17250,Scarb2,ENSMUSG00000029426,protein_coding -18103,Zcchc8,ENSMUSG00000029427,protein_coding -18212,Stx2,ENSMUSG00000029428,protein_coding -18213,Ran,ENSMUSG00000029430,protein_coding -18093,B3gnt4,ENSMUSG00000029431,protein_coding -18225,Nipsnap2,ENSMUSG00000029432,protein_coding -18092,Diablo,ENSMUSG00000029433,protein_coding -18097,Vps33a,ENSMUSG00000029434,protein_coding 
-18220,Mmp17,ENSMUSG00000029436,protein_coding -18089,Il31,ENSMUSG00000029437,protein_coding -18080,Bcl7a,ENSMUSG00000029438,protein_coding -18218,Sfswap,ENSMUSG00000029439,protein_coding -18073,Psmd9,ENSMUSG00000029440,protein_coding -18076,Wdr66,ENSMUSG00000029442,protein_coding -18074,Hpd,ENSMUSG00000029445,protein_coding -18227,Psph,ENSMUSG00000029446,protein_coding -18228,Cct6a,ENSMUSG00000029447,protein_coding -18067,Rhof,ENSMUSG00000029449,protein_coding -15244,Gm13103,ENSMUSG00000029451,protein_coding -17997,Tmem116,ENSMUSG00000029452,protein_coding -18003,Mapkapk5,ENSMUSG00000029454,protein_coding -18005,Aldh2,ENSMUSG00000029455,protein_coding -18008,Acad10,ENSMUSG00000029456,protein_coding -18009,Brap,ENSMUSG00000029458,protein_coding -25332,Fam168a,ENSMUSG00000029461,protein_coding -18032,Vps29,ENSMUSG00000029462,protein_coding -18033,Fam216a,ENSMUSG00000029463,protein_coding -18034,Gpn3,ENSMUSG00000029464,protein_coding -18036,Arpc3,ENSMUSG00000029465,protein_coding -18038,Anapc7,ENSMUSG00000029466,protein_coding -18040,Atp2a2,ENSMUSG00000029467,protein_coding -18048,P2rx7,ENSMUSG00000029468,protein_coding -18045,Ift81,ENSMUSG00000029469,protein_coding -18051,P2rx4,ENSMUSG00000029470,protein_coding -18052,Camkk2,ENSMUSG00000029471,protein_coding -18054,Anapc5,ENSMUSG00000029472,protein_coding -18055,Rnf34,ENSMUSG00000029474,protein_coding -18056,Kdm2b,ENSMUSG00000029475,protein_coding -18062,Morn3,ENSMUSG00000029477,protein_coding -18153,Ncor2,ENSMUSG00000029478,protein_coding -18162,Dhx37,ENSMUSG00000029480,protein_coding -18166,Aacs,ENSMUSG00000029482,protein_coding -17333,Anxa3,ENSMUSG00000029484,protein_coding -17328,Mrpl1,ENSMUSG00000029486,protein_coding -17592,Mfsd7a,ENSMUSG00000029490,protein_coding -17590,Pde6b,ENSMUSG00000029491,protein_coding -17659,Pxmp2,ENSMUSG00000029499,protein_coding -17656,Pgam5,ENSMUSG00000029500,protein_coding -17655,Ankle2,ENSMUSG00000029501,protein_coding -17652,Golga3,ENSMUSG00000029502,protein_coding 
-17664,P2rx2,ENSMUSG00000029503,protein_coding -17673,Ddx51,ENSMUSG00000029504,protein_coding -17675,Ep400,ENSMUSG00000029505,protein_coding -17681,Pus1,ENSMUSG00000029507,protein_coding -18510,Gpc2,ENSMUSG00000029510,protein_coding -17683,Ulk1,ENSMUSG00000029512,protein_coding -17861,Prkab1,ENSMUSG00000029513,protein_coding -17857,Cit,ENSMUSG00000029516,protein_coding -19095,Ankrd7,ENSMUSG00000029517,protein_coding -17853,Rab35,ENSMUSG00000029518,protein_coding -17685,Chek2,ENSMUSG00000029521,protein_coding -17842,Pla2g1b,ENSMUSG00000029522,protein_coding -17843,Sirt4,ENSMUSG00000029524,protein_coding -18522,1700123K08Rik,ENSMUSG00000029526,protein_coding -17847,Pxn,ENSMUSG00000029528,protein_coding -37271,Ccr9,ENSMUSG00000029530,protein_coding -19062,St7,ENSMUSG00000029534,protein_coding -17833,Triap1,ENSMUSG00000029535,protein_coding -17832,Gatc,ENSMUSG00000029536,protein_coding -17831,Srsf9,ENSMUSG00000029538,protein_coding -18542,Cyp2w1,ENSMUSG00000029541,protein_coding -17822,Cabp1,ENSMUSG00000029544,protein_coding -17819,Acads,ENSMUSG00000029545,protein_coding -18551,Uncx,ENSMUSG00000029546,protein_coding -18554,Ints1,ENSMUSG00000029547,protein_coding -17814,Sppl3,ENSMUSG00000029550,protein_coding -18559,Psmg3,ENSMUSG00000029551,protein_coding -19053,Tes,ENSMUSG00000029552,protein_coding -19049,Tfec,ENSMUSG00000029553,protein_coding -18564,Mad1l1,ENSMUSG00000029554,protein_coding -17810,Hnf1a,ENSMUSG00000029556,protein_coding -18572,Mrm2,ENSMUSG00000029557,protein_coding -17809,2210016L21Rik,ENSMUSG00000029559,protein_coding -18573,Snx8,ENSMUSG00000029560,protein_coding -17806,Oasl2,ENSMUSG00000029561,protein_coding -19037,Foxp2,ENSMUSG00000029563,protein_coding -17803,4930519G04Rik,ENSMUSG00000029564,protein_coding -19024,Tmem168,ENSMUSG00000029569,protein_coding -18583,Lfng,ENSMUSG00000029570,protein_coding -19018,Tmem106b,ENSMUSG00000029571,protein_coding -17787,Mmab,ENSMUSG00000029575,protein_coding -18603,Radil,ENSMUSG00000029576,protein_coding 
-17785,Ube3b,ENSMUSG00000029577,protein_coding -18608,Wipi2,ENSMUSG00000029578,protein_coding -18614,Actb,ENSMUSG00000029580,protein_coding -18615,Fscn1,ENSMUSG00000029581,protein_coding -18624,Spdye4b,ENSMUSG00000029586,protein_coding -18627,Zfp12,ENSMUSG00000029587,protein_coding -17779,Ung,ENSMUSG00000029591,protein_coding -17775,Usp30,ENSMUSG00000029592,protein_coding -17947,Rbm19,ENSMUSG00000029594,protein_coding -17953,Lhx5,ENSMUSG00000029595,protein_coding -17954,Sdsl,ENSMUSG00000029596,protein_coding -17955,Sds,ENSMUSG00000029597,protein_coding -17957,Plbd2,ENSMUSG00000029598,protein_coding -17963,Ddx54,ENSMUSG00000029599,protein_coding -17962,Rita1,ENSMUSG00000029600,protein_coding -17961,Iqcd,ENSMUSG00000029601,protein_coding -17968,Rasal1,ENSMUSG00000029602,protein_coding -17969,Dtx1,ENSMUSG00000029603,protein_coding -18654,Ankrd61,ENSMUSG00000029607,protein_coding -17983,Rph3a,ENSMUSG00000029608,protein_coding -18655,Aimp2,ENSMUSG00000029610,protein_coding -18653,Eif2ak1,ENSMUSG00000029613,protein_coding -17987,Rpl6,ENSMUSG00000029614,protein_coding -17996,Erp29,ENSMUSG00000029616,protein_coding -18660,Ccz1,ENSMUSG00000029617,protein_coding -18662,Ocm,ENSMUSG00000029618,protein_coding -18687,1700018F24Rik,ENSMUSG00000029620,protein_coding -18689,Arpc1a,ENSMUSG00000029621,protein_coding -18690,Arpc1b,ENSMUSG00000029622,protein_coding -18691,Pdap1,ENSMUSG00000029623,protein_coding -18693,Ptcd1,ENSMUSG00000029624,protein_coding -18694,Cpsf4,ENSMUSG00000029625,protein_coding -18696,Zkscan14,ENSMUSG00000029627,protein_coding -19008,Phf14,ENSMUSG00000029629,protein_coding -18719,Cyp3a25,ENSMUSG00000029630,protein_coding -19007,Ndufa4,ENSMUSG00000029632,protein_coding -18730,Rnf6,ENSMUSG00000029634,protein_coding -18731,Cdk8,ENSMUSG00000029635,protein_coding -18733,Wasf3,ENSMUSG00000029636,protein_coding -18993,Glcci1,ENSMUSG00000029638,protein_coding -18745,Usp12,ENSMUSG00000029640,protein_coding -18750,Rasl11a,ENSMUSG00000029641,protein_coding 
-18754,Polr1d,ENSMUSG00000029642,protein_coding -18759,Pdx1,ENSMUSG00000029644,protein_coding -18762,Cdx2,ENSMUSG00000029646,protein_coding -18772,Pan3,ENSMUSG00000029647,protein_coding -18774,Flt1,ENSMUSG00000029648,protein_coding -18777,Pomp,ENSMUSG00000029649,protein_coding -18779,Slc46a3,ENSMUSG00000029650,protein_coding -18781,Mtus2,ENSMUSG00000029651,protein_coding -18860,N4bp2l2,ENSMUSG00000029655,protein_coding -14164,C8b,ENSMUSG00000029656,protein_coding -18831,Hsph1,ENSMUSG00000029657,protein_coding -18830,Wdr95,ENSMUSG00000029658,protein_coding -18823,Medag,ENSMUSG00000029659,protein_coding -18826,Tex26,ENSMUSG00000029660,protein_coding -18920,Col1a2,ENSMUSG00000029661,protein_coding -18908,Gngt1,ENSMUSG00000029663,protein_coding -18907,Tfpi2,ENSMUSG00000029664,protein_coding -19114,Tspan12,ENSMUSG00000029669,protein_coding -19117,Ing3,ENSMUSG00000029670,protein_coding -19121,Wnt16,ENSMUSG00000029671,protein_coding -19122,Fam3c,ENSMUSG00000029672,protein_coding -18265,Auts2,ENSMUSG00000029673,protein_coding -18328,Limk1,ENSMUSG00000029674,protein_coding -18330,Eln,ENSMUSG00000029675,protein_coding -19165,Pot1a,ENSMUSG00000029676,protein_coding -19158,Hyal5,ENSMUSG00000029678,protein_coding -19154,Hyal6,ENSMUSG00000029679,protein_coding -19155,Hyal4,ENSMUSG00000029680,protein_coding -18354,Bcl7b,ENSMUSG00000029681,protein_coding -19156,Spam1,ENSMUSG00000029682,protein_coding -19151,Lmod2,ENSMUSG00000029683,protein_coding -19152,Wasl,ENSMUSG00000029684,protein_coding -19149,Asb15,ENSMUSG00000029685,protein_coding -19623,Cul1,ENSMUSG00000029686,protein_coding -19625,Ezh2,ENSMUSG00000029687,protein_coding -19131,Aass,ENSMUSG00000029695,protein_coding -19136,Fezf1,ENSMUSG00000029697,protein_coding -18386,Ssc4d,ENSMUSG00000029699,protein_coding -19144,Slc13a1,ENSMUSG00000029700,protein_coding -19194,Rbm28,ENSMUSG00000029701,protein_coding -18397,Lrwd1,ENSMUSG00000029703,protein_coding -18403,Cux1,ENSMUSG00000029705,protein_coding 
-19183,Pax4,ENSMUSG00000029706,protein_coding -19182,Fscn3,ENSMUSG00000029707,protein_coding -19180,Gcc1,ENSMUSG00000029708,protein_coding -18440,Ephb4,ENSMUSG00000029710,protein_coding -18443,Epo,ENSMUSG00000029711,protein_coding -18451,Actl6b,ENSMUSG00000029712,protein_coding -18448,Gnb2,ENSMUSG00000029713,protein_coding -18447,Gigyf1,ENSMUSG00000029714,protein_coding -18444,Pop7,ENSMUSG00000029715,protein_coding -18452,Tfr2,ENSMUSG00000029716,protein_coding -18455,Pcolce,ENSMUSG00000029718,protein_coding -18460,Gm20605,ENSMUSG00000029720,protein_coding -18463,Agfg2,ENSMUSG00000029722,protein_coding -18467,Tsc22d4,ENSMUSG00000029723,protein_coding -18468,Ppp1r35,ENSMUSG00000029725,protein_coding -18469,Mepce,ENSMUSG00000029726,protein_coding -18478,Cyp3a13,ENSMUSG00000029727,protein_coding -18488,Zkscan1,ENSMUSG00000029729,protein_coding -18493,Mcm7,ENSMUSG00000029730,protein_coding -19583,Tpk1,ENSMUSG00000029735,protein_coding -19582,Nobox,ENSMUSG00000029736,protein_coding -18974,Asns,ENSMUSG00000029752,protein_coding -18960,Dlx6,ENSMUSG00000029754,protein_coding -18963,Dlx5,ENSMUSG00000029755,protein_coding -18943,Dync1i1,ENSMUSG00000029757,protein_coding -18938,Pon3,ENSMUSG00000029759,protein_coding -19312,Cald1,ENSMUSG00000029761,protein_coding -19304,Akr1b8,ENSMUSG00000029762,protein_coding -19293,Exoc4,ENSMUSG00000029763,protein_coding -19282,Plxna4,ENSMUSG00000029765,protein_coding -19280,1700012A03Rik,ENSMUSG00000029766,protein_coding -19203,Calu,ENSMUSG00000029767,protein_coding -19205,Ccdc136,ENSMUSG00000029769,protein_coding -19214,Irf5,ENSMUSG00000029771,protein_coding -19220,Ahcyl2,ENSMUSG00000029772,protein_coding -19239,Klhdc10,ENSMUSG00000029775,protein_coding -19798,Hibadh,ENSMUSG00000029776,protein_coding -19840,Gars,ENSMUSG00000029777,protein_coding -19850,Adcyap1r1,ENSMUSG00000029778,protein_coding -19866,Nt5c3,ENSMUSG00000029780,protein_coding -19865,Fkbp9,ENSMUSG00000029781,protein_coding -19241,Tmem209,ENSMUSG00000029782,protein_coding 
-19242,Ssmem1,ENSMUSG00000029784,protein_coding -19862,Avl9,ENSMUSG00000029787,protein_coding -19246,Cpa5,ENSMUSG00000029788,protein_coding -19248,Cep41,ENSMUSG00000029790,protein_coding -19655,Sspo,ENSMUSG00000029797,protein_coding -19894,Herc6,ENSMUSG00000029798,protein_coding -19936,Abcg2,ENSMUSG00000029802,protein_coding -19938,Herc3,ENSMUSG00000029804,protein_coding -19685,Tmem176b,ENSMUSG00000029810,protein_coding -19689,Aoc1,ENSMUSG00000029811,protein_coding -19690,1600015I10Rik,ENSMUSG00000029813,protein_coding -19704,Igf2bp3,ENSMUSG00000029814,protein_coding -19702,Malsu1,ENSMUSG00000029815,protein_coding -19700,Gpnmb,ENSMUSG00000029816,protein_coding -19706,Tra2a,ENSMUSG00000029817,protein_coding -19715,Npy,ENSMUSG00000029819,protein_coding -19719,Gsdme,ENSMUSG00000029821,protein_coding -19720,Osbpl3,ENSMUSG00000029822,protein_coding -19372,Luc7l2,ENSMUSG00000029823,protein_coding -19363,Zc3hav1,ENSMUSG00000029826,protein_coding -19725,4921507P07Rik,ENSMUSG00000029828,protein_coding -19358,Tmem213,ENSMUSG00000029829,protein_coding -19356,Svopl,ENSMUSG00000029830,protein_coding -19732,Npvf,ENSMUSG00000029831,protein_coding -19744,Nfe2l3,ENSMUSG00000029832,protein_coding -19355,Trim24,ENSMUSG00000029833,protein_coding -19746,Cbx3,ENSMUSG00000029836,protein_coding -19345,Ptn,ENSMUSG00000029838,protein_coding -19333,Mtpn,ENSMUSG00000029840,protein_coding -19330,Slc13a4,ENSMUSG00000029843,protein_coding -19755,Hoxa1,ENSMUSG00000029844,protein_coding -19325,Slc23a4,ENSMUSG00000029847,protein_coding -19324,Stra8,ENSMUSG00000029848,protein_coding -19545,Tcaf2,ENSMUSG00000029851,protein_coding -19533,Epha1,ENSMUSG00000029859,protein_coding -19532,Zyx,ENSMUSG00000029860,protein_coding -19529,Fam131b,ENSMUSG00000029861,protein_coding -19528,Clcn1,ENSMUSG00000029862,protein_coding -19526,Casp2,ENSMUSG00000029863,protein_coding -19524,Gstk1,ENSMUSG00000029864,protein_coding -19518,Sval1,ENSMUSG00000029865,protein_coding -19513,Kel,ENSMUSG00000029866,protein_coding 
-19512,Llcfc1,ENSMUSG00000029867,protein_coding -19510,Trpv6,ENSMUSG00000029868,protein_coding -19509,Ephb6,ENSMUSG00000029869,protein_coding -46842,Ccdc184,ENSMUSG00000029875,protein_coding -19437,2210010C04Rik,ENSMUSG00000029882,protein_coding -19429,1700074P13Rik,ENSMUSG00000029883,protein_coding -19425,Moxd2,ENSMUSG00000029885,protein_coding -19417,Prss37,ENSMUSG00000029909,protein_coding -20024,Mad2l1,ENSMUSG00000029910,protein_coding -19414,Ssbp1,ENSMUSG00000029911,protein_coding -20010,Prdm5,ENSMUSG00000029913,protein_coding -19421,Clec5a,ENSMUSG00000029915,protein_coding -19411,Agk,ENSMUSG00000029916,protein_coding -20003,C130060K24Rik,ENSMUSG00000029917,protein_coding -19401,Mrps33,ENSMUSG00000029918,protein_coding -19993,Hpgds,ENSMUSG00000029919,protein_coding -19992,Smarcad1,ENSMUSG00000029920,protein_coding -19390,Mkrn1,ENSMUSG00000029922,protein_coding -19389,Rab19,ENSMUSG00000029923,protein_coding -19388,Slc37a3,ENSMUSG00000029924,protein_coding -19377,Tbxas1,ENSMUSG00000029925,protein_coding -20634,Gfpt1,ENSMUSG00000029992,protein_coding -20633,Nfu1,ENSMUSG00000029993,protein_coding -20626,Anxa4,ENSMUSG00000029994,protein_coding -20605,Pcyox1,ENSMUSG00000029998,protein_coding -20597,Tgfa,ENSMUSG00000029999,protein_coding -20593,Add2,ENSMUSG00000030000,protein_coding -20592,Figla,ENSMUSG00000030001,protein_coding -20589,Dusp11,ENSMUSG00000030002,protein_coding -20579,Nat8,ENSMUSG00000030004,protein_coding -20564,Cct7,ENSMUSG00000030007,protein_coding -20563,Pradc1,ENSMUSG00000030008,protein_coding -20528,Zfp638,ENSMUSG00000030016,protein_coding -20416,Reg3g,ENSMUSG00000030017,protein_coding -21194,Fbxl14,ENSMUSG00000030019,protein_coding -20812,Prickle2,ENSMUSG00000030020,protein_coding -20814,Adamts9,ENSMUSG00000030022,protein_coding -20834,Lrig1,ENSMUSG00000030029,protein_coding -20491,1700003E16Rik,ENSMUSG00000030030,protein_coding -20844,Kbtbd8,ENSMUSG00000030031,protein_coding -20490,Wdr54,ENSMUSG00000030032,protein_coding 
-20487,Ino80b,ENSMUSG00000030034,protein_coding -20486,Wbp1,ENSMUSG00000030035,protein_coding -20485,Mogs,ENSMUSG00000030036,protein_coding -20484,Mrpl53,ENSMUSG00000030037,protein_coding -20466,M1ap,ENSMUSG00000030041,protein_coding -20456,Pole4,ENSMUSG00000030042,protein_coding -20455,Tacr1,ENSMUSG00000030043,protein_coding -20448,Mrpl19,ENSMUSG00000030045,protein_coding -20647,Bmp10,ENSMUSG00000030046,protein_coding -20648,Arhgap25,ENSMUSG00000030047,protein_coding -20646,Gkn3,ENSMUSG00000030048,protein_coding -20645,Gkn2,ENSMUSG00000030049,protein_coding -20644,Gkn1,ENSMUSG00000030050,protein_coding -20652,Aplf,ENSMUSG00000030051,protein_coding -20660,Gp9,ENSMUSG00000030054,protein_coding -20661,Rab43,ENSMUSG00000030055,protein_coding -20665,Isy1,ENSMUSG00000030056,protein_coding -20667,Cnbp,ENSMUSG00000030057,protein_coding -20669,Copg1,ENSMUSG00000030058,protein_coding -20859,Tmf1,ENSMUSG00000030059,protein_coding -20671,Hmces,ENSMUSG00000030060,protein_coding -20861,Uba3,ENSMUSG00000030061,protein_coding -20680,Rpn1,ENSMUSG00000030062,protein_coding -20864,Frmd4b,ENSMUSG00000030064,protein_coding -20879,Foxp1,ENSMUSG00000030067,protein_coding -20881,Gm20696,ENSMUSG00000030068,protein_coding -20888,Prok2,ENSMUSG00000030069,protein_coding -20909,Gxylt2,ENSMUSG00000030074,protein_coding -20932,Cntn3,ENSMUSG00000030075,protein_coding -20941,Chl1,ENSMUSG00000030077,protein_coding -20691,Ruvbl1,ENSMUSG00000030079,protein_coding -20692,Sec61a1,ENSMUSG00000030082,protein_coding -20702,Abtb1,ENSMUSG00000030083,protein_coding -20718,Plxna1,ENSMUSG00000030084,protein_coding -20720,Chchd6,ENSMUSG00000030086,protein_coding -20758,Klf15,ENSMUSG00000030087,protein_coding -20761,Aldh1l1,ENSMUSG00000030088,protein_coding -20764,Slc41a3,ENSMUSG00000030089,protein_coding -20772,Nup210,ENSMUSG00000030091,protein_coding -20951,Cntn6,ENSMUSG00000030092,protein_coding -20779,Wnt7a,ENSMUSG00000030093,protein_coding -20787,Xpc,ENSMUSG00000030094,protein_coding 
-20786,Tmem43,ENSMUSG00000030095,protein_coding -20793,Slc6a6,ENSMUSG00000030096,protein_coding -20797,Grip2,ENSMUSG00000030098,protein_coding -20973,Sumf1,ENSMUSG00000030101,protein_coding -20975,Itpr1,ENSMUSG00000030102,protein_coding -20984,Bhlhe40,ENSMUSG00000030103,protein_coding -20986,Edem1,ENSMUSG00000030104,protein_coding -20985,Arl8b,ENSMUSG00000030105,protein_coding -21234,Usp18,ENSMUSG00000030107,protein_coding -21236,Slc6a13,ENSMUSG00000030108,protein_coding -21238,Slc6a12,ENSMUSG00000030109,protein_coding -21177,Ret,ENSMUSG00000030110,protein_coding -21244,A2m,ENSMUSG00000030111,protein_coding -21255,Klrg1,ENSMUSG00000030114,protein_coding -21266,Mfap5,ENSMUSG00000030116,protein_coding -21272,Gdf3,ENSMUSG00000030117,protein_coding -21373,Mlf2,ENSMUSG00000030120,protein_coding -21371,Ptms,ENSMUSG00000030122,protein_coding -21121,Plxnd1,ENSMUSG00000030123,protein_coding -21370,Lag3,ENSMUSG00000030124,protein_coding -21361,Lrrc23,ENSMUSG00000030125,protein_coding -21122,Tmcc1,ENSMUSG00000030126,protein_coding -21375,Cops7a,ENSMUSG00000030127,protein_coding -21250,Mug2,ENSMUSG00000030131,protein_coding -21173,Rasgef1a,ENSMUSG00000030134,protein_coding -21232,Tuba8,ENSMUSG00000030137,protein_coding -21181,Bms1,ENSMUSG00000030138,protein_coding -21299,Clec4e,ENSMUSG00000030142,protein_coding -21655,Gm8882,ENSMUSG00000030143,protein_coding -21298,Clec4d,ENSMUSG00000030144,protein_coding -21182,Zfp248,ENSMUSG00000030145,protein_coding -21290,Clec4b1,ENSMUSG00000030147,protein_coding -21291,Clec4a2,ENSMUSG00000030148,protein_coding -21558,Klrk1,ENSMUSG00000030149,protein_coding -21530,Klrb1f,ENSMUSG00000030154,protein_coding -21533,Clec2e,ENSMUSG00000030155,protein_coding -21540,Cd69,ENSMUSG00000030156,protein_coding -21536,Clec2d,ENSMUSG00000030157,protein_coding -21543,Clec12b,ENSMUSG00000030158,protein_coding -21544,Clec1b,ENSMUSG00000030159,protein_coding -21550,Tmem52b,ENSMUSG00000030160,protein_coding -21553,Gabarapl1,ENSMUSG00000030161,protein_coding 
-21549,Olr1,ENSMUSG00000030162,protein_coding -21557,Klrd1,ENSMUSG00000030165,protein_coding -21199,Rad52,ENSMUSG00000030166,protein_coding -21561,Klrc1,ENSMUSG00000030167,protein_coding -21192,Adipor2,ENSMUSG00000030168,protein_coding -21193,Wnt5b,ENSMUSG00000030170,protein_coding -21195,Erc1,ENSMUSG00000030172,protein_coding -21573,Klra5,ENSMUSG00000030173,protein_coding -21206,Ccdc77,ENSMUSG00000030177,protein_coding -21207,Kdm5a,ENSMUSG00000030180,protein_coding -21621,Klra2,ENSMUSG00000030187,protein_coding -21624,Magohb,ENSMUSG00000030188,protein_coding -21627,Ybx3,ENSMUSG00000030189,protein_coding -21681,Tas2r116,ENSMUSG00000030194,protein_coding -21695,Tas2r103,ENSMUSG00000030196,protein_coding -21720,Etv6,ENSMUSG00000030199,protein_coding -21727,Bcl2l14,ENSMUSG00000030200,protein_coding -21729,Lrp6,ENSMUSG00000030201,protein_coding -21735,Dusp16,ENSMUSG00000030203,protein_coding -21746,Ddx47,ENSMUSG00000030204,protein_coding -21748,Gprc5d,ENSMUSG00000030205,protein_coding -21755,Gsg1,ENSMUSG00000030206,protein_coding -21754,Fam234b,ENSMUSG00000030207,protein_coding -21761,Emp1,ENSMUSG00000030208,protein_coding -21764,Grin2b,ENSMUSG00000030209,protein_coding -21777,Atf7ip,ENSMUSG00000030213,protein_coding -21779,Plbd1,ENSMUSG00000030214,protein_coding -21787,Wbp11,ENSMUSG00000030216,protein_coding -21791,Art4,ENSMUSG00000030217,protein_coding -21792,Mgp,ENSMUSG00000030218,protein_coding -21793,Erp27,ENSMUSG00000030219,protein_coding -21794,Arhgdib,ENSMUSG00000030220,protein_coding -21797,Rerg,ENSMUSG00000030222,protein_coding -21799,Ptpro,ENSMUSG00000030223,protein_coding -21803,Strap,ENSMUSG00000030224,protein_coding -21804,Dera,ENSMUSG00000030225,protein_coding -21808,Lmo3,ENSMUSG00000030226,protein_coding -21822,Pik3c2g,ENSMUSG00000030228,protein_coding -21825,Plcz1,ENSMUSG00000030230,protein_coding -21837,Plekha5,ENSMUSG00000030231,protein_coding -21840,Aebp2,ENSMUSG00000030232,protein_coding -21851,Slco1c1,ENSMUSG00000030235,protein_coding 
-21852,Slco1b2,ENSMUSG00000030236,protein_coding -21855,Slco1a4,ENSMUSG00000030237,protein_coding -21868,Recql,ENSMUSG00000030243,protein_coding -21871,Gys2,ENSMUSG00000030244,protein_coding -21869,Golt1b,ENSMUSG00000030245,protein_coding -21872,Ldhb,ENSMUSG00000030246,protein_coding -21873,Kcnj8,ENSMUSG00000030247,protein_coding -21874,Abcc9,ENSMUSG00000030249,protein_coding -21016,Rad18,ENSMUSG00000030254,protein_coding -21929,Sspn,ENSMUSG00000030255,protein_coding -21927,Bhlhe41,ENSMUSG00000030256,protein_coding -21018,Srgap3,ENSMUSG00000030257,protein_coding -21926,Rassf8,ENSMUSG00000030259,protein_coding -21906,Lrmp,ENSMUSG00000030263,protein_coding -21025,Thumpd3,ENSMUSG00000030264,protein_coding -21911,Kras,ENSMUSG00000030265,protein_coding -21902,Bcat1,ENSMUSG00000030268,protein_coding -21030,Mtmr14,ENSMUSG00000030269,protein_coding -21032,Cpne9,ENSMUSG00000030270,protein_coding -21036,Ogg1,ENSMUSG00000030271,protein_coding -21037,Camk1,ENSMUSG00000030272,protein_coding -21887,Etnk1,ENSMUSG00000030275,protein_coding -21041,Ttll3,ENSMUSG00000030276,protein_coding -21045,Cidec,ENSMUSG00000030278,protein_coding -21883,C2cd5,ENSMUSG00000030279,protein_coding -21049,Il17rc,ENSMUSG00000030281,protein_coding -21876,Cmas,ENSMUSG00000030282,protein_coding -21879,St8sia1,ENSMUSG00000030283,protein_coding -21050,Creld1,ENSMUSG00000030284,protein_coding -21052,Emc3,ENSMUSG00000030286,protein_coding -21933,Itpr2,ENSMUSG00000030287,protein_coding -21940,Med21,ENSMUSG00000030291,protein_coding -21948,Smco2,ENSMUSG00000030292,protein_coding -21072,Sec13,ENSMUSG00000030298,protein_coding -21963,Ccdc91,ENSMUSG00000030301,protein_coding -21073,Atp2b2,ENSMUSG00000030302,protein_coding -21968,Far2,ENSMUSG00000030303,protein_coding -21970,Ergic2,ENSMUSG00000030304,protein_coding -21973,Tmtc1,ENSMUSG00000030306,protein_coding -21079,Slc6a11,ENSMUSG00000030307,protein_coding -21986,Caprin2,ENSMUSG00000030309,protein_coding -21081,Slc6a1,ENSMUSG00000030310,protein_coding 
-21993,Dennd5b,ENSMUSG00000030313,protein_coding -21083,Atg7,ENSMUSG00000030314,protein_coding -21087,Vgll4,ENSMUSG00000030315,protein_coding -21088,Tamm41,ENSMUSG00000030316,protein_coding -21095,Timp4,ENSMUSG00000030317,protein_coding -21110,Cand2,ENSMUSG00000030319,protein_coding -21114,Efcab12,ENSMUSG00000030321,protein_coding -21116,Mbd4,ENSMUSG00000030322,protein_coding -21117,Ift122,ENSMUSG00000030323,protein_coding -21118,Rho,ENSMUSG00000030324,protein_coding -21511,Klrb1c,ENSMUSG00000030325,protein_coding -21285,Necap1,ENSMUSG00000030327,protein_coding -21377,Pianp,ENSMUSG00000030329,protein_coding -21380,Ing4,ENSMUSG00000030330,protein_coding -21394,Mrpl51,ENSMUSG00000030335,protein_coding -21399,Cd27,ENSMUSG00000030336,protein_coding -21397,Vamp1,ENSMUSG00000030337,protein_coding -21405,Ltbr,ENSMUSG00000030339,protein_coding -21406,Scnn1a,ENSMUSG00000030340,protein_coding -21407,Tnfrsf1a,ENSMUSG00000030341,protein_coding -21409,Cd9,ENSMUSG00000030342,protein_coding -21429,Akap3,ENSMUSG00000030344,protein_coding -21430,Dyrk4,ENSMUSG00000030345,protein_coding -21431,Rad51ap1,ENSMUSG00000030346,protein_coding -21433,D6Wsu163e,ENSMUSG00000030347,protein_coding -21457,Prmt8,ENSMUSG00000030350,protein_coding -21466,Tspan11,ENSMUSG00000030351,protein_coding -21468,Tspan9,ENSMUSG00000030352,protein_coding -21477,Tead4,ENSMUSG00000030353,protein_coding -21492,Fkbp4,ENSMUSG00000030357,protein_coding -21496,Pzp,ENSMUSG00000030359,protein_coding -21501,Klrb1a,ENSMUSG00000030361,protein_coding -21507,Clec2h,ENSMUSG00000030364,protein_coding -21520,Clec2i,ENSMUSG00000030365,protein_coding -22725,Ceacam12,ENSMUSG00000030366,protein_coding -22721,Ceacam11,ENSMUSG00000030368,protein_coding -22735,Psg28,ENSMUSG00000030373,protein_coding -22686,Strn4,ENSMUSG00000030374,protein_coding -22652,Slc8a2,ENSMUSG00000030376,protein_coding -22558,Sult2a8,ENSMUSG00000030378,protein_coding -22507,Mzf1,ENSMUSG00000030380,protein_coding -22502,Slc27a5,ENSMUSG00000030382,protein_coding 
-22473,2900092C05Rik,ENSMUSG00000030385,protein_coding -22472,Zfp606,ENSMUSG00000030386,protein_coding -22347,Zik1,ENSMUSG00000030393,protein_coding -22810,Mark4,ENSMUSG00000030397,protein_coding -22808,Ckm,ENSMUSG00000030399,protein_coding -22804,Ercc2,ENSMUSG00000030400,protein_coding -22796,Rtn2,ENSMUSG00000030401,protein_coding -22795,Ppm1n,ENSMUSG00000030402,protein_coding -22793,Vasp,ENSMUSG00000030403,protein_coding -22787,Gipr,ENSMUSG00000030406,protein_coding -22785,Qpctl,ENSMUSG00000030407,protein_coding -22780,Dmpk,ENSMUSG00000030409,protein_coding -22779,Dmwd,ENSMUSG00000030410,protein_coding -22768,Nova2,ENSMUSG00000030411,protein_coding -22766,Pglyrp1,ENSMUSG00000030413,protein_coding -23614,Pdcd5,ENSMUSG00000030417,protein_coding -23645,Uri1,ENSMUSG00000030421,protein_coding -23653,Pop4,ENSMUSG00000030423,protein_coding -22076,Lilra6,ENSMUSG00000030427,protein_coding -22086,Ttyh1,ENSMUSG00000030428,protein_coding -22132,Tmem238,ENSMUSG00000030431,protein_coding -22133,Rpl28,ENSMUSG00000030432,protein_coding -22144,Sbk2,ENSMUSG00000030433,protein_coding -22158,U2af2,ENSMUSG00000030435,protein_coding -22204,Zfp583,ENSMUSG00000030443,protein_coding -41993,Zfp273,ENSMUSG00000030446,protein_coding -24231,Cyfip1,ENSMUSG00000030447,protein_coding -24245,Oca2,ENSMUSG00000030450,protein_coding -24243,Herc2,ENSMUSG00000030451,protein_coding -24234,Nipa2,ENSMUSG00000030452,protein_coding -23780,4933421I07Rik,ENSMUSG00000030463,protein_coding -30525,Psd3,ENSMUSG00000030465,protein_coding -23814,Siglecg,ENSMUSG00000030468,protein_coding -23830,Zfp719,ENSMUSG00000030469,protein_coding -24162,Csrp3,ENSMUSG00000030470,protein_coding -24160,Zdhhc13,ENSMUSG00000030471,protein_coding -23832,Ceacam18,ENSMUSG00000030472,protein_coding -23833,Siglece,ENSMUSG00000030474,protein_coding -23142,Cyp2b10,ENSMUSG00000030483,protein_coding -23051,Lypd5,ENSMUSG00000030484,protein_coding -23041,Zfp108,ENSMUSG00000030486,protein_coding -23608,Tdrd12,ENSMUSG00000030491,protein_coding 
-23607,Slc7a9,ENSMUSG00000030492,protein_coding -23604,Faap24,ENSMUSG00000030493,protein_coding -23603,Rhpn2,ENSMUSG00000030494,protein_coding -23597,Slc7a10,ENSMUSG00000030495,protein_coding -24202,Gas2,ENSMUSG00000030498,protein_coding -23580,Kctd15,ENSMUSG00000030499,protein_coding -24194,Slc17a6,ENSMUSG00000030500,protein_coding -24176,Prmt3,ENSMUSG00000030505,protein_coding -24174,Dbx1,ENSMUSG00000030507,protein_coding -24622,Asb7,ENSMUSG00000030509,protein_coding -24626,Cers3,ENSMUSG00000030510,protein_coding -24604,Snrpa1,ENSMUSG00000030512,protein_coding -24602,Pcsk6,ENSMUSG00000030513,protein_coding -24597,Tarsl2,ENSMUSG00000030515,protein_coding -24591,Tjp1,ENSMUSG00000030516,protein_coding -24585,Fam189a1,ENSMUSG00000030518,protein_coding -24582,Apba2,ENSMUSG00000030519,protein_coding -24580,Mphosph10,ENSMUSG00000030521,protein_coding -24577,Mtmr10,ENSMUSG00000030522,protein_coding -24570,Trpm1,ENSMUSG00000030523,protein_coding -24552,Chrna7,ENSMUSG00000030525,protein_coding -24883,Crtc3,ENSMUSG00000030527,protein_coding -24881,Blm,ENSMUSG00000030528,protein_coding -24878,Furin,ENSMUSG00000030530,protein_coding -24874,Hddc3,ENSMUSG00000030532,protein_coding -24873,Unc45a,ENSMUSG00000030533,protein_coding -24869,Vps33b,ENSMUSG00000030534,protein_coding -24886,Iqgap1,ENSMUSG00000030536,protein_coding -24864,Cib1,ENSMUSG00000030538,protein_coding -24862,Sema4b,ENSMUSG00000030539,protein_coding -24859,Idh2,ENSMUSG00000030541,protein_coding -24846,Mesp2,ENSMUSG00000030543,protein_coding -24845,Mesp1,ENSMUSG00000030544,protein_coding -24841,Pex11a,ENSMUSG00000030545,protein_coding -24840,Plin1,ENSMUSG00000030546,protein_coding -24835,Rhcg,ENSMUSG00000030549,protein_coding -24686,Nr2f2,ENSMUSG00000030551,protein_coding -24653,Pgpep1l,ENSMUSG00000030553,protein_coding -24644,Synm,ENSMUSG00000030554,protein_coding -24642,Ttc23,ENSMUSG00000030555,protein_coding -24638,Lrrc28,ENSMUSG00000030556,protein_coding -24634,Mef2a,ENSMUSG00000030557,protein_coding 
-25091,Rab38,ENSMUSG00000030559,protein_coding -25087,Ctsc,ENSMUSG00000030560,protein_coding -25078,Nox4,ENSMUSG00000030562,protein_coding -23407,Cd22,ENSMUSG00000030577,protein_coding -23361,Tyrobp,ENSMUSG00000030579,protein_coding -23304,Sipa1l3,ENSMUSG00000030583,protein_coding -23302,Dpf1,ENSMUSG00000030584,protein_coding -23299,2200002D01Rik,ENSMUSG00000030587,protein_coding -23298,Yif1b,ENSMUSG00000030588,protein_coding -23288,Rasgrp4,ENSMUSG00000030589,protein_coding -23290,Fam98c,ENSMUSG00000030590,protein_coding -23294,Psmd8,ENSMUSG00000030591,protein_coding -23283,Ryr1,ENSMUSG00000030592,protein_coding -23268,Nfkbib,ENSMUSG00000030595,protein_coding -23264,Fbxo17,ENSMUSG00000030598,protein_coding -23250,Lrfn1,ENSMUSG00000030600,protein_coding -23258,Pak4,ENSMUSG00000030602,protein_coding -23224,Psmc4,ENSMUSG00000030603,protein_coding -23215,Zfp626,ENSMUSG00000030604,protein_coding -24816,Mfge8,ENSMUSG00000030605,protein_coding -24815,Hapln3,ENSMUSG00000030606,protein_coding -24814,Acan,ENSMUSG00000030607,protein_coding -24810,Aen,ENSMUSG00000030609,protein_coding -24806,Det1,ENSMUSG00000030610,protein_coding -24804,Mrps11,ENSMUSG00000030611,protein_coding -24802,Mrpl46,ENSMUSG00000030612,protein_coding -25166,Ccdc90b,ENSMUSG00000030613,protein_coding -25134,Tmem126b,ENSMUSG00000030614,protein_coding -25132,Tmem126a,ENSMUSG00000030615,protein_coding -25129,Sytl2,ENSMUSG00000030616,protein_coding -25127,Ccdc83,ENSMUSG00000030617,protein_coding -25115,Eed,ENSMUSG00000030619,protein_coding -25110,Me3,ENSMUSG00000030621,protein_coding -24997,Zfand6,ENSMUSG00000030629,protein_coding -24996,Fah,ENSMUSG00000030630,protein_coding -24941,Sh3gl3,ENSMUSG00000030638,protein_coding -25177,Ddias,ENSMUSG00000030641,protein_coding -25175,Rab30,ENSMUSG00000030643,protein_coding -25221,Ndufc2,ENSMUSG00000030647,protein_coding -25369,Anapc15,ENSMUSG00000030649,protein_coding -25949,Tmc5,ENSMUSG00000030650,protein_coding -25360,Art2b,ENSMUSG00000030651,protein_coding 
-25946,Coq7,ENSMUSG00000030652,protein_coding -25354,Gm45837,ENSMUSG00000030653,protein_coding -25934,Arl6ip1,ENSMUSG00000030654,protein_coding -25935,Smg1,ENSMUSG00000030655,protein_coding -25930,Xylt1,ENSMUSG00000030657,protein_coding -25921,Nucb2,ENSMUSG00000030659,protein_coding -25916,Pik3c2a,ENSMUSG00000030660,protein_coding -34385,Ipo5,ENSMUSG00000030662,protein_coding -25908,1110004F10Rik,ENSMUSG00000030663,protein_coding -25894,Calcb,ENSMUSG00000030666,protein_coding -25891,Calca,ENSMUSG00000030669,protein_coding -25890,Cyp2r1,ENSMUSG00000030670,protein_coding -25889,Pde3b,ENSMUSG00000030671,protein_coding -26195,Mylpf,ENSMUSG00000030672,protein_coding -26188,Qprt,ENSMUSG00000030674,protein_coding -26180,Kif22,ENSMUSG00000030677,protein_coding -26179,Maz,ENSMUSG00000030678,protein_coding -26176,Pagr1a,ENSMUSG00000030680,protein_coding -26174,Mvp,ENSMUSG00000030681,protein_coding -26173,Cdipt,ENSMUSG00000030682,protein_coding -26171,Sez6l2,ENSMUSG00000030683,protein_coding -26167,Kctd13,ENSMUSG00000030685,protein_coding -25348,Stard10,ENSMUSG00000030688,protein_coding -26161,Ino80e,ENSMUSG00000030689,protein_coding -25344,Fchsd2,ENSMUSG00000030691,protein_coding -23843,Klk10,ENSMUSG00000030693,protein_coding -26154,Aldoa,ENSMUSG00000030695,protein_coding -26153,Ppp4c,ENSMUSG00000030697,protein_coding -26152,Tbx6,ENSMUSG00000030699,protein_coding -25330,Plekhb1,ENSMUSG00000030701,protein_coding -26149,Gdpd3,ENSMUSG00000030703,protein_coding -25328,Rab6a,ENSMUSG00000030704,protein_coding -25325,Mrpl48,ENSMUSG00000030706,protein_coding -26143,Coro1a,ENSMUSG00000030707,protein_coding -25321,Dnajb13,ENSMUSG00000030708,protein_coding -26139,Sult1a1,ENSMUSG00000030711,protein_coding -23846,Klk7,ENSMUSG00000030713,protein_coding -26138,Sgf29,ENSMUSG00000030714,protein_coding -26136,Nupr1,ENSMUSG00000030717,protein_coding -25314,Ppme1,ENSMUSG00000030718,protein_coding -26132,Cln3,ENSMUSG00000030720,protein_coding -26121,Nfatc2ip,ENSMUSG00000030722,protein_coding 
-26123,Cd19,ENSMUSG00000030724,protein_coding -25307,Lipt2,ENSMUSG00000030725,protein_coding -25306,Pold3,ENSMUSG00000030726,protein_coding -26124,Rabep2,ENSMUSG00000030727,protein_coding -25311,Pgm2l1,ENSMUSG00000030729,protein_coding -26125,Atp2a1,ENSMUSG00000030730,protein_coding -23914,Syt3,ENSMUSG00000030731,protein_coding -25305,Chrdl2,ENSMUSG00000030732,protein_coding -26126,Sh2b1,ENSMUSG00000030733,protein_coding -25289,Slco2b1,ENSMUSG00000030737,protein_coding -26129,Eif3c,ENSMUSG00000030738,protein_coding -23934,Myh14,ENSMUSG00000030739,protein_coding -26120,Spns1,ENSMUSG00000030741,protein_coding -26118,Lat,ENSMUSG00000030742,protein_coding -25279,Rps3,ENSMUSG00000030744,protein_coding -26105,Il21r,ENSMUSG00000030745,protein_coding -25269,Dgat2,ENSMUSG00000030747,protein_coding -26103,Il4ra,ENSMUSG00000030748,protein_coding -26101,Nsmce1,ENSMUSG00000030750,protein_coding -25887,Psma1,ENSMUSG00000030751,protein_coding -26100,Kdm8,ENSMUSG00000030752,protein_coding -25257,Thap12,ENSMUSG00000030753,protein_coding -25886,Copb1,ENSMUSG00000030754,protein_coding -26082,Zkscan2,ENSMUSG00000030757,protein_coding -25876,Far1,ENSMUSG00000030759,protein_coding -25244,Acer3,ENSMUSG00000030760,protein_coding -25239,Myo7a,ENSMUSG00000030761,protein_coding -26081,Aqp8,ENSMUSG00000030762,protein_coding -26079,Lcmt1,ENSMUSG00000030763,protein_coding -26078,Arhgap17,ENSMUSG00000030766,protein_coding -3220,Disp1,ENSMUSG00000030768,protein_coding -26077,Slc5a11,ENSMUSG00000030769,protein_coding -25857,Parva,ENSMUSG00000030770,protein_coding -25855,Dkk3,ENSMUSG00000030772,protein_coding -25236,Pak1,ENSMUSG00000030774,protein_coding -48101,Trat1,ENSMUSG00000030775,protein_coding -26070,Rbbp6,ENSMUSG00000030779,protein_coding -26282,BC017158,ENSMUSG00000030780,protein_coding -26281,Slc5a2,ENSMUSG00000030781,protein_coding -26280,Tgfb1i1,ENSMUSG00000030782,protein_coding -26276,Cox6a2,ENSMUSG00000030785,protein_coding -26268,Itgam,ENSMUSG00000030786,protein_coding 
-25840,Lyve1,ENSMUSG00000030787,protein_coding -25838,Rnf141,ENSMUSG00000030788,protein_coding -26271,Itgax,ENSMUSG00000030789,protein_coding -25833,Adm,ENSMUSG00000030790,protein_coding -23988,Dkkl1,ENSMUSG00000030792,protein_coding -26260,Pycard,ENSMUSG00000030793,protein_coding -26258,Fus,ENSMUSG00000030795,protein_coding -23989,Tead2,ENSMUSG00000030796,protein_coding -23990,Cd37,ENSMUSG00000030798,protein_coding -26256,Prss8,ENSMUSG00000030800,protein_coding -26255,Kat8,ENSMUSG00000030801,protein_coding -26254,Bckdk,ENSMUSG00000030802,protein_coding -26251,Gm21974,ENSMUSG00000030804,protein_coding -26247,Stx4a,ENSMUSG00000030805,protein_coding -26245,Stx1b,ENSMUSG00000030806,protein_coding -26239,Fbxl19,ENSMUSG00000030811,protein_coding -26234,Bcl7c,ENSMUSG00000030814,protein_coding -26227,Phkg2,ENSMUSG00000030815,protein_coding -26229,Rnf40,ENSMUSG00000030816,protein_coding -26217,Prr14,ENSMUSG00000030822,protein_coding -26207,9130019O22Rik,ENSMUSG00000030823,protein_coding -24016,Nucb1,ENSMUSG00000030824,protein_coding -24021,Hsd17b14,ENSMUSG00000030825,protein_coding -24023,Bcat2,ENSMUSG00000030826,protein_coding -24025,Fgf21,ENSMUSG00000030827,protein_coding -26204,Itgal,ENSMUSG00000030830,protein_coding -24058,Abcc6,ENSMUSG00000030834,protein_coding -24060,Nomo1,ENSMUSG00000030835,protein_coding -24066,Ush1c,ENSMUSG00000030838,protein_coding -24072,Sergef,ENSMUSG00000030839,protein_coding -25372,Lamtor1,ENSMUSG00000030842,protein_coding -26286,Rgs10,ENSMUSG00000030844,protein_coding -26288,Tial1,ENSMUSG00000030846,protein_coding -26292,Bag3,ENSMUSG00000030847,protein_coding -26318,Fgfr2,ENSMUSG00000030849,protein_coding -26320,Ate1,ENSMUSG00000030850,protein_coding -24091,Ldhc,ENSMUSG00000030851,protein_coding -26326,Tacc2,ENSMUSG00000030852,protein_coding -24105,Ptpn5,ENSMUSG00000030854,protein_coding -26340,Fam24b,ENSMUSG00000030858,protein_coding -26341,Fam24a,ENSMUSG00000030859,protein_coding -26348,Acadsb,ENSMUSG00000030861,protein_coding 
-26362,Cpxm2,ENSMUSG00000030862,protein_coding -26055,Chp2,ENSMUSG00000030865,protein_coding -26054,Ern2,ENSMUSG00000030866,protein_coding -26052,Plk1,ENSMUSG00000030867,protein_coding -26050,Dctn5,ENSMUSG00000030868,protein_coding -26046,Ndufab1,ENSMUSG00000030869,protein_coding -26045,Ubfd1,ENSMUSG00000030870,protein_coding -26044,Ears2,ENSMUSG00000030871,protein_coding -26043,Gga2,ENSMUSG00000030872,protein_coding -26040,Scnn1b,ENSMUSG00000030873,protein_coding -26022,Mettl9,ENSMUSG00000030876,protein_coding -26018,Mfsd13b,ENSMUSG00000030877,protein_coding -26017,Cdr2,ENSMUSG00000030878,protein_coding -25649,Mrpl17,ENSMUSG00000030879,protein_coding -26016,Polr3e,ENSMUSG00000030880,protein_coding -25637,Arfip2,ENSMUSG00000030881,protein_coding -25641,Dnhd1,ENSMUSG00000030882,protein_coding -26005,Uqcrc2,ENSMUSG00000030884,protein_coding -26006,Pdzd9,ENSMUSG00000030887,protein_coding -25643,Rrp8,ENSMUSG00000030888,protein_coding -26011,Vwa3a,ENSMUSG00000030889,protein_coding -25644,Ilk,ENSMUSG00000030890,protein_coding -25646,Tpp1,ENSMUSG00000030894,protein_coding -25634,Hpx,ENSMUSG00000030895,protein_coding -25627,Cnga4,ENSMUSG00000030897,protein_coding -25629,Cckbr,ENSMUSG00000030898,protein_coding -25995,Crym,ENSMUSG00000030905,protein_coding -25994,Anks4b,ENSMUSG00000030909,protein_coding -25992,Zp2,ENSMUSG00000030911,protein_coding -25991,Tmem159,ENSMUSG00000030917,protein_coding -25558,Trim30a,ENSMUSG00000030921,protein_coding -25987,Lyrm1,ENSMUSG00000030922,protein_coding -25985,Rexo5,ENSMUSG00000030924,protein_coding -25984,Eri2,ENSMUSG00000030929,protein_coding -26365,Chst15,ENSMUSG00000030930,protein_coding -26373,Oat,ENSMUSG00000030934,protein_coding -25983,Acsm3,ENSMUSG00000030935,protein_coding -25978,Thumpd1,ENSMUSG00000030942,protein_coding -25972,Acsm2,ENSMUSG00000030945,protein_coding -26376,Lhpp,ENSMUSG00000030946,protein_coding -25965,Gp2,ENSMUSG00000030954,protein_coding -26378,Fam53b,ENSMUSG00000030956,protein_coding 
-26379,Eef1akmt2,ENSMUSG00000030960,protein_coding -25966,Umod,ENSMUSG00000030963,protein_coding -26381,Abraxas2,ENSMUSG00000030965,protein_coding -25402,Trim21,ENSMUSG00000030966,protein_coding -26384,Zranb1,ENSMUSG00000030967,protein_coding -25968,Pdilt,ENSMUSG00000030968,protein_coding -26386,Ctbp2,ENSMUSG00000030970,protein_coding -25971,Acsm5,ENSMUSG00000030972,protein_coding -26390,Tex36,ENSMUSG00000030976,protein_coding -25392,Rrm1,ENSMUSG00000030978,protein_coding -26396,Uros,ENSMUSG00000030979,protein_coding -25958,Knop1,ENSMUSG00000030980,protein_coding -26395,Mmp21,ENSMUSG00000030981,protein_coding -25953,Vps35l,ENSMUSG00000030982,protein_coding -26397,Bccip,ENSMUSG00000030983,protein_coding -26398,Dhx32,ENSMUSG00000030986,protein_coding -25390,Stim1,ENSMUSG00000030987,protein_coding -25387,Pgap2,ENSMUSG00000030990,protein_coding -26406,D7Ertd443e,ENSMUSG00000030994,protein_coding -25384,Art1,ENSMUSG00000030996,protein_coding -26423,Mki67,ENSMUSG00000031004,protein_coding -7484,Atp6ap2,ENSMUSG00000031007,protein_coding -7496,Usp9x,ENSMUSG00000031010,protein_coding -7510,Cask,ENSMUSG00000031012,protein_coding -25825,Swap70,ENSMUSG00000031015,protein_coding -25824,Wee1,ENSMUSG00000031016,protein_coding -25808,Tmem9b,ENSMUSG00000031021,protein_coding -25806,BC051019,ENSMUSG00000031022,protein_coding -25805,Akip1,ENSMUSG00000031023,protein_coding -25803,St5,ENSMUSG00000031024,protein_coding -25798,Trim66,ENSMUSG00000031026,protein_coding -25795,Stk33,ENSMUSG00000031027,protein_coding -25788,Tub,ENSMUSG00000031028,protein_coding -25787,Eif3f,ENSMUSG00000031029,protein_coding -7576,Ndufb11,ENSMUSG00000031059,protein_coding -7577,Rbm10,ENSMUSG00000031060,protein_coding -7579,Cdk16,ENSMUSG00000031065,protein_coding -7580,Usp11,ENSMUSG00000031066,protein_coding -26449,Glrx3,ENSMUSG00000031068,protein_coding -26733,Mrgprf,ENSMUSG00000031070,protein_coding -26728,LTO1,ENSMUSG00000031072,protein_coding -26725,Fgf15,ENSMUSG00000031073,protein_coding 
-26722,Fgf3,ENSMUSG00000031074,protein_coding -26716,Ano1,ENSMUSG00000031075,protein_coding -26715,Fadd,ENSMUSG00000031077,protein_coding -26711,Cttn,ENSMUSG00000031078,protein_coding -7595,Zfp300,ENSMUSG00000031079,protein_coding -26705,Acte1,ENSMUSG00000031085,protein_coding -7607,Slc6a14,ENSMUSG00000031089,protein_coding -26703,Nadsyn1,ENSMUSG00000031090,protein_coding -7747,Dock11,ENSMUSG00000031093,protein_coding -7844,Cul4b,ENSMUSG00000031095,protein_coding -26643,Tnni2,ENSMUSG00000031097,protein_coding -26642,Syt8,ENSMUSG00000031098,protein_coding -7923,Smarca1,ENSMUSG00000031099,protein_coding -7928,Sash3,ENSMUSG00000031101,protein_coding -7937,Elf4,ENSMUSG00000031103,protein_coding -7939,Rab33a,ENSMUSG00000031104,protein_coding -7941,Slc25a14,ENSMUSG00000031105,protein_coding -7944,Rbmx2,ENSMUSG00000031107,protein_coding -7947,Enox2,ENSMUSG00000031109,protein_coding -7957,Igsf1,ENSMUSG00000031111,protein_coding -7969,Stk26,ENSMUSG00000031112,protein_coding -7983,1700080O16Rik,ENSMUSG00000031118,protein_coding -7984,Gpc4,ENSMUSG00000031119,protein_coding -8070,3830403N18Rik,ENSMUSG00000031125,protein_coding -36466,Slc9a9,ENSMUSG00000031129,protein_coding -8091,Brs3,ENSMUSG00000031130,protein_coding -8093,Vgll1,ENSMUSG00000031131,protein_coding -8095,Cd40lg,ENSMUSG00000031132,protein_coding -8096,Arhgef6,ENSMUSG00000031133,protein_coding -8097,Rbmx,ENSMUSG00000031134,protein_coding -8118,Fgf13,ENSMUSG00000031137,protein_coding -8126,F9,ENSMUSG00000031138,protein_coding -8129,Mcf2,ENSMUSG00000031139,protein_coding -7346,Cacna1f,ENSMUSG00000031142,protein_coding -7345,Ccdc22,ENSMUSG00000031143,protein_coding -7347,Syp,ENSMUSG00000031144,protein_coding -7349,Prickle3,ENSMUSG00000031145,protein_coding -7350,Plp2,ENSMUSG00000031146,protein_coding -7351,Magix,ENSMUSG00000031147,protein_coding -7353,Gpkow,ENSMUSG00000031148,protein_coding -7356,Praf2,ENSMUSG00000031149,protein_coding -7357,Ccdc120,ENSMUSG00000031150,protein_coding 
-7360,Gripap1,ENSMUSG00000031153,protein_coding -7363,Otud5,ENSMUSG00000031154,protein_coding -7364,Pim2,ENSMUSG00000031155,protein_coding -7365,Slc35a2,ENSMUSG00000031156,protein_coding -7366,Pqbp1,ENSMUSG00000031157,protein_coding -7367,Timm17b,ENSMUSG00000031158,protein_coding -7371,Eras,ENSMUSG00000031160,protein_coding -7372,Hdac6,ENSMUSG00000031161,protein_coding -7374,Gata1,ENSMUSG00000031162,protein_coding -7375,Glod5,ENSMUSG00000031163,protein_coding -7382,Was,ENSMUSG00000031165,protein_coding -7386,Wdr13,ENSMUSG00000031166,protein_coding -7387,Rbm3,ENSMUSG00000031167,protein_coding -7390,Ebp,ENSMUSG00000031168,protein_coding -7391,Porcn,ENSMUSG00000031169,protein_coding -7393,Slc38a5,ENSMUSG00000031170,protein_coding -7392,Ftsj1,ENSMUSG00000031171,protein_coding -7451,Otc,ENSMUSG00000031173,protein_coding -7450,Rpgr,ENSMUSG00000031174,protein_coding -7444,Dynlt3,ENSMUSG00000031176,protein_coding -8175,3830417A13Rik,ENSMUSG00000031179,protein_coding -8182,Ctag2,ENSMUSG00000031181,protein_coding -8188,4930447F04Rik,ENSMUSG00000031182,protein_coding -8245,Aff2,ENSMUSG00000031189,protein_coding -8164,4931400O07Rik,ENSMUSG00000031194,protein_coding -8432,F8,ENSMUSG00000031196,protein_coding -8442,Vbp1,ENSMUSG00000031197,protein_coding -8435,Fundc2,ENSMUSG00000031198,protein_coding -8437,Mtcp1,ENSMUSG00000031200,protein_coding -8438,Brcc3,ENSMUSG00000031201,protein_coding -8444,Rab39b,ENSMUSG00000031202,protein_coding -8690,Asb12,ENSMUSG00000031204,protein_coding -8697,Msn,ENSMUSG00000031207,protein_coding -8706,Heph,ENSMUSG00000031209,protein_coding -8709,Gpr165,ENSMUSG00000031210,protein_coding -8714,Pgr15l,ENSMUSG00000031212,protein_coding -8723,Ophn1,ENSMUSG00000031214,protein_coding -8728,Stard8,ENSMUSG00000031216,protein_coding -8730,Efnb1,ENSMUSG00000031217,protein_coding -8743,Awat2,ENSMUSG00000031220,protein_coding -8745,Igbp1,ENSMUSG00000031221,protein_coding -8901,Magee2,ENSMUSG00000031224,protein_coding -8906,Pbdc1,ENSMUSG00000031226,protein_coding 
-8907,Magee1,ENSMUSG00000031227,protein_coding -8922,Atrx,ENSMUSG00000031229,protein_coding -8921,Fgf16,ENSMUSG00000031230,protein_coding -8927,Cox7b,ENSMUSG00000031231,protein_coding -8926,Magt1,ENSMUSG00000031232,protein_coding -50181,Pgk2,ENSMUSG00000031233,protein_coding -8964,Itm2a,ENSMUSG00000031239,protein_coding -8967,Tbx22,ENSMUSG00000031241,protein_coding -8968,2610002M06Rik,ENSMUSG00000031242,protein_coding -8979,Hmgn5,ENSMUSG00000031245,protein_coding -8982,Sh3bgrl,ENSMUSG00000031246,protein_coding -9209,Tnmd,ENSMUSG00000031250,protein_coding -9212,Srpx2,ENSMUSG00000031253,protein_coding -9213,Sytl4,ENSMUSG00000031255,protein_coding -9215,Cstf2,ENSMUSG00000031256,protein_coding -9217,Nox1,ENSMUSG00000031257,protein_coding -9218,Xkrx,ENSMUSG00000031258,protein_coding -9222,Cenpi,ENSMUSG00000031262,protein_coding -9228,Btk,ENSMUSG00000031264,protein_coding -9231,Gla,ENSMUSG00000031266,protein_coding -9362,4930513O06Rik,ENSMUSG00000031270,protein_coding -9361,Serpina7,ENSMUSG00000031271,protein_coding -9402,Col4a6,ENSMUSG00000031273,protein_coding -9406,Col4a5,ENSMUSG00000031274,protein_coding -9424,Acsl4,ENSMUSG00000031278,protein_coding -9439,Chrdl1,ENSMUSG00000031283,protein_coding -9441,Pak3,ENSMUSG00000031284,protein_coding -9444,Dcx,ENSMUSG00000031285,protein_coding -11029,Glt28d2,ENSMUSG00000031286,protein_coding -9493,Il13ra2,ENSMUSG00000031289,protein_coding -9495,Lrch2,ENSMUSG00000031290,protein_coding -9747,Cdkl5,ENSMUSG00000031292,protein_coding -9746,Rs1,ENSMUSG00000031293,protein_coding -9741,Phka2,ENSMUSG00000031295,protein_coding -8763,Slc7a3,ENSMUSG00000031297,protein_coding -9739,Adgrg2,ENSMUSG00000031298,protein_coding -9734,Pdha1,ENSMUSG00000031299,protein_coding -8774,Nlgn3,ENSMUSG00000031302,protein_coding -9731,Map3k15,ENSMUSG00000031303,protein_coding -8771,Il2rg,ENSMUSG00000031304,protein_coding -9722,Rps6ka3,ENSMUSG00000031309,protein_coding -8776,Zmym3,ENSMUSG00000031310,protein_coding -8777,Nono,ENSMUSG00000031311,protein_coding 
-8778,Itgb1bp2,ENSMUSG00000031312,protein_coding -8781,Taf1,ENSMUSG00000031314,protein_coding -8802,Rps4x,ENSMUSG00000031320,protein_coding -8828,Dmrtc1a,ENSMUSG00000031323,protein_coding -8847,Cdx4,ENSMUSG00000031326,protein_coding -8849,Chic1,ENSMUSG00000031327,protein_coding -8369,Flna,ENSMUSG00000031328,protein_coding -8851,Tsx,ENSMUSG00000031329,protein_coding -8876,Zcchc13,ENSMUSG00000031330,protein_coding -8893,Abcb7,ENSMUSG00000031333,protein_coding -8276,Mtm1,ENSMUSG00000031337,protein_coding -8293,Gabre,ENSMUSG00000031340,protein_coding -9819,Gpm6b,ENSMUSG00000031342,protein_coding -8297,Gabra3,ENSMUSG00000031343,protein_coding -8303,Gabrq,ENSMUSG00000031344,protein_coding -8305,Cetn2,ENSMUSG00000031347,protein_coding -8306,Nsdhl,ENSMUSG00000031349,protein_coding -8308,Zfp185,ENSMUSG00000031351,protein_coding -9858,Hccs,ENSMUSG00000031352,protein_coding -9772,Rbbp7,ENSMUSG00000031353,protein_coding -9857,Amelx,ENSMUSG00000031354,protein_coding -9855,Arhgap6,ENSMUSG00000031355,protein_coding -9777,Syap1,ENSMUSG00000031357,protein_coding -9853,Msl3,ENSMUSG00000031358,protein_coding -9780,Ctps2,ENSMUSG00000031360,protein_coding -8324,Xlr4c,ENSMUSG00000031362,protein_coding -9789,Grpr,ENSMUSG00000031364,protein_coding -8330,Zfp275,ENSMUSG00000031365,protein_coding -9792,Ap1s2,ENSMUSG00000031367,protein_coding -9793,Zrsr2,ENSMUSG00000031370,protein_coding -8336,Haus7,ENSMUSG00000031371,protein_coding -8335,Trex2,ENSMUSG00000031372,protein_coding -9794,Car5b,ENSMUSG00000031373,protein_coding -8334,Zfp92,ENSMUSG00000031374,protein_coding -8337,Bgn,ENSMUSG00000031375,protein_coding -8338,Atp2b3,ENSMUSG00000031376,protein_coding -9799,Bmx,ENSMUSG00000031377,protein_coding -8344,Abcd1,ENSMUSG00000031378,protein_coding -9801,Pir,ENSMUSG00000031379,protein_coding -9803,Vegfd,ENSMUSG00000031380,protein_coding -9804,Piga,ENSMUSG00000031381,protein_coding -9805,Asb11,ENSMUSG00000031382,protein_coding -8340,Dusp9,ENSMUSG00000031383,protein_coding 
-9807,Asb9,ENSMUSG00000031384,protein_coding -8346,Plxnb3,ENSMUSG00000031385,protein_coding -8358,Hcfc1,ENSMUSG00000031386,protein_coding -8357,Renbp,ENSMUSG00000031387,protein_coding -8356,Naa10,ENSMUSG00000031388,protein_coding -8354,Arhgap4,ENSMUSG00000031389,protein_coding -8355,Avpr2,ENSMUSG00000031390,protein_coding -8353,L1cam,ENSMUSG00000031391,protein_coding -8360,Irak1,ENSMUSG00000031392,protein_coding -8363,Mecp2,ENSMUSG00000031393,protein_coding -8364,Opn1mw,ENSMUSG00000031394,protein_coding -8368,Tktl1,ENSMUSG00000031397,protein_coding -8382,Plxna3,ENSMUSG00000031398,protein_coding -8386,Fam3a,ENSMUSG00000031399,protein_coding -8388,G6pdx,ENSMUSG00000031400,protein_coding -8427,Mpp1,ENSMUSG00000031402,protein_coding -8425,Dkc1,ENSMUSG00000031403,protein_coding -9253,Tceal6,ENSMUSG00000031409,protein_coding -9270,Nxf7,ENSMUSG00000031410,protein_coding -9271,Prame,ENSMUSG00000031411,protein_coding -9320,Morf4l2,ENSMUSG00000031422,protein_coding -9311,Kir3dl1,ENSMUSG00000031424,protein_coding -9324,Plp1,ENSMUSG00000031425,protein_coding -9334,Zcchc18,ENSMUSG00000031428,protein_coding -9400,Psmd10,ENSMUSG00000031429,protein_coding -9399,Vsig1,ENSMUSG00000031430,protein_coding -9390,Tsc22d3,ENSMUSG00000031431,protein_coding -9389,Prps1,ENSMUSG00000031432,protein_coding -9380,Rbm41,ENSMUSG00000031433,protein_coding -9379,Morc4,ENSMUSG00000031434,protein_coding -9371,Rnf128,ENSMUSG00000031438,protein_coding -29593,Atp11a,ENSMUSG00000031441,protein_coding -29596,Mcf2l,ENSMUSG00000031442,protein_coding -29602,F7,ENSMUSG00000031443,protein_coding -29603,F10,ENSMUSG00000031444,protein_coding -29604,Proz,ENSMUSG00000031445,protein_coding -29608,Cul4a,ENSMUSG00000031446,protein_coding -29610,Lamp1,ENSMUSG00000031447,protein_coding -29614,Adprhl1,ENSMUSG00000031448,protein_coding -29621,Atp4b,ENSMUSG00000031449,protein_coding -29623,Grk1,ENSMUSG00000031450,protein_coding -29625,Gas6,ENSMUSG00000031451,protein_coding 
-29627,1700029H14Rik,ENSMUSG00000031452,protein_coding -29628,Rasa3,ENSMUSG00000031453,protein_coding -29637,Coprs,ENSMUSG00000031458,protein_coding -29659,Myom2,ENSMUSG00000031461,protein_coding -29690,Angpt2,ENSMUSG00000031465,protein_coding -29691,Agpat5,ENSMUSG00000031467,protein_coding -29718,Defb8,ENSMUSG00000031471,protein_coding -29836,Nek3,ENSMUSG00000031478,protein_coding -29838,Vps36,ENSMUSG00000031479,protein_coding -29839,Thsd1,ENSMUSG00000031480,protein_coding -29841,Tpte,ENSMUSG00000031481,protein_coding -29842,Slc25a15,ENSMUSG00000031482,protein_coding -29969,Erlin2,ENSMUSG00000031483,protein_coding -29971,Plpbp,ENSMUSG00000031485,protein_coding -29972,Adgra2,ENSMUSG00000031486,protein_coding -29973,Brf2,ENSMUSG00000031487,protein_coding -29974,Rab11fip1,ENSMUSG00000031488,protein_coding -29979,Adrb3,ENSMUSG00000031489,protein_coding -29981,Eif4ebp1,ENSMUSG00000031490,protein_coding -29986,Chrna6,ENSMUSG00000031491,protein_coding -29985,Chrnb3,ENSMUSG00000031492,protein_coding -23293,Ggn,ENSMUSG00000031493,protein_coding -29400,Cd209a,ENSMUSG00000031494,protein_coding -29405,Cd209d,ENSMUSG00000031495,protein_coding -29523,Tnfsf13b,ENSMUSG00000031497,protein_coding -29549,Col4a1,ENSMUSG00000031502,protein_coding -29551,Col4a2,ENSMUSG00000031503,protein_coding -29553,Rab20,ENSMUSG00000031504,protein_coding -29556,Naxd,ENSMUSG00000031505,protein_coding -2231,Ptpn7,ENSMUSG00000031506,protein_coding -29559,Ankrd10,ENSMUSG00000031508,protein_coding -29562,1700016D06Rik,ENSMUSG00000031509,protein_coding -29570,Arhgef7,ENSMUSG00000031511,protein_coding -29573,Tex29,ENSMUSG00000031512,protein_coding -30056,Leprotl1,ENSMUSG00000031513,protein_coding -30051,Dctn6,ENSMUSG00000031516,protein_coding -30348,Gpm6a,ENSMUSG00000031517,protein_coding -30344,Spata4,ENSMUSG00000031518,protein_coding -30342,Asb5,ENSMUSG00000031519,protein_coding -30339,Vegfc,ENSMUSG00000031520,protein_coding -30336,Aga,ENSMUSG00000031521,protein_coding 
-30112,Dlc1,ENSMUSG00000031523,protein_coding -30089,Eri1,ENSMUSG00000031527,protein_coding -30074,Tnks,ENSMUSG00000031529,protein_coding -30073,Dusp4,ENSMUSG00000031530,protein_coding -30058,Saraf,ENSMUSG00000031532,protein_coding -29847,Mrps31,ENSMUSG00000031533,protein_coding -29848,Smim19,ENSMUSG00000031534,protein_coding -29852,Dkk4,ENSMUSG00000031535,protein_coding -29853,Polb,ENSMUSG00000031536,protein_coding -29855,Ikbkb,ENSMUSG00000031537,protein_coding -29857,Plat,ENSMUSG00000031538,protein_coding -29858,Ap3m2,ENSMUSG00000031539,protein_coding -29861,Kat6a,ENSMUSG00000031540,protein_coding -29864,Ank1,ENSMUSG00000031543,protein_coding -29871,Gpat4,ENSMUSG00000031545,protein_coding -29873,Gins4,ENSMUSG00000031546,protein_coding -29877,Sfrp1,ENSMUSG00000031548,protein_coding -29889,Ido2,ENSMUSG00000031549,protein_coding -29890,Ido1,ENSMUSG00000031551,protein_coding -29891,Adam18,ENSMUSG00000031552,protein_coding -29892,Adam3,ENSMUSG00000031553,protein_coding -29895,Adam5,ENSMUSG00000031554,protein_coding -29898,Adam9,ENSMUSG00000031555,protein_coding -29899,Tm2d2,ENSMUSG00000031556,protein_coding -29902,Plekha2,ENSMUSG00000031557,protein_coding -16511,Slit2,ENSMUSG00000031558,protein_coding -30295,Tenm3,ENSMUSG00000031561,protein_coding -30294,Dctd,ENSMUSG00000031562,protein_coding -30289,Wwc2,ENSMUSG00000031563,protein_coding -29914,Fgfr1,ENSMUSG00000031565,protein_coding -30274,Rwdd4a,ENSMUSG00000031568,protein_coding -29925,Plpp5,ENSMUSG00000031570,protein_coding -29931,Star,ENSMUSG00000031574,protein_coding -29932,Ash2l,ENSMUSG00000031575,protein_coding -29933,Kcnu1,ENSMUSG00000031576,protein_coding -30016,Tti2,ENSMUSG00000031577,protein_coding -30017,Mak16,ENSMUSG00000031578,protein_coding -30034,Wrn,ENSMUSG00000031583,protein_coding -30040,Gsr,ENSMUSG00000031584,protein_coding -30043,Gtf2e2,ENSMUSG00000031585,protein_coding -30045,Rbpms,ENSMUSG00000031586,protein_coding -30173,Frg1,ENSMUSG00000031590,protein_coding 
-30172,Asah1,ENSMUSG00000031591,protein_coding -30171,Pcm1,ENSMUSG00000031592,protein_coding -30169,Fgl1,ENSMUSG00000031594,protein_coding -30164,Pdgfrl,ENSMUSG00000031595,protein_coding -30163,Slc7a2,ENSMUSG00000031596,protein_coding -30150,Vps37a,ENSMUSG00000031600,protein_coding -30149,Cnot7,ENSMUSG00000031601,protein_coding -30145,Fgf20,ENSMUSG00000031603,protein_coding -30471,Msmo1,ENSMUSG00000031604,protein_coding -30472,Klhl2,ENSMUSG00000031605,protein_coding -30386,Galnt7,ENSMUSG00000031608,protein_coding -30382,Sap30,ENSMUSG00000031609,protein_coding -30381,Scrg1,ENSMUSG00000031610,protein_coding -30363,Hpgd,ENSMUSG00000031613,protein_coding -30788,Ednra,ENSMUSG00000031616,protein_coding -30784,Tmem184c,ENSMUSG00000031617,protein_coding -30772,Nr3c2,ENSMUSG00000031618,protein_coding -30758,Iqcm,ENSMUSG00000031620,protein_coding -30741,Isx,ENSMUSG00000031621,protein_coding -30725,Sin3b,ENSMUSG00000031622,protein_coding -30221,Sorbs2,ENSMUSG00000031626,protein_coding -30261,Irf2,ENSMUSG00000031627,protein_coding -30259,Casp3,ENSMUSG00000031628,protein_coding -30256,Cenpu,ENSMUSG00000031629,protein_coding -30239,Cfap97,ENSMUSG00000031631,protein_coding -30242,Slc25a4,ENSMUSG00000031633,protein_coding -30234,Ufsp2,ENSMUSG00000031634,protein_coding -30442,Anxa10,ENSMUSG00000031635,protein_coding -30229,Pdlim3,ENSMUSG00000031636,protein_coding -30236,Lrp2bp,ENSMUSG00000031637,protein_coding -30219,Tlr3,ENSMUSG00000031639,protein_coding -30216,Gm45753,ENSMUSG00000031640,protein_coding -30436,Cbr4,ENSMUSG00000031641,protein_coding -30432,Sh3rf1,ENSMUSG00000031642,protein_coding -30426,Nek1,ENSMUSG00000031644,protein_coding -30214,F11,ENSMUSG00000031645,protein_coding -30417,Mfap3l,ENSMUSG00000031647,protein_coding -30184,Triml1,ENSMUSG00000031651,protein_coding -31009,N4bp1,ENSMUSG00000031652,protein_coding -31026,Cbln1,ENSMUSG00000031654,protein_coding -31034,Heatr3,ENSMUSG00000031657,protein_coding -31039,Adcy7,ENSMUSG00000031659,protein_coding 
-31040,Brd7,ENSMUSG00000031660,protein_coding -31042,Nkd1,ENSMUSG00000031661,protein_coding -31044,Snx20,ENSMUSG00000031662,protein_coding -31052,Sall1,ENSMUSG00000031665,protein_coding -31071,Rbl2,ENSMUSG00000031666,protein_coding -31072,Aktip,ENSMUSG00000031667,protein_coding -20283,Eif2ak3,ENSMUSG00000031668,protein_coding -31214,Gins3,ENSMUSG00000031669,protein_coding -31217,Setd6,ENSMUSG00000031671,protein_coding -31229,Got2,ENSMUSG00000031672,protein_coding -31269,Cdh11,ENSMUSG00000031673,protein_coding -30812,Smad1,ENSMUSG00000031681,protein_coding -30808,1700011L22Rik,ENSMUSG00000031682,protein_coding -30801,Lsm6,ENSMUSG00000031683,protein_coding -30797,Slc10a7,ENSMUSG00000031684,protein_coding -30794,Pou4f2,ENSMUSG00000031688,protein_coding -30964,Tnpo2,ENSMUSG00000031691,protein_coding -30981,Vps35,ENSMUSG00000031696,protein_coding -30982,Orc6,ENSMUSG00000031697,protein_coding -30984,Mylk3,ENSMUSG00000031698,protein_coding -30989,Gpt2,ENSMUSG00000031700,protein_coding -30990,Dnaja2,ENSMUSG00000031701,protein_coding -30995,Itfg1,ENSMUSG00000031703,protein_coding -30909,Rfx1,ENSMUSG00000031706,protein_coding -30879,Tecr,ENSMUSG00000031708,protein_coding -30866,Tbc1d9,ENSMUSG00000031709,protein_coding -30868,Ucp1,ENSMUSG00000031710,protein_coding -30860,Zfp330,ENSMUSG00000031711,protein_coding -30856,Il15,ENSMUSG00000031712,protein_coding -30838,Gab1,ENSMUSG00000031714,protein_coding -30836,Smarca5,ENSMUSG00000031715,protein_coding -31468,Hp,ENSMUSG00000031722,protein_coding -31467,Txnl4b,ENSMUSG00000031723,protein_coding -31129,Ces1f,ENSMUSG00000031725,protein_coding -31465,Pmfbp1,ENSMUSG00000031727,protein_coding -31472,Zfp821,ENSMUSG00000031728,protein_coding -31471,Ist1,ENSMUSG00000031729,protein_coding -31469,Dhodh,ENSMUSG00000031730,protein_coding -31474,Ap1g1,ENSMUSG00000031731,protein_coding -31478,Phlpp2,ENSMUSG00000031732,protein_coding -31096,Irx3,ENSMUSG00000031734,protein_coding -31109,Irx5,ENSMUSG00000031737,protein_coding 
-31111,Irx6,ENSMUSG00000031738,protein_coding -31114,Mmp2,ENSMUSG00000031740,protein_coding -31142,Gnao1,ENSMUSG00000031748,protein_coding -31508,St3gal2,ENSMUSG00000031749,protein_coding -31499,Il34,ENSMUSG00000031750,protein_coding -31144,Amfr,ENSMUSG00000031751,protein_coding -31505,Cog4,ENSMUSG00000031753,protein_coding -31145,Nudt21,ENSMUSG00000031754,protein_coding -31148,Bbs2,ENSMUSG00000031755,protein_coding -31579,Cenpn,ENSMUSG00000031756,protein_coding -31149,Mt4,ENSMUSG00000031757,protein_coding -31576,Cdyl2,ENSMUSG00000031758,protein_coding -31150,Mt3,ENSMUSG00000031760,protein_coding -31152,Mt2,ENSMUSG00000031762,protein_coding -31153,Mt1,ENSMUSG00000031765,protein_coding -31160,Slc12a3,ENSMUSG00000031766,protein_coding -31558,Nudt7,ENSMUSG00000031767,protein_coding -31163,Herpud1,ENSMUSG00000031770,protein_coding -31547,Cntnap4,ENSMUSG00000031772,protein_coding -31169,Fam192a,ENSMUSG00000031774,protein_coding -31173,Pllp,ENSMUSG00000031775,protein_coding -31172,Arl2bp,ENSMUSG00000031776,protein_coding -31178,Cx3cl1,ENSMUSG00000031778,protein_coding -31176,Ccl22,ENSMUSG00000031779,protein_coding -31179,Ccl17,ENSMUSG00000031780,protein_coding -31180,Ciapin1,ENSMUSG00000031781,protein_coding -31182,Coq9,ENSMUSG00000031782,protein_coding -31183,Polr2c,ENSMUSG00000031783,protein_coding -31188,Adgrg1,ENSMUSG00000031785,protein_coding -31191,Drc7,ENSMUSG00000031786,protein_coding -31192,Katnb1,ENSMUSG00000031787,protein_coding -31194,Kifc3,ENSMUSG00000031788,protein_coding -31198,Cngb1,ENSMUSG00000031789,protein_coding -31202,Mmp15,ENSMUSG00000031790,protein_coding -30722,Tmem38a,ENSMUSG00000031791,protein_coding -31201,Usb1,ENSMUSG00000031792,protein_coding -31203,Cfap20,ENSMUSG00000031796,protein_coding -30696,Tpm4,ENSMUSG00000031799,protein_coding -30671,B3gnt3,ENSMUSG00000031803,protein_coding -30669,Jak3,ENSMUSG00000031805,protein_coding -30664,Pgls,ENSMUSG00000031807,protein_coding -30663,Slc27a1,ENSMUSG00000031808,protein_coding 
-31670,1700018B08Rik,ENSMUSG00000031809,protein_coding -31675,Map1lc3b,ENSMUSG00000031812,protein_coding -30660,Mvb12a,ENSMUSG00000031813,protein_coding -31662,Mthfsd,ENSMUSG00000031816,protein_coding -31652,Cox4i1,ENSMUSG00000031818,protein_coding -31651,Emc8,ENSMUSG00000031819,protein_coding -30649,Babam1,ENSMUSG00000031820,protein_coding -31644,Gins2,ENSMUSG00000031821,protein_coding -31639,Gse1,ENSMUSG00000031822,protein_coding -31632,Zdhhc7,ENSMUSG00000031823,protein_coding -31636,6430548M08Rik,ENSMUSG00000031824,protein_coding -31630,Crispld2,ENSMUSG00000031825,protein_coding -31628,Usp10,ENSMUSG00000031826,protein_coding -31626,Cotl1,ENSMUSG00000031827,protein_coding -31627,Klhl36,ENSMUSG00000031828,protein_coding -31616,Dnaaf1,ENSMUSG00000031831,protein_coding -31617,Taf1c,ENSMUSG00000031832,protein_coding -30625,Mast3,ENSMUSG00000031833,protein_coding -30623,Pik3r2,ENSMUSG00000031834,protein_coding -31614,Mbtps1,ENSMUSG00000031835,protein_coding -31612,Necab2,ENSMUSG00000031837,protein_coding -31608,Hsbp1,ENSMUSG00000031839,protein_coding -30619,Rab3a,ENSMUSG00000031840,protein_coding -31602,Cdh13,ENSMUSG00000031841,protein_coding -30618,Pde4c,ENSMUSG00000031842,protein_coding -31599,Mphosph6,ENSMUSG00000031843,protein_coding -31596,Hsd17b2,ENSMUSG00000031844,protein_coding -31585,Bco1,ENSMUSG00000031845,protein_coding -31581,1700030J22Rik,ENSMUSG00000031847,protein_coding -30613,Lsm4,ENSMUSG00000031848,protein_coding -30596,Comp,ENSMUSG00000031849,protein_coding -31865,Ntpcr,ENSMUSG00000031851,protein_coding -31868,Map3k21,ENSMUSG00000031853,protein_coding -30575,Mau2,ENSMUSG00000031858,protein_coding -30568,Pbx4,ENSMUSG00000031860,protein_coding -30567,Lpar2,ENSMUSG00000031861,protein_coding -30565,Atp13a1,ENSMUSG00000031862,protein_coding -30536,Ints10,ENSMUSG00000031864,protein_coding -20492,Dctn1,ENSMUSG00000031865,protein_coding -34521,Pgr,ENSMUSG00000031870,protein_coding -31282,Cdh5,ENSMUSG00000031871,protein_coding 
-31284,Bean1,ENSMUSG00000031872,protein_coding -31298,Cmtm3,ENSMUSG00000031875,protein_coding -31292,Gm45711,ENSMUSG00000031876,protein_coding -31330,Ces2g,ENSMUSG00000031877,protein_coding -31306,Nae1,ENSMUSG00000031878,protein_coding -31313,Ciao2b,ENSMUSG00000031879,protein_coding -31311,Rrad,ENSMUSG00000031880,protein_coding -31310,Cdh16,ENSMUSG00000031881,protein_coding -31307,Car7,ENSMUSG00000031883,protein_coding -31340,Cbfb,ENSMUSG00000031885,protein_coding -31328,Ces2e,ENSMUSG00000031886,protein_coding -31345,Tradd,ENSMUSG00000031887,protein_coding -31343,D230025D16Rik,ENSMUSG00000031889,protein_coding -31366,Hsd11b2,ENSMUSG00000031891,protein_coding -31384,Tsnaxip1,ENSMUSG00000031893,protein_coding -31392,Ctrl,ENSMUSG00000031896,protein_coding -31394,Psmb10,ENSMUSG00000031897,protein_coding -31399,Dpep3,ENSMUSG00000031898,protein_coding -31403,Dus2,ENSMUSG00000031901,protein_coding -31406,Nfatc3,ENSMUSG00000031902,protein_coding -31410,Pla2g15,ENSMUSG00000031903,protein_coding -31411,Slc7a6,ENSMUSG00000031904,protein_coding -31415,Smpd3,ENSMUSG00000031906,protein_coding -31417,Zfp90,ENSMUSG00000031907,protein_coding -31424,Has3,ENSMUSG00000031910,protein_coding -31432,Vps4a,ENSMUSG00000031913,protein_coding -31435,Cog8,ENSMUSG00000031916,protein_coding -31436,Nip7,ENSMUSG00000031917,protein_coding -34562,Mtmr2,ENSMUSG00000031918,protein_coding -31437,Tmed6,ENSMUSG00000031919,protein_coding -31438,Terf2,ENSMUSG00000031921,protein_coding -34563,Cep57,ENSMUSG00000031922,protein_coding -31441,Cyb5b,ENSMUSG00000031924,protein_coding -34555,Maml2,ENSMUSG00000031925,protein_coding -34594,1700012B09Rik,ENSMUSG00000031927,protein_coding -34596,Mre11a,ENSMUSG00000031928,protein_coding -31446,Wwp2,ENSMUSG00000031930,protein_coding -34595,Ankrd49,ENSMUSG00000031931,protein_coding -34600,Gpr83,ENSMUSG00000031932,protein_coding -34601,Izumo1r,ENSMUSG00000031933,protein_coding -34604,Panx1,ENSMUSG00000031934,protein_coding -34608,Med17,ENSMUSG00000031935,protein_coding 
-34605,Hephl1,ENSMUSG00000031936,protein_coding -34607,Vstm5,ENSMUSG00000031937,protein_coding -34609,4931406C07Rik,ENSMUSG00000031938,protein_coding -34610,Taf1d,ENSMUSG00000031939,protein_coding -31541,Kars,ENSMUSG00000031948,protein_coding -31540,Adat1,ENSMUSG00000031949,protein_coding -31539,Gabarapl2,ENSMUSG00000031950,protein_coding -31538,Tmem231,ENSMUSG00000031951,protein_coding -31537,Chst5,ENSMUSG00000031952,protein_coding -31535,Tmem170,ENSMUSG00000031953,protein_coding -31534,Cfdp1,ENSMUSG00000031954,protein_coding -31533,Bcar1,ENSMUSG00000031955,protein_coding -31531,Ctrb1,ENSMUSG00000031957,protein_coding -31529,Ldhd,ENSMUSG00000031958,protein_coding -31526,Wdr59,ENSMUSG00000031959,protein_coding -31513,Aars,ENSMUSG00000031960,protein_coding -31728,Cdh15,ENSMUSG00000031962,protein_coding -34857,Bmper,ENSMUSG00000031963,protein_coding -34873,Tbx20,ENSMUSG00000031965,protein_coding -34905,Glb1l3,ENSMUSG00000031966,protein_coding -31759,Afg3l1,ENSMUSG00000031967,protein_coding -34908,Acad8,ENSMUSG00000031969,protein_coding -31760,Dbndd1,ENSMUSG00000031970,protein_coding -31822,Ccsap,ENSMUSG00000031971,protein_coding -31824,Acta1,ENSMUSG00000031972,protein_coding -31828,Abcb10,ENSMUSG00000031974,protein_coding -31831,Urb2,ENSMUSG00000031976,protein_coding -31840,Cog2,ENSMUSG00000031979,protein_coding -31841,Agt,ENSMUSG00000031980,protein_coding -31842,Capn9,ENSMUSG00000031981,protein_coding -31845,Arv1,ENSMUSG00000031982,protein_coding -31843,2310022B05Rik,ENSMUSG00000031983,protein_coding -31851,2810004N23Rik,ENSMUSG00000031984,protein_coding -31852,Gnpat,ENSMUSG00000031985,protein_coding -31854,Sprtn,ENSMUSG00000031986,protein_coding -31856,Egln1,ENSMUSG00000031987,protein_coding -34910,Vps26b,ENSMUSG00000031988,protein_coding -34913,Jam3,ENSMUSG00000031990,protein_coding -34919,Spata19,ENSMUSG00000031991,protein_coding -34935,Snx19,ENSMUSG00000031993,protein_coding -34942,Adamts8,ENSMUSG00000031994,protein_coding 
-34946,St14,ENSMUSG00000031995,protein_coding -34948,Aplp2,ENSMUSG00000031996,protein_coding -34519,Trpc6,ENSMUSG00000031997,protein_coding -34508,Birc3,ENSMUSG00000032000,protein_coding -34488,Dcun1d5,ENSMUSG00000032002,protein_coding -34481,Pdgfd,ENSMUSG00000032006,protein_coding -34573,Sesn3,ENSMUSG00000032009,protein_coding -35354,Usp2,ENSMUSG00000032010,protein_coding -35351,Thy1,ENSMUSG00000032011,protein_coding -35343,Nectin1,ENSMUSG00000032012,protein_coding -35333,Trim29,ENSMUSG00000032013,protein_coding -35331,Oaf,ENSMUSG00000032014,protein_coding -35329,Pou2f3,ENSMUSG00000032015,protein_coding -35322,Grik4,ENSMUSG00000032017,protein_coding -35314,Sc5d,ENSMUSG00000032018,protein_coding -35285,Ubash3b,ENSMUSG00000032020,protein_coding -35284,Crtam,ENSMUSG00000032021,protein_coding -35282,Jhy,ENSMUSG00000032023,protein_coding -35271,Clmp,ENSMUSG00000032024,protein_coding -35515,Rexo2,ENSMUSG00000032026,protein_coding -35512,Nxpe2,ENSMUSG00000032028,protein_coding -35607,Cul5,ENSMUSG00000032030,protein_coding -34962,Barx2,ENSMUSG00000032033,protein_coding -34972,Kcnj5,ENSMUSG00000032034,protein_coding -34984,Ets1,ENSMUSG00000032035,protein_coding -35005,Kirrel3,ENSMUSG00000032036,protein_coding -35010,St3gal4,ENSMUSG00000032038,protein_coding -35012,Dcps,ENSMUSG00000032040,protein_coding -35014,Tirap,ENSMUSG00000032041,protein_coding -35016,Srpr,ENSMUSG00000032042,protein_coding -35019,Rpusd4,ENSMUSG00000032044,protein_coding -6415,Abhd12,ENSMUSG00000032046,protein_coding -35606,Acat1,ENSMUSG00000032047,protein_coding -35584,Rdx,ENSMUSG00000032050,protein_coding -35582,Fdx1,ENSMUSG00000032051,protein_coding -35570,Pou2af1,ENSMUSG00000032053,protein_coding -35565,Btg4,ENSMUSG00000032056,protein_coding -35566,4833427G06Rik,ENSMUSG00000032057,protein_coding -35561,Ppp2r1b,ENSMUSG00000032058,protein_coding -35560,Alg9,ENSMUSG00000032059,protein_coding -35557,Cryab,ENSMUSG00000032060,protein_coding -35555,2310030G06Rik,ENSMUSG00000032062,protein_coding 
-35554,Dixdc1,ENSMUSG00000032064,protein_coding -35548,Tex12,ENSMUSG00000032065,protein_coding -35546,Bco2,ENSMUSG00000032066,protein_coding -35544,Pts,ENSMUSG00000032067,protein_coding -35543,Plet1,ENSMUSG00000032068,protein_coding -35503,Cadm1,ENSMUSG00000032076,protein_coding -35481,Bud13,ENSMUSG00000032077,protein_coding -35480,Zpr1,ENSMUSG00000032078,protein_coding -35478,Apoa5,ENSMUSG00000032079,protein_coding -35476,Apoa4,ENSMUSG00000032080,protein_coding -35474,Apoc3,ENSMUSG00000032081,protein_coding -35473,Apoa1,ENSMUSG00000032083,protein_coding -35466,Tagln,ENSMUSG00000032085,protein_coding -35460,Bace1,ENSMUSG00000032086,protein_coding -35455,Dscaml1,ENSMUSG00000032087,protein_coding -35445,Il10ra,ENSMUSG00000032089,protein_coding -35442,Tmprss4,ENSMUSG00000032091,protein_coding -35433,Mpzl2,ENSMUSG00000032092,protein_coding -35430,Cd3e,ENSMUSG00000032093,protein_coding -35429,Cd3d,ENSMUSG00000032094,protein_coding -35414,Arcn1,ENSMUSG00000032096,protein_coding -35403,Ddx6,ENSMUSG00000032097,protein_coding -35410,Treh,ENSMUSG00000032098,protein_coding -35034,Pate4,ENSMUSG00000032099,protein_coding -35028,Ddx25,ENSMUSG00000032101,protein_coding -35029,Pus3,ENSMUSG00000032103,protein_coding -35367,Pdzd3,ENSMUSG00000032105,protein_coding -35040,Pate6,ENSMUSG00000032108,protein_coding -35368,Nlrx1,ENSMUSG00000032109,protein_coding -35064,Acrv1,ENSMUSG00000032110,protein_coding -35387,Trappc4,ENSMUSG00000032112,protein_coding -35065,Chek1,ENSMUSG00000032113,protein_coding -35385,Slc37a4,ENSMUSG00000032114,protein_coding -35383,Hyou1,ENSMUSG00000032115,protein_coding -35066,Stt3a,ENSMUSG00000032116,protein_coding -35070,Fez1,ENSMUSG00000032118,protein_coding -35372,Hinfp,ENSMUSG00000032119,protein_coding -35373,C2cd2l,ENSMUSG00000032120,protein_coding -35077,Tmem218,ENSMUSG00000032121,protein_coding -35078,Slc37a2,ENSMUSG00000032122,protein_coding -35374,Dpagt1,ENSMUSG00000032123,protein_coding -35089,Robo4,ENSMUSG00000032125,protein_coding 
-35378,Hmbs,ENSMUSG00000032126,protein_coding -35379,Vps11,ENSMUSG00000032127,protein_coding -35090,Robo3,ENSMUSG00000032128,protein_coding -35370,Abcg4,ENSMUSG00000032131,protein_coding -35362,Mcam,ENSMUSG00000032135,protein_coding -34745,Pin1,ENSMUSG00000032171,protein_coding -34746,Olfm2,ENSMUSG00000032172,protein_coding -34764,Icam5,ENSMUSG00000032174,protein_coding -34774,Tyk2,ENSMUSG00000032175,protein_coding -34776,Pde4a,ENSMUSG00000032177,protein_coding -34788,Ilf3,ENSMUSG00000032178,protein_coding -36116,Bmp5,ENSMUSG00000032179,protein_coding -34793,Tmed1,ENSMUSG00000032180,protein_coding -36113,Scg3,ENSMUSG00000032181,protein_coding -34797,Yipf2,ENSMUSG00000032182,protein_coding -36112,Lysmd2,ENSMUSG00000032184,protein_coding -34796,Carm1,ENSMUSG00000032185,protein_coding -36110,Tmod2,ENSMUSG00000032186,protein_coding -34801,Smarca4,ENSMUSG00000032187,protein_coding -36101,Bcl2l10,ENSMUSG00000032191,protein_coding -36098,Gnb5,ENSMUSG00000032192,protein_coding -34804,Ldlr,ENSMUSG00000032193,protein_coding -34806,Kank2,ENSMUSG00000032194,protein_coding -34807,Dock6,ENSMUSG00000032198,protein_coding -35995,Polr2m,ENSMUSG00000032199,protein_coding -36066,Rab27a,ENSMUSG00000032202,protein_coding -35990,Aqp9,ENSMUSG00000032204,protein_coding -35986,Lipc,ENSMUSG00000032207,protein_coding -35982,Sltm,ENSMUSG00000032212,protein_coding -36070,Rsl24d1,ENSMUSG00000032215,protein_coding -36045,Nedd4,ENSMUSG00000032216,protein_coding -35977,Rnf111,ENSMUSG00000032217,protein_coding -35976,Ccnb2,ENSMUSG00000032218,protein_coding -35974,Myo1e,ENSMUSG00000032220,protein_coding -36032,Mns1,ENSMUSG00000032221,protein_coding -35971,Fam81a,ENSMUSG00000032224,protein_coding -35968,Gcnt3,ENSMUSG00000032226,protein_coding -36001,Tcf12,ENSMUSG00000032228,protein_coding -35958,Anxa2,ENSMUSG00000032231,protein_coding -35998,Cgnl1,ENSMUSG00000032232,protein_coding -35954,Ice2,ENSMUSG00000032235,protein_coding -35952,Rora,ENSMUSG00000032238,protein_coding 
-34849,Rp9,ENSMUSG00000032239,protein_coding -35816,Itga11,ENSMUSG00000032243,protein_coding -35818,Fem1b,ENSMUSG00000032244,protein_coding -35820,Cln6,ENSMUSG00000032245,protein_coding -35823,Calml4,ENSMUSG00000032246,protein_coding -35813,Anp32a,ENSMUSG00000032249,protein_coding -36251,Irak1bp1,ENSMUSG00000032251,protein_coding -35806,Glce,ENSMUSG00000032252,protein_coding -36252,Phip,ENSMUSG00000032253,protein_coding -35803,Kif23,ENSMUSG00000032254,protein_coding -35531,Ankk1,ENSMUSG00000032257,protein_coding -36262,Lca5,ENSMUSG00000032258,protein_coding -35530,Drd2,ENSMUSG00000032259,protein_coding -36268,Sh3bgrl2,ENSMUSG00000032261,protein_coding -36272,Elovl4,ENSMUSG00000032262,protein_coding -36275,Bckdhb,ENSMUSG00000032263,protein_coding -35527,Zw10,ENSMUSG00000032264,protein_coding -36294,Tent5a,ENSMUSG00000032265,protein_coding -35524,Usp28,ENSMUSG00000032267,protein_coding -35528,Tmprss5,ENSMUSG00000032268,protein_coding -35522,Htr3a,ENSMUSG00000032269,protein_coding -35519,Nnmt,ENSMUSG00000032271,protein_coding -35620,Cyp19a1,ENSMUSG00000032274,protein_coding -35804,Paqr5,ENSMUSG00000032278,protein_coding -35627,Idh3a,ENSMUSG00000032279,protein_coding -35796,Tle3,ENSMUSG00000032280,protein_coding -35628,Acsbg1,ENSMUSG00000032281,protein_coding -35630,Dnaja4,ENSMUSG00000032285,protein_coding -35683,Imp3,ENSMUSG00000032288,protein_coding -35770,Thsd4,ENSMUSG00000032289,protein_coding -35685,Ptpn9,ENSMUSG00000032290,protein_coding -35633,Crabp1,ENSMUSG00000032291,protein_coding -35767,Nr2e3,ENSMUSG00000032292,protein_coding -35635,Ireb2,ENSMUSG00000032293,protein_coding -35758,Pkm,ENSMUSG00000032294,protein_coding -35690,Man2c1,ENSMUSG00000032295,protein_coding -35756,Celf6,ENSMUSG00000032297,protein_coding -35691,Neil1,ENSMUSG00000032298,protein_coding -35692,Commd4,ENSMUSG00000032299,protein_coding -35695,1700017B05Rik,ENSMUSG00000032300,protein_coding -35638,Psma4,ENSMUSG00000032301,protein_coding -35640,Chrna3,ENSMUSG00000032303,protein_coding 
-35706,Fam219b,ENSMUSG00000032305,protein_coding -35707,Mpi,ENSMUSG00000032306,protein_coding -35646,Ube2q2,ENSMUSG00000032307,protein_coding -35709,Ulk3,ENSMUSG00000032308,protein_coding -35647,Fbxo22,ENSMUSG00000032309,protein_coding -35713,Cyp1a2,ENSMUSG00000032310,protein_coding -35648,Nrg4,ENSMUSG00000032311,protein_coding -35712,Csk,ENSMUSG00000032312,protein_coding -35650,Tmem266,ENSMUSG00000032313,protein_coding -35651,Etfa,ENSMUSG00000032314,protein_coding -35714,Cyp1a1,ENSMUSG00000032315,protein_coding -35716,Clk3,ENSMUSG00000032316,protein_coding -35653,Isl2,ENSMUSG00000032318,protein_coding -35661,Rcn2,ENSMUSG00000032320,protein_coding -35662,Pstpip1,ENSMUSG00000032322,protein_coding -35722,Cyp11a1,ENSMUSG00000032323,protein_coding -35663,Tspan3,ENSMUSG00000032324,protein_coding -35725,Stra6,ENSMUSG00000032327,protein_coding -36215,Tmem30a,ENSMUSG00000032328,protein_coding -35670,Hmg20a,ENSMUSG00000032329,protein_coding -36214,Cox7a2,ENSMUSG00000032330,protein_coding -36212,Col12a1,ENSMUSG00000032332,protein_coding -35733,Stoml1,ENSMUSG00000032333,protein_coding -35734,Loxl1,ENSMUSG00000032334,protein_coding -35741,Nptn,ENSMUSG00000032336,protein_coding -35744,Hcn4,ENSMUSG00000032338,protein_coding -35745,Neo1,ENSMUSG00000032340,protein_coding -36188,Mto1,ENSMUSG00000032342,protein_coding -36228,Impg1,ENSMUSG00000032343,protein_coding -36186,Cgas,ENSMUSG00000032344,protein_coding -36181,Ooep,ENSMUSG00000032346,protein_coding -36169,Gsta4,ENSMUSG00000032348,protein_coding -36158,Elovl5,ENSMUSG00000032349,protein_coding -36153,Gclc,ENSMUSG00000032350,protein_coding -36145,Lrrc1,ENSMUSG00000032352,protein_coding -36395,Tmed3,ENSMUSG00000032353,protein_coding -36140,Mlip,ENSMUSG00000032355,protein_coding -36401,Rasgrf1,ENSMUSG00000032356,protein_coding -36136,Tinag,ENSMUSG00000032357,protein_coding -36127,Fam83b,ENSMUSG00000032358,protein_coding -36404,Ctsh,ENSMUSG00000032359,protein_coding -36125,Hcrtr2,ENSMUSG00000032360,protein_coding 
-36409,Adamts7,ENSMUSG00000032363,protein_coding -35929,Tpm1,ENSMUSG00000032366,protein_coding -36431,Zic1,ENSMUSG00000032368,protein_coding -36439,Plscr1,ENSMUSG00000032369,protein_coding -35927,Lactb,ENSMUSG00000032370,protein_coding -36441,Plscr2,ENSMUSG00000032372,protein_coding -35918,Car12,ENSMUSG00000032373,protein_coding -36449,Plod2,ENSMUSG00000032374,protein_coding -35919,Aph1b,ENSMUSG00000032375,protein_coding -35916,Usp3,ENSMUSG00000032376,protein_coding -36445,Plscr4,ENSMUSG00000032377,protein_coding -35911,Dapk2,ENSMUSG00000032380,protein_coding -35910,Ciao2a,ENSMUSG00000032381,protein_coding -35909,Snx1,ENSMUSG00000032382,protein_coding -35907,Ppib,ENSMUSG00000032383,protein_coding -35905,Csnk1g1,ENSMUSG00000032384,protein_coding -35900,Trip4,ENSMUSG00000032386,protein_coding -35897,Rbpms2,ENSMUSG00000032387,protein_coding -35892,Spg21,ENSMUSG00000032388,protein_coding -35875,Parp16,ENSMUSG00000032392,protein_coding -35871,Dpp8,ENSMUSG00000032393,protein_coding -35874,Igdcc3,ENSMUSG00000032394,protein_coding -35861,Dis3l,ENSMUSG00000032396,protein_coding -35859,Tipin,ENSMUSG00000032397,protein_coding -35854,Snapc5,ENSMUSG00000032398,protein_coding -35848,Rpl4,ENSMUSG00000032399,protein_coding -35847,Zwilch,ENSMUSG00000032400,protein_coding -35846,Lctl,ENSMUSG00000032401,protein_coding -35837,Smad3,ENSMUSG00000032402,protein_coding -35831,2300009A05Rik,ENSMUSG00000032403,protein_coding -35824,Pias1,ENSMUSG00000032405,protein_coding -36474,U2surp,ENSMUSG00000032407,protein_coding -36487,Atr,ENSMUSG00000032409,protein_coding -36490,Xrn1,ENSMUSG00000032410,protein_coding -36495,Tfdp2,ENSMUSG00000032411,protein_coding -36498,Atp1b3,ENSMUSG00000032412,protein_coding -36505,Rasa2,ENSMUSG00000032413,protein_coding -36310,Ube2cbp,ENSMUSG00000032415,protein_coding -36315,Rwdd2a,ENSMUSG00000032417,protein_coding -36316,Me1,ENSMUSG00000032418,protein_coding -36335,Tbx18,ENSMUSG00000032419,protein_coding -36347,Nt5e,ENSMUSG00000032420,protein_coding 
-36348,Snx14,ENSMUSG00000032422,protein_coding -36352,Syncrip,ENSMUSG00000032423,protein_coding -36357,Zfp949,ENSMUSG00000032425,protein_coding -37030,Crtap,ENSMUSG00000032431,protein_coding -37040,Cmtm6,ENSMUSG00000032434,protein_coding -37039,Dync1li1,ENSMUSG00000032435,protein_coding -37041,Cmtm7,ENSMUSG00000032436,protein_coding -37051,Stt3b,ENSMUSG00000032437,protein_coding -37065,Tgfbr2,ENSMUSG00000032440,protein_coding -37087,Eomes,ENSMUSG00000032446,protein_coding -36521,Slc25a36,ENSMUSG00000032449,protein_coding -36528,Trim42,ENSMUSG00000032451,protein_coding -36531,Clstn2,ENSMUSG00000032452,protein_coding -36538,Rbp2,ENSMUSG00000032454,protein_coding -36535,Nmnat3,ENSMUSG00000032456,protein_coding -36541,Copb2,ENSMUSG00000032458,protein_coding -36543,Mrps22,ENSMUSG00000032459,protein_coding -36563,Pik3cb,ENSMUSG00000032462,protein_coding -36561,Faim,ENSMUSG00000032463,protein_coding -36577,Armc8,ENSMUSG00000032468,protein_coding -36578,Dbr1,ENSMUSG00000032469,protein_coding -36575,Mras,ENSMUSG00000032470,protein_coding -36581,Cldn18,ENSMUSG00000032473,protein_coding -36595,Nck1,ENSMUSG00000032475,protein_coding -36933,Cdc25a,ENSMUSG00000032477,protein_coding -36931,Nme6,ENSMUSG00000032478,protein_coding -36936,Map4,ENSMUSG00000032479,protein_coding -36941,Dhx30,ENSMUSG00000032480,protein_coding -36943,Smarcc1,ENSMUSG00000032481,protein_coding -36946,Cspg5,ENSMUSG00000032482,protein_coding -36955,Ngp,ENSMUSG00000032484,protein_coding -36952,Scap,ENSMUSG00000032485,protein_coding -2438,Ptgs2,ENSMUSG00000032487,protein_coding -36957,Kif9,ENSMUSG00000032489,protein_coding -36960,Nradd,ENSMUSG00000032491,protein_coding -36966,Pth1r,ENSMUSG00000032492,protein_coding -36969,Prss44,ENSMUSG00000032493,protein_coding -36979,Tdgf1,ENSMUSG00000032494,protein_coding -36980,Lrrc2,ENSMUSG00000032495,protein_coding -36985,Ltf,ENSMUSG00000032496,protein_coding -36990,Lrrfip2,ENSMUSG00000032497,protein_coding -36991,Mlh1,ENSMUSG00000032498,protein_coding 
-36996,Dclk3,ENSMUSG00000032500,protein_coding -45975,Trib1,ENSMUSG00000032501,protein_coding -36999,Stac,ENSMUSG00000032502,protein_coding -37002,Arpp21,ENSMUSG00000032503,protein_coding -37019,Pdcd6ip,ENSMUSG00000032504,protein_coding -37023,Fbxl2,ENSMUSG00000032507,protein_coding -37109,Myd88,ENSMUSG00000032508,protein_coding -37116,Scn5a,ENSMUSG00000032511,protein_coding -37120,Wdr48,ENSMUSG00000032512,protein_coding -37122,Gorasp1,ENSMUSG00000032513,protein_coding -37123,Ttc21a,ENSMUSG00000032514,protein_coding -37124,Csrnp1,ENSMUSG00000032515,protein_coding -37138,Mobp,ENSMUSG00000032517,protein_coding -37133,Rpsa,ENSMUSG00000032518,protein_coding -37132,Slc25a38,ENSMUSG00000032519,protein_coding -37192,Hhatl,ENSMUSG00000032523,protein_coding -37187,Nktr,ENSMUSG00000032525,protein_coding -37186,Ss18l2,ENSMUSG00000032526,protein_coding -36604,Pccb,ENSMUSG00000032527,protein_coding -37184,Vipr1,ENSMUSG00000032528,protein_coding -37181,Lyzl4,ENSMUSG00000032530,protein_coding -36648,Amotl2,ENSMUSG00000032531,protein_coding -37178,Cck,ENSMUSG00000032532,protein_coding -36644,Cep63,ENSMUSG00000032534,protein_coding -37175,Trak1,ENSMUSG00000032536,protein_coding -36627,Ephb1,ENSMUSG00000032537,protein_coding -37218,Abhd5,ENSMUSG00000032540,protein_coding -36653,Ryk,ENSMUSG00000032547,protein_coding -36655,Slco2a1,ENSMUSG00000032548,protein_coding -36660,Rab6b,ENSMUSG00000032549,protein_coding -37246,1110059G10Rik,ENSMUSG00000032551,protein_coding -36662,Srprb,ENSMUSG00000032553,protein_coding -36664,Trf,ENSMUSG00000032554,protein_coding -36666,Topbp1,ENSMUSG00000032555,protein_coding -36671,Bfsp2,ENSMUSG00000032556,protein_coding -36685,Uba5,ENSMUSG00000032557,protein_coding -36682,Nphp3,ENSMUSG00000032558,protein_coding -36691,Dnajc13,ENSMUSG00000032560,protein_coding -36694,Acpp,ENSMUSG00000032561,protein_coding -36802,Gnai2,ENSMUSG00000032562,protein_coding -36701,Mrpl3,ENSMUSG00000032563,protein_coding -36699,Cpne4,ENSMUSG00000032564,protein_coding 
-36706,Nudt16,ENSMUSG00000032565,protein_coding -36707,Nudt16l2,ENSMUSG00000032566,protein_coding -36712,Aste1,ENSMUSG00000032567,protein_coding -36713,Atp2c1,ENSMUSG00000032570,protein_coding -36717,Pik3r4,ENSMUSG00000032571,protein_coding -36722,Col6a4,ENSMUSG00000032572,protein_coding -36763,Manf,ENSMUSG00000032575,protein_coding -36775,Mapkapk3,ENSMUSG00000032577,protein_coding -36777,Cish,ENSMUSG00000032578,protein_coding -36779,Hemk1,ENSMUSG00000032579,protein_coding -36808,Rbm5,ENSMUSG00000032580,protein_coding -36810,Rbm6,ENSMUSG00000032582,protein_coding -36812,Mon1a,ENSMUSG00000032583,protein_coding -36814,Mst1r,ENSMUSG00000032584,protein_coding -36818,Traip,ENSMUSG00000032586,protein_coding -36836,Bsn,ENSMUSG00000032589,protein_coding -36835,Apeh,ENSMUSG00000032590,protein_coding -36833,Mst1,ENSMUSG00000032591,protein_coding -36832,Amigo3,ENSMUSG00000032593,protein_coding -36827,Ip6k1,ENSMUSG00000032594,protein_coding -36824,Cdhr4,ENSMUSG00000032595,protein_coding -36819,Uba7,ENSMUSG00000032596,protein_coding -36884,Nckipsd,ENSMUSG00000032598,protein_coding -36882,Ip6k2,ENSMUSG00000032599,protein_coding -36880,Prkar2a,ENSMUSG00000032601,protein_coding -36879,Slc25a20,ENSMUSG00000032602,protein_coding -36862,Qars,ENSMUSG00000032604,protein_coding -36843,Nicn1,ENSMUSG00000032606,protein_coding -36844,Amt,ENSMUSG00000032607,protein_coding -36857,Klhdc8b,ENSMUSG00000032609,protein_coding -36853,1700102P08Rik,ENSMUSG00000032611,protein_coding -36850,Usp4,ENSMUSG00000032612,protein_coding -38308,Nt5m,ENSMUSG00000032615,protein_coding -42552,Srek1,ENSMUSG00000032621,protein_coding -17982,Oas1d,ENSMUSG00000032623,protein_coding -50909,Eml4,ENSMUSG00000032624,protein_coding -19011,Thsd7a,ENSMUSG00000032625,protein_coding -38304,Flcn,ENSMUSG00000032633,protein_coding -26128,Atxn2l,ENSMUSG00000032637,protein_coding -24607,Chsy1,ENSMUSG00000032640,protein_coding -21739,Gpr19,ENSMUSG00000032641,protein_coding -14653,Fhl3,ENSMUSG00000032643,protein_coding 
-54289,Pygm,ENSMUSG00000032648,protein_coding -2484,Colgalt2,ENSMUSG00000032649,protein_coding -21737,Crebl2,ENSMUSG00000032652,protein_coding -53503,March3,ENSMUSG00000032656,protein_coding -11166,Fam189b,ENSMUSG00000032657,protein_coding -17972,Oas3,ENSMUSG00000032661,protein_coding -2478,1700025G04Rik,ENSMUSG00000032666,protein_coding -18939,Pon2,ENSMUSG00000032667,protein_coding -5558,A930018P22Rik,ENSMUSG00000032671,protein_coding -37711,Prorsd1,ENSMUSG00000032673,protein_coding -5557,Cd59a,ENSMUSG00000032679,protein_coding -6475,6820408C15Rik,ENSMUSG00000032680,protein_coding -53660,Malt1,ENSMUSG00000032688,protein_coding -17970,Oas2,ENSMUSG00000032690,protein_coding -38294,Nlrp3,ENSMUSG00000032691,protein_coding -5550,Lmo2,ENSMUSG00000032698,protein_coding -54791,Kank1,ENSMUSG00000032702,protein_coding -44076,Exd2,ENSMUSG00000032705,protein_coding -22006,Resf1,ENSMUSG00000032712,protein_coding -28075,Syde1,ENSMUSG00000032714,protein_coding -6469,Trib3,ENSMUSG00000032715,protein_coding -50352,Mdfi,ENSMUSG00000032717,protein_coding -21731,Mansc1,ENSMUSG00000032718,protein_coding -197,Sbspon,ENSMUSG00000032719,protein_coding -5537,Abtb2,ENSMUSG00000032724,protein_coding -25366,Folr2,ENSMUSG00000032725,protein_coding -14620,Bmp8a,ENSMUSG00000032726,protein_coding -42644,Mier3,ENSMUSG00000032727,protein_coding -35680,Snx33,ENSMUSG00000032733,protein_coding -53594,Ablim3,ENSMUSG00000032735,protein_coding -25364,Inppl1,ENSMUSG00000032737,protein_coding -49737,Pram1,ENSMUSG00000032739,protein_coding -37710,Ccdc88a,ENSMUSG00000032740,protein_coding -17959,Tpcn1,ENSMUSG00000032741,protein_coding -26107,D430042O09Rik,ENSMUSG00000032743,protein_coding -14614,Heyl,ENSMUSG00000032744,protein_coding -42637,Gpbp1,ENSMUSG00000032745,protein_coding -8422,Gab3,ENSMUSG00000032750,protein_coding -17958,Slc8b1,ENSMUSG00000032754,protein_coding -18911,Bet1,ENSMUSG00000032757,protein_coding -21717,Kap,ENSMUSG00000032758,protein_coding -28074,Ilvbl,ENSMUSG00000032763,protein_coding 
-18909,Gng11,ENSMUSG00000032766,protein_coding -185,Trpa1,ENSMUSG00000032769,protein_coding -54367,Chrm1,ENSMUSG00000032773,protein_coding -24714,Mctp2,ENSMUSG00000032776,protein_coding -26106,Gtf3c1,ENSMUSG00000032777,protein_coding -38540,Cntrob,ENSMUSG00000032782,protein_coding -46899,Troap,ENSMUSG00000032783,protein_coding -36736,Alas1,ENSMUSG00000032786,protein_coding -28063,Pdxk,ENSMUSG00000032788,protein_coding -50681,Lama1,ENSMUSG00000032796,protein_coding -6463,Srxn1,ENSMUSG00000032802,protein_coding -36667,Cdv3,ENSMUSG00000032803,protein_coding -8385,Slc10a3,ENSMUSG00000032806,protein_coding -38535,Alox12b,ENSMUSG00000032807,protein_coding -55035,Cyp2c38,ENSMUSG00000032808,protein_coding -25352,Arap1,ENSMUSG00000032812,protein_coding -31751,Fanca,ENSMUSG00000032815,protein_coding -35873,Igdcc4,ENSMUSG00000032816,protein_coding -53852,Loxhd1,ENSMUSG00000032818,protein_coding -12214,Ank2,ENSMUSG00000032826,protein_coding -18932,Ppp1r9a,ENSMUSG00000032827,protein_coding -28047,Pwp2,ENSMUSG00000032834,protein_coding -36484,Trpc1,ENSMUSG00000032839,protein_coding -17915,2410131K14Rik,ENSMUSG00000032840,protein_coding -5508,Prr5l,ENSMUSG00000032841,protein_coding -50293,Abcc10,ENSMUSG00000032842,protein_coding -53652,Alpk2,ENSMUSG00000032845,protein_coding -34351,Abcc4,ENSMUSG00000032849,protein_coding -17913,Rnft2,ENSMUSG00000032850,protein_coding -6454,Rspo4,ENSMUSG00000032852,protein_coding -12197,Ugt8a,ENSMUSG00000032854,protein_coding -49298,Pkd1,ENSMUSG00000032855,protein_coding -25341,P2ry2,ENSMUSG00000032860,protein_coding -5505,Rag2,ENSMUSG00000032864,protein_coding -17909,Fbxw8,ENSMUSG00000032867,protein_coding -6450,Psmf1,ENSMUSG00000032869,protein_coding -14565,Smap2,ENSMUSG00000032870,protein_coding -36327,Cyb5r4,ENSMUSG00000032872,protein_coding -25337,Arhgef17,ENSMUSG00000032875,protein_coding -37693,Ccdc85a,ENSMUSG00000032878,protein_coding -1299,Acsl3,ENSMUSG00000032883,protein_coding -14560,Rims3,ENSMUSG00000032890,protein_coding 
-38518,Rangrf,ENSMUSG00000032892,protein_coding -8826,1700031F05Rik,ENSMUSG00000032894,protein_coding -14555,Nfyc,ENSMUSG00000032897,protein_coding -17906,Fbxo21,ENSMUSG00000032898,protein_coding -21626,Styk1,ENSMUSG00000032899,protein_coding -53498,Tex43,ENSMUSG00000032900,protein_coding -11798,Slc16a1,ENSMUSG00000032902,protein_coding -53386,Atg12,ENSMUSG00000032905,protein_coding -1289,Sgpp2,ENSMUSG00000032908,protein_coding -35678,Cspg4,ENSMUSG00000032911,protein_coding -11790,Lrig2,ENSMUSG00000032913,protein_coding -50485,Adgre4,ENSMUSG00000032915,protein_coding -38515,Odf4,ENSMUSG00000032921,protein_coding -34428,Itgbl1,ENSMUSG00000032925,protein_coding -48368,Hspa13,ENSMUSG00000032932,protein_coding -36817,Camkv,ENSMUSG00000032936,protein_coding -51025,Fshr,ENSMUSG00000032937,protein_coding -31155,Nup93,ENSMUSG00000032939,protein_coding -48366,Rbm11,ENSMUSG00000032940,protein_coding -25319,Ucp3,ENSMUSG00000032942,protein_coding -54290,Rasgrp2,ENSMUSG00000032946,protein_coding -48365,Lipi,ENSMUSG00000032948,protein_coding -11764,Ap4b1,ENSMUSG00000032952,protein_coding -17890,Pebp1,ENSMUSG00000032959,protein_coding -48115,Ift57,ENSMUSG00000032965,protein_coding -6443,Fkbp1a,ENSMUSG00000032966,protein_coding -1265,Inha,ENSMUSG00000032968,protein_coding -27998,Fam207a,ENSMUSG00000032977,protein_coding -14541,Guca2b,ENSMUSG00000032978,protein_coding -37670,5730522E02Rik,ENSMUSG00000032985,protein_coding -46856,Olfr281,ENSMUSG00000032987,protein_coding -46395,Slc16a8,ENSMUSG00000032988,protein_coding -1262,Chpf,ENSMUSG00000032997,protein_coding -14538,Foxj3,ENSMUSG00000032998,protein_coding -41831,Nlrp4f,ENSMUSG00000032999,protein_coding -34224,Mycbp2,ENSMUSG00000033004,protein_coding -46391,Sox10,ENSMUSG00000033006,protein_coding -1260,Asic4,ENSMUSG00000033007,protein_coding -31146,Ogfod1,ENSMUSG00000033009,protein_coding -11746,Trim33,ENSMUSG00000033014,protein_coding -53914,Nfatc1,ENSMUSG00000033016,protein_coding -46390,Polr2f,ENSMUSG00000033020,protein_coding 
-1259,Gmppa,ENSMUSG00000033021,protein_coding -53385,Cdo1,ENSMUSG00000033022,protein_coding -21586,Klra9,ENSMUSG00000033024,protein_coding -21559,Klrc3,ENSMUSG00000033027,protein_coding -46389,1700088E04Rik,ENSMUSG00000033029,protein_coding -48108,Cip2a,ENSMUSG00000033031,protein_coding -53593,Afap1l1,ENSMUSG00000033032,protein_coding -55241,Calhm2,ENSMUSG00000033033,protein_coding -16444,Gm7879,ENSMUSG00000033036,protein_coding -46388,Micall1,ENSMUSG00000033039,protein_coding -38493,Dhrs7c,ENSMUSG00000033044,protein_coding -46386,Eif3l,ENSMUSG00000033047,protein_coding -54757,1700028P14Rik,ENSMUSG00000033053,protein_coding -35605,Npat,ENSMUSG00000033054,protein_coding -46384,Ankrd54,ENSMUSG00000033055,protein_coding -6414,Pygb,ENSMUSG00000033059,protein_coding -34209,Lmo7,ENSMUSG00000033060,protein_coding -1252,Resp18,ENSMUSG00000033061,protein_coding -41819,Cntnap3,ENSMUSG00000033063,protein_coding -46839,Pfkm,ENSMUSG00000033065,protein_coding -38488,Gas7,ENSMUSG00000033066,protein_coding -6413,Entpd6,ENSMUSG00000033068,protein_coding -46837,Senp1,ENSMUSG00000033075,protein_coding -6409,Vsx1,ENSMUSG00000033080,protein_coding -21547,Clec1a,ENSMUSG00000033082,protein_coding -34202,Tbc1d4,ENSMUSG00000033083,protein_coding -46374,Triobp,ENSMUSG00000033088,protein_coding -6405,Apmap,ENSMUSG00000033096,protein_coding -46373,Nol12,ENSMUSG00000033099,protein_coding -41796,Cdc14b,ENSMUSG00000033102,protein_coding -27977,Lss,ENSMUSG00000033105,protein_coding -31412,Slc7a6os,ENSMUSG00000033106,protein_coding -52903,Rnf125,ENSMUSG00000033107,protein_coding -32136,3830406C13Rik,ENSMUSG00000033111,protein_coding -41792,Slc35d2,ENSMUSG00000033114,protein_coding -41791,Hsd17b3,ENSMUSG00000033122,protein_coding -1241,Atg9a,ENSMUSG00000033124,protein_coding -27975,Ybey,ENSMUSG00000033126,protein_coding -46366,Gga1,ENSMUSG00000033128,protein_coding -11705,Slc22a15,ENSMUSG00000033147,protein_coding -48073,Phldb2,ENSMUSG00000033149,protein_coding 
-20703,Podxl2,ENSMUSG00000033152,protein_coding -6381,Cst10,ENSMUSG00000033156,protein_coding -48071,Abhd10,ENSMUSG00000033157,protein_coding -1236,Cnppd1,ENSMUSG00000033159,protein_coding -11699,Atp1a1,ENSMUSG00000033161,protein_coding -34170,Dis3,ENSMUSG00000033166,protein_coding -46361,Card10,ENSMUSG00000033170,protein_coding -20697,Mgll,ENSMUSG00000033174,protein_coding -48069,Tmprss7,ENSMUSG00000033177,protein_coding -20695,Kbtbd12,ENSMUSG00000033182,protein_coding -53382,Tmed7,ENSMUSG00000033184,protein_coding -34168,Mzt1,ENSMUSG00000033186,protein_coding -48067,BC016579,ENSMUSG00000033187,protein_coding -14477,Tie1,ENSMUSG00000033191,protein_coding -31116,Lpcat2,ENSMUSG00000033192,protein_coding -38480,Myh2,ENSMUSG00000033196,protein_coding -49353,Tpsg1,ENSMUSG00000033200,protein_coding -54754,Mamdc2,ENSMUSG00000033207,protein_coding -27968,S100b,ENSMUSG00000033208,protein_coding -17687,Ttc28,ENSMUSG00000033209,protein_coding -48065,Slc9c1,ENSMUSG00000033210,protein_coding -5952,AA467197,ENSMUSG00000033213,protein_coding -34302,Slitrk5,ENSMUSG00000033214,protein_coding -20687,Eefsec,ENSMUSG00000033216,protein_coding -15854,Gm9758,ENSMUSG00000033219,protein_coding -46355,Rac2,ENSMUSG00000033220,protein_coding -11684,Ttf2,ENSMUSG00000033222,protein_coding -1222,Wnt6,ENSMUSG00000033227,protein_coding -46811,Scaf11,ENSMUSG00000033228,protein_coding -11683,Trim45,ENSMUSG00000033233,protein_coding -46809,Arid2,ENSMUSG00000033237,protein_coding -31347,Hsf4,ENSMUSG00000033249,protein_coding -14471,Szt2,ENSMUSG00000033253,protein_coding -27956,Gm5134,ENSMUSG00000033255,protein_coding -5944,Shf,ENSMUSG00000033256,protein_coding -1218,Ttll4,ENSMUSG00000033257,protein_coding -5943,Duox1,ENSMUSG00000033268,protein_coding -53132,Slc35a4,ENSMUSG00000033272,protein_coding -1217,Stk36,ENSMUSG00000033276,protein_coding -50667,Ptprm,ENSMUSG00000033278,protein_coding -31079,Rpgrip1l,ENSMUSG00000033282,protein_coding -11665,Wdr3,ENSMUSG00000033285,protein_coding 
-46348,Kctd17,ENSMUSG00000033287,protein_coding -17672,Noc4l,ENSMUSG00000033294,protein_coding -14468,Ptprf,ENSMUSG00000033295,protein_coding -47696,Lpp,ENSMUSG00000033306,protein_coding -27947,Mif,ENSMUSG00000033307,protein_coding -12081,Dpyd,ENSMUSG00000033308,protein_coding -31346,Fbxl8,ENSMUSG00000033313,protein_coding -17671,Galnt9,ENSMUSG00000033316,protein_coding -27946,Gstt2,ENSMUSG00000033318,protein_coding -53380,Fem1c,ENSMUSG00000033319,protein_coding -53910,Ctdp1,ENSMUSG00000033323,protein_coding -14466,Kdm4a,ENSMUSG00000033326,protein_coding -49833,Tnxb,ENSMUSG00000033327,protein_coding -34791,Dnm2,ENSMUSG00000033335,protein_coding -12069,Plppr5,ENSMUSG00000033342,protein_coding -8291,Magea4,ENSMUSG00000033343,protein_coding -36470,Chst2,ENSMUSG00000033350,protein_coding -38463,Map2k4,ENSMUSG00000033352,protein_coding -47680,Rtp4,ENSMUSG00000033355,protein_coding -46786,Pus7l,ENSMUSG00000033356,protein_coding -8288,Prrg3,ENSMUSG00000033361,protein_coding -1209,Usp37,ENSMUSG00000033364,protein_coding -14462,Ipo13,ENSMUSG00000033365,protein_coding -5934,Trim69,ENSMUSG00000033368,protein_coding -44009,Fntb,ENSMUSG00000033373,protein_coding -12058,Palmd,ENSMUSG00000033377,protein_coding -14460,Atp6v0b,ENSMUSG00000033379,protein_coding -52901,Trappc8,ENSMUSG00000033382,protein_coding -47676,Rtp1,ENSMUSG00000033383,protein_coding -12057,Frrs1,ENSMUSG00000033386,protein_coding -38455,Arhgap44,ENSMUSG00000033389,protein_coding -37020,Clasp2,ENSMUSG00000033392,protein_coding -5931,Spg11,ENSMUSG00000033396,protein_coding -12052,Agl,ENSMUSG00000033400,protein_coding -33909,Nudt15,ENSMUSG00000033405,protein_coding -31554,Syce1l,ENSMUSG00000033409,protein_coding -5928,Ctdspl2,ENSMUSG00000033411,protein_coding -27926,Gucd1,ENSMUSG00000033416,protein_coding -55424,Cacul1,ENSMUSG00000033417,protein_coding -36323,Snap91,ENSMUSG00000033419,protein_coding -20636,Antxr1,ENSMUSG00000033420,protein_coding -14447,Eri3,ENSMUSG00000033423,protein_coding 
-27927,Upb1,ENSMUSG00000033427,protein_coding -24581,Mcee,ENSMUSG00000033429,protein_coding -31542,Terf2ip,ENSMUSG00000033430,protein_coding -17646,Gtpbp6,ENSMUSG00000033434,protein_coding -9243,Armcx2,ENSMUSG00000033436,protein_coding -12044,Trmt13,ENSMUSG00000033439,protein_coding -27922,Specc1l,ENSMUSG00000033444,protein_coding -33904,Lpar6,ENSMUSG00000033446,protein_coding -48849,Tagap,ENSMUSG00000033450,protein_coding -34941,Adamts15,ENSMUSG00000033453,protein_coding -43996,Zbtb1,ENSMUSG00000033454,protein_coding -24579,Fan1,ENSMUSG00000033458,protein_coding -9237,Armcx1,ENSMUSG00000033460,protein_coding -17630,Crlf2,ENSMUSG00000033467,protein_coding -33899,Cysltr2,ENSMUSG00000033470,protein_coding -50344,Tomm6,ENSMUSG00000033475,protein_coding -55369,Fam160b1,ENSMUSG00000033478,protein_coding -5907,Catsper2,ENSMUSG00000033486,protein_coding -33892,Fndc3a,ENSMUSG00000033487,protein_coding -2596,Cryzl2,ENSMUSG00000033488,protein_coding -36322,Prss35,ENSMUSG00000033491,protein_coding -5906,Strc,ENSMUSG00000033498,protein_coding -40416,Larp4b,ENSMUSG00000033499,protein_coding -47644,Crygs,ENSMUSG00000033501,protein_coding -12032,Cdc14a,ENSMUSG00000033502,protein_coding -20619,Asprv1,ENSMUSG00000033508,protein_coding -24555,Otud7a,ENSMUSG00000033510,protein_coding -40410,Idi2,ENSMUSG00000033520,protein_coding -5904,Ppip5k1,ENSMUSG00000033526,protein_coding -44514,Ttc7b,ENSMUSG00000033530,protein_coding -25974,Acsm1,ENSMUSG00000033533,protein_coding -34475,Casp4,ENSMUSG00000033538,protein_coding -17601,Idua,ENSMUSG00000033540,protein_coding -19580,Arhgef5,ENSMUSG00000033542,protein_coding -35967,Gtf2a2,ENSMUSG00000033543,protein_coding -2582,Angptl1,ENSMUSG00000033544,protein_coding -31528,Znrf1,ENSMUSG00000033545,protein_coding -12016,Dph5,ENSMUSG00000033554,protein_coding -2577,Fam20b,ENSMUSG00000033557,protein_coding -46295,Rbfox2,ENSMUSG00000033565,protein_coding -338,Adgrb3,ENSMUSG00000033569,protein_coding -46293,Apol6,ENSMUSG00000033576,protein_coding 
-36226,Myo6,ENSMUSG00000033577,protein_coding -9221,Tmem35a,ENSMUSG00000033578,protein_coding -31524,Fa2h,ENSMUSG00000033579,protein_coding -47625,Igf2bp2,ENSMUSG00000033581,protein_coding -24542,Ndn,ENSMUSG00000033585,protein_coding -33870,Reep4,ENSMUSG00000033589,protein_coding -36097,Myo5c,ENSMUSG00000033590,protein_coding -31746,Spata2l,ENSMUSG00000033594,protein_coding -33869,Lgi3,ENSMUSG00000033595,protein_coding -31521,Rfwd3,ENSMUSG00000033596,protein_coding -49292,Caskin1,ENSMUSG00000033597,protein_coding -54951,Pank1,ENSMUSG00000033610,protein_coding -17595,Cplx1,ENSMUSG00000033615,protein_coding -47615,Map3k13,ENSMUSG00000033618,protein_coding -17593,Pcgf3,ENSMUSG00000033623,protein_coding -31517,Pdpr,ENSMUSG00000033624,protein_coding -52980,Pik3c3,ENSMUSG00000033628,protein_coding -35870,Hacd3,ENSMUSG00000033629,protein_coding -52950,AW554918,ENSMUSG00000033632,protein_coding -31516,Clec18a,ENSMUSG00000033633,protein_coding -20581,Nat8f2,ENSMUSG00000033634,protein_coding -33861,Piwil2,ENSMUSG00000033644,protein_coding -47610,Vps8,ENSMUSG00000033653,protein_coding -31511,Ddx19b,ENSMUSG00000033658,protein_coding -46283,Zfp7,ENSMUSG00000033669,protein_coding -2554,Cep350,ENSMUSG00000033671,protein_coding -24256,Gabrb3,ENSMUSG00000033676,protein_coding -2551,Qsox1,ENSMUSG00000033684,protein_coding -25320,Ucp2,ENSMUSG00000033685,protein_coding -36665,1300017J02Rik,ENSMUSG00000033688,protein_coding -46280,Arhgap39,ENSMUSG00000033697,protein_coding -2548,Acbd6,ENSMUSG00000033701,protein_coding -31507,Fuk,ENSMUSG00000033703,protein_coding -5882,Stard9,ENSMUSG00000033705,protein_coding -20562,Smyd5,ENSMUSG00000033706,protein_coding -46278,Lrrc24,ENSMUSG00000033707,protein_coding -33850,Ccar2,ENSMUSG00000033712,protein_coding -44482,Foxn3,ENSMUSG00000033713,protein_coding -40340,Akr1c14,ENSMUSG00000033715,protein_coding -55329,Adra2a,ENSMUSG00000033717,protein_coding -20558,Sfxn5,ENSMUSG00000033720,protein_coding -11966,Vav3,ENSMUSG00000033721,protein_coding 
-2543,BC034090,ENSMUSG00000033722,protein_coding -20557,Emx1,ENSMUSG00000033726,protein_coding -46277,Lrrc14,ENSMUSG00000033728,protein_coding -33848,Egr3,ENSMUSG00000033730,protein_coding -31502,Sf3b3,ENSMUSG00000033732,protein_coding -20552,Spr,ENSMUSG00000033735,protein_coding -8940,Fndc3c1,ENSMUSG00000033737,protein_coding -49831,Fkbpl,ENSMUSG00000033739,protein_coding -59,St18,ENSMUSG00000033740,protein_coding -30943,Gadd45gip1,ENSMUSG00000033751,protein_coding -11006,Mnd1,ENSMUSG00000033752,protein_coding -54149,Rbm4b,ENSMUSG00000033760,protein_coding -46276,Recql4,ENSMUSG00000033762,protein_coding -31498,Mtss1l,ENSMUSG00000033763,protein_coding -40331,Calm4,ENSMUSG00000033765,protein_coding -11004,Tmem131l,ENSMUSG00000033767,protein_coding -54292,Nrxn2,ENSMUSG00000033768,protein_coding -20541,Exoc6b,ENSMUSG00000033769,protein_coding -15175,Clcnka,ENSMUSG00000033770,protein_coding -17558,Rpap2,ENSMUSG00000033773,protein_coding -51,Npbwr1,ENSMUSG00000033774,protein_coding -8929,Tlr13,ENSMUSG00000033777,protein_coding -40325,Asb13,ENSMUSG00000033781,protein_coding -20533,Dysf,ENSMUSG00000033788,protein_coding -24230,Tubgcp5,ENSMUSG00000033790,protein_coding -8928,Atp7a,ENSMUSG00000033792,protein_coding -43,Atp6v1h,ENSMUSG00000033793,protein_coding -17552,Lpcat2b,ENSMUSG00000033794,protein_coding -40324,Tasor2,ENSMUSG00000033799,protein_coding -17550,Ephx4,ENSMUSG00000033805,protein_coding -5874,Tmem87a,ENSMUSG00000033808,protein_coding -47578,Alg3,ENSMUSG00000033809,protein_coding -35,Tcea1,ENSMUSG00000033813,protein_coding -46273,Ppp1r16a,ENSMUSG00000033819,protein_coding -49352,Tpsb2,ENSMUSG00000033825,protein_coding -49616,Dnah8,ENSMUSG00000033826,protein_coding -10991,Fgb,ENSMUSG00000033831,protein_coding -41682,Tpbpa,ENSMUSG00000033834,protein_coding -46272,Foxh1,ENSMUSG00000033837,protein_coding -31,Mrpl15,ENSMUSG00000033845,protein_coding -22522,Pla2g4c,ENSMUSG00000033847,protein_coding -2380,B3galt2,ENSMUSG00000033849,protein_coding 
-5059,Olfr1015,ENSMUSG00000033850,protein_coding -5865,Gm28042,ENSMUSG00000033852,protein_coding -44461,Kcnk10,ENSMUSG00000033854,protein_coding -51019,Ston1,ENSMUSG00000033855,protein_coding -40162,Engase,ENSMUSG00000033857,protein_coding -10989,Fgg,ENSMUSG00000033860,protein_coding -31745,Cdk10,ENSMUSG00000033862,protein_coding -54749,Klf9,ENSMUSG00000033863,protein_coding -53575,Ppargc1b,ENSMUSG00000033871,protein_coding -40157,Lgals3bp,ENSMUSG00000033880,protein_coding -10985,Rbm46,ENSMUSG00000033882,protein_coding -10290,D3Ertd254e,ENSMUSG00000033883,protein_coding -32094,Pxk,ENSMUSG00000033885,protein_coding -2356,Cfhr2,ENSMUSG00000033898,protein_coding -10977,Map9,ENSMUSG00000033900,protein_coding -5861,Mapkbp1,ENSMUSG00000033902,protein_coding -25952,Ccp110,ENSMUSG00000033904,protein_coding -8897,Zdhhc15,ENSMUSG00000033906,protein_coding -40154,Usp36,ENSMUSG00000033909,protein_coding -10975,Gucy1a1,ENSMUSG00000033910,protein_coding -22505,Chmp2a,ENSMUSG00000033916,protein_coding -25950,Gde1,ENSMUSG00000033917,protein_coding -47559,Parl,ENSMUSG00000033918,protein_coding -31883,Rbm34,ENSMUSG00000033931,protein_coding -21060,Vhl,ENSMUSG00000033933,protein_coding -30878,Ndufb7,ENSMUSG00000033938,protein_coding -21057,Brk1,ENSMUSG00000033940,protein_coding -5860,Mga,ENSMUSG00000033943,protein_coding -14412,Zswim5,ENSMUSG00000033948,protein_coding -53369,Trim36,ENSMUSG00000033949,protein_coding -2349,Aspm,ENSMUSG00000033952,protein_coding -37529,Ppp3r1,ENSMUSG00000033953,protein_coding -5019,Tnks1bp1,ENSMUSG00000033955,protein_coding -52689,Jcad,ENSMUSG00000033960,protein_coding -22501,Zfp446,ENSMUSG00000033961,protein_coding -21056,Fancd2os,ENSMUSG00000033963,protein_coding -2348,Zbtb41,ENSMUSG00000033964,protein_coding -8880,Slc16a2,ENSMUSG00000033965,protein_coding -50880,Cdkl4,ENSMUSG00000033966,protein_coding -22496,Rnf225,ENSMUSG00000033967,protein_coding -18879,Rfc3,ENSMUSG00000033970,protein_coding -49178,Zfp944,ENSMUSG00000033972,protein_coding 
-10957,Gria2,ENSMUSG00000033981,protein_coding -39246,Coil,ENSMUSG00000033983,protein_coding -14403,Tesk2,ENSMUSG00000033985,protein_coding -40149,Dnah17,ENSMUSG00000033987,protein_coding -42149,Ttc37,ENSMUSG00000033991,protein_coding -31869,Kcnk1,ENSMUSG00000033998,protein_coding -1697,Neu4,ENSMUSG00000034000,protein_coding -53905,Pqlc1,ENSMUSG00000034006,protein_coding -35655,Scaper,ENSMUSG00000034007,protein_coding -10944,Rxfp1,ENSMUSG00000034009,protein_coding -18861,Pds5b,ENSMUSG00000034021,protein_coding -46266,Cpsf1,ENSMUSG00000034022,protein_coding -21054,Fancd2,ENSMUSG00000034023,protein_coding -28940,Cct2,ENSMUSG00000034024,protein_coding -54038,Cd226,ENSMUSG00000034028,protein_coding -39232,Ccdc182,ENSMUSG00000034031,protein_coding -5856,Rpap1,ENSMUSG00000034032,protein_coding -14396,Ccdc17,ENSMUSG00000034035,protein_coding -20802,Fgd5,ENSMUSG00000034037,protein_coding -49348,Prss29,ENSMUSG00000034039,protein_coding -18259,Galnt17,ENSMUSG00000034040,protein_coding -30940,Lyl1,ENSMUSG00000034041,protein_coding -14394,Gpbp1l1,ENSMUSG00000034042,protein_coding -8808,Phka1,ENSMUSG00000034055,protein_coding -28931,Myrfl,ENSMUSG00000034057,protein_coding -5004,Ypel4,ENSMUSG00000034059,protein_coding -20800,4930590J08Rik,ENSMUSG00000034063,protein_coding -47980,Poglut1,ENSMUSG00000034064,protein_coding -1680,Farp2,ENSMUSG00000034066,protein_coding -22470,Zfp551,ENSMUSG00000034071,protein_coding -5000,Zdhhc5,ENSMUSG00000034075,protein_coding -20799,Ccdc174,ENSMUSG00000034083,protein_coding -22357,Nlrp4b,ENSMUSG00000034087,protein_coding -1676,Hdlbp,ENSMUSG00000034088,protein_coding -10909,Fstl5,ENSMUSG00000034098,protein_coding -4994,Ctnnd1,ENSMUSG00000034101,protein_coding -31625,Meak7,ENSMUSG00000034105,protein_coding -1675,Ano7,ENSMUSG00000034107,protein_coding -54154,Ccs,ENSMUSG00000034108,protein_coding -10905,Golim4,ENSMUSG00000034109,protein_coding -18245,Kctd7,ENSMUSG00000034110,protein_coding -44264,Tmed8,ENSMUSG00000034111,protein_coding 
-31623,Atp2c2,ENSMUSG00000034112,protein_coding -37118,Scn11a,ENSMUSG00000034115,protein_coding -50567,Vav1,ENSMUSG00000034116,protein_coding -54479,Ptgdr2,ENSMUSG00000034117,protein_coding -18243,Tpst1,ENSMUSG00000034118,protein_coding -40109,Srsf2,ENSMUSG00000034120,protein_coding -39216,Mks1,ENSMUSG00000034121,protein_coding -44261,Pomt2,ENSMUSG00000034126,protein_coding -28913,Tspan8,ENSMUSG00000034127,protein_coding -35472,Sik3,ENSMUSG00000034135,protein_coding -10895,Serpini2,ENSMUSG00000034139,protein_coding -44256,Tmem63c,ENSMUSG00000034145,protein_coding -10894,Zbbx,ENSMUSG00000034151,protein_coding -42098,Exoc3,ENSMUSG00000034152,protein_coding -5841,Ino80,ENSMUSG00000034154,protein_coding -39212,Tspoap1,ENSMUSG00000034156,protein_coding -44254,Cipc,ENSMUSG00000034157,protein_coding -47960,Lrrc58,ENSMUSG00000034158,protein_coding -1663,Mab21l4,ENSMUSG00000034159,protein_coding -8782,Ogt,ENSMUSG00000034160,protein_coding -46255,Scx,ENSMUSG00000034161,protein_coding -28907,Zfc3h1,ENSMUSG00000034163,protein_coding -37383,Emid1,ENSMUSG00000034164,protein_coding -50338,Ccnd3,ENSMUSG00000034165,protein_coding -44252,Irf2bpl,ENSMUSG00000034168,protein_coding -14375,Faah,ENSMUSG00000034171,protein_coding -18235,Zbed5,ENSMUSG00000034173,protein_coding -37382,Rhbdd3,ENSMUSG00000034175,protein_coding -39205,Rnf43,ENSMUSG00000034177,protein_coding -14373,6430628N08Rik,ENSMUSG00000034185,protein_coding -39798,Nsf,ENSMUSG00000034187,protein_coding -31615,Hsdl1,ENSMUSG00000034189,protein_coding -33841,Chmp7,ENSMUSG00000034190,protein_coding -20788,Lsm3,ENSMUSG00000034192,protein_coding -33840,R3hcc1,ENSMUSG00000034194,protein_coding -37379,Gas2l1,ENSMUSG00000034201,protein_coding -20785,Chchd4,ENSMUSG00000034203,protein_coding -33837,Loxl2,ENSMUSG00000034205,protein_coding -47946,Polq,ENSMUSG00000034206,protein_coding -37380,Rasl10a,ENSMUSG00000034209,protein_coding -14365,Efcab14,ENSMUSG00000034210,protein_coding -18224,Mrps17,ENSMUSG00000034211,protein_coding 
-1651,Ankmy1,ENSMUSG00000034212,protein_coding -5837,Vps18,ENSMUSG00000034216,protein_coding -35604,Atm,ENSMUSG00000034218,protein_coding -18223,Sept14,ENSMUSG00000034219,protein_coding -1648,Gpc1,ENSMUSG00000034220,protein_coding -31613,Slc38a8,ENSMUSG00000034224,protein_coding -5835,Rhov,ENSMUSG00000034226,protein_coding -40080,Foxj1,ENSMUSG00000034227,protein_coding -32294,Usp54,ENSMUSG00000034235,protein_coding -39789,Gm884,ENSMUSG00000034239,protein_coding -47939,Golgb1,ENSMUSG00000034243,protein_coding -20774,Hdac11,ENSMUSG00000034245,protein_coding -39786,Plekhm1,ENSMUSG00000034247,protein_coding -33817,Slc25a37,ENSMUSG00000034248,protein_coding -36223,Senp6,ENSMUSG00000034252,protein_coding -49825,Agpat1,ENSMUSG00000034254,protein_coding -39782,Arhgap27,ENSMUSG00000034255,protein_coding -44230,Flvcr2,ENSMUSG00000034258,protein_coding -46243,Exosc4,ENSMUSG00000034259,protein_coding -35868,Ints14,ENSMUSG00000034263,protein_coding -48791,Zdhhc14,ENSMUSG00000034265,protein_coding -44226,Batf,ENSMUSG00000034266,protein_coding -21028,Setd5,ENSMUSG00000034269,protein_coding -44224,Jdp2,ENSMUSG00000034271,protein_coding -37373,Thoc5,ENSMUSG00000034274,protein_coding -34917,Igsf9b,ENSMUSG00000034275,protein_coding -5829,Dnajc17,ENSMUSG00000034278,protein_coding -40074,Evpl,ENSMUSG00000034282,protein_coding -37371,Nipsnap1,ENSMUSG00000034285,protein_coding -44215,Nek9,ENSMUSG00000034290,protein_coding -1616,Traf3ip1,ENSMUSG00000034292,protein_coding -52946,Fhod3,ENSMUSG00000034295,protein_coding -39160,Med13,ENSMUSG00000034297,protein_coding -53066,Fam53c,ENSMUSG00000034300,protein_coding -35084,Ccdc15,ENSMUSG00000034303,protein_coding -31593,Sdr42e1,ENSMUSG00000034308,protein_coding -18194,Tmem132d,ENSMUSG00000034310,protein_coding -8752,Kif4,ENSMUSG00000034311,protein_coding -20765,Iqsec1,ENSMUSG00000034312,protein_coding -10840,Trim59,ENSMUSG00000034317,protein_coding -53572,Slc26a2,ENSMUSG00000034320,protein_coding -55082,Exosc1,ENSMUSG00000034321,protein_coding 
-18187,Tmem132c,ENSMUSG00000034324,protein_coding -33794,Kctd9,ENSMUSG00000034327,protein_coding -39156,Brip1,ENSMUSG00000034329,protein_coding -31591,Plcg2,ENSMUSG00000034330,protein_coding -46680,Zbed4,ENSMUSG00000034333,protein_coding -42316,Fam151b,ENSMUSG00000034334,protein_coding -55234,Ina,ENSMUSG00000034336,protein_coding -40064,Wbp2,ENSMUSG00000034341,protein_coding -35363,Cbl,ENSMUSG00000034342,protein_coding -1601,Ube2f,ENSMUSG00000034343,protein_coding -48800,Gtf2h5,ENSMUSG00000034345,protein_coding -10837,Smc4,ENSMUSG00000034349,protein_coding -1600,Ramp1,ENSMUSG00000034353,protein_coding -37362,Mtmr3,ENSMUSG00000034354,protein_coding -14315,Skint2,ENSMUSG00000034359,protein_coding -31168,Cpne2,ENSMUSG00000034361,protein_coding -47892,Csta1,ENSMUSG00000034362,protein_coding -54463,Tkfc,ENSMUSG00000034371,protein_coding -48801,Tulp4,ENSMUSG00000034377,protein_coding -47889,Wdr5b,ENSMUSG00000034379,protein_coding -50336,AI661453,ENSMUSG00000034382,protein_coding -17524,Barhl2,ENSMUSG00000034384,protein_coding -21008,Ssu2,ENSMUSG00000034387,protein_coding -31588,Cmip,ENSMUSG00000034390,protein_coding -54005,Fbxo15,ENSMUSG00000034391,protein_coding -37357,Lif,ENSMUSG00000034394,protein_coding -14300,Spata6,ENSMUSG00000034401,protein_coding -43976,Kcnh5,ENSMUSG00000034402,protein_coding -8734,Pja1,ENSMUSG00000034403,protein_coding -37353,Tbc1d10a,ENSMUSG00000034412,protein_coding -49427,Neurl1b,ENSMUSG00000034413,protein_coding -31583,Pkd1l2,ENSMUSG00000034416,protein_coding -47881,Parp14,ENSMUSG00000034422,protein_coding -31582,Gcsh,ENSMUSG00000034424,protein_coding -40051,Myo15b,ENSMUSG00000034427,protein_coding -46216,Zfp707,ENSMUSG00000034429,protein_coding -20754,Zxdc,ENSMUSG00000034430,protein_coding -1588,Cops8,ENSMUSG00000034432,protein_coding -43955,Tmem30b,ENSMUSG00000034435,protein_coding -17494,Gbp8,ENSMUSG00000034438,protein_coding -43948,Trmt5,ENSMUSG00000034442,protein_coding -54462,Cyb561a3,ENSMUSG00000034445,protein_coding 
-39126,Dhrs11,ENSMUSG00000034449,protein_coding -33758,Gulo,ENSMUSG00000034450,protein_coding -35867,Slc24a1,ENSMUSG00000034452,protein_coding -28357,Polr3b,ENSMUSG00000034453,protein_coding -20753,Uroc1,ENSMUSG00000034456,protein_coding -8716,Eda2r,ENSMUSG00000034457,protein_coding -54944,Ifit1,ENSMUSG00000034459,protein_coding -43945,Six4,ENSMUSG00000034460,protein_coding -17484,Pkd2,ENSMUSG00000034462,protein_coding -33755,Scara3,ENSMUSG00000034463,protein_coding -31575,Dynlrb2,ENSMUSG00000034467,protein_coding -40048,Caskin2,ENSMUSG00000034471,protein_coding -30751,Rasd2,ENSMUSG00000034472,protein_coding -47867,Sec22a,ENSMUSG00000034473,protein_coding -9185,Diaph2,ENSMUSG00000034480,protein_coding -49880,Ly6g5c,ENSMUSG00000034482,protein_coding -53454,Snx2,ENSMUSG00000034484,protein_coding -35783,Uaca,ENSMUSG00000034485,protein_coding -1576,Gbx2,ENSMUSG00000034486,protein_coding -35601,Kdelc2,ENSMUSG00000034487,protein_coding -42273,Edil3,ENSMUSG00000034488,protein_coding -37338,4930556J24Rik,ENSMUSG00000034493,protein_coding -43933,Pcnx4,ENSMUSG00000034501,protein_coding -50281,Mad2l1bp,ENSMUSG00000034509,protein_coding -30742,Hmgxb4,ENSMUSG00000034518,protein_coding -39758,Gjc1,ENSMUSG00000034520,protein_coding -33747,Zfp395,ENSMUSG00000034522,protein_coding -42044,Ice1,ENSMUSG00000034525,protein_coding -17466,Hsd17b13,ENSMUSG00000034528,protein_coding -33744,Fbxo16,ENSMUSG00000034532,protein_coding -37117,Scn10a,ENSMUSG00000034533,protein_coding -22247,Zfp418,ENSMUSG00000034538,protein_coding -37328,Morc2a,ENSMUSG00000034543,protein_coding -10800,Rsrc1,ENSMUSG00000034544,protein_coding -8998,Hdx,ENSMUSG00000034551,protein_coding -4979,Zswim2,ENSMUSG00000034552,protein_coding -9003,Tex16,ENSMUSG00000034555,protein_coding -14262,Zfyve9,ENSMUSG00000034557,protein_coding -28334,Washc4,ENSMUSG00000034560,protein_coding -36061,Ccpg1,ENSMUSG00000034563,protein_coding -40023,Atp5h,ENSMUSG00000034566,protein_coding -37315,Inpp5j,ENSMUSG00000034570,protein_coding 
-17454,Ptpn13,ENSMUSG00000034573,protein_coding -43921,Daam1,ENSMUSG00000034574,protein_coding -42018,Tent4a,ENSMUSG00000034575,protein_coding -37314,Pla2g3,ENSMUSG00000034579,protein_coding -22218,Olfr1347,ENSMUSG00000034583,protein_coding -35600,Exph5,ENSMUSG00000034584,protein_coding -40019,Hid1,ENSMUSG00000034586,protein_coding -28329,Slc41a2,ENSMUSG00000034591,protein_coding -36094,Myo5a,ENSMUSG00000034593,protein_coding -49948,Ppp1r18,ENSMUSG00000034595,protein_coding -43908,2700049A03Rik,ENSMUSG00000034601,protein_coding -29092,Mon2,ENSMUSG00000034602,protein_coding -9014,Pof1b,ENSMUSG00000034607,protein_coding -14249,Tut4,ENSMUSG00000034610,protein_coding -28325,Chst11,ENSMUSG00000034612,protein_coding -29085,Ppm1h,ENSMUSG00000034613,protein_coding -37307,Pik3ip1,ENSMUSG00000034614,protein_coding -54131,Ssh3,ENSMUSG00000034616,protein_coding -42004,Mtrr,ENSMUSG00000034617,protein_coding -29075,Rxylt1,ENSMUSG00000034620,protein_coding -39743,Gpatch8,ENSMUSG00000034621,protein_coding -33719,Prss55,ENSMUSG00000034623,protein_coding -46154,Ly6d,ENSMUSG00000034634,protein_coding -14242,Zyg11b,ENSMUSG00000034636,protein_coding -20972,Setmar,ENSMUSG00000034639,protein_coding -10771,Tiparp,ENSMUSG00000034640,protein_coding -40000,Cd300ld,ENSMUSG00000034641,protein_coding -14241,Zyg11a,ENSMUSG00000034645,protein_coding -50647,Ankrd12,ENSMUSG00000034647,protein_coding -20968,Lrrn1,ENSMUSG00000034648,protein_coding -39995,Cd300a,ENSMUSG00000034652,protein_coding -53354,Ythdc2,ENSMUSG00000034653,protein_coding -30931,Cacna1a,ENSMUSG00000034656,protein_coding -54476,Tmem109,ENSMUSG00000034659,protein_coding -22199,Galp,ENSMUSG00000034660,protein_coding -17345,Bmp2k,ENSMUSG00000034663,protein_coding -39741,Itga2b,ENSMUSG00000034664,protein_coding -29065,Xpot,ENSMUSG00000034667,protein_coding -49821,Pbx2,ENSMUSG00000034673,protein_coding -28313,Tdg,ENSMUSG00000034674,protein_coding -41543,Dbn1,ENSMUSG00000034675,protein_coding 
-39993,Gpr142,ENSMUSG00000034677,protein_coding -49284,Rnps1,ENSMUSG00000034681,protein_coding -4935,Ppp1r1c,ENSMUSG00000034683,protein_coding -36807,Sema3f,ENSMUSG00000034684,protein_coding -39740,Fam171a2,ENSMUSG00000034685,protein_coding -41542,Prr7,ENSMUSG00000034686,protein_coding -17329,Fras1,ENSMUSG00000034687,protein_coding -34140,4921530L21Rik,ENSMUSG00000034689,protein_coding -22192,Nlrp4c,ENSMUSG00000034690,protein_coding -4932,Neurod1,ENSMUSG00000034701,protein_coding -39990,Dnaic2,ENSMUSG00000034706,protein_coding -29056,Gns,ENSMUSG00000034707,protein_coding -39739,Grn,ENSMUSG00000034708,protein_coding -51017,Ppp1r21,ENSMUSG00000034709,protein_coding -39989,Ttyh2,ENSMUSG00000034714,protein_coding -6205,Tmx4,ENSMUSG00000034723,protein_coding -17326,Cnot6l,ENSMUSG00000034724,protein_coding -50330,Mrps10,ENSMUSG00000034729,protein_coding -46141,Adgrb1,ENSMUSG00000034730,protein_coding -33995,Dgkh,ENSMUSG00000034731,protein_coding -9082,Pabpc5,ENSMUSG00000034732,protein_coding -4729,Nostrin,ENSMUSG00000034738,protein_coding -35356,Mfrp,ENSMUSG00000034739,protein_coding -20523,Nagk,ENSMUSG00000034744,protein_coding -28271,Sirt6,ENSMUSG00000034748,protein_coding -42542,Mast4,ENSMUSG00000034751,protein_coding -9087,Pcdh11x,ENSMUSG00000034755,protein_coding -39731,Tmub2,ENSMUSG00000034757,protein_coding -28268,Tle6,ENSMUSG00000034758,protein_coding -43871,Map4k5,ENSMUSG00000034761,protein_coding -14219,Glis1,ENSMUSG00000034762,protein_coding -55316,Dusp5,ENSMUSG00000034765,protein_coding -39730,Asb16,ENSMUSG00000034768,protein_coding -28267,Tle2,ENSMUSG00000034771,protein_coding -39729,BC030867,ENSMUSG00000034773,protein_coding -52889,Dsg1c,ENSMUSG00000034774,protein_coding -20516,Vax2,ENSMUSG00000034777,protein_coding -4703,B3galt1,ENSMUSG00000034780,protein_coding -28264,Gna11,ENSMUSG00000034781,protein_coding -20511,Cd207,ENSMUSG00000034783,protein_coding -14213,Dio1,ENSMUSG00000034785,protein_coding -49820,Gpsm3,ENSMUSG00000034786,protein_coding 
-41532,Rab24,ENSMUSG00000034789,protein_coding -28260,Gna15,ENSMUSG00000034792,protein_coding -39726,G6pc3,ENSMUSG00000034793,protein_coding -33971,Ccdc122,ENSMUSG00000034795,protein_coding -31737,Cpne7,ENSMUSG00000034796,protein_coding -30667,Unc13a,ENSMUSG00000034799,protein_coding -6022,Zfp661,ENSMUSG00000034800,protein_coding -43864,Sos2,ENSMUSG00000034801,protein_coding -30666,Colgalt1,ENSMUSG00000034807,protein_coding -4693,Scn7a,ENSMUSG00000034810,protein_coding -29012,Grip1,ENSMUSG00000034813,protein_coding -28256,Celf5,ENSMUSG00000034818,protein_coding -54458,Cpsf7,ENSMUSG00000034820,protein_coding -25810,Nrip3,ENSMUSG00000034825,protein_coding -17248,Nup54,ENSMUSG00000034826,protein_coding -30662,Nxnl1,ENSMUSG00000034829,protein_coding -20499,Tet3,ENSMUSG00000034832,protein_coding -29354,Tespa1,ENSMUSG00000034833,protein_coding -36805,Gnat1,ENSMUSG00000034837,protein_coding -35781,Larp6,ENSMUSG00000034839,protein_coding -17244,Art3,ENSMUSG00000034842,protein_coding -30656,Plvap,ENSMUSG00000034845,protein_coding -4684,Ttc21b,ENSMUSG00000034848,protein_coding -6010,Tmem127,ENSMUSG00000034850,protein_coding -14198,Acot11,ENSMUSG00000034853,protein_coding -28245,Mfsd12,ENSMUSG00000034854,protein_coding -17245,Cxcl10,ENSMUSG00000034855,protein_coding -36091,Fam214a,ENSMUSG00000034858,protein_coding -30654,Ano8,ENSMUSG00000034863,protein_coding -23613,Ankrd27,ENSMUSG00000034867,protein_coding -50722,Myl12b,ENSMUSG00000034868,protein_coding -14197,Fam151a,ENSMUSG00000034871,protein_coding -28243,Gipc3,ENSMUSG00000034872,protein_coding -23610,Nudt19,ENSMUSG00000034875,protein_coding -30652,Mrpl34,ENSMUSG00000034880,protein_coding -28242,Tbxa2r,ENSMUSG00000034881,protein_coding -43842,Lrr1,ENSMUSG00000034883,protein_coding -28241,Cactin,ENSMUSG00000034889,protein_coding -41514,Sncb,ENSMUSG00000034891,protein_coding -43838,Rps29,ENSMUSG00000034892,protein_coding -33945,Cog3,ENSMUSG00000034893,protein_coding -36217,Filip1,ENSMUSG00000034898,protein_coding 
-28239,Pip5k1c,ENSMUSG00000034902,protein_coding -4668,Cobll1,ENSMUSG00000034903,protein_coding -6005,Ncaph,ENSMUSG00000034906,protein_coding -35468,Sidt2,ENSMUSG00000034908,protein_coding -36055,Pygo1,ENSMUSG00000034910,protein_coding -30648,Ushbp1,ENSMUSG00000034911,protein_coding -43804,Mdga2,ENSMUSG00000034912,protein_coding -33942,Spert,ENSMUSG00000034913,protein_coding -28238,Tjp3,ENSMUSG00000034917,protein_coding -41512,Cdhr2,ENSMUSG00000034918,protein_coding -14193,Ttc22,ENSMUSG00000034919,protein_coding -49878,Ly6g6f,ENSMUSG00000034923,protein_coding -14190,Dhcr24,ENSMUSG00000034926,protein_coding -41511,Rnf44,ENSMUSG00000034928,protein_coding -20489,Rtkn,ENSMUSG00000034930,protein_coding -39704,Dhx8,ENSMUSG00000034931,protein_coding -28235,Mrpl54,ENSMUSG00000034932,protein_coding -39703,Arl4d,ENSMUSG00000034936,protein_coding -39114,Synrg,ENSMUSG00000034940,protein_coding -39694,Tmem106a,ENSMUSG00000034947,protein_coding -28233,Zfr2,ENSMUSG00000034949,protein_coding -26041,Cog7,ENSMUSG00000034951,protein_coding -23595,Cebpa,ENSMUSG00000034957,protein_coding -28232,Atcay,ENSMUSG00000034958,protein_coding -33931,Rubcnl,ENSMUSG00000034959,protein_coding -20479,Lbx2,ENSMUSG00000034968,protein_coding -36312,Dop1a,ENSMUSG00000034973,protein_coding -28229,Dapk3,ENSMUSG00000034974,protein_coding -17214,Parm1,ENSMUSG00000034981,protein_coding -41493,Hrh2,ENSMUSG00000034987,protein_coding -26028,Otoa,ENSMUSG00000034990,protein_coding -39689,Vat1,ENSMUSG00000034993,protein_coding -28226,Eef2,ENSMUSG00000034994,protein_coding -33925,Htr2a,ENSMUSG00000034997,protein_coding -51016,Foxn2,ENSMUSG00000034998,protein_coding -4649,Dpp4,ENSMUSG00000035000,protein_coding -26027,Igsf6,ENSMUSG00000035004,protein_coding -39685,Rundc1,ENSMUSG00000035007,protein_coding -28224,Zbtb7a,ENSMUSG00000035011,protein_coding -17207,Epgn,ENSMUSG00000035020,protein_coding -43635,Baz1a,ENSMUSG00000035021,protein_coding -34912,Ncapd3,ENSMUSG00000035024,protein_coding 
-28223,Map2k2,ENSMUSG00000035027,protein_coding -14165,C8a,ENSMUSG00000035031,protein_coding -36708,Nek11,ENSMUSG00000035032,protein_coding -4646,Tbr1,ENSMUSG00000035033,protein_coding -28220,Creb3l3,ENSMUSG00000035041,protein_coding -39092,Ccl5,ENSMUSG00000035042,protein_coding -8694,Zc3h12b,ENSMUSG00000035045,protein_coding -34782,Kri1,ENSMUSG00000035047,protein_coding -36645,Anapc13,ENSMUSG00000035048,protein_coding -55079,Rrp12,ENSMUSG00000035049,protein_coding -50874,Dhx57,ENSMUSG00000035051,protein_coding -30870,Mgat4d,ENSMUSG00000035057,protein_coding -8693,Zc4h2,ENSMUSG00000035062,protein_coding -26013,Eef2k,ENSMUSG00000035064,protein_coding -33713,Xkr6,ENSMUSG00000035067,protein_coding -14153,Oma1,ENSMUSG00000035069,protein_coding -33711,Mtmr9,ENSMUSG00000035078,protein_coding -39087,1700020L24Rik,ENSMUSG00000035085,protein_coding -39670,Becn1,ENSMUSG00000035086,protein_coding -5979,Secisbp2l,ENSMUSG00000035093,protein_coding -33707,Fam167a,ENSMUSG00000035095,protein_coding -20451,Eva1a,ENSMUSG00000035104,protein_coding -43605,Egln3,ENSMUSG00000035105,protein_coding -48205,Dcbld2,ENSMUSG00000035107,protein_coding -5977,Shc4,ENSMUSG00000035109,protein_coding -39666,Wnk4,ENSMUSG00000035112,protein_coding -33701,Neil2,ENSMUSG00000035121,protein_coding -20447,Gcfc2,ENSMUSG00000035125,protein_coding -14146,Wdr78,ENSMUSG00000035126,protein_coding -2406,Brinp3,ENSMUSG00000035131,protein_coding -43593,Arhgap5,ENSMUSG00000035133,protein_coding -41458,Secisbp2,ENSMUSG00000035139,protein_coding -43589,Nubpl,ENSMUSG00000035142,protein_coding -43587,Gpr33,ENSMUSG00000035148,protein_coding -8656,Eif2s3x,ENSMUSG00000035150,protein_coding -30869,Elmod2,ENSMUSG00000035151,protein_coding -39083,Ap2b1,ENSMUSG00000035152,protein_coding -20871,Mitf,ENSMUSG00000035158,protein_coding -33681,Ints6,ENSMUSG00000035161,protein_coding -35586,Zc3h12c,ENSMUSG00000035164,protein_coding -25309,Kcne3,ENSMUSG00000035165,protein_coding -4616,Tanc1,ENSMUSG00000035168,protein_coding 
-54732,1110059E24Rik,ENSMUSG00000035171,protein_coding -39660,Plekhh3,ENSMUSG00000035172,protein_coding -55361,Ccdc186,ENSMUSG00000035173,protein_coding -22169,Nlrp2,ENSMUSG00000035177,protein_coding -54456,Ppp1r32,ENSMUSG00000035179,protein_coding -43583,Heatr5a,ENSMUSG00000035181,protein_coding -5964,Slc24a5,ENSMUSG00000035183,protein_coding -33677,Fam124a,ENSMUSG00000035184,protein_coding -50062,Ubd,ENSMUSG00000035186,protein_coding -17430,Nkx6-1,ENSMUSG00000035187,protein_coding -28477,Ano4,ENSMUSG00000035189,protein_coding -22160,Rfpl4,ENSMUSG00000035191,protein_coding -39657,Tubg1,ENSMUSG00000035198,protein_coding -20862,Arl6ip5,ENSMUSG00000035199,protein_coding -35641,Chrnb4,ENSMUSG00000035200,protein_coding -14129,B020004J07Rik,ENSMUSG00000035201,protein_coding -37262,Lars2,ENSMUSG00000035202,protein_coding -22159,Epn1,ENSMUSG00000035203,protein_coding -28208,Sppl2b,ENSMUSG00000035206,protein_coding -39060,Slfn8,ENSMUSG00000035208,protein_coding -25299,Xrra1,ENSMUSG00000035211,protein_coding -14127,Leprot,ENSMUSG00000035212,protein_coding -28207,Lsm7,ENSMUSG00000035215,protein_coding -6788,Rims4,ENSMUSG00000035226,protein_coding -25298,Spcs2,ENSMUSG00000035227,protein_coding -22151,Ccdc106,ENSMUSG00000035228,protein_coding -8647,Pdk3,ENSMUSG00000035232,protein_coding -17419,Abraxas1,ENSMUSG00000035234,protein_coding -33650,Trim13,ENSMUSG00000035235,protein_coding -4440,Scai,ENSMUSG00000035236,protein_coding -31395,Lcat,ENSMUSG00000035237,protein_coding -6787,Kcnk15,ENSMUSG00000035238,protein_coding -25297,Neu3,ENSMUSG00000035239,protein_coding -28204,Oaz1,ENSMUSG00000035242,protein_coding -20857,Eogt,ENSMUSG00000035245,protein_coding -8646,Pcyt1b,ENSMUSG00000035246,protein_coding -43581,Hectd1,ENSMUSG00000035247,protein_coding -41650,Tut7,ENSMUSG00000035248,protein_coding -48173,Abi3bp,ENSMUSG00000035258,protein_coding -28202,Amh,ENSMUSG00000035262,protein_coding -17416,Helq,ENSMUSG00000035266,protein_coding -6783,Pkig,ENSMUSG00000035268,protein_coding 
-48171,Impg2,ENSMUSG00000035270,protein_coding -17415,Hpse,ENSMUSG00000035273,protein_coding -36307,Tpbg,ENSMUSG00000035274,protein_coding -14110,Raver2,ENSMUSG00000035275,protein_coding -8641,Arx,ENSMUSG00000035277,protein_coding -28200,Plekhj1,ENSMUSG00000035278,protein_coding -22143,Ssc5d,ENSMUSG00000035279,protein_coding -55358,Adrb1,ENSMUSG00000035283,protein_coding -35943,Vps13c,ENSMUSG00000035284,protein_coding -22142,Nat14,ENSMUSG00000035285,protein_coding -43574,G2e3,ENSMUSG00000035293,protein_coding -4435,Wdr38,ENSMUSG00000035295,protein_coding -33636,Sgcg,ENSMUSG00000035296,protein_coding -17410,Cops4,ENSMUSG00000035297,protein_coding -25278,Klhl35,ENSMUSG00000035298,protein_coding -9863,Mid1,ENSMUSG00000035299,protein_coding -14100,Ror1,ENSMUSG00000035305,protein_coding -17406,Lin54,ENSMUSG00000035310,protein_coding -28458,Gnptab,ENSMUSG00000035311,protein_coding -25277,Gdpd5,ENSMUSG00000035314,protein_coding -17399,Sec31a,ENSMUSG00000035325,protein_coding -43718,Fbxo33,ENSMUSG00000035329,protein_coding -35858,Uchl4,ENSMUSG00000035337,protein_coding -55175,Lzts2,ENSMUSG00000035342,protein_coding -28452,Nup37,ENSMUSG00000035351,protein_coding -39034,Ccl12,ENSMUSG00000035352,protein_coding -25262,Uvrag,ENSMUSG00000035354,protein_coding -39641,Kcnh4,ENSMUSG00000035355,protein_coding -48155,Nfkbiz,ENSMUSG00000035356,protein_coding -20919,Pdzrn3,ENSMUSG00000035357,protein_coding -28449,Parpbp,ENSMUSG00000035365,protein_coding -41611,Rmi1,ENSMUSG00000035367,protein_coding -28189,Gm49322,ENSMUSG00000035370,protein_coding -7406,Ssx9,ENSMUSG00000035371,protein_coding -54078,1810055G02Rik,ENSMUSG00000035372,protein_coding -39032,Ccl7,ENSMUSG00000035373,protein_coding -47863,Hacd2,ENSMUSG00000035376,protein_coding -20905,Shq1,ENSMUSG00000035378,protein_coding -35465,Pcsk7,ENSMUSG00000035382,protein_coding -28448,Pmch,ENSMUSG00000035383,protein_coding -39031,Ccl2,ENSMUSG00000035385,protein_coding -8602,4930415L06Rik,ENSMUSG00000035387,protein_coding 
-22124,Brsk1,ENSMUSG00000035390,protein_coding -4400,Dennd1a,ENSMUSG00000035392,protein_coding -53802,Cfap53,ENSMUSG00000035394,protein_coding -8595,Pet2,ENSMUSG00000035395,protein_coding -28183,Klf16,ENSMUSG00000035397,protein_coding -6771,Oser1,ENSMUSG00000035399,protein_coding -25254,Emsy,ENSMUSG00000035401,protein_coding -4399,Crb2,ENSMUSG00000035403,protein_coding -14069,Kank4,ENSMUSG00000035407,protein_coding -39017,Tmem98,ENSMUSG00000035413,protein_coding -53432,Fam170a,ENSMUSG00000035420,protein_coding -8560,Mageb4,ENSMUSG00000035427,protein_coding -22114,Ptprh,ENSMUSG00000035429,protein_coding -43702,Sstr1,ENSMUSG00000035431,protein_coding -49279,Abca17,ENSMUSG00000035435,protein_coding -4392,Rabgap1,ENSMUSG00000035437,protein_coding -30643,Haus8,ENSMUSG00000035439,protein_coding -39015,Myo1d,ENSMUSG00000035441,protein_coding -34909,Thyn1,ENSMUSG00000035443,protein_coding -37282,Ccr3,ENSMUSG00000035448,protein_coding -43695,Foxa1,ENSMUSG00000035451,protein_coding -8558,Samt3,ENSMUSG00000035454,protein_coding -37487,Fignl1,ENSMUSG00000035455,protein_coding -17361,Prdm8,ENSMUSG00000035456,protein_coding -22110,Tnni3,ENSMUSG00000035458,protein_coding -28429,Stab2,ENSMUSG00000035459,protein_coding -33602,Rcbtb1,ENSMUSG00000035469,protein_coding -43688,Slc25a21,ENSMUSG00000035472,protein_coding -50869,Galm,ENSMUSG00000035473,protein_coding -8547,Tab3,ENSMUSG00000035476,protein_coding -28175,Mbd3,ENSMUSG00000035478,protein_coding -28169,Plk5,ENSMUSG00000035486,protein_coding -8545,Fthl17a,ENSMUSG00000035491,protein_coding -41581,Tgfbi,ENSMUSG00000035493,protein_coding -13391,Tstd2,ENSMUSG00000035495,protein_coding -37258,Cdcp1,ENSMUSG00000035498,protein_coding -28167,Reep6,ENSMUSG00000035504,protein_coding -17181,Cox18,ENSMUSG00000035505,protein_coding -47847,Slc12a8,ENSMUSG00000035506,protein_coding -41578,Fbxl21,ENSMUSG00000035509,protein_coding -4111,Ntng2,ENSMUSG00000035513,protein_coding -13389,Tdrd7,ENSMUSG00000035517,protein_coding 
-49340,Gnptg,ENSMUSG00000035521,protein_coding -8539,Tsga8,ENSMUSG00000035522,protein_coding -23012,Vmn1r172,ENSMUSG00000035523,protein_coding -17176,Npffr2,ENSMUSG00000035528,protein_coding -28386,Prdm4,ENSMUSG00000035529,protein_coding -39615,Eif1,ENSMUSG00000035530,protein_coding -13388,Ccdc180,ENSMUSG00000035539,protein_coding -17173,Gc,ENSMUSG00000035540,protein_coding -22088,Leng8,ENSMUSG00000035545,protein_coding -25240,Capn5,ENSMUSG00000035547,protein_coding -13386,Igfbpl1,ENSMUSG00000035551,protein_coding -39611,Krt17,ENSMUSG00000035557,protein_coding -30620,Mpv17l2,ENSMUSG00000035559,protein_coding -43793,Wdr20rt,ENSMUSG00000035560,protein_coding -13385,Aldh1b1,ENSMUSG00000035561,protein_coding -34058,Pcdh17,ENSMUSG00000035566,protein_coding -31730,Ankrd11,ENSMUSG00000035569,protein_coding -13373,Dcaf10,ENSMUSG00000035572,protein_coding -39000,Utp6,ENSMUSG00000035575,protein_coding -6759,L3mbtl1,ENSMUSG00000035576,protein_coding -47837,Iqcg,ENSMUSG00000035578,protein_coding -50434,Kcnh8,ENSMUSG00000035580,protein_coding -25238,Gdpd4,ENSMUSG00000035582,protein_coding -22066,Tsen34,ENSMUSG00000035585,protein_coding -39594,Krt33a,ENSMUSG00000035592,protein_coding -35639,Chrna5,ENSMUSG00000035594,protein_coding -28155,1600002K03Rik,ENSMUSG00000035595,protein_coding -22065,Mboat7,ENSMUSG00000035596,protein_coding -43784,Prpf39,ENSMUSG00000035597,protein_coding -13370,Trmt10b,ENSMUSG00000035601,protein_coding -36638,Ky,ENSMUSG00000035606,protein_coding -43782,Togaram1,ENSMUSG00000035614,protein_coding -13368,Frmpd1,ENSMUSG00000035615,protein_coding -28360,Ric8b,ENSMUSG00000035620,protein_coding -28153,Midn,ENSMUSG00000035621,protein_coding -25228,Rsf1,ENSMUSG00000035623,protein_coding -33168,Olfr1509,ENSMUSG00000035626,protein_coding -47831,Rubcn,ENSMUSG00000035629,protein_coding -22060,Cnot3,ENSMUSG00000035632,protein_coding -13361,Grhpr,ENSMUSG00000035637,protein_coding -47828,Muc20,ENSMUSG00000035638,protein_coding 
-28151,Cbarp,ENSMUSG00000035640,protein_coding -25227,Aamdc,ENSMUSG00000035642,protein_coding -13356,Zcchc7,ENSMUSG00000035649,protein_coding -8496,4930480E11Rik,ENSMUSG00000035651,protein_coding -43745,Lrfn5,ENSMUSG00000035653,protein_coding -4102,Gtf3c4,ENSMUSG00000035666,protein_coding -30926,Zswim4,ENSMUSG00000035671,protein_coding -28149,Sbno2,ENSMUSG00000035673,protein_coding -22056,Ndufa3,ENSMUSG00000035674,protein_coding -50558,Tnfsf9,ENSMUSG00000035678,protein_coding -28874,Kcnc2,ENSMUSG00000035681,protein_coding -13346,Melk,ENSMUSG00000035683,protein_coding -25223,Thrsp,ENSMUSG00000035686,protein_coding -15622,Isg15,ENSMUSG00000035692,protein_coding -28872,Caps2,ENSMUSG00000035694,protein_coding -13344,Rnf38,ENSMUSG00000035696,protein_coding -28146,Arhgap45,ENSMUSG00000035697,protein_coding -47819,Slc51a,ENSMUSG00000035699,protein_coding -25220,Alg8,ENSMUSG00000035704,protein_coding -41547,Dok3,ENSMUSG00000035711,protein_coding -25218,Usp35,ENSMUSG00000035713,protein_coding -28145,Abca7,ENSMUSG00000035722,protein_coding -8483,Prkx,ENSMUSG00000035725,protein_coding -33149,Supt16,ENSMUSG00000035726,protein_coding -54450,Dagla,ENSMUSG00000035735,protein_coding -28141,Grin3b,ENSMUSG00000035745,protein_coding -28140,Wdr18,ENSMUSG00000035754,protein_coding -46694,Selenoo,ENSMUSG00000035757,protein_coding -28845,Bbs10,ENSMUSG00000035759,protein_coding -42239,Tmem161b,ENSMUSG00000035762,protein_coding -47806,Fbxo45,ENSMUSG00000035764,protein_coding -53823,Dym,ENSMUSG00000035765,protein_coding -37111,Xylb,ENSMUSG00000035769,protein_coding -31301,Dync1li2,ENSMUSG00000035770,protein_coding -4085,Mrps2,ENSMUSG00000035772,protein_coding -28138,Kiss1r,ENSMUSG00000035773,protein_coding -39536,Krt20,ENSMUSG00000035775,protein_coding -8278,Cd99l2,ENSMUSG00000035776,protein_coding -4318,Ggta1,ENSMUSG00000035778,protein_coding -17110,Ugt2a3,ENSMUSG00000035780,protein_coding -28136,R3hdm4,ENSMUSG00000035781,protein_coding -54929,Acta2,ENSMUSG00000035783,protein_coding 
-31296,Cmtm2b,ENSMUSG00000035785,protein_coding -47802,Cep19,ENSMUSG00000035790,protein_coding -28838,Zdhhc17,ENSMUSG00000035798,protein_coding -43393,Twist1,ENSMUSG00000035799,protein_coding -55294,Ins1,ENSMUSG00000035804,protein_coding -46687,Mlc1,ENSMUSG00000035805,protein_coding -17098,Ugt2b35,ENSMUSG00000035811,protein_coding -55353,Plekhs1,ENSMUSG00000035818,protein_coding -31289,Tk2,ENSMUSG00000035824,protein_coding -46684,Pim3,ENSMUSG00000035828,protein_coding -4083,Ppp1r26,ENSMUSG00000035829,protein_coding -39528,Krt25,ENSMUSG00000035831,protein_coding -42212,Polr3g,ENSMUSG00000035834,protein_coding -28127,Plppr3,ENSMUSG00000035835,protein_coding -17093,Ugt2b1,ENSMUSG00000035836,protein_coding -42211,Lysmd3,ENSMUSG00000035840,protein_coding -50653,Ddx11,ENSMUSG00000035842,protein_coding -46681,Alg12,ENSMUSG00000035845,protein_coding -8249,Ids,ENSMUSG00000035847,protein_coding -39525,Krt222,ENSMUSG00000035849,protein_coding -17089,Ythdc1,ENSMUSG00000035851,protein_coding -28124,Misp,ENSMUSG00000035852,protein_coding -43378,Cdhr3,ENSMUSG00000035860,protein_coding -17083,Tmprss11b,ENSMUSG00000035861,protein_coding -28122,Palm,ENSMUSG00000035863,protein_coding -28814,Syt1,ENSMUSG00000035864,protein_coding -49154,Zfp983,ENSMUSG00000035868,protein_coding -28809,Pawr,ENSMUSG00000035873,protein_coding -4308,AI182371,ENSMUSG00000035875,protein_coding -6732,Zhx3,ENSMUSG00000035877,protein_coding -35636,Hykk,ENSMUSG00000035878,protein_coding -54326,Cox8a,ENSMUSG00000035885,protein_coding -28119,Rnf126,ENSMUSG00000035890,protein_coding -46655,Cerk,ENSMUSG00000035891,protein_coding -33092,Rnase1,ENSMUSG00000035896,protein_coding -17075,Uba6,ENSMUSG00000035898,protein_coding -46653,Gramd4,ENSMUSG00000035900,protein_coding -25813,Dennd5a,ENSMUSG00000035901,protein_coding -40914,Dcdc2a,ENSMUSG00000035910,protein_coding -35739,Cd276,ENSMUSG00000035914,protein_coding -28804,Ptprq,ENSMUSG00000035916,protein_coding -34852,Bbs9,ENSMUSG00000035919,protein_coding 
-28803,Myf6,ENSMUSG00000035923,protein_coding -49915,H2-Q4,ENSMUSG00000035929,protein_coding -31484,Chst4,ENSMUSG00000035930,protein_coding -33086,Olfr750,ENSMUSG00000035932,protein_coding -43350,Cog5,ENSMUSG00000035933,protein_coding -35071,Pknox2,ENSMUSG00000035934,protein_coding -40909,Aldh5a1,ENSMUSG00000035936,protein_coding -36301,Ibtk,ENSMUSG00000035941,protein_coding -46648,Ttc38,ENSMUSG00000035944,protein_coding -16963,Gsx2,ENSMUSG00000035946,protein_coding -28793,Acss3,ENSMUSG00000035948,protein_coding -4296,Fbxw2,ENSMUSG00000035949,protein_coding -25807,Ascl3,ENSMUSG00000035951,protein_coding -33075,Pip4p1,ENSMUSG00000035953,protein_coding -43481,Dock4,ENSMUSG00000035954,protein_coding -40906,Tdp2,ENSMUSG00000035958,protein_coding -33074,Apex1,ENSMUSG00000035960,protein_coding -28109,Odf3l2,ENSMUSG00000035963,protein_coding -30599,Tmem59l,ENSMUSG00000035964,protein_coding -8080,Ints6l,ENSMUSG00000035967,protein_coding -13297,Rusc2,ENSMUSG00000035969,protein_coding -53056,Nme5,ENSMUSG00000035984,protein_coding -38146,Fnip1,ENSMUSG00000035992,protein_coding -13291,Fam214b,ENSMUSG00000036002,protein_coding -40898,Ripor2,ENSMUSG00000036006,protein_coding -28776,Mettl25,ENSMUSG00000036009,protein_coding -8016,Fam122c,ENSMUSG00000036013,protein_coding -28767,Tmtc2,ENSMUSG00000036019,protein_coding -8015,Fam122b,ENSMUSG00000036022,protein_coding -33068,Parp2,ENSMUSG00000036023,protein_coding -50267,Tmem63b,ENSMUSG00000036026,protein_coding -35574,1810046K07Rik,ENSMUSG00000036027,protein_coding -36051,Prtg,ENSMUSG00000036030,protein_coding -50049,Zfp57,ENSMUSG00000036036,protein_coding -4059,Adamtsl2,ENSMUSG00000036040,protein_coding -46624,5031439G07Rik,ENSMUSG00000036046,protein_coding -13284,Dnajb5,ENSMUSG00000036052,protein_coding -4554,Fmnl2,ENSMUSG00000036053,protein_coding -30589,Sugp2,ENSMUSG00000036054,protein_coding -36954,Ptpn23,ENSMUSG00000036057,protein_coding -47117,Smug1,ENSMUSG00000036061,protein_coding 
-13283,Phf24,ENSMUSG00000036062,protein_coding -4055,Slc2a6,ENSMUSG00000036067,protein_coding -13232,Galt,ENSMUSG00000036073,protein_coding -13231,Sigmar1,ENSMUSG00000036078,protein_coding -40880,Slc17a3,ENSMUSG00000036083,protein_coding -2038,Zranb3,ENSMUSG00000036086,protein_coding -16905,Slain2,ENSMUSG00000036087,protein_coding -36797,Hyal3,ENSMUSG00000036091,protein_coding -4547,Arl5a,ENSMUSG00000036093,protein_coding -43445,Dgkb,ENSMUSG00000036095,protein_coding -55169,Slf2,ENSMUSG00000036097,protein_coding -54449,Myrf,ENSMUSG00000036098,protein_coding -28563,Vezt,ENSMUSG00000036099,protein_coding -52759,Colec12,ENSMUSG00000036103,protein_coding -2037,Rab3gap1,ENSMUSG00000036104,protein_coding -46616,Prr5,ENSMUSG00000036106,protein_coding -7973,Mbnl3,ENSMUSG00000036109,protein_coding -40879,Slc17a2,ENSMUSG00000036110,protein_coding -25792,Lmo1,ENSMUSG00000036111,protein_coding -28559,Metap2,ENSMUSG00000036112,protein_coding -13228,Rpp25l,ENSMUSG00000036114,protein_coding -38118,Il5,ENSMUSG00000036117,protein_coding -30582,Rfxank,ENSMUSG00000036120,protein_coding -42097,Slc9a3,ENSMUSG00000036123,protein_coding -7970,Frmd7,ENSMUSG00000036131,protein_coding -43329,Fam110c,ENSMUSG00000036136,protein_coding -37110,Acaa1a,ENSMUSG00000036138,protein_coding -47106,Hoxc9,ENSMUSG00000036139,protein_coding -43441,Meox2,ENSMUSG00000036144,protein_coding -30577,Tm6sf2,ENSMUSG00000036151,protein_coding -2024,Mgat5,ENSMUSG00000036155,protein_coding -46778,Prickle1,ENSMUSG00000036158,protein_coding -4042,Surf6,ENSMUSG00000036160,protein_coding -46777,Pphln1,ENSMUSG00000036167,protein_coding -28548,Ccdc38,ENSMUSG00000036168,protein_coding -43427,Sostdc1,ENSMUSG00000036169,protein_coding -48053,Cd200r3,ENSMUSG00000036172,protein_coding -30573,Gatad2a,ENSMUSG00000036180,protein_coding -40864,Hist1h1c,ENSMUSG00000036181,protein_coding -49869,Sapcd1,ENSMUSG00000036185,protein_coding -4034,Dipk1b,ENSMUSG00000036186,protein_coding -43423,Ankmy2,ENSMUSG00000036188,protein_coding 
-54708,Rorb,ENSMUSG00000036192,protein_coding -49538,Slc26a8,ENSMUSG00000036196,protein_coding -46773,Gxylt1,ENSMUSG00000036197,protein_coding -7952,Arhgap36,ENSMUSG00000036198,protein_coding -30571,Ndufa13,ENSMUSG00000036199,protein_coding -4542,Rif1,ENSMUSG00000036202,protein_coding -1564,Sh3bp4,ENSMUSG00000036206,protein_coding -48044,Nepro,ENSMUSG00000036208,protein_coding -40860,Hist1h1t,ENSMUSG00000036211,protein_coding -50044,Znrd1as,ENSMUSG00000036214,protein_coding -38100,Leap2,ENSMUSG00000036216,protein_coding -46764,Pdzrn4,ENSMUSG00000036218,protein_coding -53798,Ska1,ENSMUSG00000036223,protein_coding -52851,Kctd1,ENSMUSG00000036225,protein_coding -43416,Agr3,ENSMUSG00000036231,protein_coding -13202,Ube2r2,ENSMUSG00000036241,protein_coding -33021,Armh4,ENSMUSG00000036242,protein_coding -35735,Tbc1d21,ENSMUSG00000036244,protein_coding -30566,Gmip,ENSMUSG00000036246,protein_coding -4538,Rbm43,ENSMUSG00000036249,protein_coding -1548,Trpm8,ENSMUSG00000036251,protein_coding -17025,Igfbp7,ENSMUSG00000036256,protein_coding -43514,Pnpla8,ENSMUSG00000036257,protein_coding -38091,Fstl4,ENSMUSG00000036264,protein_coding -31389,Edc4,ENSMUSG00000036270,protein_coding -46756,Lrrk2,ENSMUSG00000036273,protein_coding -38087,9530068E07Rik,ENSMUSG00000036275,protein_coding -54322,Macrod1,ENSMUSG00000036278,protein_coding -4018,Snapc4,ENSMUSG00000036281,protein_coding -33013,Naa30,ENSMUSG00000036282,protein_coding -17022,Noa1,ENSMUSG00000036285,protein_coding -33011,Ap5m1,ENSMUSG00000036291,protein_coding -48033,Gramd1c,ENSMUSG00000036292,protein_coding -43493,Lrrn3,ENSMUSG00000036295,protein_coding -46750,Slc2a13,ENSMUSG00000036298,protein_coding -53822,BC031181,ENSMUSG00000036299,protein_coding -48032,Zdhhc23,ENSMUSG00000036304,protein_coding -30544,Lzts1,ENSMUSG00000036306,protein_coding -38082,Skp1a,ENSMUSG00000036309,protein_coding -50043,Znrd1,ENSMUSG00000036315,protein_coding -17010,Srp72,ENSMUSG00000036323,protein_coding -4012,Qsox2,ENSMUSG00000036327,protein_coding 
-30541,Slc18a1,ENSMUSG00000036330,protein_coding -43255,Kidins220,ENSMUSG00000036333,protein_coding -10671,Igsf10,ENSMUSG00000036334,protein_coding -32987,Tmem260,ENSMUSG00000036339,protein_coding -4004,Ubac1,ENSMUSG00000036352,protein_coding -10669,P2ry12,ENSMUSG00000036353,protein_coding -30532,Csgalnact1,ENSMUSG00000036356,protein_coding -8100,Gpr101,ENSMUSG00000036357,protein_coding -10668,P2ry13,ENSMUSG00000036362,protein_coding -50855,Rmdn2,ENSMUSG00000036368,protein_coding -20051,Serbp1,ENSMUSG00000036371,protein_coding -54448,Tmem258,ENSMUSG00000036372,protein_coding -40828,Abt1,ENSMUSG00000036376,protein_coding -17004,C530008M17Rik,ENSMUSG00000036377,protein_coding -10666,P2ry14,ENSMUSG00000036381,protein_coding -20045,Gadd45a,ENSMUSG00000036390,protein_coding -38062,Sec24a,ENSMUSG00000036391,protein_coding -34904,Glb1l2,ENSMUSG00000036395,protein_coding -50042,Ppp1r11,ENSMUSG00000036398,protein_coding -3998,Glt6d1,ENSMUSG00000036401,protein_coding -20040,Gng12,ENSMUSG00000036402,protein_coding -17002,Cep135,ENSMUSG00000036403,protein_coding -34850,9530077C05Rik,ENSMUSG00000036411,protein_coding -53561,Arsi,ENSMUSG00000036412,protein_coding -34024,Pcdh8,ENSMUSG00000036422,protein_coding -23564,Gpi1,ENSMUSG00000036427,protein_coding -50322,Tbcc,ENSMUSG00000036430,protein_coding -10654,Siah2,ENSMUSG00000036432,protein_coding -17001,Exoc1,ENSMUSG00000036435,protein_coding -30507,Npy1r,ENSMUSG00000036437,protein_coding -50993,Calm2,ENSMUSG00000036438,protein_coding -31386,Thap11,ENSMUSG00000036442,protein_coding -28647,Lum,ENSMUSG00000036446,protein_coding -3986,Lcn8,ENSMUSG00000036449,protein_coding -55160,Hif1an,ENSMUSG00000036450,protein_coding -53266,Arhgap26,ENSMUSG00000036452,protein_coding -23559,Wtip,ENSMUSG00000036459,protein_coding -34017,Elf1,ENSMUSG00000036461,protein_coding -20013,4930544G11Rik,ENSMUSG00000036463,protein_coding -35862,Megf11,ENSMUSG00000036466,protein_coding -30495,March1,ENSMUSG00000036469,protein_coding 
-49272,Tbc1d24,ENSMUSG00000036473,protein_coding -28634,Btg1,ENSMUSG00000036478,protein_coding -1498,Prss56,ENSMUSG00000036480,protein_coding -50041,Rnf39,ENSMUSG00000036492,protein_coding -28621,Eea1,ENSMUSG00000036499,protein_coding -1495,Akp3,ENSMUSG00000036500,protein_coding -53053,Fam13b,ENSMUSG00000036501,protein_coding -7836,Tmem255a,ENSMUSG00000036502,protein_coding -10631,Rnf13,ENSMUSG00000036503,protein_coding -3977,Phpt1,ENSMUSG00000036504,protein_coding -31253,Cdh8,ENSMUSG00000036510,protein_coding -10626,Commd2,ENSMUSG00000036513,protein_coding -23435,Scgb2b2,ENSMUSG00000036521,protein_coding -43027,Greb1,ENSMUSG00000036523,protein_coding -18589,Card11,ENSMUSG00000036526,protein_coding -25711,Ppfibp2,ENSMUSG00000036528,protein_coding -46705,Sbf1,ENSMUSG00000036529,protein_coding -50853,Cdc42ep3,ENSMUSG00000036533,protein_coding -31228,Slc38a7,ENSMUSG00000036534,protein_coding -7785,Rnf113a1,ENSMUSG00000036537,protein_coding -38022,Adamts2,ENSMUSG00000036545,protein_coding -31218,Cnot1,ENSMUSG00000036550,protein_coding -7783,Akap14,ENSMUSG00000036551,protein_coding -48989,Ermard,ENSMUSG00000036552,protein_coding -16328,Sh3tc1,ENSMUSG00000036553,protein_coding -18585,Iqce,ENSMUSG00000036555,protein_coding -50992,Stpg4,ENSMUSG00000036557,protein_coding -23424,Lgi4,ENSMUSG00000036560,protein_coding -46703,Ppp6r2,ENSMUSG00000036561,protein_coding -31216,Ndrg4,ENSMUSG00000036564,protein_coding -18584,Ttyh3,ENSMUSG00000036565,protein_coding -50321,Bicral,ENSMUSG00000036568,protein_coding -23423,Fxyd1,ENSMUSG00000036570,protein_coding -7781,Upf3b,ENSMUSG00000036572,protein_coding -1468,Tex44,ENSMUSG00000036574,protein_coding -23422,Fxyd7,ENSMUSG00000036578,protein_coding -10589,Spg20,ENSMUSG00000036580,protein_coding -53264,Fgf1,ENSMUSG00000036585,protein_coding -18581,Grifin,ENSMUSG00000036586,protein_coding -3963,Fut7,ENSMUSG00000036587,protein_coding -3844,Arhgap21,ENSMUSG00000036591,protein_coding -49801,H2-Aa,ENSMUSG00000036594,protein_coding 
-16321,Cpz,ENSMUSG00000036596,protein_coding -31208,Ccdc113,ENSMUSG00000036598,protein_coding -18578,Chst12,ENSMUSG00000036599,protein_coding -28734,Alx1,ENSMUSG00000036602,protein_coding -46699,Plxnb2,ENSMUSG00000036606,protein_coding -34890,Eepd1,ENSMUSG00000036611,protein_coding -43301,Eipr1,ENSMUSG00000036613,protein_coding -10583,Rfxap,ENSMUSG00000036615,protein_coding -3833,Etl4,ENSMUSG00000036617,protein_coding -38009,Mgat4b,ENSMUSG00000036620,protein_coding -15156,Atp13a2,ENSMUSG00000036622,protein_coding -10581,Alg5,ENSMUSG00000036632,protein_coding -23410,Mag,ENSMUSG00000036634,protein_coding -49335,Clcn7,ENSMUSG00000036636,protein_coding -18571,Nudt1,ENSMUSG00000036639,protein_coding -4605,Ccdc148,ENSMUSG00000036641,protein_coding -38005,Tbc1d9b,ENSMUSG00000036644,protein_coding -3956,Man1b1,ENSMUSG00000036646,protein_coding -25689,Olfr6,ENSMUSG00000036647,protein_coding -43295,Colec11,ENSMUSG00000036655,protein_coding -40681,Olfr11,ENSMUSG00000036658,protein_coding -46129,Dennd3,ENSMUSG00000036661,protein_coding -19546,Tcaf1,ENSMUSG00000036667,protein_coding -31385,Cenpt,ENSMUSG00000036672,protein_coding -28712,Tmtc3,ENSMUSG00000036676,protein_coding -47070,Aaas,ENSMUSG00000036678,protein_coding -30913,Cc2d1a,ENSMUSG00000036686,protein_coding -18557,Tmem184a,ENSMUSG00000036687,protein_coding -16302,Nop14,ENSMUSG00000036693,protein_coding -46120,Ago2,ENSMUSG00000036698,protein_coding -7750,Zcchc12,ENSMUSG00000036699,protein_coding -1440,Cab39,ENSMUSG00000036707,protein_coding -31046,Cyld,ENSMUSG00000036712,protein_coding -18553,Micall2,ENSMUSG00000036718,protein_coding -40660,Zscan12,ENSMUSG00000036721,protein_coding -3942,Cysrt1,ENSMUSG00000036731,protein_coding -23388,Rbm42,ENSMUSG00000036733,protein_coding -37107,Oxsr1,ENSMUSG00000036737,protein_coding -52847,Psma8,ENSMUSG00000036743,protein_coding -25678,Olfr701,ENSMUSG00000036744,protein_coding -12596,Ttll7,ENSMUSG00000036745,protein_coding -55214,Cuedc2,ENSMUSG00000036748,protein_coding 
-15259,Pramel5,ENSMUSG00000036749,protein_coding -23386,Cox6b1,ENSMUSG00000036751,protein_coding -3939,Tubb4b,ENSMUSG00000036752,protein_coding -46112,Kcnk9,ENSMUSG00000036760,protein_coding -27780,Dnajc12,ENSMUSG00000036764,protein_coding -1395,Dner,ENSMUSG00000036766,protein_coding -37247,Kif15,ENSMUSG00000036768,protein_coding -7623,Wdr44,ENSMUSG00000036769,protein_coding -3937,Stpg3,ENSMUSG00000036770,protein_coding -49407,Decr2,ENSMUSG00000036775,protein_coding -34846,Anln,ENSMUSG00000036777,protein_coding -31038,Tent4b,ENSMUSG00000036779,protein_coding -35926,Rps27l,ENSMUSG00000036781,protein_coding -7620,Klhl13,ENSMUSG00000036782,protein_coding -8190,Slitrk2,ENSMUSG00000036790,protein_coding -4504,Mbd5,ENSMUSG00000036792,protein_coding -46103,Fam135b,ENSMUSG00000036800,protein_coding -3930,Noxa1,ENSMUSG00000036805,protein_coding -31033,Cnep1r1,ENSMUSG00000036810,protein_coding -3929,Entpd8,ENSMUSG00000036813,protein_coding -37266,Slc6a20a,ENSMUSG00000036814,protein_coding -1995,Dpp10,ENSMUSG00000036815,protein_coding -27771,Atoh7,ENSMUSG00000036816,protein_coding -18536,Sun1,ENSMUSG00000036817,protein_coding -38288,Jmjd4,ENSMUSG00000036819,protein_coding -49270,Amdhd2,ENSMUSG00000036820,protein_coding -13172,Topors,ENSMUSG00000036822,protein_coding -12581,Ssx2ip,ENSMUSG00000036825,protein_coding -23381,Igflr1,ENSMUSG00000036826,protein_coding -12576,Lpar3,ENSMUSG00000036832,protein_coding -3921,Pnpla7,ENSMUSG00000036833,protein_coding -10726,Plch1,ENSMUSG00000036834,protein_coding -23378,Psenen,ENSMUSG00000036835,protein_coding -31007,Siah1a,ENSMUSG00000036840,protein_coding -23377,Lin37,ENSMUSG00000036845,protein_coding -3920,Mrpl41,ENSMUSG00000036850,protein_coding -12574,Mcoln3,ENSMUSG00000036853,protein_coding -23375,Hspb6,ENSMUSG00000036854,protein_coding -52748,Gjd4,ENSMUSG00000036855,protein_coding -15054,Wnt4,ENSMUSG00000036856,protein_coding -50318,Ptcra,ENSMUSG00000036858,protein_coding -38278,Mrpl55,ENSMUSG00000036860,protein_coding 
-25647,Dchs1,ENSMUSG00000036862,protein_coding -12570,Syde2,ENSMUSG00000036863,protein_coding -23373,Proser3,ENSMUSG00000036864,protein_coding -35841,Smad6,ENSMUSG00000036867,protein_coding -31002,Abcc12,ENSMUSG00000036872,protein_coding -12569,2410004B18Rik,ENSMUSG00000036873,protein_coding -27762,Dna2,ENSMUSG00000036875,protein_coding -30997,Phkb,ENSMUSG00000036879,protein_coding -53810,Acaa2,ENSMUSG00000036880,protein_coding -23372,Arhgap33,ENSMUSG00000036882,protein_coding -10714,Arhgef26,ENSMUSG00000036885,protein_coding -15043,C1qa,ENSMUSG00000036887,protein_coding -4474,Gtdc1,ENSMUSG00000036890,protein_coding -23370,Prodh2,ENSMUSG00000036892,protein_coding -3913,Ehmt1,ENSMUSG00000036893,protein_coding -10708,Rap2b,ENSMUSG00000036894,protein_coding -15042,C1qc,ENSMUSG00000036896,protein_coding -18519,Zfp157,ENSMUSG00000036898,protein_coding -19511,Trpv5,ENSMUSG00000036899,protein_coding -30993,Neto2,ENSMUSG00000036902,protein_coding -52747,Fzd8,ENSMUSG00000036904,protein_coding -15041,C1qb,ENSMUSG00000036905,protein_coding -1957,C1ql2,ENSMUSG00000036907,protein_coding -54092,Unc93b1,ENSMUSG00000036908,protein_coding -34590,Piwil4,ENSMUSG00000036912,protein_coding -31849,Trim67,ENSMUSG00000036913,protein_coding -23366,Kirrel2,ENSMUSG00000036915,protein_coding -7940,Zfp280c,ENSMUSG00000036916,protein_coding -50989,Ttc7,ENSMUSG00000036918,protein_coding -15037,Tex46,ENSMUSG00000036921,protein_coding -27747,Stox1,ENSMUSG00000036923,protein_coding -6370,Cst13,ENSMUSG00000036924,protein_coding -47144,Mucl2,ENSMUSG00000036925,protein_coding -18511,Stag3,ENSMUSG00000036928,protein_coding -23363,Nfkbid,ENSMUSG00000036931,protein_coding -7938,Aifm1,ENSMUSG00000036932,protein_coding -30986,4921524J17Rik,ENSMUSG00000036934,protein_coding -19476,Try5,ENSMUSG00000036938,protein_coding -15033,Kdm1a,ENSMUSG00000036940,protein_coding -53792,Elac1,ENSMUSG00000036941,protein_coding -35925,Rab8b,ENSMUSG00000036943,protein_coding -46063,Tmem71,ENSMUSG00000036944,protein_coding 
-18506,Map11,ENSMUSG00000036948,protein_coding -3750,Slc39a12,ENSMUSG00000036949,protein_coding -10687,C130079G13Rik,ENSMUSG00000036951,protein_coding -27742,Kif1bp,ENSMUSG00000036955,protein_coding -23359,Lrfn3,ENSMUSG00000036957,protein_coding -6366,Cst11,ENSMUSG00000036958,protein_coding -7936,Bcorl1,ENSMUSG00000036959,protein_coding -12557,Clca2,ENSMUSG00000036960,protein_coding -55156,Wnt8b,ENSMUSG00000036961,protein_coding -1950,Cfap221,ENSMUSG00000036962,protein_coding -38266,Trim17,ENSMUSG00000036964,protein_coding -47056,Spryd3,ENSMUSG00000036966,protein_coding -18499,Cnpy4,ENSMUSG00000036968,protein_coding -36432,Zic4,ENSMUSG00000036972,protein_coding -1949,Tmem177,ENSMUSG00000036975,protein_coding -30822,Anapc10,ENSMUSG00000036977,protein_coding -18498,Taf6,ENSMUSG00000036980,protein_coding -48772,Tfb1m,ENSMUSG00000036983,protein_coding -7929,Zdhhc9,ENSMUSG00000036985,protein_coding -35731,Pml,ENSMUSG00000036986,protein_coding -25635,Trim3,ENSMUSG00000036989,protein_coding -30818,Otud4,ENSMUSG00000036990,protein_coding -6362,Nxt1,ENSMUSG00000036992,protein_coding -15024,Asap3,ENSMUSG00000036995,protein_coding -38259,Zfp39,ENSMUSG00000037001,protein_coding -47055,Tns2,ENSMUSG00000037003,protein_coding -7927,Xpnpep2,ENSMUSG00000037005,protein_coding -18491,Zfp113,ENSMUSG00000037007,protein_coding -7926,Apln,ENSMUSG00000037010,protein_coding -27735,Hk1,ENSMUSG00000037012,protein_coding -52845,Ss18,ENSMUSG00000037013,protein_coding -6356,Sstr4,ENSMUSG00000037014,protein_coding -10561,Frem2,ENSMUSG00000037016,protein_coding -18490,Zscan21,ENSMUSG00000037017,protein_coding -23348,Wdr62,ENSMUSG00000037020,protein_coding -30810,Mmaa,ENSMUSG00000037022,protein_coding -6352,Foxa2,ENSMUSG00000037025,protein_coding -14134,Gm12800,ENSMUSG00000037028,protein_coding -23339,Zfp146,ENSMUSG00000037029,protein_coding -27732,Tspan15,ENSMUSG00000037031,protein_coding -25633,Apbb1,ENSMUSG00000037032,protein_coding -12549,Clca3b,ENSMUSG00000037033,protein_coding 
-6345,Pax1,ENSMUSG00000037034,protein_coding -1932,Inhbb,ENSMUSG00000037035,protein_coding -25632,Smpd1,ENSMUSG00000037049,protein_coding -18482,Azgp1,ENSMUSG00000037053,protein_coding -53095,Paip2,ENSMUSG00000037058,protein_coding -25630,Cavin3,ENSMUSG00000037060,protein_coding -12542,Sh3glb1,ENSMUSG00000037062,protein_coding -30796,Rbmxl1,ENSMUSG00000037070,protein_coding -55153,Scd1,ENSMUSG00000037071,protein_coding -12539,Selenof,ENSMUSG00000037072,protein_coding -45961,Rnf139,ENSMUSG00000037075,protein_coding -45960,Trmt12,ENSMUSG00000037085,protein_coding -7905,Prr32,ENSMUSG00000037086,protein_coding -50260,Slc35b2,ENSMUSG00000037089,protein_coding -50506,Lrg1,ENSMUSG00000037095,protein_coding -49405,Rab11fip3,ENSMUSG00000037098,protein_coding -30793,Ttc29,ENSMUSG00000037101,protein_coding -30911,Dcaf15,ENSMUSG00000037103,protein_coding -50984,Socs5,ENSMUSG00000037104,protein_coding -45954,Fer1l6,ENSMUSG00000037106,protein_coding -18470,Zcwpw1,ENSMUSG00000037108,protein_coding -6326,Ralgapa2,ENSMUSG00000037110,protein_coding -10504,Setd7,ENSMUSG00000037111,protein_coding -35563,Sik2,ENSMUSG00000037112,protein_coding -45953,Fam91a1,ENSMUSG00000037119,protein_coding -38241,Trim58,ENSMUSG00000037124,protein_coding -55212,Psd,ENSMUSG00000037126,protein_coding -35450,Tmprss13,ENSMUSG00000037129,protein_coding -50033,H2-M10.6,ENSMUSG00000037130,protein_coding -30781,Prmt9,ENSMUSG00000037134,protein_coding -567,Aff3,ENSMUSG00000037138,protein_coding -15004,Myom3,ENSMUSG00000037139,protein_coding -19416,Tas2r108,ENSMUSG00000037140,protein_coding -6324,Cfap61,ENSMUSG00000037143,protein_coding -38239,2210407C18Rik,ENSMUSG00000037145,protein_coding -30775,Arhgap10,ENSMUSG00000037148,protein_coding -42980,Ddx1,ENSMUSG00000037149,protein_coding -27712,Lrrc20,ENSMUSG00000037151,protein_coding -10498,Ndufc1,ENSMUSG00000037152,protein_coding -15002,Il22ra1,ENSMUSG00000037157,protein_coding -19413,Wee2,ENSMUSG00000037159,protein_coding 
-10496,Mgarp,ENSMUSG00000037161,protein_coding -23301,Ppp1r14a,ENSMUSG00000037166,protein_coding -7594,Spaca5,ENSMUSG00000037167,protein_coding -42972,Mycn,ENSMUSG00000037169,protein_coding -27708,Nodal,ENSMUSG00000037171,protein_coding -19412,E330009J07Rik,ENSMUSG00000037172,protein_coding -10490,Elf2,ENSMUSG00000037174,protein_coding -47004,Krt80,ENSMUSG00000037185,protein_coding -14997,Grhl3,ENSMUSG00000037188,protein_coding -36786,Cyb561d2,ENSMUSG00000037190,protein_coding -48890,Pacrg,ENSMUSG00000037196,protein_coding -3703,Rbm17,ENSMUSG00000037197,protein_coding -27703,Prf1,ENSMUSG00000037202,protein_coding -47000,Atg101,ENSMUSG00000037204,protein_coding -35727,Islr,ENSMUSG00000037206,protein_coding -16291,Fam193a,ENSMUSG00000037210,protein_coding -10337,Spry1,ENSMUSG00000037211,protein_coding -29945,Thap1,ENSMUSG00000037214,protein_coding -555,Lipt1,ENSMUSG00000037216,protein_coding -7585,Syn1,ENSMUSG00000037217,protein_coding -18453,Mospd3,ENSMUSG00000037221,protein_coding -16284,Zfyve28,ENSMUSG00000037224,protein_coding -10328,Fgf2,ENSMUSG00000037225,protein_coding -29937,Hook3,ENSMUSG00000037234,protein_coding -16282,Mxd4,ENSMUSG00000037235,protein_coding -53092,Matr3,ENSMUSG00000037236,protein_coding -23292,Spred3,ENSMUSG00000037239,protein_coding -14982,Clic4,ENSMUSG00000037242,protein_coding -38216,Zfp692,ENSMUSG00000037243,protein_coding -50032,H2-M10.5,ENSMUSG00000037246,protein_coding -29935,Pomk,ENSMUSG00000037251,protein_coding -53785,Mex3c,ENSMUSG00000037253,protein_coding -3555,Itih2,ENSMUSG00000037254,protein_coding -35836,Aagab,ENSMUSG00000037257,protein_coding -6302,Dzank1,ENSMUSG00000037259,protein_coding -29934,Hgsnat,ENSMUSG00000037260,protein_coding -3554,Kin,ENSMUSG00000037262,protein_coding -54093,Aldh3b3,ENSMUSG00000037263,protein_coding -14977,Rsrp1,ENSMUSG00000037266,protein_coding -10310,4932438A13Rik,ENSMUSG00000037270,protein_coding -38208,Gemin5,ENSMUSG00000037275,protein_coding -38960,Tmem97,ENSMUSG00000037278,protein_coding 
-6295,Ovol2,ENSMUSG00000037279,protein_coding -46983,Galnt6,ENSMUSG00000037280,protein_coding -36598,Stag1,ENSMUSG00000037286,protein_coding -35319,Tbcel,ENSMUSG00000037287,protein_coding -14973,Ldlrap1,ENSMUSG00000037295,protein_coding -29930,Lsm1,ENSMUSG00000037296,protein_coding -31844,Ttc13,ENSMUSG00000037300,protein_coding -14971,Man1c1,ENSMUSG00000037306,protein_coding -6285,Banf2,ENSMUSG00000037307,protein_coding -16261,Tacc3,ENSMUSG00000037313,protein_coding -7570,Jade3,ENSMUSG00000037315,protein_coding -29929,Bag4,ENSMUSG00000037316,protein_coding -3403,Traf3ip3,ENSMUSG00000037318,protein_coding -49794,Tap1,ENSMUSG00000037321,protein_coding -10306,Bbs7,ENSMUSG00000037325,protein_coding -49403,Capn15,ENSMUSG00000037326,protein_coding -38204,Larp1,ENSMUSG00000037331,protein_coding -50023,H2-M1,ENSMUSG00000037334,protein_coding -38199,Hand1,ENSMUSG00000037335,protein_coding -42868,Mfsd2b,ENSMUSG00000037336,protein_coding -23282,Map4k1,ENSMUSG00000037337,protein_coding -16256,Fam53a,ENSMUSG00000037339,protein_coding -7564,Slc9a7,ENSMUSG00000037341,protein_coding -45904,Taf2,ENSMUSG00000037343,protein_coding -18438,Slc12a9,ENSMUSG00000037344,protein_coding -52829,Hrh4,ENSMUSG00000037346,protein_coding -7563,Chst7,ENSMUSG00000037347,protein_coding -14967,Paqr7,ENSMUSG00000037348,protein_coding -54318,Nudt22,ENSMUSG00000037349,protein_coding -526,Actr1b,ENSMUSG00000037351,protein_coding -46970,Letmd1,ENSMUSG00000037353,protein_coding -16253,Uvssa,ENSMUSG00000037355,protein_coding -7548,Dipk2b,ENSMUSG00000037358,protein_coding -42865,Sf3b6,ENSMUSG00000037361,protein_coding -45898,Ccn3,ENSMUSG00000037362,protein_coding -29916,Letm2,ENSMUSG00000037363,protein_coding -18434,Srrt,ENSMUSG00000037364,protein_coding -14962,Pafah2,ENSMUSG00000037366,protein_coding -7545,Kdm6a,ENSMUSG00000037369,protein_coding -27098,Enpp1,ENSMUSG00000037370,protein_coding -16249,Ctbp1,ENSMUSG00000037373,protein_coding -3388,Hhat,ENSMUSG00000037375,protein_coding 
-6187,Trmt6,ENSMUSG00000037376,protein_coding -16248,Spon2,ENSMUSG00000037379,protein_coding -45770,Rims2,ENSMUSG00000037386,protein_coding -18425,Muc3,ENSMUSG00000037390,protein_coding -38182,Nmur2,ENSMUSG00000037393,protein_coding -3384,Rcor3,ENSMUSG00000037395,protein_coding -10276,Atp11b,ENSMUSG00000037400,protein_coding -34761,Icam1,ENSMUSG00000037405,protein_coding -29900,Htra4,ENSMUSG00000037406,protein_coding -512,Cnnm4,ENSMUSG00000037408,protein_coding -36410,Tbc1d2b,ENSMUSG00000037410,protein_coding -18423,Serpine1,ENSMUSG00000037411,protein_coding -31383,Ranbp10,ENSMUSG00000037415,protein_coding -53425,Dmxl1,ENSMUSG00000037416,protein_coding -54439,Best1,ENSMUSG00000037418,protein_coding -34575,Endod1,ENSMUSG00000037419,protein_coding -27089,Taar9,ENSMUSG00000037424,protein_coding -16239,Depdc5,ENSMUSG00000037426,protein_coding -18421,Vgf,ENSMUSG00000037428,protein_coding -510,Fer1l5,ENSMUSG00000037432,protein_coding -3376,Slc30a1,ENSMUSG00000037434,protein_coding -29896,Adam32,ENSMUSG00000037437,protein_coding -27071,Vnn1,ENSMUSG00000037440,protein_coding -14947,Cep85,ENSMUSG00000037443,protein_coding -49521,Tulp1,ENSMUSG00000037446,protein_coding -506,Arid5a,ENSMUSG00000037447,protein_coding -54250,Slc22a20,ENSMUSG00000037451,protein_coding -27069,Slc18b1,ENSMUSG00000037455,protein_coding -45747,Azin1,ENSMUSG00000037458,protein_coding -3367,Ints7,ENSMUSG00000037461,protein_coding -23263,Fbxo27,ENSMUSG00000037463,protein_coding -45742,Klf10,ENSMUSG00000037465,protein_coding -44963,Tedc1,ENSMUSG00000037466,protein_coding -23260,Acp7,ENSMUSG00000037469,protein_coding -503,Uggt1,ENSMUSG00000037470,protein_coding -3366,Dtl,ENSMUSG00000037474,protein_coding -7878,Thoc2,ENSMUSG00000037475,protein_coding -54096,Tbx10,ENSMUSG00000037477,protein_coding -6161,Erv3,ENSMUSG00000037482,protein_coding -42828,Asxl2,ENSMUSG00000037486,protein_coding -45732,Ubr5,ENSMUSG00000037487,protein_coding -27053,Slc2a12,ENSMUSG00000037490,protein_coding 
-29878,Zmat4,ENSMUSG00000037492,protein_coding -35626,Cib2,ENSMUSG00000037493,protein_coding -3358,Nenf,ENSMUSG00000037499,protein_coding -483,Fam168b,ENSMUSG00000037503,protein_coding -478,Arhgef4,ENSMUSG00000037509,protein_coding -6146,Pank2,ENSMUSG00000037514,protein_coding -26713,Ppfia1,ENSMUSG00000037519,protein_coding -6145,Mavs,ENSMUSG00000037523,protein_coding -46921,Bcdin3d,ENSMUSG00000037525,protein_coding -32964,Atg14,ENSMUSG00000037526,protein_coding -471,Prss40,ENSMUSG00000037529,protein_coding -10218,Mrpl47,ENSMUSG00000037531,protein_coding -38148,Rapgef6,ENSMUSG00000037533,protein_coding -32961,Fbxo34,ENSMUSG00000037536,protein_coding -50018,H2-M11,ENSMUSG00000037537,protein_coding -26707,Shank2,ENSMUSG00000037541,protein_coding -27020,Aldh8a1,ENSMUSG00000037542,protein_coding -32958,Dlgap5,ENSMUSG00000037544,protein_coding -49791,H2-DMb2,ENSMUSG00000037548,protein_coding -23243,Plekhg2,ENSMUSG00000037552,protein_coding -14926,Zdhhc18,ENSMUSG00000037553,protein_coding -23241,Rps16,ENSMUSG00000037563,protein_coding -3340,Vash2,ENSMUSG00000037568,protein_coding -46907,Mcrs1,ENSMUSG00000037570,protein_coding -32948,Wdhd1,ENSMUSG00000037572,protein_coding -39293,Tob1,ENSMUSG00000037573,protein_coding -49674,Ephx3,ENSMUSG00000037577,protein_coding -55144,Pkd2l1,ENSMUSG00000037578,protein_coding -46906,Kcnh3,ENSMUSG00000037579,protein_coding -32943,Gch1,ENSMUSG00000037580,protein_coding -14921,Nr0b2,ENSMUSG00000037583,protein_coding -38942,Rskr,ENSMUSG00000037593,protein_coding -44944,Clba1,ENSMUSG00000037594,protein_coding -14919,Kdf1,ENSMUSG00000037600,protein_coding -39289,Nme1,ENSMUSG00000037601,protein_coding -17044,Adgrl3,ENSMUSG00000037605,protein_coding -26698,Osbpl5,ENSMUSG00000037606,protein_coding -27000,Bclaf1,ENSMUSG00000037608,protein_coding -10203,Kcnmb2,ENSMUSG00000037610,protein_coding -26697,Tnfrsf23,ENSMUSG00000037613,protein_coding -45680,Spag1,ENSMUSG00000037617,protein_coding -20336,Atoh8,ENSMUSG00000037621,protein_coding 
-14913,Wdtc1,ENSMUSG00000037622,protein_coding -3316,Kcnk2,ENSMUSG00000037624,protein_coding -10198,Cldn11,ENSMUSG00000037625,protein_coding -45674,Rgs22,ENSMUSG00000037627,protein_coding -32927,Cdkn3,ENSMUSG00000037628,protein_coding -7764,Slc25a43,ENSMUSG00000037636,protein_coding -44937,Zbtb42,ENSMUSG00000037638,protein_coding -23213,Zfp60,ENSMUSG00000037640,protein_coding -10195,Prkci,ENSMUSG00000037643,protein_coding -45663,Vps13b,ENSMUSG00000037646,protein_coding -49787,H2-DMa,ENSMUSG00000037649,protein_coding -10192,Phc3,ENSMUSG00000037652,protein_coding -16851,Kctd8,ENSMUSG00000037653,protein_coding -29849,Slc20a2,ENSMUSG00000037656,protein_coding -42896,Gdf7,ENSMUSG00000037660,protein_coding -10191,Gpr160,ENSMUSG00000037661,protein_coding -26684,Cdkn1c,ENSMUSG00000037664,protein_coding -42894,Ldah,ENSMUSG00000037669,protein_coding -36038,Rfx7,ENSMUSG00000037674,protein_coding -44932,Inf2,ENSMUSG00000037679,protein_coding -36572,Esyt3,ENSMUSG00000037681,protein_coding -3816,Armc3,ENSMUSG00000037683,protein_coding -16831,Atp8a1,ENSMUSG00000037685,protein_coding -44920,Aspg,ENSMUSG00000037686,protein_coding -50975,Tmem247,ENSMUSG00000037689,protein_coding -14902,Ahdc1,ENSMUSG00000037692,protein_coding -32897,Ddhd1,ENSMUSG00000037697,protein_coding -6122,Lzts3,ENSMUSG00000037703,protein_coding -35317,Tecta,ENSMUSG00000037705,protein_coding -26667,Cd81,ENSMUSG00000037706,protein_coding -3809,Spag6,ENSMUSG00000037708,protein_coding -19940,Fam13a,ENSMUSG00000037709,protein_coding -27885,Cisd1,ENSMUSG00000037710,protein_coding -32888,Fermt2,ENSMUSG00000037712,protein_coding -35723,Ccdc33,ENSMUSG00000037716,protein_coding -16821,Tmem33,ENSMUSG00000037720,protein_coding -32885,Gnpnat1,ENSMUSG00000037722,protein_coding -29837,Ckap2,ENSMUSG00000037725,protein_coding -6119,Avp,ENSMUSG00000037727,protein_coding -10182,Mynn,ENSMUSG00000037730,protein_coding -14893,Themis2,ENSMUSG00000037731,protein_coding -16810,Limch1,ENSMUSG00000037736,protein_coding 
-10181,Actrt3,ENSMUSG00000037737,protein_coding -29835,Nek5,ENSMUSG00000037738,protein_coding -6117,Mrps26,ENSMUSG00000037740,protein_coding -36189,Eef1a1,ENSMUSG00000037742,protein_coding -27862,Phyhipl,ENSMUSG00000037747,protein_coding -38926,Fam222b,ENSMUSG00000037750,protein_coding -14890,Xkr8,ENSMUSG00000037752,protein_coding -6711,Ppp1r16b,ENSMUSG00000037754,protein_coding -32872,Ptger2,ENSMUSG00000037759,protein_coding -6709,Actr5,ENSMUSG00000037761,protein_coding -27854,Slc16a9,ENSMUSG00000037762,protein_coding -6707,Slc32a1,ENSMUSG00000037771,protein_coding -26647,Mrpl23,ENSMUSG00000037772,protein_coding -6111,Pced1a,ENSMUSG00000037773,protein_coding -32667,Mbl1,ENSMUSG00000037780,protein_coding -36580,Dzip1l,ENSMUSG00000037784,protein_coding -44906,Apopt1,ENSMUSG00000037787,protein_coding -19900,Vopp1,ENSMUSG00000037788,protein_coding -29722,Defb7,ENSMUSG00000037790,protein_coding -38918,Phf12,ENSMUSG00000037791,protein_coding -16773,N4bp2,ENSMUSG00000037795,protein_coding -12452,Adh4,ENSMUSG00000037797,protein_coding -32664,Mat1a,ENSMUSG00000037798,protein_coding -35833,Iqch,ENSMUSG00000037801,protein_coding -49519,Rpl10a,ENSMUSG00000037805,protein_coding -34564,Fam76b,ENSMUSG00000037808,protein_coding -6686,D630003M21Rik,ENSMUSG00000037813,protein_coding -53078,Ctnna1,ENSMUSG00000037815,protein_coding -41433,Fbxw17,ENSMUSG00000037816,protein_coding -10385,Abhd18,ENSMUSG00000037818,protein_coding -6684,Tgm2,ENSMUSG00000037820,protein_coding -16759,Smim14,ENSMUSG00000037822,protein_coding -32659,Tspan14,ENSMUSG00000037824,protein_coding -19892,Ppm1k,ENSMUSG00000037826,protein_coding -32658,Sh2d4b,ENSMUSG00000037833,protein_coding -6680,Vstm2l,ENSMUSG00000037843,protein_coding -35559,Fdxacb1,ENSMUSG00000037845,protein_coding -27830,Rtkn2,ENSMUSG00000037846,protein_coding -54701,Nmrk1,ENSMUSG00000037847,protein_coding -3002,Ifi206,ENSMUSG00000037849,protein_coding -41403,Iars,ENSMUSG00000037851,protein_coding -30469,Cpe,ENSMUSG00000037852,protein_coding 
-27828,Zfp365,ENSMUSG00000037855,protein_coding -38910,Nufip2,ENSMUSG00000037857,protein_coding -3001,Aim2,ENSMUSG00000037860,protein_coding -27817,Egr2,ENSMUSG00000037868,protein_coding -2999,Ackr1,ENSMUSG00000037872,protein_coding -27809,Jmjd1c,ENSMUSG00000037876,protein_coding -6087,Stk35,ENSMUSG00000037885,protein_coding -26614,Dusp8,ENSMUSG00000037887,protein_coding -16748,Wdr19,ENSMUSG00000037890,protein_coding -10469,Pcdh18,ENSMUSG00000037892,protein_coding -12428,H2afz,ENSMUSG00000037894,protein_coding -44882,Rcor1,ENSMUSG00000037896,protein_coding -6084,Sirpa,ENSMUSG00000037902,protein_coding -44878,Ankrd9,ENSMUSG00000037904,protein_coding -18164,Bri3bp,ENSMUSG00000037905,protein_coding -38899,Ankrd13b,ENSMUSG00000037907,protein_coding -16745,Tmem156,ENSMUSG00000037913,protein_coding -54102,Ndufv1,ENSMUSG00000037916,protein_coding -30441,Ddx60,ENSMUSG00000037921,protein_coding -12404,Bank1,ENSMUSG00000037922,protein_coding -2987,Olfr16,ENSMUSG00000037924,protein_coding -38895,Ssh2,ENSMUSG00000037926,protein_coding -41394,Bicd2,ENSMUSG00000037933,protein_coding -39524,Smarce1,ENSMUSG00000037935,protein_coding -18157,Scarb1,ENSMUSG00000037936,protein_coding -6063,Chchd5,ENSMUSG00000037938,protein_coding -30849,Inpp4b,ENSMUSG00000037940,protein_coding -2981,Crp,ENSMUSG00000037942,protein_coding -39523,Ccr7,ENSMUSG00000037944,protein_coding -41393,Fgd3,ENSMUSG00000037946,protein_coding -37214,Ano10,ENSMUSG00000037949,protein_coding -36579,A4gnt,ENSMUSG00000037953,protein_coding -44868,Wdr20,ENSMUSG00000037957,protein_coding -38888,Nsrp1,ENSMUSG00000037958,protein_coding -41391,Card19,ENSMUSG00000037960,protein_coding -18152,Rflna,ENSMUSG00000037962,protein_coding -47304,Zc3h7a,ENSMUSG00000037965,protein_coding -41390,Ninj1,ENSMUSG00000037966,protein_coding -35558,1110032A03Rik,ENSMUSG00000037971,protein_coding -47302,Snn,ENSMUSG00000037972,protein_coding -19854,Itprid1,ENSMUSG00000037973,protein_coding -26605,Muc5ac,ENSMUSG00000037974,protein_coding 
-36780,6430571L13Rik,ENSMUSG00000037977,protein_coding -18150,Ccdc92,ENSMUSG00000037979,protein_coding -19852,Neurod6,ENSMUSG00000037984,protein_coding -41389,Wnk2,ENSMUSG00000037989,protein_coding -27659,Sh3rf3,ENSMUSG00000037990,protein_coding -47292,Rmi2,ENSMUSG00000037991,protein_coding -39515,Rara,ENSMUSG00000037992,protein_coding -31466,Dhx38,ENSMUSG00000037993,protein_coding -12383,Slc9b2,ENSMUSG00000037994,protein_coding -2970,Igsf9,ENSMUSG00000037995,protein_coding -13886,Slc24a2,ENSMUSG00000037996,protein_coding -21453,Parp11,ENSMUSG00000037997,protein_coding -16701,Arap2,ENSMUSG00000037999,protein_coding -31378,Acd,ENSMUSG00000038000,protein_coding -49330,Cramp1l,ENSMUSG00000038002,protein_coding -30423,Hpf1,ENSMUSG00000038005,protein_coding -13885,Acer2,ENSMUSG00000038007,protein_coding -46902,Dnajc22,ENSMUSG00000038009,protein_coding -27651,Ccdc138,ENSMUSG00000038010,protein_coding -18148,Dnah10,ENSMUSG00000038011,protein_coding -39512,Wipf2,ENSMUSG00000038013,protein_coding -41387,Fam120a,ENSMUSG00000038014,protein_coding -47286,Prm2,ENSMUSG00000038015,protein_coding -39511,Rapgefl1,ENSMUSG00000038020,protein_coding -19844,Mindy4,ENSMUSG00000038022,protein_coding -18145,Atp6v0a2,ENSMUSG00000038023,protein_coding -13882,Dennd4c,ENSMUSG00000038024,protein_coding -41385,Phf2,ENSMUSG00000038025,protein_coding -2963,Kcnj9,ENSMUSG00000038026,protein_coding -21438,Tigar,ENSMUSG00000038028,protein_coding -2961,Igsf8,ENSMUSG00000038034,protein_coding -47283,Socs1,ENSMUSG00000038037,protein_coding -27647,Gcc2,ENSMUSG00000038039,protein_coding -41381,Ptpdc1,ENSMUSG00000038042,protein_coding -16050,Cct8l1,ENSMUSG00000038044,protein_coding -50844,Sult6b1,ENSMUSG00000038045,protein_coding -38872,Mrm3,ENSMUSG00000038046,protein_coding -13878,Haus6,ENSMUSG00000038047,protein_coding -50575,Cntnap5c,ENSMUSG00000038048,protein_coding -47280,Dexi,ENSMUSG00000038055,protein_coding -16043,Kmt2c,ENSMUSG00000038056,protein_coding 
-38871,Dbil5,ENSMUSG00000038057,protein_coding -19837,Nod1,ENSMUSG00000038058,protein_coding -53550,Smim3,ENSMUSG00000038059,protein_coding -37100,Dlec1,ENSMUSG00000038060,protein_coding -30290,Cldn22,ENSMUSG00000038064,protein_coding -19833,Mturn,ENSMUSG00000038065,protein_coding -39503,Csf3,ENSMUSG00000038067,protein_coding -41356,Rnf144b,ENSMUSG00000038068,protein_coding -30281,Cdkn2aip,ENSMUSG00000038069,protein_coding -13864,Cntln,ENSMUSG00000038070,protein_coding -53346,Npy6r,ENSMUSG00000038071,protein_coding -16041,Galnt11,ENSMUSG00000038072,protein_coding -19830,Fkbp14,ENSMUSG00000038074,protein_coding -21424,Kcna6,ENSMUSG00000038077,protein_coding -910,Tmem237,ENSMUSG00000038079,protein_coding -41353,Kdm1b,ENSMUSG00000038080,protein_coding -47732,Opa1,ENSMUSG00000038084,protein_coding -6621,Cnbd2,ENSMUSG00000038085,protein_coding -35556,Hspb2,ENSMUSG00000038086,protein_coding -11632,Hsd3b5,ENSMUSG00000038092,protein_coding -47731,Atp13a4,ENSMUSG00000038094,protein_coding -18131,Sbno1,ENSMUSG00000038095,protein_coding -30273,Trappc11,ENSMUSG00000038102,protein_coding -35219,AW551984,ENSMUSG00000038112,protein_coding -21413,Ano2,ENSMUSG00000038115,protein_coding -6619,Phf20,ENSMUSG00000038116,protein_coding -35024,Cdon,ENSMUSG00000038119,protein_coding -53740,Fam210a,ENSMUSG00000038121,protein_coding -27616,Tbc1d32,ENSMUSG00000038122,protein_coding -18125,Mphosph9,ENSMUSG00000038126,protein_coding -47720,Ccdc50,ENSMUSG00000038127,protein_coding -53032,Camk4,ENSMUSG00000038128,protein_coding -41335,Rbm24,ENSMUSG00000038132,protein_coding -16029,Crygn,ENSMUSG00000038135,protein_coding -48807,Tmem181a,ENSMUSG00000038141,protein_coding -30270,Stox2,ENSMUSG00000038143,protein_coding -37210,Snrk,ENSMUSG00000038145,protein_coding -49672,Notch3,ENSMUSG00000038146,protein_coding -2943,Cd84,ENSMUSG00000038147,protein_coding -47711,Cldn16,ENSMUSG00000038148,protein_coding -39495,Ormdl3,ENSMUSG00000038150,protein_coding -27444,Prdm1,ENSMUSG00000038151,protein_coding 
-54104,Gstp2,ENSMUSG00000038155,protein_coding -25882,Spon1,ENSMUSG00000038156,protein_coding -27441,Atg5,ENSMUSG00000038160,protein_coding -21408,Plekhg6,ENSMUSG00000038167,protein_coding -47708,P3h2,ENSMUSG00000038168,protein_coding -11607,Pde4dip,ENSMUSG00000038170,protein_coding -13843,Ttc39b,ENSMUSG00000038172,protein_coding -30266,Enpp6,ENSMUSG00000038173,protein_coding -891,Fam126b,ENSMUSG00000038174,protein_coding -41324,Mylip,ENSMUSG00000038175,protein_coding -38850,Slc43a2,ENSMUSG00000038178,protein_coding -2930,Slamf7,ENSMUSG00000038179,protein_coding -6611,Spag4,ENSMUSG00000038180,protein_coding -16022,Chpf2,ENSMUSG00000038181,protein_coding -25871,Btbd10,ENSMUSG00000038187,protein_coding -38848,Scarf1,ENSMUSG00000038188,protein_coding -13838,Cer1,ENSMUSG00000038192,protein_coding -30377,Hand2,ENSMUSG00000038193,protein_coding -38847,Rilp,ENSMUSG00000038195,protein_coding -16019,Iqca1l,ENSMUSG00000038199,protein_coding -24003,Kcna7,ENSMUSG00000038201,protein_coding -19785,Hoxa13,ENSMUSG00000038203,protein_coding -16018,Asb10,ENSMUSG00000038204,protein_coding -11605,Prkab2,ENSMUSG00000038205,protein_coding -30369,Fbxo8,ENSMUSG00000038206,protein_coding -39486,Pgap3,ENSMUSG00000038208,protein_coding -2926,Itln1,ENSMUSG00000038209,protein_coding -19776,Hoxa11,ENSMUSG00000038210,protein_coding -41826,Mfsd14b,ENSMUSG00000038212,protein_coding -21398,Tapbpl,ENSMUSG00000038213,protein_coding -27420,Bend3,ENSMUSG00000038214,protein_coding -30368,Cep44,ENSMUSG00000038215,protein_coding -39485,Pnmt,ENSMUSG00000038216,protein_coding -38844,Tlcd2,ENSMUSG00000038217,protein_coding -38841,Serpinf2,ENSMUSG00000038224,protein_coding -30257,Primpol,ENSMUSG00000038225,protein_coding -19773,Hoxa9,ENSMUSG00000038227,protein_coding -37204,Gask1a,ENSMUSG00000038233,protein_coding -2921,F11r,ENSMUSG00000038235,protein_coding -19772,Hoxa7,ENSMUSG00000038236,protein_coding -23996,Hrc,ENSMUSG00000038239,protein_coding -27414,Pdss2,ENSMUSG00000038240,protein_coding 
-6606,Cep250,ENSMUSG00000038241,protein_coding -880,Aox4,ENSMUSG00000038242,protein_coding -25856,Mical2,ENSMUSG00000038244,protein_coding -41096,Fam50b,ENSMUSG00000038246,protein_coding -27409,Sobp,ENSMUSG00000038248,protein_coding -30843,Usp38,ENSMUSG00000038250,protein_coding -21391,Ncapd2,ENSMUSG00000038252,protein_coding -19770,Hoxa5,ENSMUSG00000038253,protein_coding -39480,Neurod2,ENSMUSG00000038255,protein_coding -11598,Bcl9,ENSMUSG00000038256,protein_coding -30360,Glra3,ENSMUSG00000038257,protein_coding -6605,Gdf5,ENSMUSG00000038259,protein_coding -23994,Trpm4,ENSMUSG00000038260,protein_coding -35720,Sema7a,ENSMUSG00000038264,protein_coding -41082,Slc22a23,ENSMUSG00000038267,protein_coding -38832,Ovca2,ENSMUSG00000038268,protein_coding -21389,Iffo1,ENSMUSG00000038271,protein_coding -54258,Fau,ENSMUSG00000038274,protein_coding -16008,Asic3,ENSMUSG00000038276,protein_coding -21388,Nop2,ENSMUSG00000038279,protein_coding -27399,Ostm1,ENSMUSG00000038280,protein_coding -41074,Bphl,ENSMUSG00000038286,protein_coding -38822,Smg6,ENSMUSG00000038290,protein_coding -30237,Snx25,ENSMUSG00000038291,protein_coding -23987,Ccdc155,ENSMUSG00000038292,protein_coding -16006,Atg9b,ENSMUSG00000038295,protein_coding -25850,Galnt18,ENSMUSG00000038296,protein_coding -11588,Pdzk1,ENSMUSG00000038298,protein_coding -53029,Wdr36,ENSMUSG00000038299,protein_coding -23986,Pth2,ENSMUSG00000038300,protein_coding -19747,Snx10,ENSMUSG00000038301,protein_coding -27393,Afg1l,ENSMUSG00000038302,protein_coding -11587,Cd160,ENSMUSG00000038304,protein_coding -873,Spats2l,ENSMUSG00000038305,protein_coding -6595,Edem2,ENSMUSG00000038312,protein_coding -16002,Kcnh2,ENSMUSG00000038319,protein_coding -865,1700066M21Rik,ENSMUSG00000038323,protein_coding -6593,Trpc4ap,ENSMUSG00000038324,protein_coding -41048,Serpinb9f,ENSMUSG00000038327,protein_coding -13905,C87499,ENSMUSG00000038330,protein_coding -857,Satb2,ENSMUSG00000038331,protein_coding -27386,Sesn1,ENSMUSG00000038332,protein_coding 
-38818,Tsr1,ENSMUSG00000038335,protein_coding -18084,Mlxip,ENSMUSG00000038342,protein_coding -9773,Txlng,ENSMUSG00000038344,protein_coding -21378,Zfp384,ENSMUSG00000038346,protein_coding -48961,Tcte2,ENSMUSG00000038347,protein_coding -847,Plcl1,ENSMUSG00000038349,protein_coding -38816,Sgsm2,ENSMUSG00000038351,protein_coding -39464,Arl5c,ENSMUSG00000038352,protein_coding -11579,Ankrd35,ENSMUSG00000038354,protein_coding -36932,Camp,ENSMUSG00000038357,protein_coding -29639,Fbxo25,ENSMUSG00000038365,protein_coding -39457,Lasp1,ENSMUSG00000038366,protein_coding -13893,Focad,ENSMUSG00000038368,protein_coding -6579,Ncoa6,ENSMUSG00000038369,protein_coding -2896,Pcp4l1,ENSMUSG00000038370,protein_coding -25828,Sbf2,ENSMUSG00000038371,protein_coding -41019,Gmds,ENSMUSG00000038372,protein_coding -11574,Rbm8a,ENSMUSG00000038374,protein_coding -6578,Trp53inp2,ENSMUSG00000038375,protein_coding -36273,Ttk,ENSMUSG00000038379,protein_coding -6576,Pigu,ENSMUSG00000038383,protein_coding -18072,Setd1b,ENSMUSG00000038384,protein_coding -23962,Rras,ENSMUSG00000038387,protein_coding -19718,Mpp6,ENSMUSG00000038388,protein_coding -21368,Gpr162,ENSMUSG00000038390,protein_coding -11568,Txnip,ENSMUSG00000038393,protein_coding -29632,Upf3a,ENSMUSG00000038398,protein_coding -7021,Pmepa1,ENSMUSG00000038400,protein_coding -41014,Foxf2,ENSMUSG00000038402,protein_coding -11566,Hjv,ENSMUSG00000038403,protein_coding -23960,Scaf1,ENSMUSG00000038406,protein_coding -37196,Higd1a,ENSMUSG00000038412,protein_coding -41010,Foxq1,ENSMUSG00000038415,protein_coding -29631,Cdc16,ENSMUSG00000038416,protein_coding -27366,Fig4,ENSMUSG00000038417,protein_coding -53070,Egr1,ENSMUSG00000038418,protein_coding -2881,Fcrla,ENSMUSG00000038421,protein_coding -13674,Hdhd3,ENSMUSG00000038422,protein_coding -53771,Poli,ENSMUSG00000038425,protein_coding -21364,Usp5,ENSMUSG00000038429,protein_coding -39443,Mllt6,ENSMUSG00000038437,protein_coding -27358,Cdc40,ENSMUSG00000038446,protein_coding 
-21362,Spsb2,ENSMUSG00000038451,protein_coding -39437,Srcin1,ENSMUSG00000038453,protein_coding -19393,Dennd2a,ENSMUSG00000038456,protein_coding -29624,Tmem255b,ENSMUSG00000038457,protein_coding -24983,Abhd17c,ENSMUSG00000038459,protein_coding -40991,Uqcrfs1,ENSMUSG00000038462,protein_coding -2873,Olfml2b,ENSMUSG00000038463,protein_coding -6560,Chmp4b,ENSMUSG00000038467,protein_coding -2870,Nos1ap,ENSMUSG00000038473,protein_coding -27351,Cdk19,ENSMUSG00000038481,protein_coding -29620,Tfdp1,ENSMUSG00000038482,protein_coding -39434,Socs7,ENSMUSG00000038485,protein_coding -11519,Sv2a,ENSMUSG00000038486,protein_coding -26596,Polr2l,ENSMUSG00000038489,protein_coding -11514,Otud7b,ENSMUSG00000038495,protein_coding -1372,Slc19a3,ENSMUSG00000038496,protein_coding -29619,Tmco3,ENSMUSG00000038497,protein_coding -54190,Catsper1,ENSMUSG00000038498,protein_coding -49961,Prr3,ENSMUSG00000038500,protein_coding -23948,Ptov1,ENSMUSG00000038502,protein_coding -24980,Mesd,ENSMUSG00000038503,protein_coding -29615,Dcun1d2,ENSMUSG00000038506,protein_coding -19381,Parp12,ENSMUSG00000038507,protein_coding -30611,Gdf15,ENSMUSG00000038508,protein_coding -27343,Rpf2,ENSMUSG00000038510,protein_coding -29611,Grtp1,ENSMUSG00000038515,protein_coding -39425,Tbkbp1,ENSMUSG00000038517,protein_coding -41313,Jarid2,ENSMUSG00000038518,protein_coding -23944,Tbc1d17,ENSMUSG00000038520,protein_coding -21341,C1s1,ENSMUSG00000038521,protein_coding -27335,Mfsd4b1,ENSMUSG00000038522,protein_coding -6553,1700003F12Rik,ENSMUSG00000038523,protein_coding -53236,Fchsd1,ENSMUSG00000038524,protein_coding -15933,Armc10,ENSMUSG00000038525,protein_coding -11507,Car14,ENSMUSG00000038526,protein_coding -21339,C1rl,ENSMUSG00000038527,protein_coding -27334,Mfsd4b5,ENSMUSG00000038528,protein_coding -2853,Rgs4,ENSMUSG00000038530,protein_coding -6551,Cbfa2t2,ENSMUSG00000038533,protein_coding -39420,Osbpl7,ENSMUSG00000038534,protein_coding -36021,Zfp280d,ENSMUSG00000038535,protein_coding 
-6997,Mc3r,ENSMUSG00000038537,protein_coding -19368,Ubn2,ENSMUSG00000038538,protein_coding -23941,Atf5,ENSMUSG00000038539,protein_coding -24965,Tmc3,ENSMUSG00000038540,protein_coding -50781,Srd5a2,ENSMUSG00000038541,protein_coding -29606,Pcid2,ENSMUSG00000038542,protein_coding -11505,BC028528,ENSMUSG00000038543,protein_coding -13605,Inip,ENSMUSG00000038544,protein_coding -50310,Cul7,ENSMUSG00000038545,protein_coding -41283,Ranbp9,ENSMUSG00000038546,protein_coding -11504,Ciart,ENSMUSG00000038550,protein_coding -16187,Fndc4,ENSMUSG00000038552,protein_coding -53069,Reep2,ENSMUSG00000038555,protein_coding -39413,Sp6,ENSMUSG00000038560,protein_coding -24945,Efl1,ENSMUSG00000038563,protein_coding -16186,Ift172,ENSMUSG00000038564,protein_coding -6978,Cyp24a1,ENSMUSG00000038567,protein_coding -18031,Rad9b,ENSMUSG00000038569,protein_coding -24944,Saxo2,ENSMUSG00000038570,protein_coding -6545,Bpifb5,ENSMUSG00000038572,protein_coding -3215,Susd4,ENSMUSG00000038576,protein_coding -13598,Susd1,ENSMUSG00000038578,protein_coding -26577,Sct,ENSMUSG00000038580,protein_coding -18029,Pptc7,ENSMUSG00000038582,protein_coding -27590,Pln,ENSMUSG00000038583,protein_coding -26757,Akap12,ENSMUSG00000038587,protein_coding -45894,Colec10,ENSMUSG00000038591,protein_coding -18027,Tctn1,ENSMUSG00000038593,protein_coding -27587,Cep85l,ENSMUSG00000038594,protein_coding -13592,Shoc1,ENSMUSG00000038598,protein_coding -3211,Capn8,ENSMUSG00000038599,protein_coding -19357,Atp6v0a4,ENSMUSG00000038600,protein_coding -27578,Slc35f1,ENSMUSG00000038602,protein_coding -31371,Ripor1,ENSMUSG00000038604,protein_coding -7237,Samd10,ENSMUSG00000038605,protein_coding -13591,Gng10,ENSMUSG00000038607,protein_coding -1331,Dock10,ENSMUSG00000038608,protein_coding -26574,Phrf1,ENSMUSG00000038611,protein_coding -11491,Mcl1,ENSMUSG00000038612,protein_coding -39402,Nfe2l1,ENSMUSG00000038615,protein_coding -26572,Rassf7,ENSMUSG00000038618,protein_coding -11489,Ensa,ENSMUSG00000038619,protein_coding 
-45881,Med30,ENSMUSG00000038622,protein_coding -24928,Tm6sf1,ENSMUSG00000038623,protein_coding -27568,Nepn,ENSMUSG00000038624,protein_coding -7248,Polr3k,ENSMUSG00000038628,protein_coding -13587,Zkscan16,ENSMUSG00000038630,protein_coding -3201,Degs1,ENSMUSG00000038633,protein_coding -26570,Lrrc56,ENSMUSG00000038637,protein_coding -19350,Akr1d1,ENSMUSG00000038641,protein_coding -11483,Ctss,ENSMUSG00000038642,protein_coding -23926,Pold1,ENSMUSG00000038644,protein_coding -24922,Ramac,ENSMUSG00000038646,protein_coding -19349,Creb3l2,ENSMUSG00000038648,protein_coding -26566,Rnh1,ENSMUSG00000038650,protein_coding -41243,Sycp2l,ENSMUSG00000038651,protein_coding -18705,Cyp3a16,ENSMUSG00000038656,protein_coding -54858,Ric1,ENSMUSG00000038658,protein_coding -24915,Fsd2,ENSMUSG00000038663,protein_coding -35912,Herc1,ENSMUSG00000038664,protein_coding -19346,Dgki,ENSMUSG00000038665,protein_coding -13578,Lpar1,ENSMUSG00000038668,protein_coding -23924,Mybpc2,ENSMUSG00000038670,protein_coding -7222,Arfrp1,ENSMUSG00000038671,protein_coding -16174,Ucn,ENSMUSG00000038676,protein_coding -49510,Scube3,ENSMUSG00000038677,protein_coding -45860,Trps1,ENSMUSG00000038679,protein_coding -41238,Pak1ip1,ENSMUSG00000038683,protein_coding -7221,Rtel1,ENSMUSG00000038685,protein_coding -18695,Atp5j2,ENSMUSG00000038690,protein_coding -34660,Mbd3l1,ENSMUSG00000038691,protein_coding -39388,Hoxb4,ENSMUSG00000038692,protein_coding -23918,Josd2,ENSMUSG00000038695,protein_coding -4288,Mapkap1,ENSMUSG00000038696,protein_coding -31829,Taf5l,ENSMUSG00000038697,protein_coding -39386,Hoxb5,ENSMUSG00000038700,protein_coding -1870,Dsel,ENSMUSG00000038702,protein_coding -23917,Aspdh,ENSMUSG00000038704,protein_coding -7218,Gmeb2,ENSMUSG00000038705,protein_coding -37089,Golga4,ENSMUSG00000038708,protein_coding -13574,Txndc8,ENSMUSG00000038709,protein_coding -11469,Mindy1,ENSMUSG00000038712,protein_coding -35424,Atp5l,ENSMUSG00000038717,protein_coding -4286,Pbx3,ENSMUSG00000038718,protein_coding 
-39383,Hoxb7,ENSMUSG00000038721,protein_coding -18692,Bud31,ENSMUSG00000038722,protein_coding -45826,Pkhd1l1,ENSMUSG00000038725,protein_coding -13568,Akap2,ENSMUSG00000038729,protein_coding -40985,Mboat1,ENSMUSG00000038732,protein_coding -3177,Wdr26,ENSMUSG00000038733,protein_coding -45823,Nudcd1,ENSMUSG00000038736,protein_coding -23910,Shank1,ENSMUSG00000038738,protein_coding -4279,Mvb12b,ENSMUSG00000038740,protein_coding -34751,Angptl6,ENSMUSG00000038742,protein_coding -26549,Nlrp6,ENSMUSG00000038745,protein_coding -36178,Omt2b,ENSMUSG00000038750,protein_coding -7213,Ptk6,ENSMUSG00000038751,protein_coding -55205,Elovl3,ENSMUSG00000038754,protein_coding -39373,Ttll6,ENSMUSG00000038756,protein_coding -19327,Nup205,ENSMUSG00000038759,protein_coding -45818,Trhr,ENSMUSG00000038760,protein_coding -49959,Abcf1,ENSMUSG00000038762,protein_coding -24898,Alpk3,ENSMUSG00000038763,protein_coding -13560,Ptpn3,ENSMUSG00000038764,protein_coding -4274,Lmx1b,ENSMUSG00000038765,protein_coding -11462,Gabpb2,ENSMUSG00000038766,protein_coding -3170,9130409I23Rik,ENSMUSG00000038768,protein_coding -18682,Kpna7,ENSMUSG00000038770,protein_coding -53067,Kdm3b,ENSMUSG00000038773,protein_coding -27529,Ascc3,ENSMUSG00000038774,protein_coding -37098,Vill,ENSMUSG00000038775,protein_coding -3167,Ephx1,ENSMUSG00000038776,protein_coding -11461,Sema6c,ENSMUSG00000038777,protein_coding -18680,Smurf1,ENSMUSG00000038780,protein_coding -50497,Stap2,ENSMUSG00000038781,protein_coding -23895,1700028J19Rik,ENSMUSG00000038782,protein_coding -19326,Cnot4,ENSMUSG00000038784,protein_coding -53331,Scgb3a2,ENSMUSG00000038791,protein_coding -3163,Lefty1,ENSMUSG00000038793,protein_coding -24888,Zscan2,ENSMUSG00000038797,protein_coding -26540,Scgb1c1,ENSMUSG00000038801,protein_coding -16159,Ost4,ENSMUSG00000038803,protein_coding -50954,Six3,ENSMUSG00000038805,protein_coding -3158,Sde2,ENSMUSG00000038806,protein_coding -38807,Rap1gap2,ENSMUSG00000038807,protein_coding -39356,Gngt2,ENSMUSG00000038811,protein_coding 
-54306,Trmt112,ENSMUSG00000038812,protein_coding -13551,Ctnnal1,ENSMUSG00000038816,protein_coding -27473,Hace1,ENSMUSG00000038822,protein_coding -13550,Abitram,ENSMUSG00000038827,protein_coding -16155,Tmem214,ENSMUSG00000038828,protein_coding -4264,Ralgps1,ENSMUSG00000038831,protein_coding -19316,Agbl3,ENSMUSG00000038836,protein_coding -49934,Vars2,ENSMUSG00000038838,protein_coding -7202,Birc7,ENSMUSG00000038840,protein_coding -54687,Gcnt1,ENSMUSG00000038843,protein_coding -6267,Kif16b,ENSMUSG00000038844,protein_coding -39348,Phb,ENSMUSG00000038845,protein_coding -7201,Ythdf1,ENSMUSG00000038848,protein_coding -3139,Itpkb,ENSMUSG00000038855,protein_coding -18671,Baiap2l1,ENSMUSG00000038859,protein_coding -4262,Garnl3,ENSMUSG00000038860,protein_coding -11444,Pi4kb,ENSMUSG00000038861,protein_coding -1810,Zcchc2,ENSMUSG00000038866,protein_coding -19310,Bpgm,ENSMUSG00000038871,protein_coding -31455,Zfhx3,ENSMUSG00000038872,protein_coding -27173,Rnf146,ENSMUSG00000038876,protein_coding -45654,Nipal2,ENSMUSG00000038879,protein_coding -49327,Mrps34,ENSMUSG00000038880,protein_coding -40938,Prl3a1,ENSMUSG00000038883,protein_coding -34750,A230050P20Rik,ENSMUSG00000038884,protein_coding -24876,Man2a2,ENSMUSG00000038886,protein_coding -23835,Ctu1,ENSMUSG00000038888,protein_coding -40937,Prl3b1,ENSMUSG00000038891,protein_coding -39340,Fam117a,ENSMUSG00000038893,protein_coding -29543,Irs2,ENSMUSG00000038894,protein_coding -34822,Zfp653,ENSMUSG00000038895,protein_coding -4259,Rpl12,ENSMUSG00000038900,protein_coding -11438,Pogz,ENSMUSG00000038902,protein_coding -53762,Ccdc68,ENSMUSG00000038903,protein_coding -39337,Kat7,ENSMUSG00000038909,protein_coding -50409,Plcl2,ENSMUSG00000038910,protein_coding -7188,Dido1,ENSMUSG00000038914,protein_coding -27168,Soga3,ENSMUSG00000038916,protein_coding -32349,E330034G19Rik,ENSMUSG00000038925,protein_coding -24870,Rccd1,ENSMUSG00000038930,protein_coding -7187,Tcfl5,ENSMUSG00000038932,protein_coding -3126,Sccpdh,ENSMUSG00000038936,protein_coding 
-24871,Prc1,ENSMUSG00000038943,protein_coding -3125,Cnst,ENSMUSG00000038949,protein_coding -50247,Supt3,ENSMUSG00000038954,protein_coding -35715,Edc3,ENSMUSG00000038957,protein_coding -7182,Slco4a1,ENSMUSG00000038963,protein_coding -47430,Ube2l3,ENSMUSG00000038965,protein_coding -39328,Pdk2,ENSMUSG00000038967,protein_coding -23884,Klk1b16,ENSMUSG00000038968,protein_coding -18664,Lmtk2,ENSMUSG00000038970,protein_coding -23817,Cldnd2,ENSMUSG00000038973,protein_coding -12680,Rabggtb,ENSMUSG00000038975,protein_coding -39326,Ppp1r9b,ENSMUSG00000038976,protein_coding -7176,Rbbp8nl,ENSMUSG00000038980,protein_coding -41192,Bloc1s5,ENSMUSG00000038982,protein_coding -45631,Tspyl5,ENSMUSG00000038984,protein_coding -4252,Cfap157,ENSMUSG00000038987,protein_coding -7175,Cables2,ENSMUSG00000038990,protein_coding -41187,Txndc5,ENSMUSG00000038991,protein_coding -39325,Hils1,ENSMUSG00000038994,protein_coding -12677,Asb17,ENSMUSG00000038997,protein_coding -16115,Ube3c,ENSMUSG00000039000,protein_coding -7173,Rps21,ENSMUSG00000039001,protein_coding -41185,Bmp6,ENSMUSG00000039004,protein_coding -13723,Tlr4,ENSMUSG00000039005,protein_coding -45627,Cpq,ENSMUSG00000039007,protein_coding -23811,Siglecf,ENSMUSG00000039013,protein_coding -35550,Timm8b,ENSMUSG00000039016,protein_coding -26502,Mtg1,ENSMUSG00000039018,protein_coding -4249,Ttc16,ENSMUSG00000039021,protein_coding -27140,Arhgap18,ENSMUSG00000039031,protein_coding -19258,Tsga13,ENSMUSG00000039032,protein_coding -6247,Tasp1,ENSMUSG00000039033,protein_coding -12658,St6galnac5,ENSMUSG00000039037,protein_coding -7170,Adrm1,ENSMUSG00000039041,protein_coding -24853,Arpin,ENSMUSG00000039043,protein_coding -3519,Usp6nl,ENSMUSG00000039046,protein_coding -12656,Pigk,ENSMUSG00000039047,protein_coding -35017,Foxred1,ENSMUSG00000039048,protein_coding -7169,Osbpl2,ENSMUSG00000039050,protein_coding -39308,Eme1,ENSMUSG00000039055,protein_coding -29526,Myo16,ENSMUSG00000039057,protein_coding -12654,Ak5,ENSMUSG00000039058,protein_coding 
-7167,Hrh3,ENSMUSG00000039059,protein_coding -24847,Anpep,ENSMUSG00000039062,protein_coding -3516,Echdc3,ENSMUSG00000039063,protein_coding -45612,Atpsckmt,ENSMUSG00000039065,protein_coding -31450,Psmd7,ENSMUSG00000039067,protein_coding -12653,Zzz3,ENSMUSG00000039068,protein_coding -7166,Mtg2,ENSMUSG00000039069,protein_coding -19244,Cpa4,ENSMUSG00000039070,protein_coding -31698,Trhr2,ENSMUSG00000039079,protein_coding -32327,Zfp503,ENSMUSG00000039081,protein_coding -39305,Chad,ENSMUSG00000039084,protein_coding -7165,Ss18l1,ENSMUSG00000039086,protein_coding -41173,Rreb1,ENSMUSG00000039087,protein_coding -27130,L3mbtl3,ENSMUSG00000039089,protein_coding -6244,Sptlc3,ENSMUSG00000039092,protein_coding -16096,En2,ENSMUSG00000039095,protein_coding -39303,Rsad1,ENSMUSG00000039096,protein_coding -54852,Rln1,ENSMUSG00000039097,protein_coding -24843,Wdr93,ENSMUSG00000039099,protein_coding -45606,March6,ENSMUSG00000039100,protein_coding -12645,Nexn,ENSMUSG00000039103,protein_coding -13698,Atp6v1g1,ENSMUSG00000039105,protein_coding -16089,Htr5a,ENSMUSG00000039106,protein_coding -7163,Lsm14b,ENSMUSG00000039108,protein_coding -41146,F13a1,ENSMUSG00000039109,protein_coding -39302,Mycbpap,ENSMUSG00000039110,protein_coding -41142,Nrn1,ENSMUSG00000039114,protein_coding -37092,Itga9,ENSMUSG00000039115,protein_coding -26913,Adgrg6,ENSMUSG00000039116,protein_coding -7160,Taf4,ENSMUSG00000039117,protein_coding -54681,Prune2,ENSMUSG00000039126,protein_coding -3506,Cdc123,ENSMUSG00000039128,protein_coding -19237,Zc3hc1,ENSMUSG00000039130,protein_coding -12639,Gipc2,ENSMUSG00000039131,protein_coding -13697,Whrn,ENSMUSG00000039137,protein_coding -3504,Camk1d,ENSMUSG00000039145,protein_coding -12630,Ifi44l,ENSMUSG00000039146,protein_coding -54194,Sart1,ENSMUSG00000039148,protein_coding -50242,Runx2,ENSMUSG00000039153,protein_coding -50495,Shd,ENSMUSG00000039154,protein_coding -7152,Cdh26,ENSMUSG00000039155,protein_coding -16621,Stim2,ENSMUSG00000039156,protein_coding 
-4230,Fam102a,ENSMUSG00000039157,protein_coding -13695,Akna,ENSMUSG00000039158,protein_coding -19234,Ube2h,ENSMUSG00000039159,protein_coding -37080,Cmc1,ENSMUSG00000039163,protein_coding -4227,Naif1,ENSMUSG00000039164,protein_coding -27108,Akap7,ENSMUSG00000039166,protein_coding -12627,Adgrl4,ENSMUSG00000039167,protein_coding -45598,Dap,ENSMUSG00000039168,protein_coding -12190,1700003H04Rik,ENSMUSG00000039174,protein_coding -24825,Polg,ENSMUSG00000039176,protein_coding -16618,Tbc1d19,ENSMUSG00000039178,protein_coding -47276,Tekt5,ENSMUSG00000039179,protein_coding -40539,AW209491,ENSMUSG00000039182,protein_coding -49323,Nubp2,ENSMUSG00000039183,protein_coding -24823,Fanci,ENSMUSG00000039187,protein_coding -16614,Rbpj,ENSMUSG00000039191,protein_coding -50795,Nlrc4,ENSMUSG00000039193,protein_coding -24822,Rlbp1,ENSMUSG00000039194,protein_coding -4222,1110008P14Rik,ENSMUSG00000039195,protein_coding -13691,Orm1,ENSMUSG00000039196,protein_coding -32312,Adk,ENSMUSG00000039197,protein_coding -40301,Ptchd3,ENSMUSG00000039198,protein_coding -31364,Zdhhc1,ENSMUSG00000039199,protein_coding -47270,Atf7ip2,ENSMUSG00000039200,protein_coding -7389,Tbc1d25,ENSMUSG00000039201,protein_coding -24818,Abhd2,ENSMUSG00000039202,protein_coding -4221,Ciz1,ENSMUSG00000039205,protein_coding -18644,Daglb,ENSMUSG00000039206,protein_coding -40297,Metrnl,ENSMUSG00000039208,protein_coding -47268,Rpl39l,ENSMUSG00000039209,protein_coding -3300,Gpatch2,ENSMUSG00000039210,protein_coding -19698,Svs1,ENSMUSG00000039215,protein_coding -35547,Il18,ENSMUSG00000039217,protein_coding -49241,Srrm2,ENSMUSG00000039218,protein_coding -40526,Arid4b,ENSMUSG00000039219,protein_coding -49955,Ppp1r10,ENSMUSG00000039220,protein_coding -10155,Rpl22l1,ENSMUSG00000039221,protein_coding -3292,D1Pas1,ENSMUSG00000039224,protein_coding -40294,Tbcd,ENSMUSG00000039230,protein_coding -7381,Suv39h1,ENSMUSG00000039231,protein_coding -26884,Stx11,ENSMUSG00000039232,protein_coding -40523,Tbce,ENSMUSG00000039233,protein_coding 
-12166,Sec24d,ENSMUSG00000039234,protein_coding -24811,Isg20,ENSMUSG00000039236,protein_coding -40295,Zfp750,ENSMUSG00000039238,protein_coding -3283,Tgfb2,ENSMUSG00000039239,protein_coding -40521,B3galnt2,ENSMUSG00000039242,protein_coding -18631,E130309D02Rik,ENSMUSG00000039244,protein_coding -3276,Lyplal1,ENSMUSG00000039246,protein_coding -16592,Lgi2,ENSMUSG00000039252,protein_coding -40292,Fn3krp,ENSMUSG00000039253,protein_coding -4213,Pomt1,ENSMUSG00000039254,protein_coding -23720,Vstm2b,ENSMUSG00000039257,protein_coding -4208,Prrc2b,ENSMUSG00000039262,protein_coding -7037,Npepl1,ENSMUSG00000039263,protein_coding -19682,Gimap3,ENSMUSG00000039264,protein_coding -49929,2300002M23Rik,ENSMUSG00000039269,protein_coding -13743,Megf9,ENSMUSG00000039270,protein_coding -40287,Foxk2,ENSMUSG00000039275,protein_coding -7370,Pcsk1n,ENSMUSG00000039278,protein_coding -37079,Azi2,ENSMUSG00000039285,protein_coding -10139,Fndc3b,ENSMUSG00000039286,protein_coding -40285,Cybc1,ENSMUSG00000039294,protein_coding -18625,Spdye4a,ENSMUSG00000039296,protein_coding -13740,Cdk5rap2,ENSMUSG00000039298,protein_coding -10137,Tnfsf10,ENSMUSG00000039304,protein_coding -40284,Hexdc,ENSMUSG00000039307,protein_coding -32305,Ndst2,ENSMUSG00000039308,protein_coding -36392,Minar1,ENSMUSG00000039313,protein_coding -16389,Clnk,ENSMUSG00000039315,protein_coding -50400,Rftn1,ENSMUSG00000039316,protein_coding -3258,Rab3gap2,ENSMUSG00000039318,protein_coding -40281,Uts2r,ENSMUSG00000039321,protein_coding -1172,Igfbp2,ENSMUSG00000039323,protein_coding -30014,Rnf122,ENSMUSG00000039328,protein_coding -40280,Tex19.1,ENSMUSG00000039329,protein_coding -54196,Tsga10ip,ENSMUSG00000039330,protein_coding -10127,Spata16,ENSMUSG00000039335,protein_coding -40279,Tex19.2,ENSMUSG00000039337,protein_coding -27331,Mfsd4b2-ps,ENSMUSG00000039339,protein_coding -1168,Ankar,ENSMUSG00000039342,protein_coding -47244,Mettl22,ENSMUSG00000039345,protein_coding -19660,Atp6v0e2,ENSMUSG00000039347,protein_coding 
-3252,C130074G19Rik,ENSMUSG00000039349,protein_coding -1166,Smarcal1,ENSMUSG00000039354,protein_coding -4192,Exosc2,ENSMUSG00000039356,protein_coding -32301,Fut11,ENSMUSG00000039357,protein_coding -16380,Drd5,ENSMUSG00000039358,protein_coding -25122,Picalm,ENSMUSG00000039361,protein_coding -40274,Sectm1b,ENSMUSG00000039364,protein_coding -32298,Sec24c,ENSMUSG00000039367,protein_coding -1164,March4,ENSMUSG00000039372,protein_coding -30345,Wdr17,ENSMUSG00000039375,protein_coding -32297,Synpo2l,ENSMUSG00000039376,protein_coding -3246,Hlx,ENSMUSG00000039377,protein_coding -7354,Wdr45,ENSMUSG00000039382,protein_coding -3235,Dusp10,ENSMUSG00000039384,protein_coding -45446,Cdh6,ENSMUSG00000039385,protein_coding -25113,Ccdc81,ENSMUSG00000039391,protein_coding -1155,Mreg,ENSMUSG00000039395,protein_coding -30337,Neil3,ENSMUSG00000039396,protein_coding -25105,Prss23,ENSMUSG00000039405,protein_coding -15542,Prdm16,ENSMUSG00000039410,protein_coding -50838,Heatr5b,ENSMUSG00000039414,protein_coding -19593,Cntnap2,ENSMUSG00000039419,protein_coding -47229,Alg1,ENSMUSG00000039427,protein_coding -25097,Tmem135,ENSMUSG00000039428,protein_coding -30151,Mtmr7,ENSMUSG00000039431,protein_coding -35419,Ttc36,ENSMUSG00000039438,protein_coding -3486,Prpf18,ENSMUSG00000039449,protein_coding -40257,Dcxr,ENSMUSG00000039450,protein_coding -35908,Snx22,ENSMUSG00000039452,protein_coding -48674,Morc3,ENSMUSG00000039456,protein_coding -47222,Ppl,ENSMUSG00000039457,protein_coding -45429,Mtmr12,ENSMUSG00000039458,protein_coding -36845,Tcta,ENSMUSG00000039461,protein_coding -27274,Col10a1,ENSMUSG00000039462,protein_coding -6914,Slc9a8,ENSMUSG00000039463,protein_coding -30148,Zdhhc2,ENSMUSG00000039470,protein_coding -47220,Ubn1,ENSMUSG00000039473,protein_coding -16354,Wfs1,ENSMUSG00000039474,protein_coding -4171,Prrx2,ENSMUSG00000039476,protein_coding -18610,Tnrc18,ENSMUSG00000039477,protein_coding -30146,Micu3,ENSMUSG00000039478,protein_coding -27270,Nt5dc1,ENSMUSG00000039480,protein_coding 
-50535,Nrtn,ENSMUSG00000039481,protein_coding -4170,Asb6,ENSMUSG00000039483,protein_coding -27273,Tspyl4,ENSMUSG00000039485,protein_coding -34529,Cntn5,ENSMUSG00000039488,protein_coding -15531,Ccdc27,ENSMUSG00000039492,protein_coding -3462,Cdnf,ENSMUSG00000039496,protein_coding -27264,Dse,ENSMUSG00000039497,protein_coding -6900,Znfx1,ENSMUSG00000039501,protein_coding -27261,Calhm4,ENSMUSG00000039508,protein_coding -31826,Nup133,ENSMUSG00000039509,protein_coding -49498,Uhrf1bp1,ENSMUSG00000039512,protein_coding -4159,Ptpa,ENSMUSG00000039515,protein_coding -49927,Cdsn,ENSMUSG00000039518,protein_coding -10046,Cyp7b1,ENSMUSG00000039519,protein_coding -7344,Foxp3,ENSMUSG00000039521,protein_coding -15528,Cep104,ENSMUSG00000039523,protein_coding -53638,Atp8b1,ENSMUSG00000039529,protein_coding -30134,Tusc3,ENSMUSG00000039530,protein_coding -27253,Zup1,ENSMUSG00000039531,protein_coding -18606,Mmd2,ENSMUSG00000039533,protein_coding -6897,Stau1,ENSMUSG00000039536,protein_coding -30127,Sgcz,ENSMUSG00000039539,protein_coding -52716,4921524L21Rik,ENSMUSG00000039540,protein_coding -35533,Ncam1,ENSMUSG00000039542,protein_coding -32288,Cfap70,ENSMUSG00000039543,protein_coding -15522,Ajap1,ENSMUSG00000039546,protein_coding -27252,Rsph4a,ENSMUSG00000039552,protein_coding -13466,Cylc2,ENSMUSG00000039555,protein_coding -7341,Ppp1r3f,ENSMUSG00000039556,protein_coding -47209,Ubald1,ENSMUSG00000039568,protein_coding -15516,Nphp4,ENSMUSG00000039577,protein_coding -19968,Ccser1,ENSMUSG00000039578,protein_coding -13460,Grin3a,ENSMUSG00000039579,protein_coding -35762,Myo9a,ENSMUSG00000039585,protein_coding -32284,Fam149b,ENSMUSG00000039599,protein_coding -50236,Rcan2,ENSMUSG00000039601,protein_coding -37070,Rbms3,ENSMUSG00000039607,protein_coding -25057,Olfr303,ENSMUSG00000039608,protein_coding -13458,Tmem246,ENSMUSG00000039611,protein_coding -49387,Stub1,ENSMUSG00000039615,protein_coding -52944,Mocos,ENSMUSG00000039616,protein_coding -30111,Trmt9b,ENSMUSG00000039620,protein_coding 
-6889,Prex1,ENSMUSG00000039621,protein_coding -18602,Ap5z1,ENSMUSG00000039623,protein_coding -49317,Hs3st6,ENSMUSG00000039628,protein_coding -19222,Strip2,ENSMUSG00000039629,protein_coding -3112,Hnrnpu,ENSMUSG00000039630,protein_coding -34818,Ccdc151,ENSMUSG00000039632,protein_coding -30107,Lonrf1,ENSMUSG00000039633,protein_coding -13456,Zfp189,ENSMUSG00000039634,protein_coding -47200,Coro7,ENSMUSG00000039637,protein_coding -48647,Kcne1,ENSMUSG00000039639,protein_coding -40228,Mrpl12,ENSMUSG00000039640,protein_coding -47201,Vasn,ENSMUSG00000039646,protein_coding -4144,Kyat1,ENSMUSG00000039648,protein_coding -54983,Cpeb3,ENSMUSG00000039652,protein_coding -13454,Baat,ENSMUSG00000039653,protein_coding -49779,Rxrb,ENSMUSG00000039656,protein_coding -4143,Spout1,ENSMUSG00000039660,protein_coding -30013,Dusp26,ENSMUSG00000039661,protein_coding -15509,Icmt,ENSMUSG00000039662,protein_coding -40224,Oxld1,ENSMUSG00000039670,protein_coding -6872,Zmynd8,ENSMUSG00000039671,protein_coding -48642,Kcne2,ENSMUSG00000039672,protein_coding -45386,Capsl,ENSMUSG00000039676,protein_coding -4140,Tbc1d13,ENSMUSG00000039678,protein_coding -48634,Mrps6,ENSMUSG00000039680,protein_coding -16487,Lap3,ENSMUSG00000039682,protein_coding -18592,Sdk1,ENSMUSG00000039683,protein_coding -4139,Zer1,ENSMUSG00000039686,protein_coding -40222,Tspan10,ENSMUSG00000039691,protein_coding -13436,Msantd3,ENSMUSG00000039693,protein_coding -27198,Ncoa7,ENSMUSG00000039697,protein_coding -54271,Batf2,ENSMUSG00000039699,protein_coding -12156,Usp53,ENSMUSG00000039701,protein_coding -40221,Nploc4,ENSMUSG00000039703,protein_coding -45381,Lmbrd2,ENSMUSG00000039704,protein_coding -16466,Ldb2,ENSMUSG00000039706,protein_coding -10003,Slc7a12,ENSMUSG00000039710,protein_coding -15499,Plekhg5,ENSMUSG00000039713,protein_coding -35710,Cplx3,ENSMUSG00000039714,protein_coding -4135,Wdr34,ENSMUSG00000039715,protein_coding -36767,Dock3,ENSMUSG00000039716,protein_coding -9991,Ralyl,ENSMUSG00000039717,protein_coding 
-29978,Got1l1,ENSMUSG00000039720,protein_coding -6867,Trp53rka,ENSMUSG00000039725,protein_coding -24179,Slc6a5,ENSMUSG00000039728,protein_coding -12142,Fnbp1l,ENSMUSG00000039735,protein_coding -18400,Prkrip1,ENSMUSG00000039737,protein_coding -47184,Slx4,ENSMUSG00000039738,protein_coding -13422,Alg2,ENSMUSG00000039740,protein_coding -40212,Bahcc1,ENSMUSG00000039741,protein_coding -19201,Fam71f1,ENSMUSG00000039742,protein_coding -24175,Htatip2,ENSMUSG00000039745,protein_coding -18399,Orai2,ENSMUSG00000039747,protein_coding -3055,Exo1,ENSMUSG00000039748,protein_coding -16435,Fbxl5,ENSMUSG00000039753,protein_coding -18398,Alkbh4,ENSMUSG00000039754,protein_coding -12135,Dnttip2,ENSMUSG00000039756,protein_coding -15492,Thap3,ENSMUSG00000039759,protein_coding -26984,Il22ra2,ENSMUSG00000039760,protein_coding -48619,Dnajc28,ENSMUSG00000039763,protein_coding -16434,Cc2d2a,ENSMUSG00000039765,protein_coding -15491,Dnajc11,ENSMUSG00000039768,protein_coding -50760,Ypel5,ENSMUSG00000039770,protein_coding -18395,Polr2j,ENSMUSG00000039771,protein_coding -13415,Galnt12,ENSMUSG00000039774,protein_coding -29715,Defb3,ENSMUSG00000039775,protein_coding -40203,Cep131,ENSMUSG00000039781,protein_coding -16424,Cpeb2,ENSMUSG00000039782,protein_coding -3050,Kmo,ENSMUSG00000039783,protein_coding -29713,Defb5,ENSMUSG00000039785,protein_coding -4129,Cercam,ENSMUSG00000039787,protein_coding -47172,Zfp597,ENSMUSG00000039789,protein_coding -9971,Zfand1,ENSMUSG00000039795,protein_coding -45365,Cplane1,ENSMUSG00000039801,protein_coding -6854,Ncoa5,ENSMUSG00000039804,protein_coding -13410,Gabbr2,ENSMUSG00000039809,protein_coding -29241,Zc3h10,ENSMUSG00000039810,protein_coding -13409,Tbc1d2,ENSMUSG00000039813,protein_coding -29695,Xkr5,ENSMUSG00000039814,protein_coding -29236,Myl6b,ENSMUSG00000039824,protein_coding -4122,Trub2,ENSMUSG00000039826,protein_coding -45361,Wdr70,ENSMUSG00000039828,protein_coding -48600,Olig2,ENSMUSG00000039830,protein_coding -12124,Arhgap29,ENSMUSG00000039831,protein_coding 
-6850,Zfp335,ENSMUSG00000039834,protein_coding -26962,Nhsl1,ENSMUSG00000039835,protein_coding -15475,Slc45a1,ENSMUSG00000039838,protein_coding -53864,Epg5,ENSMUSG00000039840,protein_coding -19178,Zfp800,ENSMUSG00000039841,protein_coding -29686,Mcph1,ENSMUSG00000039842,protein_coding -4118,Rapgef1,ENSMUSG00000039844,protein_coding -6849,Pcif1,ENSMUSG00000039849,protein_coding -40188,Endov,ENSMUSG00000039850,protein_coding -48593,4932438H23Rik,ENSMUSG00000039851,protein_coding -15471,Rere,ENSMUSG00000039852,protein_coding -13405,Trim14,ENSMUSG00000039853,protein_coding -18379,Srrm3,ENSMUSG00000039860,protein_coding -12110,Slc44a3,ENSMUSG00000039865,protein_coding -6846,Neurl2,ENSMUSG00000039873,protein_coding -29228,Slc39a5,ENSMUSG00000039878,protein_coding -26954,Heca,ENSMUSG00000039879,protein_coding -15930,Lrrc17,ENSMUSG00000039883,protein_coding -18375,Tmem120a,ENSMUSG00000039886,protein_coding -12104,Alg14,ENSMUSG00000039887,protein_coding -26952,Txlnb,ENSMUSG00000039891,protein_coding -15927,Fgl2,ENSMUSG00000039899,protein_coding -55196,Armh3,ENSMUSG00000039901,protein_coding -48581,Eva1c,ENSMUSG00000039903,protein_coding -19164,Gpr37,ENSMUSG00000039904,protein_coding -40184,Slc26a11,ENSMUSG00000039908,protein_coding -26950,Cited2,ENSMUSG00000039910,protein_coding -15458,Spsb1,ENSMUSG00000039911,protein_coding -6219,Pak7,ENSMUSG00000039913,protein_coding -29222,Coq10a,ENSMUSG00000039914,protein_coding -18369,Rhbdd2,ENSMUSG00000039917,protein_coding -48580,Urb1,ENSMUSG00000039929,protein_coding -15923,Gsap,ENSMUSG00000039934,protein_coding -15447,Pik3cd,ENSMUSG00000039936,protein_coding -45328,Ptger4,ENSMUSG00000039942,protein_coding -6213,Plcb4,ENSMUSG00000039943,protein_coding -36842,Dag1,ENSMUSG00000039952,protein_coding -15446,Clstn1,ENSMUSG00000039953,protein_coding -53322,Stk32a,ENSMUSG00000039954,protein_coding -48579,Mrap,ENSMUSG00000039956,protein_coding -22002,Etfbkmt,ENSMUSG00000039958,protein_coding -18364,Hip1,ENSMUSG00000039959,protein_coding 
-31814,Rhou,ENSMUSG00000039960,protein_coding -35146,Olfr906,ENSMUSG00000039962,protein_coding -40179,Ccdc40,ENSMUSG00000039963,protein_coding -13125,Zfp292,ENSMUSG00000039967,protein_coding -15915,Rsbn1l,ENSMUSG00000039968,protein_coding -40174,Tbc1d16,ENSMUSG00000039976,protein_coding -34631,Deup1,ENSMUSG00000039977,protein_coding -26801,Zc3h12d,ENSMUSG00000039981,protein_coding -54566,Dtx4,ENSMUSG00000039982,protein_coding -5823,Ccdc32,ENSMUSG00000039983,protein_coding -21990,Sinhcaf,ENSMUSG00000039985,protein_coding -15912,Phtf2,ENSMUSG00000039987,protein_coding -12741,Ankrd13c,ENSMUSG00000039988,protein_coding -40170,Cbx4,ENSMUSG00000039989,protein_coding -26394,Edrf1,ENSMUSG00000039990,protein_coding -29212,Timeless,ENSMUSG00000039994,protein_coding -3018,Ifi203,ENSMUSG00000039997,protein_coding -15902,Magi2,ENSMUSG00000040003,protein_coding -26797,Ginm1,ENSMUSG00000040006,protein_coding -5819,Bahd1,ENSMUSG00000040007,protein_coding -27918,Gnaz,ENSMUSG00000040009,protein_coding -31687,Slc7a5,ENSMUSG00000040010,protein_coding -18359,Fkbp6,ENSMUSG00000040013,protein_coding -12734,Ptger3,ENSMUSG00000040016,protein_coding -24081,Saa4,ENSMUSG00000040017,protein_coding -55130,Cox15,ENSMUSG00000040018,protein_coding -26794,Lats1,ENSMUSG00000040021,protein_coding -55415,Rab11fip2,ENSMUSG00000040022,protein_coding -14856,Ythdf2,ENSMUSG00000040025,protein_coding -24080,Saa3,ENSMUSG00000040026,protein_coding -29429,Elavl1,ENSMUSG00000040028,protein_coding -21984,Ipo8,ENSMUSG00000040029,protein_coding -29215,Stat2,ENSMUSG00000040033,protein_coding -26793,Nup43,ENSMUSG00000040034,protein_coding -5815,Disp2,ENSMUSG00000040035,protein_coding -12716,Negr1,ENSMUSG00000040037,protein_coding -33569,Ift88,ENSMUSG00000040040,protein_coding -29206,Rbms2,ENSMUSG00000040043,protein_coding -13118,Orc3,ENSMUSG00000040044,protein_coding -24077,Tph1,ENSMUSG00000040046,protein_coding -49314,Ndufb10,ENSMUSG00000040048,protein_coding -29205,Baz2a,ENSMUSG00000040054,protein_coding 
-33562,Gjb6,ENSMUSG00000040055,protein_coding -5811,Plcb2,ENSMUSG00000040061,protein_coding -54564,Pfpl,ENSMUSG00000040065,protein_coding -5808,Bub1b,ENSMUSG00000040084,protein_coding -12697,Tnni3k,ENSMUSG00000040086,protein_coding -5806,Bmf,ENSMUSG00000040093,protein_coding -49237,Flywch1,ENSMUSG00000040097,protein_coding -21956,Klhl42,ENSMUSG00000040102,protein_coding -54839,Plpp6,ENSMUSG00000040105,protein_coding -35258,Gramd1b,ENSMUSG00000040111,protein_coding -21954,Mrps35,ENSMUSG00000040112,protein_coding -2730,Mettl11b,ENSMUSG00000040113,protein_coding -15874,Cacna2d1,ENSMUSG00000040118,protein_coding -21953,Rep15,ENSMUSG00000040121,protein_coding -33550,Zmym5,ENSMUSG00000040123,protein_coding -2726,Gorab,ENSMUSG00000040124,protein_coding -26361,Gpr26,ENSMUSG00000040125,protein_coding -29190,Sdr9c7,ENSMUSG00000040127,protein_coding -13099,Pnrc1,ENSMUSG00000040128,protein_coding -6801,Svs2,ENSMUSG00000040132,protein_coding -5800,Gpr176,ENSMUSG00000040133,protein_coding -29189,Rdh7,ENSMUSG00000040134,protein_coding -24063,Abcc8,ENSMUSG00000040136,protein_coding -7535,Ndp,ENSMUSG00000040138,protein_coding -26448,9430038I01Rik,ENSMUSG00000040139,protein_coding -50232,Tdrd6,ENSMUSG00000040140,protein_coding -34817,Rgl3,ENSMUSG00000040146,protein_coding -7534,Maob,ENSMUSG00000040147,protein_coding -26353,Hmx3,ENSMUSG00000040148,protein_coding -12536,Hs2st1,ENSMUSG00000040151,protein_coding -5794,Thbs1,ENSMUSG00000040152,protein_coding -6796,Wfdc5,ENSMUSG00000040154,protein_coding -38739,Tax1bp3,ENSMUSG00000040158,protein_coding -21950,1700034J05Rik,ENSMUSG00000040163,protein_coding -6795,Kcns1,ENSMUSG00000040164,protein_coding -29407,Cd209c,ENSMUSG00000040165,protein_coding -26345,Ikzf5,ENSMUSG00000040167,protein_coding -2714,Fmo2,ENSMUSG00000040170,protein_coding -5461,Alkbh3,ENSMUSG00000040174,protein_coding -26342,2310057M21Rik,ENSMUSG00000040177,protein_coding -2713,Fmo1,ENSMUSG00000040181,protein_coding -13086,Ankrd6,ENSMUSG00000040183,protein_coding 
-21946,Arntl2,ENSMUSG00000040187,protein_coding -35708,Scamp2,ENSMUSG00000040188,protein_coding -24056,Ccdc114,ENSMUSG00000040189,protein_coding -29176,Nemp1,ENSMUSG00000040195,protein_coding -29404,Cd209e,ENSMUSG00000040197,protein_coding -35903,Pclaf,ENSMUSG00000040204,protein_coding -26339,Cuzd1,ENSMUSG00000040205,protein_coding -9946,Zfp704,ENSMUSG00000040209,protein_coding -24054,Emp3,ENSMUSG00000040212,protein_coding -12502,Kyat3,ENSMUSG00000040213,protein_coding -35532,Ttc12,ENSMUSG00000040219,protein_coding -31761,Gas8,ENSMUSG00000040220,protein_coding -2710,Prrc2c,ENSMUSG00000040225,protein_coding -7511,Gpr34,ENSMUSG00000040229,protein_coding -24052,Syngr4,ENSMUSG00000040231,protein_coding -21939,Tm7sf3,ENSMUSG00000040234,protein_coding -29395,Trappc5,ENSMUSG00000040236,protein_coding -21938,Fgfr1op2,ENSMUSG00000040242,protein_coding -54121,Tbc1d10c,ENSMUSG00000040247,protein_coding -29170,Lrp1,ENSMUSG00000040249,protein_coding -21937,Ints13,ENSMUSG00000040250,protein_coding -12494,Gbp7,ENSMUSG00000040253,protein_coding -15815,Sema3d,ENSMUSG00000040254,protein_coding -29169,Nxph4,ENSMUSG00000040258,protein_coding -50394,Daam2,ENSMUSG00000040260,protein_coding -31681,Klhdc4,ENSMUSG00000040263,protein_coding -12497,Gbp2b,ENSMUSG00000040264,protein_coding -2697,Dnm3,ENSMUSG00000040265,protein_coding -26332,Plekha1,ENSMUSG00000040268,protein_coding -9936,Mrps28,ENSMUSG00000040269,protein_coding -13076,Bach2,ENSMUSG00000040270,protein_coding -5455,Accs,ENSMUSG00000040272,protein_coding -15644,Cdk6,ENSMUSG00000040274,protein_coding -49492,Pacsin1,ENSMUSG00000040276,protein_coding -29167,Ndufa4l2,ENSMUSG00000040280,protein_coding -5767,BC052040,ENSMUSG00000040282,protein_coding -37970,Btnl9,ENSMUSG00000040283,protein_coding -33525,Gzmg,ENSMUSG00000040284,protein_coding -29166,Stac3,ENSMUSG00000040287,protein_coding -9931,Hey1,ENSMUSG00000040289,protein_coding -13171,Ddx58,ENSMUSG00000040296,protein_coding -2691,Suco,ENSMUSG00000040297,protein_coding 
-26330,Btbd16,ENSMUSG00000040298,protein_coding -15651,Rbm48,ENSMUSG00000040302,protein_coding -5452,Alx4,ENSMUSG00000040310,protein_coding -49925,Cchcr1,ENSMUSG00000040312,protein_coding -33522,Ctsg,ENSMUSG00000040314,protein_coding -5752,Zfp770,ENSMUSG00000040321,protein_coding -11964,Slc25a24,ENSMUSG00000040322,protein_coding -36762,Dcaf1,ENSMUSG00000040325,protein_coding -50304,Cul9,ENSMUSG00000040327,protein_coding -37959,Olfr56,ENSMUSG00000040328,protein_coding -9920,Il7,ENSMUSG00000040329,protein_coding -26325,Nsmce4a,ENSMUSG00000040331,protein_coding -11958,Fam102b,ENSMUSG00000040339,protein_coding -29377,Tex45,ENSMUSG00000040340,protein_coding -29159,Arhgap9,ENSMUSG00000040345,protein_coding -37952,Trim7,ENSMUSG00000040350,protein_coding -15659,Ankib1,ENSMUSG00000040351,protein_coding -29157,Mars,ENSMUSG00000040354,protein_coding -49845,Skiv2l,ENSMUSG00000040356,protein_coding -13022,Ufl1,ENSMUSG00000040359,protein_coding -7480,Bcor,ENSMUSG00000040363,protein_coding -24033,Sec1,ENSMUSG00000040364,protein_coding -37950,Trim41,ENSMUSG00000040365,protein_coding -15663,Lrrd1,ENSMUSG00000040367,protein_coding -21910,Etfrf1,ENSMUSG00000040370,protein_coding -13019,Gpr63,ENSMUSG00000040372,protein_coding -39909,Cacng5,ENSMUSG00000040373,protein_coding -9899,Pex2,ENSMUSG00000040374,protein_coding -33506,Cbln3,ENSMUSG00000040380,protein_coding -5750,Aqr,ENSMUSG00000040383,protein_coding -54122,Ppp1ca,ENSMUSG00000040385,protein_coding -13015,Klhl32,ENSMUSG00000040387,protein_coding -11946,Wdr47,ENSMUSG00000040389,protein_coding -23210,Map3k10,ENSMUSG00000040390,protein_coding -29522,Abhd13,ENSMUSG00000040396,protein_coding -37933,Havcr1,ENSMUSG00000040405,protein_coding -15666,Akap9,ENSMUSG00000040407,protein_coding -12995,Fbxl4,ENSMUSG00000040410,protein_coding -11937,5330417C22Rik,ENSMUSG00000040412,protein_coding -37931,Timd2,ENSMUSG00000040413,protein_coding -55127,Slc25a28,ENSMUSG00000040414,protein_coding -29147,Dtx3,ENSMUSG00000040415,protein_coding 
-45511,Cdh18,ENSMUSG00000040420,protein_coding -2654,Rc3h1,ENSMUSG00000040423,protein_coding -23204,Hipk4,ENSMUSG00000040424,protein_coding -24019,Plekha4,ENSMUSG00000040428,protein_coding -15665,Mterf1a,ENSMUSG00000040429,protein_coding -39891,Pitpnc1,ENSMUSG00000040430,protein_coding -33499,Ltb4r2,ENSMUSG00000040432,protein_coding -36508,Zbtb38,ENSMUSG00000040433,protein_coding -5424,Large2,ENSMUSG00000040434,protein_coding -24018,Ppp1r15a,ENSMUSG00000040435,protein_coding -29145,Slc26a10,ENSMUSG00000040441,protein_coding -52940,Rprd1a,ENSMUSG00000040446,protein_coding -38717,Spns2,ENSMUSG00000040447,protein_coding -54886,Sgms1,ENSMUSG00000040451,protein_coding -45500,Cdh12,ENSMUSG00000040452,protein_coding -12988,Usp45,ENSMUSG00000040455,protein_coding -7445,Hypm,ENSMUSG00000040456,protein_coding -29493,Arglu1,ENSMUSG00000040459,protein_coding -29141,Os9,ENSMUSG00000040462,protein_coding -38716,Mybbp1a,ENSMUSG00000040463,protein_coding -15689,Gtpbp10,ENSMUSG00000040464,protein_coding -23198,Blvrb,ENSMUSG00000040466,protein_coding -38715,Ggt6,ENSMUSG00000040471,protein_coding -33495,Rabggta,ENSMUSG00000040472,protein_coding -15692,Cfap69,ENSMUSG00000040473,protein_coding -12985,Prdm13,ENSMUSG00000040478,protein_coding -5414,Dgkz,ENSMUSG00000040479,protein_coding -39886,Bptf,ENSMUSG00000040481,protein_coding -49844,Dxo,ENSMUSG00000040482,protein_coding -38709,Xaf1,ENSMUSG00000040483,protein_coding -2837,Lrrc52,ENSMUSG00000040485,protein_coding -23193,Ltbp4,ENSMUSG00000040488,protein_coding -37904,Sox30,ENSMUSG00000040489,protein_coding -50385,Lrfn2,ENSMUSG00000040490,protein_coding -5412,Chrm4,ENSMUSG00000040495,protein_coding -22848,Igsf23,ENSMUSG00000040498,protein_coding -29136,March9,ENSMUSG00000040502,protein_coding -50936,Abcg5,ENSMUSG00000040505,protein_coding -5410,Ambra1,ENSMUSG00000040506,protein_coding -22845,Pvr,ENSMUSG00000040511,protein_coding -15713,Tex47,ENSMUSG00000040514,protein_coding -13036,Manea,ENSMUSG00000040520,protein_coding 
-29132,Tsfm,ENSMUSG00000040521,protein_coding -9839,Tlr8,ENSMUSG00000040522,protein_coding -35899,Zfp609,ENSMUSG00000040524,protein_coding -22835,Cblc,ENSMUSG00000040525,protein_coding -39872,Milr1,ENSMUSG00000040528,protein_coding -18344,Abhd11,ENSMUSG00000040532,protein_coding -14833,Matn1,ENSMUSG00000040533,protein_coding -12916,Necab1,ENSMUSG00000040536,protein_coding -15723,Adam22,ENSMUSG00000040537,protein_coding -35253,Tmem225,ENSMUSG00000040541,protein_coding -38700,Pitpnm3,ENSMUSG00000040543,protein_coding -39870,Tex2,ENSMUSG00000040548,protein_coding -5401,Ckap5,ENSMUSG00000040549,protein_coding -12913,Otud6b,ENSMUSG00000040550,protein_coding -21284,C3ar1,ENSMUSG00000040552,protein_coding -38698,Aipl1,ENSMUSG00000040554,protein_coding -18338,Mettl27,ENSMUSG00000040557,protein_coding -53621,Wdr7,ENSMUSG00000040560,protein_coding -11904,Gstm2,ENSMUSG00000040562,protein_coding -34814,Plppr2,ENSMUSG00000040563,protein_coding -22827,Apoc1,ENSMUSG00000040564,protein_coding -54982,Btaf1,ENSMUSG00000040565,protein_coding -12910,Slc26a7,ENSMUSG00000040569,protein_coding -15730,Rundc3b,ENSMUSG00000040570,protein_coding -18332,Tmem270,ENSMUSG00000040576,protein_coding -23147,Cyp2b13,ENSMUSG00000040583,protein_coding -15732,Abcb1a,ENSMUSG00000040584,protein_coding -9820,Ofd1,ENSMUSG00000040586,protein_coding -5396,1110051M20Rik,ENSMUSG00000040591,protein_coding -39862,Cd79b,ENSMUSG00000040592,protein_coding -37788,Ranbp17,ENSMUSG00000040594,protein_coding -2812,Pogk,ENSMUSG00000040596,protein_coding -38678,Mis12,ENSMUSG00000040599,protein_coding -11893,Eps8l3,ENSMUSG00000040600,protein_coding -23159,Nlrp4a,ENSMUSG00000040601,protein_coding -48736,Bace2,ENSMUSG00000040605,protein_coding -15206,Kazn,ENSMUSG00000040606,protein_coding -37786,Tlx3,ENSMUSG00000040610,protein_coding -2808,Ildr2,ENSMUSG00000040612,protein_coding -21271,Apobec1,ENSMUSG00000040613,protein_coding -23158,Nlrp9c,ENSMUSG00000040614,protein_coding -15203,Tmem51,ENSMUSG00000040616,protein_coding 
-33471,Pck2,ENSMUSG00000040618,protein_coding -38675,Dhx33,ENSMUSG00000040620,protein_coding -9817,Gemin8,ENSMUSG00000040621,protein_coding -26748,Plekhg1,ENSMUSG00000040624,protein_coding -21267,Aicda,ENSMUSG00000040627,protein_coding -2807,Mael,ENSMUSG00000040629,protein_coding -31184,Dok4,ENSMUSG00000040631,protein_coding -33468,Nrl,ENSMUSG00000040632,protein_coding -32430,Erc2,ENSMUSG00000040640,protein_coding -1733,Ppip5k2,ENSMUSG00000040648,protein_coding -21264,Rimklb,ENSMUSG00000040649,protein_coding -23163,Cyp2b23,ENSMUSG00000040650,protein_coding -32425,Tasor,ENSMUSG00000040651,protein_coding -35898,Oaz2,ENSMUSG00000040652,protein_coding -26745,Ppp1r14c,ENSMUSG00000040653,protein_coding -50303,Dnph1,ENSMUSG00000040658,protein_coding -15196,Efhd2,ENSMUSG00000040659,protein_coding -23151,Cyp2b9,ENSMUSG00000040660,protein_coding -36760,Rad54l2,ENSMUSG00000040661,protein_coding -54126,Clcf1,ENSMUSG00000040663,protein_coding -48725,Sh3bgr,ENSMUSG00000040666,protein_coding -38672,Nup88,ENSMUSG00000040667,protein_coding -21260,Phc1,ENSMUSG00000040669,protein_coding -26752,Mthfd1l,ENSMUSG00000040675,protein_coding -49235,Kremen2,ENSMUSG00000040680,protein_coding -48721,Hmgn1,ENSMUSG00000040681,protein_coding -5386,Madd,ENSMUSG00000040687,protein_coding -49308,Tbl3,ENSMUSG00000040688,protein_coding -14810,Col16a1,ENSMUSG00000040690,protein_coding -1722,Slco4c1,ENSMUSG00000040693,protein_coding -50378,Apobec2,ENSMUSG00000040694,protein_coding -15191,Dnajc16,ENSMUSG00000040697,protein_coding -39850,Limd2,ENSMUSG00000040699,protein_coding -33454,Ap1g2,ENSMUSG00000040701,protein_coding -23133,Cyp2s1,ENSMUSG00000040703,protein_coding -15190,Agmat,ENSMUSG00000040706,protein_coding -1708,St8sia4,ENSMUSG00000040710,protein_coding -37769,Sh3pxd2b,ENSMUSG00000040711,protein_coding -38659,Camta2,ENSMUSG00000040712,protein_coding -2791,Creg1,ENSMUSG00000040713,protein_coding -22807,Klc3,ENSMUSG00000040714,protein_coding -15189,Rsc1a1,ENSMUSG00000040715,protein_coding 
-32420,Il17rd,ENSMUSG00000040717,protein_coding -12870,Virma,ENSMUSG00000040720,protein_coding -33452,Zfhx2,ENSMUSG00000040721,protein_coding -35699,Scamp5,ENSMUSG00000040722,protein_coding -2787,Rcsd1,ENSMUSG00000040723,protein_coding -11871,Kcna2,ENSMUSG00000040724,protein_coding -23130,Hnrnpul1,ENSMUSG00000040725,protein_coding -32419,Hesx1,ENSMUSG00000040726,protein_coding -12867,Esrp1,ENSMUSG00000040728,protein_coding -34513,Cep126,ENSMUSG00000040729,protein_coding -18327,Eif4h,ENSMUSG00000040731,protein_coding -48708,Erg,ENSMUSG00000040732,protein_coding -22803,Ppp1r13l,ENSMUSG00000040734,protein_coding -12863,Ints8,ENSMUSG00000040738,protein_coding -15186,Slc25a34,ENSMUSG00000040740,protein_coding -38654,Rnf167,ENSMUSG00000040746,protein_coding -11864,Cd53,ENSMUSG00000040747,protein_coding -9795,Siah1b,ENSMUSG00000040749,protein_coding -18326,Lat2,ENSMUSG00000040751,protein_coding -33440,Myh6,ENSMUSG00000040752,protein_coding -33438,Cmtm5,ENSMUSG00000040759,protein_coding -32417,Appl1,ENSMUSG00000040760,protein_coding -15181,Spen,ENSMUSG00000040761,protein_coding -37760,Snrnp25,ENSMUSG00000040767,protein_coding -33437,Il25,ENSMUSG00000040770,protein_coding -50376,Oard1,ENSMUSG00000040771,protein_coding -11857,Cept1,ENSMUSG00000040774,protein_coding -2616,Cop1,ENSMUSG00000040782,protein_coding -48692,Ttc3,ENSMUSG00000040785,protein_coding -5372,C1qtnf4,ENSMUSG00000040794,protein_coding -14792,Iqcc,ENSMUSG00000040795,protein_coding -21239,Iqsec3,ENSMUSG00000040797,protein_coding -9781,S100g,ENSMUSG00000040808,protein_coding -11848,Chil3,ENSMUSG00000040809,protein_coding -22788,Eml2,ENSMUSG00000040811,protein_coding -5369,Agbl2,ENSMUSG00000040812,protein_coding -36759,Tex264,ENSMUSG00000040813,protein_coding -32409,Dennd6a,ENSMUSG00000040818,protein_coding -48683,Hlcs,ENSMUSG00000040820,protein_coding -33423,1700123O20Rik,ENSMUSG00000040822,protein_coding -22786,Snrpd2,ENSMUSG00000040824,protein_coding -50530,Catsperd,ENSMUSG00000040828,protein_coding 
-38638,Zmynd15,ENSMUSG00000040829,protein_coding -2771,Gpr161,ENSMUSG00000040836,protein_coding -39818,Gm11639,ENSMUSG00000040838,protein_coding -22782,Six5,ENSMUSG00000040841,protein_coding -15162,Szrd1,ENSMUSG00000040842,protein_coding -2769,Tiprl,ENSMUSG00000040843,protein_coding -2768,Sft2d2,ENSMUSG00000040848,protein_coding -37738,Psme4,ENSMUSG00000040850,protein_coding -50931,Plekhh2,ENSMUSG00000040852,protein_coding -9762,Reps2,ENSMUSG00000040855,protein_coding -44737,Dlk1,ENSMUSG00000040856,protein_coding -23103,Erf,ENSMUSG00000040857,protein_coding -14780,Bsdc1,ENSMUSG00000040859,protein_coding -15158,Crocc,ENSMUSG00000040860,protein_coding -998,Ino80d,ENSMUSG00000040865,protein_coding -22778,Rsph6a,ENSMUSG00000040866,protein_coding -44733,Begain,ENSMUSG00000040867,protein_coding -37046,Osbpl10,ENSMUSG00000040875,protein_coding -44730,Wdr25,ENSMUSG00000040877,protein_coding -34812,Tmem205,ENSMUSG00000040883,protein_coding -49306,Gfer,ENSMUSG00000040888,protein_coding -22776,Foxa3,ENSMUSG00000040891,protein_coding -11822,Kcnd3,ENSMUSG00000040896,protein_coding -48856,Ccr6,ENSMUSG00000040899,protein_coding -55401,Kcnk18,ENSMUSG00000040901,protein_coding -38624,Gm21988,ENSMUSG00000040904,protein_coding -23090,Atp1a3,ENSMUSG00000040907,protein_coding -55190,Fbxw4,ENSMUSG00000040913,protein_coding -2744,Slc19a2,ENSMUSG00000040918,protein_coding -37730,4930505A04Rik,ENSMUSG00000040919,protein_coding -14767,S100pbp,ENSMUSG00000040928,protein_coding -54824,Rfx3,ENSMUSG00000040929,protein_coding -15146,Padi6,ENSMUSG00000040935,protein_coding -37171,Ulk4,ENSMUSG00000040936,protein_coding -38618,Slc16a11,ENSMUSG00000040938,protein_coding -23085,Arhgef1,ENSMUSG00000040940,protein_coding -12353,Tet2,ENSMUSG00000040943,protein_coding -15144,Rcc2,ENSMUSG00000040945,protein_coding -38616,Mgl2,ENSMUSG00000040950,protein_coding -23083,Rps19,ENSMUSG00000040952,protein_coding -52802,Cables1,ENSMUSG00000040957,protein_coding -38614,Asgr2,ENSMUSG00000040963,protein_coding 
-15141,Arhgef10l,ENSMUSG00000040964,protein_coding -48912,Slc22a2,ENSMUSG00000040966,protein_coding -12349,Arhgef38,ENSMUSG00000040969,protein_coding -15132,Igsf21,ENSMUSG00000040972,protein_coding -37467,Gm11992,ENSMUSG00000040978,protein_coding -37466,Sun3,ENSMUSG00000040985,protein_coding -22765,Mill2,ENSMUSG00000040987,protein_coding -9730,Sh3kbp1,ENSMUSG00000040990,protein_coding -33395,Abhd4,ENSMUSG00000040997,protein_coding -12342,Npnt,ENSMUSG00000040998,protein_coding -14757,Trim62,ENSMUSG00000041000,protein_coding -36754,Iqcf4,ENSMUSG00000041009,protein_coding -37043,Cmtm8,ENSMUSG00000041012,protein_coding -32641,Nrg3,ENSMUSG00000041014,protein_coding -9725,Map7d2,ENSMUSG00000041020,protein_coding -15120,Iffo2,ENSMUSG00000041025,protein_coding -32630,Ghitm,ENSMUSG00000041028,protein_coding -55189,Gm17018,ENSMUSG00000041035,protein_coding -23060,Irgq,ENSMUSG00000041037,protein_coding -936,Fam117b,ENSMUSG00000041040,protein_coding -32626,Lrit1,ENSMUSG00000041044,protein_coding -37442,Ramp3,ENSMUSG00000041046,protein_coding -12971,Slc7a13,ENSMUSG00000041052,protein_coding -50745,Wdr43,ENSMUSG00000041057,protein_coding -12969,Wwp1,ENSMUSG00000041058,protein_coding -49372,Mslnl,ENSMUSG00000041062,protein_coding -35896,Pif1,ENSMUSG00000041064,protein_coding -32611,4930596D02Rik,ENSMUSG00000041068,protein_coding -37436,Nacad,ENSMUSG00000041073,protein_coding -919,Fzd7,ENSMUSG00000041075,protein_coding -32606,Grid1,ENSMUSG00000041078,protein_coding -48495,Rwdd2b,ENSMUSG00000041079,protein_coding -12299,Ostc,ENSMUSG00000041084,protein_coding -9602,Tspyl2,ENSMUSG00000041096,protein_coding -40639,Elmo1,ENSMUSG00000041112,protein_coding -9598,Iqsec2,ENSMUSG00000041115,protein_coding -22698,Ccdc8,ENSMUSG00000041117,protein_coding -49640,Pde9a,ENSMUSG00000041119,protein_coding -15105,Nbl1,ENSMUSG00000041120,protein_coding -34469,Msantd4,ENSMUSG00000041124,protein_coding -37423,H2afv,ENSMUSG00000041126,protein_coding -49304,Zfp598,ENSMUSG00000041130,protein_coding 
-18857,N4bp2l1,ENSMUSG00000041132,protein_coding -9595,Smc1a,ENSMUSG00000041133,protein_coding -48477,Cyyr1,ENSMUSG00000041134,protein_coding -12936,Ripk2,ENSMUSG00000041135,protein_coding -40633,Nme8,ENSMUSG00000041138,protein_coding -22696,Pnmal1,ENSMUSG00000041141,protein_coding -15102,Tmco4,ENSMUSG00000041143,protein_coding -711,Dnah7b,ENSMUSG00000041144,protein_coding -18851,Brca2,ENSMUSG00000041147,protein_coding -12933,Osgin2,ENSMUSG00000041153,protein_coding -15100,Otud3,ENSMUSG00000041161,protein_coding -37420,Zmiz2,ENSMUSG00000041164,protein_coding -38587,Spem1,ENSMUSG00000041165,protein_coding -50529,Lonp1,ENSMUSG00000041168,protein_coding -54976,Hectd2,ENSMUSG00000041180,protein_coding -22687,Prkd2,ENSMUSG00000041187,protein_coding -38582,Chrnb1,ENSMUSG00000041189,protein_coding -15095,Pla2g5,ENSMUSG00000041193,protein_coding -49371,Rpusd1,ENSMUSG00000041199,protein_coding -15094,Pla2g2d,ENSMUSG00000041202,protein_coding -30963,Trir,ENSMUSG00000041203,protein_coding -47557,Map6d1,ENSMUSG00000041205,protein_coding -47555,Yeats2,ENSMUSG00000041215,protein_coding -12833,Clvs1,ENSMUSG00000041216,protein_coding -5743,Arhgap11a,ENSMUSG00000041219,protein_coding -12275,Elovl6,ENSMUSG00000041220,protein_coding -52705,Arhgap12,ENSMUSG00000041225,protein_coding -9580,Phf8,ENSMUSG00000041229,protein_coding -37882,Ublcp1,ENSMUSG00000041231,protein_coding -12829,Chd7,ENSMUSG00000041235,protein_coding -40602,Vps41,ENSMUSG00000041236,protein_coding -11161,Pklr,ENSMUSG00000041237,protein_coding -52798,Rbbp8,ENSMUSG00000041238,protein_coding -15087,Mul1,ENSMUSG00000041241,protein_coding -9572,Wnk3,ENSMUSG00000041245,protein_coding -47546,Lamp3,ENSMUSG00000041247,protein_coding -34974,Kcnj1,ENSMUSG00000041248,protein_coding -5731,Tmco5b,ENSMUSG00000041255,protein_coding -53951,Zfp236,ENSMUSG00000041258,protein_coding -12822,Car8,ENSMUSG00000041261,protein_coding -11157,Rusc1,ENSMUSG00000041263,protein_coding -18812,Uspl1,ENSMUSG00000041264,protein_coding 
-35623,Dmxl2,ENSMUSG00000041268,protein_coding -12814,Tox,ENSMUSG00000041272,protein_coding -37876,Ttc1,ENSMUSG00000041278,protein_coding -38565,Sox15,ENSMUSG00000041287,protein_coding -50225,Adgrf1,ENSMUSG00000041293,protein_coding -40580,Cdk13,ENSMUSG00000041297,protein_coding -18794,Katnal1,ENSMUSG00000041298,protein_coding -19082,Cftr,ENSMUSG00000041301,protein_coding -820,Gtf3c3,ENSMUSG00000041303,protein_coding -31429,Sntb2,ENSMUSG00000041308,protein_coding -26477,Nkx6-2,ENSMUSG00000041309,protein_coding -18784,Slc7a1,ENSMUSG00000041313,protein_coding -49227,Thoc6,ENSMUSG00000041319,protein_coding -44674,Ak7,ENSMUSG00000041323,protein_coding -40556,Inhba,ENSMUSG00000041324,protein_coding -25171,Pcf11,ENSMUSG00000041328,protein_coding -38558,Atp1b2,ENSMUSG00000041329,protein_coding -13610,Mup4,ENSMUSG00000041333,protein_coding -44671,Atg2b,ENSMUSG00000041341,protein_coding -25167,Ankrd42,ENSMUSG00000041343,protein_coding -38556,Wrap53,ENSMUSG00000041346,protein_coding -44670,Bdkrb1,ENSMUSG00000041347,protein_coding -15067,Rap1gap,ENSMUSG00000041351,protein_coding -9550,Tmem29,ENSMUSG00000041353,protein_coding -49759,Rgl2,ENSMUSG00000041354,protein_coding -11131,Ssr2,ENSMUSG00000041355,protein_coding -5717,Nutm1,ENSMUSG00000041358,protein_coding -44662,Tcl1,ENSMUSG00000041359,protein_coding -54821,Pum3,ENSMUSG00000041360,protein_coding -35996,Myzap,ENSMUSG00000041361,protein_coding -55397,Shtn1,ENSMUSG00000041362,protein_coding -21205,B4galnt3,ENSMUSG00000041372,protein_coding -22660,Ccdc9,ENSMUSG00000041375,protein_coding -21203,Ninj2,ENSMUSG00000041377,protein_coding -47506,Cldn5,ENSMUSG00000041378,protein_coding -9484,Htr2c,ENSMUSG00000041380,protein_coding -19040,Mdfic,ENSMUSG00000041390,protein_coding -2735,Mettl18,ENSMUSG00000041396,protein_coding -15062,1700013G24Rik,ENSMUSG00000041399,protein_coding -2734,BC055324,ENSMUSG00000041406,protein_coding -32603,Wapl,ENSMUSG00000041408,protein_coding -44645,Dicer1,ENSMUSG00000041415,protein_coding 
-42530,Pik3r1,ENSMUSG00000041417,protein_coding -22653,Meis3,ENSMUSG00000041420,protein_coding -11112,Paqr6,ENSMUSG00000041423,protein_coding -792,Hibch,ENSMUSG00000041426,protein_coding -49301,Nthl1,ENSMUSG00000041429,protein_coding -42514,Ccnb1,ENSMUSG00000041431,protein_coding -31427,Utp4,ENSMUSG00000041438,protein_coding -788,Mfsd6,ENSMUSG00000041439,protein_coding -36494,Gk5,ENSMUSG00000041440,protein_coding -34970,Arhgap32,ENSMUSG00000041444,protein_coding -32592,Mmrn2,ENSMUSG00000041445,protein_coding -18747,Rpl21,ENSMUSG00000041453,protein_coding -15417,Tardbp,ENSMUSG00000041459,protein_coding -21190,Cacna2d4,ENSMUSG00000041460,protein_coding -18742,Gpr12,ENSMUSG00000041468,protein_coding -32584,Shld2,ENSMUSG00000041471,protein_coding -9708,Smpx,ENSMUSG00000041476,protein_coding -21189,Dcp1b,ENSMUSG00000041477,protein_coding -32582,Syt15,ENSMUSG00000041479,protein_coding -44633,Serpina3g,ENSMUSG00000041481,protein_coding -53611,Piezo2,ENSMUSG00000041482,protein_coding -2284,Zfp281,ENSMUSG00000041483,protein_coding -54524,Stx3,ENSMUSG00000041488,protein_coding -54663,Cep78,ENSMUSG00000041491,protein_coding -2277,Kif14,ENSMUSG00000041498,protein_coding -18737,Gm3402,ENSMUSG00000041505,protein_coding -36749,Rrp9,ENSMUSG00000041506,protein_coding -31656,Irf8,ENSMUSG00000041515,protein_coding -35392,Upk2,ENSMUSG00000041523,protein_coding -36830,Rnf123,ENSMUSG00000041528,protein_coding -14702,Ago1,ENSMUSG00000041530,protein_coding -32566,Rbp3,ENSMUSG00000041534,protein_coding -44626,Serpina3a,ENSMUSG00000041536,protein_coding -49799,H2-Ob,ENSMUSG00000041538,protein_coding -21893,Sox5,ENSMUSG00000041540,protein_coding -15403,Disp3,ENSMUSG00000041544,protein_coding -17874,Hspb8,ENSMUSG00000041548,protein_coding -44624,Serpina5,ENSMUSG00000041550,protein_coding -9681,Ptchd1,ENSMUSG00000041552,protein_coding -15402,Fbxo2,ENSMUSG00000041556,protein_coding -2196,Fmod,ENSMUSG00000041559,protein_coding -22643,Nop53,ENSMUSG00000041560,protein_coding 
-50687,L3mbtl4,ENSMUSG00000041565,protein_coding -47466,Tssk1,ENSMUSG00000041566,protein_coding -44622,Serpina12,ENSMUSG00000041567,protein_coding -2274,Camsap2,ENSMUSG00000041570,protein_coding -22642,Selenow,ENSMUSG00000041571,protein_coding -2193,Prelp,ENSMUSG00000041577,protein_coding -22639,Crx,ENSMUSG00000041578,protein_coding -22638,Obox6,ENSMUSG00000041583,protein_coding -39979,Sdk2,ENSMUSG00000041592,protein_coding -34422,Tmtc4,ENSMUSG00000041594,protein_coding -39977,Cdc42ep4,ENSMUSG00000041598,protein_coding -2268,Inava,ENSMUSG00000041605,protein_coding -53949,Mbp,ENSMUSG00000041607,protein_coding -37149,Entpd3,ENSMUSG00000041608,protein_coding -17855,Bicdl1,ENSMUSG00000041609,protein_coding -15389,Nppa,ENSMUSG00000041616,protein_coding -47451,Ccdc74a,ENSMUSG00000041617,protein_coding -34492,Mmp1b,ENSMUSG00000041620,protein_coding -39975,D11Wsu47e,ENSMUSG00000041623,protein_coding -34462,Gucy1a2,ENSMUSG00000041624,protein_coding -34420,Ggact,ENSMUSG00000041625,protein_coding -39973,Fam104a,ENSMUSG00000041629,protein_coding -42476,Mrps27,ENSMUSG00000041632,protein_coding -9631,Kctd12b,ENSMUSG00000041633,protein_coding -17850,Gcn1l1,ENSMUSG00000041638,protein_coding -2266,Kif21b,ENSMUSG00000041642,protein_coding -5654,Slc5a12,ENSMUSG00000041644,protein_coding -44592,Ddx24,ENSMUSG00000041645,protein_coding -9624,Klf8,ENSMUSG00000041649,protein_coding -34417,Pcca,ENSMUSG00000041650,protein_coding -46604,Pnpla3,ENSMUSG00000041653,protein_coding -39969,Slc39a11,ENSMUSG00000041654,protein_coding -9622,Rragb,ENSMUSG00000041658,protein_coding -5650,Bbox1,ENSMUSG00000041660,protein_coding -44584,Prima1,ENSMUSG00000041669,protein_coding -300,Rims1,ENSMUSG00000041670,protein_coding -21867,Pyroxd1,ENSMUSG00000041671,protein_coding -32549,Lrrc18,ENSMUSG00000041673,protein_coding -31356,Lrrc29,ENSMUSG00000041679,protein_coding -21866,Iapp,ENSMUSG00000041681,protein_coding -673,Bivm,ENSMUSG00000041684,protein_coding -42458,Fcho2,ENSMUSG00000041685,protein_coding 
-9466,Amot,ENSMUSG00000041688,protein_coding -39956,Kcnj2,ENSMUSG00000041695,protein_coding -35888,Rasl12,ENSMUSG00000041696,protein_coding -17834,Cox6a1,ENSMUSG00000041697,protein_coding -21857,Slco1a1,ENSMUSG00000041698,protein_coding -9465,Lhfpl1,ENSMUSG00000041700,protein_coding -44575,Btbd7,ENSMUSG00000041702,protein_coding -34411,Zic5,ENSMUSG00000041703,protein_coding -32544,Tmem273,ENSMUSG00000041707,protein_coding -46597,Mpped1,ENSMUSG00000041708,protein_coding -9454,Trpc5,ENSMUSG00000041710,protein_coding -44573,Ubr7,ENSMUSG00000041712,protein_coding -44569,Gm20604,ENSMUSG00000041716,protein_coding -9452,Alg13,ENSMUSG00000041718,protein_coding -47437,Pi4ka,ENSMUSG00000041720,protein_coding -288,Khdc1c,ENSMUSG00000041722,protein_coding -35814,Coro2b,ENSMUSG00000041729,protein_coding -32541,Prrxl1,ENSMUSG00000041730,protein_coding -54781,Pgm5,ENSMUSG00000041731,protein_coding -17827,Coq5,ENSMUSG00000041733,protein_coding -11064,Kirrel,ENSMUSG00000041734,protein_coding -15279,Gm13178,ENSMUSG00000041735,protein_coding -46594,Tspo,ENSMUSG00000041736,protein_coding -34953,Tmem45b,ENSMUSG00000041737,protein_coding -17825,Rnf10,ENSMUSG00000041740,protein_coding -21848,Pde3a,ENSMUSG00000041741,protein_coding -42440,Utp15,ENSMUSG00000041747,protein_coding -50369,Trem3,ENSMUSG00000041754,protein_coding -2168,Plekha6,ENSMUSG00000041757,protein_coding -4817,Gpr155,ENSMUSG00000041762,protein_coding -666,Tpp2,ENSMUSG00000041763,protein_coding -34397,Ubac2,ENSMUSG00000041765,protein_coding -26461,Ppp2r2d,ENSMUSG00000041769,protein_coding -44554,Slc24a4,ENSMUSG00000041771,protein_coding -42423,Enc1,ENSMUSG00000041773,protein_coding -47429,Ydjc,ENSMUSG00000041774,protein_coding -26460,Mapk1ip1,ENSMUSG00000041775,protein_coding -4815,Cir1,ENSMUSG00000041777,protein_coding -275,Tram2,ENSMUSG00000041779,protein_coding -44552,Cpsf2,ENSMUSG00000041781,protein_coding -2254,Lad1,ENSMUSG00000041782,protein_coding -21827,Capza3,ENSMUSG00000041791,protein_coding 
-37143,Myrip,ENSMUSG00000041794,protein_coding -39950,Abca9,ENSMUSG00000041797,protein_coding -37406,Gck,ENSMUSG00000041798,protein_coding -2252,Phlda3,ENSMUSG00000041801,protein_coding -15221,Pramel1,ENSMUSG00000041805,protein_coding -274,Efhc1,ENSMUSG00000041809,protein_coding -46578,Poldip3,ENSMUSG00000041815,protein_coding -42417,Fam169a,ENSMUSG00000041817,protein_coding -17808,Oasl1,ENSMUSG00000041827,protein_coding -39949,Abca8a,ENSMUSG00000041828,protein_coding -48822,Sytl3,ENSMUSG00000041831,protein_coding -26420,Ptpre,ENSMUSG00000041836,protein_coding -35884,Pdcd7,ENSMUSG00000041837,protein_coding -53859,Haus1,ENSMUSG00000041840,protein_coding -45322,Rpl37,ENSMUSG00000041841,protein_coding -11010,Fhdc1,ENSMUSG00000041842,protein_coding -54137,Rhod,ENSMUSG00000041845,protein_coding -44533,Ppp4r3a,ENSMUSG00000041846,protein_coding -45321,Card6,ENSMUSG00000041849,protein_coding -46569,Tcf20,ENSMUSG00000041852,protein_coding -54517,Oosp1,ENSMUSG00000041857,protein_coding -266,Mcm3,ENSMUSG00000041859,protein_coding -17798,Ankrd13a,ENSMUSG00000041870,protein_coding -265,Il17f,ENSMUSG00000041872,protein_coding -2244,Ipo9,ENSMUSG00000041879,protein_coding -49747,Ndufa7,ENSMUSG00000041881,protein_coding -25607,Olfr680-ps1,ENSMUSG00000041885,protein_coding -45282,Macc1,ENSMUSG00000041886,protein_coding -2243,Shisa4,ENSMUSG00000041889,protein_coding -17796,Git2,ENSMUSG00000041890,protein_coding -53684,Lman1,ENSMUSG00000041891,protein_coding -39939,Wipi1,ENSMUSG00000041895,protein_coding -642,Gpr45,ENSMUSG00000041907,protein_coding -4779,Dlx1,ENSMUSG00000041911,protein_coding -11415,Tdrkh,ENSMUSG00000041912,protein_coding -52998,Ammecr1l,ENSMUSG00000041915,protein_coding -39935,Slc16a6,ENSMUSG00000041920,protein_coding -4776,Metap1d,ENSMUSG00000041921,protein_coding -52918,Nol4,ENSMUSG00000041923,protein_coding -2239,Rnpep,ENSMUSG00000041926,protein_coding -17790,Fam222a,ENSMUSG00000041930,protein_coding -45305,AW549877,ENSMUSG00000041935,protein_coding 
-15621,Agrn,ENSMUSG00000041936,protein_coding -17788,Mvk,ENSMUSG00000041939,protein_coding -621,Mfsd9,ENSMUSG00000041945,protein_coding -31423,Tango6,ENSMUSG00000041949,protein_coding -15613,Tnfrsf18,ENSMUSG00000041954,protein_coding -47393,Pkp2,ENSMUSG00000041957,protein_coding -38946,Pigs,ENSMUSG00000041958,protein_coding -11362,S100a10,ENSMUSG00000041959,protein_coding -37387,Znrf3,ENSMUSG00000041961,protein_coding -4768,Dcaf17,ENSMUSG00000041966,protein_coding -47390,Spidr,ENSMUSG00000041974,protein_coding -4767,Mettl8,ENSMUSG00000041975,protein_coding -11075,Arhgef11,ENSMUSG00000041977,protein_coding -11353,Rptn,ENSMUSG00000041984,protein_coding -35616,Elmod1,ENSMUSG00000041986,protein_coding -11350,Hrnr,ENSMUSG00000041991,protein_coding -45261,Rapgef5,ENSMUSG00000041992,protein_coding -42374,Zbed3,ENSMUSG00000041995,protein_coding -4764,Tlk1,ENSMUSG00000041997,protein_coding -17782,Foxn4,ENSMUSG00000042002,protein_coding -17780,Acacb,ENSMUSG00000042010,protein_coding -42368,Wdr41,ENSMUSG00000042015,protein_coding -45251,Ncapg2,ENSMUSG00000042029,protein_coding -11328,Lce3b,ENSMUSG00000042031,protein_coding -37840,Mat2b,ENSMUSG00000042032,protein_coding -11695,Igsf3,ENSMUSG00000042035,protein_coding -54140,2010003K11Rik,ENSMUSG00000042041,protein_coding -21175,Csgalnact2,ENSMUSG00000042042,protein_coding -42366,Tbca,ENSMUSG00000042043,protein_coding -35614,Sln,ENSMUSG00000042045,protein_coding -2151,Dstyk,ENSMUSG00000042046,protein_coding -45245,Wdr60,ENSMUSG00000042050,protein_coding -26312,Wdr11,ENSMUSG00000042055,protein_coding -45238,Zfp386,ENSMUSG00000042063,protein_coding -4753,Myo3b,ENSMUSG00000042064,protein_coding -2149,Tmcc2,ENSMUSG00000042066,protein_coding -36744,Abhd14b,ENSMUSG00000042073,protein_coding -17774,Svop,ENSMUSG00000042078,protein_coding -21167,Hnrnpf,ENSMUSG00000042079,protein_coding -42344,Arsb,ENSMUSG00000042082,protein_coding -11312,Lce1c,ENSMUSG00000042092,protein_coding -17773,Dao,ENSMUSG00000042096,protein_coding 
-21165,Zfp239,ENSMUSG00000042097,protein_coding -49745,Kank3,ENSMUSG00000042099,protein_coding -42340,Dmgdh,ENSMUSG00000042102,protein_coding -34363,Uggt2,ENSMUSG00000042104,protein_coding -26296,Inpp5f,ENSMUSG00000042105,protein_coding -36821,Inka1,ENSMUSG00000042106,protein_coding -46500,Csdc2,ENSMUSG00000042109,protein_coding -461,Ccdc115,ENSMUSG00000042111,protein_coding -2146,Klhdc8a,ENSMUSG00000042115,protein_coding -15588,Vwa1,ENSMUSG00000042116,protein_coding -42339,Bhmt2,ENSMUSG00000042118,protein_coding -17772,Ssh1,ENSMUSG00000042121,protein_coding -11316,Lce1f,ENSMUSG00000042124,protein_coding -21145,Rassf4,ENSMUSG00000042129,protein_coding -4740,Ppig,ENSMUSG00000042133,protein_coding -35092,Msantd2,ENSMUSG00000042138,protein_coding -38445,Cox10,ENSMUSG00000042148,protein_coding -4746,Klhl23,ENSMUSG00000042155,protein_coding -34359,Dzip1,ENSMUSG00000042156,protein_coding -11294,Sprr2i,ENSMUSG00000042157,protein_coding -11296,Gm9774,ENSMUSG00000042165,protein_coding -42327,Tent2,ENSMUSG00000042167,protein_coding -26279,Armc5,ENSMUSG00000042178,protein_coding -55389,Pnliprp1,ENSMUSG00000042179,protein_coding -445,Bend6,ENSMUSG00000042182,protein_coding -17758,1700069L16Rik,ENSMUSG00000042184,protein_coding -34952,Nfrkb,ENSMUSG00000042185,protein_coding -38435,Tekt3,ENSMUSG00000042189,protein_coding -17756,Cmklr1,ENSMUSG00000042190,protein_coding -35613,Slc35f2,ENSMUSG00000042195,protein_coding -443,Zfp451,ENSMUSG00000042197,protein_coding -12786,Chchd7,ENSMUSG00000042198,protein_coding -38433,Cdrt4,ENSMUSG00000042200,protein_coding -15574,Slc35e2,ENSMUSG00000042202,protein_coding -49583,Tbc1d22b,ENSMUSG00000042203,protein_coding -2219,Kdm5b,ENSMUSG00000042207,protein_coding -37643,0610010F05Rik,ENSMUSG00000042208,protein_coding -36743,Abhd14a,ENSMUSG00000042210,protein_coding -53601,Fbxo38,ENSMUSG00000042211,protein_coding -11289,Sprr2d,ENSMUSG00000042212,protein_coding -21129,Zfand4,ENSMUSG00000042213,protein_coding 
-441,Bag2,ENSMUSG00000042215,protein_coding -17749,Sgsm1,ENSMUSG00000042216,protein_coding -25517,Olfr631,ENSMUSG00000042219,protein_coding -9435,Ammecr1,ENSMUSG00000042225,protein_coding -12776,Lyn,ENSMUSG00000042228,protein_coding -2214,Rabif,ENSMUSG00000042229,protein_coding -17739,Crybb2,ENSMUSG00000042240,protein_coding -41672,BC051665,ENSMUSG00000042243,protein_coding -11269,Pglyrp3,ENSMUSG00000042244,protein_coding -25947,Tmc7,ENSMUSG00000042246,protein_coding -55042,Cyp2c37,ENSMUSG00000042248,protein_coding -17737,Grk3,ENSMUSG00000042249,protein_coding -11260,Pglyrp4,ENSMUSG00000042250,protein_coding -2123,Pm20d1,ENSMUSG00000042251,protein_coding -35878,Cilp,ENSMUSG00000042254,protein_coding -50217,Ptchd4,ENSMUSG00000042256,protein_coding -42736,Isl1,ENSMUSG00000042258,protein_coding -37130,Ccr8,ENSMUSG00000042262,protein_coding -50367,Trem1,ENSMUSG00000042265,protein_coding -2122,Slc26a9,ENSMUSG00000042268,protein_coding -31637,Fam92b,ENSMUSG00000042269,protein_coding -9421,Nxt2,ENSMUSG00000042271,protein_coding -4905,Sestd1,ENSMUSG00000042272,protein_coding -42726,Pelo,ENSMUSG00000042275,protein_coding -21120,H1foo,ENSMUSG00000042279,protein_coding -9418,Gucy2f,ENSMUSG00000042282,protein_coding -42724,Itga1,ENSMUSG00000042284,protein_coding -32491,Stab1,ENSMUSG00000042286,protein_coding -26244,Hsd3b7,ENSMUSG00000042289,protein_coding -46467,Mrtfa,ENSMUSG00000042292,protein_coding -35517,Gm5617,ENSMUSG00000042293,protein_coding -38400,Ttc19,ENSMUSG00000042298,protein_coding -37609,Ehbp1,ENSMUSG00000042302,protein_coding -46465,Sgsm3,ENSMUSG00000042303,protein_coding -2207,Tmem183a,ENSMUSG00000042305,protein_coding -11244,S100a14,ENSMUSG00000042306,protein_coding -26241,Setd1a,ENSMUSG00000042308,protein_coding -11243,S100a13,ENSMUSG00000042312,protein_coding -44203,Prox2,ENSMUSG00000042320,protein_coding -32486,Pbrm1,ENSMUSG00000042323,protein_coding -17722,Hps4,ENSMUSG00000042328,protein_coding -38396,Specc1,ENSMUSG00000042331,protein_coding 
-15554,Tnfrsf14,ENSMUSG00000042333,protein_coding -26237,Ctf1,ENSMUSG00000042340,protein_coding -49635,Ubash3a,ENSMUSG00000042345,protein_coding -42707,Arl15,ENSMUSG00000042348,protein_coding -2108,Ikbke,ENSMUSG00000042349,protein_coding -44198,Arel1,ENSMUSG00000042350,protein_coding -46455,Grap2,ENSMUSG00000042351,protein_coding -30835,Frem3,ENSMUSG00000042353,protein_coding -32482,Gnl3,ENSMUSG00000042354,protein_coding -14738,Gjb5,ENSMUSG00000042357,protein_coding -4895,Osbpl6,ENSMUSG00000042359,protein_coding -37581,Lgalsl,ENSMUSG00000042363,protein_coding -42700,Snx18,ENSMUSG00000042364,protein_coding -14736,Gjb3,ENSMUSG00000042367,protein_coding -4894,Rbm45,ENSMUSG00000042369,protein_coding -38387,Slc5a10,ENSMUSG00000042371,protein_coding -54796,Dmrt3,ENSMUSG00000042372,protein_coding -38388,Fam83g,ENSMUSG00000042377,protein_coding -42692,Esm1,ENSMUSG00000042379,protein_coding -14732,Smim12,ENSMUSG00000042380,protein_coding -42691,Gzmk,ENSMUSG00000042385,protein_coding -9397,Tex13b,ENSMUSG00000042386,protein_coding -14731,Dlgap3,ENSMUSG00000042388,protein_coding -21099,Tsen2,ENSMUSG00000042389,protein_coding -11231,Gatad2b,ENSMUSG00000042390,protein_coding -35516,Rbm7,ENSMUSG00000042396,protein_coding -55098,Crtac1,ENSMUSG00000042401,protein_coding -11228,Dennd4b,ENSMUSG00000042404,protein_coding -46448,Atf4,ENSMUSG00000042406,protein_coding -14727,Zmym6,ENSMUSG00000042408,protein_coding -4887,Agps,ENSMUSG00000042410,protein_coding -156,Prdm14,ENSMUSG00000042414,protein_coding -42678,Ccno,ENSMUSG00000042417,protein_coding -49899,Nfkbil1,ENSMUSG00000042419,protein_coding -26218,Fbrs,ENSMUSG00000042423,protein_coding -9387,Frmpd3,ENSMUSG00000042425,protein_coding -42676,Dhx29,ENSMUSG00000042426,protein_coding -46443,Mgat3,ENSMUSG00000042428,protein_coding -2203,Adora1,ENSMUSG00000042429,protein_coding -9383,Pih1h3b,ENSMUSG00000042433,protein_coding -38380,Mfap4,ENSMUSG00000042436,protein_coding -53665,Zfp532,ENSMUSG00000042439,protein_coding 
-35983,Mindy2,ENSMUSG00000042444,protein_coding -14720,Zmym4,ENSMUSG00000042446,protein_coding -18983,Mios,ENSMUSG00000042447,protein_coding -4863,Hoxd1,ENSMUSG00000042448,protein_coding -2202,Mybph,ENSMUSG00000042451,protein_coding -15941,Reln,ENSMUSG00000042453,protein_coding -6538,Bpifa2,ENSMUSG00000042459,protein_coding -18977,C1galt1,ENSMUSG00000042460,protein_coding -26199,Dctpp1,ENSMUSG00000042462,protein_coding -44179,Zfp410,ENSMUSG00000042472,protein_coding -9374,Tbc1d8b,ENSMUSG00000042473,protein_coding -2095,Fcmr,ENSMUSG00000042474,protein_coding -15738,Abcb4,ENSMUSG00000042476,protein_coding -14714,Tfap2e,ENSMUSG00000042477,protein_coding -32475,Mustn1,ENSMUSG00000042485,protein_coding -36106,Leo1,ENSMUSG00000042487,protein_coding -14708,Clspn,ENSMUSG00000042489,protein_coding -26194,Tbc1d10b,ENSMUSG00000042492,protein_coding -34951,Prdm10,ENSMUSG00000042496,protein_coding -9370,Radx,ENSMUSG00000042498,protein_coding -4847,Hoxd11,ENSMUSG00000042499,protein_coding -14705,Ago4,ENSMUSG00000042500,protein_coding -127,Cpa6,ENSMUSG00000042501,protein_coding -26193,Cd2bp2,ENSMUSG00000042502,protein_coding -18966,Sdhaf3,ENSMUSG00000042505,protein_coding -38370,Usp22,ENSMUSG00000042506,protein_coding -44172,Elmsan1,ENSMUSG00000042507,protein_coding -15747,Dmtf1,ENSMUSG00000042508,protein_coding -2089,AA986860,ENSMUSG00000042510,protein_coding -52914,Klhl14,ENSMUSG00000042514,protein_coding -9365,Pwwp3b,ENSMUSG00000042515,protein_coding -11211,Ubap2l,ENSMUSG00000042520,protein_coding -44170,Dnal1,ENSMUSG00000042523,protein_coding -46421,Sun2,ENSMUSG00000042524,protein_coding -9363,4933428M09Rik,ENSMUSG00000042525,protein_coding -38367,Kcnj12,ENSMUSG00000042529,protein_coding -55097,Golga7b,ENSMUSG00000042532,protein_coding -46419,Gtpbp1,ENSMUSG00000042535,protein_coding -44167,Acot5,ENSMUSG00000042540,protein_coding -18952,Sem1,ENSMUSG00000042541,protein_coding -6521,Asxl1,ENSMUSG00000042548,protein_coding -2081,Zp3r,ENSMUSG00000042554,protein_coding 
-35687,Sin3a,ENSMUSG00000042557,protein_coding -14698,Adprhl2,ENSMUSG00000042558,protein_coding -46413,Fam227a,ENSMUSG00000042564,protein_coding -32183,Nek10,ENSMUSG00000042567,protein_coding -38361,Dhrs7b,ENSMUSG00000042569,protein_coding -28104,Mier2,ENSMUSG00000042570,protein_coding -11201,Ube2q1,ENSMUSG00000042572,protein_coding -2063,Thsd7b,ENSMUSG00000042581,protein_coding -18019,Cux2,ENSMUSG00000042589,protein_coding -42585,Ipo11,ENSMUSG00000042590,protein_coding -18014,Sh2b3,ENSMUSG00000042594,protein_coding -9336,Fam199x,ENSMUSG00000042595,protein_coding -248,Tfap2d,ENSMUSG00000042596,protein_coding -19384,Kdm7a,ENSMUSG00000042599,protein_coding -5621,Kcna4,ENSMUSG00000042604,protein_coding -18010,Atxn2,ENSMUSG00000042605,protein_coding -26162,Hirip3,ENSMUSG00000042606,protein_coding -18941,Asb4,ENSMUSG00000042607,protein_coding -14689,Stk40,ENSMUSG00000042608,protein_coding -11195,Pbxip1,ENSMUSG00000042613,protein_coding -14686,Oscp1,ENSMUSG00000042616,protein_coding -46400,Maff,ENSMUSG00000042622,protein_coding -50525,Safb2,ENSMUSG00000042625,protein_coding -11193,Shc1,ENSMUSG00000042626,protein_coding -44143,Zfyve1,ENSMUSG00000042628,protein_coding -6511,Xkr7,ENSMUSG00000042631,protein_coding -46398,Pla2g6,ENSMUSG00000042632,protein_coding -21782,Gucy2c,ENSMUSG00000042638,protein_coding -2518,Rgsl1,ENSMUSG00000042641,protein_coding -11191,Flad1,ENSMUSG00000042642,protein_coding -49458,Itpr3,ENSMUSG00000042644,protein_coding -18007,Acad12,ENSMUSG00000042647,protein_coding -38332,Alkbh5,ENSMUSG00000042650,protein_coding -42566,Shisal2b,ENSMUSG00000042655,protein_coding -24666,Arrdc4,ENSMUSG00000042659,protein_coding -53138,Wdr55,ENSMUSG00000042660,protein_coding -6507,Dusp15,ENSMUSG00000042662,protein_coding -5599,Immp1l,ENSMUSG00000042670,protein_coding -2510,Rgs8,ENSMUSG00000042671,protein_coding -11185,Dcst1,ENSMUSG00000042672,protein_coding -26151,Ypel3,ENSMUSG00000042675,protein_coding -14678,Zc3h12a,ENSMUSG00000042677,protein_coding 
-38331,Myo15,ENSMUSG00000042678,protein_coding -52907,Garem1,ENSMUSG00000042680,protein_coding -32452,Selenok,ENSMUSG00000042682,protein_coding -2508,Npl,ENSMUSG00000042684,protein_coding -218,Jph1,ENSMUSG00000042686,protein_coding -36102,Mapk6,ENSMUSG00000042688,protein_coding -55251,Stn1,ENSMUSG00000042694,protein_coding -2507,Dhx9,ENSMUSG00000042699,protein_coding -44128,Sipa1l1,ENSMUSG00000042700,protein_coding -53397,Commd10,ENSMUSG00000042705,protein_coding -14674,Dnali1,ENSMUSG00000042707,protein_coding -2504,Shcbp1l,ENSMUSG00000042708,protein_coding -38326,Atpaf2,ENSMUSG00000042709,protein_coding -9294,Tceal9,ENSMUSG00000042712,protein_coding -19036,Ppp1r3a,ENSMUSG00000042717,protein_coding -17994,Naa25,ENSMUSG00000042719,protein_coding -44121,Map3k9,ENSMUSG00000042724,protein_coding -17991,Trafd1,ENSMUSG00000042726,protein_coding -54379,Wdr74,ENSMUSG00000042729,protein_coding -44119,Ttc9,ENSMUSG00000042734,protein_coding -11176,Dpm3,ENSMUSG00000042737,protein_coding -19025,Bmt2,ENSMUSG00000042742,protein_coding -42557,Sgtb,ENSMUSG00000042743,protein_coding -17989,Hectd4,ENSMUSG00000042744,protein_coding -6498,Id1,ENSMUSG00000042745,protein_coding -11175,Krtcap2,ENSMUSG00000042747,protein_coding -9285,Bex2,ENSMUSG00000042750,protein_coding -2497,Nmnat2,ENSMUSG00000042751,protein_coding -36673,Tmem108,ENSMUSG00000042757,protein_coding -26134,Apobr,ENSMUSG00000042759,protein_coding -36329,Mrap2,ENSMUSG00000042761,protein_coding -14665,Maneal,ENSMUSG00000042763,protein_coding -11174,Trim46,ENSMUSG00000042766,protein_coding -21750,Hebp1,ENSMUSG00000042770,protein_coding -2493,Smg7,ENSMUSG00000042772,protein_coding -28091,Olfr1353,ENSMUSG00000042774,protein_coding -11173,Muc1,ENSMUSG00000042784,protein_coding -37115,Exog,ENSMUSG00000042787,protein_coding -13298,Fam166b,ENSMUSG00000042788,protein_coding -35463,Rnf214,ENSMUSG00000042790,protein_coding -2228,Lgr6,ENSMUSG00000042793,protein_coding -5079,Olfr1032,ENSMUSG00000042796,protein_coding 
-25235,Aqp11,ENSMUSG00000042797,protein_coding -2871,Spata46,ENSMUSG00000042800,protein_coding -29282,Olfr769,ENSMUSG00000042801,protein_coding -15507,Gpr153,ENSMUSG00000042804,protein_coding -814,Hecw2,ENSMUSG00000042807,protein_coding -44010,Gpx2,ENSMUSG00000042808,protein_coding -19652,Krba1,ENSMUSG00000042810,protein_coding -31660,Foxf1,ENSMUSG00000042812,protein_coding -6496,Mcts2,ENSMUSG00000042814,protein_coding -53313,Gpr151,ENSMUSG00000042816,protein_coding -18765,Flt3,ENSMUSG00000042817,protein_coding -6918,Snai1,ENSMUSG00000042821,protein_coding -38583,Fgf11,ENSMUSG00000042826,protein_coding -26263,Trim72,ENSMUSG00000042828,protein_coding -23352,Alkbh6,ENSMUSG00000042831,protein_coding -53036,Nrep,ENSMUSG00000042834,protein_coding -41035,Serpinb6b,ENSMUSG00000042842,protein_coding -6798,Wfdc12,ENSMUSG00000042845,protein_coding -27785,Lrrtm3,ENSMUSG00000042846,protein_coding -49109,Vmn1r226,ENSMUSG00000042848,protein_coding -1633,Olfr1414,ENSMUSG00000042849,protein_coding -6057,Zc3h6,ENSMUSG00000042851,protein_coding -6893,Trp53rkb,ENSMUSG00000042854,protein_coding -11872,Kcna10,ENSMUSG00000042861,protein_coding -5073,Olfr1026,ENSMUSG00000042863,protein_coding -40650,Olfr1370,ENSMUSG00000042869,protein_coding -30744,Tom1,ENSMUSG00000042870,protein_coding -21029,Lhfpl4,ENSMUSG00000042873,protein_coding -34199,Prr30,ENSMUSG00000042888,protein_coding -5340,Olfr1260,ENSMUSG00000042894,protein_coding -45795,Abra,ENSMUSG00000042895,protein_coding -3223,Aida,ENSMUSG00000042901,protein_coding -8768,Foxo4,ENSMUSG00000042903,protein_coding -25541,Olfr648,ENSMUSG00000042909,protein_coding -24031,Mamstr,ENSMUSG00000042918,protein_coding -52779,Greb1l,ENSMUSG00000042942,protein_coding -45353,Egflam,ENSMUSG00000042961,protein_coding -26115,Sbk1,ENSMUSG00000042978,protein_coding -18389,Upk3b,ENSMUSG00000042985,protein_coding -40250,Notum,ENSMUSG00000042988,protein_coding -21733,Borcs5,ENSMUSG00000042992,protein_coding -13133,Ifnk,ENSMUSG00000042993,protein_coding 
-10555,Nhlrc3,ENSMUSG00000042997,protein_coding -13771,Rasef,ENSMUSG00000043003,protein_coding -32273,Gng2,ENSMUSG00000043004,protein_coding -47553,Klhl6,ENSMUSG00000043008,protein_coding -36090,Onecut1,ENSMUSG00000043013,protein_coding -786,Nemp2,ENSMUSG00000043015,protein_coding -22692,Ptgir,ENSMUSG00000043017,protein_coding -2477,Edem3,ENSMUSG00000043019,protein_coding -12572,Wdr63,ENSMUSG00000043020,protein_coding -38744,Trpv3,ENSMUSG00000043029,protein_coding -18023,Ccdc63,ENSMUSG00000043036,protein_coding -47284,Tnp2,ENSMUSG00000043050,protein_coding -31859,Disc1,ENSMUSG00000043051,protein_coding -16180,Zfp513,ENSMUSG00000043059,protein_coding -43775,Fscb,ENSMUSG00000043060,protein_coding -43322,Tmem18,ENSMUSG00000043061,protein_coding -48040,Spice1,ENSMUSG00000043065,protein_coding -22337,Vmn1r66,ENSMUSG00000043066,protein_coding -34869,Dpy19l1,ENSMUSG00000043067,protein_coding -31846,Fam89a,ENSMUSG00000043068,protein_coding -25580,Usp17le,ENSMUSG00000043073,protein_coding -53554,Synpo,ENSMUSG00000043079,protein_coding -15184,Tmem82,ENSMUSG00000043085,protein_coding -34694,Olfr855,ENSMUSG00000043087,protein_coding -21048,Il17re,ENSMUSG00000043088,protein_coding -30564,Zfp866,ENSMUSG00000043090,protein_coding -46896,Tuba1c,ENSMUSG00000043091,protein_coding -38828,Hic1,ENSMUSG00000043099,protein_coding -4195,Qrfp,ENSMUSG00000043102,protein_coding -6191,Lrrn4,ENSMUSG00000043110,protein_coding -19559,Olfr448,ENSMUSG00000043119,protein_coding -44930,A530016L24Rik,ENSMUSG00000043122,protein_coding -20497,Mob1a,ENSMUSG00000043131,protein_coding -47250,Tmem186,ENSMUSG00000043140,protein_coding -46927,Aqp6,ENSMUSG00000043144,protein_coding -43431,Crppa,ENSMUSG00000043153,protein_coding -36608,Ppp2r3a,ENSMUSG00000043154,protein_coding -14407,Hpdl,ENSMUSG00000043155,protein_coding -33639,Arl11,ENSMUSG00000043157,protein_coding -19897,Pyurf,ENSMUSG00000043162,protein_coding -10142,Tmem212,ENSMUSG00000043164,protein_coding -11272,Lor,ENSMUSG00000043165,protein_coding 
-41503,Simc1,ENSMUSG00000043183,protein_coding -42145,Rfesd,ENSMUSG00000043190,protein_coding -14568,Zmpste24,ENSMUSG00000043207,protein_coding -19771,Hoxa6,ENSMUSG00000043219,protein_coding -5053,Olfr1009,ENSMUSG00000043226,protein_coding -1321,Fam124b,ENSMUSG00000043230,protein_coding -3512,Upf2,ENSMUSG00000043241,protein_coding -30665,Fam129c,ENSMUSG00000043243,protein_coding -31351,Exoc3l,ENSMUSG00000043251,protein_coding -12919,Tmem64,ENSMUSG00000043252,protein_coding -14929,Pigv,ENSMUSG00000043257,protein_coding -27859,Fam13c,ENSMUSG00000043259,protein_coding -24094,Uevld,ENSMUSG00000043262,protein_coding -3006,Ifi209,ENSMUSG00000043263,protein_coding -5078,Olfr1031,ENSMUSG00000043267,protein_coding -5188,Olfr1123,ENSMUSG00000043274,protein_coding -18424,Trim56,ENSMUSG00000043279,protein_coding -2520,Teddm1b,ENSMUSG00000043282,protein_coding -38362,Tmem11,ENSMUSG00000043284,protein_coding -49546,Pnpla1,ENSMUSG00000043286,protein_coding -36241,Mei4,ENSMUSG00000043289,protein_coding -22153,Zfp784,ENSMUSG00000043290,protein_coding -21789,Smco3,ENSMUSG00000043298,protein_coding -10851,B3galnt1,ENSMUSG00000043300,protein_coding -48700,Kcnj6,ENSMUSG00000043301,protein_coding -22428,Vmn1r75,ENSMUSG00000043308,protein_coding -25434,Olfr571,ENSMUSG00000043310,protein_coding -49885,D17H6S53E,ENSMUSG00000043311,protein_coding -50124,Olfr131,ENSMUSG00000043312,protein_coding -53193,Pcdhb19,ENSMUSG00000043313,protein_coding -38226,Olfr30,ENSMUSG00000043314,protein_coding -44579,Cox8c,ENSMUSG00000043319,protein_coding -17666,Fbrsl1,ENSMUSG00000043323,protein_coding -35240,Olfr975,ENSMUSG00000043331,protein_coding -14631,Rhbdl2,ENSMUSG00000043333,protein_coding -48193,Filip1l,ENSMUSG00000043336,protein_coding -4852,Hoxd9,ENSMUSG00000043342,protein_coding -25440,Olfr577,ENSMUSG00000043354,protein_coding -48228,Olfr187,ENSMUSG00000043357,protein_coding -25421,Olfr78,ENSMUSG00000043366,protein_coding -39775,Hexim2,ENSMUSG00000043372,protein_coding 
-14487,Olfr1342,ENSMUSG00000043383,protein_coding -9277,Gprasp1,ENSMUSG00000043384,protein_coding -13584,Olfr267,ENSMUSG00000043385,protein_coding -18677,Tmem130,ENSMUSG00000043388,protein_coding -47611,2510009E07Rik,ENSMUSG00000043391,protein_coding -43924,Gpr135,ENSMUSG00000043398,protein_coding -17535,Hfm1,ENSMUSG00000043410,protein_coding -15066,Usp48,ENSMUSG00000043411,protein_coding -3828,Otud1,ENSMUSG00000043415,protein_coding -32627,Lrit2,ENSMUSG00000043418,protein_coding -38544,Rnf227,ENSMUSG00000043419,protein_coding -19199,Hilpda,ENSMUSG00000043421,protein_coding -53325,Eif3j2,ENSMUSG00000043424,protein_coding -3214,Ccdc185,ENSMUSG00000043429,protein_coding -16334,Psapl1,ENSMUSG00000043430,protein_coding -22089,Leng9,ENSMUSG00000043432,protein_coding -39440,Epop,ENSMUSG00000043439,protein_coding -10716,Gpr149,ENSMUSG00000043441,protein_coding -49288,Pgp,ENSMUSG00000043445,protein_coding -38275,Gjc2,ENSMUSG00000043448,protein_coding -8296,Magea10,ENSMUSG00000043453,protein_coding -23640,Zfp536,ENSMUSG00000043456,protein_coding -53186,Pcdhb12,ENSMUSG00000043458,protein_coding -46358,Elfn2,ENSMUSG00000043460,protein_coding -10856,Sptssb,ENSMUSG00000043461,protein_coding -9325,Rab9b,ENSMUSG00000043463,protein_coding -2661,Zbtb37,ENSMUSG00000043467,protein_coding -11616,Adam30,ENSMUSG00000043468,protein_coding -11332,Lce3d,ENSMUSG00000043472,protein_coding -39596,Krt34,ENSMUSG00000043485,protein_coding -44169,Acot6,ENSMUSG00000043487,protein_coding -19815,Tril,ENSMUSG00000043496,protein_coding -46364,Lgals2,ENSMUSG00000043501,protein_coding -19679,Gimap5,ENSMUSG00000043505,protein_coding -17684,Hscb,ENSMUSG00000043510,protein_coding -9753,Rai2,ENSMUSG00000043518,protein_coding -11865,Olfr266,ENSMUSG00000043529,protein_coding -55281,Sorcs1,ENSMUSG00000043531,protein_coding -4110,Setx,ENSMUSG00000043535,protein_coding -49103,Vmn1r225,ENSMUSG00000043537,protein_coding -21908,Casc1,ENSMUSG00000043541,protein_coding -9919,Zc2hc1a,ENSMUSG00000043542,protein_coding 
-8662,Fam90a1b,ENSMUSG00000043549,protein_coding -45552,Fbxl7,ENSMUSG00000043556,protein_coding -49595,Mdga1,ENSMUSG00000043557,protein_coding -9170,Cldn34c4,ENSMUSG00000043569,protein_coding -14194,Pars2,ENSMUSG00000043572,protein_coding -6982,4930470P17Rik,ENSMUSG00000043583,protein_coding -36513,Pxylp1,ENSMUSG00000043587,protein_coding -50380,Unc5cl,ENSMUSG00000043592,protein_coding -38666,Zfp3,ENSMUSG00000043602,protein_coding -19573,Olfr13,ENSMUSG00000043605,protein_coding -34493,Mmp3,ENSMUSG00000043613,protein_coding -18350,Vps37d,ENSMUSG00000043614,protein_coding -15091,Ubxn10,ENSMUSG00000043621,protein_coding -794,1700019D03Rik,ENSMUSG00000043629,protein_coding -41398,Ecm2,ENSMUSG00000043631,protein_coding -13320,Fam221b,ENSMUSG00000043633,protein_coding -17177,Adamts3,ENSMUSG00000043635,protein_coding -55320,Rbm20,ENSMUSG00000043639,protein_coding -38303,Pld6,ENSMUSG00000043648,protein_coding -34864,Npsr1,ENSMUSG00000043659,protein_coding -30661,Tmem221,ENSMUSG00000043664,protein_coding -31057,Tox3,ENSMUSG00000043668,protein_coding -28215,Diras1,ENSMUSG00000043670,protein_coding -23615,Dpy19l3,ENSMUSG00000043671,protein_coding -42938,Kcns3,ENSMUSG00000043673,protein_coding -32590,Fam25c,ENSMUSG00000043681,protein_coding -50515,Fem1a,ENSMUSG00000043683,protein_coding -31648,1190005I06Rik,ENSMUSG00000043687,protein_coding -38787,Olfr399,ENSMUSG00000043692,protein_coding -14485,Olfr62,ENSMUSG00000043698,protein_coding -32412,Pde12,ENSMUSG00000043702,protein_coding -50768,Capn13,ENSMUSG00000043705,protein_coding -8396,Olfr1326-ps1,ENSMUSG00000043715,protein_coding -201,Rpl7,ENSMUSG00000043716,protein_coding -36719,Col6a6,ENSMUSG00000043719,protein_coding -6081,F830045P16Rik,ENSMUSG00000043727,protein_coding -17985,Ptpn11,ENSMUSG00000043733,protein_coding -50372,B430306N03Rik,ENSMUSG00000043740,protein_coding -49232,1520401A03Rik,ENSMUSG00000043747,protein_coding -13963,Dmrta1,ENSMUSG00000043753,protein_coding -256,Pkhd1,ENSMUSG00000043760,protein_coding 
-49226,Bicdl2,ENSMUSG00000043782,protein_coding -29702,Defb12,ENSMUSG00000043787,protein_coding -54465,Vwce,ENSMUSG00000043789,protein_coding -26645,Prr33,ENSMUSG00000043795,protein_coding -49881,Ly6g5b,ENSMUSG00000043807,protein_coding -47476,Rtn4r,ENSMUSG00000043811,protein_coding -28168,Adamtsl5,ENSMUSG00000043822,protein_coding -50063,Olfr94,ENSMUSG00000043827,protein_coding -24633,Lysmd4,ENSMUSG00000043831,protein_coding -21288,Clec4a3,ENSMUSG00000043832,protein_coding -23107,Tmem145,ENSMUSG00000043843,protein_coding -10658,Clrn1,ENSMUSG00000043850,protein_coding -25731,Olfr479,ENSMUSG00000043855,protein_coding -40111,Mgat5b,ENSMUSG00000043857,protein_coding -19143,Tas2r118,ENSMUSG00000043865,protein_coding -25645,Taf10,ENSMUSG00000043866,protein_coding -14725,Zmym1,ENSMUSG00000043872,protein_coding -11843,Chil5,ENSMUSG00000043873,protein_coding -38240,Olfr323,ENSMUSG00000043880,protein_coding -34014,Kbtbd7,ENSMUSG00000043881,protein_coding -34635,Slc36a4,ENSMUSG00000043885,protein_coding -5063,Olfr1018,ENSMUSG00000043892,protein_coding -34759,S1pr2,ENSMUSG00000043895,protein_coding -10738,Vmn2r2,ENSMUSG00000043897,protein_coding -31695,Zfp469,ENSMUSG00000043903,protein_coding -5901,Trp53bp1,ENSMUSG00000043909,protein_coding -35169,Olfr922,ENSMUSG00000043911,protein_coding -17866,Ccdc60,ENSMUSG00000043913,protein_coding -35389,Ccdc84,ENSMUSG00000043923,protein_coding -14987,Ncmap,ENSMUSG00000043924,protein_coding -25395,Olfr544,ENSMUSG00000043925,protein_coding -8657,Klhl15,ENSMUSG00000043929,protein_coding -19676,Gimap7,ENSMUSG00000043931,protein_coding -21566,Klri2,ENSMUSG00000043932,protein_coding -50366,A530064D06Rik,ENSMUSG00000043939,protein_coding -17436,Wdfy3,ENSMUSG00000043940,protein_coding -34653,Naalad2,ENSMUSG00000043943,protein_coding -45012,Adam6a,ENSMUSG00000043945,protein_coding -25622,Olfr691,ENSMUSG00000043948,protein_coding -36987,Ccrl2,ENSMUSG00000043953,protein_coding -14693,Thrap3,ENSMUSG00000043962,protein_coding 
-26240,Orai3,ENSMUSG00000043964,protein_coding -55409,Emx2,ENSMUSG00000043969,protein_coding -50218,Opn5,ENSMUSG00000043972,protein_coding -48532,Krtap19-4,ENSMUSG00000043982,protein_coding -41643,Spata31d1d,ENSMUSG00000043986,protein_coding -35457,Cep164,ENSMUSG00000043987,protein_coding -53119,Pura,ENSMUSG00000043991,protein_coding -43845,Mgat2,ENSMUSG00000043998,protein_coding -37740,Gpr75,ENSMUSG00000043999,protein_coding -29208,Gls2,ENSMUSG00000044005,protein_coding -30569,Cilp2,ENSMUSG00000044006,protein_coding -30506,Npy5r,ENSMUSG00000044014,protein_coding -18214,Adgrd1,ENSMUSG00000044017,protein_coding -13455,Mrpl50,ENSMUSG00000044018,protein_coding -46759,Muc19,ENSMUSG00000044021,protein_coding -53195,Pcdhb21,ENSMUSG00000044022,protein_coding -53235,Rell2,ENSMUSG00000044024,protein_coding -29305,Olfr790,ENSMUSG00000044025,protein_coding -55018,Slc35g1,ENSMUSG00000044026,protein_coding -48220,Olfr178,ENSMUSG00000044029,protein_coding -22775,Irf2bp1,ENSMUSG00000044030,protein_coding -4903,Ccdc141,ENSMUSG00000044033,protein_coding -40241,Npb,ENSMUSG00000044034,protein_coding -36975,Als2cl,ENSMUSG00000044037,protein_coding -5676,Olfr1288,ENSMUSG00000044039,protein_coding -54645,Olfr1497,ENSMUSG00000044040,protein_coding -39603,Krt13,ENSMUSG00000044041,protein_coding -5733,Fmn1,ENSMUSG00000044042,protein_coding -53188,Pcdhb14,ENSMUSG00000044043,protein_coding -39662,Ccr10,ENSMUSG00000044052,protein_coding -1644,Otos,ENSMUSG00000044055,protein_coding -37771,Efcab9,ENSMUSG00000044056,protein_coding -37572,Cep68,ENSMUSG00000044066,protein_coding -43351,Gpr22,ENSMUSG00000044067,protein_coding -37628,Zrsr1,ENSMUSG00000044068,protein_coding -29097,Tafa2,ENSMUSG00000044071,protein_coding -37722,Eml6,ENSMUSG00000044072,protein_coding -11242,S100a1,ENSMUSG00000044080,protein_coding -6530,Efcab8,ENSMUSG00000044083,protein_coding -38586,Spem2,ENSMUSG00000044084,protein_coding -20863,Lmod3,ENSMUSG00000044086,protein_coding 
-18546,C130050O18Rik,ENSMUSG00000044092,protein_coding -11771,Rsbn1,ENSMUSG00000044098,protein_coding -3894,Il1f9,ENSMUSG00000044103,protein_coding -34716,Olfr868,ENSMUSG00000044106,protein_coding -47353,2900011O08Rik,ENSMUSG00000044117,protein_coding -25612,Olfr683,ENSMUSG00000044120,protein_coding -8487,5430402E10Rik,ENSMUSG00000044121,protein_coding -38937,Proca1,ENSMUSG00000044122,protein_coding -18018,Pheta1,ENSMUSG00000044134,protein_coding -26250,Prss53,ENSMUSG00000044139,protein_coding -6007,1810024B03Rik,ENSMUSG00000044145,protein_coding -43860,Arf6,ENSMUSG00000044147,protein_coding -7487,1810030O07Rik,ENSMUSG00000044148,protein_coding -7772,Nkrf,ENSMUSG00000044149,protein_coding -9727,Bclaf3,ENSMUSG00000044150,protein_coding -19094,Lsm8,ENSMUSG00000044155,protein_coding -18900,Hepacam2,ENSMUSG00000044156,protein_coding -20006,Tnip3,ENSMUSG00000044162,protein_coding -41290,Rnf182,ENSMUSG00000044164,protein_coding -11766,Bcl2l15,ENSMUSG00000044165,protein_coding -10533,Foxo1,ENSMUSG00000044167,protein_coding -37984,Olfr1384,ENSMUSG00000044170,protein_coding -49334,Ptx4,ENSMUSG00000044172,protein_coding -53602,Spink10,ENSMUSG00000044176,protein_coding -39294,Wfikkn2,ENSMUSG00000044177,protein_coding -33814,Nkx2-6,ENSMUSG00000044186,protein_coding -18545,Gpr146,ENSMUSG00000044197,protein_coding -28259,S1pr4,ENSMUSG00000044199,protein_coding -53063,Cdc25c,ENSMUSG00000044201,protein_coding -35250,Olfr983,ENSMUSG00000044205,protein_coding -8703,Vsig4,ENSMUSG00000044206,protein_coding -5146,Olfr1094,ENSMUSG00000044213,protein_coding -46407,Kcnj4,ENSMUSG00000044216,protein_coding -46926,Aqp5,ENSMUSG00000044217,protein_coding -55124,Nkx2-3,ENSMUSG00000044220,protein_coding -17165,Grsf1,ENSMUSG00000044221,protein_coding -29830,Defb13,ENSMUSG00000044222,protein_coding -45394,Dnajc21,ENSMUSG00000044224,protein_coding -48547,Gm9789,ENSMUSG00000044227,protein_coding -35511,Nxpe4,ENSMUSG00000044229,protein_coding -41350,Nhlrc1,ENSMUSG00000044231,protein_coding 
-38879,Bhlha9,ENSMUSG00000044243,protein_coding -36594,Il20rb,ENSMUSG00000044244,protein_coding -20732,Vmn1r45,ENSMUSG00000044248,protein_coding -6484,Defb29,ENSMUSG00000044249,protein_coding -46823,Pced1b,ENSMUSG00000044250,protein_coding -52824,Osbpl1a,ENSMUSG00000044252,protein_coding -14186,Pcsk9,ENSMUSG00000044254,protein_coding -41681,Ctla2a,ENSMUSG00000044258,protein_coding -25539,Olfm5,ENSMUSG00000044265,protein_coding -50553,Crb3,ENSMUSG00000044279,protein_coding -33143,Olfr221,ENSMUSG00000044286,protein_coding -31390,Nrn1l,ENSMUSG00000044287,protein_coding -13106,Cnr1,ENSMUSG00000044288,protein_coding -35243,Olfr978,ENSMUSG00000044292,protein_coding -29309,Olfr794,ENSMUSG00000044293,protein_coding -47018,Krt84,ENSMUSG00000044294,protein_coding -38027,Zfp879,ENSMUSG00000044296,protein_coding -13957,Cdkn2a,ENSMUSG00000044303,protein_coding -4751,Ubr3,ENSMUSG00000044308,protein_coding -46311,Apol7c,ENSMUSG00000044309,protein_coding -27731,Neurog3,ENSMUSG00000044312,protein_coding -11704,Mab21l3,ENSMUSG00000044313,protein_coding -22790,Gpr4,ENSMUSG00000044317,protein_coding -4167,1700001O22Rik,ENSMUSG00000044320,protein_coding -52888,Dsc1,ENSMUSG00000044322,protein_coding -38902,Trp53i13,ENSMUSG00000044328,protein_coding -1581,Ackr3,ENSMUSG00000044337,protein_coding -5021,Aplnr,ENSMUSG00000044338,protein_coding -17778,Alkbh2,ENSMUSG00000044339,protein_coding -1814,Phlpp1,ENSMUSG00000044340,protein_coding -55093,Marveld1,ENSMUSG00000044345,protein_coding -9333,Slc25a53,ENSMUSG00000044348,protein_coding -6696,Snhg11,ENSMUSG00000044349,protein_coding -33970,Lacc1,ENSMUSG00000044350,protein_coding -38107,Sowaha,ENSMUSG00000044352,protein_coding -8749,P2ry4,ENSMUSG00000044359,protein_coding -46229,BC024139,ENSMUSG00000044361,protein_coding -25130,Ccdc89,ENSMUSG00000044362,protein_coding -6449,Tmem74b,ENSMUSG00000044364,protein_coding -12366,Cxxc4,ENSMUSG00000044365,protein_coding -38619,Slc16a13,ENSMUSG00000044367,protein_coding 
-50751,Pcare,ENSMUSG00000044375,protein_coding -21806,Slc15a5,ENSMUSG00000044378,protein_coding -54245,Tigd3,ENSMUSG00000044390,protein_coding -52896,Dsg2,ENSMUSG00000044393,protein_coding -7776,Sowahd,ENSMUSG00000044400,protein_coding -6704,Adig,ENSMUSG00000044405,protein_coding -43616,Sptssa,ENSMUSG00000044408,protein_coding -1060,Cryga,ENSMUSG00000044429,protein_coding -23841,Klk12,ENSMUSG00000044430,protein_coding -29383,Camsap3,ENSMUSG00000044433,protein_coding -54576,Olfr1442,ENSMUSG00000044441,protein_coding -48492,N6amt1,ENSMUSG00000044442,protein_coding -41538,Pfn3,ENSMUSG00000044444,protein_coding -33796,Dock5,ENSMUSG00000044447,protein_coding -23616,Zfp507,ENSMUSG00000044452,protein_coding -23406,Ffar1,ENSMUSG00000044453,protein_coding -34715,Olfr867,ENSMUSG00000044454,protein_coding -44557,Rin3,ENSMUSG00000044456,protein_coding -33613,Shisa2,ENSMUSG00000044461,protein_coding -25624,Fam160a2,ENSMUSG00000044465,protein_coding -11671,Tent5c,ENSMUSG00000044468,protein_coding -50508,Tnfaip8l1,ENSMUSG00000044469,protein_coding -27679,Ascc1,ENSMUSG00000044475,protein_coding -49601,Zfand3,ENSMUSG00000044477,protein_coding -23864,Klk1b11,ENSMUSG00000044485,protein_coding -5168,Olfr1112,ENSMUSG00000044487,protein_coding -15387,2510039O18Rik,ENSMUSG00000044496,protein_coding -27287,Hs3st5,ENSMUSG00000044499,protein_coding -49179,Zfp758,ENSMUSG00000044501,protein_coding -37750,Bod1,ENSMUSG00000044502,protein_coding -11413,Lingo4,ENSMUSG00000044505,protein_coding -14337,Foxe3,ENSMUSG00000044518,protein_coding -32567,Zfp488,ENSMUSG00000044519,protein_coding -50523,Znrf4,ENSMUSG00000044526,protein_coding -12183,Tram1l1,ENSMUSG00000044528,protein_coding -49310,Rps2,ENSMUSG00000044533,protein_coding -37199,Ackr2,ENSMUSG00000044534,protein_coding -38034,Prop1,ENSMUSG00000044542,protein_coding -43911,Dact1,ENSMUSG00000044548,protein_coding -9316,Tceal3,ENSMUSG00000044550,protein_coding -33851,9930012K11Rik,ENSMUSG00000044551,protein_coding 
-14366,Tex38,ENSMUSG00000044556,protein_coding -5694,Olfr1302,ENSMUSG00000044560,protein_coding -24028,Rasip1,ENSMUSG00000044562,protein_coding -41176,Cage1,ENSMUSG00000044566,protein_coding -43324,Acp1,ENSMUSG00000044573,protein_coding -16131,Garem2,ENSMUSG00000044576,protein_coding -28305,4932415D10Rik,ENSMUSG00000044581,protein_coding -9840,Tlr7,ENSMUSG00000044583,protein_coding -1829,Serpinb3a,ENSMUSG00000044594,protein_coding -53139,Dnd1,ENSMUSG00000044595,protein_coding -7305,Mycs,ENSMUSG00000044597,protein_coding -30721,Smim7,ENSMUSG00000044600,protein_coding -29180,Zbtb39,ENSMUSG00000044617,protein_coding -26968,Gm4922,ENSMUSG00000044624,protein_coding -47622,Liph,ENSMUSG00000044626,protein_coding -4216,Swi5,ENSMUSG00000044627,protein_coding -3943,Rnf208,ENSMUSG00000044628,protein_coding -37527,Cnrip1,ENSMUSG00000044629,protein_coding -46971,Csrnp2,ENSMUSG00000044636,protein_coding -6936,Pard6b,ENSMUSG00000044641,protein_coding -53833,Zbtb7c,ENSMUSG00000044646,protein_coding -4678,Csrnp3,ENSMUSG00000044647,protein_coding -39553,Krtap4-2,ENSMUSG00000044649,protein_coding -36968,Prss42,ENSMUSG00000044664,protein_coding -12067,Plppr4,ENSMUSG00000044667,protein_coding -15671,Fzd1,ENSMUSG00000044674,protein_coding -31486,Zfp612,ENSMUSG00000044676,protein_coding -46157,Ly6k,ENSMUSG00000044678,protein_coding -16098,Cnpy1,ENSMUSG00000044681,protein_coding -15449,Tmem201,ENSMUSG00000044700,protein_coding -26135,Il27,ENSMUSG00000044701,protein_coding -26048,Palb2,ENSMUSG00000044702,protein_coding -33604,Phf11a,ENSMUSG00000044703,protein_coding -25594,Olfr670,ENSMUSG00000044705,protein_coding -37869,Ccnjl,ENSMUSG00000044707,protein_coding -2965,Kcnj10,ENSMUSG00000044708,protein_coding -22818,Gemin7,ENSMUSG00000044709,protein_coding -43949,Slc38a6,ENSMUSG00000044712,protein_coding -44673,Gskip,ENSMUSG00000044715,protein_coding -16313,Dok7,ENSMUSG00000044716,protein_coding -53133,E230025N22Rik,ENSMUSG00000044719,protein_coding 
-54114,Gpr152,ENSMUSG00000044724,protein_coding -45648,Erich5,ENSMUSG00000044726,protein_coding -14669,9930104L06Rik,ENSMUSG00000044730,protein_coding -41031,Serpinb1a,ENSMUSG00000044734,protein_coding -23836,Klk14,ENSMUSG00000044737,protein_coding -29823,Defb10,ENSMUSG00000044743,protein_coding -29819,Defb1,ENSMUSG00000044748,protein_coding -39951,Abca6,ENSMUSG00000044749,protein_coding -48165,Trmt10c,ENSMUSG00000044763,protein_coding -1732,D1Ertd622e,ENSMUSG00000044768,protein_coding -27405,Scml4,ENSMUSG00000044770,protein_coding -32153,Sntn,ENSMUSG00000044772,protein_coding -1547,Hjurp,ENSMUSG00000044783,protein_coding -23244,Zfp36,ENSMUSG00000044786,protein_coding -39779,Spata32,ENSMUSG00000044787,protein_coding -40015,Fads6,ENSMUSG00000044788,protein_coding -36958,Setd2,ENSMUSG00000044791,protein_coding -41647,Isca1,ENSMUSG00000044792,protein_coding -38547,Cyb5d1,ENSMUSG00000044795,protein_coding -35170,Olfr923,ENSMUSG00000044798,protein_coding -13329,Olfr159,ENSMUSG00000044801,protein_coding -38026,Zfp354c,ENSMUSG00000044807,protein_coding -40001,Cd300c2,ENSMUSG00000044811,protein_coding -13376,Shb,ENSMUSG00000044813,protein_coding -25394,Olfr543,ENSMUSG00000044814,protein_coding -1062,D630023F18Rik,ENSMUSG00000044816,protein_coding -34371,Oxgr1,ENSMUSG00000044819,protein_coding -35637,AY074887,ENSMUSG00000044820,protein_coding -25396,Olfr545,ENSMUSG00000044824,protein_coding -16740,Tlr1,ENSMUSG00000044827,protein_coding -2678,Ankrd45,ENSMUSG00000044835,protein_coding -37902,Lsm11,ENSMUSG00000044847,protein_coding -3236,1700056E22Rik,ENSMUSG00000044854,protein_coding -49464,Lemd2,ENSMUSG00000044857,protein_coding -36562,Gm1123,ENSMUSG00000044860,protein_coding -6490,Defb36,ENSMUSG00000044863,protein_coding -10360,Ankrd50,ENSMUSG00000044864,protein_coding -22197,Zfp444,ENSMUSG00000044876,protein_coding -25323,Coa4,ENSMUSG00000044881,protein_coding -38101,Uqcrq,ENSMUSG00000044894,protein_coding -29342,Olfr821,ENSMUSG00000044897,protein_coding 
-25542,Olfr649,ENSMUSG00000044899,protein_coding -22754,Psg22,ENSMUSG00000044903,protein_coding -53769,4930503L19Rik,ENSMUSG00000044906,protein_coding -43965,Syt16,ENSMUSG00000044912,protein_coding -5426,1700029I15Rik,ENSMUSG00000044916,protein_coding -28730,Rassf9,ENSMUSG00000044921,protein_coding -5077,Olfr1030,ENSMUSG00000044923,protein_coding -20673,H1fx,ENSMUSG00000044927,protein_coding -46353,Sstr3,ENSMUSG00000044933,protein_coding -41793,Zfp367,ENSMUSG00000044934,protein_coding -28424,Ttc41,ENSMUSG00000044937,protein_coding -36149,Klhl31,ENSMUSG00000044938,protein_coding -55262,Cfap43,ENSMUSG00000044948,protein_coding -37770,Ubtd2,ENSMUSG00000044949,protein_coding -37875,Pwwp2a,ENSMUSG00000044950,protein_coding -41027,Mylk4,ENSMUSG00000044951,protein_coding -25219,Kctd21,ENSMUSG00000044952,protein_coding -50444,Pp2d1,ENSMUSG00000044957,protein_coding -37524,Fbxo48,ENSMUSG00000044966,protein_coding -15934,Napepld,ENSMUSG00000044968,protein_coding -36080,Wdr72,ENSMUSG00000044976,protein_coding -53004,Sft2d3,ENSMUSG00000044982,protein_coding -50107,Olfr124,ENSMUSG00000044985,protein_coding -46345,Tst,ENSMUSG00000044986,protein_coding -40336,Ucn3,ENSMUSG00000044988,protein_coding -6183,Shld1,ENSMUSG00000044991,protein_coding -54543,Olfr1426,ENSMUSG00000044994,protein_coding -15161,Spata21,ENSMUSG00000045004,protein_coding -1047,Fzd5,ENSMUSG00000045005,protein_coding -39659,Tubg2,ENSMUSG00000045007,protein_coding -21051,Prrt3,ENSMUSG00000045009,protein_coding -8789,Gm4779,ENSMUSG00000045010,protein_coding -25690,Olfr711,ENSMUSG00000045013,protein_coding -50542,Acer1,ENSMUSG00000045019,protein_coding -42467,1700024P04Rik,ENSMUSG00000045022,protein_coding -49260,Prss22,ENSMUSG00000045027,protein_coding -54577,Olfr1443,ENSMUSG00000045030,protein_coding -10322,Cetn4,ENSMUSG00000045031,protein_coding -42315,Ankrd34b,ENSMUSG00000045034,protein_coding -50630,Tmem232,ENSMUSG00000045036,protein_coding -50966,Prkce,ENSMUSG00000045038,protein_coding 
-23108,Megf8,ENSMUSG00000045039,protein_coding -54142,Lrfn4,ENSMUSG00000045045,protein_coding -55423,Prlhr,ENSMUSG00000045052,protein_coding -50911,Kcng3,ENSMUSG00000045053,protein_coding -53179,Pcdhb7,ENSMUSG00000045062,protein_coding -44214,Zc2hc1c,ENSMUSG00000045064,protein_coding -39932,9930022D16Rik,ENSMUSG00000045065,protein_coding -13604,E130308A19Rik,ENSMUSG00000045071,protein_coding -18616,Rnf216,ENSMUSG00000045078,protein_coding -13139,Lingo2,ENSMUSG00000045083,protein_coding -34779,S1pr5,ENSMUSG00000045087,protein_coding -1659,Aqp12,ENSMUSG00000045091,protein_coding -12013,S1pr1,ENSMUSG00000045092,protein_coding -53582,Arhgef37,ENSMUSG00000045094,protein_coding -20826,Magi1,ENSMUSG00000045095,protein_coding -54083,Kmt5b,ENSMUSG00000045098,protein_coding -20833,Slc25a26,ENSMUSG00000045100,protein_coding -16276,Poln,ENSMUSG00000045102,protein_coding -8537,Dmd,ENSMUSG00000045103,protein_coding -5578,Ccdc73,ENSMUSG00000045106,protein_coding -32275,Saysd1,ENSMUSG00000045107,protein_coding -39554,Krtap4-7,ENSMUSG00000045109,protein_coding -27077,Taar6,ENSMUSG00000045111,protein_coding -26178,Prrt2,ENSMUSG00000045114,protein_coding -54586,Olfr1445,ENSMUSG00000045126,protein_coding -30633,Rpl18a,ENSMUSG00000045128,protein_coding -25490,Olfr620,ENSMUSG00000045132,protein_coding -41079,Tubb2b,ENSMUSG00000045136,protein_coding -39129,Pigw,ENSMUSG00000045140,protein_coding -5334,Olfr1255,ENSMUSG00000045148,protein_coding -5237,Olfr1161,ENSMUSG00000045150,protein_coding -20498,Bola3,ENSMUSG00000045160,protein_coding -26187,AI467606,ENSMUSG00000045165,protein_coding -473,Amer3,ENSMUSG00000045174,protein_coding -38525,Borcs6,ENSMUSG00000045176,protein_coding -8141,Sox3,ENSMUSG00000045179,protein_coding -9610,Shroom2,ENSMUSG00000045180,protein_coding -28154,Cirbp,ENSMUSG00000045193,protein_coding -32193,Lrrc3b,ENSMUSG00000045201,protein_coding -50106,Olfr123,ENSMUSG00000045202,protein_coding -34673,Olfr835,ENSMUSG00000045204,protein_coding 
-12864,Dpy19l4,ENSMUSG00000045205,protein_coding -108,Vcpip1,ENSMUSG00000045210,protein_coding -33872,Nudt18,ENSMUSG00000045211,protein_coding -52917,Asxl3,ENSMUSG00000045215,protein_coding -500,Hs6st1,ENSMUSG00000045216,protein_coding -5226,Olfr1152,ENSMUSG00000045225,protein_coding -30906,Rln3,ENSMUSG00000045232,protein_coding -26636,Krtap5-4,ENSMUSG00000045236,protein_coding -8250,1110012L19Rik,ENSMUSG00000045237,protein_coding -31620,Kcng4,ENSMUSG00000045246,protein_coding -30717,Med26,ENSMUSG00000045248,protein_coding -26212,Zfp688,ENSMUSG00000045251,protein_coding -23093,Zfp574,ENSMUSG00000045252,protein_coding -50876,Morn2,ENSMUSG00000045257,protein_coding -2914,Klhdc9,ENSMUSG00000045259,protein_coding -45617,Tas2r119,ENSMUSG00000045267,protein_coding -14514,Zfp691,ENSMUSG00000045268,protein_coding -42512,Cenph,ENSMUSG00000045273,protein_coding -48724,Lca5l,ENSMUSG00000045275,protein_coding -46132,Gpr20,ENSMUSG00000045281,protein_coding -22119,Tmem86b,ENSMUSG00000045282,protein_coding -7903,Dcaf12l1,ENSMUSG00000045284,protein_coding -38834,Rtn4rl1,ENSMUSG00000045287,protein_coding -40017,Ush1g,ENSMUSG00000045288,protein_coding -21772,E330021D16Rik,ENSMUSG00000045291,protein_coding -16095,Insig1,ENSMUSG00000045294,protein_coding -16164,Preb,ENSMUSG00000045302,protein_coding -33042,Olfr734,ENSMUSG00000045306,protein_coding -42351,Lhfpl2,ENSMUSG00000045312,protein_coding -17261,Sowahb,ENSMUSG00000045314,protein_coding -49321,Fahd1,ENSMUSG00000045316,protein_coding -16315,Adra2c,ENSMUSG00000045318,protein_coding -3513,Proser2,ENSMUSG00000045319,protein_coding -36735,Tlr9,ENSMUSG00000045322,protein_coding -11953,Fndc7,ENSMUSG00000045326,protein_coding -12381,Cenpe,ENSMUSG00000045328,protein_coding -8162,4933402E13Rik,ENSMUSG00000045330,protein_coding -48515,2310079G19Rik,ENSMUSG00000045331,protein_coding -31031,Zfp423,ENSMUSG00000045333,protein_coding -856,Hsfy2,ENSMUSG00000045336,protein_coding -29825,Defb11,ENSMUSG00000045337,protein_coding 
-22353,Vmn1r70,ENSMUSG00000045340,protein_coding -47540,Olfr167,ENSMUSG00000045341,protein_coding -18466,Nyap1,ENSMUSG00000045348,protein_coding -15077,Sh2d5,ENSMUSG00000045349,protein_coding -46949,Fam186a,ENSMUSG00000045350,protein_coding -26694,Tnfrsf26,ENSMUSG00000045362,protein_coding -13946,Ifne,ENSMUSG00000045364,protein_coding -38842,Wdr81,ENSMUSG00000045374,protein_coding -38549,Tmem88,ENSMUSG00000045377,protein_coding -49550,Pxt1,ENSMUSG00000045378,protein_coding -3022,Olfr433,ENSMUSG00000045381,protein_coding -2059,Cxcr4,ENSMUSG00000045382,protein_coding -5080,Olfr1033,ENSMUSG00000045392,protein_coding -51000,Epcam,ENSMUSG00000045394,protein_coding -54647,Olfr1499,ENSMUSG00000045395,protein_coding -44502,Kcnk13,ENSMUSG00000045404,protein_coding -50004,Trim39,ENSMUSG00000045409,protein_coding -40356,Akr1e1,ENSMUSG00000045410,protein_coding -23898,2410002F23Rik,ENSMUSG00000045411,protein_coding -36462,Dipk2a,ENSMUSG00000045414,protein_coding -49113,Vmn1r230,ENSMUSG00000045417,protein_coding -37977,Olfr1390,ENSMUSG00000045421,protein_coding -9232,Hnrnph2,ENSMUSG00000045427,protein_coding -15914,Tmem60,ENSMUSG00000045435,protein_coding -18541,Cox19,ENSMUSG00000045438,protein_coding -43659,Insm2,ENSMUSG00000045440,protein_coding -19948,Gprin3,ENSMUSG00000045441,protein_coding -19642,Zfp956,ENSMUSG00000045466,protein_coding -24867,Ttll13,ENSMUSG00000045467,protein_coding -39642,Hcrt,ENSMUSG00000045471,protein_coding -40653,Olfr1368,ENSMUSG00000045474,protein_coding -11330,Lce3c,ENSMUSG00000045475,protein_coding -19515,Olfr459,ENSMUSG00000045479,protein_coding -18678,Trrap,ENSMUSG00000045482,protein_coding -7193,Bhlhe23,ENSMUSG00000045493,protein_coding -53174,Pcdhb3,ENSMUSG00000045498,protein_coding -18106,Hcar2,ENSMUSG00000045502,protein_coding -6812,Sys1,ENSMUSG00000045503,protein_coding -50618,A930002H24Rik,ENSMUSG00000045506,protein_coding -40659,Olfr1367,ENSMUSG00000045508,protein_coding -42147,Gpr150,ENSMUSG00000045509,protein_coding 
-19420,Olfr460,ENSMUSG00000045514,protein_coding -637,Pou3f3,ENSMUSG00000045515,protein_coding -28179,Onecut3,ENSMUSG00000045518,protein_coding -34728,Zfp560,ENSMUSG00000045519,protein_coding -47467,Tssk2,ENSMUSG00000045521,protein_coding -35127,Olfr891,ENSMUSG00000045528,protein_coding -39767,C1ql1,ENSMUSG00000045532,protein_coding -21420,Kcna5,ENSMUSG00000045534,protein_coding -31405,Ddx28,ENSMUSG00000045538,protein_coding -11299,Sprr3,ENSMUSG00000045539,protein_coding -25469,Olfr600,ENSMUSG00000045540,protein_coding -39607,Krt14,ENSMUSG00000045545,protein_coding -49047,Fpr1,ENSMUSG00000045551,protein_coding -27357,Mettl24,ENSMUSG00000045555,protein_coding -29352,Olfr827,ENSMUSG00000045559,protein_coding -11302,Sprr4,ENSMUSG00000045566,protein_coding -53744,Mc2r,ENSMUSG00000045569,protein_coding -12791,Penk,ENSMUSG00000045573,protein_coding -49120,Vmn1r233,ENSMUSG00000045575,protein_coding -11814,St7l,ENSMUSG00000045576,protein_coding -25687,Olfr710,ENSMUSG00000045581,protein_coding -25480,Olfr610,ENSMUSG00000045584,protein_coding -23073,Lypd10,ENSMUSG00000045587,protein_coding -13557,Frrs1l,ENSMUSG00000045589,protein_coding -26980,Olig3,ENSMUSG00000045591,protein_coding -37031,Glb1,ENSMUSG00000045594,protein_coding -26197,Zfp553,ENSMUSG00000045598,protein_coding -46795,Dbx2,ENSMUSG00000045608,protein_coding -19340,Chrm2,ENSMUSG00000045613,protein_coding -35677,Odf3l1,ENSMUSG00000045620,protein_coding -6251,Esf1,ENSMUSG00000045624,protein_coding -47790,Pigz,ENSMUSG00000045625,protein_coding -53596,Sh3tc2,ENSMUSG00000045629,protein_coding -46959,Tmprss12,ENSMUSG00000045631,protein_coding -30165,Mtus1,ENSMUSG00000045636,protein_coding -26232,Zfp629,ENSMUSG00000045639,protein_coding -1136,Vwc2l,ENSMUSG00000045648,protein_coding -33987,Fam216b,ENSMUSG00000045655,protein_coding -53182,Pcdhb10,ENSMUSG00000045657,protein_coding -1391,Pid1,ENSMUSG00000045658,protein_coding -25911,Plekha7,ENSMUSG00000045659,protein_coding -11957,Henmt1,ENSMUSG00000045662,protein_coding 
-54248,Cdc42ep2,ENSMUSG00000045664,protein_coding -47064,Mfsd5,ENSMUSG00000045665,protein_coding -38714,Smtnl2,ENSMUSG00000045667,protein_coding -37566,Spred2,ENSMUSG00000045671,protein_coding -13689,Col27a1,ENSMUSG00000045672,protein_coding -54636,Olfr1489,ENSMUSG00000045678,protein_coding -43036,Pqlc3,ENSMUSG00000045679,protein_coding -27059,Tcf21,ENSMUSG00000045680,protein_coding -3988,Lcn6,ENSMUSG00000045684,protein_coding -53176,Pcdhb4,ENSMUSG00000045689,protein_coding -43982,Wdr89,ENSMUSG00000045690,protein_coding -33453,Thtpa,ENSMUSG00000045691,protein_coding -22998,Nlrp4e,ENSMUSG00000045693,protein_coding -15340,Gm21411,ENSMUSG00000045699,protein_coding -19560,Olfr447,ENSMUSG00000045708,protein_coding -19826,Prr15,ENSMUSG00000045725,protein_coding -53598,Adrb2,ENSMUSG00000045730,protein_coding -33748,Pnoc,ENSMUSG00000045731,protein_coding -26503,Sprn,ENSMUSG00000045733,protein_coding -49290,Bricd5,ENSMUSG00000045744,protein_coding -13014,Mms22l,ENSMUSG00000045751,protein_coding -26669,Tssc4,ENSMUSG00000045752,protein_coding -26210,Zfp764,ENSMUSG00000045757,protein_coding -50749,Togaram2,ENSMUSG00000045761,protein_coding -45538,Basp1,ENSMUSG00000045763,protein_coding -41556,B230219D22Rik,ENSMUSG00000045767,protein_coding -40028,Slc16a5,ENSMUSG00000045775,protein_coding -32441,Lrtm1,ENSMUSG00000045776,protein_coding -26638,Ifitm10,ENSMUSG00000045777,protein_coding -25494,Olfr624,ENSMUSG00000045780,protein_coding -16590,Ccdc149,ENSMUSG00000045790,protein_coding -25441,Olfr578,ENSMUSG00000045792,protein_coding -7243,Lkaaear1,ENSMUSG00000045794,protein_coding -24916,Whamm,ENSMUSG00000045795,protein_coding -7431,4930402K13Rik,ENSMUSG00000045797,protein_coding -8704,Hsf3,ENSMUSG00000045802,protein_coding -35251,Olfr984,ENSMUSG00000045812,protein_coding -50926,Zfp36l2,ENSMUSG00000045817,protein_coding -6843,Zswim3,ENSMUSG00000045822,protein_coding -25437,Olfr574,ENSMUSG00000045824,protein_coding -54116,Ptprcap,ENSMUSG00000045826,protein_coding 
-41037,Serpinb9,ENSMUSG00000045827,protein_coding -40927,Hdgfl1,ENSMUSG00000045835,protein_coding -5813,Ccdc9b,ENSMUSG00000045838,protein_coding -13085,Lyrm2,ENSMUSG00000045854,protein_coding -28593,Cradd,ENSMUSG00000045867,protein_coding -25660,Gvin1,ENSMUSG00000045868,protein_coding -34294,Slitrk6,ENSMUSG00000045871,protein_coding -33769,Adra1a,ENSMUSG00000045875,protein_coding -53180,Pcdhb8,ENSMUSG00000045876,protein_coding -54603,Olfr1461,ENSMUSG00000045883,protein_coding -27422,Gm9803,ENSMUSG00000045886,protein_coding -20524,Paip2b,ENSMUSG00000045896,protein_coding -54164,Npas4,ENSMUSG00000045903,protein_coding -28106,C2cd4c,ENSMUSG00000045912,protein_coding -38509,Ccdc42,ENSMUSG00000045915,protein_coding -13699,Tmem268,ENSMUSG00000045917,protein_coding -22212,Gm20715,ENSMUSG00000045929,protein_coding -43703,Clec14a,ENSMUSG00000045930,protein_coding -54938,Ifit2,ENSMUSG00000045932,protein_coding -11515,Mtmr11,ENSMUSG00000045934,protein_coding -38051,BC049762,ENSMUSG00000045942,protein_coding -23265,Mrps12,ENSMUSG00000045948,protein_coding -758,Cavin2,ENSMUSG00000045954,protein_coding -21200,Wnk1,ENSMUSG00000045962,protein_coding -3855,Gpr158,ENSMUSG00000045967,protein_coding -2519,Teddm2,ENSMUSG00000045968,protein_coding -29558,Ing1,ENSMUSG00000045969,protein_coding -13374,Slc25a51,ENSMUSG00000045973,protein_coding -48748,C2cd2,ENSMUSG00000045975,protein_coding -40011,Tmem104,ENSMUSG00000045980,protein_coding -47585,Eif4g1,ENSMUSG00000045983,protein_coding -26158,4930451I11Rik,ENSMUSG00000045989,protein_coding -53633,Onecut2,ENSMUSG00000045991,protein_coding -34903,B3gat1,ENSMUSG00000045994,protein_coding -45679,Polr2k,ENSMUSG00000045996,protein_coding -17352,Naa11,ENSMUSG00000046000,protein_coding -42622,Gapt,ENSMUSG00000046006,protein_coding -55386,Pnlip,ENSMUSG00000046008,protein_coding -39045,Zfp830,ENSMUSG00000046010,protein_coding -40679,Olfr1364,ENSMUSG00000046016,protein_coding -6518,Pofut1,ENSMUSG00000046020,protein_coding 
-24967,Stard5,ENSMUSG00000046027,protein_coding -27263,Calhm6,ENSMUSG00000046031,protein_coding -8764,Snx12,ENSMUSG00000046032,protein_coding -45566,Otulin,ENSMUSG00000046034,protein_coding -29319,Olfr803,ENSMUSG00000046041,protein_coding -33718,Rp1l1,ENSMUSG00000046049,protein_coding -23401,Sbsn,ENSMUSG00000046056,protein_coding -23234,Eid2,ENSMUSG00000046058,protein_coding -2167,Ppp1r15b,ENSMUSG00000046062,protein_coding -49322,Igfals,ENSMUSG00000046070,protein_coding -17507,Lrrc8d,ENSMUSG00000046079,protein_coding -21545,Clec9a,ENSMUSG00000046080,protein_coding -42455,Tmem174,ENSMUSG00000046082,protein_coding -5551,4931422A03Rik,ENSMUSG00000046085,protein_coding -14612,Hpcal4,ENSMUSG00000046093,protein_coding -39599,Krt32,ENSMUSG00000046095,protein_coding -26009,Mosmo,ENSMUSG00000046096,protein_coding -113,Mcmdc2,ENSMUSG00000046101,protein_coding -31701,Il17c,ENSMUSG00000046108,protein_coding -5912,Serinc4,ENSMUSG00000046110,protein_coding -34621,Cep295,ENSMUSG00000046111,protein_coding -23154,Vmn1r184,ENSMUSG00000046130,protein_coding -14132,C130073F10Rik,ENSMUSG00000046133,protein_coding -54861,9930021J03Rik,ENSMUSG00000046138,protein_coding -54531,Patl1,ENSMUSG00000046139,protein_coding -35161,Olfr918,ENSMUSG00000046150,protein_coding -30018,Fut10,ENSMUSG00000046152,protein_coding -44043,Tmem229b,ENSMUSG00000046157,protein_coding -40439,Chrm3,ENSMUSG00000046159,protein_coding -48602,Olig1,ENSMUSG00000046160,protein_coding -35622,Gldn,ENSMUSG00000046167,protein_coding -33652,Kcnrg,ENSMUSG00000046168,protein_coding -42562,Adamts6,ENSMUSG00000046169,protein_coding -48881,Pabpc6,ENSMUSG00000046173,protein_coding -18996,Nxph1,ENSMUSG00000046178,protein_coding -24163,E2f8,ENSMUSG00000046179,protein_coding -8117,4930550L24Rik,ENSMUSG00000046180,protein_coding -26108,Gsg1l,ENSMUSG00000046182,protein_coding -23313,Zfp84,ENSMUSG00000046185,protein_coding -36193,Cd109,ENSMUSG00000046186,protein_coding -53194,Pcdhb20,ENSMUSG00000046191,protein_coding 
-19147,Iqub,ENSMUSG00000046192,protein_coding -50872,Ttc39d,ENSMUSG00000046196,protein_coding -48763,Scaf8,ENSMUSG00000046201,protein_coding -33774,Pnma2,ENSMUSG00000046204,protein_coding -38506,Pik3r6,ENSMUSG00000046207,protein_coding -33043,Olfr735,ENSMUSG00000046210,protein_coding -11873,Cym,ENSMUSG00000046213,protein_coding -39792,Rprml,ENSMUSG00000046215,protein_coding -23056,Plaur,ENSMUSG00000046223,protein_coding -6620,Scand1,ENSMUSG00000046229,protein_coding -54676,Vps13a,ENSMUSG00000046230,protein_coding -35088,Hepacam,ENSMUSG00000046240,protein_coding -36576,Nme9,ENSMUSG00000046242,protein_coding -18471,Pilra,ENSMUSG00000046245,protein_coding -26625,Krtap5-3,ENSMUSG00000046248,protein_coding -30359,Adam29,ENSMUSG00000046258,protein_coding -11293,Sprr2h,ENSMUSG00000046259,protein_coding -15257,C87977,ENSMUSG00000046262,protein_coding -7334,Usp27x,ENSMUSG00000046269,protein_coding -54585,Olfr1444,ENSMUSG00000046272,protein_coding -38880,Trarg1,ENSMUSG00000046275,protein_coding -11203,She,ENSMUSG00000046280,protein_coding -30159,Adam20,ENSMUSG00000046282,protein_coding -8312,Pnma3,ENSMUSG00000046287,protein_coding -30650,Ankle1,ENSMUSG00000046295,protein_coding -1637,Olfr1412,ENSMUSG00000046300,protein_coding -37971,Zfp62,ENSMUSG00000046311,protein_coding -13217,Myorg,ENSMUSG00000046312,protein_coding -43520,Stxbp6,ENSMUSG00000046314,protein_coding -11557,BC107364,ENSMUSG00000046317,protein_coding -53686,Ccbe1,ENSMUSG00000046318,protein_coding -26033,Hs3st2,ENSMUSG00000046321,protein_coding -21274,Dppa3,ENSMUSG00000046323,protein_coding -54859,Ermp1,ENSMUSG00000046324,protein_coding -50552,Slc25a23,ENSMUSG00000046329,protein_coding -1171,Rpl37a,ENSMUSG00000046330,protein_coding -522,Fam178b,ENSMUSG00000046337,protein_coding -6016,Gpat2,ENSMUSG00000046338,protein_coding -47810,Smco1,ENSMUSG00000046345,protein_coding -40826,Zfp322a,ENSMUSG00000046351,protein_coding -33560,Gjb2,ENSMUSG00000046352,protein_coding -29708,Defb14,ENSMUSG00000046354,protein_coding 
-25799,Rpl27a,ENSMUSG00000046364,protein_coding -2218,Mgat4e,ENSMUSG00000046367,protein_coding -26170,Asphd1,ENSMUSG00000046378,protein_coding -46145,Jrk,ENSMUSG00000046380,protein_coding -53191,Pcdhb17,ENSMUSG00000046387,protein_coding -25479,Olfr609,ENSMUSG00000046396,protein_coding -36537,Rbp1,ENSMUSG00000046402,protein_coding -2087,Yod1,ENSMUSG00000046404,protein_coding -30900,1700067K01Rik,ENSMUSG00000046408,protein_coding -23297,Kcnk6,ENSMUSG00000046410,protein_coding -38416,Lrrc75a,ENSMUSG00000046417,protein_coding -25784,Olfr518,ENSMUSG00000046431,protein_coding -9295,Bex3,ENSMUSG00000046432,protein_coding -47119,Hnrnpa1,ENSMUSG00000046434,protein_coding -15239,Gm13078,ENSMUSG00000046435,protein_coding -23538,Scgb2b24,ENSMUSG00000046438,protein_coding -31490,Cmtr2,ENSMUSG00000046441,protein_coding -39184,Ppm1e,ENSMUSG00000046442,protein_coding -15088,Camk2n1,ENSMUSG00000046447,protein_coding -8891,Nexmif,ENSMUSG00000046449,protein_coding -13325,Olfr71,ENSMUSG00000046450,protein_coding -22125,Tmem150b,ENSMUSG00000046456,protein_coding -35624,Sh2d7,ENSMUSG00000046460,protein_coding -7240,Sox18,ENSMUSG00000046470,protein_coding -39576,Krtap4-16,ENSMUSG00000046474,protein_coding -35440,Scn4b,ENSMUSG00000046480,protein_coding -3030,Olfr231,ENSMUSG00000046486,protein_coding -38512,Rnf222,ENSMUSG00000046490,protein_coding -37868,C1qtnf2,ENSMUSG00000046491,protein_coding -28093,Olfr1352,ENSMUSG00000046493,protein_coding -20856,Tafa4,ENSMUSG00000046500,protein_coding -47971,Cox17,ENSMUSG00000046516,protein_coding -43392,Ferd3l,ENSMUSG00000046518,protein_coding -11486,Golph3l,ENSMUSG00000046519,protein_coding -33953,Kctd4,ENSMUSG00000046523,protein_coding -8719,Ar,ENSMUSG00000046532,protein_coding -23100,Zfp526,ENSMUSG00000046541,protein_coding -47754,Fam43a,ENSMUSG00000046546,protein_coding -9635,Spin2c,ENSMUSG00000046550,protein_coding -31200,Zfp319,ENSMUSG00000046556,protein_coding -12201,Arsj,ENSMUSG00000046561,protein_coding 
-17820,Unc119b,ENSMUSG00000046562,protein_coding -28714,4930430F08Rik,ENSMUSG00000046567,protein_coding -16387,Zfp518b,ENSMUSG00000046572,protein_coding -41128,Lyrm4,ENSMUSG00000046573,protein_coding -23963,Prr12,ENSMUSG00000046574,protein_coding -55268,Cfap58,ENSMUSG00000046585,protein_coding -29421,Lrrc8e,ENSMUSG00000046589,protein_coding -24837,Ticrr,ENSMUSG00000046591,protein_coding -13177,Tmem215,ENSMUSG00000046593,protein_coding -47776,Bdh1,ENSMUSG00000046598,protein_coding -37234,Tcaim,ENSMUSG00000046603,protein_coding -40296,B3gntl1,ENSMUSG00000046605,protein_coding -17912,Hrk,ENSMUSG00000046607,protein_coding -53669,Oacyl,ENSMUSG00000046610,protein_coding -47575,Vwa5b2,ENSMUSG00000046613,protein_coding -7913,Actrt1,ENSMUSG00000046615,protein_coding -4431,Olfml2a,ENSMUSG00000046618,protein_coding -14737,Gjb4,ENSMUSG00000046623,protein_coding -37463,Pkd1l1,ENSMUSG00000046634,protein_coding -15550,Ttc34,ENSMUSG00000046637,protein_coding -2994,Olfr218,ENSMUSG00000046643,protein_coding -54562,Olfr1440,ENSMUSG00000046650,protein_coding -19535,Tas2r143,ENSMUSG00000046652,protein_coding -18628,Zfp316,ENSMUSG00000046658,protein_coding -12890,Rbm12b1,ENSMUSG00000046667,protein_coding -53108,Cxxc5,ENSMUSG00000046668,protein_coding -14969,Mtfr1l,ENSMUSG00000046671,protein_coding -44571,Tmem251,ENSMUSG00000046675,protein_coding -11325,Lce1l,ENSMUSG00000046676,protein_coding -35246,Olfr981,ENSMUSG00000046678,protein_coding -20608,C87436,ENSMUSG00000046679,protein_coding -12247,Tifa,ENSMUSG00000046688,protein_coding -31425,Chtf8,ENSMUSG00000046691,protein_coding -14917,Tent5b,ENSMUSG00000046694,protein_coding -40167,Enpp7,ENSMUSG00000046697,protein_coding -8176,Slitrk4,ENSMUSG00000046699,protein_coding -31204,Csnk2a2,ENSMUSG00000046707,protein_coding -49484,Hmga1,ENSMUSG00000046711,protein_coding -31663,Foxc2,ENSMUSG00000046714,protein_coding -22345,Vmn1r67,ENSMUSG00000046716,protein_coding -21813,Igbp1b,ENSMUSG00000046717,protein_coding 
-30659,Bst2,ENSMUSG00000046718,protein_coding -39345,Nxph3,ENSMUSG00000046719,protein_coding -11465,Cdc42se1,ENSMUSG00000046722,protein_coding -30152,Adam24,ENSMUSG00000046723,protein_coding -53121,Cystm1,ENSMUSG00000046727,protein_coding -38593,Kctd11,ENSMUSG00000046731,protein_coding -21747,Gprc5a,ENSMUSG00000046733,protein_coding -10365,Fat4,ENSMUSG00000046743,protein_coding -48182,Tmem45a2,ENSMUSG00000046748,protein_coding -23236,Selenov,ENSMUSG00000046750,protein_coding -32426,Ccdc66,ENSMUSG00000046753,protein_coding -39275,Kif2b,ENSMUSG00000046755,protein_coding -40037,Mrps7,ENSMUSG00000046756,protein_coding -46219,Fam83h,ENSMUSG00000046761,protein_coding -43977,Rhoj,ENSMUSG00000046768,protein_coding -8790,8030474K03Rik,ENSMUSG00000046774,protein_coding -43696,Ttc6,ENSMUSG00000046782,protein_coding -36993,Epm2aip1,ENSMUSG00000046785,protein_coding -14488,Olfr1341,ENSMUSG00000046790,protein_coding -44158,Riox1,ENSMUSG00000046791,protein_coding -22193,Zfp787,ENSMUSG00000046792,protein_coding -11916,Gpr61,ENSMUSG00000046793,protein_coding -30086,Ppp1r3b,ENSMUSG00000046794,protein_coding -15688,Cldn12,ENSMUSG00000046798,protein_coding -5814,Phgr1,ENSMUSG00000046804,protein_coding -54565,Mpeg1,ENSMUSG00000046805,protein_coding -19322,Cyren,ENSMUSG00000046806,protein_coding -27932,Lrrc75b,ENSMUSG00000046807,protein_coding -38642,Gltpd2,ENSMUSG00000046811,protein_coding -5828,Gchfr,ENSMUSG00000046814,protein_coding -12420,Ddit4l,ENSMUSG00000046818,protein_coding -28217,Slc39a3,ENSMUSG00000046822,protein_coding -23419,Fam187b,ENSMUSG00000046826,protein_coding -675,Mettl21e,ENSMUSG00000046828,protein_coding -47043,Krt1,ENSMUSG00000046834,protein_coding -3222,Brox,ENSMUSG00000046836,protein_coding -28352,Ckap4,ENSMUSG00000046841,protein_coding -31559,Vat1l,ENSMUSG00000046844,protein_coding -3899,Il1f10,ENSMUSG00000046845,protein_coding -35811,Spesp1,ENSMUSG00000046846,protein_coding -4233,Pip5kl1,ENSMUSG00000046854,protein_coding 
-1012,Gpr1,ENSMUSG00000046856,protein_coding -14418,Hectd3,ENSMUSG00000046861,protein_coding -15223,Pramef8,ENSMUSG00000046862,protein_coding -23230,Fbl,ENSMUSG00000046865,protein_coding -9698,Mbtps2,ENSMUSG00000046873,protein_coding -41328,Atxn1,ENSMUSG00000046876,protein_coding -37954,Irgm1,ENSMUSG00000046879,protein_coding -30694,Olfr374,ENSMUSG00000046881,protein_coding -53448,Zfp474,ENSMUSG00000046886,protein_coding -47060,Zfp740,ENSMUSG00000046897,protein_coding -40952,Prl7a2,ENSMUSG00000046899,protein_coding -33500,Ltb4r1,ENSMUSG00000046908,protein_coding -39005,Tefm,ENSMUSG00000046909,protein_coding -54593,Olfr1451,ENSMUSG00000046913,protein_coding -26772,Myct1,ENSMUSG00000046916,protein_coding -27363,Gpr6,ENSMUSG00000046922,protein_coding -23023,Vmn1r179,ENSMUSG00000046924,protein_coding -40748,Vmn1r193,ENSMUSG00000046932,protein_coding -28683,Csl,ENSMUSG00000046934,protein_coding -8513,Mageb16,ENSMUSG00000046942,protein_coding -19395,Adck2,ENSMUSG00000046947,protein_coding -41070,Nqo2,ENSMUSG00000046949,protein_coding -42318,Spz1,ENSMUSG00000046957,protein_coding -17602,Slc26a1,ENSMUSG00000046959,protein_coding -47962,Gpr156,ENSMUSG00000046961,protein_coding -48750,Zbtb21,ENSMUSG00000046962,protein_coding -5872,Pla2g4f,ENSMUSG00000046971,protein_coding -37927,BC053393,ENSMUSG00000046974,protein_coding -5065,Olfr1020,ENSMUSG00000046975,protein_coding -53984,Tshz1,ENSMUSG00000046982,protein_coding -16457,Tapt1,ENSMUSG00000046985,protein_coding -48980,Wdr27,ENSMUSG00000046991,protein_coding -842,Mars2,ENSMUSG00000046994,protein_coding -36516,Spsb4,ENSMUSG00000046997,protein_coding -10829,1110032F04Rik,ENSMUSG00000046999,protein_coding -42941,Msgn1,ENSMUSG00000047002,protein_coding -46197,Zfp41,ENSMUSG00000047003,protein_coding -20565,Fbxo41,ENSMUSG00000047013,protein_coding -44541,Catsperb,ENSMUSG00000047014,protein_coding -1230,Cfap65,ENSMUSG00000047021,protein_coding -43690,Mipol1,ENSMUSG00000047022,protein_coding 
-28652,Ccer1,ENSMUSG00000047025,protein_coding -25977,Acsm4,ENSMUSG00000047026,protein_coding -6915,Spata2,ENSMUSG00000047030,protein_coding -22349,Vmn1r68,ENSMUSG00000047031,protein_coding -53189,Pcdhb15,ENSMUSG00000047033,protein_coding -46991,Ankrd33,ENSMUSG00000047034,protein_coding -37237,Zfp445,ENSMUSG00000047036,protein_coding -24237,Nipa1,ENSMUSG00000047037,protein_coding -5225,Olfr1151,ENSMUSG00000047039,protein_coding -39410,Prr15l,ENSMUSG00000047040,protein_coding -54703,D030056L22Rik,ENSMUSG00000047044,protein_coding -9432,Tmem164,ENSMUSG00000047045,protein_coding -3023,Olfr432,ENSMUSG00000047048,protein_coding -35154,Olfr914,ENSMUSG00000047050,protein_coding -2522,Teddm1a,ENSMUSG00000047053,protein_coding -1652,Dusp28,ENSMUSG00000047067,protein_coding -9041,Ube2dnl2,ENSMUSG00000047079,protein_coding -24868,Ngrn,ENSMUSG00000047084,protein_coding -23916,Lrrc4b,ENSMUSG00000047085,protein_coding -29263,Tmem198b,ENSMUSG00000047090,protein_coding -41217,Ofcc1,ENSMUSG00000047094,protein_coding -33479,Rnf31,ENSMUSG00000047098,protein_coding -19522,Tas2r139,ENSMUSG00000047102,protein_coding -21757,Pbp2,ENSMUSG00000047104,protein_coding -46477,Dnajb7,ENSMUSG00000047108,protein_coding -48677,Cldn14,ENSMUSG00000047109,protein_coding -19710,Fam221a,ENSMUSG00000047115,protein_coding -42399,Ankdd1b,ENSMUSG00000047117,protein_coding -50516,Ticam1,ENSMUSG00000047123,protein_coding -39171,Cltc,ENSMUSG00000047126,protein_coding -28434,1700113H08Rik,ENSMUSG00000047129,protein_coding -27424,Cd24a,ENSMUSG00000047139,protein_coding -48287,Zfp654,ENSMUSG00000047141,protein_coding -14291,Dmrta2,ENSMUSG00000047143,protein_coding -27755,Tet1,ENSMUSG00000047146,protein_coding -5101,Olfr1052,ENSMUSG00000047149,protein_coding -50335,1700001C19Rik,ENSMUSG00000047150,protein_coding -33507,Khnyn,ENSMUSG00000047153,protein_coding -14343,Cyp4x1,ENSMUSG00000047155,protein_coding -52860,Chst9,ENSMUSG00000047161,protein_coding -30248,Helt,ENSMUSG00000047171,protein_coding 
-505,Neurl3,ENSMUSG00000047180,protein_coding -39327,Samd14,ENSMUSG00000047181,protein_coding -18462,Irs3,ENSMUSG00000047182,protein_coding -12827,Rab2a,ENSMUSG00000047187,protein_coding -34487,Dync2h1,ENSMUSG00000047193,protein_coding -39517,Gjd3,ENSMUSG00000047197,protein_coding -20749,Vmn1r54,ENSMUSG00000047203,protein_coding -37335,Dusp18,ENSMUSG00000047205,protein_coding -54643,Olfr1495,ENSMUSG00000047207,protein_coding -10030,Ythdf3,ENSMUSG00000047213,protein_coding -16752,Rpl9,ENSMUSG00000047215,protein_coding -1863,Cdh19,ENSMUSG00000047216,protein_coding -36854,Ccdc36,ENSMUSG00000047220,protein_coding -15928,Fam185a,ENSMUSG00000047221,protein_coding -33097,Rnase2a,ENSMUSG00000047222,protein_coding -25614,Olfr684,ENSMUSG00000047225,protein_coding -43780,Gm527,ENSMUSG00000047227,protein_coding -21499,A2ml1,ENSMUSG00000047228,protein_coding -9378,Cldn2,ENSMUSG00000047230,protein_coding -36905,Fbxw21,ENSMUSG00000047237,protein_coding -9617,Mageh1,ENSMUSG00000047238,protein_coding -8932,Taf9b,ENSMUSG00000047242,protein_coding -40852,Hist1h2be,ENSMUSG00000047246,protein_coding -25315,C2cd3,ENSMUSG00000047248,protein_coding -4339,Ptgs1,ENSMUSG00000047250,protein_coding -39544,Krtap1-5,ENSMUSG00000047253,protein_coding -36971,Prss45,ENSMUSG00000047257,protein_coding -53708,Mc4r,ENSMUSG00000047259,protein_coding -38738,Emc6,ENSMUSG00000047260,protein_coding -48009,Gap43,ENSMUSG00000047261,protein_coding -29378,Zfp358,ENSMUSG00000047264,protein_coding -14925,Sfn,ENSMUSG00000047281,protein_coding -38597,Neurl4,ENSMUSG00000047284,protein_coding -30876,Olfr370,ENSMUSG00000047286,protein_coding -48212,Gpr15,ENSMUSG00000047293,protein_coding -46758,Smgc,ENSMUSG00000047295,protein_coding -54818,Kcnv2,ENSMUSG00000047298,protein_coding -53187,Pcdhb13,ENSMUSG00000047307,protein_coding -1303,Kcne4,ENSMUSG00000047330,protein_coding -38424,Zfp286,ENSMUSG00000047342,protein_coding -667,Mettl21c,ENSMUSG00000047343,protein_coding -7433,Lancl3,ENSMUSG00000047344,protein_coding 
-35241,Olfr976,ENSMUSG00000047352,protein_coding -3989,Lcn10,ENSMUSG00000047356,protein_coding -921,Gm973,ENSMUSG00000047361,protein_coding -4162,Cstad,ENSMUSG00000047363,protein_coding -54733,Abhd17b,ENSMUSG00000047368,protein_coding -3186,Dnah14,ENSMUSG00000047369,protein_coding -26205,Zfp768,ENSMUSG00000047371,protein_coding -54168,B4gat1,ENSMUSG00000047379,protein_coding -31580,Atmin,ENSMUSG00000047388,protein_coding -29824,Defb9,ENSMUSG00000047390,protein_coding -46713,Odf3b,ENSMUSG00000047394,protein_coding -50712,Tgif1,ENSMUSG00000047407,protein_coding -37096,Ctdspl,ENSMUSG00000047409,protein_coding -34944,Zbtb44,ENSMUSG00000047412,protein_coding -44436,Flrt2,ENSMUSG00000047414,protein_coding -44527,Gpr68,ENSMUSG00000047415,protein_coding -28182,Rexo1,ENSMUSG00000047417,protein_coding -42324,Cmya5,ENSMUSG00000047419,protein_coding -19331,Fam180a,ENSMUSG00000047420,protein_coding -54200,AI837181,ENSMUSG00000047423,protein_coding -50292,Dlk2,ENSMUSG00000047428,protein_coding -47762,Xxylt1,ENSMUSG00000047434,protein_coding -32571,Antxrl,ENSMUSG00000047441,protein_coding -1608,Erfe,ENSMUSG00000047443,protein_coding -38786,Olfr139,ENSMUSG00000047444,protein_coding -43470,Arl4a,ENSMUSG00000047446,protein_coding -44031,Gphn,ENSMUSG00000047454,protein_coding -6574,Dynlrb1,ENSMUSG00000047459,protein_coding -40635,Gpr141b,ENSMUSG00000047462,protein_coding -53855,8030462N17Rik,ENSMUSG00000047466,protein_coding -23315,Zfp30,ENSMUSG00000047473,protein_coding -9710,Klhl34,ENSMUSG00000047485,protein_coding -29162,Inhbe,ENSMUSG00000047492,protein_coding -29645,Dlgap2,ENSMUSG00000047495,protein_coding -1792,Rnf152,ENSMUSG00000047496,protein_coding -45410,Adamts12,ENSMUSG00000047497,protein_coding -18339,Cldn4,ENSMUSG00000047501,protein_coding -14196,Mroh7,ENSMUSG00000047502,protein_coding -49342,Baiap3,ENSMUSG00000047507,protein_coding -34657,Mbd3l2,ENSMUSG00000047508,protein_coding -37967,Olfr1396,ENSMUSG00000047511,protein_coding 
-27269,Tspyl1,ENSMUSG00000047514,protein_coding -21788,BC049715,ENSMUSG00000047515,protein_coding -26335,Dmbt1,ENSMUSG00000047517,protein_coding -14548,Slfnl1,ENSMUSG00000047518,protein_coding -896,Als2cr12,ENSMUSG00000047528,protein_coding -47685,Rtp2,ENSMUSG00000047531,protein_coding -43790,Mis18bp1,ENSMUSG00000047534,protein_coding -25504,Olfr67,ENSMUSG00000047535,protein_coding -3203,Fbxo28,ENSMUSG00000047539,protein_coding -25485,Olfr616,ENSMUSG00000047544,protein_coding -25500,Olfr629,ENSMUSG00000047545,protein_coding -41509,Cltb,ENSMUSG00000047547,protein_coding -25814,Tmem41b,ENSMUSG00000047554,protein_coding -10805,Lxn,ENSMUSG00000047557,protein_coding -34495,Mmp10,ENSMUSG00000047562,protein_coding -39543,Krtap3-1,ENSMUSG00000047564,protein_coding -45502,Acot10,ENSMUSG00000047565,protein_coding -12692,Tyw3,ENSMUSG00000047583,protein_coding -23257,Nccrp1,ENSMUSG00000047586,protein_coding -46203,Mafa,ENSMUSG00000047591,protein_coding -18504,Nxpe5,ENSMUSG00000047592,protein_coding -5186,Olfr1122,ENSMUSG00000047594,protein_coding -23036,Zfp235,ENSMUSG00000047603,protein_coding -55078,Frat2,ENSMUSG00000047604,protein_coding -36396,Ankrd34c,ENSMUSG00000047606,protein_coding -15526,A430005L14Rik,ENSMUSG00000047613,protein_coding -3966,Paxx,ENSMUSG00000047617,protein_coding -34482,Ddi1,ENSMUSG00000047619,protein_coding -29306,Olfr791,ENSMUSG00000047626,protein_coding -29214,Apof,ENSMUSG00000047631,protein_coding -54981,Fgfbp3,ENSMUSG00000047632,protein_coding -18129,2810006K23Rik,ENSMUSG00000047635,protein_coding -14205,Cdcp2,ENSMUSG00000047636,protein_coding -28479,Nr1h4,ENSMUSG00000047638,protein_coding -47009,Krt87,ENSMUSG00000047641,protein_coding -29067,D930020B18Rik,ENSMUSG00000047642,protein_coding -26860,Fbxo30,ENSMUSG00000047648,protein_coding -22802,Cd3eap,ENSMUSG00000047649,protein_coding -30572,Tssk6,ENSMUSG00000047654,protein_coding -22424,Vmn1r74,ENSMUSG00000047655,protein_coding -54319,Trpt1,ENSMUSG00000047656,protein_coding 
-54186,Gal3st3,ENSMUSG00000047658,protein_coding -35172,Olfr26,ENSMUSG00000047667,protein_coding -27618,Msl3l2,ENSMUSG00000047669,protein_coding -14425,Tctex1d4,ENSMUSG00000047671,protein_coding -12485,Pdha2,ENSMUSG00000047674,protein_coding -14429,Rps8,ENSMUSG00000047675,protein_coding -7513,Gpr82,ENSMUSG00000047678,protein_coding -8954,Rtl3,ENSMUSG00000047686,protein_coding -8726,Yipf6,ENSMUSG00000047694,protein_coding -10286,Ccdc144b,ENSMUSG00000047696,protein_coding -37980,Olfr1388,ENSMUSG00000047702,protein_coding -29634,Champ1,ENSMUSG00000047710,protein_coding -26807,Ust,ENSMUSG00000047712,protein_coding -47767,Ppp1r2,ENSMUSG00000047714,protein_coding -33045,Olfr736,ENSMUSG00000047716,protein_coding -15406,Ubiad1,ENSMUSG00000047719,protein_coding -21541,4922502D21Rik,ENSMUSG00000047720,protein_coding -26142,Bola2,ENSMUSG00000047721,protein_coding -46180,Ly6g2,ENSMUSG00000047728,protein_coding -23226,Fcgbp,ENSMUSG00000047730,protein_coding -55225,Wbp1l,ENSMUSG00000047731,protein_coding -54263,Tmem262,ENSMUSG00000047733,protein_coding -18899,Samd9l,ENSMUSG00000047735,protein_coding -47944,Fbxo40,ENSMUSG00000047746,protein_coding -30862,Rnf150,ENSMUSG00000047747,protein_coding -19362,Zc3hav1l,ENSMUSG00000047749,protein_coding -26486,Utf1,ENSMUSG00000047751,protein_coding -9810,Fancb,ENSMUSG00000047757,protein_coding -38450,Hs3st3a1,ENSMUSG00000047759,protein_coding -35779,Lrrc49,ENSMUSG00000047766,protein_coding -25347,Atg16l2,ENSMUSG00000047767,protein_coding -39258,Ankfn1,ENSMUSG00000047773,protein_coding -15493,Phf13,ENSMUSG00000047777,protein_coding -49033,Lix1,ENSMUSG00000047786,protein_coding -54324,Flrt1,ENSMUSG00000047787,protein_coding -42669,Slc38a9,ENSMUSG00000047789,protein_coding -1665,Sned1,ENSMUSG00000047793,protein_coding -25615,Olfr685,ENSMUSG00000047794,protein_coding -8775,Gjb1,ENSMUSG00000047797,protein_coding -40008,Cd300lf,ENSMUSG00000047798,protein_coding -15224,Oog4,ENSMUSG00000047799,protein_coding 
-38393,Akap10,ENSMUSG00000047804,protein_coding -54304,Ccdc88b,ENSMUSG00000047810,protein_coding -11015,Tigd4,ENSMUSG00000047819,protein_coding -38428,Trim16,ENSMUSG00000047821,protein_coding -34809,Angptl8,ENSMUSG00000047822,protein_coding -11194,Pygo2,ENSMUSG00000047824,protein_coding -44945,Cdca4,ENSMUSG00000047832,protein_coding -7216,Fndc11,ENSMUSG00000047841,protein_coding -41467,Diras2,ENSMUSG00000047842,protein_coding -18670,Bri3,ENSMUSG00000047843,protein_coding -9288,Bex4,ENSMUSG00000047844,protein_coding -48276,Stx19,ENSMUSG00000047854,protein_coding -37801,Foxi1,ENSMUSG00000047861,protein_coding -31004,Lonp2,ENSMUSG00000047866,protein_coding -19674,Gimap6,ENSMUSG00000047867,protein_coding -29283,Olfr770,ENSMUSG00000047868,protein_coding -15464,Gpr157,ENSMUSG00000047875,protein_coding -46583,A4galt,ENSMUSG00000047878,protein_coding -52770,Usp14,ENSMUSG00000047879,protein_coding -35399,Cxcr5,ENSMUSG00000047880,protein_coding -16709,Rell1,ENSMUSG00000047881,protein_coding -23844,Klk9,ENSMUSG00000047884,protein_coding -46460,Tnrc6b,ENSMUSG00000047888,protein_coding -41059,Serpinb6d,ENSMUSG00000047889,protein_coding -33095,Ang2,ENSMUSG00000047894,protein_coding -36326,Ripply2,ENSMUSG00000047897,protein_coding -37033,Ccr4,ENSMUSG00000047898,protein_coding -39971,Sstr2,ENSMUSG00000047904,protein_coding -6967,Tshz2,ENSMUSG00000047907,protein_coding -3710,Ankrd16,ENSMUSG00000047909,protein_coding -53190,Pcdhb16,ENSMUSG00000047910,protein_coding -33876,Npm2,ENSMUSG00000047911,protein_coding -46114,Trappc9,ENSMUSG00000047921,protein_coding -12469,Stpg2,ENSMUSG00000047940,protein_coding -14784,Marcksl1,ENSMUSG00000047945,protein_coding -47749,Gp5,ENSMUSG00000047953,protein_coding -11868,Kcna3,ENSMUSG00000047959,protein_coding -48227,Olfr186,ENSMUSG00000047960,protein_coding -17253,Stbd1,ENSMUSG00000047963,protein_coding -5144,Olfr1093,ENSMUSG00000047969,protein_coding -21423,Kcna1,ENSMUSG00000047976,protein_coding -33822,Synb,ENSMUSG00000047977,protein_coding 
-35042,Pate5,ENSMUSG00000047980,protein_coding -30904,Palm3,ENSMUSG00000047986,protein_coding -39436,4933428G20Rik,ENSMUSG00000047988,protein_coding -52934,Ino80c,ENSMUSG00000047989,protein_coding -35942,C2cd4a,ENSMUSG00000047990,protein_coding -53997,Dipk1c,ENSMUSG00000047992,protein_coding -34872,Cypt4,ENSMUSG00000047995,protein_coding -8501,Prrg1,ENSMUSG00000047996,protein_coding -1505,Gigyf2,ENSMUSG00000048000,protein_coding -15556,Hes5,ENSMUSG00000048001,protein_coding -14951,Catsper4,ENSMUSG00000048003,protein_coding -45290,Tmem196,ENSMUSG00000048004,protein_coding -9227,Timm8a1,ENSMUSG00000048007,protein_coding -23938,Zfp473,ENSMUSG00000048012,protein_coding -39600,Krt35,ENSMUSG00000048013,protein_coding -29353,Neurod4,ENSMUSG00000048015,protein_coding -19160,Tmem229a,ENSMUSG00000048022,protein_coding -49010,Rgmb,ENSMUSG00000048027,protein_coding -55396,Eno4,ENSMUSG00000048029,protein_coding -11070,Fcrl5,ENSMUSG00000048031,protein_coding -4013,Ccdc187,ENSMUSG00000048038,protein_coding -11085,Isg20l2,ENSMUSG00000048039,protein_coding -9283,Arxes2,ENSMUSG00000048040,protein_coding -7835,Zbtb33,ENSMUSG00000048047,protein_coding -5511,Ldlrad3,ENSMUSG00000048058,protein_coding -49055,Fpr-rs4,ENSMUSG00000048062,protein_coding -25713,Cyb5r2,ENSMUSG00000048065,protein_coding -22221,Olfr1349,ENSMUSG00000048067,protein_coding -38471,Pirt,ENSMUSG00000048070,protein_coding -38281,Arf1,ENSMUSG00000048076,protein_coding -46847,H1fnt,ENSMUSG00000048077,protein_coding -25203,Tenm4,ENSMUSG00000048078,protein_coding -33039,Olfr731,ENSMUSG00000048080,protein_coding -48080,Gm4737,ENSMUSG00000048087,protein_coding -2241,Lmod1,ENSMUSG00000048096,protein_coding -11945,Taf13,ENSMUSG00000048100,protein_coding -47402,Olfr19,ENSMUSG00000048101,protein_coding -21148,Tmem72,ENSMUSG00000048108,protein_coding -11878,Rbm15,ENSMUSG00000048109,protein_coding -43905,Arid4a,ENSMUSG00000048118,protein_coding -55056,Entpd1,ENSMUSG00000048120,protein_coding 
-1590,Col6a3,ENSMUSG00000048126,protein_coding -54798,Dmrt2,ENSMUSG00000048138,protein_coding -16275,Nat8l,ENSMUSG00000048142,protein_coding -30723,Nwd1,ENSMUSG00000048148,protein_coding -33396,Olfr49,ENSMUSG00000048153,protein_coding -46882,Kmt2d,ENSMUSG00000048154,protein_coding -9328,H2bfm,ENSMUSG00000048155,protein_coding -17765,Selplg,ENSMUSG00000048163,protein_coding -26299,Mcmbp,ENSMUSG00000048170,protein_coding -7966,Olfr1324,ENSMUSG00000048173,protein_coding -2154,Tmem81,ENSMUSG00000048174,protein_coding -46841,Asb8,ENSMUSG00000048175,protein_coding -27782,Gm7075,ENSMUSG00000048185,protein_coding -3488,Bend7,ENSMUSG00000048186,protein_coding -26603,Muc6,ENSMUSG00000048191,protein_coding -5228,Olfr1153,ENSMUSG00000048197,protein_coding -26593,Cracr2b,ENSMUSG00000048200,protein_coding -20685,Dnajb8,ENSMUSG00000048206,protein_coding -19026,Gpr85,ENSMUSG00000048216,protein_coding -39722,Nags,ENSMUSG00000048217,protein_coding -46822,Amigo2,ENSMUSG00000048218,protein_coding -5916,Mfap1b,ENSMUSG00000048222,protein_coding -5287,Olfr1212,ENSMUSG00000048226,protein_coding -45677,Fbxo43,ENSMUSG00000048230,protein_coding -50013,H2-M10.4,ENSMUSG00000048231,protein_coding -13365,Fbxo10,ENSMUSG00000048232,protein_coding -595,Rnf149,ENSMUSG00000048234,protein_coding -25715,Ovch2,ENSMUSG00000048236,protein_coding -28213,Gng7,ENSMUSG00000048240,protein_coding -49438,Crebrf,ENSMUSG00000048249,protein_coding -44705,Bcl11b,ENSMUSG00000048251,protein_coding -40422,Dip2c,ENSMUSG00000048264,protein_coding -16099,Rbm33,ENSMUSG00000048271,protein_coding -40138,Syngr2,ENSMUSG00000048277,protein_coding -33635,Sacs,ENSMUSG00000048279,protein_coding -41982,Zfp738,ENSMUSG00000048280,protein_coding -33669,Dleu7,ENSMUSG00000048281,protein_coding -19537,Tas2r126,ENSMUSG00000048284,protein_coding -43898,Frmd6,ENSMUSG00000048285,protein_coding -54525,Olfr1417,ENSMUSG00000048292,protein_coding -39572,Krtap4-13,ENSMUSG00000048294,protein_coding -35220,Olfr148,ENSMUSG00000048299,protein_coding 
-10880,Slitrk3,ENSMUSG00000048304,protein_coding -45691,Ankrd46,ENSMUSG00000048307,protein_coding -31391,Pskh1,ENSMUSG00000048310,protein_coding -23725,Gm4884,ENSMUSG00000048312,protein_coding -6073,Ckap2l,ENSMUSG00000048327,protein_coding -38507,Mfsd6l,ENSMUSG00000048329,protein_coding -25790,Ric3,ENSMUSG00000048330,protein_coding -10546,Lhfp,ENSMUSG00000048332,protein_coding -32577,Npy4r,ENSMUSG00000048337,protein_coding -53192,Pcdhb18,ENSMUSG00000048347,protein_coding -34236,Pou4f1,ENSMUSG00000048349,protein_coding -14245,Coa7,ENSMUSG00000048351,protein_coding -9284,Arxes1,ENSMUSG00000048355,protein_coding -54644,Olfr1496,ENSMUSG00000048356,protein_coding -41400,Omd,ENSMUSG00000048368,protein_coding -31309,Pdp2,ENSMUSG00000048371,protein_coding -16449,Fgfbp1,ENSMUSG00000048373,protein_coding -42384,F2r,ENSMUSG00000048376,protein_coding -26417,Foxi2,ENSMUSG00000048377,protein_coding -37969,Olfr1394,ENSMUSG00000048378,protein_coding -32952,Socs4,ENSMUSG00000048379,protein_coding -46258,Scrt1,ENSMUSG00000048385,protein_coding -42920,Osr1,ENSMUSG00000048387,protein_coding -4978,Fam171b,ENSMUSG00000048388,protein_coding -34680,Olfr843,ENSMUSG00000048391,protein_coding -22171,Gm5065,ENSMUSG00000048398,protein_coding -47702,Tprg,ENSMUSG00000048399,protein_coding -31210,Prss54,ENSMUSG00000048400,protein_coding -1925,Gli2,ENSMUSG00000048402,protein_coding -53988,Zfp407,ENSMUSG00000048410,protein_coding -374,Gm597,ENSMUSG00000048411,protein_coding -10803,Mlf1,ENSMUSG00000048416,protein_coding -45378,Ranbp3l,ENSMUSG00000048424,protein_coding -25616,Olfr686,ENSMUSG00000048425,protein_coding -34798,Timm29,ENSMUSG00000048429,protein_coding -15997,Nupl2,ENSMUSG00000048439,protein_coding -49688,Cyp4f16,ENSMUSG00000048440,protein_coding -40053,Smim5,ENSMUSG00000048442,protein_coding -40265,Ccdc57,ENSMUSG00000048445,protein_coding -16368,Msx1,ENSMUSG00000048450,protein_coding -11298,Sprr1b,ENSMUSG00000048455,protein_coding -54590,Olfr1448,ENSMUSG00000048456,protein_coding 
-11825,Inka2,ENSMUSG00000048458,protein_coding -25426,Olfr564,ENSMUSG00000048469,protein_coding -21877,Sult6b2,ENSMUSG00000048473,protein_coding -31744,Spata33,ENSMUSG00000048478,protein_coding -1192,Cxcr1,ENSMUSG00000048480,protein_coding -22773,Mypop,ENSMUSG00000048481,protein_coding -5640,Bdnf,ENSMUSG00000048482,protein_coding -44255,Zdhhc22,ENSMUSG00000048483,protein_coding -14778,Zbtb8b,ENSMUSG00000048485,protein_coding -6774,Fitm2,ENSMUSG00000048486,protein_coding -21147,Depp1,ENSMUSG00000048489,protein_coding -48377,Nrip1,ENSMUSG00000048490,protein_coding -866,Tyw5,ENSMUSG00000048495,protein_coding -38419,Mmgt2,ENSMUSG00000048497,protein_coding -40006,Cd300e,ENSMUSG00000048498,protein_coding -29827,Defb15,ENSMUSG00000048500,protein_coding -35191,Olfr938,ENSMUSG00000048501,protein_coding -32386,Duxbl1,ENSMUSG00000048502,protein_coding -35328,Tmem136,ENSMUSG00000048503,protein_coding -30191,Adam26a,ENSMUSG00000048516,protein_coding -15929,Fbxl13,ENSMUSG00000048520,protein_coding -37277,Cxcr6,ENSMUSG00000048521,protein_coding -26374,Nkx1-2,ENSMUSG00000048528,protein_coding -35436,Jaml,ENSMUSG00000048534,protein_coding -35411,Phldb1,ENSMUSG00000048537,protein_coding -11706,Nhlh2,ENSMUSG00000048540,protein_coding -46495,Tob2,ENSMUSG00000048546,protein_coding -3852,Thnsl1,ENSMUSG00000048550,protein_coding -45277,Sp8,ENSMUSG00000048562,protein_coding -54780,Tmem252,ENSMUSG00000048572,protein_coding -9628,Cypt3,ENSMUSG00000048573,protein_coding -17821,Mlec,ENSMUSG00000048578,protein_coding -10731,E130311K13Rik,ENSMUSG00000048581,protein_coding -33559,Gja3,ENSMUSG00000048582,protein_coding -26654,Igf2,ENSMUSG00000048583,protein_coding -49721,Morc2b,ENSMUSG00000048602,protein_coding -55004,Myof,ENSMUSG00000048612,protein_coding -39256,Nog,ENSMUSG00000048616,protein_coding -30955,Rtbdn,ENSMUSG00000048617,protein_coding -22215,Olfr1336,ENSMUSG00000048620,protein_coding -8985,Gm6377,ENSMUSG00000048621,protein_coding -14453,Klf17,ENSMUSG00000048626,protein_coding 
-20815,A730049H05Rik,ENSMUSG00000048636,protein_coding -29426,Ctxn1,ENSMUSG00000048644,protein_coding -5843,Exd1,ENSMUSG00000048647,protein_coding -12591,Samd13,ENSMUSG00000048652,protein_coding -10590,Ccdc169,ENSMUSG00000048655,protein_coding -29043,Lemd3,ENSMUSG00000048661,protein_coding -21483,Rhno1,ENSMUSG00000048668,protein_coding -26731,Tpcn2,ENSMUSG00000048677,protein_coding -14747,Hmgb4,ENSMUSG00000048686,protein_coding -19575,Olfr435,ENSMUSG00000048693,protein_coding -28173,Mex3d,ENSMUSG00000048696,protein_coding -19911,Vmn1r26,ENSMUSG00000048697,protein_coding -47020,Krt90,ENSMUSG00000048699,protein_coding -27852,Ccdc6,ENSMUSG00000048701,protein_coding -16084,Speer4b,ENSMUSG00000048703,protein_coding -13819,Lurap1l,ENSMUSG00000048706,protein_coding -3948,Tprn,ENSMUSG00000048707,protein_coding -55024,Tbc1d12,ENSMUSG00000048720,protein_coding -37914,Fndc9,ENSMUSG00000048721,protein_coding -38030,Zfp454,ENSMUSG00000048728,protein_coding -49455,Ggnbp1,ENSMUSG00000048731,protein_coding -39627,Klhl11,ENSMUSG00000048732,protein_coding -29341,Olfr820,ENSMUSG00000048745,protein_coding -14052,E130114P18Rik,ENSMUSG00000048747,protein_coding -36973,Prss50,ENSMUSG00000048752,protein_coding -46593,Mcat,ENSMUSG00000048755,protein_coding -27391,Foxo3,ENSMUSG00000048756,protein_coding -36741,Rpl29,ENSMUSG00000048758,protein_coding -39389,Hoxb3,ENSMUSG00000048763,protein_coding -17082,Tmprss11f,ENSMUSG00000048764,protein_coding -14319,Skint10,ENSMUSG00000048766,protein_coding -14441,Tmem53,ENSMUSG00000048772,protein_coding -1827,Serpinb13,ENSMUSG00000048775,protein_coding -21960,Pthlh,ENSMUSG00000048776,protein_coding -25339,P2ry6,ENSMUSG00000048779,protein_coding -25895,Insc,ENSMUSG00000048782,protein_coding -25986,Dcun1d3,ENSMUSG00000048787,protein_coding -20756,Cfap100,ENSMUSG00000048794,protein_coding -11919,Cyb561d1,ENSMUSG00000048796,protein_coding -53460,Cep120,ENSMUSG00000048799,protein_coding -13897,Ifnb1,ENSMUSG00000048806,protein_coding 
-37336,Slc35e4,ENSMUSG00000048807,protein_coding -48243,Olfr202,ENSMUSG00000048810,protein_coding -574,Lonrf2,ENSMUSG00000048814,protein_coding -48970,Dact2,ENSMUSG00000048826,protein_coding -31470,Pkd1l3,ENSMUSG00000048827,protein_coding -48523,2310057N15Rik,ENSMUSG00000048830,protein_coding -54469,Vps37c,ENSMUSG00000048832,protein_coding -44086,Slc39a9,ENSMUSG00000048833,protein_coding -37513,Vstm2a,ENSMUSG00000048834,protein_coding -37956,Gm12185,ENSMUSG00000048852,protein_coding -44725,Slc25a47,ENSMUSG00000048856,protein_coding -2917,Arhgap30,ENSMUSG00000048865,protein_coding -389,Phf3,ENSMUSG00000048874,protein_coding -53433,Pudp,ENSMUSG00000048875,protein_coding -39774,Hexim1,ENSMUSG00000048878,protein_coding -39014,Cdk5r1,ENSMUSG00000048895,protein_coding -24855,Zfp710,ENSMUSG00000048897,protein_coding -14531,Rimkla,ENSMUSG00000048899,protein_coding -41570,Neurog1,ENSMUSG00000048904,protein_coding -49547,4930539E08Rik,ENSMUSG00000048905,protein_coding -6148,Rnf24,ENSMUSG00000048911,protein_coding -50606,Efna5,ENSMUSG00000048915,protein_coding -38297,Olfr223,ENSMUSG00000048919,protein_coding -22685,Fkrp,ENSMUSG00000048920,protein_coding -26214,Zfp689,ENSMUSG00000048921,protein_coding -33793,Cdca2,ENSMUSG00000048922,protein_coding -42509,Ccdc125,ENSMUSG00000048924,protein_coding -21039,Tada3,ENSMUSG00000048930,protein_coding -33028,Olfr722,ENSMUSG00000048933,protein_coding -11732,Nr1h5,ENSMUSG00000048938,protein_coding -47729,Atp13a5,ENSMUSG00000048939,protein_coding -136,Prex2,ENSMUSG00000048960,protein_coding -26701,Mrgpre,ENSMUSG00000048965,protein_coding -30570,Yjefn3,ENSMUSG00000048967,protein_coding -7846,C1galt1c1,ENSMUSG00000048970,protein_coding -40916,Nrsn1,ENSMUSG00000048978,protein_coding -39597,Krt31,ENSMUSG00000048981,protein_coding -43978,Gphb5,ENSMUSG00000048982,protein_coding -18562,Elfn1,ENSMUSG00000048988,protein_coding -49247,Prss32,ENSMUSG00000048992,protein_coding -7446,H2al3,ENSMUSG00000048994,protein_coding 
-40677,Olfr1366,ENSMUSG00000048996,protein_coding -11920,Atxn7l2,ENSMUSG00000048997,protein_coding -20007,Ndnf,ENSMUSG00000049001,protein_coding -29656,BB014433,ENSMUSG00000049008,protein_coding -35249,Olfr982,ENSMUSG00000049010,protein_coding -33037,Olfr729,ENSMUSG00000049011,protein_coding -11038,Prss48,ENSMUSG00000049013,protein_coding -54610,Olfr1467,ENSMUSG00000049015,protein_coding -4386,Olfr368,ENSMUSG00000049018,protein_coding -34725,Olfr873,ENSMUSG00000049028,protein_coding -44965,Tmem121,ENSMUSG00000049036,protein_coding -21287,Clec4a1,ENSMUSG00000049037,protein_coding -28363,Mterf2,ENSMUSG00000049038,protein_coding -38785,Olfr398,ENSMUSG00000049041,protein_coding -4790,Rapgef4,ENSMUSG00000049044,protein_coding -9241,Armcx3,ENSMUSG00000049047,protein_coding -29330,Olfr812,ENSMUSG00000049052,protein_coding -5337,Olfr1257,ENSMUSG00000049057,protein_coding -35252,Olfr985,ENSMUSG00000049073,protein_coding -47764,Acap2,ENSMUSG00000049076,protein_coding -3992,Bmyc,ENSMUSG00000049086,protein_coding -53985,Zadh2,ENSMUSG00000049090,protein_coding -26200,Sephs2,ENSMUSG00000049091,protein_coding -32880,Gpr137c,ENSMUSG00000049092,protein_coding -20058,Il23r,ENSMUSG00000049093,protein_coding -11571,Ankrd34a,ENSMUSG00000049097,protein_coding -35140,Olfr147,ENSMUSG00000049098,protein_coding -10432,Pcdh10,ENSMUSG00000049100,protein_coding -37283,Ccr2,ENSMUSG00000049103,protein_coding -44071,Dcaf5,ENSMUSG00000049106,protein_coding -21416,Ntf3,ENSMUSG00000049107,protein_coding -27158,Themis,ENSMUSG00000049109,protein_coding -21011,Oxtr,ENSMUSG00000049112,protein_coding -40988,Agtr1a,ENSMUSG00000049115,protein_coding -12800,Fam110b,ENSMUSG00000049119,protein_coding -13779,Frmd3,ENSMUSG00000049122,protein_coding -23310,Catsperg2,ENSMUSG00000049123,protein_coding -11304,Ivl,ENSMUSG00000049128,protein_coding -22658,C5ar1,ENSMUSG00000049130,protein_coding -11343,Flg2,ENSMUSG00000049133,protein_coding -55350,Nrap,ENSMUSG00000049134,protein_coding 
-45312,Plcxd3,ENSMUSG00000049148,protein_coding -5338,Olfr1258,ENSMUSG00000049149,protein_coding -45384,Ugt3a2,ENSMUSG00000049152,protein_coding -38252,Fam183b,ENSMUSG00000049154,protein_coding -2679,Tex50,ENSMUSG00000049160,protein_coding -55064,Zfp518a,ENSMUSG00000049164,protein_coding -19558,Olfr449,ENSMUSG00000049168,protein_coding -53553,Myoz3,ENSMUSG00000049173,protein_coding -9845,Frmpd4,ENSMUSG00000049176,protein_coding -30035,Purg,ENSMUSG00000049184,protein_coding -8796,Rtl5,ENSMUSG00000049191,protein_coding -14305,Skint7,ENSMUSG00000049214,protein_coding -29303,Olfr788,ENSMUSG00000049217,protein_coding -12880,Pdp1,ENSMUSG00000049225,protein_coding -34681,Olfr844,ENSMUSG00000049229,protein_coding -9956,Gm9833,ENSMUSG00000049230,protein_coding -19947,Tigd2,ENSMUSG00000049232,protein_coding -32783,Gm7324,ENSMUSG00000049235,protein_coding -18107,Hcar1,ENSMUSG00000049241,protein_coding -4460,Lrp1b,ENSMUSG00000049252,protein_coding -16146,Kcnk3,ENSMUSG00000049265,protein_coding -25773,Olfr509,ENSMUSG00000049280,protein_coding -35256,Scn3b,ENSMUSG00000049281,protein_coding -18500,Mblac1,ENSMUSG00000049285,protein_coding -38274,Iba57,ENSMUSG00000049287,protein_coding -11572,Lix1l,ENSMUSG00000049288,protein_coding -38285,Prss38,ENSMUSG00000049291,protein_coding -33139,Zfp219,ENSMUSG00000049295,protein_coding -38541,Trappc1,ENSMUSG00000049299,protein_coding -11971,Prmt6,ENSMUSG00000049300,protein_coding -54139,Syt12,ENSMUSG00000049303,protein_coding -36858,Ccdc71,ENSMUSG00000049305,protein_coding -34593,Fut4,ENSMUSG00000049307,protein_coding -35310,Sorl1,ENSMUSG00000049313,protein_coding -36906,Fbxw13,ENSMUSG00000049314,protein_coding -4358,Olfr348,ENSMUSG00000049315,protein_coding -38031,Zfp2,ENSMUSG00000049321,protein_coding -38356,Smcr8,ENSMUSG00000049323,protein_coding -18134,Kmt5a,ENSMUSG00000049327,protein_coding -35142,Olfr902,ENSMUSG00000049334,protein_coding -37821,Tenm2,ENSMUSG00000049336,protein_coding -1237,Retreg2,ENSMUSG00000049339,protein_coding 
-12438,Gm5105,ENSMUSG00000049349,protein_coding -26184,Zg16,ENSMUSG00000049350,protein_coding -3377,Rd3,ENSMUSG00000049353,protein_coding -39844,Dcaf7,ENSMUSG00000049354,protein_coding -53057,4933408B17Rik,ENSMUSG00000049357,protein_coding -48217,Olfr173,ENSMUSG00000049362,protein_coding -5260,Olfr1183,ENSMUSG00000049372,protein_coding -47051,Krt8,ENSMUSG00000049382,protein_coding -16876,Cox7b2,ENSMUSG00000049387,protein_coding -38870,Gemin4,ENSMUSG00000049396,protein_coding -7185,Ogfr,ENSMUSG00000049401,protein_coding -10807,Rarres1,ENSMUSG00000049404,protein_coding -20650,Prokr1,ENSMUSG00000049409,protein_coding -14942,Zfp683,ENSMUSG00000049410,protein_coding -52805,Tmem241,ENSMUSG00000049411,protein_coding -27126,Tmem200a,ENSMUSG00000049420,protein_coding -23335,Zfp260,ENSMUSG00000049421,protein_coding -27952,Chchd10,ENSMUSG00000049422,protein_coding -47987,Upk1b,ENSMUSG00000049436,protein_coding -945,Cyp20a1,ENSMUSG00000049439,protein_coding -2995,Olfr1404,ENSMUSG00000049456,protein_coding -38097,Aff4,ENSMUSG00000049470,protein_coding -31708,Ctu2,ENSMUSG00000049482,protein_coding -12885,Tmem67,ENSMUSG00000049488,protein_coding -38967,Ccnq,ENSMUSG00000049489,protein_coding -38165,Slc36a3,ENSMUSG00000049491,protein_coding -36485,Pls1,ENSMUSG00000049493,protein_coding -54591,Olfr1449,ENSMUSG00000049498,protein_coding -47885,Dtx3l,ENSMUSG00000049502,protein_coding -10557,Proser1,ENSMUSG00000049504,protein_coding -39802,Sppl2c,ENSMUSG00000049506,protein_coding -36237,Htr1b,ENSMUSG00000049511,protein_coding -1606,Espnl,ENSMUSG00000049515,protein_coding -24100,Spty2d1,ENSMUSG00000049516,protein_coding -42294,Rps23,ENSMUSG00000049517,protein_coding -46363,Cdc42ep1,ENSMUSG00000049521,protein_coding -35754,Tmem202,ENSMUSG00000049526,protein_coding -3026,Olfr429,ENSMUSG00000049528,protein_coding -16486,Clrn2,ENSMUSG00000049530,protein_coding -33158,Sall2,ENSMUSG00000049532,protein_coding -9319,Tceal1,ENSMUSG00000049536,protein_coding 
-17056,Tecrl,ENSMUSG00000049537,protein_coding -42050,Adamts16,ENSMUSG00000049538,protein_coding -40876,Hist1h1a,ENSMUSG00000049539,protein_coding -47019,Krt82,ENSMUSG00000049548,protein_coding -18099,Clip1,ENSMUSG00000049550,protein_coding -18357,Fzd9,ENSMUSG00000049551,protein_coding -20326,Polr1a,ENSMUSG00000049553,protein_coding -36974,Tmie,ENSMUSG00000049555,protein_coding -35675,Lingo1,ENSMUSG00000049556,protein_coding -6480,Defb20,ENSMUSG00000049560,protein_coding -50064,Olfr95,ENSMUSG00000049561,protein_coding -54217,Ap5b1,ENSMUSG00000049562,protein_coding -11951,Aknad1,ENSMUSG00000049565,protein_coding -26478,Cfap46,ENSMUSG00000049571,protein_coding -29295,Olfr780,ENSMUSG00000049573,protein_coding -31696,Zfpm1,ENSMUSG00000049577,protein_coding -25245,Tsku,ENSMUSG00000049580,protein_coding -25080,Grm5,ENSMUSG00000049583,protein_coding -38161,Ccdc69,ENSMUSG00000049588,protein_coding -11319,Lce1h,ENSMUSG00000049593,protein_coding -2975,Vsig8,ENSMUSG00000049598,protein_coding -22504,Zbtb45,ENSMUSG00000049600,protein_coding -39375,Hoxb13,ENSMUSG00000049604,protein_coding -2997,Olfr418,ENSMUSG00000049605,protein_coding -17531,Zfp644,ENSMUSG00000049606,protein_coding -1444,Gpr55,ENSMUSG00000049608,protein_coding -38983,Omg,ENSMUSG00000049612,protein_coding -50077,Olfr103,ENSMUSG00000049618,protein_coding -49245,Prss33,ENSMUSG00000049620,protein_coding -36192,Slc17a5,ENSMUSG00000049624,protein_coding -41567,Tifab,ENSMUSG00000049625,protein_coding -3723,C1ql3,ENSMUSG00000049630,protein_coding -27557,Vgll2,ENSMUSG00000049641,protein_coding -23206,2310022A10Rik,ENSMUSG00000049643,protein_coding -13486,Olfr273,ENSMUSG00000049648,protein_coding -14907,Gpr3,ENSMUSG00000049649,protein_coding -46239,Spatc1,ENSMUSG00000049653,protein_coding -13362,Zbtb5,ENSMUSG00000049657,protein_coding -42486,Bdp1,ENSMUSG00000049658,protein_coding -37579,Aftph,ENSMUSG00000049659,protein_coding -55090,Morn4,ENSMUSG00000049670,protein_coding -50697,Zbtb14,ENSMUSG00000049672,protein_coding 
-25695,Olfr714,ENSMUSG00000049674,protein_coding -23295,Catsperg1,ENSMUSG00000049676,protein_coding -37397,Urgcp,ENSMUSG00000049680,protein_coding -23166,Cyp2g1,ENSMUSG00000049685,protein_coding -18061,Orai1,ENSMUSG00000049686,protein_coding -46525,Pheta2,ENSMUSG00000049687,protein_coding -2018,Nckap5,ENSMUSG00000049690,protein_coding -16411,Nkx3-2,ENSMUSG00000049691,protein_coding -6110,Tmem239,ENSMUSG00000049692,protein_coding -20751,BC048671,ENSMUSG00000049694,protein_coding -36895,Ucn2,ENSMUSG00000049699,protein_coding -35195,Olfr27,ENSMUSG00000049708,protein_coding -25786,Nlrp10,ENSMUSG00000049709,protein_coding -29521,Lig4,ENSMUSG00000049717,protein_coding -36972,Prss46,ENSMUSG00000049719,protein_coding -37341,Gal3st1,ENSMUSG00000049721,protein_coding -34491,Mmp12,ENSMUSG00000049723,protein_coding -26248,Zfp668,ENSMUSG00000049728,protein_coding -36900,Trex1,ENSMUSG00000049734,protein_coding -40682,Olfr1361,ENSMUSG00000049737,protein_coding -26249,Zfp646,ENSMUSG00000049739,protein_coding -11135,Rxfp4,ENSMUSG00000049741,protein_coding -4471,Arhgap15,ENSMUSG00000049744,protein_coding -43843,Rpl36al,ENSMUSG00000049751,protein_coding -38217,Zfp672,ENSMUSG00000049755,protein_coding -5714,Olfr1318,ENSMUSG00000049758,protein_coding -50527,Micos13,ENSMUSG00000049760,protein_coding -23392,Pmis2,ENSMUSG00000049761,protein_coding -27960,Zfp280b,ENSMUSG00000049764,protein_coding -9838,Tmsb4x,ENSMUSG00000049775,protein_coding -25103,Fzd4,ENSMUSG00000049791,protein_coding -44905,Bag5,ENSMUSG00000049792,protein_coding -10068,Crh,ENSMUSG00000049796,protein_coding -25531,Olfr642,ENSMUSG00000049797,protein_coding -14010,Lrrc19,ENSMUSG00000049799,protein_coding -37577,Sertad2,ENSMUSG00000049800,protein_coding -9236,Armcx4,ENSMUSG00000049804,protein_coding -41829,Olfr466,ENSMUSG00000049806,protein_coding -39435,Arhgap23,ENSMUSG00000049807,protein_coding -39547,Krtap9-3,ENSMUSG00000049809,protein_coding -37630,Fam161a,ENSMUSG00000049811,protein_coding 
-9167,4921511C20Rik,ENSMUSG00000049815,protein_coding -49853,Zbtb12,ENSMUSG00000049823,protein_coding -5155,Olfr1102,ENSMUSG00000049843,protein_coding -22843,Ceacam19,ENSMUSG00000049848,protein_coding -29253,Suox,ENSMUSG00000049858,protein_coding -35138,Olfr250,ENSMUSG00000049864,protein_coding -1557,Arl4c,ENSMUSG00000049866,protein_coding -47183,Nlrc3,ENSMUSG00000049871,protein_coding -27262,Calhm5,ENSMUSG00000049872,protein_coding -14576,Rlf,ENSMUSG00000049878,protein_coding -43863,Vcpkmt,ENSMUSG00000049882,protein_coding -38313,Rasd1,ENSMUSG00000049892,protein_coding -29326,Olfr808,ENSMUSG00000049894,protein_coding -4051,Stkld1,ENSMUSG00000049897,protein_coding -41639,4921517D22Rik,ENSMUSG00000049902,protein_coding -37613,Tmem17,ENSMUSG00000049904,protein_coding -16944,Rasl11b,ENSMUSG00000049907,protein_coding -11592,Gja8,ENSMUSG00000049908,protein_coding -47423,2610318N02Rik,ENSMUSG00000049916,protein_coding -5431,Slc35c1,ENSMUSG00000049922,protein_coding -35167,Olfr921,ENSMUSG00000049926,protein_coding -38490,Glp2r,ENSMUSG00000049928,protein_coding -8955,Lpar4,ENSMUSG00000049929,protein_coding -35376,H2afx,ENSMUSG00000049932,protein_coding -19188,Lrrc4,ENSMUSG00000049939,protein_coding -10395,Pgrmc2,ENSMUSG00000049940,protein_coding -30406,BC030500,ENSMUSG00000049946,protein_coding -3452,Rpp38,ENSMUSG00000049950,protein_coding -40225,Ccdc137,ENSMUSG00000049957,protein_coding -32287,Mrps16,ENSMUSG00000049960,protein_coding -12855,Plekhf2,ENSMUSG00000049969,protein_coding -18193,Glt1d1,ENSMUSG00000049971,protein_coding -14312,Skint9,ENSMUSG00000049972,protein_coding -42659,Ankrd55,ENSMUSG00000049985,protein_coding -30610,Lrrc25,ENSMUSG00000049988,protein_coding -7150,Ppp1r3d,ENSMUSG00000049999,protein_coding -41598,Idnk,ENSMUSG00000050002,protein_coding -16830,Shisa3,ENSMUSG00000050010,protein_coding -46313,Apol10b,ENSMUSG00000050014,protein_coding -4363,Olfr350,ENSMUSG00000050015,protein_coding -17698,Pitpnb,ENSMUSG00000050017,protein_coding 
-18587,Amz1,ENSMUSG00000050022,protein_coding -5253,Olfr1176,ENSMUSG00000050023,protein_coding -33055,Olfr745,ENSMUSG00000050028,protein_coding -7971,Rap2c,ENSMUSG00000050029,protein_coding -33036,Olfr728,ENSMUSG00000050030,protein_coding -28361,Fhl4,ENSMUSG00000050035,protein_coding -4998,Tmx2,ENSMUSG00000050043,protein_coding -17254,Ccdc158,ENSMUSG00000050050,protein_coding -29640,Tdrp,ENSMUSG00000050052,protein_coding -47285,Prm3,ENSMUSG00000050058,protein_coding -23847,Klk6,ENSMUSG00000050063,protein_coding -11623,Zfp697,ENSMUSG00000050064,protein_coding -3045,Grem2,ENSMUSG00000050069,protein_coding -9292,Bex1,ENSMUSG00000050071,protein_coding -36929,Spink8,ENSMUSG00000050074,protein_coding -10665,Gpr171,ENSMUSG00000050075,protein_coding -31170,Rspry1,ENSMUSG00000050079,protein_coding -25492,Olfr622,ENSMUSG00000050085,protein_coding -38016,Cby3,ENSMUSG00000050087,protein_coding -48982,1600012H06Rik,ENSMUSG00000050088,protein_coding -7325,Akap4,ENSMUSG00000050089,protein_coding -11286,Sprr2b,ENSMUSG00000050092,protein_coding -31323,Ces2b,ENSMUSG00000050097,protein_coding -26354,Hmx2,ENSMUSG00000050100,protein_coding -49133,Vmn1r235,ENSMUSG00000050102,protein_coding -43442,Agmo,ENSMUSG00000050103,protein_coding -14956,Grrp1,ENSMUSG00000050105,protein_coding -40133,Tmc8,ENSMUSG00000050106,protein_coding -38736,Haspin,ENSMUSG00000050107,protein_coding -28390,Bpifc,ENSMUSG00000050108,protein_coding -4938,Prdx6b,ENSMUSG00000050114,protein_coding -55067,Opalin,ENSMUSG00000050121,protein_coding -537,Vwa3b,ENSMUSG00000050122,protein_coding -5069,Olfr1023,ENSMUSG00000050128,protein_coding -38953,Sarm1,ENSMUSG00000050132,protein_coding -3025,Olfr430,ENSMUSG00000050134,protein_coding -51002,Kcnk12,ENSMUSG00000050138,protein_coding -13282,Fam205c,ENSMUSG00000050141,protein_coding -11119,Slc25a44,ENSMUSG00000050144,protein_coding -30726,F2rl3,ENSMUSG00000050147,protein_coding -9626,Ubqln2,ENSMUSG00000050148,protein_coding -12384,Slc9b1,ENSMUSG00000050150,protein_coding 
-27953,Gm867,ENSMUSG00000050157,protein_coding -47537,Olfr165,ENSMUSG00000050158,protein_coding -46472,Mchr1,ENSMUSG00000050164,protein_coding -10330,Nudt6,ENSMUSG00000050174,protein_coding -14688,Lsm10,ENSMUSG00000050188,protein_coding -30192,Gm5346,ENSMUSG00000050190,protein_coding -10153,Eif5a2,ENSMUSG00000050192,protein_coding -55152,Scd4,ENSMUSG00000050195,protein_coding -7833,Rhox13,ENSMUSG00000050197,protein_coding -29281,Olfr768,ENSMUSG00000050198,protein_coding -5644,Lgr4,ENSMUSG00000050199,protein_coding -40016,Otop2,ENSMUSG00000050201,protein_coding -5869,Pla2g4e,ENSMUSG00000050211,protein_coding -14691,Eva1b,ENSMUSG00000050212,protein_coding -14675,Snip1,ENSMUSG00000050213,protein_coding -13324,Olfr70,ENSMUSG00000050215,protein_coding -401,Lgsn,ENSMUSG00000050217,protein_coding -33572,Il17d,ENSMUSG00000050222,protein_coding -48521,Krtap13,ENSMUSG00000050224,protein_coding -2966,Pigm,ENSMUSG00000050229,protein_coding -8786,Cxcr3,ENSMUSG00000050232,protein_coding -14735,Gja4,ENSMUSG00000050234,protein_coding -48513,Krtap24-1,ENSMUSG00000050239,protein_coding -47433,Hic2,ENSMUSG00000050240,protein_coding -21556,Klre1,ENSMUSG00000050241,protein_coding -40465,Heatr1,ENSMUSG00000050244,protein_coding -16363,Evc2,ENSMUSG00000050248,protein_coding -29327,Olfr809,ENSMUSG00000050251,protein_coding -25621,Olfr690,ENSMUSG00000050266,protein_coding -38474,Tmem220,ENSMUSG00000050270,protein_coding -30102,Prag1,ENSMUSG00000050271,protein_coding -48732,Dscam,ENSMUSG00000050272,protein_coding -26699,Mrgprg,ENSMUSG00000050276,protein_coding -25501,Olfr630,ENSMUSG00000050281,protein_coding -39748,Fzd2,ENSMUSG00000050288,protein_coding -41018,Foxc1,ENSMUSG00000050295,protein_coding -1140,Abca12,ENSMUSG00000050296,protein_coding -53198,Slc25a2,ENSMUSG00000050304,protein_coding -45349,Rictor,ENSMUSG00000050310,protein_coding -48273,Nsun3,ENSMUSG00000050312,protein_coding -12161,Synpo2,ENSMUSG00000050315,protein_coding -54014,Neto1,ENSMUSG00000050321,protein_coding 
-12856,Ndufaf6,ENSMUSG00000050323,protein_coding -47096,Hoxc12,ENSMUSG00000050328,protein_coding -8689,Amer1,ENSMUSG00000050332,protein_coding -32957,Lgals3,ENSMUSG00000050335,protein_coding -38036,Olfr1378,ENSMUSG00000050343,protein_coding -41674,4930486L24Rik,ENSMUSG00000050345,protein_coding -34398,Gpr18,ENSMUSG00000050350,protein_coding -31377,Carmil2,ENSMUSG00000050357,protein_coding -11301,Sprr1a,ENSMUSG00000050359,protein_coding -26506,Olfr524,ENSMUSG00000050366,protein_coding -4849,Hoxd10,ENSMUSG00000050368,protein_coding -54934,Ch25h,ENSMUSG00000050370,protein_coding -6841,Snx21,ENSMUSG00000050373,protein_coding -42666,Il31ra,ENSMUSG00000050377,protein_coding -7774,Sept6,ENSMUSG00000050379,protein_coding -24838,Kif7,ENSMUSG00000050382,protein_coding -6802,Svs3b,ENSMUSG00000050383,protein_coding -14769,C77080,ENSMUSG00000050390,protein_coding -9240,Armcx6,ENSMUSG00000050394,protein_coding -13707,Tnfsf15,ENSMUSG00000050395,protein_coding -36560,Foxl2,ENSMUSG00000050397,protein_coding -49924,Tcf19,ENSMUSG00000050410,protein_coding -41127,Ppp1r3g,ENSMUSG00000050423,protein_coding -8310,Pnma5,ENSMUSG00000050424,protein_coding -24154,Mrgprb2,ENSMUSG00000050425,protein_coding -22784,Fbxo46,ENSMUSG00000050428,protein_coding -9040,Ube2dnl1,ENSMUSG00000050435,protein_coding -46452,Enthd1,ENSMUSG00000050439,protein_coding -23414,Hamp,ENSMUSG00000050440,protein_coding -37200,Cyp8b1,ENSMUSG00000050445,protein_coding -4515,Lypd6,ENSMUSG00000050447,protein_coding -47050,Krt78,ENSMUSG00000050463,protein_coding -6014,Astl,ENSMUSG00000050468,protein_coding -35018,Fam118b,ENSMUSG00000050471,protein_coding -26989,Slc35d3,ENSMUSG00000050473,protein_coding -29325,Olfr807,ENSMUSG00000050478,protein_coding -14788,Fam167b,ENSMUSG00000050493,protein_coding -35915,Fbxl22,ENSMUSG00000050503,protein_coding -7961,Olfr1323,ENSMUSG00000050504,protein_coding -34096,Pcdh20,ENSMUSG00000050505,protein_coding -14854,Oprd1,ENSMUSG00000050511,protein_coding 
-48511,Cldn8,ENSMUSG00000050520,protein_coding -2208,4933406M09Rik,ENSMUSG00000050526,protein_coding -3449,Fam171a1,ENSMUSG00000050530,protein_coding -1977,Htr5b,ENSMUSG00000050534,protein_coding -39458,B230217C12Rik,ENSMUSG00000050538,protein_coding -37877,Adra1b,ENSMUSG00000050541,protein_coding -42860,Fam228b,ENSMUSG00000050545,protein_coding -12253,Fam241a,ENSMUSG00000050549,protein_coding -18505,Lamtor4,ENSMUSG00000050552,protein_coding -17354,Gk2,ENSMUSG00000050553,protein_coding -35030,Hyls1,ENSMUSG00000050555,protein_coding -6908,Kcnb1,ENSMUSG00000050556,protein_coding -6174,Prokr2,ENSMUSG00000050558,protein_coding -2562,Tor1aip2,ENSMUSG00000050565,protein_coding -38013,Maml1,ENSMUSG00000050567,protein_coding -34490,Mmp13,ENSMUSG00000050578,protein_coding -5483,Lrrc4c,ENSMUSG00000050587,protein_coding -4204,Fam78a,ENSMUSG00000050592,protein_coding -7056,Zfp831,ENSMUSG00000050600,protein_coding -5052,Olfr1008,ENSMUSG00000050603,protein_coding -23045,Zfp61,ENSMUSG00000050605,protein_coding -15106,Micos10,ENSMUSG00000050608,protein_coding -50638,Txndc2,ENSMUSG00000050612,protein_coding -50108,Olfr125,ENSMUSG00000050613,protein_coding -5899,Zscan29,ENSMUSG00000050619,protein_coding -37047,Rps27rt,ENSMUSG00000050621,protein_coding -54309,Catsperz,ENSMUSG00000050623,protein_coding -3185,Ccdc121,ENSMUSG00000050625,protein_coding -37045,Gpd1l,ENSMUSG00000050627,protein_coding -40083,Ubald2,ENSMUSG00000050628,protein_coding -11291,Sprr2f,ENSMUSG00000050635,protein_coding -17394,Tmem150c,ENSMUSG00000050640,protein_coding -36856,BC048562,ENSMUSG00000050641,protein_coding -6488,Defb19,ENSMUSG00000050645,protein_coding -24112,Mrgpra1,ENSMUSG00000050650,protein_coding -21141,Olfr215,ENSMUSG00000050654,protein_coding -28892,Trhde,ENSMUSG00000050663,protein_coding -32546,Vstm4,ENSMUSG00000050666,protein_coding -50839,Gpatch11,ENSMUSG00000050668,protein_coding -44270,Ism2,ENSMUSG00000050671,protein_coding -38652,Gp1ba,ENSMUSG00000050675,protein_coding 
-16339,Ccdc96,ENSMUSG00000050677,protein_coding -48121,Ccdc54,ENSMUSG00000050685,protein_coding -45325,Prkaa1,ENSMUSG00000050697,protein_coding -6736,Emilin3,ENSMUSG00000050700,protein_coding -35658,4930563M21Rik,ENSMUSG00000050702,protein_coding -48519,2310061N02Rik,ENSMUSG00000050704,protein_coding -49951,2310061I04Rik,ENSMUSG00000050705,protein_coding -24009,Ftl1,ENSMUSG00000050708,protein_coding -1306,Scg2,ENSMUSG00000050711,protein_coding -4391,Zbtb26,ENSMUSG00000050714,protein_coding -35895,Plekho2,ENSMUSG00000050721,protein_coding -34523,Arhgap42,ENSMUSG00000050730,protein_coding -20344,Vamp8,ENSMUSG00000050732,protein_coding -4172,Ptges,ENSMUSG00000050737,protein_coding -47534,Olfr164,ENSMUSG00000050742,protein_coding -50036,Trim15,ENSMUSG00000050747,protein_coding -31839,Pgbd5,ENSMUSG00000050751,protein_coding -29711,Defb6,ENSMUSG00000050756,protein_coding -47502,Gp1bb,ENSMUSG00000050761,protein_coding -49265,Prss27,ENSMUSG00000050762,protein_coding -37968,Olfr1395,ENSMUSG00000050763,protein_coding -33096,Ear14,ENSMUSG00000050766,protein_coding -5189,Olfr1124,ENSMUSG00000050772,protein_coding -5713,Olfr1317,ENSMUSG00000050776,protein_coding -1953,Tmem37,ENSMUSG00000050777,protein_coding -48292,Htr1f,ENSMUSG00000050783,protein_coding -19709,Ccdc126,ENSMUSG00000050786,protein_coding -3037,Olfr419,ENSMUSG00000050788,protein_coding -15607,B3galt6,ENSMUSG00000050796,protein_coding -40884,Hist1h2ba,ENSMUSG00000050799,protein_coding -34712,Olfr866,ENSMUSG00000050803,protein_coding -5656,Muc15,ENSMUSG00000050808,protein_coding -15255,Oog3,ENSMUSG00000050810,protein_coding -13585,Ecpas,ENSMUSG00000050812,protein_coding -38228,Olfr332,ENSMUSG00000050813,protein_coding -54563,Olfr1441,ENSMUSG00000050815,protein_coding -38231,Olfr330,ENSMUSG00000050818,protein_coding -47587,Fam131a,ENSMUSG00000050821,protein_coding -18609,Slc29a4,ENSMUSG00000050822,protein_coding -49356,Sstr5,ENSMUSG00000050824,protein_coding -37475,Vwc2,ENSMUSG00000050830,protein_coding 
-1785,Cdh20,ENSMUSG00000050840,protein_coding -27025,1700020N01Rik,ENSMUSG00000050844,protein_coding -46214,Zfp623,ENSMUSG00000050846,protein_coding -35217,Olfr958,ENSMUSG00000050853,protein_coding -14481,Tmem125,ENSMUSG00000050854,protein_coding -23318,Zfp940,ENSMUSG00000050855,protein_coding -17591,Atp5k,ENSMUSG00000050856,protein_coding -39354,Phospho1,ENSMUSG00000050860,protein_coding -54642,Olfr1494,ENSMUSG00000050865,protein_coding -26419,Clrn3,ENSMUSG00000050866,protein_coding -24151,Mrgprb8,ENSMUSG00000050870,protein_coding -53532,Minar2,ENSMUSG00000050875,protein_coding -41641,Spata31d1a,ENSMUSG00000050876,protein_coding -14957,Pdik1l,ENSMUSG00000050890,protein_coding -45963,Tatdn1,ENSMUSG00000050891,protein_coding -5012,Rtn4rl2,ENSMUSG00000050896,protein_coding -34637,Mtnr1b,ENSMUSG00000050901,protein_coding -47278,Tvp23a,ENSMUSG00000050908,protein_coding -40020,Cdr2l,ENSMUSG00000050910,protein_coding -34505,Tmem123,ENSMUSG00000050912,protein_coding -30235,Ankrd37,ENSMUSG00000050914,protein_coding -26723,Fgf4,ENSMUSG00000050917,protein_coding -42474,Zfp366,ENSMUSG00000050919,protein_coding -8956,P2ry10,ENSMUSG00000050921,protein_coding -7900,Dcaf12l2,ENSMUSG00000050926,protein_coding -31864,Map10,ENSMUSG00000050930,protein_coding -12322,Sgms2,ENSMUSG00000050931,protein_coding -49114,Vmn1r231,ENSMUSG00000050933,protein_coding -38891,Efcab5,ENSMUSG00000050944,protein_coding -52694,Zfp438,ENSMUSG00000050945,protein_coding -11918,Amigo1,ENSMUSG00000050947,protein_coding -27620,Gja1,ENSMUSG00000050953,protein_coding -41372,Zfp169,ENSMUSG00000050954,protein_coding -54851,Insl6,ENSMUSG00000050957,protein_coding -33126,AY358078,ENSMUSG00000050961,protein_coding -45657,Kcns2,ENSMUSG00000050963,protein_coding -39910,Prkca,ENSMUSG00000050965,protein_coding -14940,Lin28a,ENSMUSG00000050966,protein_coding -598,Creg2,ENSMUSG00000050967,protein_coding -24865,Gdpgp1,ENSMUSG00000050973,protein_coding -46309,Apol10a,ENSMUSG00000050982,protein_coding 
-14970,Selenon,ENSMUSG00000050989,protein_coding -26846,Adgb,ENSMUSG00000050994,protein_coding -52757,Cetn1,ENSMUSG00000050996,protein_coding -11028,Fam160a1,ENSMUSG00000051000,protein_coding -47162,Olfr161,ENSMUSG00000051003,protein_coding -26586,Gatd1,ENSMUSG00000051007,protein_coding -16402,Hs3st1,ENSMUSG00000051022,protein_coding -41040,Serpinb1b,ENSMUSG00000051029,protein_coding -18222,Zfp11,ENSMUSG00000051034,protein_coding -11094,Ttc24,ENSMUSG00000051036,protein_coding -41953,Zfp455,ENSMUSG00000051037,protein_coding -7830,Rhox11,ENSMUSG00000051038,protein_coding -25709,Olfml1,ENSMUSG00000051041,protein_coding -39994,Gprc5c,ENSMUSG00000051043,protein_coding -21140,Olfr214,ENSMUSG00000051046,protein_coding -25313,P4ha3,ENSMUSG00000051048,protein_coding -53337,Spink14,ENSMUSG00000051050,protein_coding -26505,Olfr523,ENSMUSG00000051051,protein_coding -41646,1700014D04Rik,ENSMUSG00000051054,protein_coding -13080,Gja10,ENSMUSG00000051056,protein_coding -37817,Fbll1,ENSMUSG00000051062,protein_coding -47726,Mb21d2,ENSMUSG00000051065,protein_coding -28206,Lingo3,ENSMUSG00000051067,protein_coding -11682,Vtcn1,ENSMUSG00000051076,protein_coding -2388,Rgs13,ENSMUSG00000051079,protein_coding -2817,Gm4847,ENSMUSG00000051081,protein_coding -35254,Olfr986,ENSMUSG00000051095,protein_coding -42213,Mblac2,ENSMUSG00000051098,protein_coding -42391,Sv2c,ENSMUSG00000051111,protein_coding -23922,Fam71e1,ENSMUSG00000051113,protein_coding -34707,Olfr77,ENSMUSG00000051118,protein_coding -19672,Gimap9,ENSMUSG00000051124,protein_coding -10138,Ghsr,ENSMUSG00000051136,protein_coding -47581,Camk2n2,ENSMUSG00000051146,protein_coding -30520,Nat2,ENSMUSG00000051147,protein_coding -6941,Adnp,ENSMUSG00000051149,protein_coding -21640,Tas2r105,ENSMUSG00000051153,protein_coding -3807,Commd3,ENSMUSG00000051154,protein_coding -54639,Olfr1491,ENSMUSG00000051156,protein_coding -8804,Cited1,ENSMUSG00000051159,protein_coding -34692,Olfr853,ENSMUSG00000051160,protein_coding 
-44473,Eml5,ENSMUSG00000051166,protein_coding -21042,Rpusd3,ENSMUSG00000051169,protein_coding -25596,Olfr672,ENSMUSG00000051172,protein_coding -30188,Zfp42,ENSMUSG00000051176,protein_coding -6208,Plcb1,ENSMUSG00000051177,protein_coding -26504,Olfr522,ENSMUSG00000051180,protein_coding -25568,Olfr655,ENSMUSG00000051182,protein_coding -22148,Zfp524,ENSMUSG00000051184,protein_coding -1705,Fam174a,ENSMUSG00000051185,protein_coding -28087,Olfr1356,ENSMUSG00000051190,protein_coding -25777,Olfr513,ENSMUSG00000051200,protein_coding -26734,Mrgprd,ENSMUSG00000051207,protein_coding -7943,Gpr119,ENSMUSG00000051209,protein_coding -34399,Gpr183,ENSMUSG00000051212,protein_coding -8800,Ercc6l,ENSMUSG00000051220,protein_coding -884,Bzw1,ENSMUSG00000051223,protein_coding -9825,Tceanc,ENSMUSG00000051224,protein_coding -45939,Fam83a,ENSMUSG00000051225,protein_coding -7508,Nyx,ENSMUSG00000051228,protein_coding -38956,Tmem199,ENSMUSG00000051232,protein_coding -36501,Rnf7,ENSMUSG00000051234,protein_coding -42943,Gen1,ENSMUSG00000051235,protein_coding -29039,Msrb3,ENSMUSG00000051236,protein_coding -34815,Swsap1,ENSMUSG00000051238,protein_coding -53181,Pcdhb9,ENSMUSG00000051242,protein_coding -35729,Islr2,ENSMUSG00000051243,protein_coding -16308,Msantd1,ENSMUSG00000051246,protein_coding -2948,Nhlh1,ENSMUSG00000051251,protein_coding -21047,Jagn1,ENSMUSG00000051256,protein_coding -9368,Trap1a,ENSMUSG00000051257,protein_coding -40680,Olfr1362,ENSMUSG00000051258,protein_coding -20573,Nat8f3,ENSMUSG00000051262,protein_coding -15549,Actrt2,ENSMUSG00000051276,protein_coding -12239,Zgrf1,ENSMUSG00000051278,protein_coding -12840,Gdf6,ENSMUSG00000051279,protein_coding -67,Pcmtd1,ENSMUSG00000051285,protein_coding -18650,Usp42,ENSMUSG00000051306,protein_coding -5342,Olfr1262,ENSMUSG00000051313,protein_coding -23404,Ffar2,ENSMUSG00000051314,protein_coding -53199,Taf7,ENSMUSG00000051316,protein_coding -6030,Mtln,ENSMUSG00000051319,protein_coding -9207,Pcdh19,ENSMUSG00000051323,protein_coding 
-5366,Nup160,ENSMUSG00000051329,protein_coding -21186,Cacna1c,ENSMUSG00000051331,protein_coding -41280,Gfod1,ENSMUSG00000051335,protein_coding -17742,2900026A02Rik,ENSMUSG00000051339,protein_coding -25534,Olfr645,ENSMUSG00000051340,protein_coding -49149,Zfp52,ENSMUSG00000051341,protein_coding -20559,Rab11fip5,ENSMUSG00000051343,protein_coding -1049,Plekhm3,ENSMUSG00000051344,protein_coding -29209,Spryd4,ENSMUSG00000051346,protein_coding -15026,Zfp46,ENSMUSG00000051351,protein_coding -27129,Samd3,ENSMUSG00000051354,protein_coding -37627,Commd1,ENSMUSG00000051355,protein_coding -45715,Ncald,ENSMUSG00000051359,protein_coding -7851,6030498E09Rik,ENSMUSG00000051361,protein_coding -25454,Olfr589,ENSMUSG00000051362,protein_coding -43944,Six1,ENSMUSG00000051367,protein_coding -4207,Plpp7,ENSMUSG00000051373,protein_coding -53241,Pcdh1,ENSMUSG00000051375,protein_coding -39766,Kif18b,ENSMUSG00000051378,protein_coding -6257,Flrt3,ENSMUSG00000051379,protein_coding -49757,Zbtb22,ENSMUSG00000051390,protein_coding -18385,Ywhag,ENSMUSG00000051391,protein_coding -11601,Olfr1402,ENSMUSG00000051392,protein_coding -3461,Gm45902,ENSMUSG00000051396,protein_coding -20060,Tacstd2,ENSMUSG00000051397,protein_coding -53281,Kctd16,ENSMUSG00000051401,protein_coding -22817,Ppp1r37,ENSMUSG00000051403,protein_coding -55499,Vamp7,ENSMUSG00000051412,protein_coding -6517,Plagl2,ENSMUSG00000051413,protein_coding -34666,Olfr829,ENSMUSG00000051414,protein_coding -5261,Olfr1184,ENSMUSG00000051424,protein_coding -37351,Ccdc157,ENSMUSG00000051427,protein_coding -10667,Gpr87,ENSMUSG00000051431,protein_coding -15199,Fhad1,ENSMUSG00000051435,protein_coding -25538,Ubqlnl,ENSMUSG00000051437,protein_coding -53134,Cd14,ENSMUSG00000051439,protein_coding -10324,Bbs12,ENSMUSG00000051444,protein_coding -25131,Crebzf,ENSMUSG00000051451,protein_coding -39118,Gm11437,ENSMUSG00000051452,protein_coding -39753,Meioc,ENSMUSG00000051455,protein_coding -42701,Hspb3,ENSMUSG00000051456,protein_coding 
-26189,Spn,ENSMUSG00000051457,protein_coding -52932,Zfp24,ENSMUSG00000051469,protein_coding -39586,Krtap31-2,ENSMUSG00000051481,protein_coding -48666,Cbr1,ENSMUSG00000051483,protein_coding -53183,Pcdhb11,ENSMUSG00000051486,protein_coding -54785,Foxd4,ENSMUSG00000051490,protein_coding -35239,Olfr974,ENSMUSG00000051493,protein_coding -31877,Irf2bp2,ENSMUSG00000051495,protein_coding -39955,Kcnj16,ENSMUSG00000051497,protein_coding -16741,Tlr6,ENSMUSG00000051498,protein_coding -19636,Zfp786,ENSMUSG00000051499,protein_coding -18433,Ufsp1,ENSMUSG00000051502,protein_coding -17723,Gm6583,ENSMUSG00000051503,protein_coding -24229,Siglech,ENSMUSG00000051504,protein_coding -32548,Wdfy4,ENSMUSG00000051506,protein_coding -3042,Olfr414,ENSMUSG00000051509,protein_coding -40245,Mafg,ENSMUSG00000051510,protein_coding -25183,Fam181b,ENSMUSG00000051515,protein_coding -13305,Arhgef39,ENSMUSG00000051517,protein_coding -46449,Rps19bp1,ENSMUSG00000051518,protein_coding -22227,Usp29,ENSMUSG00000051527,protein_coding -3031,Olfr424,ENSMUSG00000051528,protein_coding -22146,Zfp579,ENSMUSG00000051550,protein_coding -15602,Pusl1,ENSMUSG00000051557,protein_coding -9289,Tceal8,ENSMUSG00000051579,protein_coding -8744,Otud6a,ENSMUSG00000051582,protein_coding -21225,Mical3,ENSMUSG00000051586,protein_coding -2036,Map3k19,ENSMUSG00000051590,protein_coding -25673,Olfr697,ENSMUSG00000051591,protein_coding -7324,Ccnb3,ENSMUSG00000051592,protein_coding -13489,Olfr272,ENSMUSG00000051593,protein_coding -16378,Otop1,ENSMUSG00000051596,protein_coding -53173,Pcdhb2,ENSMUSG00000051599,protein_coding -50092,Olfr112,ENSMUSG00000051611,protein_coding -34377,Rap2a,ENSMUSG00000051615,protein_coding -39606,Krt9,ENSMUSG00000051617,protein_coding -25537,Ubqln3,ENSMUSG00000051618,protein_coding -40853,Hist1h1e,ENSMUSG00000051627,protein_coding -31361,Kctd19,ENSMUSG00000051648,protein_coding -37620,B3gnt2,ENSMUSG00000051650,protein_coding -28036,Lrrc3,ENSMUSG00000051652,protein_coding 
-53172,Pcdhb1,ENSMUSG00000051663,protein_coding -47225,AU021092,ENSMUSG00000051669,protein_coding -31873,Coa6,ENSMUSG00000051671,protein_coding -16928,Dcun1d4,ENSMUSG00000051674,protein_coding -13719,Trim32,ENSMUSG00000051675,protein_coding -53178,Pcdhb6,ENSMUSG00000051678,protein_coding -25668,Olfr693,ENSMUSG00000051680,protein_coding -50370,Treml4,ENSMUSG00000051682,protein_coding -22421,Vmn1r73,ENSMUSG00000051687,protein_coding -20615,Pcbp1,ENSMUSG00000051695,protein_coding -1264,Tmem198,ENSMUSG00000051703,protein_coding -35761,Senp8,ENSMUSG00000051705,protein_coding -8397,Olfr1325,ENSMUSG00000051706,protein_coding -29213,Apon,ENSMUSG00000051716,protein_coding -42867,Wdcp,ENSMUSG00000051721,protein_coding -43039,Kcnf1,ENSMUSG00000051726,protein_coding -25224,Kctd14,ENSMUSG00000051727,protein_coding -48645,Fam243,ENSMUSG00000051728,protein_coding -4748,Mettl5,ENSMUSG00000051730,protein_coding -53275,Pabpc2,ENSMUSG00000051732,protein_coding -23270,Rinl,ENSMUSG00000051735,protein_coding -27309,Fam229b,ENSMUSG00000051736,protein_coding -4901,Ttn,ENSMUSG00000051747,protein_coding -39106,Wfdc21,ENSMUSG00000051748,protein_coding -23062,Xrcc1,ENSMUSG00000051768,protein_coding -6799,Wfdc15a,ENSMUSG00000051769,protein_coding -10816,Iqcj,ENSMUSG00000051777,protein_coding -46695,Tubgcp6,ENSMUSG00000051786,protein_coding -11820,4930564D02Rik,ENSMUSG00000051788,protein_coding -38588,Nlgn2,ENSMUSG00000051790,protein_coding -46850,Olfr284,ENSMUSG00000051793,protein_coding -48533,Krtap19-5,ENSMUSG00000051802,protein_coding -45008,Adam6b,ENSMUSG00000051804,protein_coding -22128,Cox6b2,ENSMUSG00000051811,protein_coding -6473,Sox12,ENSMUSG00000051817,protein_coding -7788,Rhox2a,ENSMUSG00000051827,protein_coding -30833,Gypa,ENSMUSG00000051839,protein_coding -8023,Rtl8c,ENSMUSG00000051851,protein_coding -46876,Arf3,ENSMUSG00000051853,protein_coding -19251,Mest,ENSMUSG00000051855,protein_coding -10187,Samd7,ENSMUSG00000051860,protein_coding 
-46659,Tbc1d22a,ENSMUSG00000051864,protein_coding -25029,Vmn2r72,ENSMUSG00000051877,protein_coding -47038,Krt71,ENSMUSG00000051879,protein_coding -43851,Klhdc1,ENSMUSG00000051890,protein_coding -20286,Tex37,ENSMUSG00000051896,protein_coding -25998,Abca16,ENSMUSG00000051900,protein_coding -29415,Cd209f,ENSMUSG00000051906,protein_coding -25905,Sox6,ENSMUSG00000051910,protein_coding -19523,Tas2r144,ENSMUSG00000051917,protein_coding -45809,Rspo2,ENSMUSG00000051920,protein_coding -46903,Spats2,ENSMUSG00000051934,protein_coding -19428,Prss58,ENSMUSG00000051936,protein_coding -16072,5031410I06Rik,ENSMUSG00000051940,protein_coding -18834,B3glct,ENSMUSG00000051950,protein_coding -2,Xkr4,ENSMUSG00000051951,protein_coding -30980,Olfr371,ENSMUSG00000051952,protein_coding -19141,Rnf133,ENSMUSG00000051956,protein_coding -22770,Nanos2,ENSMUSG00000051965,protein_coding -33044,Tlr11,ENSMUSG00000051969,protein_coding -49003,Prdm9,ENSMUSG00000051977,protein_coding -29642,Erich1,ENSMUSG00000051978,protein_coding -47925,Casr,ENSMUSG00000051980,protein_coding -55157,Sec31b,ENSMUSG00000051984,protein_coding -2259,Igfn1,ENSMUSG00000051985,protein_coding -48643,Smim11,ENSMUSG00000051989,protein_coding -2185,Lax1,ENSMUSG00000051998,protein_coding -29312,Olfr796,ENSMUSG00000052012,protein_coding -48058,Btla,ENSMUSG00000052013,protein_coding -18255,A330070K13Rik,ENSMUSG00000052014,protein_coding -53565,Slc6a7,ENSMUSG00000052026,protein_coding -48831,Tagap1,ENSMUSG00000052031,protein_coding -6979,Pfdn4,ENSMUSG00000052033,protein_coding -24562,Klf13,ENSMUSG00000052040,protein_coding -6971,Zfp217,ENSMUSG00000052056,protein_coding -35141,Olfr901,ENSMUSG00000052058,protein_coding -980,Pard3b,ENSMUSG00000052062,protein_coding -42416,1700029F12Rik,ENSMUSG00000052075,protein_coding -54789,Dock8,ENSMUSG00000052085,protein_coding -41536,Rgs14,ENSMUSG00000052087,protein_coding -33720,Prss51,ENSMUSG00000052099,protein_coding -53249,Gnpda1,ENSMUSG00000052102,protein_coding 
-50660,Mtcl1,ENSMUSG00000052105,protein_coding -13572,D630039A03Rik,ENSMUSG00000052117,protein_coding -19308,Akr1b7,ENSMUSG00000052131,protein_coding -47874,Sema5b,ENSMUSG00000052133,protein_coding -14545,Foxo6,ENSMUSG00000052135,protein_coding -12886,Rbm12b2,ENSMUSG00000052137,protein_coding -16206,Babam2,ENSMUSG00000052139,protein_coding -49683,Rasal3,ENSMUSG00000052142,protein_coding -20912,Ppp4r2,ENSMUSG00000052144,protein_coding -49490,Rps10,ENSMUSG00000052146,protein_coding -28102,Plpp2,ENSMUSG00000052151,protein_coding -4502,Acvr2a,ENSMUSG00000052155,protein_coding -44942,Pld4,ENSMUSG00000052160,protein_coding -41067,Serpinb6c,ENSMUSG00000052180,protein_coding -34688,Olfr849,ENSMUSG00000052182,protein_coding -25512,Hbb-y,ENSMUSG00000052187,protein_coding -23076,Cd177,ENSMUSG00000052212,protein_coding -22791,Opa3,ENSMUSG00000052214,protein_coding -25510,Hbb-bh1,ENSMUSG00000052217,protein_coding -43999,Ppp1r36,ENSMUSG00000052221,protein_coding -53006,Gpr17,ENSMUSG00000052229,protein_coding -39217,Epx,ENSMUSG00000052234,protein_coding -45549,Zfp622,ENSMUSG00000052253,protein_coding -49048,Fpr2,ENSMUSG00000052270,protein_coding -18666,Bhlha15,ENSMUSG00000052271,protein_coding -25988,Dnah3,ENSMUSG00000052273,protein_coding -47716,Ostn,ENSMUSG00000052276,protein_coding -42508,Taf9,ENSMUSG00000052293,protein_coding -22120,Ppp6r1,ENSMUSG00000052296,protein_coding -38150,Cdc42se2,ENSMUSG00000052298,protein_coding -48493,Ltn1,ENSMUSG00000052299,protein_coding -26160,Doc2a,ENSMUSG00000052301,protein_coding -29053,Tbc1d30,ENSMUSG00000052302,protein_coding -24109,Mrgpra6,ENSMUSG00000052303,protein_coding -25506,Hbb-bs,ENSMUSG00000052305,protein_coding -11226,Slc39a1,ENSMUSG00000052310,protein_coding -47748,Lrrc15,ENSMUSG00000052316,protein_coding -825,Ankrd44,ENSMUSG00000052331,protein_coding -32697,1700024B05Rik,ENSMUSG00000052334,protein_coding -37121,Cx3cr1,ENSMUSG00000052336,protein_coding -20322,Immt,ENSMUSG00000052337,protein_coding 
-24982,Cemip,ENSMUSG00000052353,protein_coding -47820,Zdhhc19,ENSMUSG00000052363,protein_coding -7411,B630019K06Rik,ENSMUSG00000052364,protein_coding -46835,Tmem106c,ENSMUSG00000052369,protein_coding -8569,Il1rapl1,ENSMUSG00000052372,protein_coding -39714,Mpp3,ENSMUSG00000052373,protein_coding -40463,Actn2,ENSMUSG00000052374,protein_coding -33083,Rnase9,ENSMUSG00000052382,protein_coding -47804,Nrros,ENSMUSG00000052384,protein_coding -54741,Trpm3,ENSMUSG00000052387,protein_coding -44163,Acot4,ENSMUSG00000052392,protein_coding -32470,Rft1,ENSMUSG00000052395,protein_coding -25271,Mogat2,ENSMUSG00000052396,protein_coding -48823,Ezr,ENSMUSG00000052397,protein_coding -4052,Rexo4,ENSMUSG00000052406,protein_coding -13851,Ccdc171,ENSMUSG00000052407,protein_coding -47085,Gm28047,ENSMUSG00000052414,protein_coding -11357,Tchh,ENSMUSG00000052415,protein_coding -32164,Olfr720,ENSMUSG00000052417,protein_coding -19027,2610001J05Rik,ENSMUSG00000052419,protein_coding -2903,B4galt3,ENSMUSG00000052423,protein_coding -2832,Tmco1,ENSMUSG00000052428,protein_coding -12490,Bmpr1b,ENSMUSG00000052430,protein_coding -33425,Cebpe,ENSMUSG00000052435,protein_coding -30683,Zfp961,ENSMUSG00000052446,protein_coding -30962,Asna1,ENSMUSG00000052456,protein_coding -48035,Atp6v1a,ENSMUSG00000052459,protein_coding -9958,Pmp2,ENSMUSG00000052468,protein_coding -48947,Tcp10c,ENSMUSG00000052469,protein_coding -1406,C130026I21Rik,ENSMUSG00000052477,protein_coding -42456,Tmem171,ENSMUSG00000052485,protein_coding -30713,Cherp,ENSMUSG00000052488,protein_coding -46647,Pkdrej,ENSMUSG00000052496,protein_coding -48282,Epha3,ENSMUSG00000052504,protein_coding -26522,Olfr536,ENSMUSG00000052508,protein_coding -24166,Nav2,ENSMUSG00000052512,protein_coding -48349,Robo2,ENSMUSG00000052516,protein_coding -14046,Cyp2j5,ENSMUSG00000052520,protein_coding -50741,Spdya,ENSMUSG00000052525,protein_coding -4151,Nup188,ENSMUSG00000052533,protein_coding -2841,Pbx1,ENSMUSG00000052534,protein_coding 
-48239,Olfr198,ENSMUSG00000052537,protein_coding -11774,Magi3,ENSMUSG00000052539,protein_coding -12664,St6galnac3,ENSMUSG00000052544,protein_coding -9219,Arl13a,ENSMUSG00000052549,protein_coding -40393,Adarb2,ENSMUSG00000052551,protein_coding -29703,Defb34,ENSMUSG00000052554,protein_coding -31587,Gan,ENSMUSG00000052557,protein_coding -46736,Cpne8,ENSMUSG00000052560,protein_coding -54361,Slc22a30,ENSMUSG00000052562,protein_coding -38060,D930048N14Rik,ENSMUSG00000052563,protein_coding -40842,Hist1h1d,ENSMUSG00000052565,protein_coding -30960,Hook2,ENSMUSG00000052566,protein_coding -25135,Dlg2,ENSMUSG00000052572,protein_coding -20427,Lrrtm4,ENSMUSG00000052581,protein_coding -33962,Serp2,ENSMUSG00000052584,protein_coding -43163,Adam17,ENSMUSG00000052593,protein_coding -54882,A1cf,ENSMUSG00000052595,protein_coding -22136,Isoc2b,ENSMUSG00000052605,protein_coding -44003,Plekhg3,ENSMUSG00000052609,protein_coding -27907,Pcdh15,ENSMUSG00000052613,protein_coding -31304,Terb1,ENSMUSG00000052616,protein_coding -34690,Olfr851,ENSMUSG00000052625,protein_coding -20348,Sh2d6,ENSMUSG00000052631,protein_coding -43154,Asap2,ENSMUSG00000052632,protein_coding -38225,Lypd9,ENSMUSG00000052642,protein_coding -20309,Rnf103,ENSMUSG00000052656,protein_coding -23034,Zfp112,ENSMUSG00000052675,protein_coding -9247,Zmat1,ENSMUSG00000052676,protein_coding -28965,Rap1b,ENSMUSG00000052681,protein_coding -14019,Jun,ENSMUSG00000052684,protein_coding -2121,Rab7b,ENSMUSG00000052688,protein_coding -35933,Tln2,ENSMUSG00000052698,protein_coding -26076,Tnrc6a,ENSMUSG00000052707,protein_coding -49571,BC004004,ENSMUSG00000052712,protein_coding -53472,Zfp608,ENSMUSG00000052713,protein_coding -2364,Kcnt2,ENSMUSG00000052726,protein_coding -42478,Map1b,ENSMUSG00000052727,protein_coding -21560,Klrc2,ENSMUSG00000052736,protein_coding -20371,Suclg1,ENSMUSG00000052738,protein_coding -2466,Swt1,ENSMUSG00000052748,protein_coding -25555,Trim30b,ENSMUSG00000052749,protein_coding 
-19665,Repin1,ENSMUSG00000052751,protein_coding -49293,Traf7,ENSMUSG00000052752,protein_coding -2273,Gpr25,ENSMUSG00000052759,protein_coding -1435,A630001G21Rik,ENSMUSG00000052760,protein_coding -19640,Zfp212,ENSMUSG00000052763,protein_coding -17981,Oas1a,ENSMUSG00000052776,protein_coding -16304,Grk4,ENSMUSG00000052783,protein_coding -25436,Olfr573-ps1,ENSMUSG00000052785,protein_coding -30712,1700030K09Rik,ENSMUSG00000052794,protein_coding -28963,Nup107,ENSMUSG00000052798,protein_coding -42870,Atad2b,ENSMUSG00000052812,protein_coding -29331,Olfr813,ENSMUSG00000052818,protein_coding -30961,Best2,ENSMUSG00000052819,protein_coding -8942,Cysltr1,ENSMUSG00000052821,protein_coding -22662,Sae1,ENSMUSG00000052833,protein_coding -30959,Junb,ENSMUSG00000052837,protein_coding -19415,Tas2r137,ENSMUSG00000052850,protein_coding -20317,Reep1,ENSMUSG00000052852,protein_coding -9360,Nrk,ENSMUSG00000052854,protein_coding -20368,Dnah6,ENSMUSG00000052861,protein_coding -26058,Prkcb,ENSMUSG00000052889,protein_coding -30039,Ubxn8,ENSMUSG00000052906,protein_coding -36860,Lamb2,ENSMUSG00000052911,protein_coding -14044,Cyp2j6,ENSMUSG00000052914,protein_coding -39508,Msl1,ENSMUSG00000052915,protein_coding -48167,Senp7,ENSMUSG00000052917,protein_coding -54878,Prkg1,ENSMUSG00000052920,protein_coding -38516,Arhgef15,ENSMUSG00000052921,protein_coding -6689,Bpi,ENSMUSG00000052922,protein_coding -30956,Rnaseh2a,ENSMUSG00000052926,protein_coding -53829,Ctif,ENSMUSG00000052928,protein_coding -31673,Fbxo31,ENSMUSG00000052934,protein_coding -54830,Glis3,ENSMUSG00000052942,protein_coding -40081,Rnf157,ENSMUSG00000052949,protein_coding -19819,Cpvl,ENSMUSG00000052955,protein_coding -41659,Gas1,ENSMUSG00000052957,protein_coding -20320,Mrpl35,ENSMUSG00000052962,protein_coding -23175,Cyp2f2,ENSMUSG00000052974,protein_coding -42030,Ube2ql1,ENSMUSG00000052981,protein_coding -23560,Uba2,ENSMUSG00000052997,protein_coding -21082,Hrh1,ENSMUSG00000053004,protein_coding 
-19811,Creb5,ENSMUSG00000053007,protein_coding -20296,Krcc1,ENSMUSG00000053012,protein_coding -2155,Cntn2,ENSMUSG00000053024,protein_coding -24772,Sv2b,ENSMUSG00000053025,protein_coding -17018,Spink2,ENSMUSG00000053030,protein_coding -35923,Aph1c,ENSMUSG00000053040,protein_coding -20299,Cd8b1,ENSMUSG00000053044,protein_coding -26609,Brsk2,ENSMUSG00000053046,protein_coding -12448,Adh6a,ENSMUSG00000053054,protein_coding -48466,Jam2,ENSMUSG00000053062,protein_coding -21542,Clec12a,ENSMUSG00000053063,protein_coding -34512,Cfap300,ENSMUSG00000053070,protein_coding -54338,2700081O15Rik,ENSMUSG00000053080,protein_coding -24623,Lins1,ENSMUSG00000053091,protein_coding -33443,Myh7,ENSMUSG00000053093,protein_coding -18249,Tmem248,ENSMUSG00000053094,protein_coding -40636,Gpr141,ENSMUSG00000053101,protein_coding -34509,Yap1,ENSMUSG00000053110,protein_coding -26401,Fank1,ENSMUSG00000053111,protein_coding -40146,Socs3,ENSMUSG00000053113,protein_coding -20312,Chmp3,ENSMUSG00000053119,protein_coding -35355,Rnf26,ENSMUSG00000053128,protein_coding -18756,Gsx1,ENSMUSG00000053129,protein_coding -16197,Supt7l,ENSMUSG00000053134,protein_coding -46698,Mapk11,ENSMUSG00000053137,protein_coding -6748,Ptprt,ENSMUSG00000053141,protein_coding -4365,Olfr352,ENSMUSG00000053146,protein_coding -1129,Spag16,ENSMUSG00000053153,protein_coding -24877,Fes,ENSMUSG00000053158,protein_coding -1381,Daw1,ENSMUSG00000053161,protein_coding -4395,Gpr21,ENSMUSG00000053164,protein_coding -6857,Cdh22,ENSMUSG00000053166,protein_coding -22836,Bcl3,ENSMUSG00000053175,protein_coding -15670,Mterf1b,ENSMUSG00000053178,protein_coding -41373,A830005F24Rik,ENSMUSG00000053181,protein_coding -48061,Gm609,ENSMUSG00000053182,protein_coding -39020,Spaca3,ENSMUSG00000053184,protein_coding -11464,Mllt11,ENSMUSG00000053192,protein_coding -16142,Cib4,ENSMUSG00000053194,protein_coding -23202,Prx,ENSMUSG00000053198,protein_coding -35578,Arhgap20,ENSMUSG00000053199,protein_coding -32884,Styx,ENSMUSG00000053205,protein_coding 
-51090,Zfy1,ENSMUSG00000053211,protein_coding -40832,Btn2a2,ENSMUSG00000053216,protein_coding -21675,Tas2r136,ENSMUSG00000053217,protein_coding -27040,Raet1e,ENSMUSG00000053219,protein_coding -30942,Dand5,ENSMUSG00000053226,protein_coding -22705,Ceacam3,ENSMUSG00000053228,protein_coding -21137,Olfr212,ENSMUSG00000053251,protein_coding -34249,Ndfip2,ENSMUSG00000053253,protein_coding -17476,Dspp,ENSMUSG00000053268,protein_coding -54719,Aldh1a1,ENSMUSG00000053279,protein_coding -2467,Trmt1l,ENSMUSG00000053286,protein_coding -5056,Olfr1013,ENSMUSG00000053287,protein_coding -35597,Ddx10,ENSMUSG00000053289,protein_coding -23180,Rab4b,ENSMUSG00000053291,protein_coding -18362,Pom121,ENSMUSG00000053293,protein_coding -19669,AI854703,ENSMUSG00000053297,protein_coding -54350,Slc22a26,ENSMUSG00000053303,protein_coding -35096,Nrgn,ENSMUSG00000053310,protein_coding -13423,Sec61b,ENSMUSG00000053317,protein_coding -2976,Slamf8,ENSMUSG00000053318,protein_coding -28046,Gatd3a,ENSMUSG00000053329,protein_coding -1477,Dis3l2,ENSMUSG00000053333,protein_coding -17759,Ficd,ENSMUSG00000053334,protein_coding -22050,Tarm1,ENSMUSG00000053338,protein_coding -49166,Zfp943,ENSMUSG00000053347,protein_coding -22201,Epp13,ENSMUSG00000053367,protein_coding -18841,Rxfp2,ENSMUSG00000053368,protein_coding -50977,Atp6v1e2,ENSMUSG00000053375,protein_coding -18360,Trim50,ENSMUSG00000053388,protein_coding -21637,Tas2r107,ENSMUSG00000053389,protein_coding -49717,Zfp952,ENSMUSG00000053390,protein_coding -21136,Olfr211,ENSMUSG00000053391,protein_coding -22044,Cacng8,ENSMUSG00000053395,protein_coding -11622,Phgdh,ENSMUSG00000053398,protein_coding -31557,Adamts18,ENSMUSG00000053399,protein_coding -46432,Cbx7,ENSMUSG00000053411,protein_coding -48570,Hunk,ENSMUSG00000053414,protein_coding -28573,Gm4792,ENSMUSG00000053420,protein_coding -49539,Mapk14,ENSMUSG00000053436,protein_coding -53531,Adamts19,ENSMUSG00000053441,protein_coding -32160,Thoc7,ENSMUSG00000053453,protein_coding 
-20345,Ggcx,ENSMUSG00000053460,protein_coding -3228,Hhipl2,ENSMUSG00000053461,protein_coding -34365,Hs6st3,ENSMUSG00000053465,protein_coding -46065,Tg,ENSMUSG00000053469,protein_coding -20313,Kdm3a,ENSMUSG00000053470,protein_coding -4541,Tnfaip6,ENSMUSG00000053475,protein_coding -53758,Tcf4,ENSMUSG00000053477,protein_coding -2905,Usp21,ENSMUSG00000053483,protein_coding -30481,Trim60,ENSMUSG00000053490,protein_coding -47130,Gtsf2,ENSMUSG00000053508,protein_coding -14272,Nrd1,ENSMUSG00000053510,protein_coding -37794,Kcnip1,ENSMUSG00000053519,protein_coding -23276,Lgals7,ENSMUSG00000053522,protein_coding -54880,Cstf2t,ENSMUSG00000053536,protein_coding -22135,Shisa7,ENSMUSG00000053550,protein_coding -6104,Ebf4,ENSMUSG00000053552,protein_coding -18543,3110082I17Rik,ENSMUSG00000053553,protein_coding -46979,Smagp,ENSMUSG00000053559,protein_coding -30934,Ier2,ENSMUSG00000053560,protein_coding -23281,Eif3k,ENSMUSG00000053565,protein_coding -39834,Tanc2,ENSMUSG00000053580,protein_coding -18549,Zfand2a,ENSMUSG00000053581,protein_coding -8289,Fate1,ENSMUSG00000053593,protein_coding -49716,Zfp472,ENSMUSG00000053600,protein_coding -20282,Rpia,ENSMUSG00000053604,protein_coding -55249,Sh3pxd2a,ENSMUSG00000053617,protein_coding -53450,Gykl1,ENSMUSG00000053624,protein_coding -30465,Tll1,ENSMUSG00000053626,protein_coding -35865,Dennd4a,ENSMUSG00000053641,protein_coding -53494,Aldh7a1,ENSMUSG00000053644,protein_coding -36904,Plxnb1,ENSMUSG00000053646,protein_coding -18547,Gper1,ENSMUSG00000053647,protein_coding -39612,Krt42,ENSMUSG00000053654,protein_coding -5893,Tgm5,ENSMUSG00000053675,protein_coding -29697,Defb40,ENSMUSG00000053678,protein_coding -29069,BC048403,ENSMUSG00000053684,protein_coding -31400,Dpep2,ENSMUSG00000053687,protein_coding -30953,Mast1,ENSMUSG00000053693,protein_coding -29698,Defb37,ENSMUSG00000053695,protein_coding -3786,Nebl,ENSMUSG00000053702,protein_coding -10709,B430305J03Rik,ENSMUSG00000053706,protein_coding -36740,Dusp7,ENSMUSG00000053716,protein_coding 
-23867,Klk1b26,ENSMUSG00000053719,protein_coding -22283,Vmn2r43,ENSMUSG00000053720,protein_coding -53343,Spinkl,ENSMUSG00000053729,protein_coding -14796,Tmem39b,ENSMUSG00000053730,protein_coding -23690,Gm5114,ENSMUSG00000053742,protein_coding -4251,Ptrh1,ENSMUSG00000053746,protein_coding -36585,Sox14,ENSMUSG00000053747,protein_coding -33151,Chd8,ENSMUSG00000053754,protein_coding -17977,Oas1f,ENSMUSG00000053765,protein_coding -19288,Chchd3,ENSMUSG00000053768,protein_coding -11457,Lysmd1,ENSMUSG00000053769,protein_coding -34749,Rdh8,ENSMUSG00000053773,protein_coding -47813,Ubxn7,ENSMUSG00000053774,protein_coding -38864,1700016K19Rik,ENSMUSG00000053783,protein_coding -29699,Defb38,ENSMUSG00000053790,protein_coding -39610,Krt16,ENSMUSG00000053797,protein_coding -54998,Exoc6,ENSMUSG00000053799,protein_coding -24047,Grwd1,ENSMUSG00000053801,protein_coding -33054,Olfr744,ENSMUSG00000053815,protein_coding -12205,Camk2d,ENSMUSG00000053819,protein_coding -37028,Bcl2a1c,ENSMUSG00000053820,protein_coding -28790,Ppfia2,ENSMUSG00000053825,protein_coding -49965,H2-T24,ENSMUSG00000053835,protein_coding -37410,Nudcd3,ENSMUSG00000053838,protein_coding -14794,Txlna,ENSMUSG00000053841,protein_coding -53815,Lipg,ENSMUSG00000053846,protein_coding -8090,Adgrg4,ENSMUSG00000053852,protein_coding -16171,Dnajc5g,ENSMUSG00000053856,protein_coding -53797,Gm9925,ENSMUSG00000053861,protein_coding -35889,Slc51b,ENSMUSG00000053862,protein_coding -17481,Mepe,ENSMUSG00000053863,protein_coding -33601,Gm5142,ENSMUSG00000053868,protein_coding -12700,Fpgt,ENSMUSG00000053870,protein_coding -17754,Aym1,ENSMUSG00000053873,protein_coding -26222,Srcap,ENSMUSG00000053877,protein_coding -30529,Sh2d4a,ENSMUSG00000053886,protein_coding -4718,4933409G03Rik,ENSMUSG00000053896,protein_coding -12396,Slc39a8,ENSMUSG00000053897,protein_coding -23273,Ech1,ENSMUSG00000053898,protein_coding -20346,Mat2a,ENSMUSG00000053907,protein_coding -7829,Rhox10,ENSMUSG00000053909,protein_coding 
-34581,Kdm4d,ENSMUSG00000053914,protein_coding -6422,Nanp,ENSMUSG00000053916,protein_coding -46270,Cyhr1,ENSMUSG00000053929,protein_coding -38468,Shisa6,ENSMUSG00000053930,protein_coding -12108,Cnn3,ENSMUSG00000053931,protein_coding -53896,Adnp2,ENSMUSG00000053950,protein_coding -32806,Ang5,ENSMUSG00000053961,protein_coding -3143,Stum,ENSMUSG00000053963,protein_coding -23274,Lgals4,ENSMUSG00000053964,protein_coding -12148,Pde5a,ENSMUSG00000053965,protein_coding -20304,Cd8a,ENSMUSG00000053977,protein_coding -23330,Zfp14,ENSMUSG00000053985,protein_coding -13991,Tusc1,ENSMUSG00000054000,protein_coding -44918,Tdrd9,ENSMUSG00000054003,protein_coding -22729,Mill1,ENSMUSG00000054005,protein_coding -53555,Ndst1,ENSMUSG00000054008,protein_coding -44931,Tmem179,ENSMUSG00000054013,protein_coding -41281,Sirt5,ENSMUSG00000054021,protein_coding -28427,Nt5dc3,ENSMUSG00000054027,protein_coding -30160,Adam39,ENSMUSG00000054033,protein_coding -9291,Tceal5,ENSMUSG00000054034,protein_coding -46859,Olfr279,ENSMUSG00000054036,protein_coding -23839,Klk13,ENSMUSG00000054046,protein_coding -32539,Ercc6,ENSMUSG00000054051,protein_coding -29187,Rdh19,ENSMUSG00000054052,protein_coding -25051,Olfr309,ENSMUSG00000054054,protein_coding -3792,A930004D18Rik,ENSMUSG00000054057,protein_coding -26561,Pkp3,ENSMUSG00000054065,protein_coding -53547,Iigp1,ENSMUSG00000054072,protein_coding -3794,Skida1,ENSMUSG00000054074,protein_coding -39285,Utp18,ENSMUSG00000054079,protein_coding -23277,Capn12,ENSMUSG00000054083,protein_coding -36910,Fbxw28,ENSMUSG00000054087,protein_coding -12155,1810037I17Rik,ENSMUSG00000054091,protein_coding -15729,Slc25a40,ENSMUSG00000054099,protein_coding -23161,Nlrp9a,ENSMUSG00000054102,protein_coding -19474,Try4,ENSMUSG00000054106,protein_coding -45380,Skp2,ENSMUSG00000054115,protein_coding -16042,E130116L18Rik,ENSMUSG00000054116,protein_coding -46678,Zdhhc25,ENSMUSG00000054117,protein_coding -49988,H2-T3,ENSMUSG00000054128,protein_coding 
-49625,Umodl1,ENSMUSG00000054134,protein_coding -46707,Adm2,ENSMUSG00000054136,protein_coding -34664,Olfr24,ENSMUSG00000054141,protein_coding -49135,Vmn1r236,ENSMUSG00000054142,protein_coding -39604,Krt15,ENSMUSG00000054146,protein_coding -44649,Syne3,ENSMUSG00000054150,protein_coding -35529,Gm4894,ENSMUSG00000054156,protein_coding -6334,Nkx2-4,ENSMUSG00000054160,protein_coding -24039,Fam83e,ENSMUSG00000054161,protein_coding -30451,Spock3,ENSMUSG00000054162,protein_coding -23077,Ceacam10,ENSMUSG00000054169,protein_coding -30951,Klf1,ENSMUSG00000054191,protein_coding -45765,Cthrc1,ENSMUSG00000054196,protein_coding -11147,Gon4l,ENSMUSG00000054199,protein_coding -55008,Ffar4,ENSMUSG00000054200,protein_coding -3021,Ifi205,ENSMUSG00000054203,protein_coding -43323,Alkal2,ENSMUSG00000054204,protein_coding -28113,Gzmm,ENSMUSG00000054206,protein_coding -11297,Sprr2k,ENSMUSG00000054215,protein_coding -20585,Tprkb,ENSMUSG00000054226,protein_coding -25733,Olfr481,ENSMUSG00000054236,protein_coding -55011,Fra10ac1,ENSMUSG00000054237,protein_coding -16264,Fgfr3,ENSMUSG00000054252,protein_coding -17840,Msi1,ENSMUSG00000054256,protein_coding -45352,Lifr,ENSMUSG00000054263,protein_coding -41044,Serpinb9d,ENSMUSG00000054266,protein_coding -22374,Zscan4c,ENSMUSG00000054272,protein_coding -46585,Arfgap3,ENSMUSG00000054277,protein_coding -16236,Prr14l,ENSMUSG00000054280,protein_coding -8957,P2ry10b,ENSMUSG00000054293,protein_coding -43620,Eapp,ENSMUSG00000054302,protein_coding -43160,Cpsf3,ENSMUSG00000054309,protein_coding -22616,Obox1,ENSMUSG00000054310,protein_coding -11502,Mrps21,ENSMUSG00000054312,protein_coding -31362,Lrrc36,ENSMUSG00000054320,protein_coding -52848,Taf4b,ENSMUSG00000054321,protein_coding -11327,Lce3a,ENSMUSG00000054325,protein_coding -23052,Kcnn4,ENSMUSG00000054342,protein_coding -35281,Bsx,ENSMUSG00000054360,protein_coding -14192,Lexm,ENSMUSG00000054362,protein_coding -42902,Rhob,ENSMUSG00000054364,protein_coding -26206,Zfp747,ENSMUSG00000054381,protein_coding 
-44171,Pnma1,ENSMUSG00000054383,protein_coding -23117,Ceacam2,ENSMUSG00000054385,protein_coding -2162,Mdm4,ENSMUSG00000054387,protein_coding -31290,Cklf,ENSMUSG00000054400,protein_coding -39055,Slfn5,ENSMUSG00000054404,protein_coding -14886,Dnajc8,ENSMUSG00000054405,protein_coding -38805,Olfr411,ENSMUSG00000054406,protein_coding -30341,Spcs3,ENSMUSG00000054408,protein_coding -45817,Tmem74,ENSMUSG00000054409,protein_coding -12020,Slc30a7,ENSMUSG00000054414,protein_coding -18713,Cyp3a44,ENSMUSG00000054417,protein_coding -20293,Fabp1,ENSMUSG00000054422,protein_coding -32140,Cadps,ENSMUSG00000054423,protein_coding -14884,Atpif1,ENSMUSG00000054428,protein_coding -19556,Olfr450,ENSMUSG00000054431,protein_coding -18066,Tmem120b,ENSMUSG00000054434,protein_coding -19673,Gimap4,ENSMUSG00000054435,protein_coding -19247,Cpa1,ENSMUSG00000054446,protein_coding -28265,Aes,ENSMUSG00000054452,protein_coding -7447,Sytl5,ENSMUSG00000054453,protein_coding -7030,Vapb,ENSMUSG00000054455,protein_coding -42946,Vsnl1,ENSMUSG00000054459,protein_coding -50764,Lclat1,ENSMUSG00000054469,protein_coding -20291,Thnsl2,ENSMUSG00000054474,protein_coding -53361,Kcnn2,ENSMUSG00000054477,protein_coding -5888,Tmem62,ENSMUSG00000054484,protein_coding -21636,Tas2r130,ENSMUSG00000054497,protein_coding -25052,Olfr308,ENSMUSG00000054498,protein_coding -23097,Dedd2,ENSMUSG00000054499,protein_coding -33543,Parp4,ENSMUSG00000054509,protein_coding -40066,Trim65,ENSMUSG00000054517,protein_coding -38289,Zfp867,ENSMUSG00000054519,protein_coding -16296,Sh3bp2,ENSMUSG00000054520,protein_coding -54495,Ms4a5,ENSMUSG00000054523,protein_coding -54648,Olfr1500,ENSMUSG00000054526,protein_coding -17085,Tmprss11e,ENSMUSG00000054537,protein_coding -1536,Ugt1a6a,ENSMUSG00000054545,protein_coding -26402,Adam12,ENSMUSG00000054555,protein_coding -25583,Usp17la,ENSMUSG00000054568,protein_coding -4630,Pla2r1,ENSMUSG00000054580,protein_coding -6790,Pabpc1l,ENSMUSG00000054582,protein_coding 
-22055,Oscar,ENSMUSG00000054594,protein_coding -16784,9130230L23Rik,ENSMUSG00000054598,protein_coding -48288,Cggbp1,ENSMUSG00000054604,protein_coding -54136,Kdm2a,ENSMUSG00000054611,protein_coding -26440,Mgmt,ENSMUSG00000054612,protein_coding -46961,Mettl7a1,ENSMUSG00000054619,protein_coding -8035,Xlr,ENSMUSG00000054626,protein_coding -17104,Ugt2b5,ENSMUSG00000054630,protein_coding -50891,Slc8a1,ENSMUSG00000054640,protein_coding -19962,Mmrn1,ENSMUSG00000054641,protein_coding -30561,Zfp869,ENSMUSG00000054648,protein_coding -13096,Pm20d2,ENSMUSG00000054659,protein_coding -26563,Ano9,ENSMUSG00000054662,protein_coding -49726,Olfr63,ENSMUSG00000054666,protein_coding -9409,Irs4,ENSMUSG00000054667,protein_coding -26508,5830411N06Rik,ENSMUSG00000054672,protein_coding -17763,Tmem119,ENSMUSG00000054675,protein_coding -23651,1600014C10Rik,ENSMUSG00000054676,protein_coding -13098,Srsf12,ENSMUSG00000054679,protein_coding -12417,Emcn,ENSMUSG00000054690,protein_coding -35985,Adam10,ENSMUSG00000054693,protein_coding -1309,Ap1s3,ENSMUSG00000054702,protein_coding -28272,Ankrd24,ENSMUSG00000054708,protein_coding -22494,Zscan22,ENSMUSG00000054715,protein_coding -26198,Zfp771,ENSMUSG00000054716,protein_coding -30384,Hmgb2,ENSMUSG00000054717,protein_coding -17505,Lrrc8c,ENSMUSG00000054720,protein_coding -50532,Vmac,ENSMUSG00000054723,protein_coding -8031,1700013H16Rik,ENSMUSG00000054727,protein_coding -41273,Phactr1,ENSMUSG00000054728,protein_coding -33725,Msra,ENSMUSG00000054733,protein_coding -7593,Zfp182,ENSMUSG00000054737,protein_coding -52947,Gm9955,ENSMUSG00000054745,protein_coding -25997,Abca15,ENSMUSG00000054746,protein_coding -13502,Fsd1l,ENSMUSG00000054752,protein_coding -22025,AU018091,ENSMUSG00000054753,protein_coding -40354,Akr1c20,ENSMUSG00000054757,protein_coding -26623,Krtap5-2,ENSMUSG00000054759,protein_coding -33696,Defb42,ENSMUSG00000054763,protein_coding -30211,Mtnr1a,ENSMUSG00000054764,protein_coding -4136,Set,ENSMUSG00000054766,protein_coding 
-875,Kctd18,ENSMUSG00000054770,protein_coding -36956,Klhl18,ENSMUSG00000054792,protein_coding -23057,Cadm4,ENSMUSG00000054793,protein_coding -23279,Actn4,ENSMUSG00000054808,protein_coding -16935,Usp46,ENSMUSG00000054814,protein_coding -29919,Nsd3,ENSMUSG00000054823,protein_coding -55044,Cyp2c50,ENSMUSG00000054827,protein_coding -36950,Elp6,ENSMUSG00000054836,protein_coding -55378,Atrnl1,ENSMUSG00000054843,protein_coding -8077,Smim10l2a,ENSMUSG00000054850,protein_coding -46872,Rnd1,ENSMUSG00000054855,protein_coding -46672,Tafa5,ENSMUSG00000054863,protein_coding -37259,Tmem158,ENSMUSG00000054871,protein_coding -54222,Pcnx3,ENSMUSG00000054874,protein_coding -13276,4930578G10Rik,ENSMUSG00000054885,protein_coding -41180,Dsp,ENSMUSG00000054889,protein_coding -40678,Olfr1535,ENSMUSG00000054890,protein_coding -16900,Txk,ENSMUSG00000054892,protein_coding -22203,Zfp667,ENSMUSG00000054893,protein_coding -43867,Dmac2l,ENSMUSG00000054894,protein_coding -50877,Arhgef33,ENSMUSG00000054901,protein_coding -47923,Stfa3,ENSMUSG00000054905,protein_coding -16746,Klhl5,ENSMUSG00000054920,protein_coding -40671,Zkscan4,ENSMUSG00000054931,protein_coding -17186,Afp,ENSMUSG00000054932,protein_coding -28919,Kcnmb4,ENSMUSG00000054934,protein_coding -22216,Olfr1346,ENSMUSG00000054938,protein_coding -47171,Zfp174,ENSMUSG00000054939,protein_coding -50130,Olfr137,ENSMUSG00000054940,protein_coding -12647,Miga1,ENSMUSG00000054942,protein_coding -50206,9130008F23Rik,ENSMUSG00000054951,protein_coding -14613,Nt5c1a,ENSMUSG00000054958,protein_coding -21919,Lmntd1,ENSMUSG00000054966,protein_coding -46287,Zfp647,ENSMUSG00000054967,protein_coding -1337,Nyap2,ENSMUSG00000054976,protein_coding -35887,Kbtbd13,ENSMUSG00000054978,protein_coding -37344,Sec14l3,ENSMUSG00000054986,protein_coding -10082,Agtr1b,ENSMUSG00000054988,protein_coding -9266,AV320801,ENSMUSG00000054994,protein_coding -54267,Naaladl1,ENSMUSG00000054999,protein_coding -21191,Lrtm2,ENSMUSG00000055003,protein_coding 
-37653,A830031A19Rik,ENSMUSG00000055010,protein_coding -1569,Agap1,ENSMUSG00000055013,protein_coding -46762,Cntn1,ENSMUSG00000055022,protein_coding -46483,Ep300,ENSMUSG00000055024,protein_coding -24247,Gabrg3,ENSMUSG00000055026,protein_coding -20294,Smyd1,ENSMUSG00000055027,protein_coding -11290,Sprr2e,ENSMUSG00000055030,protein_coding -3034,Olfr420,ENSMUSG00000055033,protein_coding -46285,Commd5,ENSMUSG00000055041,protein_coding -55048,Pdlim1,ENSMUSG00000055044,protein_coding -28253,Nfic,ENSMUSG00000055053,protein_coding -46410,Ddx17,ENSMUSG00000055065,protein_coding -3121,Smyd3,ENSMUSG00000055067,protein_coding -35610,Rab39,ENSMUSG00000055069,protein_coding -24255,Gabra5,ENSMUSG00000055078,protein_coding -4367,Olfr354,ENSMUSG00000055088,protein_coding -53339,Spink6,ENSMUSG00000055095,protein_coding -23831,Zfp819,ENSMUSG00000055102,protein_coding -28666,Phxr2,ENSMUSG00000055108,protein_coding -9683,Gm15155,ENSMUSG00000055109,protein_coding -25869,Arntl,ENSMUSG00000055116,protein_coding -32933,Cgrrf1,ENSMUSG00000055128,protein_coding -40568,Sugct,ENSMUSG00000055137,protein_coding -30707,Klf2,ENSMUSG00000055148,protein_coding -22207,Zfp78,ENSMUSG00000055150,protein_coding -28987,Ifng,ENSMUSG00000055170,protein_coding -21340,C1ra,ENSMUSG00000055172,protein_coding -6365,Cstl1,ENSMUSG00000055177,protein_coding -2117,Fam72a,ENSMUSG00000055184,protein_coding -23853,Klk15,ENSMUSG00000055193,protein_coding -42633,Actbl2,ENSMUSG00000055194,protein_coding -1227,Fev,ENSMUSG00000055197,protein_coding -14333,Gm12830,ENSMUSG00000055198,protein_coding -23200,Sertad3,ENSMUSG00000055200,protein_coding -49701,Zfp811,ENSMUSG00000055202,protein_coding -17182,Ankrd17,ENSMUSG00000055204,protein_coding -14335,Foxd2,ENSMUSG00000055210,protein_coding -3059,Pld5,ENSMUSG00000055214,protein_coding -41737,Zfp935,ENSMUSG00000055228,protein_coding -16028,Wdr86,ENSMUSG00000055235,protein_coding -20362,Kcmf1,ENSMUSG00000055239,protein_coding -49730,Zfp101,ENSMUSG00000055240,protein_coding 
-41620,Ntrk2,ENSMUSG00000055254,protein_coding -13552,Tmem245,ENSMUSG00000055296,protein_coding -41685,Ctsj,ENSMUSG00000055298,protein_coding -12445,Adh7,ENSMUSG00000055301,protein_coding -16349,Mrfap1,ENSMUSG00000055302,protein_coding -23042,Zfp93,ENSMUSG00000055305,protein_coding -5589,Them7,ENSMUSG00000055312,protein_coding -40665,Pgbd1,ENSMUSG00000055313,protein_coding -26304,Sec23ip,ENSMUSG00000055319,protein_coding -25862,Tead1,ENSMUSG00000055320,protein_coding -1189,Tns1,ENSMUSG00000055322,protein_coding -38170,Fat2,ENSMUSG00000055333,protein_coding -35684,Snupn,ENSMUSG00000055334,protein_coding -41957,Zfp457,ENSMUSG00000055341,protein_coding -9865,4933400A11Rik,ENSMUSG00000055357,protein_coding -40494,Prl2c5,ENSMUSG00000055360,protein_coding -31119,Slc6a2,ENSMUSG00000055368,protein_coding -4552,Stam2,ENSMUSG00000055371,protein_coding -13030,Fut9,ENSMUSG00000055373,protein_coding -17604,Rnf212,ENSMUSG00000055385,protein_coding -15399,Fbxo6,ENSMUSG00000055401,protein_coding -25274,Map6,ENSMUSG00000055407,protein_coding -24180,Nell1,ENSMUSG00000055409,protein_coding -37859,Atp10b,ENSMUSG00000055415,protein_coding -34119,Pcdh9,ENSMUSG00000055421,protein_coding -19939,Nap1l5,ENSMUSG00000055430,protein_coding -31569,Maf,ENSMUSG00000055435,protein_coding -12743,Srsf11,ENSMUSG00000055436,protein_coding -48117,Cd47,ENSMUSG00000055447,protein_coding -50754,Alk,ENSMUSG00000055471,protein_coding -41955,Zfp458,ENSMUSG00000055480,protein_coding -6659,Soga1,ENSMUSG00000055485,protein_coding -24193,Ano5,ENSMUSG00000055489,protein_coding -55202,Pprc1,ENSMUSG00000055491,protein_coding -26863,Epm2a,ENSMUSG00000055493,protein_coding -28098,Vmn2r81,ENSMUSG00000055515,protein_coding -55338,Gucy2g,ENSMUSG00000055523,protein_coding -28950,Cpsf6,ENSMUSG00000055531,protein_coding -32377,Zcchc24,ENSMUSG00000055538,protein_coding -48255,Epha6,ENSMUSG00000055540,protein_coding -22083,Lair1,ENSMUSG00000055541,protein_coding -37934,Timd4,ENSMUSG00000055546,protein_coding 
-2490,Apobec4,ENSMUSG00000055547,protein_coding -30604,Kxd1,ENSMUSG00000055553,protein_coding -8030,4930502E18Rik,ENSMUSG00000055555,protein_coding -41967,Zfp459,ENSMUSG00000055560,protein_coding -53336,Spink5,ENSMUSG00000055561,protein_coding -1082,Unc80,ENSMUSG00000055567,protein_coding -25055,Olfr305,ENSMUSG00000055571,protein_coding -21705,5530400C23Rik,ENSMUSG00000055594,protein_coding -48936,Tcp10b,ENSMUSG00000055602,protein_coding -37764,Hba-x,ENSMUSG00000055609,protein_coding -25053,Olfr307,ENSMUSG00000055610,protein_coding -4798,Cdca7,ENSMUSG00000055612,protein_coding -26560,B4galnt4,ENSMUSG00000055629,protein_coding -4184,Hmcn2,ENSMUSG00000055632,protein_coding -22155,Zfp580,ENSMUSG00000055633,protein_coding -34157,Dach1,ENSMUSG00000055639,protein_coding -25536,Ubqln5,ENSMUSG00000055643,protein_coding -24786,Klhl25,ENSMUSG00000055652,protein_coding -7985,Gpc3,ENSMUSG00000055653,protein_coding -51075,Mettl4,ENSMUSG00000055660,protein_coding -38725,Zzef1,ENSMUSG00000055670,protein_coding -29653,Kbtbd11,ENSMUSG00000055675,protein_coding -41688,Ctsr,ENSMUSG00000055679,protein_coding -30592,Cope,ENSMUSG00000055681,protein_coding -9748,Gja6,ENSMUSG00000055691,protein_coding -47436,Tmem191c,ENSMUSG00000055692,protein_coding -30598,Klhl26,ENSMUSG00000055707,protein_coding -34229,Slain1,ENSMUSG00000055717,protein_coding -35719,Ubl7,ENSMUSG00000055720,protein_coding -25883,Rras2,ENSMUSG00000055723,protein_coding -17347,Paqr3,ENSMUSG00000055725,protein_coding -31319,Ces2a,ENSMUSG00000055730,protein_coding -9106,Nap1l3,ENSMUSG00000055733,protein_coding -45299,Ghr,ENSMUSG00000055737,protein_coding -46614,Rtl6,ENSMUSG00000055745,protein_coding -9663,Magea2,ENSMUSG00000055746,protein_coding -46033,Gsdmc4,ENSMUSG00000055748,protein_coding -50873,Gemin6,ENSMUSG00000055760,protein_coding -12977,Nkain3,ENSMUSG00000055761,protein_coding -46210,Eef1d,ENSMUSG00000055762,protein_coding -38485,Myh8,ENSMUSG00000055775,protein_coding -7980,Usp26,ENSMUSG00000055780,protein_coding 
-46748,Abcd2,ENSMUSG00000055782,protein_coding -48209,E330017A01Rik,ENSMUSG00000055789,protein_coding -20355,Tcf7l1,ENSMUSG00000055799,protein_coding -39776,Fmnl1,ENSMUSG00000055805,protein_coding -22111,Dnaaf3,ENSMUSG00000055809,protein_coding -48510,Cldn17,ENSMUSG00000055811,protein_coding -50912,Mta3,ENSMUSG00000055817,protein_coding -35230,Olfr967,ENSMUSG00000055820,protein_coding -23049,Tescl,ENSMUSG00000055826,protein_coding -46031,Gsdmc3,ENSMUSG00000055827,protein_coding -31530,Zfp1,ENSMUSG00000055835,protein_coding -4371,Olfr357,ENSMUSG00000055838,protein_coding -49244,Elob,ENSMUSG00000055839,protein_coding -20342,Rnf181,ENSMUSG00000055850,protein_coding -28196,Izumo4,ENSMUSG00000055862,protein_coding -11807,Tafa3,ENSMUSG00000055865,protein_coding -1612,Per2,ENSMUSG00000055866,protein_coding -20287,Foxi3,ENSMUSG00000055874,protein_coding -7228,Abhd16b,ENSMUSG00000055882,protein_coding -43788,Fancm,ENSMUSG00000055884,protein_coding -11883,Ubl4b,ENSMUSG00000055891,protein_coding -54516,Oosp2,ENSMUSG00000055895,protein_coding -14392,Tmem69,ENSMUSG00000055900,protein_coding -20341,Tmem150a,ENSMUSG00000055912,protein_coding -43480,Zfp277,ENSMUSG00000055917,protein_coding -17006,Aasdh,ENSMUSG00000055923,protein_coding -5830,Gm14137,ENSMUSG00000055926,protein_coding -31082,Fto,ENSMUSG00000055932,protein_coding -54520,Oosp3,ENSMUSG00000055933,protein_coding -39531,Krt28,ENSMUSG00000055937,protein_coding -22568,Obox7,ENSMUSG00000055942,protein_coding -5724,Emc7,ENSMUSG00000055943,protein_coding -48861,Prr18,ENSMUSG00000055945,protein_coding -14308,Skint4,ENSMUSG00000055960,protein_coding -12896,Triqk,ENSMUSG00000055963,protein_coding -38755,Olfr378,ENSMUSG00000055971,protein_coding -30098,Cldn23,ENSMUSG00000055976,protein_coding -24032,Fut2,ENSMUSG00000055978,protein_coding -1344,Irs1,ENSMUSG00000055980,protein_coding -18697,Zkscan5,ENSMUSG00000055991,protein_coding -31045,Nod2,ENSMUSG00000055994,protein_coding -15751,9330182L06Rik,ENSMUSG00000056004,protein_coding 
-39312,Gm11541,ENSMUSG00000056008,protein_coding -18527,A430033K04Rik,ENSMUSG00000056014,protein_coding -31901,Ccdc7b,ENSMUSG00000056018,protein_coding -30679,Zfp709,ENSMUSG00000056019,protein_coding -12544,Clca3a1,ENSMUSG00000056025,protein_coding -18715,Cyp3a11,ENSMUSG00000056035,protein_coding -23612,Rgs9bp,ENSMUSG00000056043,protein_coding -3224,Mia3,ENSMUSG00000056050,protein_coding -11256,S100a8,ENSMUSG00000056054,protein_coding -1518,Sag,ENSMUSG00000056055,protein_coding -45569,Otulinl,ENSMUSG00000056069,protein_coding -11257,S100a9,ENSMUSG00000056071,protein_coding -27507,Grik2,ENSMUSG00000056073,protein_coding -18574,Eif3b,ENSMUSG00000056076,protein_coding -54926,Lipm,ENSMUSG00000056078,protein_coding -20332,St3gal5,ENSMUSG00000056091,protein_coding -4534,Tas2r134,ENSMUSG00000056115,protein_coding -49967,H2-T22,ENSMUSG00000056116,protein_coding -50829,Fez2,ENSMUSG00000056121,protein_coding -52900,B4galt6,ENSMUSG00000056124,protein_coding -53381,Ticam2,ENSMUSG00000056130,protein_coding -36314,Pgm3,ENSMUSG00000056131,protein_coding -48841,Unc93a2,ENSMUSG00000056133,protein_coding -25546,Trim34a,ENSMUSG00000056144,protein_coding -29184,Rdh9,ENSMUSG00000056148,protein_coding -54031,Socs6,ENSMUSG00000056153,protein_coding -30916,Nanos3,ENSMUSG00000056155,protein_coding -39281,Car10,ENSMUSG00000056158,protein_coding -53991,Cndp1,ENSMUSG00000056162,protein_coding -37036,Cnot10,ENSMUSG00000056167,protein_coding -14697,Col8a2,ENSMUSG00000056174,protein_coding -46851,Olfr283,ENSMUSG00000056184,protein_coding -54212,Snx32,ENSMUSG00000056185,protein_coding -20376,4931417E11Rik,ENSMUSG00000056197,protein_coding -54211,Cfl1,ENSMUSG00000056201,protein_coding -19536,Tas2r135,ENSMUSG00000056203,protein_coding -30612,Pgpep1,ENSMUSG00000056204,protein_coding -55192,Npm3,ENSMUSG00000056209,protein_coding -2044,R3hdm1,ENSMUSG00000056211,protein_coding -53893,Pard6g,ENSMUSG00000056214,protein_coding -19298,Lrguk,ENSMUSG00000056215,protein_coding 
-23586,Cebpg,ENSMUSG00000056216,protein_coding -2433,Pla2g4a,ENSMUSG00000056220,protein_coding -41592,Spock1,ENSMUSG00000056222,protein_coding -41821,Spata31,ENSMUSG00000056223,protein_coding -29557,Cars2,ENSMUSG00000056228,protein_coding -32525,Ncoa4,ENSMUSG00000056234,protein_coding -46057,Kcnq3,ENSMUSG00000056258,protein_coding -11862,Lrif1,ENSMUSG00000056260,protein_coding -36567,Cep70,ENSMUSG00000056267,protein_coding -2335,Dennd1b,ENSMUSG00000056268,protein_coding -11276,Prr9,ENSMUSG00000056270,protein_coding -35711,Lman1l,ENSMUSG00000056271,protein_coding -7959,Olfr1322,ENSMUSG00000056281,protein_coding -54503,Ms4a4b,ENSMUSG00000056290,protein_coding -46029,Gsdmc2,ENSMUSG00000056293,protein_coding -32151,Synpr,ENSMUSG00000056296,protein_coding -15332,Zfp981,ENSMUSG00000056300,protein_coding -20338,Usp39,ENSMUSG00000056305,protein_coding -10585,Sertm1,ENSMUSG00000056306,protein_coding -18253,Tyw1,ENSMUSG00000056310,protein_coding -29886,Tcim,ENSMUSG00000056313,protein_coding -38482,Myh1,ENSMUSG00000056328,protein_coding -37636,Usp34,ENSMUSG00000056342,protein_coding -48520,Krtap13-1,ENSMUSG00000056350,protein_coding -16058,Actr3b,ENSMUSG00000056367,protein_coding -20337,Sftpb,ENSMUSG00000056370,protein_coding -27072,Taar1,ENSMUSG00000056379,protein_coding -8283,Gpr50,ENSMUSG00000056380,protein_coding -23748,AI987944,ENSMUSG00000056383,protein_coding -22521,Lig1,ENSMUSG00000056394,protein_coding -49345,Prss34,ENSMUSG00000056399,protein_coding -18539,Adap1,ENSMUSG00000056413,protein_coding -47719,Uts2b,ENSMUSG00000056423,protein_coding -30522,Nat3,ENSMUSG00000056426,protein_coding -37809,Slit3,ENSMUSG00000056427,protein_coding -20353,Tgoln1,ENSMUSG00000056429,protein_coding -4893,Cyct,ENSMUSG00000056436,protein_coding -40481,Prl2c3,ENSMUSG00000056457,protein_coding -44869,Mok,ENSMUSG00000056458,protein_coding -43995,Zbtb25,ENSMUSG00000056459,protein_coding -10664,Med12l,ENSMUSG00000056476,protein_coding -54172,Cd248,ENSMUSG00000056481,protein_coding 
-4827,Chn1,ENSMUSG00000056486,protein_coding -46964,Mettl7a2,ENSMUSG00000056487,protein_coding -50226,Adgrf5,ENSMUSG00000056492,protein_coding -18601,Foxk1,ENSMUSG00000056493,protein_coding -12963,Cngb3,ENSMUSG00000056494,protein_coding -11016,Tmem154,ENSMUSG00000056498,protein_coding -6926,Cebpb,ENSMUSG00000056501,protein_coding -44865,1700001K19Rik,ENSMUSG00000056508,protein_coding -24099,Gm9999,ENSMUSG00000056509,protein_coding -50639,Rab31,ENSMUSG00000056515,protein_coding -14888,Ptafr,ENSMUSG00000056529,protein_coding -17579,Ccdc18,ENSMUSG00000056531,protein_coding -1797,Pign,ENSMUSG00000056536,protein_coding -8886,Rlim,ENSMUSG00000056537,protein_coding -6487,Defb21,ENSMUSG00000056544,protein_coding -45254,Ptprn2,ENSMUSG00000056553,protein_coding -38238,Olfr324,ENSMUSG00000056564,protein_coding -2895,Mpz,ENSMUSG00000056569,protein_coding -18849,Zar1l,ENSMUSG00000056586,protein_coding -23829,Zfp658,ENSMUSG00000056592,protein_coding -14918,Trnp1,ENSMUSG00000056596,protein_coding -38325,Drc3,ENSMUSG00000056598,protein_coding -50053,Olfr90,ENSMUSG00000056600,protein_coding -18844,Fry,ENSMUSG00000056602,protein_coding -47040,Krt72,ENSMUSG00000056605,protein_coding -31064,Chd9,ENSMUSG00000056608,protein_coding -54314,Ppp1r14b,ENSMUSG00000056612,protein_coding -35482,4931429L15Rik,ENSMUSG00000056617,protein_coding -54315,Fkbp2,ENSMUSG00000056629,protein_coding -52894,Dsg3,ENSMUSG00000056632,protein_coding -20752,Chst13,ENSMUSG00000056643,protein_coding -39382,Hoxb8,ENSMUSG00000056648,protein_coding -46325,Apol8,ENSMUSG00000056656,protein_coding -46149,Them6,ENSMUSG00000056665,protein_coding -20351,Retsat,ENSMUSG00000056666,protein_coding -53295,Prelid2,ENSMUSG00000056671,protein_coding -51095,Kdm5d,ENSMUSG00000056673,protein_coding -9603,Gpr173,ENSMUSG00000056679,protein_coding -49495,Ilrun,ENSMUSG00000056692,protein_coding -22223,Olfr1350,ENSMUSG00000056696,protein_coding -20350,Elmod3,ENSMUSG00000056698,protein_coding 
-48556,Krtap7-1,ENSMUSG00000056706,protein_coding -2539,Ier5,ENSMUSG00000056708,protein_coding -3508,Gm13199,ENSMUSG00000056718,protein_coding -36961,Nbeal2,ENSMUSG00000056724,protein_coding -41673,Ctsll3,ENSMUSG00000056728,protein_coding -20349,Capg,ENSMUSG00000056737,protein_coding -41472,Nfil3,ENSMUSG00000056749,protein_coding -38466,Dnah9,ENSMUSG00000056752,protein_coding -20996,Grm7,ENSMUSG00000056755,protein_coding -29028,Hmga2,ENSMUSG00000056758,protein_coding -122,Cspp1,ENSMUSG00000056763,protein_coding -44706,Setd3,ENSMUSG00000056770,protein_coding -25589,Olfr667,ENSMUSG00000056782,protein_coding -53631,St8sia3,ENSMUSG00000056812,protein_coding -8241,Gm6812,ENSMUSG00000056815,protein_coding -31857,Tsnax,ENSMUSG00000056820,protein_coding -47539,Olfr166,ENSMUSG00000056822,protein_coding -6862,Zfp663,ENSMUSG00000056824,protein_coding -54680,Foxb2,ENSMUSG00000056829,protein_coding -19366,Ttc26,ENSMUSG00000056832,protein_coding -47079,Pcbp2,ENSMUSG00000056851,protein_coding -29278,Olfr765,ENSMUSG00000056853,protein_coding -8994,Pou3f4,ENSMUSG00000056854,protein_coding -26467,Jakmip3,ENSMUSG00000056856,protein_coding -54650,Olfr1502,ENSMUSG00000056858,protein_coding -25679,Olfr702,ENSMUSG00000056863,protein_coding -680,Gulp1,ENSMUSG00000056870,protein_coding -37062,Gadl1,ENSMUSG00000056880,protein_coding -26519,Olfr533,ENSMUSG00000056883,protein_coding -26632,Gm4559,ENSMUSG00000056885,protein_coding -28864,Glipr1,ENSMUSG00000056888,protein_coding -38264,Hist3h2ba,ENSMUSG00000056895,protein_coding -43487,Immp2l,ENSMUSG00000056899,protein_coding -10220,Usp13,ENSMUSG00000056900,protein_coding -21674,Tas2r102,ENSMUSG00000056901,protein_coding -49096,Vmn2r107,ENSMUSG00000056910,protein_coding -28716,1700017N19Rik,ENSMUSG00000056912,protein_coding -54221,Sipa1,ENSMUSG00000056917,protein_coding -36331,Cep162,ENSMUSG00000056919,protein_coding -38778,Olfr394,ENSMUSG00000056921,protein_coding -21684,Tas2r113,ENSMUSG00000056926,protein_coding 
-39773,Acbd4,ENSMUSG00000056938,protein_coding -6525,Commd7,ENSMUSG00000056941,protein_coding -25776,Olfr512,ENSMUSG00000056946,protein_coding -10604,Mab21l1,ENSMUSG00000056947,protein_coding -21066,Tatdn2,ENSMUSG00000056952,protein_coding -38250,Olfr315,ENSMUSG00000056959,protein_coding -35163,Olfr919,ENSMUSG00000056961,protein_coding -40106,Jmjd6,ENSMUSG00000056962,protein_coding -18481,Gjc3,ENSMUSG00000056966,protein_coding -24543,Magel2,ENSMUSG00000056972,protein_coding -31125,Ces1d,ENSMUSG00000056973,protein_coding -23412,Hamp2,ENSMUSG00000056978,protein_coding -44037,Fam71d,ENSMUSG00000056987,protein_coding -5255,Olfr1178,ENSMUSG00000056995,protein_coding -54989,Ide,ENSMUSG00000056999,protein_coding -9286,Nxf3,ENSMUSG00000057000,protein_coding -38484,Myh4,ENSMUSG00000057003,protein_coding -15630,Vmn2r-ps159,ENSMUSG00000057021,protein_coding -2351,Cfhr1,ENSMUSG00000057037,protein_coding -7029,1700010B08Rik,ENSMUSG00000057047,protein_coding -38784,Olfr397,ENSMUSG00000057050,protein_coding -38660,Inca1,ENSMUSG00000057054,protein_coding -39396,Skap1,ENSMUSG00000057058,protein_coding -31872,Slc35f3,ENSMUSG00000057060,protein_coding -25063,Olfr297,ENSMUSG00000057067,protein_coding -17252,Fam47e,ENSMUSG00000057068,protein_coding -40470,Ero1lb,ENSMUSG00000057069,protein_coding -3346,Spata45,ENSMUSG00000057072,protein_coding -31130,Ces1g,ENSMUSG00000057074,protein_coding -23425,Fxyd3,ENSMUSG00000057092,protein_coding -23212,Zfp607b,ENSMUSG00000057093,protein_coding -37888,Ebf1,ENSMUSG00000057098,protein_coding -23033,Zfp180,ENSMUSG00000057101,protein_coding -20584,Nat8f1,ENSMUSG00000057103,protein_coding -4309,Cntrl,ENSMUSG00000057110,protein_coding -37784,Npm1,ENSMUSG00000057113,protein_coding -29633,AF366264,ENSMUSG00000057116,protein_coding -11591,Gja5,ENSMUSG00000057123,protein_coding -53903,Txnl4a,ENSMUSG00000057130,protein_coding -33148,Rpgrip1,ENSMUSG00000057132,protein_coding -6738,Chd6,ENSMUSG00000057133,protein_coding 
-27818,Ado,ENSMUSG00000057134,protein_coding -38667,Scimp,ENSMUSG00000057135,protein_coding -19321,Tmem140,ENSMUSG00000057137,protein_coding -25553,Trim12c,ENSMUSG00000057143,protein_coding -5755,Dph6,ENSMUSG00000057147,protein_coding -5693,Olfr1301,ENSMUSG00000057149,protein_coding -33430,Homez,ENSMUSG00000057156,protein_coding -19488,Prss2,ENSMUSG00000057163,protein_coding -40933,Prl3d1,ENSMUSG00000057170,protein_coding -600,Rfx8,ENSMUSG00000057173,protein_coding -48535,Krtap19-9b,ENSMUSG00000057174,protein_coding -26228,Ccdc189,ENSMUSG00000057176,protein_coding -23101,Gsk3a,ENSMUSG00000057177,protein_coding -33059,Olfr747,ENSMUSG00000057179,protein_coding -39011,5730455P16Rik,ENSMUSG00000057181,protein_coding -4675,Scn3a,ENSMUSG00000057182,protein_coding -34794,AB124611,ENSMUSG00000057191,protein_coding -34786,Slc44a2,ENSMUSG00000057193,protein_coding -22722,Ceacam13,ENSMUSG00000057195,protein_coding -49131,Vmn1r234,ENSMUSG00000057203,protein_coding -5075,Olfr1028,ENSMUSG00000057207,protein_coding -40029,Armc7,ENSMUSG00000057219,protein_coding -30415,Aadat,ENSMUSG00000057228,protein_coding -23122,Dmac2,ENSMUSG00000057229,protein_coding -20628,Aak1,ENSMUSG00000057230,protein_coding -5633,Mettl15,ENSMUSG00000057234,protein_coding -14773,Rbbp4,ENSMUSG00000057236,protein_coding -54491,Ms4a13,ENSMUSG00000057240,protein_coding -49810,BC051142,ENSMUSG00000057246,protein_coding -44183,Bbof1,ENSMUSG00000057265,protein_coding -54649,Olfr1501,ENSMUSG00000057270,protein_coding -20603,Snrpg,ENSMUSG00000057278,protein_coding -13577,Musk,ENSMUSG00000057280,protein_coding -40100,St6galnac2,ENSMUSG00000057286,protein_coding -17445,Arhgap24,ENSMUSG00000057315,protein_coding -25461,Usp17ld,ENSMUSG00000057321,protein_coding -39988,Rpl38,ENSMUSG00000057322,protein_coding -1818,Bcl2,ENSMUSG00000057329,protein_coding -3069,Cep170,ENSMUSG00000057335,protein_coding -27682,Chst3,ENSMUSG00000057337,protein_coding -24037,Sphk2,ENSMUSG00000057342,protein_coding 
-46304,Apol9a,ENSMUSG00000057346,protein_coding -35204,Olfr948,ENSMUSG00000057349,protein_coding -660,Uxs1,ENSMUSG00000057363,protein_coding -34507,Birc2,ENSMUSG00000057367,protein_coding -14214,Yipf1,ENSMUSG00000057375,protein_coding -5730,Ryr3,ENSMUSG00000057378,protein_coding -21680,Tas2r123,ENSMUSG00000057381,protein_coding -48926,Mrpl18,ENSMUSG00000057388,protein_coding -41947,Zfp759,ENSMUSG00000057396,protein_coding -31123,Ces1c,ENSMUSG00000057400,protein_coding -9667,Cldn34b2,ENSMUSG00000057402,protein_coding -16268,Nsd2,ENSMUSG00000057406,protein_coding -49146,Zfp53,ENSMUSG00000057409,protein_coding -49380,Fam173a,ENSMUSG00000057411,protein_coding -49256,Dcpp3,ENSMUSG00000057417,protein_coding -8696,Las1l,ENSMUSG00000057421,protein_coding -35183,Olfr934,ENSMUSG00000057424,protein_coding -17108,Ugt2b37,ENSMUSG00000057425,protein_coding -9309,Kir3dl2,ENSMUSG00000057439,protein_coding -52728,Mpp7,ENSMUSG00000057440,protein_coding -50129,Olfr138,ENSMUSG00000057443,protein_coding -41690,Cts8,ENSMUSG00000057446,protein_coding -5281,Olfr1205,ENSMUSG00000057447,protein_coding -23067,Lypd3,ENSMUSG00000057454,protein_coding -52987,Rit2,ENSMUSG00000057455,protein_coding -9691,Phex,ENSMUSG00000057457,protein_coding -25456,Olfr591,ENSMUSG00000057461,protein_coding -1632,Olfr1415,ENSMUSG00000057464,protein_coding -24084,Saa2,ENSMUSG00000057465,protein_coding -43031,E2f6,ENSMUSG00000057469,protein_coding -20602,Fam136a,ENSMUSG00000057497,protein_coding -54601,Olfr1459,ENSMUSG00000057503,protein_coding -55143,Bloc1s2,ENSMUSG00000057506,protein_coding -23020,Vmn1r177,ENSMUSG00000057513,protein_coding -39343,Spop,ENSMUSG00000057522,protein_coding -15070,Ece1,ENSMUSG00000057530,protein_coding -41315,Dtnbp1,ENSMUSG00000057531,protein_coding -39245,Elobl,ENSMUSG00000057534,protein_coding -25048,Olfr310,ENSMUSG00000057540,protein_coding -15983,Pus7,ENSMUSG00000057541,protein_coding -34696,Zfp317,ENSMUSG00000057551,protein_coding -40467,Lgals8,ENSMUSG00000057554,protein_coding 
-53383,Eif1a,ENSMUSG00000057561,protein_coding -33169,Olfr1508,ENSMUSG00000057564,protein_coding -14775,Zbtb8os,ENSMUSG00000057572,protein_coding -20736,Vmn1r48,ENSMUSG00000057592,protein_coding -40226,Arl16,ENSMUSG00000057594,protein_coding -25560,Trim30d,ENSMUSG00000057596,protein_coding -21005,Lmcd1,ENSMUSG00000057604,protein_coding -32507,Colq,ENSMUSG00000057606,protein_coding -11308,Lce1a1,ENSMUSG00000057609,protein_coding -15894,Gnai1,ENSMUSG00000057614,protein_coding -8154,Ldoc1,ENSMUSG00000057615,protein_coding -15211,Prdm2,ENSMUSG00000057637,protein_coding -42089,Brd9,ENSMUSG00000057649,protein_coding -48530,Krtap19-2,ENSMUSG00000057650,protein_coding -38234,Olfr328,ENSMUSG00000057654,protein_coding -21390,Gapdh,ENSMUSG00000057666,protein_coding -22814,Bloc1s3,ENSMUSG00000057667,protein_coding -30885,Pkn1,ENSMUSG00000057672,protein_coding -39548,Gm11938,ENSMUSG00000057674,protein_coding -19647,Zfp746,ENSMUSG00000057691,protein_coding -20746,Vmn1r53,ENSMUSG00000057697,protein_coding -21689,Tas2r131,ENSMUSG00000057699,protein_coding -24953,Mex3b,ENSMUSG00000057706,protein_coding -36625,9630041A04Rik,ENSMUSG00000057710,protein_coding -140,A830018L16Rik,ENSMUSG00000057715,protein_coding -19405,Tmem178b,ENSMUSG00000057716,protein_coding -53298,Sh3rf2,ENSMUSG00000057719,protein_coding -14128,Lepr,ENSMUSG00000057722,protein_coding -39595,Krt33b,ENSMUSG00000057723,protein_coding -41050,Serpinb9g,ENSMUSG00000057726,protein_coding -28130,Prtn3,ENSMUSG00000057729,protein_coding -5262,Olfr1185-ps1,ENSMUSG00000057735,protein_coding -4133,Sptan1,ENSMUSG00000057738,protein_coding -15539,Megf6,ENSMUSG00000057751,protein_coding -21638,Tas2r106,ENSMUSG00000057754,protein_coding -5067,Olfr1022,ENSMUSG00000057761,protein_coding -42418,Gm6169,ENSMUSG00000057762,protein_coding -52811,Ankrd29,ENSMUSG00000057766,protein_coding -25590,Olfr668,ENSMUSG00000057770,protein_coding -11051,Mab21l2,ENSMUSG00000057777,protein_coding -38724,Cyb5d2,ENSMUSG00000057778,protein_coding 
-30591,Ddx49,ENSMUSG00000057788,protein_coding -49456,Bak1,ENSMUSG00000057789,protein_coding -50128,Olfr135,ENSMUSG00000057801,protein_coding -8619,1700084M14Rik,ENSMUSG00000057805,protein_coding -17364,Cfap299,ENSMUSG00000057816,protein_coding -54587,Olfr1446,ENSMUSG00000057817,protein_coding -50488,Zfp119a,ENSMUSG00000057835,protein_coding -8314,Xlr3a,ENSMUSG00000057836,protein_coding -21112,Rpl32,ENSMUSG00000057841,protein_coding -41959,Zfp595,ENSMUSG00000057842,protein_coding -48544,Krtap6-3,ENSMUSG00000057855,protein_coding -55420,Fam204a,ENSMUSG00000057858,protein_coding -50528,Rpl36,ENSMUSG00000057863,protein_coding -47246,Abat,ENSMUSG00000057880,protein_coding -38042,Olfr1375,ENSMUSG00000057890,protein_coding -22489,Zfp329,ENSMUSG00000057894,protein_coding -37243,Zfp105,ENSMUSG00000057895,protein_coding -37409,Camk2b,ENSMUSG00000057897,protein_coding -50220,Adgrf2,ENSMUSG00000057899,protein_coding -33048,Olfr739,ENSMUSG00000057903,protein_coding -3752,Cacnb2,ENSMUSG00000057914,protein_coding -36179,Gsta2,ENSMUSG00000057933,protein_coding -40063,Unc13d,ENSMUSG00000057948,protein_coding -44562,Itpk1,ENSMUSG00000057963,protein_coding -37783,Fgf18,ENSMUSG00000057967,protein_coding -36801,Sema3b,ENSMUSG00000057969,protein_coding -14327,Skint11,ENSMUSG00000057977,protein_coding -19877,Vmn1r12,ENSMUSG00000057981,protein_coding -34837,Zfp809,ENSMUSG00000057982,protein_coding -26531,Olfr541,ENSMUSG00000057997,protein_coding -11374,Tdpoz3,ENSMUSG00000058005,protein_coding -13084,Mdn1,ENSMUSG00000058006,protein_coding -17265,Sept11,ENSMUSG00000058013,protein_coding -25762,Olfr502,ENSMUSG00000058014,protein_coding -1830,Serpinb3d,ENSMUSG00000058017,protein_coding -31136,Ces5a,ENSMUSG00000058019,protein_coding -41260,Adtrp,ENSMUSG00000058022,protein_coding -22200,Zscan5b,ENSMUSG00000058028,protein_coding -49137,Vmn1r237,ENSMUSG00000058030,protein_coding -13677,4933430I17Rik,ENSMUSG00000058046,protein_coding -29829,Defb35,ENSMUSG00000058052,protein_coding 
-30437,Palld,ENSMUSG00000058056,protein_coding -46962,Mettl7a3,ENSMUSG00000058057,protein_coding -50039,Trim31,ENSMUSG00000058063,protein_coding -44712,Eml1,ENSMUSG00000058070,protein_coding -29336,Olfr818,ENSMUSG00000058071,protein_coding -2894,Sdhc,ENSMUSG00000058076,protein_coding -29349,Olfr825,ENSMUSG00000058084,protein_coding -41978,Zfp729b,ENSMUSG00000058093,protein_coding -46574,Nfam1,ENSMUSG00000058099,protein_coding -50113,Olfr127,ENSMUSG00000058114,protein_coding -19479,Gm5771,ENSMUSG00000058119,protein_coding -50007,H2-M10.3,ENSMUSG00000058124,protein_coding -34311,Tpm3-rs7,ENSMUSG00000058126,protein_coding -22463,Vmn1r82,ENSMUSG00000058132,protein_coding -11906,Gstm1,ENSMUSG00000058135,protein_coding -24627,Adamts17,ENSMUSG00000058145,protein_coding -8325,Xlr3c,ENSMUSG00000058147,protein_coding -53533,Chsy3,ENSMUSG00000058152,protein_coding -17728,Sez6l,ENSMUSG00000058153,protein_coding -48862,T2,ENSMUSG00000058159,protein_coding -37955,Gm5431,ENSMUSG00000058163,protein_coding -48540,Krtap6-1,ENSMUSG00000058172,protein_coding -34630,Smco4,ENSMUSG00000058173,protein_coding -10340,Gm5148,ENSMUSG00000058174,protein_coding -15552,Mmel1,ENSMUSG00000058183,protein_coding -15305,Zfp980,ENSMUSG00000058186,protein_coding -33057,Olfr746,ENSMUSG00000058188,protein_coding -34739,Zfp846,ENSMUSG00000058192,protein_coding -5190,Olfr1126,ENSMUSG00000058194,protein_coding -25513,Olfr66,ENSMUSG00000058200,protein_coding -44637,Serpina3k,ENSMUSG00000058207,protein_coding -54105,Gstp3,ENSMUSG00000058216,protein_coding -22670,Arhgap35,ENSMUSG00000058230,protein_coding -23415,Usf2,ENSMUSG00000058239,protein_coding -48625,Cryzl1,ENSMUSG00000058240,protein_coding -25770,Olfr506,ENSMUSG00000058244,protein_coding -41994,Gm10037,ENSMUSG00000058246,protein_coding -3387,Kcnh1,ENSMUSG00000058248,protein_coding -19422,Tas2r138,ENSMUSG00000058250,protein_coding -29343,Olfr822,ENSMUSG00000058251,protein_coding -9273,Tcp11x2,ENSMUSG00000058252,protein_coding 
-7453,Tspan7,ENSMUSG00000058254,protein_coding -40404,Idi1,ENSMUSG00000058258,protein_coding -44620,Serpina9,ENSMUSG00000058260,protein_coding -2632,Mrps14,ENSMUSG00000058267,protein_coding -35136,Olfr25,ENSMUSG00000058270,protein_coding -38806,Olfr412,ENSMUSG00000058275,protein_coding -38224,Gm12253,ENSMUSG00000058287,protein_coding -47066,Espl1,ENSMUSG00000058290,protein_coding -18523,Zfp68,ENSMUSG00000058291,protein_coding -21665,Prp2,ENSMUSG00000058295,protein_coding -27680,Spock2,ENSMUSG00000058297,protein_coding -27598,Mcm9,ENSMUSG00000058298,protein_coding -30595,Upf1,ENSMUSG00000058301,protein_coding -32235,Ube2e2,ENSMUSG00000058317,protein_coding -5420,Phf21a,ENSMUSG00000058318,protein_coding -26408,Dock1,ENSMUSG00000058325,protein_coding -8316,Xlr5a,ENSMUSG00000058328,protein_coding -41988,Zfp85,ENSMUSG00000058331,protein_coding -21677,Tas2r117,ENSMUSG00000058349,protein_coding -32489,Smim4,ENSMUSG00000058351,protein_coding -47033,Krt6a,ENSMUSG00000058354,protein_coding -30820,Abce1,ENSMUSG00000058355,protein_coding -48552,Krtap21-1,ENSMUSG00000058368,protein_coding -40846,Hist1h2bg,ENSMUSG00000058385,protein_coding -11773,Phtf1,ENSMUSG00000058388,protein_coding -49669,Rrp1b,ENSMUSG00000058392,protein_coding -29181,Gpr182,ENSMUSG00000058396,protein_coding -36970,Prss43,ENSMUSG00000058398,protein_coding -10291,Qrfpr,ENSMUSG00000058400,protein_coding -23321,Zfp420,ENSMUSG00000058402,protein_coding -562,Txndc9,ENSMUSG00000058407,protein_coding -25943,Syt17,ENSMUSG00000058420,protein_coding -17204,Cxcl2,ENSMUSG00000058427,protein_coding -49811,Btnl4,ENSMUSG00000058435,protein_coding -19228,Nrf1,ENSMUSG00000058440,protein_coding -46691,Panx2,ENSMUSG00000058441,protein_coding -35541,Rpl10-ps3,ENSMUSG00000058443,protein_coding -35830,Map2k5,ENSMUSG00000058444,protein_coding -19835,Znrf2,ENSMUSG00000058446,protein_coding -23332,Gm26920,ENSMUSG00000058447,protein_coding -26704,Dhcr7,ENSMUSG00000058454,protein_coding -54506,Gm8369,ENSMUSG00000058470,protein_coding 
-19323,Wdr91,ENSMUSG00000058486,protein_coding -18867,Kl,ENSMUSG00000058488,protein_coding -34717,Olfr869,ENSMUSG00000058491,protein_coding -15510,Rnf207,ENSMUSG00000058498,protein_coding -19516,Pip,ENSMUSG00000058499,protein_coding -15647,Fam133b,ENSMUSG00000058503,protein_coding -29317,Olfr801,ENSMUSG00000058513,protein_coding -35182,Olfr933,ENSMUSG00000058515,protein_coding -13652,Mup5,ENSMUSG00000058523,protein_coding -27642,AW822073,ENSMUSG00000058537,protein_coding -38932,Rpl23a,ENSMUSG00000058546,protein_coding -48095,Dppa4,ENSMUSG00000058550,protein_coding -17570,Rpl5,ENSMUSG00000058558,protein_coding -29821,Defb50,ENSMUSG00000058568,protein_coding -41551,Tmed9,ENSMUSG00000058569,protein_coding -34334,Gpc6,ENSMUSG00000058571,protein_coding -15193,Cela2a,ENSMUSG00000058579,protein_coding -46575,Serhl,ENSMUSG00000058586,protein_coding -36108,Tmod3,ENSMUSG00000058587,protein_coding -28487,Anks1b,ENSMUSG00000058589,protein_coding -3709,Fbh1,ENSMUSG00000058594,protein_coding -45645,Rpl30,ENSMUSG00000058600,protein_coding -29813,AY761184,ENSMUSG00000058618,protein_coding -6015,Adra2b,ENSMUSG00000058620,protein_coding -54730,Gda,ENSMUSG00000058624,protein_coding -50266,Capn11,ENSMUSG00000058626,protein_coding -35106,Olfr875,ENSMUSG00000058628,protein_coding -22182,Vmn1r63,ENSMUSG00000058631,protein_coding -22491,Zfp110,ENSMUSG00000058638,protein_coding -15889,Speer4f1,ENSMUSG00000058643,protein_coding -47054,Eif4b,ENSMUSG00000058655,protein_coding -45885,Samd12,ENSMUSG00000058656,protein_coding -34701,Olfr58,ENSMUSG00000058659,protein_coding -25502,Olfr69,ENSMUSG00000058662,protein_coding -1960,En1,ENSMUSG00000058665,protein_coding -43683,Nkx2-9,ENSMUSG00000058669,protein_coding -9160,4932411N23Rik,ENSMUSG00000058670,protein_coding -41075,Tubb2a,ENSMUSG00000058672,protein_coding -22334,Vmn2r51,ENSMUSG00000058685,protein_coding -32623,Ccser2,ENSMUSG00000058690,protein_coding -34683,Olfr846,ENSMUSG00000058692,protein_coding 
-50787,Memo1,ENSMUSG00000058704,protein_coding -20340,0610030E20Rik,ENSMUSG00000058706,protein_coding -23179,Egln2,ENSMUSG00000058709,protein_coding -2900,Fcer1g,ENSMUSG00000058715,protein_coding -23074,Lypd11,ENSMUSG00000058717,protein_coding -39549,Gm11937,ENSMUSG00000058725,protein_coding -39999,Cd300c,ENSMUSG00000058728,protein_coding -3148,Lin9,ENSMUSG00000058729,protein_coding -4001,Kcnt1,ENSMUSG00000058740,protein_coding -23106,Prr19,ENSMUSG00000058741,protein_coding -24046,Kcnj14,ENSMUSG00000058743,protein_coding -29439,Zfp958,ENSMUSG00000058748,protein_coding -37356,Osm,ENSMUSG00000058755,protein_coding -39506,Thra,ENSMUSG00000058756,protein_coding -25302,Rnf169,ENSMUSG00000058761,protein_coding -40698,Hist1h1b,ENSMUSG00000058773,protein_coding -6173,Cds2,ENSMUSG00000058793,protein_coding -47120,Nfe2,ENSMUSG00000058794,protein_coding -28847,Nap1l1,ENSMUSG00000058799,protein_coding -50066,Olfr97,ENSMUSG00000058802,protein_coding -27726,Col13a1,ENSMUSG00000058806,protein_coding -38229,Olfr331,ENSMUSG00000058807,protein_coding -22068,Pirb,ENSMUSG00000058818,protein_coding -35185,Olfr146,ENSMUSG00000058820,protein_coding -19204,Opn1sw,ENSMUSG00000058831,protein_coding -30602,Rex1bd,ENSMUSG00000058833,protein_coding -3875,Abi1,ENSMUSG00000058835,protein_coding -35209,Olfr952,ENSMUSG00000058856,protein_coding -53964,Zfp516,ENSMUSG00000058881,protein_coding -41943,Zfp708,ENSMUSG00000058883,protein_coding -5072,Olfr1025-ps1,ENSMUSG00000058884,protein_coding -26579,Deaf1,ENSMUSG00000058886,protein_coding -12292,Col25a1,ENSMUSG00000058897,protein_coding -41950,Rsl1,ENSMUSG00000058900,protein_coding -1634,Olfr1413,ENSMUSG00000058904,protein_coding -45406,C1qtnf3,ENSMUSG00000058914,protein_coding -9970,Slc10a5,ENSMUSG00000058921,protein_coding -53518,Ccdc192,ENSMUSG00000058925,protein_coding -54784,Gm10053,ENSMUSG00000058927,protein_coding -8079,Gm2174,ENSMUSG00000058932,protein_coding -13703,Tex48,ENSMUSG00000058935,protein_coding 
-12285,Cfi,ENSMUSG00000058952,protein_coding -26155,Fam57b,ENSMUSG00000058966,protein_coding -24071,Kcnc1,ENSMUSG00000058975,protein_coding -25473,Usp17lc,ENSMUSG00000058976,protein_coding -21211,Hdhd5,ENSMUSG00000058979,protein_coding -2993,Olfr1406,ENSMUSG00000058981,protein_coding -34002,Vwa8,ENSMUSG00000058997,protein_coding -47265,Grin2a,ENSMUSG00000059003,protein_coding -4879,Hnrnpa3,ENSMUSG00000059005,protein_coding -4247,Sh2d3c,ENSMUSG00000059013,protein_coding -19211,Kcp,ENSMUSG00000059022,protein_coding -5279,Olfr1201,ENSMUSG00000059023,protein_coding -50114,Olfr128,ENSMUSG00000059030,protein_coding -25735,Olfr482,ENSMUSG00000059031,protein_coding -53414,Eno1b,ENSMUSG00000059040,protein_coding -23861,Klk1b9,ENSMUSG00000059042,protein_coding -47170,Olfr15,ENSMUSG00000059043,protein_coding -13839,Frem1,ENSMUSG00000059049,protein_coding -44057,Rad51b,ENSMUSG00000059060,protein_coding -33062,Olfr749,ENSMUSG00000059069,protein_coding -24038,Rpl18,ENSMUSG00000059070,protein_coding -25873,Pth,ENSMUSG00000059077,protein_coding -25674,Olfr698,ENSMUSG00000059087,protein_coding -2887,Fcgr4,ENSMUSG00000059089,protein_coding -54652,Olfr1504,ENSMUSG00000059105,protein_coding -35222,Olfr961,ENSMUSG00000059106,protein_coding -26557,Ifitm6,ENSMUSG00000059108,protein_coding -5343,Olfr1263,ENSMUSG00000059112,protein_coding -48546,Gm10061,ENSMUSG00000059113,protein_coding -44249,Lrrc74a,ENSMUSG00000059114,protein_coding -26688,Nap1l4,ENSMUSG00000059119,protein_coding -23253,Ifnl2,ENSMUSG00000059128,protein_coding -29332,Olfr814,ENSMUSG00000059134,protein_coding -26528,Olfr539,ENSMUSG00000059136,protein_coding -49192,Zfp945,ENSMUSG00000059142,protein_coding -24797,Ntrk3,ENSMUSG00000059146,protein_coding -2133,Mfsd4a,ENSMUSG00000059149,protein_coding -39539,Krt40,ENSMUSG00000059169,protein_coding -4936,Pde1a,ENSMUSG00000059173,protein_coding -19750,Skap2,ENSMUSG00000059182,protein_coding -35890,Mtfmt,ENSMUSG00000059183,protein_coding 
-20852,Tafa1,ENSMUSG00000059187,protein_coding -35153,Olfr913,ENSMUSG00000059189,protein_coding -19192,Lep,ENSMUSG00000059201,protein_coding -9350,Il1rapl2,ENSMUSG00000059203,protein_coding -5202,Olfr1130,ENSMUSG00000059205,protein_coding -22359,Vmn1r71,ENSMUSG00000059206,protein_coding -49738,Hnrnpm,ENSMUSG00000059208,protein_coding -46879,Ddn,ENSMUSG00000059213,protein_coding -15243,Gm13084,ENSMUSG00000059218,protein_coding -29709,Defb4,ENSMUSG00000059230,protein_coding -35962,Foxb1,ENSMUSG00000059246,protein_coding -40122,Sept9,ENSMUSG00000059248,protein_coding -4354,Olfr345,ENSMUSG00000059251,protein_coding -33524,Gzmd,ENSMUSG00000059256,protein_coding -25854,Usp47,ENSMUSG00000059263,protein_coding -22663,Zc3h4,ENSMUSG00000059273,protein_coding -38548,Naa38,ENSMUSG00000059278,protein_coding -38235,Olfr224,ENSMUSG00000059279,protein_coding -47472,Vpreb2,ENSMUSG00000059280,protein_coding -41119,Cdyl,ENSMUSG00000059288,protein_coding -15019,Rpl11,ENSMUSG00000059291,protein_coding -34674,Olfr836,ENSMUSG00000059303,protein_coding -47410,Vpreb1,ENSMUSG00000059305,protein_coding -4124,Slc27a4,ENSMUSG00000059316,protein_coding -25066,Olfr295,ENSMUSG00000059319,protein_coding -46269,Tonsl,ENSMUSG00000059323,protein_coding -17014,Hopx,ENSMUSG00000059325,protein_coding -55444,Csf2ra,ENSMUSG00000059326,protein_coding -8738,Eda,ENSMUSG00000059327,protein_coding -8034,Zfp36l3,ENSMUSG00000059334,protein_coding -53866,Slc14a1,ENSMUSG00000059336,protein_coding -13756,Aldoart1,ENSMUSG00000059343,protein_coding -30972,Wdr83os,ENSMUSG00000059355,protein_coding -6472,Nrsn2,ENSMUSG00000059361,protein_coding -54774,Fxn,ENSMUSG00000059363,protein_coding -35218,Olfr959,ENSMUSG00000059366,protein_coding -3028,Olfr427,ENSMUSG00000059371,protein_coding -20027,Vmn1r33,ENSMUSG00000059375,protein_coding -5057,Olfr1014,ENSMUSG00000059379,protein_coding -21667,Tas2r120,ENSMUSG00000059382,protein_coding -36124,Gfral,ENSMUSG00000059383,protein_coding 
-40669,Nkapl,ENSMUSG00000059395,protein_coding -38041,Olfr54,ENSMUSG00000059397,protein_coding -8273,Mamld1,ENSMUSG00000059401,protein_coding -28209,Tmprss9,ENSMUSG00000059406,protein_coding -48923,Mrgprh,ENSMUSG00000059408,protein_coding -50314,Ppp2r5d,ENSMUSG00000059409,protein_coding -21686,Tas2r125,ENSMUSG00000059410,protein_coding -19577,Olfr434,ENSMUSG00000059411,protein_coding -35452,Fxyd2,ENSMUSG00000059412,protein_coding -15381,Zfp933,ENSMUSG00000059423,protein_coding -4381,Olfr365,ENSMUSG00000059429,protein_coding -20504,Actg2,ENSMUSG00000059430,protein_coding -16188,Gckr,ENSMUSG00000059434,protein_coding -44012,Max,ENSMUSG00000059436,protein_coding -39145,Bcas3,ENSMUSG00000059439,protein_coding -16133,Hadhb,ENSMUSG00000059447,protein_coding -53300,Plac8l1,ENSMUSG00000059455,protein_coding -33763,Ptk2b,ENSMUSG00000059456,protein_coding -46846,Olfr286,ENSMUSG00000059460,protein_coding -29704,Spag11b,ENSMUSG00000059463,protein_coding -35244,Olfr979,ENSMUSG00000059473,protein_coding -39286,Mbtd1,ENSMUSG00000059474,protein_coding -34735,Zfp426,ENSMUSG00000059475,protein_coding -23123,B3gnt8,ENSMUSG00000059479,protein_coding -48909,Plg,ENSMUSG00000059481,protein_coding -12853,2610301B20Rik,ENSMUSG00000059482,protein_coding -19863,Kbtbd2,ENSMUSG00000059486,protein_coding -33035,Olfr727,ENSMUSG00000059488,protein_coding -9755,Nhs,ENSMUSG00000059493,protein_coding -35326,Arhgef12,ENSMUSG00000059495,protein_coding -2889,Fcgr3,ENSMUSG00000059498,protein_coding -3041,Olfr248,ENSMUSG00000059503,protein_coding -38251,Olfr314,ENSMUSG00000059504,protein_coding -18416,Znhit1,ENSMUSG00000059518,protein_coding -37367,Uqcr10,ENSMUSG00000059534,protein_coding -7241,Tcea2,ENSMUSG00000059540,protein_coding -36927,Fbxw26,ENSMUSG00000059547,protein_coding -38557,Trp53,ENSMUSG00000059552,protein_coding -26959,Ccdc28a,ENSMUSG00000059554,protein_coding -3934,Tor4a,ENSMUSG00000059555,protein_coding -49336,Ccdc154,ENSMUSG00000059562,protein_coding 
-45973,Nsmce2,ENSMUSG00000059586,protein_coding -4991,Calcrl,ENSMUSG00000059588,protein_coding -35184,Olfr935,ENSMUSG00000059595,protein_coding -28394,Syn3,ENSMUSG00000059602,protein_coding -33093,Rnase2b,ENSMUSG00000059606,protein_coding -38295,Olfr222,ENSMUSG00000059610,protein_coding -34724,Olfr39,ENSMUSG00000059623,protein_coding -4000,Sohlh1,ENSMUSG00000059625,protein_coding -7804,Rhox3e,ENSMUSG00000059626,protein_coding -48555,Krtap8-1,ENSMUSG00000059632,protein_coding -21289,Clec4a4,ENSMUSG00000059639,protein_coding -16074,Gm7361,ENSMUSG00000059645,protein_coding -33084,Rnase11,ENSMUSG00000059648,protein_coding -20415,Reg1,ENSMUSG00000059654,protein_coding -47895,Stfa2l1,ENSMUSG00000059657,protein_coding -9649,4930524N10Rik,ENSMUSG00000059663,protein_coding -47048,Krt4,ENSMUSG00000059668,protein_coding -43240,Taf1b,ENSMUSG00000059669,protein_coding -33418,Cdh24,ENSMUSG00000059674,protein_coding -50086,Olfr108,ENSMUSG00000059687,protein_coding -21164,Zfp637,ENSMUSG00000059689,protein_coding -8248,1700020N15Rik,ENSMUSG00000059690,protein_coding -7762,Akap17b,ENSMUSG00000059708,protein_coding -14990,Rcan3,ENSMUSG00000059713,protein_coding -49943,Flot1,ENSMUSG00000059714,protein_coding -37983,Olfr1385,ENSMUSG00000059729,protein_coding -54089,Ndufs8,ENSMUSG00000059734,protein_coding -36967,Myl3,ENSMUSG00000059741,protein_coding -4658,Kcnh7,ENSMUSG00000059742,protein_coding -11158,Fdps,ENSMUSG00000059743,protein_coding -29280,Olfr767,ENSMUSG00000059762,protein_coding -27073,Taar2,ENSMUSG00000059763,protein_coding -26141,Slx1b,ENSMUSG00000059772,protein_coding -49947,Nrm,ENSMUSG00000059791,protein_coding -38569,Eif4a1,ENSMUSG00000059796,protein_coding -13678,Rgs3,ENSMUSG00000059810,protein_coding -50862,Atl2,ENSMUSG00000059811,protein_coding -14246,Shisal2a,ENSMUSG00000059816,protein_coding -35551,Nkapd1,ENSMUSG00000059820,protein_coding -34685,Olfr847,ENSMUSG00000059821,protein_coding -24036,Dbp,ENSMUSG00000059824,protein_coding 
-11323,Kprp,ENSMUSG00000059832,protein_coding -10406,Sclt1,ENSMUSG00000059834,protein_coding -41971,Zfp874b,ENSMUSG00000059839,protein_coding -6558,Zfp341,ENSMUSG00000059842,protein_coding -39581,Gm11567,ENSMUSG00000059845,protein_coding -22127,Kmt5c,ENSMUSG00000059851,protein_coding -53906,Kcng2,ENSMUSG00000059852,protein_coding -31492,Hydin,ENSMUSG00000059854,protein_coding -11967,Ntng1,ENSMUSG00000059857,protein_coding -29351,Olfr826,ENSMUSG00000059862,protein_coding -37973,Olfr1393,ENSMUSG00000059864,protein_coding -16295,Tnip2,ENSMUSG00000059866,protein_coding -35221,Olfr960,ENSMUSG00000059867,protein_coding -5076,Olfr1029,ENSMUSG00000059873,protein_coding -25472,Olfr603,ENSMUSG00000059874,protein_coding -21143,Zfp422,ENSMUSG00000059878,protein_coding -46787,Irak4,ENSMUSG00000059883,protein_coding -33171,Olfr1507,ENSMUSG00000059887,protein_coding -35426,Ube4a,ENSMUSG00000059890,protein_coding -23953,Tsks,ENSMUSG00000059891,protein_coding -46133,Ptp4a3,ENSMUSG00000059895,protein_coding -30545,Zfp930,ENSMUSG00000059897,protein_coding -52886,Dsc3,ENSMUSG00000059898,protein_coding -21107,Tmem40,ENSMUSG00000059900,protein_coding -27702,Adamts14,ENSMUSG00000059901,protein_coding -21247,Mug1,ENSMUSG00000059908,protein_coding -5345,Olfr1265,ENSMUSG00000059910,protein_coding -48286,4930453N24Rik,ENSMUSG00000059920,protein_coding -12487,Unc5c,ENSMUSG00000059921,protein_coding -40041,Grb2,ENSMUSG00000059923,protein_coding -21663,Prh1,ENSMUSG00000059934,protein_coding -15619,9430015G10Rik,ENSMUSG00000059939,protein_coding -1826,Serpinb12,ENSMUSG00000059956,protein_coding -50100,Olfr119,ENSMUSG00000059964,protein_coding -43998,Hspa2,ENSMUSG00000059970,protein_coding -34928,Ntm,ENSMUSG00000059974,protein_coding -23326,Zfp74,ENSMUSG00000059975,protein_coding -26163,Taok2,ENSMUSG00000059981,protein_coding -18675,Nptx2,ENSMUSG00000059991,protein_coding -11069,Fcrl1,ENSMUSG00000059994,protein_coding -39732,Atxn7l3,ENSMUSG00000059995,protein_coding 
-28461,Chpt1,ENSMUSG00000060002,protein_coding -33730,Kif13b,ENSMUSG00000060012,protein_coding -50103,Olfr121,ENSMUSG00000060017,protein_coding -40799,Vmn1r213,ENSMUSG00000060024,protein_coding -38248,Olfr317,ENSMUSG00000060030,protein_coding -21785,H2afj,ENSMUSG00000060032,protein_coding -26238,Ctf2,ENSMUSG00000060034,protein_coding -46436,Rpl3,ENSMUSG00000060036,protein_coding -30970,Dhps,ENSMUSG00000060038,protein_coding -27838,Tmem26,ENSMUSG00000060044,protein_coding -54553,Olfr235,ENSMUSG00000060049,protein_coding -48234,Olfr193,ENSMUSG00000060057,protein_coding -18817,Alox5ap,ENSMUSG00000060063,protein_coding -29807,Defa26,ENSMUSG00000060070,protein_coding -43903,Psma3,ENSMUSG00000060073,protein_coding -40885,Hist1h2aa,ENSMUSG00000060081,protein_coding -33060,Olfr748,ENSMUSG00000060084,protein_coding -7567,Rp2,ENSMUSG00000060090,protein_coding -40874,Hist1h4a,ENSMUSG00000060093,protein_coding -31413,Prmt7,ENSMUSG00000060098,protein_coding -25765,Olfr504,ENSMUSG00000060105,protein_coding -26513,Olfr60,ENSMUSG00000060112,protein_coding -35151,Olfr910,ENSMUSG00000060114,protein_coding -43710,Gemin2,ENSMUSG00000060121,protein_coding -33950,Tpt1,ENSMUSG00000060126,protein_coding -5988,Atp8b4,ENSMUSG00000060131,protein_coding -41068,Serpinb6a,ENSMUSG00000060147,protein_coding -17824,Pop5,ENSMUSG00000060152,protein_coding -47481,Zdhhc8,ENSMUSG00000060166,protein_coding -38081,Olfr1371,ENSMUSG00000060170,protein_coding -41605,Kif27,ENSMUSG00000060176,protein_coding -23880,Klk1b22,ENSMUSG00000060177,protein_coding -38486,Myh13,ENSMUSG00000060180,protein_coding -28962,Slc35e3,ENSMUSG00000060181,protein_coding -28939,Lrrc10,ENSMUSG00000060187,protein_coding -23112,Cxcl17,ENSMUSG00000060188,protein_coding -53604,Spink7,ENSMUSG00000060201,protein_coding -16895,Gm5868,ENSMUSG00000060204,protein_coding -28095,Olfr57,ENSMUSG00000060205,protein_coding -13523,Zfp462,ENSMUSG00000060206,protein_coding -29809,Defa17,ENSMUSG00000060208,protein_coding 
-31866,Pcnx2,ENSMUSG00000060212,protein_coding -38634,Arrb2,ENSMUSG00000060216,protein_coding -55107,Pyroxd2,ENSMUSG00000060224,protein_coding -5925,Casc4,ENSMUSG00000060227,protein_coding -26587,Cend1,ENSMUSG00000060240,protein_coding -2924,Alyref2,ENSMUSG00000060244,protein_coding -49111,Vmn1r228,ENSMUSG00000060245,protein_coding -35245,Olfr980,ENSMUSG00000060254,protein_coding -11373,Tdpoz4,ENSMUSG00000060256,protein_coding -6462,Scrt2,ENSMUSG00000060257,protein_coding -26474,Pwwp2b,ENSMUSG00000060260,protein_coding -18310,Gtf2i,ENSMUSG00000060261,protein_coding -14439,Armh1,ENSMUSG00000060268,protein_coding -53117,Nrg2,ENSMUSG00000060275,protein_coding -23952,Ap2a1,ENSMUSG00000060279,protein_coding -47072,Sp7,ENSMUSG00000060284,protein_coding -14525,Ppih,ENSMUSG00000060288,protein_coding -28096,2610008E11Rik,ENSMUSG00000060301,protein_coding -54588,Olfr1447,ENSMUSG00000060303,protein_coding -47140,Mucl1,ENSMUSG00000060311,protein_coding -26536,Zfp941,ENSMUSG00000060314,protein_coding -13449,Acnat2,ENSMUSG00000060317,protein_coding -6096,Tmc2,ENSMUSG00000060332,protein_coding -38763,Olfr384,ENSMUSG00000060335,protein_coding -6391,Zfp937,ENSMUSG00000060336,protein_coding -18254,Caln1,ENSMUSG00000060371,protein_coding -33147,Hnrnpc,ENSMUSG00000060373,protein_coding -23124,Bckdha,ENSMUSG00000060376,protein_coding -34167,Rpl36a-ps1,ENSMUSG00000060377,protein_coding -22493,Zfp128,ENSMUSG00000060397,protein_coding -23583,Chst8,ENSMUSG00000060402,protein_coding -40651,Olfr1369-ps1,ENSMUSG00000060404,protein_coding -23171,Cyp2a12,ENSMUSG00000060407,protein_coding -21672,Tas2r124,ENSMUSG00000060412,protein_coding -30559,Zfp868,ENSMUSG00000060427,protein_coding -45916,Sntb1,ENSMUSG00000060429,protein_coding -25548,Trim5,ENSMUSG00000060441,protein_coding -7149,Sycp2,ENSMUSG00000060445,protein_coding -53247,Rnf14,ENSMUSG00000060450,protein_coding -47651,Kng2,ENSMUSG00000060459,protein_coding -36180,Dppa5a,ENSMUSG00000060461,protein_coding 
-48531,Krtap19-3,ENSMUSG00000060469,protein_coding -31189,Adgrg3,ENSMUSG00000060470,protein_coding -48933,Wtap,ENSMUSG00000060475,protein_coding -21062,Irak2,ENSMUSG00000060477,protein_coding -47544,Olfr171,ENSMUSG00000060480,protein_coding -26828,Samd5,ENSMUSG00000060487,protein_coding -14282,4930522H14Rik,ENSMUSG00000060491,protein_coding -43801,Rpl10l,ENSMUSG00000060499,protein_coding -25700,Olfr715,ENSMUSG00000060503,protein_coding -22852,Nlrp9b,ENSMUSG00000060508,protein_coding -37278,Xcr1,ENSMUSG00000060509,protein_coding -34737,Zfp266,ENSMUSG00000060510,protein_coding -16708,0610040J01Rik,ENSMUSG00000060512,protein_coding -2576,Tor3a,ENSMUSG00000060519,protein_coding -33034,Olfr726,ENSMUSG00000060523,protein_coding -53779,Dcc,ENSMUSG00000060534,protein_coding -26164,Tmem219,ENSMUSG00000060538,protein_coding -33633,Tnfrsf19,ENSMUSG00000060548,protein_coding -49919,H2-Q7,ENSMUSG00000060550,protein_coding -54526,Olfr1418,ENSMUSG00000060556,protein_coding -31338,Ces4a,ENSMUSG00000060560,protein_coding -23665,Gm5591,ENSMUSG00000060565,protein_coding -2824,Fam78b,ENSMUSG00000060568,protein_coding -15157,Mfap2,ENSMUSG00000060572,protein_coding -32110,Fhit,ENSMUSG00000060579,protein_coding -35116,Olfr881,ENSMUSG00000060583,protein_coding -49803,H2-Eb1,ENSMUSG00000060586,protein_coding -26553,Ifitm2,ENSMUSG00000060591,protein_coding -35564,Layn,ENSMUSG00000060594,protein_coding -38657,Eno3,ENSMUSG00000060600,protein_coding -23929,Nr1h2,ENSMUSG00000060601,protein_coding -55045,Cyp2c70,ENSMUSG00000060613,protein_coding -33124,Ang4,ENSMUSG00000060615,protein_coding -22816,Nkpd1,ENSMUSG00000060621,protein_coding -38766,Zfp735,ENSMUSG00000060630,protein_coding -47838,Rpl35a,ENSMUSG00000060636,protein_coding -40726,Hist1h4i,ENSMUSG00000060639,protein_coding -47357,Marf1,ENSMUSG00000060657,protein_coding -48218,Olfr175-ps1,ENSMUSG00000060663,protein_coding -11206,Atp8b2,ENSMUSG00000060671,protein_coding -8526,4930595M18Rik,ENSMUSG00000060673,protein_coding 
-54342,Pla2g16,ENSMUSG00000060675,protein_coding -40861,Hist1h4c,ENSMUSG00000060678,protein_coding -640,Mrps9,ENSMUSG00000060679,protein_coding -8085,Slc9a6,ENSMUSG00000060681,protein_coding -7418,Fthl17c,ENSMUSG00000060685,protein_coding -25069,Olfr292,ENSMUSG00000060688,protein_coding -48529,Krtap19-1,ENSMUSG00000060691,protein_coding -20029,Vmn1r35,ENSMUSG00000060699,protein_coding -49107,Fpr-rs3,ENSMUSG00000060701,protein_coding -4625,Cd302,ENSMUSG00000060703,protein_coding -16346,Bloc1s4,ENSMUSG00000060708,protein_coding -797,1700019A02Rik,ENSMUSG00000060715,protein_coding -44044,Plekhh1,ENSMUSG00000060716,protein_coding -20735,Vmn1r47,ENSMUSG00000060724,protein_coding -9274,Tmsb15a,ENSMUSG00000060726,protein_coding -27886,Ipmk,ENSMUSG00000060733,protein_coding -45409,Rxfp3,ENSMUSG00000060735,protein_coding -40955,Prl7c1,ENSMUSG00000060738,protein_coding -42419,Nsa2,ENSMUSG00000060739,protein_coding -5127,Olfr1076,ENSMUSG00000060742,protein_coding -3154,H3f3a,ENSMUSG00000060743,protein_coding -23254,Ifnl3,ENSMUSG00000060747,protein_coding -39550,Krtap2-4,ENSMUSG00000060756,protein_coding -25764,Olfr503,ENSMUSG00000060759,protein_coding -38237,Olfr325,ENSMUSG00000060765,protein_coding -553,Tsga10,ENSMUSG00000060771,protein_coding -20398,Lrrtm1,ENSMUSG00000060780,protein_coding -39220,Olfr464,ENSMUSG00000060787,protein_coding -23249,Gmfg,ENSMUSG00000060791,protein_coding -46251,Tssk5,ENSMUSG00000060794,protein_coding -10378,Intu,ENSMUSG00000060798,protein_coding -5933,B2m,ENSMUSG00000060802,protein_coding -54103,Gstp1,ENSMUSG00000060803,protein_coding -44601,Serpina6,ENSMUSG00000060807,protein_coding -20744,Vmn1r52,ENSMUSG00000060816,protein_coding -5272,Olfr1193,ENSMUSG00000060827,protein_coding -27783,Ctnna3,ENSMUSG00000060843,protein_coding -22134,Ube2s,ENSMUSG00000060860,protein_coding -15045,Zbtb40,ENSMUSG00000060862,protein_coding -44657,Tcl1b2,ENSMUSG00000060863,protein_coding -54530,Olfr1420,ENSMUSG00000060878,protein_coding 
-19110,Kcnd2,ENSMUSG00000060882,protein_coding -25428,Olfr566,ENSMUSG00000060888,protein_coding -8750,Arr3,ENSMUSG00000060890,protein_coding -28474,Arl1,ENSMUSG00000060904,protein_coding -10066,Trim55,ENSMUSG00000060913,protein_coding -38040,Olfr51,ENSMUSG00000060918,protein_coding -37731,Acyp2,ENSMUSG00000060923,protein_coding -29675,Csmd1,ENSMUSG00000060924,protein_coding -28362,Tmem263,ENSMUSG00000060935,protein_coding -38513,Rpl26,ENSMUSG00000060938,protein_coding -44903,Trmt61a,ENSMUSG00000060950,protein_coding -17172,Slc4a4,ENSMUSG00000060961,protein_coding -23402,Dmkn,ENSMUSG00000060962,protein_coding -8021,Etd,ENSMUSG00000060967,protein_coding -42059,Irx1,ENSMUSG00000060969,protein_coding -26515,Olfr530,ENSMUSG00000060974,protein_coding -40835,Hist1h4h,ENSMUSG00000060981,protein_coding -2565,Tdrd5,ENSMUSG00000060985,protein_coding -4568,Galnt13,ENSMUSG00000060988,protein_coding -47121,Copz1,ENSMUSG00000060992,protein_coding -25771,Olfr507,ENSMUSG00000061000,protein_coding -52722,Mkx,ENSMUSG00000061013,protein_coding -40818,Vmn1r222,ENSMUSG00000061022,protein_coding -101,Rrs1,ENSMUSG00000061024,protein_coding -22820,Clasrp,ENSMUSG00000061028,protein_coding -28058,Rrp1,ENSMUSG00000061032,protein_coding -35165,Olfr920,ENSMUSG00000061039,protein_coding -49377,Haghl,ENSMUSG00000061046,protein_coding -31419,Cdh3,ENSMUSG00000061048,protein_coding -7443,H2al1o,ENSMUSG00000061065,protein_coding -33519,Mcpt4,ENSMUSG00000061068,protein_coding -25822,Zfp143,ENSMUSG00000061079,protein_coding -47997,Lsamp,ENSMUSG00000061080,protein_coding -8014,Plac1,ENSMUSG00000061082,protein_coding -39811,Myl4,ENSMUSG00000061086,protein_coding -23398,Gapdhs,ENSMUSG00000061099,protein_coding -48104,Retnla,ENSMUSG00000061100,protein_coding -31226,Sap18b,ENSMUSG00000061104,protein_coding -40234,Mcrip1,ENSMUSG00000061111,protein_coding -18349,Dnajc30,ENSMUSG00000061118,protein_coding -25178,Prcp,ENSMUSG00000061119,protein_coding -49686,Cyp4f39,ENSMUSG00000061126,protein_coding 
-50944,Ppm1b,ENSMUSG00000061130,protein_coding -55065,Blnk,ENSMUSG00000061132,protein_coding -4556,Prpf40a,ENSMUSG00000061136,protein_coding -10512,Maml3,ENSMUSG00000061143,protein_coding -53340,Spink12,ENSMUSG00000061144,protein_coding -35103,Olfr160,ENSMUSG00000061165,protein_coding -4672,Slc38a11,ENSMUSG00000061171,protein_coding -10936,Fnip2,ENSMUSG00000061175,protein_coding -17077,Tmprss11c,ENSMUSG00000061184,protein_coding -3558,Sfmbt2,ENSMUSG00000061186,protein_coding -5677,Olfr1289,ENSMUSG00000061195,protein_coding -49843,Stk19,ENSMUSG00000061207,protein_coding -19873,Vmn1r8,ENSMUSG00000061208,protein_coding -19578,Olfr47,ENSMUSG00000061210,protein_coding -49770,H2-K1,ENSMUSG00000061232,protein_coding -33009,Exoc5,ENSMUSG00000061244,protein_coding -17079,Tmprss11d,ENSMUSG00000061259,protein_coding -8083,Mmgt1,ENSMUSG00000061273,protein_coding -23125,Exosc5,ENSMUSG00000061286,protein_coding -17887,Taok3,ENSMUSG00000061288,protein_coding -18723,Cyp3a59,ENSMUSG00000061292,protein_coding -5341,Olfr1261,ENSMUSG00000061295,protein_coding -40786,Vmn1r210,ENSMUSG00000061296,protein_coding -14293,Agbl4,ENSMUSG00000061298,protein_coding -40207,Slc38a10,ENSMUSG00000061306,protein_coding -5506,Rag1,ENSMUSG00000061311,protein_coding -29926,Ddhd2,ENSMUSG00000061313,protein_coding -29198,Naca,ENSMUSG00000061315,protein_coding -13222,Dnaic1,ENSMUSG00000061322,protein_coding -50097,Olfr116,ENSMUSG00000061336,protein_coding -5245,Olfr1168,ENSMUSG00000061342,protein_coding -21154,Cxcl12,ENSMUSG00000061353,protein_coding -33750,Nuggc,ENSMUSG00000061356,protein_coding -46497,Phf5a,ENSMUSG00000061360,protein_coding -47541,Olfr168,ENSMUSG00000061361,protein_coding -29284,Olfr771,ENSMUSG00000061367,protein_coding -28296,Zfp873,ENSMUSG00000061371,protein_coding -22147,Fiz1,ENSMUSG00000061374,protein_coding -40812,Vmn1r219,ENSMUSG00000061376,protein_coding -54638,Olfr1490,ENSMUSG00000061387,protein_coding -17132,Csn1s2b,ENSMUSG00000061388,protein_coding 
-8622,Mageb5,ENSMUSG00000061392,protein_coding -37112,Acvr2b,ENSMUSG00000061393,protein_coding -47049,Krt79,ENSMUSG00000061397,protein_coding -31676,Zcchc14,ENSMUSG00000061410,protein_coding -6522,Nol4l,ENSMUSG00000061411,protein_coding -21455,Cracr2a,ENSMUSG00000061414,protein_coding -19376,Hipk2,ENSMUSG00000061436,protein_coding -54173,Tmem151a,ENSMUSG00000061451,protein_coding -13430,Stx17,ENSMUSG00000061455,protein_coding -34721,Olfr871,ENSMUSG00000061457,protein_coding -43044,Nol10,ENSMUSG00000061458,protein_coding -16609,Smim20,ENSMUSG00000061461,protein_coding -38272,Obscn,ENSMUSG00000061462,protein_coding -38050,Gm12569,ENSMUSG00000061469,protein_coding -42511,Mrps36,ENSMUSG00000061474,protein_coding -43296,Rps7,ENSMUSG00000061477,protein_coding -23184,Snrpa,ENSMUSG00000061479,protein_coding -40851,Hist1h4d,ENSMUSG00000061482,protein_coding -26510,Olfr525,ENSMUSG00000061489,protein_coding -34342,Sox21,ENSMUSG00000061517,protein_coding -525,Cox5b,ENSMUSG00000061518,protein_coding -5196,Olfr153,ENSMUSG00000061520,protein_coding -34413,Zic2,ENSMUSG00000061524,protein_coding -6440,4921509C19Rik,ENSMUSG00000061525,protein_coding -47036,Krt5,ENSMUSG00000061527,protein_coding -3745,Tmem236,ENSMUSG00000061531,protein_coding -44396,Cep128,ENSMUSG00000061533,protein_coding -16430,C1qtnf7,ENSMUSG00000061535,protein_coding -37185,Sec22c,ENSMUSG00000061536,protein_coding -13694,Orm2,ENSMUSG00000061540,protein_coding -49158,Zfp229,ENSMUSG00000061544,protein_coding -25059,Olfr301,ENSMUSG00000061549,protein_coding -35632,Wdr61,ENSMUSG00000061559,protein_coding -30692,Olfr373,ENSMUSG00000061561,protein_coding -16078,Dpp6,ENSMUSG00000061576,protein_coding -31186,Adgrg5,ENSMUSG00000061577,protein_coding -17900,Ksr2,ENSMUSG00000061578,protein_coding -558,Lyg2,ENSMUSG00000061584,protein_coding -28198,Dot1l,ENSMUSG00000061589,protein_coding -15843,Pclo,ENSMUSG00000061601,protein_coding -22452,Vmn1r78,ENSMUSG00000061602,protein_coding 
-43598,Akap6,ENSMUSG00000061603,protein_coding -49945,Mdc1,ENSMUSG00000061607,protein_coding -49657,U2af1,ENSMUSG00000061613,protein_coding -34682,Olfr845,ENSMUSG00000061614,protein_coding -40869,Hist1h2ab,ENSMUSG00000061615,protein_coding -1642,Olfr12,ENSMUSG00000061616,protein_coding -25503,Olfr68,ENSMUSG00000061626,protein_coding -46504,1700029P11Rik,ENSMUSG00000061633,protein_coding -54599,Olfr1457,ENSMUSG00000061637,protein_coding -38312,Med9,ENSMUSG00000061650,protein_coding -20733,Vmn1r46,ENSMUSG00000061653,protein_coding -55502,Spry3,ENSMUSG00000061654,protein_coding -50222,Cd2ap,ENSMUSG00000061665,protein_coding -39175,Gdpd1,ENSMUSG00000061666,protein_coding -6638,Dlgap4,ENSMUSG00000061689,protein_coding -36907,Fbxw20,ENSMUSG00000061701,protein_coding -23126,Tmem91,ENSMUSG00000061702,protein_coding -18684,Gm4871,ENSMUSG00000061707,protein_coding -39481,Ppp1r1b,ENSMUSG00000061718,protein_coding -26646,Tnnt3,ENSMUSG00000061723,protein_coding -45883,Ext1,ENSMUSG00000061731,protein_coding -46529,Cyp2d22,ENSMUSG00000061740,protein_coding -54295,Slc22a12,ENSMUSG00000061742,protein_coding -47855,Kalrn,ENSMUSG00000061751,protein_coding -16412,Bod1l,ENSMUSG00000061755,protein_coding -19307,Akr1b10,ENSMUSG00000061758,protein_coding -26762,Armt1,ENSMUSG00000061759,protein_coding -18972,Tac1,ENSMUSG00000061762,protein_coding -21576,Klra6,ENSMUSG00000061769,protein_coding -9809,Mospd2,ENSMUSG00000061778,protein_coding -28133,Cfd,ENSMUSG00000061780,protein_coding -24905,Rps17,ENSMUSG00000061787,protein_coding -5282,Olfr1204,ENSMUSG00000061798,protein_coding -52725,Armc4,ENSMUSG00000061802,protein_coding -52898,Ttr,ENSMUSG00000061808,protein_coding -3891,Tbpl2,ENSMUSG00000061809,protein_coding -1190,Rufy4,ENSMUSG00000061815,protein_coding -1098,Myl1,ENSMUSG00000061816,protein_coding -31325,Ces2c,ENSMUSG00000061825,protein_coding -40801,Vmn1r214,ENSMUSG00000061829,protein_coding -20849,Suclg2,ENSMUSG00000061838,protein_coding 
-29760,Defa35,ENSMUSG00000061845,protein_coding -29701,Defb39,ENSMUSG00000061847,protein_coding -14063,Patj,ENSMUSG00000061859,protein_coding -5222,Olfr1148,ENSMUSG00000061875,protein_coding -24910,BC048679,ENSMUSG00000061877,protein_coding -40089,Sphk1,ENSMUSG00000061878,protein_coding -18110,Ccdc62,ENSMUSG00000061882,protein_coding -14201,Ssbp3,ENSMUSG00000061887,protein_coding -14749,Zscan20,ENSMUSG00000061894,protein_coding -18623,Rbak,ENSMUSG00000061898,protein_coding -28498,Slc25a3,ENSMUSG00000061904,protein_coding -17116,Ugt2b38,ENSMUSG00000061906,protein_coding -43311,Myt1l,ENSMUSG00000061911,protein_coding -45736,Odf1,ENSMUSG00000061923,protein_coding -52891,Dsg1b,ENSMUSG00000061928,protein_coding -17131,Csn1s2a,ENSMUSG00000061937,protein_coding -44599,Serpina10,ENSMUSG00000061947,protein_coding -50641,Ppp4r1,ENSMUSG00000061950,protein_coding -38038,Olfr1377,ENSMUSG00000061952,protein_coding -29764,Gm14851,ENSMUSG00000061958,protein_coding -31127,Ces1e,ENSMUSG00000061959,protein_coding -29333,Olfr815,ENSMUSG00000061961,protein_coding -50073,Olfr99,ENSMUSG00000061972,protein_coding -30288,Cldn24,ENSMUSG00000061974,protein_coding -21639,Tas2r104,ENSMUSG00000061977,protein_coding -18305,Rcc1l,ENSMUSG00000061979,protein_coding -38921,Flot2,ENSMUSG00000061981,protein_coding -27065,Rps12,ENSMUSG00000061983,protein_coding -38774,Olfr392,ENSMUSG00000061984,protein_coding -40836,Hist1h2af,ENSMUSG00000061991,protein_coding -12301,Rpl34,ENSMUSG00000062006,protein_coding -30700,Hsh2d,ENSMUSG00000062007,protein_coding -49220,Zfp13,ENSMUSG00000062012,protein_coding -32931,Gmfb,ENSMUSG00000062014,protein_coding -25996,Abca14,ENSMUSG00000062017,protein_coding -23054,Irgc1,ENSMUSG00000062028,protein_coding -26550,Pgghg,ENSMUSG00000062031,protein_coding -46849,Olfr285,ENSMUSG00000062037,protein_coding -23322,Zfp27,ENSMUSG00000062040,protein_coding -25067,Olfr294,ENSMUSG00000062042,protein_coding -24043,Lmtk3,ENSMUSG00000062044,protein_coding 
-43162,Iah1,ENSMUSG00000062054,protein_coding -3990,Obp2a,ENSMUSG00000062061,protein_coding -15467,Slc2a7,ENSMUSG00000062064,protein_coding -8931,Pgk1,ENSMUSG00000062070,protein_coding -27311,Ccn6,ENSMUSG00000062074,protein_coding -28211,Lmnb2,ENSMUSG00000062075,protein_coding -16172,Trim54,ENSMUSG00000062077,protein_coding -48888,Qk,ENSMUSG00000062078,protein_coding -48049,Cd200r4,ENSMUSG00000062082,protein_coding -6236,Btbd3,ENSMUSG00000062098,protein_coding -50491,Zfp119b,ENSMUSG00000062101,protein_coding -35171,Olfr924,ENSMUSG00000062103,protein_coding -48224,Olfr183,ENSMUSG00000062105,protein_coding -16945,Scfd2,ENSMUSG00000062110,protein_coding -38316,Rai1,ENSMUSG00000062115,protein_coding -22243,Zfp954,ENSMUSG00000062116,protein_coding -35226,Olfr149,ENSMUSG00000062121,protein_coding -6489,Defb45,ENSMUSG00000062124,protein_coding -11819,Cttnbp2nl,ENSMUSG00000062127,protein_coding -38748,Olfr20,ENSMUSG00000062128,protein_coding -23371,Arhgap33os,ENSMUSG00000062132,protein_coding -25432,Olfr569,ENSMUSG00000062142,protein_coding -33130,Ear6,ENSMUSG00000062148,protein_coding -36074,Unc13c,ENSMUSG00000062151,protein_coding -46343,Tex33,ENSMUSG00000062154,protein_coding -15000,Ifnlr1,ENSMUSG00000062157,protein_coding -8628,Mageb1,ENSMUSG00000062162,protein_coding -49116,Vmn1r232,ENSMUSG00000062165,protein_coding -9743,Ppef1,ENSMUSG00000062168,protein_coding -3176,Cnih4,ENSMUSG00000062169,protein_coding -8238,Fmr1nb,ENSMUSG00000062170,protein_coding -6645,Tgif2,ENSMUSG00000062175,protein_coding -31336,Ces3b,ENSMUSG00000062181,protein_coding -7978,Hs6st2,ENSMUSG00000062184,protein_coding -38780,Olfr395,ENSMUSG00000062186,protein_coding -19899,Lancl2,ENSMUSG00000062190,protein_coding -43639,2700097O09Rik,ENSMUSG00000062198,protein_coding -54608,Olfr1465,ENSMUSG00000062199,protein_coding -10757,Vmn2r7,ENSMUSG00000062200,protein_coding -40935,Prl3d3,ENSMUSG00000062201,protein_coding -49604,Btbd9,ENSMUSG00000062202,protein_coding 
-47312,Gspt1,ENSMUSG00000062203,protein_coding -38078,Olfr1373,ENSMUSG00000062204,protein_coding -1111,Erbb4,ENSMUSG00000062209,protein_coding -53426,Tnfaip8,ENSMUSG00000062210,protein_coding -10931,Rapgef2,ENSMUSG00000062232,protein_coding -17597,Gak,ENSMUSG00000062234,protein_coding -47543,Olfr170,ENSMUSG00000062245,protein_coding -41456,Cks2,ENSMUSG00000062248,protein_coding -49536,Lhfpl5,ENSMUSG00000062252,protein_coding -34924,Opcml,ENSMUSG00000062257,protein_coding -36405,Morf4l1,ENSMUSG00000062270,protein_coding -5201,Olfr1129,ENSMUSG00000062272,protein_coding -36923,Fbxw24,ENSMUSG00000062275,protein_coding -39551,Gm11562,ENSMUSG00000062278,protein_coding -36994,Trank1,ENSMUSG00000062296,protein_coding -22831,Nectin2,ENSMUSG00000062300,protein_coding -35703,Rpp25,ENSMUSG00000062309,protein_coding -1553,Glrp1,ENSMUSG00000062310,protein_coding -39487,Erbb2,ENSMUSG00000062312,protein_coding -54653,Olfr1505,ENSMUSG00000062314,protein_coding -48866,T,ENSMUSG00000062327,protein_coding -53817,Rpl17,ENSMUSG00000062328,protein_coding -16366,Cytl1,ENSMUSG00000062329,protein_coding -41047,Serpinb9e,ENSMUSG00000062342,protein_coding -1841,Serpinb2,ENSMUSG00000062345,protein_coding -43158,Itgb1bp1,ENSMUSG00000062352,protein_coding -25582,Usp17lb,ENSMUSG00000062369,protein_coding -16140,Otof,ENSMUSG00000062372,protein_coding -45957,Tmem65,ENSMUSG00000062373,protein_coding -55229,Borcs7,ENSMUSG00000062376,protein_coding -31756,Tubb3,ENSMUSG00000062380,protein_coding -42107,Ftl1-ps1,ENSMUSG00000062382,protein_coding -7323,Dgkk,ENSMUSG00000062393,protein_coding -45707,Zfp706,ENSMUSG00000062397,protein_coding -48542,Krtap6-5,ENSMUSG00000062400,protein_coding -11636,Hsd3b3,ENSMUSG00000062410,protein_coding -39800,Arf2,ENSMUSG00000062421,protein_coding -25056,Olfr304,ENSMUSG00000062426,protein_coding -54999,Cyp26c1,ENSMUSG00000062432,protein_coding -48553,Krtap6-2,ENSMUSG00000062433,protein_coding -25781,Olfr516,ENSMUSG00000062434,protein_coding 
-17999,Adam1b,ENSMUSG00000062438,protein_coding -24909,Ap3b2,ENSMUSG00000062444,protein_coding -54891,Rpl9-ps6,ENSMUSG00000062456,protein_coding -49692,Cyp4f37,ENSMUSG00000062464,protein_coding -15194,Ctrc,ENSMUSG00000062478,protein_coding -48930,Acat3,ENSMUSG00000062480,protein_coding -23011,Vmn1r171,ENSMUSG00000062483,protein_coding -54941,Ifit3b,ENSMUSG00000062488,protein_coding -1639,Olfr1411,ENSMUSG00000062497,protein_coding -3348,Nsl1,ENSMUSG00000062510,protein_coding -37483,4930512M02Rik,ENSMUSG00000062511,protein_coding -9961,Fabp4,ENSMUSG00000062515,protein_coding -15374,Zfp534,ENSMUSG00000062518,protein_coding -19637,Zfp398,ENSMUSG00000062519,protein_coding -22098,Ncr1,ENSMUSG00000062524,protein_coding -53719,Mppe1,ENSMUSG00000062526,protein_coding -2991,Olfr1408,ENSMUSG00000062527,protein_coding -21691,Tas2r109,ENSMUSG00000062528,protein_coding -25708,Syt9,ENSMUSG00000062542,protein_coding -14750,Tlr12,ENSMUSG00000062545,protein_coding -20745,V1ra8,ENSMUSG00000062546,protein_coding -40957,Prl2c1,ENSMUSG00000062551,protein_coding -25686,Olfr709-ps1,ENSMUSG00000062553,protein_coding -23436,Scgb1b2,ENSMUSG00000062556,protein_coding -43245,Cys1,ENSMUSG00000062563,protein_coding -8366,Tex28,ENSMUSG00000062564,protein_coding -2240,Timm17a,ENSMUSG00000062580,protein_coding -15009,Cnr2,ENSMUSG00000062585,protein_coding -1454,Armc9,ENSMUSG00000062590,protein_coding -50556,Tubb4a,ENSMUSG00000062591,protein_coding -27538,Gm49339,ENSMUSG00000062593,protein_coding -23022,Vmn1r178,ENSMUSG00000062598,protein_coding -15978,Srpk2,ENSMUSG00000062604,protein_coding -48236,Olfr195,ENSMUSG00000062608,protein_coding -48706,Kcnj15,ENSMUSG00000062609,protein_coding -50319,2310039H08Rik,ENSMUSG00000062619,protein_coding -35150,Olfr911-ps1,ENSMUSG00000062621,protein_coding -55037,Cyp2c67,ENSMUSG00000062624,protein_coding -14015,Mysm1,ENSMUSG00000062627,protein_coding -50094,Olfr114,ENSMUSG00000062629,protein_coding -49807,Btnl1,ENSMUSG00000062638,protein_coding 
-5875,Ganc,ENSMUSG00000062646,protein_coding -4044,Rpl7a,ENSMUSG00000062647,protein_coding -35214,Olfr44,ENSMUSG00000062649,protein_coding -3790,H2al2a,ENSMUSG00000062651,protein_coding -4183,Ncs1,ENSMUSG00000062661,protein_coding -47090,Atp5g2,ENSMUSG00000062683,protein_coding -30979,Cks1brt,ENSMUSG00000062687,protein_coding -50845,Cebpzos,ENSMUSG00000062691,protein_coding -21010,Cav3,ENSMUSG00000062694,protein_coding -41679,Tpbpb,ENSMUSG00000062705,protein_coding -26517,Olfr531,ENSMUSG00000062712,protein_coding -48682,Sim2,ENSMUSG00000062713,protein_coding -40725,Hist1h2bk,ENSMUSG00000062727,protein_coding -2904,Ppox,ENSMUSG00000062729,protein_coding -23081,Lypd4,ENSMUSG00000062732,protein_coding -40934,Prl3d2,ENSMUSG00000062737,protein_coding -49140,Zfp677,ENSMUSG00000062743,protein_coding -19483,Prss1,ENSMUSG00000062751,protein_coding -49485,AI413582,ENSMUSG00000062753,protein_coding -5283,Olfr1206,ENSMUSG00000062757,protein_coding -46613,Shisal1,ENSMUSG00000062760,protein_coding -16192,Zfp512,ENSMUSG00000062761,protein_coding -35068,Ei24,ENSMUSG00000062762,protein_coding -23070,Tex101,ENSMUSG00000062773,protein_coding -11847,Chia1,ENSMUSG00000062778,protein_coding -26511,Olfr527,ENSMUSG00000062782,protein_coding -55531,Csprs,ENSMUSG00000062783,protein_coding -23932,Kcnc3,ENSMUSG00000062785,protein_coding -9165,Gm382,ENSMUSG00000062791,protein_coding -5234,Olfr1158,ENSMUSG00000062793,protein_coding -34839,Zfp599,ENSMUSG00000062794,protein_coding -25114,Hikeshi,ENSMUSG00000062797,protein_coding -7596,Ssxa1,ENSMUSG00000062814,protein_coding -20741,Vmn1r51,ENSMUSG00000062818,protein_coding -42772,4833420G17Rik,ENSMUSG00000062822,protein_coding -40215,Actg1,ENSMUSG00000062825,protein_coding -31329,Ces2f,ENSMUSG00000062826,protein_coding -19519,Sval3,ENSMUSG00000062833,protein_coding -54600,Olfr1458,ENSMUSG00000062844,protein_coding -49506,Tcp11,ENSMUSG00000062859,protein_coding -22208,Zfp28,ENSMUSG00000062861,protein_coding 
-26892,Phactr2,ENSMUSG00000062866,protein_coding -36866,Impdh2,ENSMUSG00000062867,protein_coding -34667,Olfr830,ENSMUSG00000062868,protein_coding -28088,Olfr1355,ENSMUSG00000062873,protein_coding -25062,Olfr298,ENSMUSG00000062878,protein_coding -54592,Olfr1450,ENSMUSG00000062892,protein_coding -47554,Klhl24,ENSMUSG00000062901,protein_coding -20025,Vmn1r32,ENSMUSG00000062905,protein_coding -46696,Hdac10,ENSMUSG00000062906,protein_coding -12685,Acadm,ENSMUSG00000062908,protein_coding -29291,Olfr777,ENSMUSG00000062914,protein_coding -43633,Cfl2,ENSMUSG00000062929,protein_coding -28303,Zfp938,ENSMUSG00000062931,protein_coding -13952,Mtap,ENSMUSG00000062937,protein_coding -771,Stat4,ENSMUSG00000062939,protein_coding -26278,9130023H24Rik,ENSMUSG00000062944,protein_coding -8130,Atp11c,ENSMUSG00000062949,protein_coding -21683,Tas2r110,ENSMUSG00000062952,protein_coding -16984,Kdr,ENSMUSG00000062960,protein_coding -44090,Ccdc177,ENSMUSG00000062961,protein_coding -2906,Ufc1,ENSMUSG00000062963,protein_coding -19119,Cped1,ENSMUSG00000062980,protein_coding -28602,Mrpl42,ENSMUSG00000062981,protein_coding -25698,Olfr715b,ENSMUSG00000062987,protein_coding -30028,Nrg1,ENSMUSG00000062991,protein_coding -18995,Ica1,ENSMUSG00000062995,protein_coding -4436,Rpl35,ENSMUSG00000062997,protein_coding -49374,Msln,ENSMUSG00000063011,protein_coding -17267,Ccni,ENSMUSG00000063015,protein_coding -6667,Manbal,ENSMUSG00000063019,protein_coding -48235,Olfr194,ENSMUSG00000063020,protein_coding -40694,Hist1h2ak,ENSMUSG00000063021,protein_coding -23222,Zfp780b,ENSMUSG00000063047,protein_coding -30279,Ing2,ENSMUSG00000063049,protein_coding -12745,Lrrc40,ENSMUSG00000063052,protein_coding -36552,Prr23a2,ENSMUSG00000063058,protein_coding -33716,Sox7,ENSMUSG00000063060,protein_coding -20397,Ctnna2,ENSMUSG00000063063,protein_coding -26148,Mapk3,ENSMUSG00000063065,protein_coding -15431,Kif1b,ENSMUSG00000063077,protein_coding -23857,Klk1b8,ENSMUSG00000063089,protein_coding 
-33165,Olfr1510,ENSMUSG00000063106,protein_coding -34732,Zfp26,ENSMUSG00000063108,protein_coding -38804,Olfr410,ENSMUSG00000063116,protein_coding -25732,Olfr480,ENSMUSG00000063120,protein_coding -43658,Aldoart2,ENSMUSG00000063129,protein_coding -40329,Calml3,ENSMUSG00000063130,protein_coding -23859,Klk1b1,ENSMUSG00000063133,protein_coding -48219,Olfr177,ENSMUSG00000063137,protein_coding -32337,Kcnma1,ENSMUSG00000063142,protein_coding -4737,Bbs5,ENSMUSG00000063145,protein_coding -18320,Clip2,ENSMUSG00000063146,protein_coding -17127,Csn2,ENSMUSG00000063157,protein_coding -23191,Numbl,ENSMUSG00000063160,protein_coding -48322,Speer2,ENSMUSG00000063163,protein_coding -14211,Hspb11,ENSMUSG00000063172,protein_coding -29329,Olfr811,ENSMUSG00000063173,protein_coding -35212,Olfr955,ENSMUSG00000063176,protein_coding -23872,Klk1b27,ENSMUSG00000063177,protein_coding -26343,Pstk,ENSMUSG00000063179,protein_coding -50083,Olfr107,ENSMUSG00000063188,protein_coding -39997,Cd300lb,ENSMUSG00000063193,protein_coding -41282,Nol7,ENSMUSG00000063200,protein_coding -29810,Defa34,ENSMUSG00000063206,protein_coding -28094,Olfr1351,ENSMUSG00000063216,protein_coding -35178,Olfr930,ENSMUSG00000063221,protein_coding -35159,Olfr917,ENSMUSG00000063225,protein_coding -24090,Ldha,ENSMUSG00000063229,protein_coding -26521,Olfr535,ENSMUSG00000063230,protein_coding -44618,Serpina11,ENSMUSG00000063232,protein_coding -47124,Gpr84,ENSMUSG00000063234,protein_coding -46289,1110038F14Rik,ENSMUSG00000063236,protein_coding -49477,Grm4,ENSMUSG00000063239,protein_coding -50126,Olfr133,ENSMUSG00000063240,protein_coding -15339,Zfp993,ENSMUSG00000063245,protein_coding -39552,Krtap4-1,ENSMUSG00000063251,protein_coding -19430,Gm4744,ENSMUSG00000063252,protein_coding -30873,Scoc,ENSMUSG00000063253,protein_coding -46729,Syt10,ENSMUSG00000063260,protein_coding -46234,Parp10,ENSMUSG00000063268,protein_coding -10499,Naa15,ENSMUSG00000063273,protein_coding -3739,Hacd1,ENSMUSG00000063275,protein_coding 
-32084,Gm10128,ENSMUSG00000063277,protein_coding -52931,Zfp35,ENSMUSG00000063281,protein_coding -46789,Tmem117,ENSMUSG00000063296,protein_coding -24217,Luzp2,ENSMUSG00000063297,protein_coding -22751,Psg20,ENSMUSG00000063305,protein_coding -39686,Rpl27,ENSMUSG00000063316,protein_coding -26036,Usp31,ENSMUSG00000063317,protein_coding -28312,1190007I07Rik,ENSMUSG00000063320,protein_coding -28863,Krr1,ENSMUSG00000063334,protein_coding -35105,Olfr874,ENSMUSG00000063350,protein_coding -46267,Slc39a4,ENSMUSG00000063354,protein_coding -47416,Mapk1,ENSMUSG00000063358,protein_coding -29834,Alg11,ENSMUSG00000063362,protein_coding -6395,3300002I08Rik,ENSMUSG00000063364,protein_coding -29276,Olfr763,ENSMUSG00000063374,protein_coding -13906,Ifna13,ENSMUSG00000063376,protein_coding -35201,Olfr945,ENSMUSG00000063380,protein_coding -35398,Bcl9l,ENSMUSG00000063382,protein_coding -49173,Zfp947,ENSMUSG00000063383,protein_coding -37981,Olfr1387,ENSMUSG00000063386,protein_coding -25068,Olfr293,ENSMUSG00000063394,protein_coding -17578,Tmed5,ENSMUSG00000063406,protein_coding -18090,Lrrc43,ENSMUSG00000063409,protein_coding -34388,Stk24,ENSMUSG00000063410,protein_coding -30053,Gm10131,ENSMUSG00000063412,protein_coding -20540,Cyp26b1,ENSMUSG00000063415,protein_coding -27356,Ddo,ENSMUSG00000063428,protein_coding -17755,Wscd2,ENSMUSG00000063430,protein_coding -55270,Sorcs3,ENSMUSG00000063434,protein_coding -23127,B9d2,ENSMUSG00000063439,protein_coding -47203,Nmral1,ENSMUSG00000063445,protein_coding -13445,Plppr1,ENSMUSG00000063446,protein_coding -17567,Ube2d2b,ENSMUSG00000063447,protein_coding -43984,Syne2,ENSMUSG00000063450,protein_coding -19360,D630045J12Rik,ENSMUSG00000063455,protein_coding -28163,Rps15,ENSMUSG00000063457,protein_coding -32329,Lrmda,ENSMUSG00000063458,protein_coding -21641,Tas2r114,ENSMUSG00000063478,protein_coding -46507,Snu13,ENSMUSG00000063480,protein_coding -37240,Zkscan7,ENSMUSG00000063488,protein_coding -32553,Arhgap22,ENSMUSG00000063506,protein_coding 
-6376,Gm1330,ENSMUSG00000063507,protein_coding -24001,Snrnp70,ENSMUSG00000063511,protein_coding -46162,Ly6m,ENSMUSG00000063522,protein_coding -15470,Eno1,ENSMUSG00000063524,protein_coding -41334,Stmnd1,ENSMUSG00000063529,protein_coding -15839,Sema3e,ENSMUSG00000063531,protein_coding -22245,Zfp773,ENSMUSG00000063535,protein_coding -38242,Olfr322,ENSMUSG00000063549,protein_coding -25386,Nup98,ENSMUSG00000063550,protein_coding -877,Aox1,ENSMUSG00000063558,protein_coding -38054,Col23a1,ENSMUSG00000063564,protein_coding -19805,Jazf1,ENSMUSG00000063568,protein_coding -50312,Klhdc3,ENSMUSG00000063576,protein_coding -25586,Olfr666,ENSMUSG00000063582,protein_coding -1641,Olfr1410,ENSMUSG00000063583,protein_coding -54356,Slc22a28,ENSMUSG00000063590,protein_coding -22691,Gng8,ENSMUSG00000063594,protein_coding -10160,Egfem1,ENSMUSG00000063600,protein_coding -31185,Ccdc102a,ENSMUSG00000063605,protein_coding -25515,Olfr64,ENSMUSG00000063615,protein_coding -30000,Unc5d,ENSMUSG00000063626,protein_coding -43283,Sox11,ENSMUSG00000063632,protein_coding -16355,Jakmip1,ENSMUSG00000063646,protein_coding -23109,Cnfn,ENSMUSG00000063651,protein_coding -38130,Slc22a21,ENSMUSG00000063652,protein_coding -16906,Gm10135,ENSMUSG00000063656,protein_coding -3092,Zbtb18,ENSMUSG00000063659,protein_coding -50069,Olfr98,ENSMUSG00000063660,protein_coding -47041,Krt73,ENSMUSG00000063661,protein_coding -8975,Brwd3,ENSMUSG00000063663,protein_coding -29870,Nkx6-3,ENSMUSG00000063672,protein_coding -2345,Crb1,ENSMUSG00000063681,protein_coding -54574,Glyat,ENSMUSG00000063683,protein_coding -53177,Pcdhb5,ENSMUSG00000063687,protein_coding -11522,Hist2h2ab,ENSMUSG00000063689,protein_coding -19722,Cycs,ENSMUSG00000063694,protein_coding -55434,Sfxn4,ENSMUSG00000063698,protein_coding -46218,Mapk15,ENSMUSG00000063704,protein_coding -23886,Klk1b24,ENSMUSG00000063713,protein_coding -29334,Olfr816,ENSMUSG00000063715,protein_coding -45891,Tnfrsf11b,ENSMUSG00000063727,protein_coding 
-9658,Magea6,ENSMUSG00000063728,protein_coding -11635,Hsd3b2,ENSMUSG00000063730,protein_coding -27219,Rnf217,ENSMUSG00000063760,protein_coding -21688,Tas2r129,ENSMUSG00000063762,protein_coding -25772,Olfr508,ENSMUSG00000063764,protein_coding -46489,Chadl,ENSMUSG00000063765,protein_coding -11255,S100a7a,ENSMUSG00000063767,protein_coding -54613,Olfr1469,ENSMUSG00000063777,protein_coding -11850,Chil4,ENSMUSG00000063779,protein_coding -46854,Olfr282,ENSMUSG00000063780,protein_coding -7931,Utp14a,ENSMUSG00000063785,protein_coding -32303,Chchd1,ENSMUSG00000063787,protein_coding -54364,Slc22a8,ENSMUSG00000063796,protein_coding -14256,Prpf38a,ENSMUSG00000063800,protein_coding -24849,Ap3s2,ENSMUSG00000063801,protein_coding -22123,Hspbp1,ENSMUSG00000063802,protein_coding -27471,Lin28b,ENSMUSG00000063804,protein_coding -23601,Gpatch1,ENSMUSG00000063808,protein_coding -20568,Alms1,ENSMUSG00000063810,protein_coding -17011,Arl9,ENSMUSG00000063820,protein_coding -32317,Dupd1,ENSMUSG00000063821,protein_coding -26518,Olfr532,ENSMUSG00000063823,protein_coding -37986,Olfr1382,ENSMUSG00000063827,protein_coding -22090,Cdc42ep5,ENSMUSG00000063838,protein_coding -34706,Olfr862,ENSMUSG00000063842,protein_coding -5664,Olfr1276,ENSMUSG00000063844,protein_coding -42808,Gm21731,ENSMUSG00000063846,protein_coding -35698,Ppcdc,ENSMUSG00000063849,protein_coding -13671,Rnf183,ENSMUSG00000063851,protein_coding -36849,Gpx1,ENSMUSG00000063856,protein_coding -33164,Olfr1511,ENSMUSG00000063867,protein_coding -21385,Chd4,ENSMUSG00000063870,protein_coding -6312,Slc24a3,ENSMUSG00000063873,protein_coding -38749,Olfr376,ENSMUSG00000063881,protein_coding -14377,Uqcrh,ENSMUSG00000063882,protein_coding -20324,Ptcd3,ENSMUSG00000063884,protein_coding -10120,Nlgn1,ENSMUSG00000063887,protein_coding -50320,Rpl7l1,ENSMUSG00000063888,protein_coding -52663,Crem,ENSMUSG00000063889,protein_coding -40676,Zkscan8,ENSMUSG00000063894,protein_coding -33620,Nupl1,ENSMUSG00000063895,protein_coding 
-55564,CAAA01118383.1,ENSMUSG00000063897,protein_coding -30190,Adam26b,ENSMUSG00000063900,protein_coding -23894,Klk1,ENSMUSG00000063903,protein_coding -54161,Dpp3,ENSMUSG00000063904,protein_coding -41857,Gm10139,ENSMUSG00000063905,protein_coding -17876,Srrm4,ENSMUSG00000063919,protein_coding -14361,Cyp4a32,ENSMUSG00000063929,protein_coding -23584,Pepd,ENSMUSG00000063931,protein_coding -29989,Poteg,ENSMUSG00000063932,protein_coding -16894,Zar1,ENSMUSG00000063935,protein_coding -49543,Brpf3,ENSMUSG00000063952,protein_coding -27282,Amd2,ENSMUSG00000063953,protein_coding -11528,Hist2h2aa2,ENSMUSG00000063954,protein_coding -47010,Krt88,ENSMUSG00000063971,protein_coding -4423,Nr6a1,ENSMUSG00000063972,protein_coding -21864,Slco1a5,ENSMUSG00000063975,protein_coding -50102,Olfr120,ENSMUSG00000063994,protein_coding -40788,Vmn1r211,ENSMUSG00000063998,protein_coding -48231,Olfr190,ENSMUSG00000064006,protein_coding -37928,Dppa1,ENSMUSG00000064010,protein_coding -8082,Gm648,ENSMUSG00000064016,protein_coding -23845,Klk8,ENSMUSG00000064023,protein_coding -29259,Pym1,ENSMUSG00000064030,protein_coding -53794,Mro,ENSMUSG00000064036,protein_coding -16196,Gpn1,ENSMUSG00000064037,protein_coding -37281,Ccr1l1,ENSMUSG00000064039,protein_coding -38246,Olfr319,ENSMUSG00000064044,protein_coding -37992,Scgb3a1,ENSMUSG00000064057,protein_coding -48106,Dzip3,ENSMUSG00000064061,protein_coding -41995,BC048507,ENSMUSG00000064063,protein_coding -26783,Ipcef1,ENSMUSG00000064065,protein_coding -11169,Mtx1,ENSMUSG00000064068,protein_coding -20778,Fbln2,ENSMUSG00000064080,protein_coding -5280,Olfr1202,ENSMUSG00000064084,protein_coding -37676,Vrk2,ENSMUSG00000064090,protein_coding -55231,Cnnm2,ENSMUSG00000064105,protein_coding -23362,Hcst,ENSMUSG00000064109,protein_coding -35224,Olfr963,ENSMUSG00000064110,protein_coding -48307,Cadm2,ENSMUSG00000064115,protein_coding -50393,Mocs1,ENSMUSG00000064120,protein_coding -50065,Olfr96,ENSMUSG00000064121,protein_coding 
-29420,Prr36,ENSMUSG00000064125,protein_coding -7489,Med14,ENSMUSG00000064127,protein_coding -33541,Cenpj,ENSMUSG00000064128,protein_coding -8621,Gm14781,ENSMUSG00000064129,protein_coding -7822,Rhox8,ENSMUSG00000064137,protein_coding -42166,Fam172a,ENSMUSG00000064138,protein_coding -40877,Trim38,ENSMUSG00000064140,protein_coding -14564,Zfp69,ENSMUSG00000064141,protein_coding -36877,Arih2,ENSMUSG00000064145,protein_coding -49564,Rab44,ENSMUSG00000064147,protein_coding -17155,Prol1,ENSMUSG00000064156,protein_coding -24027,Izumo1,ENSMUSG00000064158,protein_coding -39538,Krt39,ENSMUSG00000064165,protein_coding -21069,Ghrl,ENSMUSG00000064177,protein_coding -22109,Tnnt1,ENSMUSG00000064179,protein_coding -28932,Rab3ip,ENSMUSG00000064181,protein_coding -23806,Zfp936,ENSMUSG00000064194,protein_coding -47042,Krt2,ENSMUSG00000064201,protein_coding -54838,4430402I18Rik,ENSMUSG00000064202,protein_coding -46801,Ano6,ENSMUSG00000064210,protein_coding -29817,Defa24,ENSMUSG00000064213,protein_coding -44594,Ifi27,ENSMUSG00000064215,protein_coding -11531,Hist2h2aa1,ENSMUSG00000064220,protein_coding -25669,Olfr694,ENSMUSG00000064223,protein_coding -39498,Gsdma3,ENSMUSG00000064224,protein_coding -36477,Paqr9,ENSMUSG00000064225,protein_coding -47027,Gm5414,ENSMUSG00000064232,protein_coding -2201,Chil1,ENSMUSG00000064246,protein_coding -17645,Plcxd1,ENSMUSG00000064247,protein_coding -38233,Olfr329-ps,ENSMUSG00000064252,protein_coding -23064,Ethe1,ENSMUSG00000064254,protein_coding -19879,Vmn1r13,ENSMUSG00000064259,protein_coding -19670,Gimap8,ENSMUSG00000064262,protein_coding -23058,Zfp428,ENSMUSG00000064264,protein_coding -18026,Hvcn1,ENSMUSG00000064267,protein_coding -1196,Gpbar1,ENSMUSG00000064272,protein_coding -15926,Ccdc146,ENSMUSG00000064280,protein_coding -46646,Cdpf1,ENSMUSG00000064284,protein_coding -40693,Hist1h4k,ENSMUSG00000064288,protein_coding -4641,Tank,ENSMUSG00000064289,protein_coding -20958,Cntn4,ENSMUSG00000064293,protein_coding 
-878,Aox3,ENSMUSG00000064294,protein_coding -1915,Clasp1,ENSMUSG00000064302,protein_coding -25373,Lrrc51,ENSMUSG00000064307,protein_coding -50047,2410137M14Rik,ENSMUSG00000064308,protein_coding -48150,Zpld1,ENSMUSG00000064310,protein_coding -30827,Hhip,ENSMUSG00000064325,protein_coding -44934,Siva1,ENSMUSG00000064326,protein_coding -4686,Scn1a,ENSMUSG00000064329,protein_coding -21795,Pde6h,ENSMUSG00000064330,protein_coding -35174,Olfr926,ENSMUSG00000064333,protein_coding -55455,mt-Nd1,ENSMUSG00000064341,protein_coding -55459,mt-Nd2,ENSMUSG00000064345,protein_coding -55465,mt-Co1,ENSMUSG00000064351,protein_coding -55468,mt-Co2,ENSMUSG00000064354,protein_coding -55470,mt-Atp8,ENSMUSG00000064356,protein_coding -55471,mt-Atp6,ENSMUSG00000064357,protein_coding -55472,mt-Co3,ENSMUSG00000064358,protein_coding -55474,mt-Nd3,ENSMUSG00000064360,protein_coding -55477,mt-Nd4,ENSMUSG00000064363,protein_coding -55481,mt-Nd5,ENSMUSG00000064367,protein_coding -55482,mt-Nd6,ENSMUSG00000064368,protein_coding -55484,mt-Cytb,ENSMUSG00000064370,protein_coding -45297,Selenop,ENSMUSG00000064373,protein_coding -55476,mt-Nd4l,ENSMUSG00000065947,protein_coding -29381,C330021F23Rik,ENSMUSG00000065952,protein_coding -29905,Tacc1,ENSMUSG00000065954,protein_coding -29800,Defa37,ENSMUSG00000065956,protein_coding -47352,Ifitm7,ENSMUSG00000065968,protein_coding -47319,Cpped1,ENSMUSG00000065979,protein_coding -29406,Cd209b,ENSMUSG00000065987,protein_coding -15595,Aurkaip1,ENSMUSG00000065990,protein_coding -15370,Zfp985,ENSMUSG00000065999,protein_coding -15371,Zfp979,ENSMUSG00000066000,protein_coding -15321,Zfp600,ENSMUSG00000066007,protein_coding -15319,Zfp987,ENSMUSG00000066009,protein_coding -15283,Dhrs3,ENSMUSG00000066026,protein_coding -44347,Gm10436,ENSMUSG00000066027,protein_coding -15256,Oog2,ENSMUSG00000066030,protein_coding -15242,Gm13023,ENSMUSG00000066031,protein_coding -15118,Ubr4,ENSMUSG00000066036,protein_coding -15028,Hnrnpr,ENSMUSG00000066037,protein_coding 
-14881,Med18,ENSMUSG00000066042,protein_coding -14878,Phactr4,ENSMUSG00000066043,protein_coding -14522,Cldn19,ENSMUSG00000066058,protein_coding -14494,Olfr1335,ENSMUSG00000066061,protein_coding -14349,Cyp4a12a,ENSMUSG00000066071,protein_coding -14357,Cyp4a10,ENSMUSG00000066072,protein_coding -14145,Insl5,ENSMUSG00000066090,protein_coding -14041,Cyp2j11,ENSMUSG00000066097,protein_coding -26635,Krtap5-1,ENSMUSG00000066100,protein_coding -26624,Gm10153,ENSMUSG00000066101,protein_coding -13984,Gm12666,ENSMUSG00000066107,protein_coding -26606,Muc5b,ENSMUSG00000066108,protein_coding -13870,Adamtsl1,ENSMUSG00000066113,protein_coding -26530,Olfr45,ENSMUSG00000066122,protein_coding -26485,Kndc1,ENSMUSG00000066129,protein_coding -13761,Gm11487,ENSMUSG00000066137,protein_coding -13749,Gm11232,ENSMUSG00000066141,protein_coding -13670,Prpf4,ENSMUSG00000066148,protein_coding -13669,Cdc26,ENSMUSG00000066149,protein_coding -13668,Slc31a1,ENSMUSG00000066150,protein_coding -13667,Fkbp15,ENSMUSG00000066151,protein_coding -13665,Slc31a2,ENSMUSG00000066152,protein_coding -13663,Mup21,ENSMUSG00000066153,protein_coding -13660,Mup3,ENSMUSG00000066154,protein_coding -26067,Cacng3,ENSMUSG00000066189,protein_coding -13412,Anks6,ENSMUSG00000066191,protein_coding -13318,Spag8,ENSMUSG00000066196,protein_coding -25962,Gpr139,ENSMUSG00000066197,protein_coding -13230,Arid3c,ENSMUSG00000066224,protein_coding -25818,Ipo7,ENSMUSG00000066232,protein_coding -37250,Tmem42,ENSMUSG00000066233,protein_coding -37205,Pomgnt2,ENSMUSG00000066235,protein_coding -25785,Olfr519,ENSMUSG00000066239,protein_coding -25783,Olfr517,ENSMUSG00000066240,protein_coding -25779,Olfr514,ENSMUSG00000066241,protein_coding -25716,Olfr467,ENSMUSG00000066242,protein_coding -48247,Olfr206,ENSMUSG00000066257,protein_coding -25550,Trim12a,ENSMUSG00000066258,protein_coding -25529,Olfr640,ENSMUSG00000066262,protein_coding -25528,Olfr639,ENSMUSG00000066263,protein_coding -25449,Olfr586,ENSMUSG00000066268,protein_coding 
-25438,Olfr575,ENSMUSG00000066269,protein_coding -25419,Olfr559,ENSMUSG00000066272,protein_coding -25418,Olfr33,ENSMUSG00000066273,protein_coding -18113,Vps37b,ENSMUSG00000066278,protein_coding -25385,Chrna10,ENSMUSG00000066279,protein_coding -25374,Numa1,ENSMUSG00000066306,protein_coding -36982,Rtp3,ENSMUSG00000066319,protein_coding -12795,Impad1,ENSMUSG00000066324,protein_coding -36873,Wdr6,ENSMUSG00000066357,protein_coding -44658,Tcl1b1,ENSMUSG00000066359,protein_coding -44628,Serpina3c,ENSMUSG00000066361,protein_coding -44631,Serpina3f,ENSMUSG00000066363,protein_coding -44627,Serpina3b,ENSMUSG00000066364,protein_coding -44612,Serpina1a,ENSMUSG00000066366,protein_coding -36815,Actl11,ENSMUSG00000066368,protein_coding -25007,Vmn2r65,ENSMUSG00000066372,protein_coding -24927,Gm10160,ENSMUSG00000066378,protein_coding -36752,Iqcf5,ENSMUSG00000066382,protein_coding -36750,Iqcf1,ENSMUSG00000066383,protein_coding -44377,Nrxn3,ENSMUSG00000066392,protein_coding -24777,Akap13,ENSMUSG00000066406,protein_coding -36606,Msl2,ENSMUSG00000066415,protein_coding -44089,Plekhd1,ENSMUSG00000066438,protein_coding -44056,Zfyve26,ENSMUSG00000066440,protein_coding -44054,Rdh11,ENSMUSG00000066441,protein_coding -36389,Mthfs,ENSMUSG00000066442,protein_coding -36256,Hmgn3,ENSMUSG00000066456,protein_coding -36176,Omt2a,ENSMUSG00000066463,protein_coding -23937,Izumo2,ENSMUSG00000066500,protein_coding -35893,Ankdd1a,ENSMUSG00000066510,protein_coding -23892,Klk1b5,ENSMUSG00000066512,protein_coding -23890,Klk1b4,ENSMUSG00000066513,protein_coding -23888,Klk1b3,ENSMUSG00000066515,protein_coding -23878,Klk1b21,ENSMUSG00000066516,protein_coding -23750,Vmn2r57,ENSMUSG00000066537,protein_coding -18805,Hmgb1,ENSMUSG00000066551,protein_coding -23571,Lsm14a,ENSMUSG00000066568,protein_coding -23568,4931406P16Rik,ENSMUSG00000066571,protein_coding -23555,Scgb1b27,ENSMUSG00000066583,protein_coding -23554,Scgb2b27,ENSMUSG00000066584,protein_coding -23549,Scgb2b26,ENSMUSG00000066586,protein_coding 
-3344,Flvcr1,ENSMUSG00000066595,protein_coding -35738,Insyn1,ENSMUSG00000066607,protein_coding -17642,Zfp932,ENSMUSG00000066613,protein_coding -18667,Tecpr1,ENSMUSG00000066621,protein_coding -42913,Ttc32,ENSMUSG00000066637,protein_coding -18612,Fbxl18,ENSMUSG00000066640,protein_coding -42912,Wdr35,ENSMUSG00000066643,protein_coding -3161,Lefty2,ENSMUSG00000066652,protein_coding -3043,Olfr220,ENSMUSG00000066671,protein_coding -3039,Olfr417,ENSMUSG00000066672,protein_coding -3008,Ifi208,ENSMUSG00000066677,protein_coding -18474,Pilrb2,ENSMUSG00000066682,protein_coding -18472,Pilrb1,ENSMUSG00000066684,protein_coding -35520,Zbtb16,ENSMUSG00000066687,protein_coding -15234,Gm13083,ENSMUSG00000066688,protein_coding -23165,Cyp2b19,ENSMUSG00000066704,protein_coding -35451,Fxyd6,ENSMUSG00000066705,protein_coding -49231,Cldn9,ENSMUSG00000066720,protein_coding -23063,Zfp575,ENSMUSG00000066721,protein_coding -18237,Vkorc1l1,ENSMUSG00000066735,protein_coding -35113,Olfr878,ENSMUSG00000066747,protein_coding -35111,Olfr145,ENSMUSG00000066748,protein_coding -35110,Olfr877,ENSMUSG00000066749,protein_coding -35107,Olfr876,ENSMUSG00000066750,protein_coding -2684,Tnfsf18,ENSMUSG00000066755,protein_coding -22727,Igfl3,ENSMUSG00000066756,protein_coding -22702,Psg16,ENSMUSG00000066760,protein_coding -22621,Obox3,ENSMUSG00000066772,protein_coding -2530,Zfp648,ENSMUSG00000066797,protein_coding -4390,Zbtb6,ENSMUSG00000066798,protein_coding -2516,Rnasel,ENSMUSG00000066800,protein_coding -22466,Vmn1r84,ENSMUSG00000066803,protein_coding -22464,Vmn1r83,ENSMUSG00000066804,protein_coding -34881,Gm10181,ENSMUSG00000066810,protein_coding -22176,Vmn2r28,ENSMUSG00000066820,protein_coding -22310,Vmn2r37,ENSMUSG00000066828,protein_coding -34841,Zfp810,ENSMUSG00000066829,protein_coding -22248,Zfp772,ENSMUSG00000066838,protein_coding -34824,Ecsit,ENSMUSG00000066839,protein_coding -2450,Hmcn1,ENSMUSG00000066842,protein_coding -22188,Vmn1r65,ENSMUSG00000066850,protein_coding 
-17980,Oas1g,ENSMUSG00000066861,protein_coding -17974,Oas1e,ENSMUSG00000066867,protein_coding -653,Nck2,ENSMUSG00000066877,protein_coding -51029,Gm10184,ENSMUSG00000066878,protein_coding -30681,Zfp617,ENSMUSG00000066880,protein_coding -34743,Fbxl12,ENSMUSG00000066892,protein_coding -17895,Vsig10,ENSMUSG00000066894,protein_coding -34726,Olfr18,ENSMUSG00000066896,protein_coding -34722,Olfr872,ENSMUSG00000066897,protein_coding -34718,Olfr870,ENSMUSG00000066899,protein_coding -17885,Suds3,ENSMUSG00000066900,protein_coding -34704,Olfr860,ENSMUSG00000066905,protein_coding -2143,Gm10188,ENSMUSG00000066936,protein_coding -50878,Gm10190,ENSMUSG00000066938,protein_coding -17783,Myo1h,ENSMUSG00000066952,protein_coding -17748,Tmem211,ENSMUSG00000066964,protein_coding -17717,Cryba4,ENSMUSG00000066975,protein_coding -26355,Bub3,ENSMUSG00000066979,protein_coding -1839,Serpinb7,ENSMUSG00000067001,protein_coding -1824,Serpinb5,ENSMUSG00000067006,protein_coding -17616,Vmn2r10,ENSMUSG00000067010,protein_coding -1752,Cntnap5b,ENSMUSG00000067028,protein_coding -55405,Rps12-ps3,ENSMUSG00000067038,protein_coding -48938,Unc93a,ENSMUSG00000067049,protein_coding -1631,Olfr1416,ENSMUSG00000067064,protein_coding -1611,Hes6,ENSMUSG00000067071,protein_coding -1578,Asb18,ENSMUSG00000067081,protein_coding -55309,Gm10197,ENSMUSG00000067085,protein_coding -42642,Gm10198,ENSMUSG00000067122,protein_coding -50298,Slc22a7,ENSMUSG00000067144,protein_coding -50287,Polr1c,ENSMUSG00000067148,protein_coding -17161,Jchain,ENSMUSG00000067149,protein_coding -50284,Xpo5,ENSMUSG00000067150,protein_coding -1350,Col4a4,ENSMUSG00000067158,protein_coding -24137,Mrgpra4,ENSMUSG00000067173,protein_coding -50125,Olfr132,ENSMUSG00000067186,protein_coding -9723,Eif1ax,ENSMUSG00000067194,protein_coding -55077,Frat1,ENSMUSG00000067199,protein_coding -50021,H2-M9,ENSMUSG00000067201,protein_coding -16929,Lrrc66,ENSMUSG00000067206,protein_coding -49966,H2-T23,ENSMUSG00000067212,protein_coding 
-9616,Usp51,ENSMUSG00000067215,protein_coding -16899,Nipal1,ENSMUSG00000067219,protein_coding -16896,Cnga1,ENSMUSG00000067220,protein_coding -55043,Cyp2c54,ENSMUSG00000067225,protein_coding -55031,Cyp2c66,ENSMUSG00000067229,protein_coding -55030,Cyp2c65,ENSMUSG00000067231,protein_coding -49921,H2-Q10,ENSMUSG00000067235,protein_coding -55012,Lgi1,ENSMUSG00000067242,protein_coding -14091,Foxd3,ENSMUSG00000067261,protein_coding -9494,Vmn1r239-ps,ENSMUSG00000067262,protein_coding -17849,Rplp0,ENSMUSG00000067274,protein_coding -9442,Capn6,ENSMUSG00000067276,protein_coding -54978,Ppp1r3c,ENSMUSG00000067279,protein_coding -16416,Gm16223,ENSMUSG00000067285,protein_coding -49746,Rps28,ENSMUSG00000067288,protein_coding -22001,Gm10203,ENSMUSG00000067292,protein_coding -54942,Ifit1bl2,ENSMUSG00000067297,protein_coding -1056,Crygd,ENSMUSG00000067299,protein_coding -933,Bmpr2,ENSMUSG00000067336,protein_coding -21924,Tuba3b,ENSMUSG00000067338,protein_coding -49804,H2-Eb2,ENSMUSG00000067341,protein_coding -9256,Pramel3,ENSMUSG00000067360,protein_coding -16377,Tmem128,ENSMUSG00000067365,protein_coding -16376,Lyar,ENSMUSG00000067367,protein_coding -9220,Trmt2b,ENSMUSG00000067369,protein_coding -49763,B3galt4,ENSMUSG00000067370,protein_coding -9210,Tspan6,ENSMUSG00000067377,protein_coding -36372,Trim43c,ENSMUSG00000067399,protein_coding -49720,Zfp563,ENSMUSG00000067424,protein_coding -49718,Zfp763,ENSMUSG00000067430,protein_coding -16318,Hmx1,ENSMUSG00000067438,protein_coding -9062,H2afb1,ENSMUSG00000067441,protein_coding -40692,Hist1h4j,ENSMUSG00000067455,protein_coding -54557,Olfr1436,ENSMUSG00000067513,protein_coding -54551,Olfr262,ENSMUSG00000067519,protein_coding -54546,Olfr76,ENSMUSG00000067522,protein_coding -54545,Olfr1428,ENSMUSG00000067524,protein_coding -54544,Olfr1427,ENSMUSG00000067525,protein_coding -54542,Olfr1425,ENSMUSG00000067526,protein_coding -54541,Olfr1424,ENSMUSG00000067528,protein_coding -54539,Olfr1423,ENSMUSG00000067529,protein_coding 
-21666,A630073D07Rik,ENSMUSG00000067541,protein_coding -21649,Prb1,ENSMUSG00000067543,protein_coding -54528,Olfr1419,ENSMUSG00000067545,protein_coding -8824,Dmrtc1c2,ENSMUSG00000067561,protein_coding -8820,Dmrtc1c1,ENSMUSG00000067562,protein_coding -8807,Hdac8,ENSMUSG00000067567,protein_coding -54482,Ms4a15,ENSMUSG00000067571,protein_coding -6993,Cbln4,ENSMUSG00000067578,protein_coding -41454,S1pr3,ENSMUSG00000067586,protein_coding -21591,Klra3,ENSMUSG00000067591,protein_coding -47044,Krt77,ENSMUSG00000067594,protein_coding -8746,Dgat2l6,ENSMUSG00000067597,protein_coding -21587,Klra7,ENSMUSG00000067599,protein_coding -578,Nms,ENSMUSG00000067604,protein_coding -21565,Klri1,ENSMUSG00000067610,protein_coding -47014,Krt83,ENSMUSG00000067613,protein_coding -47012,Krt86,ENSMUSG00000067614,protein_coding -47011,Krt81,ENSMUSG00000067615,protein_coding -23842,Klk11,ENSMUSG00000067616,protein_coding -29519,Gm10217,ENSMUSG00000067627,protein_coding -49450,Syngap1,ENSMUSG00000067629,protein_coding -16135,Adgrf3,ENSMUSG00000067642,protein_coding -8631,Mageb18,ENSMUSG00000067649,protein_coding -517,Ankrd23,ENSMUSG00000067653,protein_coding -54352,Slc22a27,ENSMUSG00000067656,protein_coding -8493,Obp1b,ENSMUSG00000067679,protein_coding -8491,Obp1a,ENSMUSG00000067684,protein_coding -16073,Gm10220,ENSMUSG00000067698,protein_coding -16067,Gm5862,ENSMUSG00000067700,protein_coding -21404,Tuba3a,ENSMUSG00000067702,protein_coding -6832,Wfdc13,ENSMUSG00000067704,protein_coding -46881,Prkag1,ENSMUSG00000067713,protein_coding -21383,Lpar5,ENSMUSG00000067714,protein_coding -49338,BC003965,ENSMUSG00000067722,protein_coding -16017,Gbx1,ENSMUSG00000067724,protein_coding -285,Khdc1a,ENSMUSG00000067750,protein_coding -8326,Xlr5c,ENSMUSG00000067764,protein_coding -21294,Clec4b2,ENSMUSG00000067767,protein_coding -8322,Xlr4b,ENSMUSG00000067768,protein_coding -8317,Gm14685,ENSMUSG00000067771,protein_coding -241,Defb41,ENSMUSG00000067773,protein_coding -227,Pi15,ENSMUSG00000067780,protein_coding 
-6673,Nnat,ENSMUSG00000067786,protein_coding -6672,Blcap,ENSMUSG00000067787,protein_coding -200,4930444P10Rik,ENSMUSG00000067795,protein_coding -171,Xkr9,ENSMUSG00000067813,protein_coding -6642,Myl9,ENSMUSG00000067818,protein_coding -21230,Pex26,ENSMUSG00000067825,protein_coding -6617,Romo1,ENSMUSG00000067847,protein_coding -15809,4933402N22Rik,ENSMUSG00000067848,protein_coding -124,Arfgef1,ENSMUSG00000067851,protein_coding -15833,Speer3,ENSMUSG00000067855,protein_coding -8108,Zic3,ENSMUSG00000067860,protein_coding -54155,Ccdc87,ENSMUSG00000067872,protein_coding -8092,Htatsf1,ENSMUSG00000067873,protein_coding -8089,Map7d3,ENSMUSG00000067878,protein_coding -105,Vxn,ENSMUSG00000067879,protein_coding -54147,Sptbn2,ENSMUSG00000067889,protein_coding -8058,Slxl1,ENSMUSG00000067909,protein_coding -15353,Zfp991,ENSMUSG00000067916,protein_coding -15346,Rex2,ENSMUSG00000067919,protein_coding -8027,Rtl8b,ENSMUSG00000067924,protein_coding -8026,Rtl8a,ENSMUSG00000067925,protein_coding -49157,Zfp760,ENSMUSG00000067928,protein_coding -49156,Gm10226,ENSMUSG00000067929,protein_coding -49150,Zfp948,ENSMUSG00000067931,protein_coding -48965,Gm7168,ENSMUSG00000067941,protein_coding -49121,Zfp160,ENSMUSG00000067942,protein_coding -49110,Vmn1r227,ENSMUSG00000067951,protein_coding -7956,Olfr1321,ENSMUSG00000067971,protein_coding -33952,Gtf2f2,ENSMUSG00000067995,protein_coding -6548,Bpifb9b,ENSMUSG00000067996,protein_coding -6546,Bpifb9a,ENSMUSG00000067998,protein_coding -6535,Bpifb3,ENSMUSG00000068008,protein_coding -6534,Bpifb6,ENSMUSG00000068009,protein_coding -21100,Mkrn2os,ENSMUSG00000068011,protein_coding -33928,Lrch1,ENSMUSG00000068015,protein_coding -48962,Afdn,ENSMUSG00000068036,protein_coding -48921,Mas1,ENSMUSG00000068037,protein_coding -48927,Tcp1,ENSMUSG00000068039,protein_coding -6515,Tm9sf4,ENSMUSG00000068040,protein_coding -7824,Rhox9,ENSMUSG00000068048,protein_coding -48549,1110057P08Rik,ENSMUSG00000068067,protein_coding -48548,Gm7735,ENSMUSG00000068068,protein_coding 
-48545,Gm6358,ENSMUSG00000068071,protein_coding -48543,1110025L11Rik,ENSMUSG00000068073,protein_coding -48541,Gm10228,ENSMUSG00000068074,protein_coding -48539,Gm10229,ENSMUSG00000068075,protein_coding -48522,2310034C09Rik,ENSMUSG00000068078,protein_coding -6464,Tcf15,ENSMUSG00000068079,protein_coding -16845,Grxcr1,ENSMUSG00000068082,protein_coding -46564,Cyp2d40,ENSMUSG00000068083,protein_coding -46530,Cyp2d11,ENSMUSG00000068085,protein_coding -46536,Cyp2d9,ENSMUSG00000068086,protein_coding -46521,1500009C09Rik,ENSMUSG00000068099,protein_coding -46520,Cenpm,ENSMUSG00000068101,protein_coding -46518,Tnfrsf13c,ENSMUSG00000068105,protein_coding -7625,Gm4907,ENSMUSG00000068113,protein_coding -46513,Ccdc134,ENSMUSG00000068114,protein_coding -6419,Ninl,ENSMUSG00000068115,protein_coding -46512,Mei1,ENSMUSG00000068117,protein_coding -7603,Agtr2,ENSMUSG00000068122,protein_coding -6404,Cst7,ENSMUSG00000068129,protein_coding -6399,Zfp442,ENSMUSG00000068130,protein_coding -6388,Zfp120,ENSMUSG00000068134,protein_coding -7557,Tex13c3,ENSMUSG00000068149,protein_coding -6325,Insm1,ENSMUSG00000068154,protein_coding -48285,Csnka2ip,ENSMUSG00000068167,protein_coding -7827,Btg1-ps1,ENSMUSG00000068173,protein_coding -20848,Gm10234,ENSMUSG00000068181,protein_coding -48244,Olfr203,ENSMUSG00000068182,protein_coding -42605,Ndufaf2,ENSMUSG00000068184,protein_coding -48194,Col8a1,ENSMUSG00000068196,protein_coding -6255,Macrod2,ENSMUSG00000068205,protein_coding -46393,Pick1,ENSMUSG00000068206,protein_coding -7395,Ssxb9,ENSMUSG00000068218,protein_coding -7394,Ssxb10,ENSMUSG00000068219,protein_coding -46372,Lgals1,ENSMUSG00000068220,protein_coding -46350,Il2rb,ENSMUSG00000068227,protein_coding -20729,Vmn1r43,ENSMUSG00000068231,protein_coding -20728,Vmn1r42,ENSMUSG00000068232,protein_coding -20730,Vmn1r44,ENSMUSG00000068234,protein_coding -12787,Gm11808,ENSMUSG00000068240,protein_coding -33606,Phf11d,ENSMUSG00000068245,protein_coding -46323,Apol9b,ENSMUSG00000068246,protein_coding 
-22003,Amn1,ENSMUSG00000068250,protein_coding -46305,Apol7b,ENSMUSG00000068252,protein_coding -19419,Olfr461,ENSMUSG00000068259,protein_coding -20657,Efcc1,ENSMUSG00000068263,protein_coding -6144,Ap5s1,ENSMUSG00000068264,protein_coding -6142,Cenpb,ENSMUSG00000068267,protein_coding -7322,Shroom4,ENSMUSG00000068270,protein_coding -48037,Usf3,ENSMUSG00000068284,protein_coding -33512,Cma2,ENSMUSG00000068289,protein_coding -6123,Ddrgk1,ENSMUSG00000068290,protein_coding -20583,Nat8f4,ENSMUSG00000068299,protein_coding -20561,Noto,ENSMUSG00000068302,protein_coding -20494,Slc4a5,ENSMUSG00000068323,protein_coding -20476,Tlx2,ENSMUSG00000068327,protein_coding -20474,Aup1,ENSMUSG00000068328,protein_coding -20473,Htra2,ENSMUSG00000068329,protein_coding -20470,Dok1,ENSMUSG00000068335,protein_coding -20412,Reg3d,ENSMUSG00000068341,protein_coding -46158,Gml,ENSMUSG00000068349,protein_coding -5559,D430041D05Rik,ENSMUSG00000068373,protein_coding -46119,Chrac1,ENSMUSG00000068391,protein_coding -33135,Rnase13,ENSMUSG00000068392,protein_coding -5976,Cep152,ENSMUSG00000068394,protein_coding -33104,Gm7247,ENSMUSG00000068399,protein_coding -33085,Rnase12,ENSMUSG00000068407,protein_coding -33079,Pnp2,ENSMUSG00000068417,protein_coding -47715,Gmnc,ENSMUSG00000068428,protein_coding -33051,Olfr742,ENSMUSG00000068431,protein_coding -33032,Olfr725,ENSMUSG00000068437,protein_coding -5940,Duox2,ENSMUSG00000068452,protein_coding -51101,Uty,ENSMUSG00000068457,protein_coding -5919,Mfap1a,ENSMUSG00000068479,protein_coding -33121,Gm5800,ENSMUSG00000068506,protein_coding -45873,Aard,ENSMUSG00000068522,protein_coding -12584,Gng5,ENSMUSG00000068523,protein_coding -47542,Olfr169,ENSMUSG00000068535,protein_coding -19695,Doxl2,ENSMUSG00000068536,protein_coding -12554,Clca4a,ENSMUSG00000068547,protein_coding -19654,Zfp467,ENSMUSG00000068551,protein_coding -22039,Myadm,ENSMUSG00000068566,protein_coding -19539,Olfr458,ENSMUSG00000068574,protein_coding -5832,Zfyve19,ENSMUSG00000068580,protein_coding 
-19423,Mgam,ENSMUSG00000068587,protein_coding -46159,Gml2,ENSMUSG00000068600,protein_coding -53543,Gm4841,ENSMUSG00000068606,protein_coding -5748,Actc1,ENSMUSG00000068614,protein_coding -5745,Gjd2,ENSMUSG00000068615,protein_coding -47374,Efcab1,ENSMUSG00000068617,protein_coding -5666,Olfr1278,ENSMUSG00000068647,protein_coding -47281,Clec16a,ENSMUSG00000068663,protein_coding -5556,Cd59b,ENSMUSG00000068686,protein_coding -12031,Gpr88,ENSMUSG00000068696,protein_coding -32296,Myoz1,ENSMUSG00000068697,protein_coding -19208,Flnc,ENSMUSG00000068699,protein_coding -11943,Tmem167b,ENSMUSG00000068732,protein_coding -5445,Trp53i11,ENSMUSG00000068735,protein_coding -11934,Sars,ENSMUSG00000068739,protein_coding -11933,Celsr2,ENSMUSG00000068740,protein_coding -5429,Cry2,ENSMUSG00000068742,protein_coding -11932,Psrc1,ENSMUSG00000068744,protein_coding -11930,Mybphl,ENSMUSG00000068745,protein_coding -11927,Sort1,ENSMUSG00000068747,protein_coding -19130,Ptprz1,ENSMUSG00000068748,protein_coding -11925,Psma5,ENSMUSG00000068749,protein_coding -32173,Il3ra,ENSMUSG00000068758,protein_coding -11900,Gstm6,ENSMUSG00000068762,protein_coding -18982,Col28a1,ENSMUSG00000068794,protein_coding -11827,Rap1a,ENSMUSG00000068798,protein_coding -5339,Olfr1259,ENSMUSG00000068806,protein_coding -5267,Olfr1189,ENSMUSG00000068808,protein_coding -5265,Olfr1188,ENSMUSG00000068809,protein_coding -5219,Olfr1145,ENSMUSG00000068814,protein_coding -5217,Olfr1143,ENSMUSG00000068815,protein_coding -5215,Olfr152,ENSMUSG00000068816,protein_coding -5212,Olfr1140,ENSMUSG00000068817,protein_coding -5204,Olfr1132,ENSMUSG00000068818,protein_coding -5203,Olfr1131,ENSMUSG00000068819,protein_coding -11735,Csde1,ENSMUSG00000068823,protein_coding -11525,Hist2h2be,ENSMUSG00000068854,protein_coding -11523,Hist2h2ac,ENSMUSG00000068855,protein_coding -11517,Sf3b4,ENSMUSG00000068856,protein_coding -4814,Sp9,ENSMUSG00000068859,protein_coding -11466,Gm128,ENSMUSG00000068860,protein_coding 
-11441,Selenbp1,ENSMUSG00000068874,protein_coding -11436,Cgn,ENSMUSG00000068876,protein_coding -11433,Selenbp2,ENSMUSG00000068877,protein_coding -11372,Gm5773,ENSMUSG00000068879,protein_coding -4747,Ssb,ENSMUSG00000068882,protein_coding -11335,Lce3f,ENSMUSG00000068885,protein_coding -11321,Lce1j,ENSMUSG00000068887,protein_coding -11320,Lce1i,ENSMUSG00000068888,protein_coding -11315,Lce1e,ENSMUSG00000068889,protein_coding -11311,Lce1a2,ENSMUSG00000068890,protein_coding -11280,Sprr2a2,ENSMUSG00000068893,protein_coding -11163,Clk2,ENSMUSG00000068917,protein_coding -11152,Dap3,ENSMUSG00000068921,protein_coding -11150,Msto1,ENSMUSG00000068922,protein_coding -11144,Syt11,ENSMUSG00000068923,protein_coding -44660,Tcl1b3,ENSMUSG00000068940,protein_coding -4383,Olfr366,ENSMUSG00000068947,protein_coding -4345,Olfr338,ENSMUSG00000068950,protein_coding -23695,Zfp619,ENSMUSG00000068959,protein_coding -23037,Zfp114,ENSMUSG00000068962,protein_coding -4270,Zbtb34,ENSMUSG00000068966,protein_coding -10750,Vmn2r5,ENSMUSG00000068999,protein_coding -4125,Urm1,ENSMUSG00000069020,protein_coding -51128,Sry,ENSMUSG00000069036,protein_coding -7473,H2al1j,ENSMUSG00000069038,protein_coding -10380,Slc25a31,ENSMUSG00000069041,protein_coding -51105,Usp9y,ENSMUSG00000069044,protein_coding -51102,Ddx3y,ENSMUSG00000069045,protein_coding -51098,Eif2s3y,ENSMUSG00000069049,protein_coding -51091,Uba1y,ENSMUSG00000069053,protein_coding -10199,Slc7a14,ENSMUSG00000069072,protein_coding -3995,Lcn11,ENSMUSG00000069080,protein_coding -1025,Dytn,ENSMUSG00000069085,protein_coding -42510,Cdk7,ENSMUSG00000069089,protein_coding -10059,Pde7a,ENSMUSG00000069094,protein_coding -9943,Zbtb10,ENSMUSG00000069114,protein_coding -42435,Gm10260,ENSMUSG00000069117,protein_coding -9911,1700008P02Rik,ENSMUSG00000069118,protein_coding -3883,Nxph2,ENSMUSG00000069132,protein_coding -48855,Fgfr1op,ENSMUSG00000069135,protein_coding -42208,Adgrv1,ENSMUSG00000069170,protein_coding -42173,Nr2f1,ENSMUSG00000069171,protein_coding 
-42105,Zfp72,ENSMUSG00000069184,protein_coding -42091,Zdhhc11,ENSMUSG00000069189,protein_coding -41968,Zfp874a,ENSMUSG00000069206,protein_coding -42113,Zfp825,ENSMUSG00000069208,protein_coding -41513,Gprin1,ENSMUSG00000069227,protein_coding -41340,Fam8a1,ENSMUSG00000069237,protein_coding -41066,Serpinb6e,ENSMUSG00000069248,protein_coding -40993,Dusp22,ENSMUSG00000069255,protein_coding -40945,Prl2b1,ENSMUSG00000069258,protein_coding -40941,Prl6a1,ENSMUSG00000069259,protein_coding -40875,Hist1h3a,ENSMUSG00000069265,protein_coding -40873,Hist1h4b,ENSMUSG00000069266,protein_coding -40871,Hist1h3b,ENSMUSG00000069267,protein_coding -40847,Hist1h2bf,ENSMUSG00000069268,protein_coding -40856,Hist1h2ac,ENSMUSG00000069270,protein_coding -40844,Hist1h2ae,ENSMUSG00000069272,protein_coding -40843,Hist1h3e,ENSMUSG00000069273,protein_coding -40841,Hist1h4f,ENSMUSG00000069274,protein_coding -40819,Vmn1r223,ENSMUSG00000069280,protein_coding -40769,Vmn1r203,ENSMUSG00000069289,protein_coding -40758,Vmn1r199,ENSMUSG00000069292,protein_coding -40754,Vmn1r197,ENSMUSG00000069294,protein_coding -40752,Vmn1r196,ENSMUSG00000069295,protein_coding -40750,Vmn1r195,ENSMUSG00000069296,protein_coding -40749,Vmn1r194,ENSMUSG00000069297,protein_coding -40735,Vmn1r188,ENSMUSG00000069299,protein_coding -40729,Hist1h2bj,ENSMUSG00000069300,protein_coding -40727,Hist1h2ag,ENSMUSG00000069301,protein_coding -40724,Hist1h2ah,ENSMUSG00000069302,protein_coding -40710,Hist1h2br,ENSMUSG00000069303,protein_coding -40707,Hist1h4n,ENSMUSG00000069305,protein_coding -40706,Hist1h4m,ENSMUSG00000069306,protein_coding -40703,Hist1h2bq,ENSMUSG00000069307,protein_coding -40702,Hist1h2bp,ENSMUSG00000069308,protein_coding -40700,Hist1h2an,ENSMUSG00000069309,protein_coding -40865,Hist1h3c,ENSMUSG00000069310,protein_coding -54024,Gm5096,ENSMUSG00000069324,protein_coding -15763,Gm5152,ENSMUSG00000069355,protein_coding -53516,Ctxn3,ENSMUSG00000069372,protein_coding -53458,Prdm6,ENSMUSG00000069378,protein_coding 
-53342,Gm10267,ENSMUSG00000069385,protein_coding -25684,Olfr707,ENSMUSG00000069390,protein_coding -29328,Olfr810,ENSMUSG00000069421,protein_coding -29275,Olfr9,ENSMUSG00000069430,protein_coding -52890,Dsg1a,ENSMUSG00000069441,protein_coding -29185,Rdh16,ENSMUSG00000069456,protein_coding -55443,Gm6020,ENSMUSG00000069475,protein_coding -38788,Zfp616,ENSMUSG00000069476,protein_coding -4509,Epc2,ENSMUSG00000069495,protein_coding -28949,Lyz1,ENSMUSG00000069515,protein_coding -28948,Lyz2,ENSMUSG00000069516,protein_coding -28934,Gm10271,ENSMUSG00000069518,protein_coding -28905,Tmem19,ENSMUSG00000069520,protein_coding -28482,Scyl2,ENSMUSG00000069539,protein_coding -28160,Dazap1,ENSMUSG00000069565,protein_coding -28008,Tspear,ENSMUSG00000069581,protein_coding -28030,Krtap10-4,ENSMUSG00000069582,protein_coding -28015,Krtap12-1,ENSMUSG00000069583,protein_coding -28011,Gm10272,ENSMUSG00000069584,protein_coding -40128,Gm11733,ENSMUSG00000069588,protein_coding -27849,Ank3,ENSMUSG00000069601,protein_coding -40005,Cd300ld3,ENSMUSG00000069607,protein_coding -40003,Cd300ld4,ENSMUSG00000069609,protein_coding -39852,Strada,ENSMUSG00000069631,protein_coding -29375,Pex11g,ENSMUSG00000069633,protein_coding -27292,Marcks,ENSMUSG00000069662,protein_coding -27249,Sult3a1,ENSMUSG00000069668,protein_coding -27223,Nkain2,ENSMUSG00000069670,protein_coding -20478,Pcgf1,ENSMUSG00000069678,protein_coding -27076,Taar5,ENSMUSG00000069706,protein_coding -27075,Taar4,ENSMUSG00000069707,protein_coding -27074,Taar3,ENSMUSG00000069708,protein_coding -27034,4930444G20Rik,ENSMUSG00000069712,protein_coding -39578,Gm11568,ENSMUSG00000069717,protein_coding -39556,Gm11563,ENSMUSG00000069718,protein_coding -15865,4930572O03Rik,ENSMUSG00000069720,protein_coding -39541,Krtap3-2,ENSMUSG00000069721,protein_coding -39540,Krtap3-3,ENSMUSG00000069722,protein_coding -23790,Zfp975,ENSMUSG00000069727,protein_coding -48784,Arid1b,ENSMUSG00000069729,protein_coding -14103,Ube2u,ENSMUSG00000069733,protein_coding 
-49162,Zfp820,ENSMUSG00000069743,protein_coding -39447,Psmb3,ENSMUSG00000069744,protein_coding -39260,Tmem100,ENSMUSG00000069763,protein_coding -39234,Msi2,ENSMUSG00000069769,protein_coding -39153,Gm11444,ENSMUSG00000069785,protein_coding -39104,Wfdc17,ENSMUSG00000069792,protein_coding -39058,Slfn9,ENSMUSG00000069793,protein_coding -38914,Gm10277,ENSMUSG00000069804,protein_coding -41758,Fbp1,ENSMUSG00000069805,protein_coding -22041,Cacng7,ENSMUSG00000069806,protein_coding -38869,Fam57a,ENSMUSG00000069808,protein_coding -38809,Ccdc92b,ENSMUSG00000069814,protein_coding -38782,Olfr23,ENSMUSG00000069816,protein_coding -38772,Olfr390,ENSMUSG00000069818,protein_coding -38752,Olfr1,ENSMUSG00000069823,protein_coding -38746,Gm49340,ENSMUSG00000069825,protein_coding -38682,Nlrp1a,ENSMUSG00000069830,protein_coding -54414,Ahnak,ENSMUSG00000069833,protein_coding -38560,Sat2,ENSMUSG00000069835,protein_coding -38476,Sco1,ENSMUSG00000069844,protein_coding -38374,Slc47a2,ENSMUSG00000069855,protein_coding -31716,Pabpn1l,ENSMUSG00000069867,protein_coding -38215,4930438A08Rik,ENSMUSG00000069873,protein_coding -38213,Irgm2,ENSMUSG00000069874,protein_coding -37962,9930111J21Rik2,ENSMUSG00000069892,protein_coding -37958,9930111J21Rik1,ENSMUSG00000069893,protein_coding -31473,Atxn1l,ENSMUSG00000069895,protein_coding -37906,Gm12166,ENSMUSG00000069899,protein_coding -37805,Spdl1,ENSMUSG00000069910,protein_coding -37804,Fam196b,ENSMUSG00000069911,protein_coding -37767,Hba-a2,ENSMUSG00000069917,protein_coding -37765,Hba-a1,ENSMUSG00000069919,protein_coding -31344,B3gnt9,ENSMUSG00000069920,protein_coding -31334,Ces3a,ENSMUSG00000069922,protein_coding -31029,4933402J07Rik,ENSMUSG00000069971,protein_coding -30689,Olfr372,ENSMUSG00000069998,protein_coding -30674,Fcho1,ENSMUSG00000070000,protein_coding -30606,Ell,ENSMUSG00000070002,protein_coding -30608,Ssbp4,ENSMUSG00000070003,protein_coding -30414,Gm10283,ENSMUSG00000070023,protein_coding -1430,Sp140,ENSMUSG00000070031,protein_coding 
-1428,Sp110,ENSMUSG00000070034,protein_coding -30218,Fam149a,ENSMUSG00000070044,protein_coding -30208,Fat1,ENSMUSG00000070047,protein_coding -30091,Mfhas1,ENSMUSG00000070056,protein_coding -37104,Slc22a14,ENSMUSG00000070280,protein_coding -36867,Ndufaf3,ENSMUSG00000070283,protein_coding -36829,Gmppb,ENSMUSG00000070284,protein_coding -36597,Slc35g2,ENSMUSG00000070287,protein_coding -36184,Ddx43,ENSMUSG00000070291,protein_coding -35694,Trcg1,ENSMUSG00000070298,protein_coding -35439,Scn2b,ENSMUSG00000070304,protein_coding -35435,Mpzl3,ENSMUSG00000070305,protein_coding -35366,Ccdc153,ENSMUSG00000070306,protein_coding -35131,Olfr894,ENSMUSG00000070311,protein_coding -35061,Pate14,ENSMUSG00000070313,protein_coding -34756,Eif3g,ENSMUSG00000070319,protein_coding -34497,Mmp27,ENSMUSG00000070323,protein_coding -36913,Fbxw22,ENSMUSG00000070324,protein_coding -40186,Rnf213,ENSMUSG00000070327,protein_coding -40142,Tmem235,ENSMUSG00000070330,protein_coding -40084,Qrich2,ENSMUSG00000070331,protein_coding -40025,Trim80,ENSMUSG00000070332,protein_coding -39583,Krtap31-1,ENSMUSG00000070334,protein_coding -39580,Krtap9-1,ENSMUSG00000070335,protein_coding -39459,Fbxo47,ENSMUSG00000070336,protein_coding -39433,Gpr179,ENSMUSG00000070337,protein_coding -39204,Hsf5,ENSMUSG00000070345,protein_coding -26729,Ccnd1,ENSMUSG00000070348,protein_coding -38984,Evi2,ENSMUSG00000070354,protein_coding -11998,Amy2a1,ENSMUSG00000070360,protein_coding -26310,Plpp4,ENSMUSG00000070366,protein_coding -11875,Prok1,ENSMUSG00000070368,protein_coding -26273,Itgad,ENSMUSG00000070369,protein_coding -26257,Prss36,ENSMUSG00000070371,protein_coding -11811,Capza1,ENSMUSG00000070372,protein_coding -38799,Olfr59,ENSMUSG00000070374,protein_coding -38797,Olfr406,ENSMUSG00000070375,protein_coding -38794,Olfr43,ENSMUSG00000070377,protein_coding -38793,Olfr403,ENSMUSG00000070378,protein_coding -38792,Olfr402,ENSMUSG00000070379,protein_coding -38791,Olfr401,ENSMUSG00000070380,protein_coding 
-38773,Olfr391-ps,ENSMUSG00000070382,protein_coding -38771,Olfr389,ENSMUSG00000070383,protein_coding -11738,Ampd1,ENSMUSG00000070385,protein_coding -38710,Fbxo39,ENSMUSG00000070388,protein_coding -38683,Nlrp1b,ENSMUSG00000070390,protein_coding -38589,Tmem256,ENSMUSG00000070394,protein_coding -38444,Hs3st3b1,ENSMUSG00000070407,protein_coding -25692,Olfr2,ENSMUSG00000070417,protein_coding -18702,Cyp3a57,ENSMUSG00000070419,protein_coding -18700,Zscan25,ENSMUSG00000070420,protein_coding -25571,Olfr658,ENSMUSG00000070421,protein_coding -25416,Olfr558,ENSMUSG00000070423,protein_coding -25383,Art5,ENSMUSG00000070424,protein_coding -25381,Xntrpc,ENSMUSG00000070425,protein_coding -25379,Rnf121,ENSMUSG00000070426,protein_coding -25378,Il18bp,ENSMUSG00000070427,protein_coding -25276,Serpinh1,ENSMUSG00000070436,protein_coding -38253,Olfr313,ENSMUSG00000070438,protein_coding -33109,Vmn2r89,ENSMUSG00000070448,protein_coding -25035,Vmn2r73,ENSMUSG00000070458,protein_coding -25004,Olfr290,ENSMUSG00000070459,protein_coding -25002,Olfr291,ENSMUSG00000070460,protein_coding -24979,Tlnrd1,ENSMUSG00000070462,protein_coding -18366,Ccl26,ENSMUSG00000070464,protein_coding -24942,Adamtsl3,ENSMUSG00000070469,protein_coding -10651,Erich6,ENSMUSG00000070471,protein_coding -18340,Cldn3,ENSMUSG00000070473,protein_coding -7151,Fam217b,ENSMUSG00000070476,protein_coding -18234,Chchd2,ENSMUSG00000070493,protein_coding -7017,Ctcfl,ENSMUSG00000070495,protein_coding -18168,Tmem132b,ENSMUSG00000070498,protein_coding -3003,Ifi214,ENSMUSG00000070501,protein_coding -2977,Fcrl6,ENSMUSG00000070504,protein_coding -24732,Rgma,ENSMUSG00000070509,protein_coding -24586,Nsmce3,ENSMUSG00000070520,protein_coding -2880,Fcrlb,ENSMUSG00000070524,protein_coding -24545,Peg12,ENSMUSG00000070526,protein_coding -24544,Mkrn3,ENSMUSG00000070527,protein_coding -6830,Wfdc10,ENSMUSG00000070529,protein_coding -6827,Wfdc16,ENSMUSG00000070530,protein_coding -6825,Wfdc6b,ENSMUSG00000070531,protein_coding 
-2856,Ccdc190,ENSMUSG00000070532,protein_coding -6823,Wfdc8,ENSMUSG00000070533,protein_coding -6729,Top1,ENSMUSG00000070544,protein_coding -24157,Mrgprb3,ENSMUSG00000070546,protein_coding -24152,Mrgprb1,ENSMUSG00000070547,protein_coding -24146,Mrgprb4,ENSMUSG00000070550,protein_coding -24144,Mrgprb5,ENSMUSG00000070551,protein_coding -24138,Mrgprx1,ENSMUSG00000070552,protein_coding -24040,Spaca4,ENSMUSG00000070563,protein_coding -24034,Ntn5,ENSMUSG00000070564,protein_coding -2590,Rasal2,ENSMUSG00000070565,protein_coding -23993,Slc6a21,ENSMUSG00000070568,protein_coding -23983,Slc17a7,ENSMUSG00000070570,protein_coding -17700,Mn1,ENSMUSG00000070576,protein_coding -15418,Gm572,ENSMUSG00000070577,protein_coding -15383,Fv1,ENSMUSG00000070583,protein_coding -2354,Gm4788,ENSMUSG00000070594,protein_coding -29355,Vmn2r84,ENSMUSG00000070601,protein_coding -23822,Vsig10l,ENSMUSG00000070604,protein_coding -15329,Zfp992,ENSMUSG00000070605,protein_coding -15277,Aadacl4,ENSMUSG00000070609,protein_coding -15229,Gm13040,ENSMUSG00000070616,protein_coding -15238,Gm13089,ENSMUSG00000070617,protein_coding -15230,BC080695,ENSMUSG00000070618,protein_coding -15264,Gm13119,ENSMUSG00000070619,protein_coding -15179,Srarp,ENSMUSG00000070637,protein_coding -17503,Lrrc8b,ENSMUSG00000070639,protein_coding -2176,Sox13,ENSMUSG00000070643,protein_coding -2175,Etnk2,ENSMUSG00000070644,protein_coding -2174,Ren1,ENSMUSG00000070645,protein_coding -15101,Rnf186,ENSMUSG00000070661,protein_coding -17313,D5Ertd577e,ENSMUSG00000070677,protein_coding -15038,Lactbl1,ENSMUSG00000070683,protein_coding -17276,C87414,ENSMUSG00000070686,protein_coding -15030,Htr1d,ENSMUSG00000070687,protein_coding -17191,5830473C10Rik,ENSMUSG00000070690,protein_coding -14980,Runx3,ENSMUSG00000070691,protein_coding -1886,Cntnap5a,ENSMUSG00000070695,protein_coding -17163,Utp3,ENSMUSG00000070697,protein_coding -23266,Sars2,ENSMUSG00000070699,protein_coding -17126,Csn1s1,ENSMUSG00000070702,protein_coding 
-17101,Ugt2b36,ENSMUSG00000070704,protein_coding -23235,Eid2b,ENSMUSG00000070705,protein_coding -6765,Gtsf1l,ENSMUSG00000070708,protein_coding -23219,Zfp974,ENSMUSG00000070709,protein_coding -30706,Gm10282,ENSMUSG00000070713,protein_coding -5871,Pla2g4d,ENSMUSG00000070719,protein_coding -14847,Tmem200b,ENSMUSG00000070720,protein_coding -5827,Rmdn3,ENSMUSG00000070730,protein_coding -1598,Rbm44,ENSMUSG00000070732,protein_coding -16911,Fryl,ENSMUSG00000070733,protein_coding -14729,Tmem35b,ENSMUSG00000070737,protein_coding -1519,Dgkd,ENSMUSG00000070738,protein_coding -22851,Ceacam20,ENSMUSG00000070777,protein_coding -16786,Rbm47,ENSMUSG00000070780,protein_coding -22749,Psg21,ENSMUSG00000070796,protein_coding -22743,Psg27,ENSMUSG00000070797,protein_coding -22741,Psg25,ENSMUSG00000070798,protein_coding -22738,Psg26,ENSMUSG00000070799,protein_coding -22694,Pnmal2,ENSMUSG00000070802,protein_coding -14553,Cited4,ENSMUSG00000070803,protein_coding -14530,Zmynd12,ENSMUSG00000070806,protein_coding -22647,Bicra,ENSMUSG00000070808,protein_coding -22552,Sult2a6,ENSMUSG00000070810,protein_coding -22539,Sult2a2,ENSMUSG00000070811,protein_coding -22520,Zswim9,ENSMUSG00000070814,protein_coding -22512,Vmn1r87,ENSMUSG00000070815,protein_coding -22511,Vmn1r86,ENSMUSG00000070816,protein_coding -22508,Vmn1r85,ENSMUSG00000070817,protein_coding -14490,Olfr1339,ENSMUSG00000070820,protein_coding -14489,Olfr1340,ENSMUSG00000070821,protein_coding -22484,Zscan18,ENSMUSG00000070822,protein_coding -22398,Zscan4f,ENSMUSG00000070828,protein_coding -22239,Aurkc,ENSMUSG00000070837,protein_coding -22268,Vmn2r34,ENSMUSG00000070841,protein_coding -22282,Vmn2r42,ENSMUSG00000070844,protein_coding -22255,Vmn2r30,ENSMUSG00000070847,protein_coding -5185,Olfr1121,ENSMUSG00000070852,protein_coding -5224,Olfr1150-ps1,ENSMUSG00000070853,protein_coding -5176,Olfr1116,ENSMUSG00000070855,protein_coding -5173,Olfr1115,ENSMUSG00000070856,protein_coding -5170,Olfr1113,ENSMUSG00000070857,protein_coding 
-16273,Gm1673,ENSMUSG00000070858,protein_coding -4959,Zfp804a,ENSMUSG00000070866,protein_coding -14329,Trabd2b,ENSMUSG00000070867,protein_coding -14311,Skint3,ENSMUSG00000070868,protein_coding -1055,Cryge,ENSMUSG00000070870,protein_coding -1046,Ccnyl1,ENSMUSG00000070871,protein_coding -22094,Lilra5,ENSMUSG00000070873,protein_coding -5153,Olfr1100,ENSMUSG00000070875,protein_coding -14209,Ldlrad1,ENSMUSG00000070877,protein_coding -4760,Gad1,ENSMUSG00000070880,protein_coding -4742,Ccdc173,ENSMUSG00000070883,protein_coding -14135,Gm12794,ENSMUSG00000070890,protein_coding -14081,Gm12689,ENSMUSG00000070891,protein_coding -13965,Zfp352,ENSMUSG00000070902,protein_coding -13943,Ifna4,ENSMUSG00000070904,protein_coding -13938,Gm13288,ENSMUSG00000070908,protein_coding -13913,Klhl9,ENSMUSG00000070923,protein_coding -15864,Speer4d,ENSMUSG00000070933,protein_coding -13877,Rraga,ENSMUSG00000070934,protein_coding -644,Tgfbrap1,ENSMUSG00000070939,protein_coding -610,Il1rl2,ENSMUSG00000070942,protein_coding -4370,Olfr356,ENSMUSG00000070943,protein_coding -4295,Rabepk,ENSMUSG00000070953,protein_coding -13589,Dnajc25,ENSMUSG00000070972,protein_coding -13547,Actl7a,ENSMUSG00000070979,protein_coding -13545,Actl7b,ENSMUSG00000070980,protein_coding -13491,Olfr270,ENSMUSG00000070983,protein_coding -13452,Acnat1,ENSMUSG00000070985,protein_coding -13399,Foxe1,ENSMUSG00000070990,protein_coding -13338,Ccin,ENSMUSG00000070999,protein_coding -13334,Olfr155,ENSMUSG00000071000,protein_coding -13326,Hrct1,ENSMUSG00000071001,protein_coding -13278,Ccl19,ENSMUSG00000071005,protein_coding -13174,Ndufb6,ENSMUSG00000071014,protein_coding -13123,Gm136,ENSMUSG00000071015,protein_coding -12790,Sdr16c6,ENSMUSG00000071019,protein_coding -50969,Gm10309,ENSMUSG00000071036,protein_coding -50947,Camkmt,ENSMUSG00000071037,protein_coding -50808,Rasgrp3,ENSMUSG00000071042,protein_coding -31121,Ces1a,ENSMUSG00000071047,protein_coding -50526,Safb,ENSMUSG00000071054,protein_coding 
-30806,Zfp827,ENSMUSG00000071064,protein_coding -29324,Olfr806,ENSMUSG00000071065,protein_coding -50371,Treml2,ENSMUSG00000071068,protein_coding -29199,Ptges3,ENSMUSG00000071072,protein_coding -50289,Lrrc73,ENSMUSG00000071073,protein_coding -50288,Yipf3,ENSMUSG00000071074,protein_coding -30614,Jund,ENSMUSG00000071076,protein_coding -30583,Nr2c2ap,ENSMUSG00000071078,protein_coding -30480,Trim75,ENSMUSG00000071089,protein_coding -30233,1700029J07Rik,ENSMUSG00000071103,protein_coding -30231,Ccdc110,ENSMUSG00000071104,protein_coding -21870,Spx,ENSMUSG00000071112,protein_coding -30054,Mboat4,ENSMUSG00000071113,protein_coding -29984,Tex24,ENSMUSG00000071138,protein_coding -21696,Tas2r140,ENSMUSG00000071147,protein_coding -21670,Tas2r115,ENSMUSG00000071149,protein_coding -21668,Tas2r121,ENSMUSG00000071150,protein_coding -21567,Gm156,ENSMUSG00000071158,protein_coding -29755,Gm6040,ENSMUSG00000071165,protein_coding -29712,Defb46,ENSMUSG00000071169,protein_coding -49558,Srsf3,ENSMUSG00000071172,protein_coding -29651,Arhgef10,ENSMUSG00000071176,protein_coding -44608,Serpina1d,ENSMUSG00000071177,protein_coding -44606,Serpina1b,ENSMUSG00000071178,protein_coding -44602,Serpina16,ENSMUSG00000071179,protein_coding -42606,Smim15,ENSMUSG00000071180,protein_coding -28078,Olfr1357,ENSMUSG00000071185,protein_coding -49395,Wfikkn1,ENSMUSG00000071192,protein_coding -28033,Gm10318,ENSMUSG00000071195,protein_coding -49379,Ccdc78,ENSMUSG00000071202,protein_coding -42492,Naip5,ENSMUSG00000071203,protein_coding -21217,Cecr2,ENSMUSG00000071226,protein_coding -34400,Timm8a2,ENSMUSG00000071229,protein_coding -49303,Npw,ENSMUSG00000071230,protein_coding -44190,Syndig1l,ENSMUSG00000071234,protein_coding -44189,Vrtn,ENSMUSG00000071235,protein_coding -42161,2210408I21Rik,ENSMUSG00000071252,protein_coding -27759,Slc25a16,ENSMUSG00000071253,protein_coding -49217,Zfp213,ENSMUSG00000071256,protein_coding -34009,Zfp957,ENSMUSG00000071262,protein_coding -49180,Zfp946,ENSMUSG00000071266,protein_coding 
-49165,Zfp942,ENSMUSG00000071267,protein_coding -49091,Fpr-rs6,ENSMUSG00000071275,protein_coding -49089,Fpr-rs7,ENSMUSG00000071276,protein_coding -41985,Zfp65,ENSMUSG00000071281,protein_coding -41973,Zfp58,ENSMUSG00000071291,protein_coding -31904,2610044O15Rik8,ENSMUSG00000071302,protein_coding -48935,Gpr31b,ENSMUSG00000071311,protein_coding -27470,Bves,ENSMUSG00000071317,protein_coding -48838,Tcp10a,ENSMUSG00000071322,protein_coding -27388,Armc2,ENSMUSG00000071324,protein_coding -27333,Mfsd4b3-ps,ENSMUSG00000071335,protein_coding -20606,Tia1,ENSMUSG00000071337,protein_coding -27260,Trappc3l,ENSMUSG00000071340,protein_coding -20566,Egr4,ENSMUSG00000071341,protein_coding -43473,Lsmem1,ENSMUSG00000071342,protein_coding -33630,C1qtnf9,ENSMUSG00000071347,protein_coding -33608,Setdb2,ENSMUSG00000071350,protein_coding -20411,Reg3b,ENSMUSG00000071356,protein_coding -27054,Tbpl1,ENSMUSG00000071359,protein_coding -33517,Mcpt9,ENSMUSG00000071361,protein_coding -26992,Map3k5,ENSMUSG00000071369,protein_coding -43053,Hpcal1,ENSMUSG00000071379,protein_coding -26958,Ect2l,ENSMUSG00000071392,protein_coding -43037,2410004P03Rik,ENSMUSG00000071398,protein_coding -39454,Rpl23,ENSMUSG00000071415,protein_coding -19982,Grid2,ENSMUSG00000071424,protein_coding -19921,Vmn1r27,ENSMUSG00000071428,protein_coding -41081,Psmg4,ENSMUSG00000071451,protein_coding -41049,Gm11397,ENSMUSG00000071452,protein_coding -42830,Dtnb,ENSMUSG00000071454,protein_coding -33066,Ccnb1ip1,ENSMUSG00000071470,protein_coding -48516,Krtap26-1,ENSMUSG00000071471,protein_coding -19644,Zfp777,ENSMUSG00000071477,protein_coding -40848,Hist1h2ad,ENSMUSG00000071478,protein_coding -19572,Olfr437,ENSMUSG00000071481,protein_coding -32863,Ptgdr,ENSMUSG00000071489,protein_coding -40791,Vmn1r212,ENSMUSG00000071490,protein_coding -40785,Vmn1r209,ENSMUSG00000071491,protein_coding -40783,Vmn1r208,ENSMUSG00000071493,protein_coding -19543,Olfr455,ENSMUSG00000071494,protein_coding -55317,Nutf2-ps1,ENSMUSG00000071497,protein_coding 
-19525,Tmem139,ENSMUSG00000071506,protein_coding -48216,Olfr172,ENSMUSG00000071510,protein_coding -40686,Hist1h2ai,ENSMUSG00000071516,protein_coding -19482,Gm10334,ENSMUSG00000071517,protein_coding -19478,Prss3,ENSMUSG00000071519,protein_coding -19477,Try10,ENSMUSG00000071521,protein_coding -40652,Olfr263,ENSMUSG00000071522,protein_coding -55239,Atp5md,ENSMUSG00000071528,protein_coding -32580,Gprin2,ENSMUSG00000071531,protein_coding -48164,Pcnp,ENSMUSG00000071533,protein_coding -19374,Klrg2,ENSMUSG00000071537,protein_coding -32542,3425401B19Rik,ENSMUSG00000071540,protein_coding -32490,Nt5dc2,ENSMUSG00000071547,protein_coding -48041,Cfap44,ENSMUSG00000071550,protein_coding -40347,Akr1c19,ENSMUSG00000071551,protein_coding -48023,Tigit,ENSMUSG00000071552,protein_coding -19243,Cpa2,ENSMUSG00000071553,protein_coding -47914,Cstdc5,ENSMUSG00000071561,protein_coding -47906,Stfa1,ENSMUSG00000071562,protein_coding -47082,Gm10337,ENSMUSG00000071586,protein_coding -54768,Fam189a2,ENSMUSG00000071604,protein_coding -31912,Gm10340,ENSMUSG00000071613,protein_coding -54621,Olfr1477,ENSMUSG00000071629,protein_coding -47509,2510002D24Rik,ENSMUSG00000071632,protein_coding -54573,Gm4952,ENSMUSG00000071633,protein_coding -47432,Rimbp3,ENSMUSG00000071636,protein_coding -47389,Cebpd,ENSMUSG00000071637,protein_coding -54413,Eef1g,ENSMUSG00000071644,protein_coding -54412,Tut1,ENSMUSG00000071645,protein_coding -54411,Mta2,ENSMUSG00000071646,protein_coding -54409,Eml3,ENSMUSG00000071647,protein_coding -54408,Rom1,ENSMUSG00000071648,protein_coding -54407,B3gat3,ENSMUSG00000071649,protein_coding -54406,Ganab,ENSMUSG00000071650,protein_coding -54405,Ints5,ENSMUSG00000071652,protein_coding -54403,1810009A15Rik,ENSMUSG00000071653,protein_coding -54398,Uqcc3,ENSMUSG00000071654,protein_coding -54397,Ubxn1,ENSMUSG00000071655,protein_coding -54396,Lrrn4cl,ENSMUSG00000071656,protein_coding -54395,Bscl2,ENSMUSG00000071657,protein_coding -54394,Gng3,ENSMUSG00000071658,protein_coding 
-54392,Hnrnpul2,ENSMUSG00000071659,protein_coding -54391,Ttc9c,ENSMUSG00000071660,protein_coding -54390,Zbtb3,ENSMUSG00000071661,protein_coding -54389,Polr2g,ENSMUSG00000071662,protein_coding -9621,Foxr2,ENSMUSG00000071665,protein_coding -47315,Snx29,ENSMUSG00000071669,protein_coding -9458,Rtl4,ENSMUSG00000071679,protein_coding -9355,Tex13a,ENSMUSG00000071686,protein_coding -54145,Gm960,ENSMUSG00000071691,protein_coding -9697,Sms,ENSMUSG00000071708,protein_coding -46346,Mpst,ENSMUSG00000071711,protein_coding -46340,Csf2rb,ENSMUSG00000071713,protein_coding -46338,Csf2rb2,ENSMUSG00000071714,protein_coding -46337,Ncf4,ENSMUSG00000071715,protein_coding -46319,Apol7e,ENSMUSG00000071716,protein_coding -8737,Tmem28,ENSMUSG00000071719,protein_coding -8683,Spin4,ENSMUSG00000071722,protein_coding -8673,Gspt2,ENSMUSG00000071723,protein_coding -46241,Smpd5,ENSMUSG00000071724,protein_coding -8633,Gm5941,ENSMUSG00000071726,protein_coding -8461,Cldn34b3,ENSMUSG00000071738,protein_coding -8318,DXBay18,ENSMUSG00000071745,protein_coding -8240,Gm14698,ENSMUSG00000071748,protein_coding -45981,4933412E24Rik,ENSMUSG00000071749,protein_coding -45934,Zhx2,ENSMUSG00000071757,protein_coding -7955,Olfr1320,ENSMUSG00000071764,protein_coding -7832,Rhox12,ENSMUSG00000071766,protein_coding -7821,Rhox7a,ENSMUSG00000071767,protein_coding -7814,Rhox3h,ENSMUSG00000071769,protein_coding -7805,Rhox4e,ENSMUSG00000071770,protein_coding -7795,Rhox4b,ENSMUSG00000071771,protein_coding -7789,Rhox3a,ENSMUSG00000071772,protein_coding -7787,Rhox1,ENSMUSG00000071773,protein_coding -7655,Gm14525,ENSMUSG00000071788,protein_coding -7423,Fthl17e,ENSMUSG00000071815,protein_coding -7407,Ssxb5,ENSMUSG00000071816,protein_coding -53609,Apcdd1,ENSMUSG00000071847,protein_coding -53372,Ccdc112,ENSMUSG00000071855,protein_coding -53350,Mcc,ENSMUSG00000071856,protein_coding -53332,Gm94,ENSMUSG00000071858,protein_coding -53079,Lrrtm2,ENSMUSG00000071862,protein_coding -37422,Ppia,ENSMUSG00000071866,protein_coding 
-2718,Mroh9,ENSMUSG00000071890,protein_coding -19868,Vmn1r4,ENSMUSG00000071893,protein_coding -41435,Nutm2,ENSMUSG00000071909,protein_coding -30157,Adam25,ENSMUSG00000071937,protein_coding -48847,Fndc1,ENSMUSG00000071984,protein_coding -9128,Vmn2r121,ENSMUSG00000072049,protein_coding -41743,6720489N17Rik,ENSMUSG00000072066,protein_coding -49276,Ccnf,ENSMUSG00000072082,protein_coding -9657,Cldn34b1,ENSMUSG00000072100,protein_coding -33088,Ang,ENSMUSG00000072115,protein_coding -32843,BC061237,ENSMUSG00000072145,protein_coding -15856,Gm10354,ENSMUSG00000072188,protein_coding -47504,Sept5,ENSMUSG00000072214,protein_coding -46893,Tuba1a,ENSMUSG00000072235,protein_coding -25544,Trim6,ENSMUSG00000072244,protein_coding -7428,Fthl17f,ENSMUSG00000072249,protein_coding -3226,Taf1a,ENSMUSG00000072258,protein_coding -23734,Gm5592,ENSMUSG00000072259,protein_coding -34186,Klf12,ENSMUSG00000072294,protein_coding -906,C2cd6,ENSMUSG00000072295,protein_coding -48096,Dppa2,ENSMUSG00000072419,protein_coding -33417,Psmb11,ENSMUSG00000072423,protein_coding -55427,Nanos1,ENSMUSG00000072437,protein_coding -32535,1700024G13Rik,ENSMUSG00000072473,protein_coding -20395,Gm9008,ENSMUSG00000072476,protein_coding -8319,Xlr5b,ENSMUSG00000072479,protein_coding -46135,Mroh5,ENSMUSG00000072487,protein_coding -33431,Ppp1r3e,ENSMUSG00000072494,protein_coding -46064,Phf20l1,ENSMUSG00000072501,protein_coding -46055,Hhla1,ENSMUSG00000072511,protein_coding -39252,Gm525,ENSMUSG00000072553,protein_coding -45992,Fam84b,ENSMUSG00000072568,protein_coding -33140,Tmem253,ENSMUSG00000072571,protein_coding -33132,Slc39a2,ENSMUSG00000072572,protein_coding -33089,Eddm3b,ENSMUSG00000072575,protein_coding -39170,Ptrh2,ENSMUSG00000072582,protein_coding -45889,Gm7489,ENSMUSG00000072584,protein_coding -32824,4930503E14Rik,ENSMUSG00000072595,protein_coding -32818,Ear2,ENSMUSG00000072596,protein_coding -32810,Ang6,ENSMUSG00000072598,protein_coding -32797,Ear1,ENSMUSG00000072601,protein_coding 
-32747,Gm10376,ENSMUSG00000072605,protein_coding -39065,Slfn2,ENSMUSG00000072620,protein_coding -21184,Zfp9,ENSMUSG00000072623,protein_coding -32569,Gm5460,ENSMUSG00000072624,protein_coding -32565,Gdf2,ENSMUSG00000072625,protein_coding -38970,Lyrm9,ENSMUSG00000072640,protein_coding -18001,Adam1a,ENSMUSG00000072647,protein_coding -21955,Mansc4,ENSMUSG00000072662,protein_coding -45389,Spef2,ENSMUSG00000072663,protein_coding -45383,Ugt3a1,ENSMUSG00000072664,protein_coding -32398,Duxbl3,ENSMUSG00000072672,protein_coding -32395,Plac9b,ENSMUSG00000072674,protein_coding -32393,Duxbl2,ENSMUSG00000072675,protein_coding -32383,Tmem254a,ENSMUSG00000072676,protein_coding -32389,Tmem254c,ENSMUSG00000072680,protein_coding -17817,Gm10401,ENSMUSG00000072693,protein_coding -17799,1500011B03Rik,ENSMUSG00000072694,protein_coding -21700,Smim10l1,ENSMUSG00000072704,protein_coding -32171,Olfr31,ENSMUSG00000072707,protein_coding -38759,Olfr381,ENSMUSG00000072708,protein_coding -38757,Olfr380,ENSMUSG00000072709,protein_coding -21588,Klra10,ENSMUSG00000072718,protein_coding -17735,Myo18b,ENSMUSG00000072720,protein_coding -17729,Gm6588,ENSMUSG00000072722,protein_coding -32072,Gm5797,ENSMUSG00000072726,protein_coding -32044,Gm16440,ENSMUSG00000072738,protein_coding -31926,Gm10408,ENSMUSG00000072739,protein_coding -17665,Lrcol1,ENSMUSG00000072754,protein_coding -17634,4930522L14Rik,ENSMUSG00000072762,protein_coding -17633,5430403G16Rik,ENSMUSG00000072763,protein_coding -21382,Acrbp,ENSMUSG00000072770,protein_coding -21356,Grcc10,ENSMUSG00000072772,protein_coding -17488,Zfp951,ENSMUSG00000072774,protein_coding -21325,Vmn2r27,ENSMUSG00000072778,protein_coding -21312,Vmn2r24,ENSMUSG00000072780,protein_coding -45278,Abcb5,ENSMUSG00000072791,protein_coding -44943,Ahnak2,ENSMUSG00000072812,protein_coding -17320,E330014E10Rik,ENSMUSG00000072813,protein_coding -17319,Gm7982,ENSMUSG00000072814,protein_coding -17286,Gm6351,ENSMUSG00000072821,protein_coding 
-17285,BC061212,ENSMUSG00000072822,protein_coding -44939,Cep170b,ENSMUSG00000072825,protein_coding -17080,Tmprss11a,ENSMUSG00000072845,protein_coding -44617,Serpina1e,ENSMUSG00000072849,protein_coding -20898,Rybp,ENSMUSG00000072872,protein_coding -20887,Gpr27,ENSMUSG00000072875,protein_coding -20853,1700123L14Rik,ENSMUSG00000072878,protein_coding -9653,Samt2,ENSMUSG00000072888,protein_coding -16891,Nfxl1,ENSMUSG00000072889,protein_coding -44312,Gm2016,ENSMUSG00000072905,protein_coding -38256,Gm12258,ENSMUSG00000072915,protein_coding -44266,Noxred1,ENSMUSG00000072919,protein_coding -9537,Gm10439,ENSMUSG00000072923,protein_coding -9506,Gm15107,ENSMUSG00000072930,protein_coding -9502,Gm15080,ENSMUSG00000072931,protein_coding -9455,Trpc5os,ENSMUSG00000072934,protein_coding -16589,Sod3,ENSMUSG00000072941,protein_coding -9382,Nup62cl,ENSMUSG00000072944,protein_coding -9376,Ripply1,ENSMUSG00000072945,protein_coding -44177,Ptgr2,ENSMUSG00000072946,protein_coding -44161,Acot1,ENSMUSG00000072949,protein_coding -9330,Tmsb15l,ENSMUSG00000072955,protein_coding -9280,Bhlhb9,ENSMUSG00000072964,protein_coding -9281,Gprasp2,ENSMUSG00000072966,protein_coding -48879,Gm17728,ENSMUSG00000072968,protein_coding -9276,Armcx5,ENSMUSG00000072969,protein_coding -44109,Adam4,ENSMUSG00000072972,protein_coding -44107,Gm4787,ENSMUSG00000072974,protein_coding -5846,Oip5,ENSMUSG00000072980,protein_coding -38033,4933414I15Rik,ENSMUSG00000072983,protein_coding -9057,Cpxcr1,ENSMUSG00000072995,protein_coding -8995,Cylc1,ENSMUSG00000073001,protein_coding -20343,Vamp5,ENSMUSG00000073002,protein_coding -8970,Gm732,ENSMUSG00000073006,protein_coding -8969,Tent5d,ENSMUSG00000073007,protein_coding -8960,Gpr174,ENSMUSG00000073008,protein_coding -8943,Gm5127,ENSMUSG00000073010,protein_coding -8933,Fnd3c2,ENSMUSG00000073012,protein_coding -8896,Uprt,ENSMUSG00000073016,protein_coding -8813,Dmrtc1b,ENSMUSG00000073027,protein_coding -19988,Atoh1,ENSMUSG00000073043,protein_coding 
-8675,Zxdb,ENSMUSG00000073062,protein_coding -37766,Hbq1b,ENSMUSG00000073063,protein_coding -8612,Mageb2,ENSMUSG00000073069,protein_coding -8512,Cfap47,ENSMUSG00000073077,protein_coding -43642,Srp54a,ENSMUSG00000073079,protein_coding -8465,Cldn34b4,ENSMUSG00000073085,protein_coding -8430,Smim9,ENSMUSG00000073094,protein_coding -19661,Lrrc61,ENSMUSG00000073096,protein_coding -16138,Drc1,ENSMUSG00000073102,protein_coding -19563,Olfr444,ENSMUSG00000073110,protein_coding -19561,Olfr446,ENSMUSG00000073111,protein_coding -16071,Gm10471,ENSMUSG00000073116,protein_coding -16069,Gm7347,ENSMUSG00000073117,protein_coding -16068,Speer4a,ENSMUSG00000073119,protein_coding -8321,Xlr3b,ENSMUSG00000073125,protein_coding -8287,Gm1141,ENSMUSG00000073130,protein_coding -8286,Vma21,ENSMUSG00000073131,protein_coding -8258,Tmem185a,ENSMUSG00000073139,protein_coding -43233,9030624G23Rik,ENSMUSG00000073158,protein_coding -8075,Zfp449,ENSMUSG00000073176,protein_coding -8072,Gm773,ENSMUSG00000073177,protein_coding -43077,5730507C01Rik,ENSMUSG00000073197,protein_coding -7999,Ccdc160,ENSMUSG00000073207,protein_coding -15867,Speer4c,ENSMUSG00000073208,protein_coding -19259,Klf14,ENSMUSG00000073209,protein_coding -15691,Gm8773,ENSMUSG00000073234,protein_coding -7786,Gm9,ENSMUSG00000073243,protein_coding -7743,Gm14819,ENSMUSG00000073245,protein_coding -7739,Gm10486,ENSMUSG00000073247,protein_coding -7690,Gm14632,ENSMUSG00000073255,protein_coding -7686,Gm10488,ENSMUSG00000073257,protein_coding -7642,Gm1993,ENSMUSG00000073267,protein_coding -7369,Gm10490,ENSMUSG00000073290,protein_coding -7368,Gm10491,ENSMUSG00000073291,protein_coding -7320,Nudt10,ENSMUSG00000073293,protein_coding -7318,AU022751,ENSMUSG00000073294,protein_coding -7315,Nudt11,ENSMUSG00000073295,protein_coding -50680,Lrrc30,ENSMUSG00000073375,protein_coding -50518,Arrdc5,ENSMUSG00000073380,protein_coding -50364,9830107B12Rik,ENSMUSG00000073386,protein_coding -50202,Esp1,ENSMUSG00000073396,protein_coding 
-50038,Trim40,ENSMUSG00000073399,protein_coding -50037,Trim10,ENSMUSG00000073400,protein_coding -49984,Gm8909,ENSMUSG00000073402,protein_coding -49968,Gm6034,ENSMUSG00000073407,protein_coding -49932,Mucl3,ENSMUSG00000073408,protein_coding -49918,H2-Q6,ENSMUSG00000073409,protein_coding -49907,H2-D1,ENSMUSG00000073411,protein_coding -49893,Lst1,ENSMUSG00000073412,protein_coding -49876,Ly6g6d,ENSMUSG00000073413,protein_coding -49874,Mpig6b,ENSMUSG00000073414,protein_coding -49836,C4b,ENSMUSG00000073418,protein_coding -49800,H2-Ab1,ENSMUSG00000073421,protein_coding -49776,H2-Ke6,ENSMUSG00000073422,protein_coding -49735,Zfp414,ENSMUSG00000073423,protein_coding -49695,Cyp4f15,ENSMUSG00000073424,protein_coding -28306,Gm4924,ENSMUSG00000073427,protein_coding -49415,Arhgdig,ENSMUSG00000073433,protein_coding -49392,Wdr90,ENSMUSG00000073434,protein_coding -49328,Nme3,ENSMUSG00000073435,protein_coding -49325,Eme2,ENSMUSG00000073436,protein_coding -48943,Smok2b,ENSMUSG00000073457,protein_coding -48942,Smok2a,ENSMUSG00000073458,protein_coding -48924,Pnldc1,ENSMUSG00000073460,protein_coding -48859,Sft2d1,ENSMUSG00000073468,protein_coding -48850,Rsph3a,ENSMUSG00000073471,protein_coding -3249,Marc2,ENSMUSG00000073481,protein_coding -3012,Ifi204,ENSMUSG00000073489,protein_coding -3011,Ifi207,ENSMUSG00000073490,protein_coding -3005,Ifi213,ENSMUSG00000073491,protein_coding -2944,Gm10521,ENSMUSG00000073492,protein_coding -2866,Sh2d1b2,ENSMUSG00000073494,protein_coding -17290,AA792892,ENSMUSG00000073497,protein_coding -54039,Dok6,ENSMUSG00000073514,protein_coding -2611,Pappa2,ENSMUSG00000073530,protein_coding -53729,Cep76,ENSMUSG00000073542,protein_coding -53605,Spink13,ENSMUSG00000073551,protein_coding -53541,Gm4951,ENSMUSG00000073555,protein_coding -2222,Ppp1r12b,ENSMUSG00000073557,protein_coding -53461,Csnk1g3,ENSMUSG00000073563,protein_coding -53435,Prr16,ENSMUSG00000073565,protein_coding -53394,Arl14epl,ENSMUSG00000073568,protein_coding 
-53345,Gm10542,ENSMUSG00000073572,protein_coding -53344,Spink11,ENSMUSG00000073573,protein_coding -53297,Grxcr2,ENSMUSG00000073574,protein_coding -53196,Pcdhb22,ENSMUSG00000073591,protein_coding -53102,1700066B19Rik,ENSMUSG00000073598,protein_coding -53103,Ecscr,ENSMUSG00000073599,protein_coding -53099,Prob1,ENSMUSG00000073600,protein_coding -1835,Serpinb3c,ENSMUSG00000073601,protein_coding -1832,Serpinb3b,ENSMUSG00000073602,protein_coding -1696,Gal3st2c,ENSMUSG00000073608,protein_coding -1691,D2hgdh,ENSMUSG00000073609,protein_coding -1643,Cops9,ENSMUSG00000073616,protein_coding -1402,Fbxo36,ENSMUSG00000073633,protein_coding -52718,Rab18,ENSMUSG00000073639,protein_coding -1311,Wdfy1,ENSMUSG00000073643,protein_coding -1204,Catip,ENSMUSG00000073650,protein_coding -1058,Crygb,ENSMUSG00000073658,protein_coding -943,Nbeal1,ENSMUSG00000073664,protein_coding -835,Hspe1,ENSMUSG00000073676,protein_coding -822,Pgap1,ENSMUSG00000073678,protein_coding -15590,Tmem88b,ENSMUSG00000073680,protein_coding -15576,Gm10563,ENSMUSG00000073682,protein_coding -15564,Faap20,ENSMUSG00000073684,protein_coding -15495,Klhl21,ENSMUSG00000073700,protein_coding -587,Rpl31,ENSMUSG00000073702,protein_coding -15428,Cenps,ENSMUSG00000073705,protein_coding -15265,Pramef20,ENSMUSG00000073721,protein_coding -366,4931408C20Rik,ENSMUSG00000073722,protein_coding -15254,Gm13102,ENSMUSG00000073723,protein_coding -15253,Pramel4,ENSMUSG00000073724,protein_coding -333,Lmbrd1,ENSMUSG00000073725,protein_coding -304,4933415F23Rik,ENSMUSG00000073730,protein_coding -15165,Cplane2,ENSMUSG00000073733,protein_coding -240,Defb18,ENSMUSG00000073735,protein_coding -14949,Gm7534,ENSMUSG00000073747,protein_coding -14711,5730409E04Rik,ENSMUSG00000073755,protein_coding -14692,Sh3d21,ENSMUSG00000073758,protein_coding -14578,Gm12888,ENSMUSG00000073764,protein_coding -14498,Olfr1330,ENSMUSG00000073768,protein_coding -14497,Olfr1331,ENSMUSG00000073769,protein_coding -14424,Btbd19,ENSMUSG00000073771,protein_coding 
-14372,Kncn,ENSMUSG00000073774,protein_coding -14267,Kti12,ENSMUSG00000073775,protein_coding -26628,Krtap5-5,ENSMUSG00000073785,protein_coding -26626,Gm7579,ENSMUSG00000073786,protein_coding -14096,Efcab7,ENSMUSG00000073791,protein_coding -14094,Alg6,ENSMUSG00000073792,protein_coding -14071,I0C0044D17Rik,ENSMUSG00000073794,protein_coding -26491,6430531B16Rik,ENSMUSG00000073795,protein_coding -13958,Cdkn2b,ENSMUSG00000073802,protein_coding -26414,Nps,ENSMUSG00000073804,protein_coding -26409,Insyn2a,ENSMUSG00000073805,protein_coding -13903,Ifna12,ENSMUSG00000073811,protein_coding -13638,Mup14,ENSMUSG00000073830,protein_coding -13629,Mup11,ENSMUSG00000073834,protein_coding -26127,Tufm,ENSMUSG00000073838,protein_coding -13612,Mup7,ENSMUSG00000073842,protein_coding -25959,Iqck,ENSMUSG00000073856,protein_coding -13251,Gm13305,ENSMUSG00000073876,protein_coding -13250,Gm13306,ENSMUSG00000073877,protein_coding -13247,Gm13304,ENSMUSG00000073878,protein_coding -13233,Ccl27a,ENSMUSG00000073888,protein_coding -13227,Il11ra1,ENSMUSG00000073889,protein_coding -25723,Olfr472,ENSMUSG00000073893,protein_coding -25704,Rbmxl2,ENSMUSG00000073894,protein_coding -25701,Olfr716,ENSMUSG00000073896,protein_coding -25697,Olfr17,ENSMUSG00000073897,protein_coding -25693,Olfr713,ENSMUSG00000073898,protein_coding -25685,Olfr1532-ps1,ENSMUSG00000073899,protein_coding -25681,Olfr704,ENSMUSG00000073900,protein_coding -25680,Olfr703,ENSMUSG00000073901,protein_coding -25623,Olfr692,ENSMUSG00000073906,protein_coding -25619,Olfr689,ENSMUSG00000073907,protein_coding -25618,Olfr688,ENSMUSG00000073909,protein_coding -13131,Mob3b,ENSMUSG00000073910,protein_coding -25603,Olfr678,ENSMUSG00000073913,protein_coding -25601,Olfr677,ENSMUSG00000073914,protein_coding -25600,Olfr676,ENSMUSG00000073915,protein_coding -25591,Olfr669,ENSMUSG00000073916,protein_coding -25585,Olfr665,ENSMUSG00000073917,protein_coding -25575,Olfr661,ENSMUSG00000073920,protein_coding -25573,Olfr659,ENSMUSG00000073922,protein_coding 
-25570,Olfr657,ENSMUSG00000073923,protein_coding -25569,Olfr656,ENSMUSG00000073924,protein_coding -25567,Olfr654,ENSMUSG00000073925,protein_coding -25566,Olfr653,ENSMUSG00000073926,protein_coding -25565,Olfr652,ENSMUSG00000073927,protein_coding -25564,Olfr651,ENSMUSG00000073928,protein_coding -25535,Olfr646,ENSMUSG00000073931,protein_coding -25530,Olfr641,ENSMUSG00000073932,protein_coding -25520,Olfr633,ENSMUSG00000073937,protein_coding -25518,Olfr632,ENSMUSG00000073938,protein_coding -25505,Hbb-bt,ENSMUSG00000073940,protein_coding -25495,Olfr625-ps1,ENSMUSG00000073943,protein_coding -25489,Olfr619,ENSMUSG00000073944,protein_coding -25488,Olfr618,ENSMUSG00000073945,protein_coding -25486,Olfr617,ENSMUSG00000073946,protein_coding -25484,Olfr615,ENSMUSG00000073947,protein_coding -25478,Olfr608,ENSMUSG00000073948,protein_coding -25476,Olfr606,ENSMUSG00000073949,protein_coding -25468,Olfr599,ENSMUSG00000073950,protein_coding -25467,Olfr598,ENSMUSG00000073951,protein_coding -25466,Olfr597,ENSMUSG00000073952,protein_coding -25465,Olfr596,ENSMUSG00000073953,protein_coding -25459,Olfr594,ENSMUSG00000073954,protein_coding -25458,Olfr593,ENSMUSG00000073955,protein_coding -25457,Olfr592,ENSMUSG00000073956,protein_coding -25447,Olfr584,ENSMUSG00000073959,protein_coding -25446,Olfr583,ENSMUSG00000073960,protein_coding -25445,Olfr582,ENSMUSG00000073961,protein_coding -25439,Olfr576,ENSMUSG00000073962,protein_coding -25435,Olfr572,ENSMUSG00000073963,protein_coding -25433,Olfr570,ENSMUSG00000073964,protein_coding -25431,Olfr568,ENSMUSG00000073965,protein_coding -25423,Olfr561,ENSMUSG00000073966,protein_coding -25415,Olfr557,ENSMUSG00000073967,protein_coding -25413,Trim68,ENSMUSG00000073968,protein_coding -25411,Olfr556,ENSMUSG00000073969,protein_coding -25410,Olfr555,ENSMUSG00000073970,protein_coding -25408,Olfr554,ENSMUSG00000073971,protein_coding -25407,Olfr553,ENSMUSG00000073972,protein_coding -25406,Olfr552,ENSMUSG00000073973,protein_coding 
-25405,Olfr551,ENSMUSG00000073974,protein_coding -25404,Olfr550,ENSMUSG00000073975,protein_coding -25401,Olfr549,ENSMUSG00000073977,protein_coding -25400,Olfr548-ps1,ENSMUSG00000073978,protein_coding -25399,Olfr547,ENSMUSG00000073979,protein_coding -25389,Rhog,ENSMUSG00000073982,protein_coding -12976,Ggh,ENSMUSG00000073987,protein_coding -12974,Ttpa,ENSMUSG00000073988,protein_coding -12960,Cnbd1,ENSMUSG00000073991,protein_coding -25295,Olfr521,ENSMUSG00000073997,protein_coding -25293,Olfr520,ENSMUSG00000073998,protein_coding -37191,Klhl40,ENSMUSG00000074001,protein_coding -25248,Gucy2d,ENSMUSG00000074003,protein_coding -25243,B3gnt6,ENSMUSG00000074004,protein_coding -25241,Omp,ENSMUSG00000074006,protein_coding -17292,Gm7682,ENSMUSG00000074011,protein_coding -37105,Slc22a13,ENSMUSG00000074028,protein_coding -31853,Exoc8,ENSMUSG00000074030,protein_coding -31754,Mc1r,ENSMUSG00000074037,protein_coding -36925,Fbxw18,ENSMUSG00000074059,protein_coding -36921,Fbxw15,ENSMUSG00000074060,protein_coding -36917,Fbxw19,ENSMUSG00000074061,protein_coding -36914,Fbxw16,ENSMUSG00000074062,protein_coding -31611,Osgin1,ENSMUSG00000074063,protein_coding -31610,Mlycd,ENSMUSG00000074064,protein_coding -24655,Fam169b,ENSMUSG00000074071,protein_coding -14819,Snrnp40,ENSMUSG00000074088,protein_coding -24203,Svip,ENSMUSG00000074093,protein_coding -36765,Rbm15b,ENSMUSG00000074102,protein_coding -24153,Mrgprx2,ENSMUSG00000074109,protein_coding -24110,Mrgpra9,ENSMUSG00000074111,protein_coding -24083,Saa1,ENSMUSG00000074115,protein_coding -24004,Ntf5,ENSMUSG00000074121,protein_coding -36556,7420426K07Rik,ENSMUSG00000074123,protein_coding -31291,Cmtm2a,ENSMUSG00000074127,protein_coding -23973,Rpl13a,ENSMUSG00000074129,protein_coding -36443,1700057G04Rik,ENSMUSG00000074139,protein_coding -23943,Il4i1,ENSMUSG00000074141,protein_coding -31166,Nlrc5,ENSMUSG00000074151,protein_coding -23848,Klk5,ENSMUSG00000074155,protein_coding -31131,Ces1h,ENSMUSG00000074156,protein_coding 
-23789,Zfp976,ENSMUSG00000074158,protein_coding -23759,Zfp788,ENSMUSG00000074165,protein_coding -23753,AW146154,ENSMUSG00000074166,protein_coding -23652,Plekhf1,ENSMUSG00000074170,protein_coding -36175,Gm10639,ENSMUSG00000074179,protein_coding -12562,Znhit6,ENSMUSG00000074182,protein_coding -36171,Gsta1,ENSMUSG00000074183,protein_coding -30974,Zfp791,ENSMUSG00000074194,protein_coding -12552,Clca4b,ENSMUSG00000074195,protein_coding -23403,Krtdap,ENSMUSG00000074199,protein_coding -30941,G430095P16Rik,ENSMUSG00000074203,protein_coding -12449,Adh6b,ENSMUSG00000074206,protein_coding -12446,Adh1,ENSMUSG00000074207,protein_coding -23356,E130208F15Rik,ENSMUSG00000074210,protein_coding -23355,Sdhaf1,ENSMUSG00000074211,protein_coding -12429,Dnajb14,ENSMUSG00000074212,protein_coding -30907,Gm10643,ENSMUSG00000074215,protein_coding -30902,2210011C24Rik,ENSMUSG00000074217,protein_coding -23341,Cox7a1,ENSMUSG00000074218,protein_coding -23337,Zfp382,ENSMUSG00000074220,protein_coding -23329,Zfp568,ENSMUSG00000074221,protein_coding -23300,Spint2,ENSMUSG00000074227,protein_coding -12248,Ap1ar,ENSMUSG00000074238,protein_coding -30701,Cib3,ENSMUSG00000074240,protein_coding -30653,Dda1,ENSMUSG00000074247,protein_coding -12113,4930432M17Rik,ENSMUSG00000074248,protein_coding -23157,Cyp2a4,ENSMUSG00000074254,protein_coding -35760,Gramd2,ENSMUSG00000074259,protein_coding -23121,Erich4,ENSMUSG00000074261,protein_coding -11999,Amy1,ENSMUSG00000074264,protein_coding -11994,Amy2a5,ENSMUSG00000074268,protein_coding -35742,Rec114,ENSMUSG00000074269,protein_coding -23116,Ceacam1,ENSMUSG00000074272,protein_coding -23065,Phldb3,ENSMUSG00000074277,protein_coding -23046,Zfp94,ENSMUSG00000074282,protein_coding -23040,Zfp109,ENSMUSG00000074283,protein_coding -23008,Vmn1r168,ENSMUSG00000074291,protein_coding -30487,Smim31,ENSMUSG00000074300,protein_coding -30486,Gm10663,ENSMUSG00000074302,protein_coding -35664,Peak1,ENSMUSG00000074305,protein_coding -22942,Vmn1r139,ENSMUSG00000074311,protein_coding 
-22922,Vmn1r132,ENSMUSG00000074322,protein_coding -22826,Apoc4,ENSMUSG00000074336,protein_coding -11839,Ovgp1,ENSMUSG00000074340,protein_coding -11835,I830077J02Rik,ENSMUSG00000074342,protein_coding -11834,Tmigd3,ENSMUSG00000074344,protein_coding -35617,Tnfaip8l3,ENSMUSG00000074345,protein_coding -22767,Ccdc61,ENSMUSG00000074358,protein_coding -22746,Psg23,ENSMUSG00000074359,protein_coding -22656,C5ar2,ENSMUSG00000074361,protein_coding -22646,Ehd2,ENSMUSG00000074364,protein_coding -22640,Crxos,ENSMUSG00000074365,protein_coding -22634,Obox5,ENSMUSG00000074366,protein_coding -22615,Obox2,ENSMUSG00000074369,protein_coding -22548,Sult2a3,ENSMUSG00000074375,protein_coding -22545,Sult2a4,ENSMUSG00000074377,protein_coding -22530,Bsph1,ENSMUSG00000074378,protein_coding -30114,AI429214,ENSMUSG00000074384,protein_coding -35391,Foxr1,ENSMUSG00000074397,protein_coding -11539,Hist2h3b,ENSMUSG00000074403,protein_coding -22150,Zfp865,ENSMUSG00000074405,protein_coding -22141,Zfp628,ENSMUSG00000074406,protein_coding -22075,Gm14548,ENSMUSG00000074417,protein_coding -22072,Gm15448,ENSMUSG00000074419,protein_coding -11398,Gm10696,ENSMUSG00000074424,protein_coding -11334,Lce3e,ENSMUSG00000074433,protein_coding -29802,Defa28,ENSMUSG00000074434,protein_coding -11305,Smcp,ENSMUSG00000074435,protein_coding -29783,Defa29,ENSMUSG00000074437,protein_coding -29780,Defa5,ENSMUSG00000074439,protein_coding -29779,Defa3,ENSMUSG00000074440,protein_coding -29777,Gm15292,ENSMUSG00000074441,protein_coding -29771,Defa31,ENSMUSG00000074442,protein_coding -29769,Defa22,ENSMUSG00000074443,protein_coding -29768,Defa30,ENSMUSG00000074444,protein_coding -11285,Sprr2a3,ENSMUSG00000074445,protein_coding -29759,Defa23,ENSMUSG00000074446,protein_coding -29757,Defa21,ENSMUSG00000074447,protein_coding -35056,Pate9,ENSMUSG00000074448,protein_coding -29742,Gm15319,ENSMUSG00000074449,protein_coding -35031,Pate2,ENSMUSG00000074452,protein_coding -29753,Defb33,ENSMUSG00000074454,protein_coding 
-11245,S100a16,ENSMUSG00000074457,protein_coding -34834,Zfp872,ENSMUSG00000074472,protein_coding -29577,A230072I06Rik,ENSMUSG00000074473,protein_coding -34805,Spc24,ENSMUSG00000074476,protein_coding -11126,Mex3a,ENSMUSG00000074480,protein_coding -11117,Bglap,ENSMUSG00000074483,protein_coding -11115,Bglap2,ENSMUSG00000074486,protein_coding -11113,Bglap3,ENSMUSG00000074489,protein_coding -29397,Clec4g,ENSMUSG00000074491,protein_coding -29373,A430078G23Rik,ENSMUSG00000074497,protein_coding -34658,Zfp558,ENSMUSG00000074500,protein_coding -34655,Ubtfl1,ENSMUSG00000074502,protein_coding -34638,Fat3,ENSMUSG00000074505,protein_coding -11011,Arfip1,ENSMUSG00000074513,protein_coding -7145,Zfp971,ENSMUSG00000074519,protein_coding -7141,Gm14327,ENSMUSG00000074521,protein_coding -7114,Gm14296,ENSMUSG00000074527,protein_coding -7142,Zfp972,ENSMUSG00000074529,protein_coding -34444,Gm10720,ENSMUSG00000074564,protein_coding -7006,Gcnt7,ENSMUSG00000074569,protein_coding -7003,Cass4,ENSMUSG00000074570,protein_coding -6945,Kcng1,ENSMUSG00000074575,protein_coding -6943,Mocs3,ENSMUSG00000074576,protein_coding -6933,Ripor3,ENSMUSG00000074577,protein_coding -10775,Lekr1,ENSMUSG00000074579,protein_coding -6894,Arfgef2,ENSMUSG00000074582,protein_coding -10628,Ankub1,ENSMUSG00000074591,protein_coding -6834,Spint5,ENSMUSG00000074593,protein_coding -6829,Wfdc9,ENSMUSG00000074594,protein_coding -6821,Wfdc6a,ENSMUSG00000074595,protein_coding -6820,Spint3,ENSMUSG00000074596,protein_coding -10510,Mgst2,ENSMUSG00000074604,protein_coding -6769,Tox2,ENSMUSG00000074607,protein_coding -10390,1700034I23Rik,ENSMUSG00000074619,protein_coding -6725,Mafb,ENSMUSG00000074622,protein_coding -6724,Gm826,ENSMUSG00000074623,protein_coding -6705,Arhgap40,ENSMUSG00000074625,protein_coding -6664,Mroh8,ENSMUSG00000074627,protein_coding -6660,Tldc2,ENSMUSG00000074628,protein_coding -42775,Tmem267,ENSMUSG00000074634,protein_coding -10261,Sox2,ENSMUSG00000074637,protein_coding 
-29188,Rdh16f2,ENSMUSG00000074639,protein_coding -6612,Cpne1,ENSMUSG00000074643,protein_coding -6607,6430550D23Rik,ENSMUSG00000074646,protein_coding -6602,Fam83c,ENSMUSG00000074647,protein_coding -6599,BC029722,ENSMUSG00000074649,protein_coding -42679,Mcidas,ENSMUSG00000074651,protein_coding -6591,Myh7b,ENSMUSG00000074652,protein_coding -10185,Lrrc31,ENSMUSG00000074653,protein_coding -10159,Gm1527,ENSMUSG00000074655,protein_coding -6567,Eif2s2,ENSMUSG00000074656,protein_coding -29150,Kif5a,ENSMUSG00000074657,protein_coding -6536,Bpifb4,ENSMUSG00000074665,protein_coding -6516,Tspyl3,ENSMUSG00000074671,protein_coding -6508,Ttll9,ENSMUSG00000074673,protein_coding -6506,Foxs1,ENSMUSG00000074676,protein_coding -10023,Sirpb1c,ENSMUSG00000074677,protein_coding -6492,Defb25,ENSMUSG00000074678,protein_coding -6483,Defb28,ENSMUSG00000074679,protein_coding -6482,Defb26,ENSMUSG00000074680,protein_coding -6478,Defb23,ENSMUSG00000074681,protein_coding -6474,Zcchc3,ENSMUSG00000074682,protein_coding -28973,Il22,ENSMUSG00000074695,protein_coding -6466,Csnk2a1,ENSMUSG00000074698,protein_coding -6446,Rad21l,ENSMUSG00000074704,protein_coding -42777,Ccl28,ENSMUSG00000074715,protein_coding -55540,AC140325.1,ENSMUSG00000074720,protein_coding -6401,Zfp345,ENSMUSG00000074731,protein_coding -55439,Zfp950,ENSMUSG00000074733,protein_coding -28915,Taf7l2,ENSMUSG00000074734,protein_coding -6392,Gm21994,ENSMUSG00000074735,protein_coding -6386,Syndig1,ENSMUSG00000074736,protein_coding -15581,Fndc10,ENSMUSG00000074738,protein_coding -6357,Thbd,ENSMUSG00000074743,protein_coding -55404,Pdzd8,ENSMUSG00000074746,protein_coding -28878,Atxn7l3b,ENSMUSG00000074748,protein_coding -6332,Kiz,ENSMUSG00000074749,protein_coding -6306,Smim26,ENSMUSG00000074754,protein_coding -6254,Sel1l2,ENSMUSG00000074764,protein_coding -6246,Ism1,ENSMUSG00000074766,protein_coding -42337,Bhmt,ENSMUSG00000074768,protein_coding -6223,Ankef1,ENSMUSG00000074771,protein_coding -28609,Anapc15-ps,ENSMUSG00000074780,protein_coding 
-28603,Ube2n,ENSMUSG00000074781,protein_coding -28584,Plxnc1,ENSMUSG00000074785,protein_coding -6137,Hspa12b,ENSMUSG00000074793,protein_coding -42196,Arrdc3,ENSMUSG00000074794,protein_coding -6125,Slc4a11,ENSMUSG00000074796,protein_coding -6124,Itpa,ENSMUSG00000074797,protein_coding -28478,Gas2l3,ENSMUSG00000074802,protein_coding -55200,Hps6,ENSMUSG00000074811,protein_coding -6043,Spdye4c,ENSMUSG00000074812,protein_coding -18604,Papolb,ENSMUSG00000074817,protein_coding -55176,Pdzd7,ENSMUSG00000074818,protein_coding -41946,Rslcan18,ENSMUSG00000074824,protein_coding -6006,Itpripl1,ENSMUSG00000074825,protein_coding -41936,Gm10767,ENSMUSG00000074826,protein_coding -37286,2010315B03Rik,ENSMUSG00000074829,protein_coding -41907,2410141K09Rik,ENSMUSG00000074832,protein_coding -6389,Gm10770,ENSMUSG00000074837,protein_coding -41836,Gm10775,ENSMUSG00000074847,protein_coding -41824,Spata31d1c,ENSMUSG00000074849,protein_coding -55111,Hpse2,ENSMUSG00000074852,protein_coding -28269,BC025920,ENSMUSG00000074862,protein_coding -41747,Platr25,ENSMUSG00000074863,protein_coding -41739,Zfp934,ENSMUSG00000074865,protein_coding -41721,Zfp808,ENSMUSG00000074867,protein_coding -41702,Cts3,ENSMUSG00000074870,protein_coding -41700,Ctsm,ENSMUSG00000074871,protein_coding -5968,Ctxn2,ENSMUSG00000074872,protein_coding -41678,Ctla2b,ENSMUSG00000074874,protein_coding -5927,Mageb3,ENSMUSG00000074881,protein_coding -55038,Cyp2c68,ENSMUSG00000074882,protein_coding -5913,Serf2,ENSMUSG00000074884,protein_coding -41540,Grk6,ENSMUSG00000074886,protein_coding -5896,Lcmt2,ENSMUSG00000074890,protein_coding -48726,B3galt5,ENSMUSG00000074892,protein_coding -41516,Eif4e1b,ENSMUSG00000074895,protein_coding -54939,Ifit3,ENSMUSG00000074896,protein_coding -5867,Sptbn5,ENSMUSG00000074899,protein_coding -54863,Ranbp6,ENSMUSG00000074909,protein_coding -54812,Gm815,ENSMUSG00000074913,protein_coding -5820,Chst14,ENSMUSG00000074916,protein_coding -5812,Inafm2,ENSMUSG00000074918,protein_coding 
-54778,Fam122a,ENSMUSG00000074922,protein_coding -5809,Pak6,ENSMUSG00000074923,protein_coding -54763,Ptar1,ENSMUSG00000074925,protein_coding -48527,Krtap14,ENSMUSG00000074928,protein_coding -5741,Grem1,ENSMUSG00000074934,protein_coding -5726,Chrm5,ENSMUSG00000074939,protein_coding -5710,Olfr1314,ENSMUSG00000074945,protein_coding -5709,Olfr1313,ENSMUSG00000074946,protein_coding -5708,Olfr1312,ENSMUSG00000074947,protein_coding -5703,Olfr1308,ENSMUSG00000074952,protein_coding -5699,Olfr1305,ENSMUSG00000074955,protein_coding -5665,Olfr1277,ENSMUSG00000074965,protein_coding -5662,Olfr1275,ENSMUSG00000074966,protein_coding -5655,Ano3,ENSMUSG00000074968,protein_coding -5651,Fibin,ENSMUSG00000074971,protein_coding -48249,Gabrr3,ENSMUSG00000074991,protein_coding -5575,Qser1,ENSMUSG00000074994,protein_coding -48242,Olfr201,ENSMUSG00000074995,protein_coding -48240,Olfr199,ENSMUSG00000074996,protein_coding -5573,Pin1rt1,ENSMUSG00000074997,protein_coding -27811,Nrbf2,ENSMUSG00000075000,protein_coding -48208,Gm813,ENSMUSG00000075002,protein_coding -5555,Gm10799,ENSMUSG00000075006,protein_coding -5518,Fjx1,ENSMUSG00000075012,protein_coding -5495,Gm10800,ENSMUSG00000075014,protein_coding -5494,Gm10801,ENSMUSG00000075015,protein_coding -5456,Accsl,ENSMUSG00000075023,protein_coding -5442,Prdm11,ENSMUSG00000075028,protein_coding -40868,Hist1h2bb,ENSMUSG00000075031,protein_coding -48156,Nxpe3,ENSMUSG00000075033,protein_coding -5405,Zfp408,ENSMUSG00000075040,protein_coding -54357,Slc22a29,ENSMUSG00000075044,protein_coding -27645,Gm4981,ENSMUSG00000075045,protein_coding -27644,Duxf3,ENSMUSG00000075046,protein_coding -40594,Yae1d1,ENSMUSG00000075054,protein_coding -5359,Olfr1272,ENSMUSG00000075061,protein_coding -5358,Olfr1271,ENSMUSG00000075062,protein_coding -5357,Olfr142,ENSMUSG00000075063,protein_coding -5355,Olfr1506,ENSMUSG00000075064,protein_coding -5352,Olfr1270,ENSMUSG00000075065,protein_coding -5351,Olfr32,ENSMUSG00000075066,protein_coding 
-5346,Olfr140,ENSMUSG00000075068,protein_coding -5344,Olfr1264,ENSMUSG00000075069,protein_coding -5336,Olfr48,ENSMUSG00000075072,protein_coding -5335,Olfr1256,ENSMUSG00000075073,protein_coding -5333,Olfr1254,ENSMUSG00000075074,protein_coding -5332,Olfr1253,ENSMUSG00000075075,protein_coding -5329,Olfr1250,ENSMUSG00000075078,protein_coding -5328,Olfr1249,ENSMUSG00000075079,protein_coding -5326,Olfr1247,ENSMUSG00000075081,protein_coding -5322,Olfr1243,ENSMUSG00000075084,protein_coding -5321,Olfr1242,ENSMUSG00000075085,protein_coding -5320,Olfr1241,ENSMUSG00000075086,protein_coding -5318,Olfr1239,ENSMUSG00000075088,protein_coding -5314,Olfr1234,ENSMUSG00000075090,protein_coding -5313,Olfr1233,ENSMUSG00000075091,protein_coding -5312,Olfr1232,ENSMUSG00000075092,protein_coding -5311,Olfr1231,ENSMUSG00000075093,protein_coding -5310,Olfr1230,ENSMUSG00000075094,protein_coding -5309,Olfr1229,ENSMUSG00000075095,protein_coding -5302,Olfr1226,ENSMUSG00000075097,protein_coding -5299,Olfr1224-ps1,ENSMUSG00000075099,protein_coding -5298,Olfr1223,ENSMUSG00000075100,protein_coding -5297,Olfr1222,ENSMUSG00000075101,protein_coding -5296,Olfr1221,ENSMUSG00000075102,protein_coding -5294,Olfr1219,ENSMUSG00000075104,protein_coding -5293,Olfr1218,ENSMUSG00000075105,protein_coding -5291,Olfr1216,ENSMUSG00000075107,protein_coding -5289,Olfr1214,ENSMUSG00000075110,protein_coding -5288,Gm13762,ENSMUSG00000075111,protein_coding -5286,Olfr1211,ENSMUSG00000075112,protein_coding -5285,Olfr1209,ENSMUSG00000075113,protein_coding -5284,Olfr1208,ENSMUSG00000075114,protein_coding -5278,Olfr1200,ENSMUSG00000075115,protein_coding -5276,Olfr1198,ENSMUSG00000075117,protein_coding -5275,Olfr1197,ENSMUSG00000075119,protein_coding -5274,Olfr1196,ENSMUSG00000075120,protein_coding -5273,Olfr1195,ENSMUSG00000075121,protein_coding -47977,Cd80,ENSMUSG00000075122,protein_coding -5258,Olfr1181,ENSMUSG00000075125,protein_coding -5256,Olfr1179,ENSMUSG00000075127,protein_coding 
-5254,Olfr1177-ps,ENSMUSG00000075128,protein_coding -5250,Olfr1173,ENSMUSG00000075132,protein_coding -5247,Olfr1170,ENSMUSG00000075133,protein_coding -5241,Olfr1164,ENSMUSG00000075136,protein_coding -5240,Olfr1163,ENSMUSG00000075137,protein_coding -5239,Olfr1162,ENSMUSG00000075139,protein_coding -5238,Olfr73,ENSMUSG00000075140,protein_coding -5236,Olfr1160,ENSMUSG00000075141,protein_coding -5233,Olfr74,ENSMUSG00000075142,protein_coding -5232,Olfr1157,ENSMUSG00000075143,protein_coding -5231,Olfr1156,ENSMUSG00000075144,protein_coding -5230,Olfr1155,ENSMUSG00000075145,protein_coding -5229,Olfr1154,ENSMUSG00000075146,protein_coding -5214,Olfr1141,ENSMUSG00000075148,protein_coding -5211,Olfr1138,ENSMUSG00000075149,protein_coding -5210,Olfr1137,ENSMUSG00000075150,protein_coding -5209,Olfr1136,ENSMUSG00000075151,protein_coding -5208,Olfr1135,ENSMUSG00000075153,protein_coding -5206,Olfr1133,ENSMUSG00000075155,protein_coding -5197,Olfr1128,ENSMUSG00000075156,protein_coding -5166,Olfr1111,ENSMUSG00000075158,protein_coding -5165,Olfr1110,ENSMUSG00000075159,protein_coding -5163,Olfr259,ENSMUSG00000075160,protein_coding -5162,Olfr1109,ENSMUSG00000075161,protein_coding -5160,Olfr1107,ENSMUSG00000075163,protein_coding -5159,Olfr1106,ENSMUSG00000075164,protein_coding -5158,Olfr1105,ENSMUSG00000075165,protein_coding -5157,Olfr1104,ENSMUSG00000075166,protein_coding -5154,Olfr1101,ENSMUSG00000075167,protein_coding -5152,Olfr1099,ENSMUSG00000075168,protein_coding -5150,Olfr1098,ENSMUSG00000075169,protein_coding -5149,Olfr1097,ENSMUSG00000075170,protein_coding -5147,Olfr1095,ENSMUSG00000075171,protein_coding -5141,Olfr1090,ENSMUSG00000075172,protein_coding -5138,Olfr1087,ENSMUSG00000075174,protein_coding -5137,Olfr1086,ENSMUSG00000075175,protein_coding -5136,Olfr1085,ENSMUSG00000075176,protein_coding -5130,Olfr1079,ENSMUSG00000075179,protein_coding -5118,Olfr1066,ENSMUSG00000075181,protein_coding -5113,Olfr1061,ENSMUSG00000075185,protein_coding 
-5109,Olfr1058,ENSMUSG00000075186,protein_coding -5108,Olfr1057,ENSMUSG00000075187,protein_coding -5107,Olfr1056,ENSMUSG00000075188,protein_coding -5106,Olfr1055,ENSMUSG00000075189,protein_coding -5104,Olfr1054,ENSMUSG00000075190,protein_coding -5102,Olfr1053,ENSMUSG00000075192,protein_coding -5100,Olfr1051,ENSMUSG00000075193,protein_coding -5098,Olfr1049,ENSMUSG00000075194,protein_coding -5097,Olfr1048,ENSMUSG00000075195,protein_coding -5096,Olfr1047,ENSMUSG00000075196,protein_coding -5095,Olfr1046,ENSMUSG00000075197,protein_coding -5094,Olfr1045,ENSMUSG00000075198,protein_coding -5093,Olfr52,ENSMUSG00000075199,protein_coding -5092,Olfr1044,ENSMUSG00000075200,protein_coding -5091,Olfr1043,ENSMUSG00000075201,protein_coding -5090,Olfr1042,ENSMUSG00000075202,protein_coding -5088,Olfr1040,ENSMUSG00000075203,protein_coding -5087,Olfr1039,ENSMUSG00000075204,protein_coding -5084,Olfr1037,ENSMUSG00000075205,protein_coding -5071,Olfr1024,ENSMUSG00000075206,protein_coding -5064,Olfr1019,ENSMUSG00000075208,protein_coding -5060,Olfr1016,ENSMUSG00000075209,protein_coding -5055,Olfr1012,ENSMUSG00000075210,protein_coding -5050,Olfr1006,ENSMUSG00000075211,protein_coding -5048,Olfr154,ENSMUSG00000075212,protein_coding -5045,Olfr1002,ENSMUSG00000075214,protein_coding -5043,Olfr1000,ENSMUSG00000075215,protein_coding -5038,4833423E24Rik,ENSMUSG00000075217,protein_coding -5037,Olfr995,ENSMUSG00000075218,protein_coding -5036,Olfr994,ENSMUSG00000075219,protein_coding -5034,Olfr993,ENSMUSG00000075220,protein_coding -5033,Olfr992,ENSMUSG00000075221,protein_coding -5029,Olfr988,ENSMUSG00000075222,protein_coding -5028,Olfr987,ENSMUSG00000075223,protein_coding -5024,Lrrc55,ENSMUSG00000075224,protein_coding -27379,Ccdc162,ENSMUSG00000075225,protein_coding -54259,Znhit2,ENSMUSG00000075227,protein_coding -47891,Ccdc58,ENSMUSG00000075229,protein_coding -27349,Amd1,ENSMUSG00000075232,protein_coding -4965,Fsip2,ENSMUSG00000075249,protein_coding -47848,Heg1,ENSMUSG00000075254,protein_coding 
-4929,Cerkl,ENSMUSG00000075256,protein_coding -27190,Cenpw,ENSMUSG00000075266,protein_coding -4898,Pjvk,ENSMUSG00000075267,protein_coding -47805,Bex6,ENSMUSG00000075269,protein_coding -4891,Pde11a,ENSMUSG00000075270,protein_coding -4890,Ttc30a1,ENSMUSG00000075271,protein_coding -4889,Ttc30a2,ENSMUSG00000075272,protein_coding -4888,Ttc30b,ENSMUSG00000075273,protein_coding -4822,Wipf1,ENSMUSG00000075284,protein_coding -54118,Carns1,ENSMUSG00000075289,protein_coding -54094,Aldh3b2,ENSMUSG00000075296,protein_coding -27041,H60b,ENSMUSG00000075297,protein_coding -4758,Erich2,ENSMUSG00000075302,protein_coding -4755,Sp5,ENSMUSG00000075304,protein_coding -4738,Klhl41,ENSMUSG00000075307,protein_coding -4690,Scn9a,ENSMUSG00000075316,protein_coding -4677,Scn2a,ENSMUSG00000075318,protein_coding -4662,Fign,ENSMUSG00000075324,protein_coding -26758,Zbtb2,ENSMUSG00000075327,protein_coding -4563,Rprm,ENSMUSG00000075334,protein_coding -47409,Igll1,ENSMUSG00000075370,protein_coding -4388,Rc3h2,ENSMUSG00000075376,protein_coding -4378,Olfr362,ENSMUSG00000075377,protein_coding -4377,Olfr361,ENSMUSG00000075378,protein_coding -4372,Olfr358,ENSMUSG00000075379,protein_coding -4369,Olfr355,ENSMUSG00000075380,protein_coding -4366,Olfr353,ENSMUSG00000075382,protein_coding -4364,Olfr351,ENSMUSG00000075383,protein_coding -4361,Olfr3,ENSMUSG00000075384,protein_coding -4349,Olfr341,ENSMUSG00000075387,protein_coding -47110,Hoxc4,ENSMUSG00000075394,protein_coding -47369,A630010A05Rik,ENSMUSG00000075395,protein_coding -47045,Krt76,ENSMUSG00000075402,protein_coding -47001,6030408B16Rik,ENSMUSG00000075408,protein_coding -40095,Prcd,ENSMUSG00000075410,protein_coding -4178,Fnbp1,ENSMUSG00000075415,protein_coding -4150,Dolk,ENSMUSG00000075419,protein_coding -40054,Smim6,ENSMUSG00000075420,protein_coding -4121,Gm13547,ENSMUSG00000075425,protein_coding -46843,Olfr288,ENSMUSG00000075427,protein_coding -4015,Dnlz,ENSMUSG00000075467,protein_coding -46733,Alg10b,ENSMUSG00000075470,protein_coding 
-34283,Slitrk1,ENSMUSG00000075478,protein_coding -34204,Commd6,ENSMUSG00000075486,protein_coding -34015,Kbtbd6,ENSMUSG00000075502,protein_coding -39764,Fam187a,ENSMUSG00000075510,protein_coding -3777,Malrd1,ENSMUSG00000075520,protein_coding -46511,4930407I10Rik,ENSMUSG00000075524,protein_coding -39682,Aarsd1,ENSMUSG00000075528,protein_coding -18763,Urad,ENSMUSG00000075543,protein_coding -18711,Cyp3a41a,ENSMUSG00000075551,protein_coding -18707,Cyp3a41b,ENSMUSG00000075552,protein_coding -39558,Krtap4-6,ENSMUSG00000075566,protein_coding -39545,Krtap1-4,ENSMUSG00000075567,protein_coding -18659,Rsph10b,ENSMUSG00000075569,protein_coding -39529,Krt26,ENSMUSG00000075570,protein_coding -33695,Defb30,ENSMUSG00000075571,protein_coding -33693,Defb43,ENSMUSG00000075572,protein_coding -33692,Defb47,ENSMUSG00000075573,protein_coding -33691,Defb48,ENSMUSG00000075574,protein_coding -39392,Hoxb2,ENSMUSG00000075588,protein_coding -46226,Nrbp2,ENSMUSG00000075590,protein_coding -33504,Nynrin,ENSMUSG00000075592,protein_coding -18508,Gal3st4,ENSMUSG00000075593,protein_coding -39352,Zfp652,ENSMUSG00000075595,protein_coding -18487,Smok3c,ENSMUSG00000075598,protein_coding -18485,Smok3a,ENSMUSG00000075599,protein_coding -46204,Zc3h3,ENSMUSG00000075600,protein_coding -46170,Ly6a,ENSMUSG00000075602,protein_coding -46160,Cyp11b1,ENSMUSG00000075604,protein_coding -46152,Slurp2,ENSMUSG00000075605,protein_coding -39315,Tmem92,ENSMUSG00000075610,protein_coding -10650,Selenot,ENSMUSG00000075700,protein_coding -24606,Selenos,ENSMUSG00000075701,protein_coding -37316,Selenom,ENSMUSG00000075702,protein_coding -16136,Selenoi,ENSMUSG00000075703,protein_coding -47496,Txnrd2,ENSMUSG00000075704,protein_coding -49316,Msrb1,ENSMUSG00000075705,protein_coding -28148,Gpx4,ENSMUSG00000075706,protein_coding -44853,Dio3,ENSMUSG00000075707,protein_coding -40998,Hus1b,ENSMUSG00000076430,protein_coding -40975,Sox4,ENSMUSG00000076431,protein_coding -43166,Ywhaq,ENSMUSG00000076432,protein_coding 
-40156,Cep295nl,ENSMUSG00000076433,protein_coding -6835,Wfdc3,ENSMUSG00000076434,protein_coding -39304,Acsf2,ENSMUSG00000076435,protein_coding -14621,Oxct2a,ENSMUSG00000076436,protein_coding -4997,Selenoh,ENSMUSG00000076437,protein_coding -14610,Oxct2b,ENSMUSG00000076438,protein_coding -50050,Mog,ENSMUSG00000076439,protein_coding -4185,Ass1,ENSMUSG00000076441,protein_coding -49741,Rab11b,ENSMUSG00000077450,protein_coding -25448,Olfr585,ENSMUSG00000078080,protein_coding -34665,Olfr828,ENSMUSG00000078116,protein_coding -25737,Olfr483,ENSMUSG00000078118,protein_coding -32545,Fam170b,ENSMUSG00000078127,protein_coding -6554,Actl10,ENSMUSG00000078129,protein_coding -39555,Gm11555,ENSMUSG00000078130,protein_coding -39546,Krtap1-3,ENSMUSG00000078131,protein_coding -39542,Gm11939,ENSMUSG00000078132,protein_coding -39497,Gm12355,ENSMUSG00000078134,protein_coding -5810,Ankrd63,ENSMUSG00000078137,protein_coding -40673,AK157302,ENSMUSG00000078139,protein_coding -31117,Capns2,ENSMUSG00000078144,protein_coding -37957,Psme2b,ENSMUSG00000078153,protein_coding -37951,Gm12184,ENSMUSG00000078154,protein_coding -37723,4931440F15Rik,ENSMUSG00000078157,protein_coding -12695,Erich3,ENSMUSG00000078161,protein_coding -11190,Lenep,ENSMUSG00000078173,protein_coding -19142,Rnf148,ENSMUSG00000078179,protein_coding -3060,Rbm8a2,ENSMUSG00000078184,protein_coding -3052,Chml,ENSMUSG00000078185,protein_coding -2570,Gm2000,ENSMUSG00000078193,protein_coding -6252,Gm17374,ENSMUSG00000078197,protein_coding -4380,Olfr364-ps1,ENSMUSG00000078198,protein_coding -3947,Tmem203,ENSMUSG00000078201,protein_coding -3932,Nrarp,ENSMUSG00000078202,protein_coding -7420,Fthl17d,ENSMUSG00000078206,protein_coding -7416,Fthl17b,ENSMUSG00000078208,protein_coding -7292,Btbd35f4,ENSMUSG00000078213,protein_coding -7271,Btbd35f3,ENSMUSG00000078218,protein_coding -15130,Klhdc7a,ENSMUSG00000078234,protein_coding -15084,Fam43b,ENSMUSG00000078235,protein_coding -53064,Gm3550,ENSMUSG00000078240,protein_coding 
-40259,Hmga1b,ENSMUSG00000078249,protein_coding -39593,Krtap17-1,ENSMUSG00000078252,protein_coding -39591,Krtap16-1,ENSMUSG00000078253,protein_coding -39590,Krtap29-1,ENSMUSG00000078254,protein_coding -39587,Krtap9-5,ENSMUSG00000078255,protein_coding -39584,Gm11565,ENSMUSG00000078256,protein_coding -39574,2300003K06Rik,ENSMUSG00000078257,protein_coding -39573,Gm11564,ENSMUSG00000078258,protein_coding -39571,Gm11554,ENSMUSG00000078259,protein_coding -39570,Gm11569,ENSMUSG00000078260,protein_coding -39569,Gm11596,ENSMUSG00000078261,protein_coding -39568,Krtap4-9,ENSMUSG00000078262,protein_coding -39564,Gm14180,ENSMUSG00000078269,protein_coding -39561,Gm14190,ENSMUSG00000078276,protein_coding -21669,Tas2r122,ENSMUSG00000078280,protein_coding -42445,Foxd1,ENSMUSG00000078302,protein_coding -35593,AI593442,ENSMUSG00000078307,protein_coding -8506,Fam47c,ENSMUSG00000078315,protein_coding -8323,F8a,ENSMUSG00000078317,protein_coding -7896,Tex13c1,ENSMUSG00000078320,protein_coding -7731,Btbd35f14,ENSMUSG00000078324,protein_coding -7516,H2al1n,ENSMUSG00000078346,protein_coding -26885,Sf3b5,ENSMUSG00000078348,protein_coding -15530,Smim1,ENSMUSG00000078350,protein_coding -13909,Ifna2,ENSMUSG00000078354,protein_coding -13908,Ifna16,ENSMUSG00000078355,protein_coding -12784,Mos,ENSMUSG00000078365,protein_coding -5145,Olfr141,ENSMUSG00000078420,protein_coding -29265,Sarnp,ENSMUSG00000078427,protein_coding -29128,Ctdsp2,ENSMUSG00000078429,protein_coding -28299,AU041133,ENSMUSG00000078435,protein_coding -28252,Smim24,ENSMUSG00000078439,protein_coding -28248,Dohh,ENSMUSG00000078440,protein_coding -28084,Ccdc105,ENSMUSG00000078442,protein_coding -27377,Ppil6,ENSMUSG00000078451,protein_coding -27049,Raet1d,ENSMUSG00000078452,protein_coding -26955,Abracl,ENSMUSG00000078453,protein_coding -26852,Gm10944,ENSMUSG00000078481,protein_coding -15625,Plekhn1,ENSMUSG00000078485,protein_coding -15624,Perm1,ENSMUSG00000078486,protein_coding -15592,Ankrd65,ENSMUSG00000078487,protein_coding 
-26781,Gm10945,ENSMUSG00000078488,protein_coding -15569,Cfap74,ENSMUSG00000078490,protein_coding -15488,Gm13090,ENSMUSG00000078491,protein_coding -15378,Zfp984,ENSMUSG00000078495,protein_coding -15366,Zfp982,ENSMUSG00000078496,protein_coding -15362,Zfp978,ENSMUSG00000078497,protein_coding -15361,Zfp988,ENSMUSG00000078498,protein_coding -15315,Zfp986,ENSMUSG00000078500,protein_coding -15300,Gm13212,ENSMUSG00000078502,protein_coding -15295,Zfp990,ENSMUSG00000078503,protein_coding -15281,Gm438,ENSMUSG00000078504,protein_coding -15278,Gm436,ENSMUSG00000078505,protein_coding -15275,Gm13124,ENSMUSG00000078506,protein_coding -15270,Aadacl3,ENSMUSG00000078507,protein_coding -15262,Gm13128,ENSMUSG00000078508,protein_coding -15250,Pramef17,ENSMUSG00000078509,protein_coding -15249,Gm13101,ENSMUSG00000078510,protein_coding -15248,Pramef25,ENSMUSG00000078511,protein_coding -15245,Pramef6,ENSMUSG00000078512,protein_coding -15235,Gm13088,ENSMUSG00000078513,protein_coding -15188,Ddi2,ENSMUSG00000078515,protein_coding -15117,Emc1,ENSMUSG00000078517,protein_coding -15098,Gm13030,ENSMUSG00000078518,protein_coding -15059,Cela3a,ENSMUSG00000078520,protein_coding -14968,Aunip,ENSMUSG00000078521,protein_coding -14820,Nkain1,ENSMUSG00000078532,protein_coding -42793,B020031M17Rik,ENSMUSG00000078537,protein_coding -49163,Zfp995,ENSMUSG00000078546,protein_coding -14791,Dcdc2b,ENSMUSG00000078552,protein_coding -14782,Fam229a,ENSMUSG00000078554,protein_coding -26462,Bnip3,ENSMUSG00000078566,protein_coding -14663,1110065P20Rik,ENSMUSG00000078570,protein_coding -40206,Ndufaf8,ENSMUSG00000078572,protein_coding -14586,Gm12887,ENSMUSG00000078575,protein_coding -14583,Gm12886,ENSMUSG00000078576,protein_coding -14571,Tmco2,ENSMUSG00000078577,protein_coding -12389,Ube2d3,ENSMUSG00000078578,protein_coding -26208,E430018J23Rik,ENSMUSG00000078580,protein_coding -14517,AU022252,ENSMUSG00000078584,protein_coding -14457,Ccdc24,ENSMUSG00000078588,protein_coding 
-12154,Gm10959,ENSMUSG00000078590,protein_coding -26086,Hs3st4,ENSMUSG00000078591,protein_coding -14384,1700042G07Rik,ENSMUSG00000078593,protein_coding -14351,Cyp4a12b,ENSMUSG00000078597,protein_coding -14325,Skint5,ENSMUSG00000078598,protein_coding -14304,Skint8,ENSMUSG00000078599,protein_coding -11886,Gm10961,ENSMUSG00000078604,protein_coding -39926,E030025P04Rik,ENSMUSG00000078605,protein_coding -25653,Gm4070,ENSMUSG00000078606,protein_coding -39885,1810010H24Rik,ENSMUSG00000078607,protein_coding -25625,Gm5901,ENSMUSG00000078611,protein_coding -14167,Fyb2,ENSMUSG00000078612,protein_coding -25556,Trim30c,ENSMUSG00000078616,protein_coding -39859,Smarcd2,ENSMUSG00000078619,protein_coding -11757,Gm10964,ENSMUSG00000078620,protein_coding -25509,Hbb-bh2,ENSMUSG00000078621,protein_coding -39854,Ccdc47,ENSMUSG00000078622,protein_coding -25483,Olfr613,ENSMUSG00000078624,protein_coding -14137,Gm12789,ENSMUSG00000078625,protein_coding -14136,Gm12790,ENSMUSG00000078626,protein_coding -39831,March10,ENSMUSG00000078627,protein_coding -25371,Tomt,ENSMUSG00000078630,protein_coding -39788,Lrrc37a,ENSMUSG00000078632,protein_coding -14047,Gm12695,ENSMUSG00000078639,protein_coding -39747,Gm11627,ENSMUSG00000078640,protein_coding -39679,G6pc,ENSMUSG00000078650,protein_coding -39673,Aoc2,ENSMUSG00000078651,protein_coding -39672,Psme3,ENSMUSG00000078652,protein_coding -39669,Cntd1,ENSMUSG00000078653,protein_coding -39665,Vps25,ENSMUSG00000078656,protein_coding -11341,Crnn,ENSMUSG00000078657,protein_coding -11279,Sprr2a1,ENSMUSG00000078664,protein_coding -39566,Gm11595,ENSMUSG00000078668,protein_coding -24752,Fam174b,ENSMUSG00000078670,protein_coding -24735,Chd2,ENSMUSG00000078671,protein_coding -13659,Mup20,ENSMUSG00000078672,protein_coding -13649,Mup19,ENSMUSG00000078673,protein_coding -13646,Mup18,ENSMUSG00000078674,protein_coding -13642,Mup16,ENSMUSG00000078675,protein_coding -39510,Casc3,ENSMUSG00000078676,protein_coding -24614,Gm10974,ENSMUSG00000078677,protein_coding 
-13627,Mup10,ENSMUSG00000078680,protein_coding -24598,Tm2d3,ENSMUSG00000078681,protein_coding -13623,Mup1,ENSMUSG00000078683,protein_coding -13620,Mup9,ENSMUSG00000078686,protein_coding -13616,Mup8,ENSMUSG00000078687,protein_coding -13614,Mup2,ENSMUSG00000078688,protein_coding -13611,Mup6,ENSMUSG00000078689,protein_coding -39445,Cisd3,ENSMUSG00000078695,protein_coding -24122,Mrgpra3,ENSMUSG00000078698,protein_coding -13366,Tomm5,ENSMUSG00000078713,protein_coding -13322,Tmem8b,ENSMUSG00000078716,protein_coding -13313,Msmp,ENSMUSG00000078719,protein_coding -13281,Fam205a1,ENSMUSG00000078721,protein_coding -13280,Gm12394,ENSMUSG00000078722,protein_coding -13274,Il11ra2,ENSMUSG00000078735,protein_coding -10568,Gm10985,ENSMUSG00000078742,protein_coding -13241,Fam205a4,ENSMUSG00000078746,protein_coding -13240,Gm20878,ENSMUSG00000078747,protein_coding -23558,Scgb1b30,ENSMUSG00000078752,protein_coding -23539,Scgb1b24,ENSMUSG00000078753,protein_coding -23437,Scgb2b3,ENSMUSG00000078754,protein_coding -23481,Scgb1b29,ENSMUSG00000078757,protein_coding -23452,Scgb1b7,ENSMUSG00000078759,protein_coding -23391,Haus5,ENSMUSG00000078762,protein_coding -39068,Slfn1,ENSMUSG00000078763,protein_coding -23379,U2af1l4,ENSMUSG00000078765,protein_coding -23333,Zfp566,ENSMUSG00000078768,protein_coding -38986,Evi2a,ENSMUSG00000078771,protein_coding -12968,Gm12353,ENSMUSG00000078772,protein_coding -12872,Rad54b,ENSMUSG00000078773,protein_coding -23228,9530053A07Rik,ENSMUSG00000078776,protein_coding -23216,Zfp59,ENSMUSG00000078779,protein_coding -10026,Gm5150,ENSMUSG00000078780,protein_coding -10019,Gm9733,ENSMUSG00000078783,protein_coding -10008,1810022K09Rik,ENSMUSG00000078784,protein_coding -23185,BC024978,ENSMUSG00000078786,protein_coding -23178,Cyp2t4,ENSMUSG00000078787,protein_coding -38833,Dph1,ENSMUSG00000078789,protein_coding -22720,Gm5155,ENSMUSG00000078793,protein_coding -22689,Dact3,ENSMUSG00000078794,protein_coding -22679,Ceacam15,ENSMUSG00000078795,protein_coding 
-22649,Zfp541,ENSMUSG00000078796,protein_coding -22541,Sult2a1,ENSMUSG00000078798,protein_coding -22534,Sult2a5,ENSMUSG00000078799,protein_coding -22533,Bsph2,ENSMUSG00000078800,protein_coding -22173,Vmn1r58,ENSMUSG00000078808,protein_coding -22100,Gp6,ENSMUSG00000078810,protein_coding -38599,Eif5a,ENSMUSG00000078812,protein_coding -22062,Leng1,ENSMUSG00000078813,protein_coding -22048,Cacng6,ENSMUSG00000078815,protein_coding -22040,Prkcg,ENSMUSG00000078816,protein_coding -22029,Nlrp12,ENSMUSG00000078817,protein_coding -31903,Gm10999,ENSMUSG00000078840,protein_coding -38267,Hist3h2a,ENSMUSG00000078851,protein_coding -38212,Igtp,ENSMUSG00000078853,protein_coding -7147,Zfp931,ENSMUSG00000078861,protein_coding -7143,Gm14326,ENSMUSG00000078862,protein_coding -7137,Gm14322,ENSMUSG00000078864,protein_coding -7135,Gm14406,ENSMUSG00000078865,protein_coding -7132,Zfp970,ENSMUSG00000078866,protein_coding -7129,Gm14418,ENSMUSG00000078867,protein_coding -7128,Gm14412,ENSMUSG00000078868,protein_coding -7127,Gm14409,ENSMUSG00000078869,protein_coding -7125,Gm14410,ENSMUSG00000078870,protein_coding -7119,Gm14401,ENSMUSG00000078872,protein_coding -7116,Gm14419,ENSMUSG00000078875,protein_coding -7112,Gm14408,ENSMUSG00000078876,protein_coding -7111,Gm14295,ENSMUSG00000078877,protein_coding -7108,Gm14305,ENSMUSG00000078878,protein_coding -7107,Zfp973,ENSMUSG00000078879,protein_coding -7106,Gm14308,ENSMUSG00000078880,protein_coding -7103,Gm14434,ENSMUSG00000078881,protein_coding -7092,Gm2026,ENSMUSG00000078886,protein_coding -7089,Gm6710,ENSMUSG00000078887,protein_coding -7086,Gm14288,ENSMUSG00000078889,protein_coding -7097,2210418O10Rik,ENSMUSG00000078894,protein_coding -7100,Gm11009,ENSMUSG00000078895,protein_coding -7080,Zfp965,ENSMUSG00000078896,protein_coding -7079,Gm4724,ENSMUSG00000078897,protein_coding -7077,Zfp968,ENSMUSG00000078898,protein_coding -7076,Gm4631,ENSMUSG00000078899,protein_coding -7074,Gm14440,ENSMUSG00000078901,protein_coding 
-7070,Gm14443,ENSMUSG00000078902,protein_coding -7071,Gm14391,ENSMUSG00000078903,protein_coding -7064,Gm14393,ENSMUSG00000078905,protein_coding -7061,Gm14444,ENSMUSG00000078906,protein_coding -46911,Fam186b,ENSMUSG00000078907,protein_coding -31553,Mon1b,ENSMUSG00000078908,protein_coding -6965,Gm11011,ENSMUSG00000078912,protein_coding -6942,Dpm1,ENSMUSG00000078919,protein_coding -37965,Ifi47,ENSMUSG00000078920,protein_coding -37963,Tgtp2,ENSMUSG00000078921,protein_coding -37960,Tgtp1,ENSMUSG00000078922,protein_coding -6921,Ube2v1,ENSMUSG00000078923,protein_coding -37923,Gm12169,ENSMUSG00000078924,protein_coding -42680,Cdc20b,ENSMUSG00000078926,protein_coding -31434,Pdf,ENSMUSG00000078931,protein_coding -46749,CN725425,ENSMUSG00000078932,protein_coding -35044,Pate13,ENSMUSG00000078934,protein_coding -6856,1700025C18Rik,ENSMUSG00000078935,protein_coding -46717,Cpt1b,ENSMUSG00000078937,protein_coding -46715,Syce3,ENSMUSG00000078938,protein_coding -6831,Wfdc11,ENSMUSG00000078940,protein_coding -42507,Ak6,ENSMUSG00000078941,protein_coding -42494,Naip6,ENSMUSG00000078942,protein_coding -42490,Naip2,ENSMUSG00000078945,protein_coding -6776,R3hdml,ENSMUSG00000078949,protein_coding -31295,Gm11020,ENSMUSG00000078953,protein_coding -46618,Arhgap8,ENSMUSG00000078954,protein_coding -42293,Atp6ap1l,ENSMUSG00000078958,protein_coding -53904,Hsbp1l1,ENSMUSG00000078963,protein_coding -31122,Ces1b,ENSMUSG00000078964,protein_coding -37530,Wdr92,ENSMUSG00000078970,protein_coding -6604,Gm15557,ENSMUSG00000078972,protein_coding -37515,Sec61g,ENSMUSG00000078974,protein_coding -45247,Gm11027,ENSMUSG00000078984,protein_coding -41965,Zfp429,ENSMUSG00000078994,protein_coding -41964,Zfp456,ENSMUSG00000078995,protein_coding -6537,Bpifa6,ENSMUSG00000078998,protein_coding -6524,4930404H24Rik,ENSMUSG00000079001,protein_coding -30899,Samd1,ENSMUSG00000079003,protein_coding -6426,Gm14147,ENSMUSG00000079005,protein_coding -6425,Gm14151,ENSMUSG00000079006,protein_coding 
-44661,Tcl1b4,ENSMUSG00000079007,protein_coding -6393,Gm14124,ENSMUSG00000079008,protein_coding -6390,Gm14139,ENSMUSG00000079009,protein_coding -37364,Gm11032,ENSMUSG00000079010,protein_coding -44638,Serpina3m,ENSMUSG00000079012,protein_coding -44636,Serpina3j,ENSMUSG00000079013,protein_coding -44635,Serpina3i,ENSMUSG00000079014,protein_coding -44614,Serpina1c,ENSMUSG00000079015,protein_coding -30702,Gm11034,ENSMUSG00000079016,protein_coding -44595,Ifi27l2a,ENSMUSG00000079017,protein_coding -46173,Ly6c1,ENSMUSG00000079018,protein_coding -30670,Insl3,ENSMUSG00000079019,protein_coding -46130,Slc45a4,ENSMUSG00000079020,protein_coding -46107,Col22a1,ENSMUSG00000079022,protein_coding -46045,Gm21961,ENSMUSG00000079024,protein_coding -46027,Gsdmc,ENSMUSG00000079025,protein_coding -44354,Gm5662,ENSMUSG00000079029,protein_coding -44306,BB287469,ENSMUSG00000079031,protein_coding -30585,Mef2b,ENSMUSG00000079033,protein_coding -44287,Gm8300,ENSMUSG00000079034,protein_coding -44276,Alkbh1,ENSMUSG00000079036,protein_coding -6163,Prnp,ENSMUSG00000079037,protein_coding -30548,D130040H23Rik,ENSMUSG00000079038,protein_coding -6138,Gm11037,ENSMUSG00000079039,protein_coding -30485,Apela,ENSMUSG00000079042,protein_coding -6121,Fastkd5,ENSMUSG00000079043,protein_coding -41033,Serpinb1c,ENSMUSG00000079049,protein_coding -6059,Vinac1,ENSMUSG00000079051,protein_coding -44102,Slc8a3,ENSMUSG00000079055,protein_coding -6018,Kcnip3,ENSMUSG00000079056,protein_coding -30217,Cyp4v3,ENSMUSG00000079057,protein_coding -30193,Adam34,ENSMUSG00000079058,protein_coding -43970,Gm11042,ENSMUSG00000079061,protein_coding -17485,BC005561,ENSMUSG00000079065,protein_coding -5949,Gm14085,ENSMUSG00000079071,protein_coding -34552,Jrkl,ENSMUSG00000079083,protein_coding -34553,Ccdc82,ENSMUSG00000079084,protein_coding -18738,Gm3404,ENSMUSG00000079091,protein_coding -40486,Prl2c2,ENSMUSG00000079092,protein_coding -18736,4930449I24Rik,ENSMUSG00000079093,protein_coding -5895,Tgm7,ENSMUSG00000079103,protein_coding 
-43689,Prps1l3,ENSMUSG00000079104,protein_coding -45318,C7,ENSMUSG00000079105,protein_coding -43650,Srp54c,ENSMUSG00000079108,protein_coding -18656,Pms2,ENSMUSG00000079109,protein_coding -5876,Capn3,ENSMUSG00000079110,protein_coding -18641,Kdelr2,ENSMUSG00000079111,protein_coding -29831,Fam90a1a,ENSMUSG00000079112,protein_coding -29804,Gm7861,ENSMUSG00000079113,protein_coding -29793,Gm7849,ENSMUSG00000079114,protein_coding -29772,Gm15293,ENSMUSG00000079116,protein_coding -29756,AY761185,ENSMUSG00000079120,protein_coding -3402,A130010J15Rik,ENSMUSG00000079144,protein_coding -18486,Smok3b,ENSMUSG00000079156,protein_coding -29504,Fam155a,ENSMUSG00000079157,protein_coding -36384,Trim43b,ENSMUSG00000079162,protein_coding -3217,Tlr5,ENSMUSG00000079164,protein_coding -18461,Sap25,ENSMUSG00000079165,protein_coding -29417,Cd209g,ENSMUSG00000079168,protein_coding -5659,Gm15130,ENSMUSG00000079169,protein_coding -5658,Gm13941,ENSMUSG00000079170,protein_coding -18441,Zan,ENSMUSG00000079173,protein_coding -5583,Gm11060,ENSMUSG00000079175,protein_coding -42859,Fam228a,ENSMUSG00000079177,protein_coding -2998,Mptx2,ENSMUSG00000079180,protein_coding -33546,Mphosph8,ENSMUSG00000079184,protein_coding -33532,Gzmc,ENSMUSG00000079186,protein_coding -55509,AC133103.1,ENSMUSG00000079190,protein_coding -55557,AC125149.1,ENSMUSG00000079192,protein_coding -33478,Psme2,ENSMUSG00000079197,protein_coding -18151,Zfp664,ENSMUSG00000079215,protein_coding -55526,AC132444.1,ENSMUSG00000079222,protein_coding -37284,Ccr5,ENSMUSG00000079227,protein_coding -37193,Ccdc13,ENSMUSG00000079235,protein_coding -37128,Xirp1,ENSMUSG00000079243,protein_coding -33110,Gm5622,ENSMUSG00000079244,protein_coding -4986,Gm13691,ENSMUSG00000079247,protein_coding -55242,Calhm1,ENSMUSG00000079258,protein_coding -37034,Trim71,ENSMUSG00000079259,protein_coding -37032,Tmppe,ENSMUSG00000079260,protein_coding -32911,Gm15217,ENSMUSG00000079261,protein_coding -21861,Slco1a6,ENSMUSG00000079262,protein_coding 
-21859,Gm6614,ENSMUSG00000079263,protein_coding -21841,Gm11077,ENSMUSG00000079264,protein_coding -32852,Gm8257,ENSMUSG00000079265,protein_coding -32835,Gm5930,ENSMUSG00000079267,protein_coding -32689,Gm3676,ENSMUSG00000079269,protein_coding -32696,1700049E17Rik1,ENSMUSG00000079271,protein_coding -4856,Hoxd3,ENSMUSG00000079277,protein_coding -17864,Tmem233,ENSMUSG00000079278,protein_coding -2330,2310009B15Rik,ENSMUSG00000079283,protein_coding -4808,Gm11084,ENSMUSG00000079286,protein_coding -21513,Klrb1b,ENSMUSG00000079298,protein_coding -21509,Klrb1,ENSMUSG00000079299,protein_coding -21486,Tex52,ENSMUSG00000079304,protein_coding -9822,Rab9,ENSMUSG00000079316,protein_coding -9821,Trappc2,ENSMUSG00000079317,protein_coding -36820,Gm20661,ENSMUSG00000079323,protein_coding -4719,4932414N04Rik,ENSMUSG00000079324,protein_coding -2139,Lemd1,ENSMUSG00000079330,protein_coding -50892,Gm11096,ENSMUSG00000079333,protein_coding -36796,Naa80,ENSMUSG00000079334,protein_coding -54940,Ifit1bl1,ENSMUSG00000079339,protein_coding -54916,Lipo1,ENSMUSG00000079342,protein_coding -21345,C1s2,ENSMUSG00000079343,protein_coding -54907,Lipo4,ENSMUSG00000079344,protein_coding -9665,Magea5,ENSMUSG00000079349,protein_coding -9662,Magea8,ENSMUSG00000079350,protein_coding -4606,Gm11099,ENSMUSG00000079353,protein_coding -36688,Ackr4,ENSMUSG00000079355,protein_coding -17497,Gm43302,ENSMUSG00000079362,protein_coding -17496,Gbp4,ENSMUSG00000079363,protein_coding -32080,Gm3558,ENSMUSG00000079364,protein_coding -32034,Gm3476,ENSMUSG00000079371,protein_coding -9515,Gm8334,ENSMUSG00000079374,protein_coding -31991,Gm8279,ENSMUSG00000079378,protein_coding -31995,Gm8271,ENSMUSG00000079380,protein_coding -31987,Gm3298,ENSMUSG00000079383,protein_coding -31963,Gm3173,ENSMUSG00000079386,protein_coding -9520,Luzp4,ENSMUSG00000079387,protein_coding -31959,2610042L04Rik,ENSMUSG00000079388,protein_coding -31958,Gm3149,ENSMUSG00000079389,protein_coding -31954,Gm2974,ENSMUSG00000079391,protein_coding 
-9511,Gm15114,ENSMUSG00000079395,protein_coding -32039,Gm3411,ENSMUSG00000079396,protein_coding -31932,Gm3020,ENSMUSG00000079402,protein_coding -31915,Gm5795,ENSMUSG00000079409,protein_coding -31909,Gm2897,ENSMUSG00000079410,protein_coding -54582,Cntf,ENSMUSG00000079415,protein_coding -17369,Gm11111,ENSMUSG00000079416,protein_coding -9401,Atg4a,ENSMUSG00000079418,protein_coding -54504,Ms4a6c,ENSMUSG00000079419,protein_coding -4317,4930402F06Rik,ENSMUSG00000079421,protein_coding -17314,Gm3286,ENSMUSG00000079423,protein_coding -17309,Gm3259,ENSMUSG00000079424,protein_coding -21040,Arpc4,ENSMUSG00000079426,protein_coding -36365,Mthfsl,ENSMUSG00000079427,protein_coding -9293,Tceal7,ENSMUSG00000079428,protein_coding -1546,Mroh2a,ENSMUSG00000079429,protein_coding -9252,Gm15023,ENSMUSG00000079432,protein_coding -36293,Gm11114,ENSMUSG00000079433,protein_coding -1511,Neu2,ENSMUSG00000079434,protein_coding -9230,Rpl36a,ENSMUSG00000079435,protein_coding -1507,Kcnj13,ENSMUSG00000079436,protein_coding -17150,Gm11115,ENSMUSG00000079438,protein_coding -17148,Gm11116,ENSMUSG00000079439,protein_coding -1493,Alpi,ENSMUSG00000079440,protein_coding -4234,St6galnac4,ENSMUSG00000079442,protein_coding -50345,Gm21981,ENSMUSG00000079444,protein_coding -1456,B3gnt7,ENSMUSG00000079445,protein_coding -9110,Cldn34c1,ENSMUSG00000079450,protein_coding -17081,Tmprss11g,ENSMUSG00000079451,protein_coding -9006,4933403O08Rik,ENSMUSG00000079460,protein_coding -20816,Gm15737,ENSMUSG00000079462,protein_coding -1351,Col4a3,ENSMUSG00000079465,protein_coding -4190,Prdm12,ENSMUSG00000079466,protein_coding -36063,Pigb,ENSMUSG00000079469,protein_coding -1300,Utp14b,ENSMUSG00000079470,protein_coding -50264,Mymx,ENSMUSG00000079471,protein_coding -8829,1700011M02Rik,ENSMUSG00000079476,protein_coding -20678,Rab7,ENSMUSG00000079477,protein_coding -54231,Sssca1,ENSMUSG00000079478,protein_coding -8812,Gm9112,ENSMUSG00000079479,protein_coding -8798,Pin4,ENSMUSG00000079480,protein_coding 
-8793,Nhsl2,ENSMUSG00000079481,protein_coding -4148,Phyhd1,ENSMUSG00000079484,protein_coding -8772,Med12,ENSMUSG00000079487,protein_coding -49969,Gm11127,ENSMUSG00000079492,protein_coding -20578,Nat8f5,ENSMUSG00000079494,protein_coding -20577,Nat8f6,ENSMUSG00000079495,protein_coding -4120,Gm13420,ENSMUSG00000079497,protein_coding -4105,Cfap77,ENSMUSG00000079502,protein_coding -49910,H2-Q1,ENSMUSG00000079507,protein_coding -8663,Apoo,ENSMUSG00000079508,protein_coding -8655,Zfx,ENSMUSG00000079509,protein_coding -20482,Gm42688,ENSMUSG00000079511,protein_coding -8598,4932429P05Rik,ENSMUSG00000079513,protein_coding -20413,Reg3a,ENSMUSG00000079516,protein_coding -8494,Gm14743,ENSMUSG00000079519,protein_coding -8492,Gm5938,ENSMUSG00000079521,protein_coding -8486,Gm14744,ENSMUSG00000079522,protein_coding -20365,Tmsb10,ENSMUSG00000079523,protein_coding -8467,Cldn34d,ENSMUSG00000079525,protein_coding -8414,Gm5936,ENSMUSG00000079531,protein_coding -8406,Gm6890,ENSMUSG00000079532,protein_coding -8399,Gm5640,ENSMUSG00000079534,protein_coding -8389,Gm6880,ENSMUSG00000079536,protein_coding -3993,Obp2b,ENSMUSG00000079539,protein_coding -48399,Gm11146,ENSMUSG00000079546,protein_coding -49792,H2-DMb1,ENSMUSG00000079547,protein_coding -912,Mpp4,ENSMUSG00000079550,protein_coding -49751,Kifc1,ENSMUSG00000079553,protein_coding -882,Aox2,ENSMUSG00000079554,protein_coding -16280,Haus3,ENSMUSG00000079555,protein_coding -49739,March2,ENSMUSG00000079557,protein_coding -35572,Colca2,ENSMUSG00000079559,protein_coding -19764,Hoxa3,ENSMUSG00000079560,protein_coding -16252,Maea,ENSMUSG00000079562,protein_coding -49685,Pglyrp2,ENSMUSG00000079563,protein_coding -8320,Spin2d,ENSMUSG00000079566,protein_coding -8220,Gm14692,ENSMUSG00000079577,protein_coding -8219,Gm1140,ENSMUSG00000079578,protein_coding -8174,Gm6760,ENSMUSG00000079579,protein_coding -49582,Tmem217,ENSMUSG00000079580,protein_coding -8133,Gm7073,ENSMUSG00000079583,protein_coding -8099,Gm364,ENSMUSG00000079584,protein_coding 
-622,Tmem182,ENSMUSG00000079588,protein_coding -35357,C1qtnf5,ENSMUSG00000079592,protein_coding -8022,Gm14597,ENSMUSG00000079593,protein_coding -47912,Cstdc6,ENSMUSG00000079594,protein_coding -47897,Cstdc4,ENSMUSG00000079597,protein_coding -19375,Clec2l,ENSMUSG00000079598,protein_coding -9811,Gm17604,ENSMUSG00000079600,protein_coding -3548,9230102O04Rik,ENSMUSG00000079602,protein_coding -49452,Zbtb9,ENSMUSG00000079605,protein_coding -7945,Gm595,ENSMUSG00000079606,protein_coding -53770,Stard6,ENSMUSG00000079608,protein_coding -519,Ankrd39,ENSMUSG00000079610,protein_coding -53734,Seh1l,ENSMUSG00000079614,protein_coding -7858,Cypt15,ENSMUSG00000079619,protein_coding -47827,Muc4,ENSMUSG00000079620,protein_coding -47814,Tm4sf19,ENSMUSG00000079625,protein_coding -7823,Rhox7b,ENSMUSG00000079626,protein_coding -7816,Rhox2h,ENSMUSG00000079627,protein_coding -7813,Rhox4g,ENSMUSG00000079628,protein_coding -7812,Rhox2g,ENSMUSG00000079629,protein_coding -7808,Rhox4f,ENSMUSG00000079630,protein_coding -7802,Rhox4d,ENSMUSG00000079633,protein_coding -7798,Rhox4c,ENSMUSG00000079635,protein_coding -7797,Rhox3c,ENSMUSG00000079636,protein_coding -7796,Rhox2c,ENSMUSG00000079637,protein_coding -7793,Rhox2b,ENSMUSG00000079638,protein_coding -7790,Rhox4a,ENSMUSG00000079639,protein_coding -7779,Rpl39,ENSMUSG00000079641,protein_coding -7758,Gm6268,ENSMUSG00000079642,protein_coding -34906,Gm1110,ENSMUSG00000079644,protein_coding -19200,Fam71f2,ENSMUSG00000079652,protein_coding -19196,Prrt4,ENSMUSG00000079654,protein_coding -7648,Gm5168,ENSMUSG00000079655,protein_coding -49294,Rab26,ENSMUSG00000079657,protein_coding -209,Eloc,ENSMUSG00000079658,protein_coding -15746,Tmem243,ENSMUSG00000079659,protein_coding -15687,1700015F17Rik,ENSMUSG00000079666,protein_coding -34770,Fdx1l,ENSMUSG00000079677,protein_coding -19020,Vwde,ENSMUSG00000079679,protein_coding -34767,Zglp1,ENSMUSG00000079681,protein_coding -26787,Ulbp1,ENSMUSG00000079685,protein_coding -7434,Gm14862,ENSMUSG00000079694,protein_coding 
-7410,Gm5751,ENSMUSG00000079697,protein_coding -49060,Vmn2r93,ENSMUSG00000079698,protein_coding -7409,Gm6592,ENSMUSG00000079699,protein_coding -49049,Fpr3,ENSMUSG00000079700,protein_coding -7400,Ssxb3,ENSMUSG00000079701,protein_coding -7399,Ssxb6,ENSMUSG00000079702,protein_coding -7405,Ssxb8,ENSMUSG00000079703,protein_coding -7398,Gm14459,ENSMUSG00000079704,protein_coding -7396,Ssxb1,ENSMUSG00000079705,protein_coding -48988,Tcte3,ENSMUSG00000079707,protein_coding -48986,Gm3448,ENSMUSG00000079710,protein_coding -48839,Ttll2,ENSMUSG00000079722,protein_coding -47341,3110001I22Rik,ENSMUSG00000079737,protein_coding -47290,Gm11172,ENSMUSG00000079740,protein_coding -55515,AC087559.2,ENSMUSG00000079773,protein_coding -55514,Fam205a2,ENSMUSG00000079774,protein_coding -55558,AC125149.2,ENSMUSG00000079794,protein_coding -55555,AC125149.3,ENSMUSG00000079800,protein_coding -51220,Gm21719,ENSMUSG00000079806,protein_coding -55562,AC168977.1,ENSMUSG00000079808,protein_coding -55503,Tmlhe,ENSMUSG00000079834,protein_coding -29706,Spag11a,ENSMUSG00000079842,protein_coding -8313,Xlr4a,ENSMUSG00000079845,protein_coding -21577,Klra4,ENSMUSG00000079852,protein_coding -21592,Klra1,ENSMUSG00000079853,protein_coding -40674,Gm11273,ENSMUSG00000079941,protein_coding -29133,Eef1akmt3,ENSMUSG00000080115,protein_coding -54170,Brms1,ENSMUSG00000080268,protein_coding -49041,Spaca6,ENSMUSG00000080316,protein_coding -5251,Olfr1174-ps,ENSMUSG00000080713,protein_coding -48752,B230307C23Rik,ENSMUSG00000080717,protein_coding -7657,Gm6121,ENSMUSG00000080725,protein_coding -12060,4930455H04Rik,ENSMUSG00000080907,protein_coding -7809,Rhox3g,ENSMUSG00000080933,protein_coding -50099,Olfr118,ENSMUSG00000080990,protein_coding -8650,AU015836,ENSMUSG00000081044,protein_coding -11526,Hist2h3c2,ENSMUSG00000081058,protein_coding -7633,Gm4297,ENSMUSG00000081218,protein_coding -14035,Cyp2j12,ENSMUSG00000081225,protein_coding -5184,Olfr1120,ENSMUSG00000081234,protein_coding 
-14039,Cyp2j7,ENSMUSG00000081362,protein_coding -8811,Gm3880,ENSMUSG00000081443,protein_coding -46831,Slc48a1,ENSMUSG00000081534,protein_coding -9413,Gm15294,ENSMUSG00000081607,protein_coding -21138,Olfr213,ENSMUSG00000081649,protein_coding -49900,Gm16181,ENSMUSG00000081650,protein_coding -18206,Fzd10,ENSMUSG00000081683,protein_coding -50121,Olfr129,ENSMUSG00000081724,protein_coding -38124,Gm12216,ENSMUSG00000081769,protein_coding -5257,Olfr1180,ENSMUSG00000081836,protein_coding -39096,Rpl9-ps1,ENSMUSG00000081906,protein_coding -25477,Olfr607,ENSMUSG00000081945,protein_coding -5270,Olfr1191-ps1,ENSMUSG00000081948,protein_coding -1544,Dnajb3,ENSMUSG00000081984,protein_coding -6528,Dnmt3c,ENSMUSG00000082079,protein_coding -39079,Slfn14,ENSMUSG00000082101,protein_coding -29782,Defa27,ENSMUSG00000082211,protein_coding -8841,Nap1l2,ENSMUSG00000082229,protein_coding -40816,Vmn1r221,ENSMUSG00000082316,protein_coding -17213,Btc,ENSMUSG00000082361,protein_coding -9058,H2afb2,ENSMUSG00000082482,protein_coding -7649,Gm2012,ENSMUSG00000082639,protein_coding -10065,1700064H15Rik,ENSMUSG00000082766,protein_coding -48667,Gm5678,ENSMUSG00000082815,protein_coding -5263,Olfr1186,ENSMUSG00000082882,protein_coding -14043,Cyp2j8,ENSMUSG00000082932,protein_coding -29754,Gm15056,ENSMUSG00000082976,protein_coding -5363,Olfr1274-ps,ENSMUSG00000082980,protein_coding -18648,Fam220a,ENSMUSG00000083012,protein_coding -14348,Cyp4a29,ENSMUSG00000083138,protein_coding -44885,4930595D18Rik,ENSMUSG00000083193,protein_coding -54156,Ctsf,ENSMUSG00000083282,protein_coding -4375,Olfr360,ENSMUSG00000083361,protein_coding -9088,H2afb3,ENSMUSG00000083616,protein_coding -7651,Gm2030,ENSMUSG00000083628,protein_coding -22161,Rasl2-9,ENSMUSG00000083649,protein_coding -9790,Rnf138rt1,ENSMUSG00000083695,protein_coding -5180,Olfr1118,ENSMUSG00000083706,protein_coding -14174,Gm12728,ENSMUSG00000083780,protein_coding -5252,Olfr1175-ps,ENSMUSG00000083855,protein_coding 
-50104,Olfr122,ENSMUSG00000083947,protein_coding -7632,Gm5934,ENSMUSG00000084063,protein_coding -31408,Esrp2,ENSMUSG00000084128,protein_coding -37504,Pom121l12,ENSMUSG00000084135,protein_coding -40779,Vmn1r207-ps,ENSMUSG00000084136,protein_coding -23256,Sycn,ENSMUSG00000084174,protein_coding -24182,4933405O20Rik,ENSMUSG00000084234,protein_coding -25452,Olfr588-ps1,ENSMUSG00000084262,protein_coding -5350,Olfr1269,ENSMUSG00000084336,protein_coding -14353,Cyp4a30b,ENSMUSG00000084346,protein_coding -13702,Gm11213,ENSMUSG00000084782,protein_coding -34744,Ubl5,ENSMUSG00000084786,protein_coding -15585,Tmem240,ENSMUSG00000084845,protein_coding -44708,Ccdc85c,ENSMUSG00000084883,protein_coding -6568,Gm14226,ENSMUSG00000084897,protein_coding -32157,Gm281,ENSMUSG00000084902,protein_coding -21853,Gm5724,ENSMUSG00000084927,protein_coding -55325,Bbip1,ENSMUSG00000084957,protein_coding -1664,Crocc2,ENSMUSG00000084989,protein_coding -10301,Gm11549,ENSMUSG00000085007,protein_coding -291,Khdc1b,ENSMUSG00000085079,protein_coding -28387,Ascl4,ENSMUSG00000085111,protein_coding -9446,A730046J19Rik,ENSMUSG00000085139,protein_coding -22145,Sbk3,ENSMUSG00000085272,protein_coding -3996,Gm13539,ENSMUSG00000085484,protein_coding -39695,Gm11634,ENSMUSG00000085486,protein_coding -34871,Dpy19l2,ENSMUSG00000085576,protein_coding -9437,Rtl9,ENSMUSG00000085584,protein_coding -22783,Gm4969,ENSMUSG00000085601,protein_coding -38472,Tmem238l,ENSMUSG00000085683,protein_coding -37798,4930469K13Rik,ENSMUSG00000085684,protein_coding -44186,Lin52,ENSMUSG00000085793,protein_coding -29964,Zfp703,ENSMUSG00000085795,protein_coding -40901,Armh2,ENSMUSG00000085861,protein_coding -44755,Rtl1,ENSMUSG00000085925,protein_coding -18323,Syna,ENSMUSG00000085957,protein_coding -42948,Rad51ap2,ENSMUSG00000086022,protein_coding -19827,Wipf3,ENSMUSG00000086040,protein_coding -2815,Gm4846,ENSMUSG00000086056,protein_coding -15344,Zfp989,ENSMUSG00000086147,protein_coding -36060,Ccpg1os,ENSMUSG00000086158,protein_coding 
-35885,Ubap1l,ENSMUSG00000086228,protein_coding -2694,4930558K02Rik,ENSMUSG00000086277,protein_coding -9633,Nbdy,ENSMUSG00000086316,protein_coding -14954,E130218I03Rik,ENSMUSG00000086322,protein_coding -5148,Gm13723,ENSMUSG00000086338,protein_coding -46137,Gm6569,ENSMUSG00000086361,protein_coding -39496,Lrrc3c,ENSMUSG00000086545,protein_coding -11686,Cd101,ENSMUSG00000086564,protein_coding -37025,Susd5,ENSMUSG00000086596,protein_coding -4996,Btbd18,ENSMUSG00000086598,protein_coding -396,4931428L18Rik,ENSMUSG00000086727,protein_coding -22137,Isoc2a,ENSMUSG00000086784,protein_coding -54911,Gm8978,ENSMUSG00000086812,protein_coding -16130,3110082J24Rik,ENSMUSG00000086815,protein_coding -11307,Lce6a,ENSMUSG00000086848,protein_coding -38205,Gm12248,ENSMUSG00000086962,protein_coding -47499,Rtl10,ENSMUSG00000086965,protein_coding -5458,Gm13889,ENSMUSG00000087006,protein_coding -6447,Tmem74bos,ENSMUSG00000087035,protein_coding -44890,Lbhd2,ENSMUSG00000087075,protein_coding -11753,Atg4a-ps,ENSMUSG00000087119,protein_coding -48076,Plcxd2,ENSMUSG00000087141,protein_coding -14067,L1td1,ENSMUSG00000087166,protein_coding -14320,Skint6,ENSMUSG00000087194,protein_coding -15439,Tmem274,ENSMUSG00000087198,protein_coding -2267,Mroh3,ENSMUSG00000087230,protein_coding -3127,Kif28,ENSMUSG00000087236,protein_coding -55,Alkal1,ENSMUSG00000087247,protein_coding -11876,Lamtor5,ENSMUSG00000087260,protein_coding -38650,4930544D05Rik,ENSMUSG00000087279,protein_coding -54915,Lipo2,ENSMUSG00000087303,protein_coding -41258,Tmem170b,ENSMUSG00000087370,protein_coding -23905,Gm15517,ENSMUSG00000087376,protein_coding -14537,Frg2f1,ENSMUSG00000087385,protein_coding -9600,Kantr,ENSMUSG00000087403,protein_coding -30593,Cers1,ENSMUSG00000087408,protein_coding -46967,Gm5475,ENSMUSG00000087444,protein_coding -37452,Cccd201,ENSMUSG00000087512,protein_coding -23038,Zfp111,ENSMUSG00000087598,protein_coding -11569,Gm16253,ENSMUSG00000087610,protein_coding -28337,1500009L16Rik,ENSMUSG00000087651,protein_coding 
-4007,Tmem250-ps,ENSMUSG00000087679,protein_coding -29386,Pet100,ENSMUSG00000087687,protein_coding -23183,Mia,ENSMUSG00000089661,protein_coding -29392,Fcor,ENSMUSG00000089665,protein_coding -38576,Tnfsf13,ENSMUSG00000089669,protein_coding -1530,Ugt1a8,ENSMUSG00000089675,protein_coding -45393,Agxt2,ENSMUSG00000089678,protein_coding -33432,Bcl2l2,ENSMUSG00000089682,protein_coding -14865,Rab42,ENSMUSG00000089687,protein_coding -20571,Nat8f7,ENSMUSG00000089694,protein_coding -11403,Gm4778,ENSMUSG00000089696,protein_coding -31835,Galnt2,ENSMUSG00000089704,protein_coding -46427,Cbx6,ENSMUSG00000089715,protein_coding -13483,Olfr275,ENSMUSG00000089717,protein_coding -40004,Cd300ld5,ENSMUSG00000089722,protein_coding -39567,Krtap4-8,ENSMUSG00000089724,protein_coding -21582,Klra8,ENSMUSG00000089727,protein_coding -29424,Tgfbr3l,ENSMUSG00000089736,protein_coding -6922,Gm20431,ENSMUSG00000089739,protein_coding -40002,Cd300ld2,ENSMUSG00000089753,protein_coding -7091,Zfp966,ENSMUSG00000089756,protein_coding -4160,Ier5l,ENSMUSG00000089762,protein_coding -9332,Tmsb15b1,ENSMUSG00000089768,protein_coding -14307,Skint1,ENSMUSG00000089773,protein_coding -48636,Slc5a3,ENSMUSG00000089774,protein_coding -29182,Rdh1,ENSMUSG00000089789,protein_coding -17556,1700028K03Rik,ENSMUSG00000089798,protein_coding -17375,Rasgef1b,ENSMUSG00000089809,protein_coding -6615,Rbm12,ENSMUSG00000089824,protein_coding -23194,Shkbp1,ENSMUSG00000089832,protein_coding -46426,Npcd,ENSMUSG00000089837,protein_coding -33561,Gm4491,ENSMUSG00000089840,protein_coding -1412,A530032D15Rik,ENSMUSG00000089844,protein_coding -25638,Timm10b,ENSMUSG00000089847,protein_coding -30680,Zfp882,ENSMUSG00000089857,protein_coding -5361,Gm13769,ENSMUSG00000089859,protein_coding -18985,Umad1,ENSMUSG00000089862,protein_coding -36057,Gm44503,ENSMUSG00000089865,protein_coding -3338,Rps6kc1,ENSMUSG00000089872,protein_coding -13636,Mup13,ENSMUSG00000089873,protein_coding -38584,Tmem102,ENSMUSG00000089876,protein_coding 
-5277,Olfr1199,ENSMUSG00000089892,protein_coding -32804,Gm8113,ENSMUSG00000089901,protein_coding -12047,Mfsd14a,ENSMUSG00000089911,protein_coding -7232,Uckl1,ENSMUSG00000089917,protein_coding -43557,Gm43517,ENSMUSG00000089922,protein_coding -36387,Bcl2a1b,ENSMUSG00000089929,protein_coding -22073,Pira2,ENSMUSG00000089942,protein_coding -1540,Ugt1a5,ENSMUSG00000089943,protein_coding -13567,Pakap,ENSMUSG00000089945,protein_coding -7101,Gm14435,ENSMUSG00000089951,protein_coding -3941,Rnf224,ENSMUSG00000089953,protein_coding -1545,Ugt1a1,ENSMUSG00000089960,protein_coding -18456,Fbxo24,ENSMUSG00000089984,protein_coding -23974,Gm45713,ENSMUSG00000089989,protein_coding -16696,G6pd2,ENSMUSG00000089992,protein_coding -9331,Tmsb15b2,ENSMUSG00000089996,protein_coding -20655,1810020O05Rik,ENSMUSG00000089997,protein_coding -53843,Ier3ip1,ENSMUSG00000090000,protein_coding -17640,Gm15446,ENSMUSG00000090015,protein_coding -19678,Gimap1,ENSMUSG00000090019,protein_coding -28665,Galnt4,ENSMUSG00000090035,protein_coding -13565,Palm2,ENSMUSG00000090053,protein_coding -5114,Olfr1062,ENSMUSG00000090059,protein_coding -16706,Nwd2,ENSMUSG00000090061,protein_coding -12439,1110002E22Rik,ENSMUSG00000090066,protein_coding -1226,Cdk5r2,ENSMUSG00000090071,protein_coding -7224,Lime1,ENSMUSG00000090077,protein_coding -49588,Rnf8,ENSMUSG00000090083,protein_coding -7449,Srpx,ENSMUSG00000090084,protein_coding -7068,Gm14399,ENSMUSG00000090093,protein_coding -5259,Gm13757,ENSMUSG00000090097,protein_coding -5885,Ttbk2,ENSMUSG00000090100,protein_coding -7626,Gm4985,ENSMUSG00000090102,protein_coding -39201,Gm11492,ENSMUSG00000090107,protein_coding -8436,Cmc4,ENSMUSG00000090110,protein_coding -26857,Shprh,ENSMUSG00000090112,protein_coding -49400,Nhlrc4,ENSMUSG00000090113,protein_coding -50343,Usp49,ENSMUSG00000090115,protein_coding -43884,Abhd12b,ENSMUSG00000090121,protein_coding -9423,Kcne1l,ENSMUSG00000090122,protein_coding -1531,Ugt1a7c,ENSMUSG00000090124,protein_coding 
-14650,Pou3f1,ENSMUSG00000090125,protein_coding -46845,Olfr287,ENSMUSG00000090129,protein_coding -8916,Cypt2,ENSMUSG00000090132,protein_coding -30603,Uba52,ENSMUSG00000090137,protein_coding -8769,Gm614,ENSMUSG00000090141,protein_coding -1533,Ugt1a6b,ENSMUSG00000090145,protein_coding -36687,Acad11,ENSMUSG00000090150,protein_coding -21517,BC035044,ENSMUSG00000090164,protein_coding -1527,Ugt1a10,ENSMUSG00000090165,protein_coding -32803,Ear10,ENSMUSG00000090166,protein_coding -1543,Ugt1a2,ENSMUSG00000090171,protein_coding -38429,Fbxw10,ENSMUSG00000090173,protein_coding -1529,Ugt1a9,ENSMUSG00000090175,protein_coding -48051,Cd200r2,ENSMUSG00000090176,protein_coding -12590,4930503B20Rik,ENSMUSG00000090202,protein_coding -31199,Tepp,ENSMUSG00000090206,protein_coding -11577,Itga10,ENSMUSG00000090210,protein_coding -6923,Tmem189,ENSMUSG00000090213,protein_coding -25552,Trim34b,ENSMUSG00000090215,protein_coding -25532,4930516K23Rik,ENSMUSG00000090219,protein_coding -48731,Pcp4,ENSMUSG00000090223,protein_coding -39579,Gm11559,ENSMUSG00000090225,protein_coding -49849,Cfb,ENSMUSG00000090231,protein_coding -47463,Car15,ENSMUSG00000090236,protein_coding -29270,Bloc1s1,ENSMUSG00000090247,protein_coding -44008,Churc1,ENSMUSG00000090258,protein_coding -53129,Eif4ebp3,ENSMUSG00000090264,protein_coding -40108,Mettl23,ENSMUSG00000090266,protein_coding -11399,Gm5286,ENSMUSG00000090268,protein_coding -3015,Mndal,ENSMUSG00000090272,protein_coding -50537,Prr22,ENSMUSG00000090273,protein_coding -31874,Tarbp1,ENSMUSG00000090290,protein_coding -54455,Lrrc10b,ENSMUSG00000090291,protein_coding -27247,Sult3a2,ENSMUSG00000090298,protein_coding -17154,Gm7714,ENSMUSG00000090302,protein_coding -49075,Vmn2r99,ENSMUSG00000090304,protein_coding -11326,2310050C09Rik,ENSMUSG00000090314,protein_coding -49086,Vmn2r104,ENSMUSG00000090315,protein_coding -16704,Dthd1,ENSMUSG00000090326,protein_coding -29629,Cfap97d2,ENSMUSG00000090336,protein_coding -22322,Vmn2r46,ENSMUSG00000090342,protein_coding 
-19870,Vmn1r5,ENSMUSG00000090346,protein_coding -5986,Gm17555,ENSMUSG00000090353,protein_coding -47604,Teddm3,ENSMUSG00000090356,protein_coding -25075,Vmn2r79,ENSMUSG00000090362,protein_coding -32066,Gm3512,ENSMUSG00000090363,protein_coding -31934,Gm3043,ENSMUSG00000090364,protein_coding -55089,4933411K16Rik,ENSMUSG00000090369,protein_coding -32836,Gm8229,ENSMUSG00000090379,protein_coding -23766,Vmn2r58,ENSMUSG00000090383,protein_coding -2629,4930523C07Rik,ENSMUSG00000090394,protein_coding -32055,Gm8362,ENSMUSG00000090404,protein_coding -10802,Gm17402,ENSMUSG00000090408,protein_coding -23014,Vmn1r174,ENSMUSG00000090411,protein_coding -49059,Vmn2r94,ENSMUSG00000090417,protein_coding -25044,Vmn2r75,ENSMUSG00000090436,protein_coding -27687,Gm17455,ENSMUSG00000090439,protein_coding -32685,1700049E17Rik2,ENSMUSG00000090440,protein_coding -11606,Gm17651,ENSMUSG00000090441,protein_coding -53868,Gm6133,ENSMUSG00000090451,protein_coding -28980,Iltifb,ENSMUSG00000090461,protein_coding -36553,Prr23a3,ENSMUSG00000090470,protein_coding -26622,Gm4553,ENSMUSG00000090471,protein_coding -31969,Gm3047,ENSMUSG00000090472,protein_coding -6486,Gm17416,ENSMUSG00000090485,protein_coding -1295,BC035947,ENSMUSG00000090486,protein_coding -31908,Gm2888,ENSMUSG00000090487,protein_coding -32681,Gm3486,ENSMUSG00000090505,protein_coding -49930,Sfta2,ENSMUSG00000090509,protein_coding -32019,Gm3424,ENSMUSG00000090512,protein_coding -48517,Krtap27-1,ENSMUSG00000090515,protein_coding -53024,Gypc,ENSMUSG00000090523,protein_coding -10680,Gm5538,ENSMUSG00000090527,protein_coding -31978,Gm9602,ENSMUSG00000090539,protein_coding -8149,Cdr1,ENSMUSG00000090546,protein_coding -32014,Gm8265,ENSMUSG00000090547,protein_coding -1594,Prlh,ENSMUSG00000090550,protein_coding -2179,Snrpe,ENSMUSG00000090553,protein_coding -49104,Vmn2r109,ENSMUSG00000090572,protein_coding -10751,Vmn2r6,ENSMUSG00000090581,protein_coding -49931,Gm9573,ENSMUSG00000090588,protein_coding -34558,Gm17571,ENSMUSG00000090592,protein_coding 
-32822,Gm3327,ENSMUSG00000090594,protein_coding -23772,Vmn2r60,ENSMUSG00000090619,protein_coding -27845,A930033H14Rik,ENSMUSG00000090622,protein_coding -7047,Gm20721,ENSMUSG00000090625,protein_coding -36035,Tex9,ENSMUSG00000090626,protein_coding -32047,Gm8356,ENSMUSG00000090627,protein_coding -48221,Olfr180,ENSMUSG00000090629,protein_coding -19541,Olfr456,ENSMUSG00000090631,protein_coding -32762,Gm8126,ENSMUSG00000090634,protein_coding -36663,Gm20425,ENSMUSG00000090639,protein_coding -41940,Zfp712,ENSMUSG00000090641,protein_coding -32027,Gm3453,ENSMUSG00000090643,protein_coding -50569,Vmn2r120,ENSMUSG00000090655,protein_coding -41825,Prss47,ENSMUSG00000090658,protein_coding -41990,Zfp493,ENSMUSG00000090659,protein_coding -22289,Vmn2r45,ENSMUSG00000090662,protein_coding -20874,Gm765,ENSMUSG00000090667,protein_coding -50091,Olfr111,ENSMUSG00000090675,protein_coding -19210,Atp6v1fnb,ENSMUSG00000090685,protein_coding -17619,Vmn2r12,ENSMUSG00000090688,protein_coding -32724,Gm8020,ENSMUSG00000090690,protein_coding -32057,Gm3667,ENSMUSG00000090691,protein_coding -36359,Trim43a,ENSMUSG00000090693,protein_coding -21745,Apold1,ENSMUSG00000090698,protein_coding -49694,Cyp4f40,ENSMUSG00000090700,protein_coding -32021,Gm8237,ENSMUSG00000090707,protein_coding -35054,Pate11,ENSMUSG00000090710,protein_coding -32765,Gm8127,ENSMUSG00000090713,protein_coding -22383,Zscan4d,ENSMUSG00000090714,protein_coding -23005,Vmn1r167,ENSMUSG00000090715,protein_coding -32838,Gm3371,ENSMUSG00000090716,protein_coding -11222,Rps27,ENSMUSG00000090733,protein_coding -35039,Pate7,ENSMUSG00000090738,protein_coding -33120,Gm17079,ENSMUSG00000090740,protein_coding -23755,Gm6871,ENSMUSG00000090744,protein_coding -50192,Esp8,ENSMUSG00000090747,protein_coding -23797,Vmn2r63,ENSMUSG00000090751,protein_coding -22481,Vmn2r56,ENSMUSG00000090762,protein_coding -31953,Gm3127,ENSMUSG00000090764,protein_coding -25037,Vmn2r74,ENSMUSG00000090774,protein_coding -47480,Ccdc188,ENSMUSG00000090777,protein_coding 
-33071,Klhl33,ENSMUSG00000090799,protein_coding -55500,CAAA01165726.1,ENSMUSG00000090805,protein_coding -25025,Vmn2r70,ENSMUSG00000090806,protein_coding -44265,Samd15,ENSMUSG00000090812,protein_coding -11624,Gm4450,ENSMUSG00000090817,protein_coding -22213,Olfr1344,ENSMUSG00000090824,protein_coding -31986,Gm8297,ENSMUSG00000090827,protein_coding -54380,1700092M07Rik,ENSMUSG00000090840,protein_coding -29235,Myl6,ENSMUSG00000090841,protein_coding -44159,Heatr4,ENSMUSG00000090843,protein_coding -41249,Gm17364,ENSMUSG00000090853,protein_coding -28758,Gm4340,ENSMUSG00000090854,protein_coding -25913,Rps13,ENSMUSG00000090862,protein_coding -41009,A530084C06Rik,ENSMUSG00000090863,protein_coding -22301,Vmn2r40,ENSMUSG00000090864,protein_coding -31973,Gm3239,ENSMUSG00000090872,protein_coding -33041,Olfr733,ENSMUSG00000090874,protein_coding -49861,Hspa1b,ENSMUSG00000090877,protein_coding -33603,Gm6904,ENSMUSG00000090881,protein_coding -33446,Gm17428,ENSMUSG00000090889,protein_coding -20635,D6Ertd527e,ENSMUSG00000090891,protein_coding -22281,Vmn2r41,ENSMUSG00000090892,protein_coding -50090,Olfr110,ENSMUSG00000090894,protein_coding -50196,Gm44501,ENSMUSG00000090897,protein_coding -10443,Pabpc4l,ENSMUSG00000090919,protein_coding -44111,Synj2bp,ENSMUSG00000090935,protein_coding -53545,F830016B08Rik,ENSMUSG00000090942,protein_coding -43364,Ccdc71l,ENSMUSG00000090946,protein_coding -25071,Vmn2r77,ENSMUSG00000090949,protein_coding -48222,Olfr181,ENSMUSG00000090951,protein_coding -25251,Lrrc32,ENSMUSG00000090958,protein_coding -17607,Vmn2r8,ENSMUSG00000090961,protein_coding -17643,Gm17655,ENSMUSG00000090963,protein_coding -49209,Vmn2r116,ENSMUSG00000090966,protein_coding -23775,Vmn2r61,ENSMUSG00000090967,protein_coding -18619,Olfr718-ps1,ENSMUSG00000090981,protein_coding -6814,Gm20458,ENSMUSG00000090996,protein_coding -26455,Tcerg1l,ENSMUSG00000091002,protein_coding -25020,Vmn2r69,ENSMUSG00000091006,protein_coding -20028,Vmn1r34,ENSMUSG00000091012,protein_coding 
-3200,Vmn1r1,ENSMUSG00000091013,protein_coding -54584,Gm5244,ENSMUSG00000091014,protein_coding -3352,Fam71a,ENSMUSG00000091017,protein_coding -31950,Gm3115,ENSMUSG00000091022,protein_coding -34441,Gm10722,ENSMUSG00000091028,protein_coding -17472,Gm17660,ENSMUSG00000091034,protein_coding -48551,Krtap20-2,ENSMUSG00000091039,protein_coding -31487,Gm17720,ENSMUSG00000091041,protein_coding -50207,Glyatl3,ENSMUSG00000091043,protein_coding -22480,Vmn2r55,ENSMUSG00000091045,protein_coding -16065,Gm1979,ENSMUSG00000091049,protein_coding -53865,Siglec15,ENSMUSG00000091055,protein_coding -17621,Vmn2r14,ENSMUSG00000091059,protein_coding -2709,Myocos,ENSMUSG00000091060,protein_coding -49208,Vmn2r115,ENSMUSG00000091076,protein_coding -51284,Gm21292,ENSMUSG00000091077,protein_coding -36551,Prr23a1,ENSMUSG00000091080,protein_coding -10210,Kcnmb3,ENSMUSG00000091091,protein_coding -17529,Gm17304,ENSMUSG00000091096,protein_coding -28700,Gm4302,ENSMUSG00000091101,protein_coding -42401,Gm17622,ENSMUSG00000091109,protein_coding -32672,Gm2832,ENSMUSG00000091110,protein_coding -31956,Gm3138,ENSMUSG00000091114,protein_coding -45298,Ccdc152,ENSMUSG00000091119,protein_coding -32848,Gm8247,ENSMUSG00000091122,protein_coding -36755,Iqcf6,ENSMUSG00000091129,protein_coding -32676,Gm7945,ENSMUSG00000091131,protein_coding -32729,Gm17124,ENSMUSG00000091140,protein_coding -33112,Gm17175,ENSMUSG00000091142,protein_coding -33607,Phf11c,ENSMUSG00000091144,protein_coding -31965,Gm3182,ENSMUSG00000091148,protein_coding -49099,Vmn1r224,ENSMUSG00000091151,protein_coding -33679,Serpine3,ENSMUSG00000091155,protein_coding -34851,Gm17545,ENSMUSG00000091159,protein_coding -35037,Pate10,ENSMUSG00000091174,protein_coding -11267,9130204L05Rik,ENSMUSG00000091175,protein_coding -41751,Gm5141,ENSMUSG00000091183,protein_coding -31983,Gm3278,ENSMUSG00000091185,protein_coding -37748,Gm17332,ENSMUSG00000091195,protein_coding -17000,Gm7271,ENSMUSG00000091204,protein_coding -25026,Vmn2r71,ENSMUSG00000091205,protein_coding 
-49057,Vmn2r91,ENSMUSG00000091206,protein_coding -48557,Krtap11-1,ENSMUSG00000091212,protein_coding -35036,Pate1,ENSMUSG00000091215,protein_coding -32077,Gm3755,ENSMUSG00000091227,protein_coding -39288,Gm20390,ENSMUSG00000091228,protein_coding -25047,Vmn2r76,ENSMUSG00000091239,protein_coding -48302,Vgll3,ENSMUSG00000091243,protein_coding -35059,Pate8,ENSMUSG00000091248,protein_coding -15855,Speer4e,ENSMUSG00000091255,protein_coding -49105,Vmn2r110,ENSMUSG00000091259,protein_coding -21300,Vmn2r19,ENSMUSG00000091260,protein_coding -41248,Smim13,ENSMUSG00000091264,protein_coding -32025,Gm3248,ENSMUSG00000091275,protein_coding -33118,Gm4181,ENSMUSG00000091296,protein_coding -14550,Gm8439,ENSMUSG00000091297,protein_coding -33419,Gm17606,ENSMUSG00000091306,protein_coding -41642,Spata31d1b,ENSMUSG00000091311,protein_coding -3705,Gm17490,ENSMUSG00000091312,protein_coding -418,Gm5415,ENSMUSG00000091318,protein_coding -5978,Eid1,ENSMUSG00000091337,protein_coding -36721,Col6a5,ENSMUSG00000091345,protein_coding -41892,Gm10772,ENSMUSG00000091347,protein_coding -49058,Vmn2r92,ENSMUSG00000091350,protein_coding -17623,Vmn2r15,ENSMUSG00000091375,protein_coding -10689,Aadacl2,ENSMUSG00000091376,protein_coding -28101,Vmn2r83,ENSMUSG00000091381,protein_coding -19886,Vmn1r18,ENSMUSG00000091382,protein_coding -42413,Gcnt4,ENSMUSG00000091387,protein_coding -32059,Gm6356,ENSMUSG00000091400,protein_coding -44919,Rd3l,ENSMUSG00000091402,protein_coding -11536,Hist2h4,ENSMUSG00000091405,protein_coding -49212,Vmn2r117,ENSMUSG00000091407,protein_coding -35047,Gm5916,ENSMUSG00000091411,protein_coding -27368,Ak9,ENSMUSG00000091415,protein_coding -31962,Gm3164,ENSMUSG00000091418,protein_coding -32844,Gm17093,ENSMUSG00000091429,protein_coding -22888,Vmn1r115,ENSMUSG00000091435,protein_coding -26558,Gm17387,ENSMUSG00000091441,protein_coding -17618,Vmn2r11,ENSMUSG00000091450,protein_coding -28805,Otogl,ENSMUSG00000091455,protein_coding -28099,Vmn2r82,ENSMUSG00000091468,protein_coding 
-55158,Gm20538,ENSMUSG00000091471,protein_coding -32071,Gm3739,ENSMUSG00000091472,protein_coding -23758,2610021A01Rik,ENSMUSG00000091474,protein_coding -3102,Catspere2,ENSMUSG00000091476,protein_coding -32774,Gm5799,ENSMUSG00000091477,protein_coding -30186,Triml2,ENSMUSG00000091490,protein_coding -49068,Vmn2r97,ENSMUSG00000091491,protein_coding -31980,Gm3269,ENSMUSG00000091494,protein_coding -8606,Gm44,ENSMUSG00000091497,protein_coding -50481,Vmn2r118,ENSMUSG00000091504,protein_coding -29360,Vmn2r87,ENSMUSG00000091511,protein_coding -12435,Lamtor3,ENSMUSG00000091512,protein_coding -53842,Skor2,ENSMUSG00000091519,protein_coding -23801,Vmn2r126,ENSMUSG00000091528,protein_coding -48773,Cldn20,ENSMUSG00000091530,protein_coding -50076,Olfr102,ENSMUSG00000091531,protein_coding -36901,Tma7,ENSMUSG00000091537,protein_coding -52660,Vmn1r238,ENSMUSG00000091539,protein_coding -19874,Vmn1r9,ENSMUSG00000091541,protein_coding -7760,Gm14569,ENSMUSG00000091556,protein_coding -53002,Gm6665,ENSMUSG00000091561,protein_coding -31951,Gm8108,ENSMUSG00000091563,protein_coding -32029,Gm8206,ENSMUSG00000091568,protein_coding -32839,Gm8232,ENSMUSG00000091569,protein_coding -10741,Vmn2r3,ENSMUSG00000091572,protein_coding -32792,Gm8180,ENSMUSG00000091584,protein_coding -55494,AC140365.1,ENSMUSG00000091585,protein_coding -49687,Cyp4f17,ENSMUSG00000091586,protein_coding -23791,Gm17067,ENSMUSG00000091594,protein_coding -50058,Olfr93,ENSMUSG00000091601,protein_coding -46931,Gm17349,ENSMUSG00000091604,protein_coding -49581,Gm17657,ENSMUSG00000091614,protein_coding -32078,Gm3752,ENSMUSG00000091617,protein_coding -26742,H60c,ENSMUSG00000091618,protein_coding -21310,Vmn2r23,ENSMUSG00000091620,protein_coding -17609,Vmn2r9,ENSMUSG00000091624,protein_coding -19861,Lsm5,ENSMUSG00000091625,protein_coding -49061,Vmn2r95,ENSMUSG00000091631,protein_coding -17620,Vmn2r13,ENSMUSG00000091635,protein_coding -50700,Akain1,ENSMUSG00000091636,protein_coding -22886,Vmn1r113,ENSMUSG00000091638,protein_coding 
-11409,C2cd4d,ENSMUSG00000091648,protein_coding -33605,Phf11b,ENSMUSG00000091649,protein_coding -46310,Apol11a,ENSMUSG00000091650,protein_coding -22272,Vmn2r36,ENSMUSG00000091651,protein_coding -22166,Vmn1r57,ENSMUSG00000091652,protein_coding -49094,Vmn2r106,ENSMUSG00000091656,protein_coding -32687,Gm3072,ENSMUSG00000091657,protein_coding -22351,Vmn1r69,ENSMUSG00000091662,protein_coding -49093,Vmn2r105,ENSMUSG00000091670,protein_coding -32073,Gm8374,ENSMUSG00000091676,protein_coding -49064,Vmn2r96,ENSMUSG00000091679,protein_coding -46714,Klhdc7b,ENSMUSG00000091680,protein_coding -10935,Gm17359,ENSMUSG00000091685,protein_coding -23722,4930433I11Rik,ENSMUSG00000091692,protein_coding -46316,Apol11b,ENSMUSG00000091694,protein_coding -32789,Gm6526,ENSMUSG00000091698,protein_coding -31975,Gm7876,ENSMUSG00000091700,protein_coding -49912,H2-Q2,ENSMUSG00000091705,protein_coding -47223,Sec14l5,ENSMUSG00000091712,protein_coding -32786,Gm16506,ENSMUSG00000091718,protein_coding -12333,Gimd1,ENSMUSG00000091721,protein_coding -33941,Siah3,ENSMUSG00000091722,protein_coding -32832,Gm8220,ENSMUSG00000091725,protein_coding -27656,Gm17542,ENSMUSG00000091731,protein_coding -32750,Gm8094,ENSMUSG00000091733,protein_coding -19924,Vmn1r29,ENSMUSG00000091734,protein_coding -36747,Gpr62,ENSMUSG00000091735,protein_coding -9700,Yy2,ENSMUSG00000091736,protein_coding -32692,Gm7929,ENSMUSG00000091740,protein_coding -32054,Gm3636,ENSMUSG00000091754,protein_coding -31945,Gm3095,ENSMUSG00000091756,protein_coding -30560,Zfp964,ENSMUSG00000091764,protein_coding -41528,Gm17617,ENSMUSG00000091768,protein_coding -49082,Vmn2r103,ENSMUSG00000091771,protein_coding -28752,Gm21293,ENSMUSG00000091779,protein_coding -46711,Sco2,ENSMUSG00000091780,protein_coding -32709,Gm3573,ENSMUSG00000091792,protein_coding -18877,Vmn2r18,ENSMUSG00000091794,protein_coding -44105,Cox16,ENSMUSG00000091803,protein_coding -49102,Vmn2r108,ENSMUSG00000091805,protein_coding -32175,Olfr721-ps1,ENSMUSG00000091809,protein_coding 
-22659,Inafm1,ENSMUSG00000091811,protein_coding -31332,Ces2h,ENSMUSG00000091813,protein_coding -32053,Gm8050,ENSMUSG00000091814,protein_coding -15887,Speer4f2,ENSMUSG00000091827,protein_coding -50736,Gm4707,ENSMUSG00000091831,protein_coding -669,Gm8251,ENSMUSG00000091844,protein_coding -49078,Vmn2r100,ENSMUSG00000091859,protein_coding -9607,Cldn34a,ENSMUSG00000091863,protein_coding -23170,Cyp2a22,ENSMUSG00000091867,protein_coding -33040,Olfr732,ENSMUSG00000091873,protein_coding -22165,Vmn1r56,ENSMUSG00000091874,protein_coding -17626,Vmn2r17,ENSMUSG00000091879,protein_coding -32031,Gm6337,ENSMUSG00000091882,protein_coding -28097,Vmn2r80,ENSMUSG00000091888,protein_coding -53106,Ube2d2a,ENSMUSG00000091896,protein_coding -15857,Gm17019,ENSMUSG00000091897,protein_coding -32493,Tnnc1,ENSMUSG00000091898,protein_coding -15797,Gm6460,ENSMUSG00000091903,protein_coding -32855,Gm8267,ENSMUSG00000091923,protein_coding -23162,Vmn1r185,ENSMUSG00000091924,protein_coding -23794,Vmn2r62,ENSMUSG00000091926,protein_coding -22335,Vmn2r52,ENSMUSG00000091930,protein_coding -44572,Gon7,ENSMUSG00000091931,protein_coding -15774,Gm8857,ENSMUSG00000091933,protein_coding -601,Gm3646,ENSMUSG00000091937,protein_coding -49207,Vmn2r114,ENSMUSG00000091945,protein_coding -23080,Gm9844,ENSMUSG00000091955,protein_coding -35941,C2cd4b,ENSMUSG00000091956,protein_coding -25074,Vmn2r78,ENSMUSG00000091962,protein_coding -49862,Hspa1a,ENSMUSG00000091971,protein_coding -47712,Tmem207,ENSMUSG00000091972,protein_coding -19540,Olfr457,ENSMUSG00000091983,protein_coding -51139,Gm10352,ENSMUSG00000091987,protein_coding -35444,BC049352,ENSMUSG00000091996,protein_coding -21094,Gm17482,ENSMUSG00000092004,protein_coding -55040,Cyp2c69,ENSMUSG00000092008,protein_coding -48109,Myh15,ENSMUSG00000092009,protein_coding -44292,Gm4027,ENSMUSG00000092019,protein_coding -23768,Vmn2r59,ENSMUSG00000092032,protein_coding -18927,Peg10,ENSMUSG00000092035,protein_coding -32259,Gm2244,ENSMUSG00000092036,protein_coding 
-50197,Esp6,ENSMUSG00000092043,protein_coding -21571,Gm17631,ENSMUSG00000092047,protein_coding -29357,Vmn2r85,ENSMUSG00000092048,protein_coding -10745,Vmn2r4,ENSMUSG00000092049,protein_coding -16824,Bend4,ENSMUSG00000092060,protein_coding -17296,Gm6205,ENSMUSG00000092073,protein_coding -48808,Dynlt1a,ENSMUSG00000092074,protein_coding -50075,Olfr101,ENSMUSG00000092077,protein_coding -17624,Vmn2r16,ENSMUSG00000092080,protein_coding -188,Kcnb2,ENSMUSG00000092083,protein_coding -15707,Zfp804b,ENSMUSG00000092094,protein_coding -34345,Gm9376,ENSMUSG00000092109,protein_coding -49196,Vmn2r113,ENSMUSG00000092111,protein_coding -42449,Gm10320,ENSMUSG00000092116,protein_coding -24201,Fancf,ENSMUSG00000092118,protein_coding -49039,Vmn2r90,ENSMUSG00000092120,protein_coding -6596,Gm17581,ENSMUSG00000092123,protein_coding -52995,B930094E09Rik,ENSMUSG00000092124,protein_coding -32677,Gm6482,ENSMUSG00000092142,protein_coding -32720,Gm8005,ENSMUSG00000092148,protein_coding -32712,Gm17026,ENSMUSG00000092152,protein_coding -39030,Gm17268,ENSMUSG00000092157,protein_coding -29358,Vmn2r86,ENSMUSG00000092162,protein_coding -21819,Rergl,ENSMUSG00000092164,protein_coding -32847,Gm5624,ENSMUSG00000092165,protein_coding -17305,Gm7942,ENSMUSG00000092166,protein_coding -32064,Gm3696,ENSMUSG00000092167,protein_coding -36056,Dnaaf4,ENSMUSG00000092192,protein_coding -28910,A930009A15Rik,ENSMUSG00000092210,protein_coding -22840,Gm19345,ENSMUSG00000092216,protein_coding -23795,Gm2381,ENSMUSG00000092225,protein_coding -33433,Gm20521,ENSMUSG00000092232,protein_coding -43637,Gm20403,ENSMUSG00000092233,protein_coding -49975,Gm7030,ENSMUSG00000092243,protein_coding -50139,Esp34,ENSMUSG00000092244,protein_coding -30562,Zfp963,ENSMUSG00000092260,protein_coding -49978,Gm19684,ENSMUSG00000092277,protein_coding -50093,Olfr113,ENSMUSG00000092292,protein_coding -22983,Gm4214,ENSMUSG00000092297,protein_coding -43399,Prps1l1,ENSMUSG00000092305,protein_coding -36069,Gm20509,ENSMUSG00000092310,protein_coding 
-50135,Esp36,ENSMUSG00000092322,protein_coding -31629,Gm20388,ENSMUSG00000092329,protein_coding -23787,Zfp977,ENSMUSG00000092335,protein_coding -50142,Esp31,ENSMUSG00000092342,protein_coding -13590,Gm20503,ENSMUSG00000092345,protein_coding -49753,Platr17,ENSMUSG00000092349,protein_coding -4030,Gm20532,ENSMUSG00000092356,protein_coding -27942,Gm20441,ENSMUSG00000092360,protein_coding -23195,Gm20479,ENSMUSG00000092367,protein_coding -50096,Olfr115,ENSMUSG00000092413,protein_coding -23782,Zfp141,ENSMUSG00000092416,protein_coding -49883,Gpank1,ENSMUSG00000092417,protein_coding -23028,V1rd19,ENSMUSG00000092456,protein_coding -8770,Gm20489,ENSMUSG00000092463,protein_coding -47464,Gm20518,ENSMUSG00000092470,protein_coding -23025,Vmn1r180,ENSMUSG00000092473,protein_coding -49850,Gm20547,ENSMUSG00000092511,protein_coding -22129,Fam71e2,ENSMUSG00000092518,protein_coding -49731,Actl9,ENSMUSG00000092519,protein_coding -26175,Pagr1b,ENSMUSG00000092534,protein_coding -36350,Gm20537,ENSMUSG00000092541,protein_coding -30563,Gm20422,ENSMUSG00000092544,protein_coding -50340,Med20,ENSMUSG00000092558,protein_coding -22180,Vmn1r62,ENSMUSG00000092579,protein_coding -49875,Ly6g6c,ENSMUSG00000092586,protein_coding -23751,Gm20449,ENSMUSG00000092592,protein_coding -11456,Scnm1,ENSMUSG00000092607,protein_coding -49863,Gm20481,ENSMUSG00000092609,protein_coding -49815,Btnl6,ENSMUSG00000092618,protein_coding -36068,Khdc3,ENSMUSG00000092622,protein_coding -19906,Vmn1r23,ENSMUSG00000093376,protein_coding -18459,Lrch4,ENSMUSG00000093445,protein_coding -36825,Gm20662,ENSMUSG00000093456,protein_coding -40137,Gm20708,ENSMUSG00000093485,protein_coding -22209,Smim17,ENSMUSG00000093536,protein_coding -46965,Higd1c,ENSMUSG00000093550,protein_coding -16234,Gm20671,ENSMUSG00000093574,protein_coding -47175,Gm20695,ENSMUSG00000093575,protein_coding -49390,Gm20683,ENSMUSG00000093593,protein_coding -46191,Ly6l,ENSMUSG00000093626,protein_coding -23815,Lim2,ENSMUSG00000093639,protein_coding 
-20885,Eif4e3,ENSMUSG00000093661,protein_coding -42167,Pou5f2,ENSMUSG00000093668,protein_coding -29242,Rpl41,ENSMUSG00000093674,protein_coding -19872,Vmn1r7,ENSMUSG00000093696,protein_coding -48606,Gm21970,ENSMUSG00000093701,protein_coding -6940,Gm20716,ENSMUSG00000093752,protein_coding -20037,Vmn1r39,ENSMUSG00000093755,protein_coding -11534,Hist2h3c1,ENSMUSG00000093769,protein_coding -46963,Methig1,ENSMUSG00000093789,protein_coding -37292,Ppp2r3d,ENSMUSG00000093803,protein_coding -5695,Olfr1303,ENSMUSG00000093804,protein_coding -1694,Gal3st2b,ENSMUSG00000093805,protein_coding -9870,Asmt,ENSMUSG00000093806,protein_coding -21304,Vmn2r21,ENSMUSG00000093820,protein_coding -33029,Olfr723,ENSMUSG00000093825,protein_coding -55544,Ccl27,ENSMUSG00000093828,protein_coding -32684,Gm7970,ENSMUSG00000093833,protein_coding -39218,Olfr462,ENSMUSG00000093839,protein_coding -44360,Gm5039,ENSMUSG00000093847,protein_coding -51470,Gm20865,ENSMUSG00000093848,protein_coding -22959,Vmn1r151,ENSMUSG00000093853,protein_coding -32679,Gm7954,ENSMUSG00000093863,protein_coding -12282,Lrit3,ENSMUSG00000093865,protein_coding -29318,Olfr802,ENSMUSG00000093866,protein_coding -51530,Gm20809,ENSMUSG00000093868,protein_coding -22953,Gm4187,ENSMUSG00000093871,protein_coding -22219,Olfr1348,ENSMUSG00000093877,protein_coding -51363,Gm21866,ENSMUSG00000093883,protein_coding -49723,Olfr239,ENSMUSG00000093884,protein_coding -31941,Gm3033,ENSMUSG00000093887,protein_coding -22908,Vmn1r127,ENSMUSG00000093890,protein_coding -51812,Gm21865,ENSMUSG00000093895,protein_coding -32048,Gm3629,ENSMUSG00000093898,protein_coding -35130,Olfr893,ENSMUSG00000093901,protein_coding -51357,Gm21454,ENSMUSG00000093903,protein_coding -31881,Tomm20,ENSMUSG00000093904,protein_coding -18630,Zfp853,ENSMUSG00000093910,protein_coding -22955,Gm8660,ENSMUSG00000093917,protein_coding -51142,Gm21677,ENSMUSG00000093918,protein_coding -39219,Olfr463,ENSMUSG00000093920,protein_coding -51004,Rpl36-ps4,ENSMUSG00000093922,protein_coding 
-7636,Gm5935,ENSMUSG00000093923,protein_coding -32707,Gm17027,ENSMUSG00000093926,protein_coding -51180,Gm20918,ENSMUSG00000093927,protein_coding -42780,Hmgcs1,ENSMUSG00000093930,protein_coding -11996,Amy2a3,ENSMUSG00000093931,protein_coding -35235,Olfr971,ENSMUSG00000093934,protein_coding -38985,Evi2b,ENSMUSG00000093938,protein_coding -22918,Vmn1r131,ENSMUSG00000093941,protein_coding -26525,Olfr46,ENSMUSG00000093942,protein_coding -32700,Gm3543,ENSMUSG00000093945,protein_coding -32733,Gm10378,ENSMUSG00000093948,protein_coding -52145,Gm20823,ENSMUSG00000093950,protein_coding -33826,Gm16867,ENSMUSG00000093954,protein_coding -50200,Esp3,ENSMUSG00000093957,protein_coding -13777,Gm11756,ENSMUSG00000093962,protein_coding -32782,Gm8165,ENSMUSG00000093968,protein_coding -24116,Mrgpra2a,ENSMUSG00000093973,protein_coding -32260,Gm2237,ENSMUSG00000093979,protein_coding -25751,Olfr493,ENSMUSG00000093980,protein_coding -32061,Gm10406,ENSMUSG00000093985,protein_coding -22896,Vmn1r120,ENSMUSG00000093986,protein_coding -51146,Gm21704,ENSMUSG00000093987,protein_coding -38626,Rnasek,ENSMUSG00000093989,protein_coding -52197,Gm20736,ENSMUSG00000093993,protein_coding -13265,Fam205a3,ENSMUSG00000093996,protein_coding -22899,Vmn1r122,ENSMUSG00000094001,protein_coding -9260,Gm5128,ENSMUSG00000094004,protein_coding -22894,Vmn1r119,ENSMUSG00000094010,protein_coding -22858,Vmn1r94,ENSMUSG00000094011,protein_coding -28012,Gm10024,ENSMUSG00000094012,protein_coding -11247,S100a2,ENSMUSG00000094018,protein_coding -32062,Gm3685,ENSMUSG00000094021,protein_coding -50163,Esp18,ENSMUSG00000094024,protein_coding -15779,Gm8879,ENSMUSG00000094025,protein_coding -42807,Gm21762,ENSMUSG00000094027,protein_coding -48448,Gm21833,ENSMUSG00000094030,protein_coding -15065,Ldlrad2,ENSMUSG00000094035,protein_coding -15806,Gm6465,ENSMUSG00000094036,protein_coding -17315,Gm7971,ENSMUSG00000094043,protein_coding -52291,Gm21118,ENSMUSG00000094052,protein_coding -23451,Scgb2b7,ENSMUSG00000094053,protein_coding 
-25526,Olfr638,ENSMUSG00000094063,protein_coding -13238,Gm21541,ENSMUSG00000094065,protein_coding -13269,Fam205a2,ENSMUSG00000094066,protein_coding -28282,Gm4767,ENSMUSG00000094076,protein_coding -28090,Olfr8,ENSMUSG00000094080,protein_coding -51242,Gm20826,ENSMUSG00000094081,protein_coding -48834,Gm1604b,ENSMUSG00000094083,protein_coding -11366,Tdpoz1,ENSMUSG00000094084,protein_coding -22887,Vmn1r114,ENSMUSG00000094085,protein_coding -39295,Gm21885,ENSMUSG00000094091,protein_coding -22287,Vmn2r44,ENSMUSG00000094098,protein_coding -43647,1700047I17Rik2,ENSMUSG00000094103,protein_coding -15800,Gm8922,ENSMUSG00000094105,protein_coding -22324,Vmn2r47,ENSMUSG00000094107,protein_coding -45781,9330182O14Rik,ENSMUSG00000094112,protein_coding -21631,5430401F13Rik,ENSMUSG00000094113,protein_coding -13767,Gm11237,ENSMUSG00000094116,protein_coding -25482,Olfr612,ENSMUSG00000094119,protein_coding -28021,Gm3233,ENSMUSG00000094120,protein_coding -55511,Ccl21c,ENSMUSG00000094121,protein_coding -4980,Gm13698,ENSMUSG00000094125,protein_coding -1427,G530012D18Rik,ENSMUSG00000094127,protein_coding -31981,Gm3264,ENSMUSG00000094132,protein_coding -54549,Olfr1431,ENSMUSG00000094133,protein_coding -33162,Olfr1512,ENSMUSG00000094140,protein_coding -28756,Gm21312,ENSMUSG00000094144,protein_coding -21301,Vmn2r20,ENSMUSG00000094145,protein_coding -28013,Gm10142,ENSMUSG00000094146,protein_coding -22948,Gm8453,ENSMUSG00000094149,protein_coding -32758,Gm7233,ENSMUSG00000094151,protein_coding -23991,Slc6a16,ENSMUSG00000094152,protein_coding -22559,Sult2a7,ENSMUSG00000094156,protein_coding -32716,Gm7995,ENSMUSG00000094157,protein_coding -51416,Gm21904,ENSMUSG00000094161,protein_coding -11381,Tdpoz5,ENSMUSG00000094163,protein_coding -50155,Gm21903,ENSMUSG00000094168,protein_coding -55492,AC163611.1,ENSMUSG00000094172,protein_coding -51862,Gm21739,ENSMUSG00000094181,protein_coding -35188,Olfr937,ENSMUSG00000094182,protein_coding -28309,Gm1553,ENSMUSG00000094186,protein_coding 
-23010,Vmn1r170,ENSMUSG00000094187,protein_coding -19554,Olfr452,ENSMUSG00000094192,protein_coding -17295,Gm6509,ENSMUSG00000094195,protein_coding -9659,Magea3,ENSMUSG00000094196,protein_coding -25726,Olfr474,ENSMUSG00000094197,protein_coding -19571,Olfr237-ps1,ENSMUSG00000094200,protein_coding -35269,Gm21915,ENSMUSG00000094204,protein_coding -15803,Gm8926,ENSMUSG00000094205,protein_coding -22916,Vmn1r130,ENSMUSG00000094208,protein_coding -55243,Calhm3,ENSMUSG00000094219,protein_coding -22905,Vmn1r124,ENSMUSG00000094221,protein_coding -51372,Gm21767,ENSMUSG00000094227,protein_coding -42786,AF067063,ENSMUSG00000094237,protein_coding -40704,Hist1h2ao,ENSMUSG00000094248,protein_coding -35232,Olfr969,ENSMUSG00000094254,protein_coding -31972,Gm8159,ENSMUSG00000094258,protein_coding -4348,Olfr340,ENSMUSG00000094266,protein_coding -35208,Olfr951,ENSMUSG00000094269,protein_coding -13924,Gm13290,ENSMUSG00000094271,protein_coding -7284,Btbd35f17,ENSMUSG00000094273,protein_coding -17965,Cfap73,ENSMUSG00000094282,protein_coding -22885,Vmn1r112,ENSMUSG00000094284,protein_coding -33052,Olfr743,ENSMUSG00000094285,protein_coding -13246,Gm3893,ENSMUSG00000094293,protein_coding -51469,Gm20909,ENSMUSG00000094294,protein_coding -29337,Olfr819,ENSMUSG00000094295,protein_coding -46044,Gm21798,ENSMUSG00000094296,protein_coding -22861,Gm6164,ENSMUSG00000094298,protein_coding -55533,AC165294.1,ENSMUSG00000094303,protein_coding -23522,Scgb2b20,ENSMUSG00000094305,protein_coding -7278,Btbd35f28,ENSMUSG00000094307,protein_coding -27306,Rfpl4b,ENSMUSG00000094311,protein_coding -28699,Gm4301,ENSMUSG00000094314,protein_coding -51419,Gm20831,ENSMUSG00000094325,protein_coding -11387,Gm9125,ENSMUSG00000094328,protein_coding -4981,Gm13693,ENSMUSG00000094336,protein_coding -55541,Gm3286,ENSMUSG00000094337,protein_coding -40685,Hist1h2bl,ENSMUSG00000094338,protein_coding -29299,Olfr784,ENSMUSG00000094347,protein_coding -32745,Gm8082,ENSMUSG00000094349,protein_coding 
-55548,Gm10931,ENSMUSG00000094350,protein_coding -35229,Olfr150,ENSMUSG00000094353,protein_coding -51815,Gm21882,ENSMUSG00000094354,protein_coding -29788,Defa33,ENSMUSG00000094362,protein_coding -32016,Gm3373,ENSMUSG00000094370,protein_coding -9540,Gm15097,ENSMUSG00000094378,protein_coding -40768,Vmn1r202,ENSMUSG00000094379,protein_coding -35144,Olfr904,ENSMUSG00000094380,protein_coding -55488,AC123873.1,ENSMUSG00000094383,protein_coding -22900,Vmn1r123,ENSMUSG00000094385,protein_coding -7695,Btbd35f29,ENSMUSG00000094391,protein_coding -49056,Vmn2r124,ENSMUSG00000094396,protein_coding -52596,Gm21477,ENSMUSG00000094399,protein_coding -52393,Gm21776,ENSMUSG00000094404,protein_coding -2183,Gm38394,ENSMUSG00000094410,protein_coding -48246,Olfr205,ENSMUSG00000094422,protein_coding -25730,Olfr478,ENSMUSG00000094426,protein_coding -1889,Gm19965,ENSMUSG00000094429,protein_coding -47829,Smbd1,ENSMUSG00000094430,protein_coding -15122,Gm21969,ENSMUSG00000094439,protein_coding -49725,Zfp955a,ENSMUSG00000094441,protein_coding -30462,Sgo2b,ENSMUSG00000094443,protein_coding -24204,1700015G11Rik,ENSMUSG00000094445,protein_coding -45644,9430069I07Rik,ENSMUSG00000094447,protein_coding -35236,Olfr972,ENSMUSG00000094449,protein_coding -32037,Gm21560,ENSMUSG00000094460,protein_coding -35119,Olfr883,ENSMUSG00000094461,protein_coding -23786,Gm21028,ENSMUSG00000094462,protein_coding -4347,Olfr339,ENSMUSG00000094464,protein_coding -47378,Gm21897,ENSMUSG00000094472,protein_coding -55489,AC123873.2,ENSMUSG00000094474,protein_coding -7081,Gm11007,ENSMUSG00000094475,protein_coding -37427,Purb,ENSMUSG00000094483,protein_coding -51230,Gm21244,ENSMUSG00000094484,protein_coding -10579,Gm21958,ENSMUSG00000094487,protein_coding -38776,Olfr393,ENSMUSG00000094488,protein_coding -25676,Olfr700,ENSMUSG00000094493,protein_coding -29307,Olfr792,ENSMUSG00000094496,protein_coding -30044,Smim18,ENSMUSG00000094500,protein_coding -18530,Gm5294,ENSMUSG00000094504,protein_coding 
-52364,Gm21913,ENSMUSG00000094507,protein_coding -51144,Gm21693,ENSMUSG00000094511,protein_coding -55510,AC133103.3,ENSMUSG00000094514,protein_coding -25522,Olfr635,ENSMUSG00000094520,protein_coding -25595,Olfr671,ENSMUSG00000094531,protein_coding -22984,Gm4216,ENSMUSG00000094532,protein_coding -34689,Olfr850,ENSMUSG00000094535,protein_coding -48232,Olfr191,ENSMUSG00000094539,protein_coding -22957,Vmn1r149,ENSMUSG00000094542,protein_coding -32826,Gm8212,ENSMUSG00000094543,protein_coding -22871,Vmn1r101,ENSMUSG00000094545,protein_coding -20740,Vmn1r50,ENSMUSG00000094553,protein_coding -51302,Gm20821,ENSMUSG00000094556,protein_coding -7258,Btbd35f11,ENSMUSG00000094558,protein_coding -46555,Cyp2d34,ENSMUSG00000094559,protein_coding -17288,A430089I19Rik,ENSMUSG00000094560,protein_coding -52035,Gm20931,ENSMUSG00000094570,protein_coding -51781,Gm20738,ENSMUSG00000094575,protein_coding -55512,AC087559.3,ENSMUSG00000094576,protein_coding -9638,4921511M17Rik,ENSMUSG00000094577,protein_coding -32005,Gm3187,ENSMUSG00000094578,protein_coding -54483,Ms4a18,ENSMUSG00000094584,protein_coding -20725,Vmn1r41,ENSMUSG00000094586,protein_coding -35137,Olfr898,ENSMUSG00000094588,protein_coding -22891,Vmn1r118,ENSMUSG00000094589,protein_coding -32050,Gm10251,ENSMUSG00000094590,protein_coding -9642,Gm15140,ENSMUSG00000094592,protein_coding -12873,Fsbp,ENSMUSG00000094595,protein_coding -7279,Btbd35f20,ENSMUSG00000094596,protein_coding -7709,Gm5926,ENSMUSG00000094601,protein_coding -23009,Vmn1r169,ENSMUSG00000094602,protein_coding -22332,Vmn2r50,ENSMUSG00000094606,protein_coding -25749,Olfr491,ENSMUSG00000094612,protein_coding -11874,A630076J17Rik,ENSMUSG00000094613,protein_coding -52297,Gm20888,ENSMUSG00000094616,protein_coding -13917,Gm13271,ENSMUSG00000094618,protein_coding -28280,Gm3055,ENSMUSG00000094622,protein_coding -7669,Gm4836,ENSMUSG00000094624,protein_coding -21210,Tmem121b,ENSMUSG00000094626,protein_coding -31976,Gm3252,ENSMUSG00000094628,protein_coding 
-29339,Gm10310,ENSMUSG00000094632,protein_coding -32033,Gm3468,ENSMUSG00000094634,protein_coding -40771,Vmn1r204,ENSMUSG00000094637,protein_coding -1453,Gm21972,ENSMUSG00000094638,protein_coding -52418,Gm21650,ENSMUSG00000094647,protein_coding -13937,Gm13287,ENSMUSG00000094648,protein_coding -55442,Gm7102,ENSMUSG00000094649,protein_coding -1692,Gal3st2,ENSMUSG00000094651,protein_coding -51135,Rbmy,ENSMUSG00000094658,protein_coding -52542,Gm21394,ENSMUSG00000094660,protein_coding -55546,Ccl19,ENSMUSG00000094661,protein_coding -29789,Defa36,ENSMUSG00000094662,protein_coding -19568,Olfr441,ENSMUSG00000094669,protein_coding -21314,Vmn2r25,ENSMUSG00000094672,protein_coding -28089,Olfr1354,ENSMUSG00000094673,protein_coding -34699,Olfr857,ENSMUSG00000094678,protein_coding -51285,Gm21721,ENSMUSG00000094679,protein_coding -22898,Vmn1r121,ENSMUSG00000094680,protein_coding -22996,Gm6902,ENSMUSG00000094682,protein_coding -13279,Ccl21a,ENSMUSG00000094686,protein_coding -29762,Defa25,ENSMUSG00000094687,protein_coding -50270,1600014C23Rik,ENSMUSG00000094690,protein_coding -33046,Olfr738,ENSMUSG00000094692,protein_coding -13264,Gm21953,ENSMUSG00000094695,protein_coding -22974,Vmn1r158,ENSMUSG00000094700,protein_coding -35157,Olfr916,ENSMUSG00000094701,protein_coding -32082,Gm10338,ENSMUSG00000094706,protein_coding -8051,Gm16430,ENSMUSG00000094714,protein_coding -33116,Gm17078,ENSMUSG00000094715,protein_coding -16842,Gm5108,ENSMUSG00000094719,protein_coding -54604,Olfr1462,ENSMUSG00000094721,protein_coding -55550,AC125178.1,ENSMUSG00000094722,protein_coding -48832,Rnaset2b,ENSMUSG00000094724,protein_coding -55530,AC132444.2,ENSMUSG00000094728,protein_coding -51997,Gm20747,ENSMUSG00000094729,protein_coding -47901,Csta3,ENSMUSG00000094733,protein_coding -29315,Olfr799,ENSMUSG00000094734,protein_coding -22860,Vmn1r95,ENSMUSG00000094735,protein_coding -52443,Gm20806,ENSMUSG00000094739,protein_coding -55539,AC140325.2,ENSMUSG00000094741,protein_coding 
-35211,Olfr954,ENSMUSG00000094745,protein_coding -52453,Gm20916,ENSMUSG00000094746,protein_coding -5702,Olfr1307,ENSMUSG00000094747,protein_coding -22961,Gm8677,ENSMUSG00000094748,protein_coding -54595,Olfr1453,ENSMUSG00000094755,protein_coding -22862,Gm4498,ENSMUSG00000094757,protein_coding -7694,Gm10487,ENSMUSG00000094759,protein_coding -22872,Gm10670,ENSMUSG00000094762,protein_coding -4355,Olfr346,ENSMUSG00000094764,protein_coding -51290,Gm21812,ENSMUSG00000094773,protein_coding -40708,Hist1h2ap,ENSMUSG00000094777,protein_coding -35132,Olfr143,ENSMUSG00000094778,protein_coding -51896,Gm21256,ENSMUSG00000094782,protein_coding -32760,Gm8122,ENSMUSG00000094784,protein_coding -7133,Gm14403,ENSMUSG00000094786,protein_coding -51661,Gm28490,ENSMUSG00000094789,protein_coding -55490,AC126035.1,ENSMUSG00000094791,protein_coding -13631,Mup12,ENSMUSG00000094793,protein_coding -42810,BC147527,ENSMUSG00000094796,protein_coding -55559,AC125149.4,ENSMUSG00000094799,protein_coding -32388,Gm9780,ENSMUSG00000094800,protein_coding -9637,Samt1,ENSMUSG00000094802,protein_coding -32742,Gm21977,ENSMUSG00000094804,protein_coding -38255,Olfr311,ENSMUSG00000094805,protein_coding -46532,Cyp2d10,ENSMUSG00000094806,protein_coding -19432,1810009J06Rik,ENSMUSG00000094808,protein_coding -35147,Olfr907,ENSMUSG00000094810,protein_coding -32040,Gm21103,ENSMUSG00000094811,protein_coding -51323,Gm20777,ENSMUSG00000094813,protein_coding -45392,Gm21973,ENSMUSG00000094814,protein_coding -32392,Cphx3,ENSMUSG00000094817,protein_coding -29798,Defa32,ENSMUSG00000094818,protein_coding -26527,Olfr53,ENSMUSG00000094819,protein_coding -52142,Gm21518,ENSMUSG00000094821,protein_coding -25498,Olfr243,ENSMUSG00000094822,protein_coding -32003,Gm3194,ENSMUSG00000094825,protein_coding -55487,AC123873.3,ENSMUSG00000094836,protein_coding -51329,Gm20828,ENSMUSG00000094838,protein_coding -18429,Muc3a,ENSMUSG00000094840,protein_coding -38592,Tmem95,ENSMUSG00000094845,protein_coding 
-54634,Olfr1487,ENSMUSG00000094846,protein_coding -55498,AC133095.1,ENSMUSG00000094855,protein_coding -12422,Gm21962,ENSMUSG00000094856,protein_coding -5686,Olfr1297,ENSMUSG00000094858,protein_coding -7297,Btbd35f27,ENSMUSG00000094860,protein_coding -8614,Gm5072,ENSMUSG00000094861,protein_coding -42784,Zfp131,ENSMUSG00000094870,protein_coding -55527,AC132444.3,ENSMUSG00000094874,protein_coding -7262,Btbd35f18,ENSMUSG00000094876,protein_coding -50122,Olfr130,ENSMUSG00000094878,protein_coding -22910,Vmn1r129,ENSMUSG00000094879,protein_coding -51126,H2al2c,ENSMUSG00000094881,protein_coding -50080,Olfr105-ps,ENSMUSG00000094884,protein_coding -9525,Ott,ENSMUSG00000094885,protein_coding -55493,AC163611.2,ENSMUSG00000094887,protein_coding -49722,Olfr55,ENSMUSG00000094891,protein_coding -49079,Vmn2r101,ENSMUSG00000094892,protein_coding -35497,Gm4791,ENSMUSG00000094893,protein_coding -40764,Vmn1r201,ENSMUSG00000094898,protein_coding -7467,H2al1d,ENSMUSG00000094904,protein_coding -22967,Vmn1r155,ENSMUSG00000094905,protein_coding -44666,D430019H16Rik,ENSMUSG00000094910,protein_coding -51536,Gm21921,ENSMUSG00000094911,protein_coding -28028,Gm9507,ENSMUSG00000094913,protein_coding -55561,AC168977.2,ENSMUSG00000094915,protein_coding -41441,Gm8765,ENSMUSG00000094918,protein_coding -49185,Vmn2r112,ENSMUSG00000094921,protein_coding -32674,Gm5798,ENSMUSG00000094925,protein_coding -50359,1700122O11Rik,ENSMUSG00000094928,protein_coding -31997,Gm3526,ENSMUSG00000094929,protein_coding -22978,Vmn1r160,ENSMUSG00000094931,protein_coding -7084,Gm2007,ENSMUSG00000094932,protein_coding -22985,Vmn1r163,ENSMUSG00000094934,protein_coding -54151,Rbm4,ENSMUSG00000094936,protein_coding -51548,Gm21773,ENSMUSG00000094939,protein_coding -9530,Gm15093,ENSMUSG00000094941,protein_coding -41728,Gm3604,ENSMUSG00000094942,protein_coding -7792,Rhox4a2,ENSMUSG00000094945,protein_coding -25008,Vmn2r66,ENSMUSG00000094950,protein_coding -32722,Gm8011,ENSMUSG00000094954,protein_coding 
-14263,3110021N24Rik,ENSMUSG00000094958,protein_coding -10597,Gm21954,ENSMUSG00000094962,protein_coding -35197,Olfr943,ENSMUSG00000094970,protein_coding -21770,Gm8994,ENSMUSG00000094973,protein_coding -23523,Scgb1b20,ENSMUSG00000094978,protein_coding -22947,Gm8653,ENSMUSG00000094981,protein_coding -37233,Topaz1,ENSMUSG00000094985,protein_coding -54596,Olfr1454,ENSMUSG00000094986,protein_coding -36924,Fbxw25,ENSMUSG00000094992,protein_coding -35035,Pate3,ENSMUSG00000094995,protein_coding -29314,Olfr798,ENSMUSG00000095002,protein_coding -51692,Gm20838,ENSMUSG00000095011,protein_coding -32067,Gm16434,ENSMUSG00000095015,protein_coding -55560,AC234645.1,ENSMUSG00000095019,protein_coding -28757,Gm20765,ENSMUSG00000095022,protein_coding -32261,Gm5458,ENSMUSG00000095024,protein_coding -30617,Gm3336,ENSMUSG00000095026,protein_coding -10021,Sirpb1b,ENSMUSG00000095028,protein_coding -42798,Gm36079,ENSMUSG00000095029,protein_coding -33160,Olfr1513,ENSMUSG00000095030,protein_coding -51307,Gm21310,ENSMUSG00000095032,protein_coding -18726,1700001J03Rik,ENSMUSG00000095040,protein_coding -55563,AC149090.1,ENSMUSG00000095041,protein_coding -32699,Gm6401,ENSMUSG00000095044,protein_coding -13769,Gm11239,ENSMUSG00000095048,protein_coding -31961,Gm3159,ENSMUSG00000095056,protein_coding -24564,E030018B13Rik,ENSMUSG00000095061,protein_coding -7653,Slx,ENSMUSG00000095063,protein_coding -22929,Vmn1r135,ENSMUSG00000095064,protein_coding -29797,Defa20,ENSMUSG00000095066,protein_coding -42809,AF067061,ENSMUSG00000095071,protein_coding -17293,Gm3139,ENSMUSG00000095074,protein_coding -29297,Olfr782,ENSMUSG00000095075,protein_coding -55552,AC125178.2,ENSMUSG00000095076,protein_coding -22969,Gm8693,ENSMUSG00000095081,protein_coding -9543,Gm15091,ENSMUSG00000095082,protein_coding -55556,AC125149.5,ENSMUSG00000095092,protein_coding -49184,Vmn2r111,ENSMUSG00000095093,protein_coding -38762,Olfr385,ENSMUSG00000095095,protein_coding -54205,Ccdc85b,ENSMUSG00000095098,protein_coding 
-13936,Gm13285,ENSMUSG00000095101,protein_coding -50168,Esp15,ENSMUSG00000095104,protein_coding -40468,Edaradd,ENSMUSG00000095105,protein_coding -32732,Gm3633,ENSMUSG00000095113,protein_coding -25945,Itpripl2,ENSMUSG00000095115,protein_coding -40756,Vmn1r198,ENSMUSG00000095125,protein_coding -52045,Gm21858,ENSMUSG00000095135,protein_coding -29296,Olfr781,ENSMUSG00000095138,protein_coding -12998,Pou3f2,ENSMUSG00000095139,protein_coding -52112,Gm28891,ENSMUSG00000095141,protein_coding -11627,Hsd3b4,ENSMUSG00000095143,protein_coding -51214,Gm20873,ENSMUSG00000095148,protein_coding -52535,Gm21095,ENSMUSG00000095153,protein_coding -5669,Olfr1281,ENSMUSG00000095156,protein_coding -22938,Vmn1r138,ENSMUSG00000095163,protein_coding -27079,Taar7b,ENSMUSG00000095171,protein_coding -51394,Gm20822,ENSMUSG00000095172,protein_coding -7819,Rhox5,ENSMUSG00000095180,protein_coding -34446,Gm10718,ENSMUSG00000095186,protein_coding -37987,Olfr1381,ENSMUSG00000095187,protein_coding -54616,Olfr1472,ENSMUSG00000095189,protein_coding -22883,Gm10668,ENSMUSG00000095190,protein_coding -22930,Gm5725,ENSMUSG00000095191,protein_coding -31924,Gm3005,ENSMUSG00000095195,protein_coding -7104,Zfp967,ENSMUSG00000095199,protein_coding -22855,Vmn1r91,ENSMUSG00000095201,protein_coding -55525,CR974586.1,ENSMUSG00000095207,protein_coding -25724,Olfr473,ENSMUSG00000095212,protein_coding -40696,Hist1h2bn,ENSMUSG00000095217,protein_coding -14491,Olfr1338,ENSMUSG00000095218,protein_coding -32740,Gm10377,ENSMUSG00000095226,protein_coding -23520,Scgb1b19,ENSMUSG00000095232,protein_coding -13266,Gm21586,ENSMUSG00000095234,protein_coding -19552,Olfr38,ENSMUSG00000095236,protein_coding -25756,Olfr497,ENSMUSG00000095239,protein_coding -7862,Cypt14,ENSMUSG00000095240,protein_coding -47029,Gm5478,ENSMUSG00000095241,protein_coding -51310,Gm20834,ENSMUSG00000095242,protein_coding -55518,Ccl27,ENSMUSG00000095247,protein_coding -25610,Olfr681,ENSMUSG00000095248,protein_coding 
-55504,AC133103.4,ENSMUSG00000095250,protein_coding -11389,Gm10697,ENSMUSG00000095251,protein_coding -49705,Zfp799,ENSMUSG00000095253,protein_coding -23438,Scgb1b3,ENSMUSG00000095257,protein_coding -51677,Gm21094,ENSMUSG00000095263,protein_coding -51424,Ssty1,ENSMUSG00000095267,protein_coding -13901,Ifna9,ENSMUSG00000095270,protein_coding -22945,Vmn1r142,ENSMUSG00000095273,protein_coding -22879,Vmn1r107,ENSMUSG00000095275,protein_coding -23985,Gfy,ENSMUSG00000095276,protein_coding -32254,Gm21738,ENSMUSG00000095280,protein_coding -50098,Olfr117,ENSMUSG00000095286,protein_coding -7666,Gm10058,ENSMUSG00000095293,protein_coding -29748,Gm21119,ENSMUSG00000095294,protein_coding -32006,Gm3488,ENSMUSG00000095295,protein_coding -15794,Gm8906,ENSMUSG00000095296,protein_coding -41429,Gm906,ENSMUSG00000095300,protein_coding -25727,Olfr476,ENSMUSG00000095301,protein_coding -52227,Ssty2,ENSMUSG00000095302,protein_coding -32382,Plac9a,ENSMUSG00000095304,protein_coding -22906,Vmn1r125,ENSMUSG00000095309,protein_coding -38760,Olfr382,ENSMUSG00000095312,protein_coding -6396,Gm10130,ENSMUSG00000095315,protein_coding -7598,Gm21876,ENSMUSG00000095316,protein_coding -32725,Gm8024,ENSMUSG00000095318,protein_coding -55547,Ccl21a,ENSMUSG00000095320,protein_coding -35215,Olfr957,ENSMUSG00000095322,protein_coding -51252,Gm20825,ENSMUSG00000095324,protein_coding -49710,Zfp870,ENSMUSG00000095325,protein_coding -50571,Gm21834,ENSMUSG00000095330,protein_coding -22368,Zscan4b,ENSMUSG00000095339,protein_coding -13770,Gm428,ENSMUSG00000095341,protein_coding -16840,Gm21905,ENSMUSG00000095346,protein_coding -22992,Vmn1r165,ENSMUSG00000095358,protein_coding -32046,Gm3594,ENSMUSG00000095360,protein_coding -7138,Gm14325,ENSMUSG00000095362,protein_coding -22971,Gm4567,ENSMUSG00000095363,protein_coding -51411,Rbm31y,ENSMUSG00000095365,protein_coding -31931,Gm3012,ENSMUSG00000095368,protein_coding -32715,Gm9611,ENSMUSG00000095371,protein_coding -50054,Olfr91,ENSMUSG00000095377,protein_coding 
-22960,Vmn1r152,ENSMUSG00000095383,protein_coding -32768,1700001F09Rik,ENSMUSG00000095384,protein_coding -14166,Gm17662,ENSMUSG00000095386,protein_coding -11629,Gm10681,ENSMUSG00000095388,protein_coding -35237,Olfr229,ENSMUSG00000095390,protein_coding -29321,Olfr804,ENSMUSG00000095401,protein_coding -50690,Tmem200c,ENSMUSG00000095407,protein_coding -15228,Gm13043,ENSMUSG00000095409,protein_coding -7468,H2al1e,ENSMUSG00000095413,protein_coding -22418,Vmn1r72,ENSMUSG00000095430,protein_coding -41976,Zfp748,ENSMUSG00000095432,protein_coding -46990,Fignl2,ENSMUSG00000095440,protein_coding -7472,H2al1i,ENSMUSG00000095445,protein_coding -34702,Olfr859,ENSMUSG00000095448,protein_coding -55529,AC132444.4,ENSMUSG00000095450,protein_coding -51828,Gm20795,ENSMUSG00000095452,protein_coding -55517,Il11ra2,ENSMUSG00000095456,protein_coding -51360,Gm21725,ENSMUSG00000095462,protein_coding -33824,Entpd4,ENSMUSG00000095463,protein_coding -47972,Gm21987,ENSMUSG00000095464,protein_coding -32738,Gm3015,ENSMUSG00000095466,protein_coding -43123,Gm21863,ENSMUSG00000095470,protein_coding -9120,Cldn34c2,ENSMUSG00000095474,protein_coding -55497,AC133095.2,ENSMUSG00000095475,protein_coding -29290,Olfr776,ENSMUSG00000095483,protein_coding -54625,Olfr1480,ENSMUSG00000095484,protein_coding -21308,Vmn2r22,ENSMUSG00000095486,protein_coding -32705,Gm7951,ENSMUSG00000095490,protein_coding -32570,A630023A22Rik,ENSMUSG00000095493,protein_coding -13944,Ifna1,ENSMUSG00000095498,protein_coding -55528,AC132444.5,ENSMUSG00000095500,protein_coding -17294,Gm3147,ENSMUSG00000095503,protein_coding -55538,AC164084.1,ENSMUSG00000095505,protein_coding -51343,Gm20812,ENSMUSG00000095508,protein_coding -7807,Rhox3f,ENSMUSG00000095510,protein_coding -42787,D13Ertd608e,ENSMUSG00000095514,protein_coding -32691,Gm8068,ENSMUSG00000095518,protein_coding -51407,Gm20877,ENSMUSG00000095520,protein_coding -10686,Gm8298,ENSMUSG00000095522,protein_coding -55496,AC124606.1,ENSMUSG00000095523,protein_coding 
-34671,Olfr834,ENSMUSG00000095525,protein_coding -35124,Olfr888,ENSMUSG00000095527,protein_coding -32778,Gm10375,ENSMUSG00000095528,protein_coding -31922,Gm10413,ENSMUSG00000095533,protein_coding -23181,Gm21983,ENSMUSG00000095538,protein_coding -50199,Esp4,ENSMUSG00000095540,protein_coding -22956,Vmn1r148,ENSMUSG00000095543,protein_coding -7083,Zfp969,ENSMUSG00000095545,protein_coding -7660,Gm10230,ENSMUSG00000095546,protein_coding -34445,Gm10719,ENSMUSG00000095547,protein_coding -16061,Gm21671,ENSMUSG00000095550,protein_coding -32695,Gm7980,ENSMUSG00000095551,protein_coding -55524,4933409K07Rik,ENSMUSG00000095552,protein_coding -18740,Gm3415,ENSMUSG00000095557,protein_coding -15627,Noc2l,ENSMUSG00000095567,protein_coding -27082,Taar7d,ENSMUSG00000095569,protein_coding -55542,AC140325.3,ENSMUSG00000095570,protein_coding -51130,H2al2b,ENSMUSG00000095573,protein_coding -2715,Fmo6,ENSMUSG00000095576,protein_coding -21630,Gm6619,ENSMUSG00000095577,protein_coding -52466,Gm21760,ENSMUSG00000095578,protein_coding -55519,CR974586.2,ENSMUSG00000095585,protein_coding -5675,Olfr1287,ENSMUSG00000095586,protein_coding -28023,Gm7138,ENSMUSG00000095593,protein_coding -43643,Fam177a,ENSMUSG00000095595,protein_coding -7791,Rhox3a2,ENSMUSG00000095601,protein_coding -50082,Olfr106-ps,ENSMUSG00000095603,protein_coding -51984,Gm21258,ENSMUSG00000095606,protein_coding -29338,Olfr247,ENSMUSG00000095608,protein_coding -42799,Gm21188,ENSMUSG00000095609,protein_coding -22973,Vmn1r157,ENSMUSG00000095619,protein_coding -47903,Csta2,ENSMUSG00000095620,protein_coding -9535,Gm15085,ENSMUSG00000095621,protein_coding -55545,Il11ra2,ENSMUSG00000095623,protein_coding -50154,Esp24,ENSMUSG00000095625,protein_coding -22518,Vmn1r89,ENSMUSG00000095629,protein_coding -23018,Vmn1r175,ENSMUSG00000095632,protein_coding -52260,Gm20816,ENSMUSG00000095634,protein_coding -54555,Olfr1434,ENSMUSG00000095640,protein_coding -27078,Taar7a,ENSMUSG00000095647,protein_coding 
-7095,Gm2004,ENSMUSG00000095648,protein_coding -52562,Gm20854,ENSMUSG00000095650,protein_coding -42805,Gm21818,ENSMUSG00000095653,protein_coding -36438,Plscr5,ENSMUSG00000095654,protein_coding -7469,H2al1f,ENSMUSG00000095655,protein_coding -49200,Vmn2r-ps130,ENSMUSG00000095658,protein_coding -7735,Spin2g,ENSMUSG00000095659,protein_coding -7470,H2al1g,ENSMUSG00000095662,protein_coding -25014,Vmn2r67,ENSMUSG00000095664,protein_coding -55536,AC164084.2,ENSMUSG00000095666,protein_coding -34693,Olfr854,ENSMUSG00000095667,protein_coding -19927,Vmn1r30,ENSMUSG00000095670,protein_coding -55508,AC133103.5,ENSMUSG00000095672,protein_coding -13261,Ccl21b,ENSMUSG00000095675,protein_coding -48821,Dynlt1f,ENSMUSG00000095677,protein_coding -31989,Gm8281,ENSMUSG00000095681,protein_coding -22989,Gm10662,ENSMUSG00000095683,protein_coding -31947,Gm3099,ENSMUSG00000095686,protein_coding -48854,Rnaset2a,ENSMUSG00000095687,protein_coding -51184,Gm21820,ENSMUSG00000095693,protein_coding -29301,Olfr786,ENSMUSG00000095696,protein_coding -7799,Rhox2d,ENSMUSG00000095698,protein_coding -48248,Olfr209,ENSMUSG00000095706,protein_coding -15773,Gm8871,ENSMUSG00000095710,protein_coding -7263,Btbd35f10,ENSMUSG00000095716,protein_coding -44325,Gm2056,ENSMUSG00000095717,protein_coding -17289,Gm6502,ENSMUSG00000095718,protein_coding -28025,Gm7137,ENSMUSG00000095721,protein_coding -44300,Gm21319,ENSMUSG00000095724,protein_coding -55551,AC125178.3,ENSMUSG00000095728,protein_coding -22250,Vmn2r29,ENSMUSG00000095730,protein_coding -7806,Rhox2f,ENSMUSG00000095741,protein_coding -55572,CAAA01147332.1,ENSMUSG00000095742,protein_coding -32776,Gm17654,ENSMUSG00000095743,protein_coding -22867,Gm4133,ENSMUSG00000095745,protein_coding -7696,Spin2f,ENSMUSG00000095754,protein_coding -55521,CR974586.3,ENSMUSG00000095755,protein_coding -22909,Vmn1r128,ENSMUSG00000095758,protein_coding -51355,Gm21828,ENSMUSG00000095759,protein_coding -55495,AC124606.2,ENSMUSG00000095763,protein_coding 
-33050,Olfr741,ENSMUSG00000095765,protein_coding -22881,Vmn1r111,ENSMUSG00000095768,protein_coding -51352,Gm21891,ENSMUSG00000095769,protein_coding -7704,Gm21637,ENSMUSG00000095770,protein_coding -22306,Vmn2r38,ENSMUSG00000095773,protein_coding -35233,Olfr970,ENSMUSG00000095774,protein_coding -52193,Gm20924,ENSMUSG00000095785,protein_coding -55505,AC133103.6,ENSMUSG00000095787,protein_coding -10020,Sirpb1a,ENSMUSG00000095788,protein_coding -18236,Nupr1l,ENSMUSG00000095789,protein_coding -51723,Gm20855,ENSMUSG00000095793,protein_coding -32004,Gm8246,ENSMUSG00000095797,protein_coding -44351,Gm8332,ENSMUSG00000095799,protein_coding -29347,Olfr824,ENSMUSG00000095804,protein_coding -22869,Gm5728,ENSMUSG00000095806,protein_coding -5678,Olfr1290,ENSMUSG00000095809,protein_coding -7308,Btbd35f7,ENSMUSG00000095814,protein_coding -28022,Gm3238,ENSMUSG00000095817,protein_coding -42812,Tcstv3,ENSMUSG00000095821,protein_coding -11379,Gm9117,ENSMUSG00000095822,protein_coding -4985,Gm13697,ENSMUSG00000095824,protein_coding -15787,Speer1,ENSMUSG00000095829,protein_coding -19550,Olfr453,ENSMUSG00000095831,protein_coding -22876,Gm4141,ENSMUSG00000095837,protein_coding -35227,Olfr965,ENSMUSG00000095839,protein_coding -30969,Gm5741,ENSMUSG00000095845,protein_coding -51136,Gm10256,ENSMUSG00000095852,protein_coding -50078,Olfr104-ps,ENSMUSG00000095858,protein_coding -22445,Vmn1r77,ENSMUSG00000095864,protein_coding -52448,Gm20917,ENSMUSG00000095867,protein_coding -11322,Lce1k,ENSMUSG00000095870,protein_coding -9500,Gm15128,ENSMUSG00000095872,protein_coding -51423,Gm21764,ENSMUSG00000095879,protein_coding -50175,Esp38,ENSMUSG00000095886,protein_coding -7675,Gm10096,ENSMUSG00000095887,protein_coding -34447,Gm10717,ENSMUSG00000095891,protein_coding -35120,Olfr884,ENSMUSG00000095893,protein_coding -13899,Ifna14,ENSMUSG00000095896,protein_coding -51364,Gm20773,ENSMUSG00000095900,protein_coding -26524,Olfr538,ENSMUSG00000095901,protein_coding -35231,Olfr968,ENSMUSG00000095903,protein_coding 
-41885,Gm10324,ENSMUSG00000095909,protein_coding -25718,Olfr469,ENSMUSG00000095910,protein_coding -31993,Gm3317,ENSMUSG00000095912,protein_coding -22329,Vmn2r48,ENSMUSG00000095914,protein_coding -40743,Vmn1r191,ENSMUSG00000095916,protein_coding -33049,Olfr740,ENSMUSG00000095917,protein_coding -15782,Gm5861,ENSMUSG00000095918,protein_coding -51437,Gm21822,ENSMUSG00000095927,protein_coding -48245,Olfr204,ENSMUSG00000095928,protein_coding -25743,Olfr487,ENSMUSG00000095929,protein_coding -42782,Nim1k,ENSMUSG00000095930,protein_coding -22976,Vmn1r159,ENSMUSG00000095931,protein_coding -20739,Vmn1r49,ENSMUSG00000095932,protein_coding -7730,Btbd35f13,ENSMUSG00000095934,protein_coding -13766,Gm11238,ENSMUSG00000095935,protein_coding -22392,Zscan4e,ENSMUSG00000095936,protein_coding -31488,Tle7,ENSMUSG00000095941,protein_coding -51148,Gm21708,ENSMUSG00000095948,protein_coding -51314,Gm20737,ENSMUSG00000095950,protein_coding -17299,Gm3183,ENSMUSG00000095954,protein_coding -34669,Olfr832,ENSMUSG00000095957,protein_coding -49080,Vmn2r102,ENSMUSG00000095961,protein_coding -22936,Vmn1r137,ENSMUSG00000095962,protein_coding -28009,Gm19402,ENSMUSG00000095970,protein_coding -22889,Vmn1r116,ENSMUSG00000095973,protein_coding -32384,Cphx1,ENSMUSG00000095975,protein_coding -22933,Gm5891,ENSMUSG00000095976,protein_coding -51924,Gm21209,ENSMUSG00000095979,protein_coding -22962,Gm4201,ENSMUSG00000095984,protein_coding -49026,Zfp97,ENSMUSG00000095990,protein_coding -48538,Krtap22-2,ENSMUSG00000095992,protein_coding -55448,Gm21060,ENSMUSG00000095993,protein_coding -17306,Gm16513,ENSMUSG00000095996,protein_coding -29287,Olfr773,ENSMUSG00000096000,protein_coding -32629,2610528A11Rik,ENSMUSG00000096001,protein_coding -22476,Vmn2r53,ENSMUSG00000096002,protein_coding -32001,Gm3500,ENSMUSG00000096003,protein_coding -50127,Olfr134,ENSMUSG00000096009,protein_coding -21783,Hist4h4,ENSMUSG00000096010,protein_coding -13898,Ifna15,ENSMUSG00000096011,protein_coding 
-29583,Sox1,ENSMUSG00000096014,protein_coding -52254,Gm20937,ENSMUSG00000096016,protein_coding -31999,Gm3542,ENSMUSG00000096023,protein_coding -33114,Gm17174,ENSMUSG00000096024,protein_coding -25606,Olfr679,ENSMUSG00000096029,protein_coding -17227,Odaph,ENSMUSG00000096035,protein_coding -51203,Gm21778,ENSMUSG00000096036,protein_coding -31916,D830030K20Rik,ENSMUSG00000096039,protein_coding -17275,Gm16427,ENSMUSG00000096044,protein_coding -16064,Gm21698,ENSMUSG00000096045,protein_coding -44323,Gm2075,ENSMUSG00000096049,protein_coding -20724,Vmn1r40,ENSMUSG00000096051,protein_coding -26767,Syne1,ENSMUSG00000096054,protein_coding -17307,Gm10424,ENSMUSG00000096066,protein_coding -25741,Olfr486,ENSMUSG00000096068,protein_coding -26526,Olfr61,ENSMUSG00000096069,protein_coding -22946,Vmn1r143,ENSMUSG00000096071,protein_coding -8615,Gm8914,ENSMUSG00000096072,protein_coding -22994,Vmn1r166,ENSMUSG00000096073,protein_coding -38425,Gm10428,ENSMUSG00000096083,protein_coding -1247,A630095N17Rik,ENSMUSG00000096094,protein_coding -7466,H2al1c,ENSMUSG00000096097,protein_coding -40813,Vmn1r220,ENSMUSG00000096099,protein_coding -55506,AC133103.7,ENSMUSG00000096100,protein_coding -35200,Olfr1537,ENSMUSG00000096109,protein_coding -51835,Gm21800,ENSMUSG00000096120,protein_coding -52078,Gm21943,ENSMUSG00000096122,protein_coding -28026,Gm9639,ENSMUSG00000096131,protein_coding -17318,Gm7978,ENSMUSG00000096139,protein_coding -50229,Ankrd66,ENSMUSG00000096140,protein_coding -810,Dnah7a,ENSMUSG00000096141,protein_coding -26252,Vkorc1,ENSMUSG00000096145,protein_coding -24062,Kcnj11,ENSMUSG00000096146,protein_coding -25728,Olfr477,ENSMUSG00000096151,protein_coding -22856,Gm16442,ENSMUSG00000096152,protein_coding -15231,Gm13057,ENSMUSG00000096154,protein_coding -22857,Vmn1r93,ENSMUSG00000096164,protein_coding -35123,Olfr887,ENSMUSG00000096167,protein_coding -49724,Olfr1564,ENSMUSG00000096169,protein_coding -42791,Gm21761,ENSMUSG00000096175,protein_coding 
-52647,Gm20837,ENSMUSG00000096178,protein_coding -22331,Vmn2r49,ENSMUSG00000096180,protein_coding -32070,Gm3727,ENSMUSG00000096183,protein_coding -31299,Cmtm4,ENSMUSG00000096188,protein_coding -7707,Btbd35f2,ENSMUSG00000096194,protein_coding -31988,4930555G01Rik,ENSMUSG00000096197,protein_coding -42845,Ptrhd1,ENSMUSG00000096199,protein_coding -34449,Gm10715,ENSMUSG00000096201,protein_coding -25774,Olfr510,ENSMUSG00000096209,protein_coding -46379,H1f0,ENSMUSG00000096210,protein_coding -47216,Smim22,ENSMUSG00000096215,protein_coding -31943,Gm2916,ENSMUSG00000096218,protein_coding -29289,Olfr775,ENSMUSG00000096220,protein_coding -51294,Gm21874,ENSMUSG00000096223,protein_coding -12690,Lhx8,ENSMUSG00000096225,protein_coding -22217,Olfr5,ENSMUSG00000096228,protein_coding -29288,Olfr774,ENSMUSG00000096229,protein_coding -17282,Gm16429,ENSMUSG00000096230,protein_coding -55534,AC165294.2,ENSMUSG00000096236,protein_coding -55549,CT868723.1,ENSMUSG00000096237,protein_coding -4699,Gm21830,ENSMUSG00000096240,protein_coding -55554,AC102264.1,ENSMUSG00000096244,protein_coding -33030,Olfr724,ENSMUSG00000096254,protein_coding -48815,Dynlt1b,ENSMUSG00000096255,protein_coding -23267,Ccer2,ENSMUSG00000096257,protein_coding -17287,Gm3106,ENSMUSG00000096259,protein_coding -29730,4930467E23Rik,ENSMUSG00000096265,protein_coding -51198,Gm20914,ENSMUSG00000096268,protein_coding -55516,Ccl21b,ENSMUSG00000096271,protein_coding -54618,Olfr1474,ENSMUSG00000096273,protein_coding -44318,Gm2042,ENSMUSG00000096276,protein_coding -49251,Dcpp2,ENSMUSG00000096278,protein_coding -22940,Gm6176,ENSMUSG00000096283,protein_coding -42790,Tcstv1,ENSMUSG00000096284,protein_coding -54629,Olfr1484,ENSMUSG00000096289,protein_coding -29787,Defa2,ENSMUSG00000096295,protein_coding -22943,Gm16451,ENSMUSG00000096304,protein_coding -36565,Faiml,ENSMUSG00000096316,protein_coding -54615,Olfr1471,ENSMUSG00000096320,protein_coding -42803,Gm20767,ENSMUSG00000096323,protein_coding 
-42442,Gm21976,ENSMUSG00000096330,protein_coding -13776,Gm13871,ENSMUSG00000096333,protein_coding -4984,Gm13694,ENSMUSG00000096337,protein_coding -18734,Gm6408,ENSMUSG00000096344,protein_coding -50166,Esp16,ENSMUSG00000096345,protein_coding -22944,Gm10666,ENSMUSG00000096348,protein_coding -15628,Samd11,ENSMUSG00000096351,protein_coding -35125,Olfr889,ENSMUSG00000096356,protein_coding -54605,Olfr1463,ENSMUSG00000096365,protein_coding -54148,Gm21992,ENSMUSG00000096370,protein_coding -32771,Gm8138,ENSMUSG00000096372,protein_coding -22259,Vmn2r31,ENSMUSG00000096373,protein_coding -28027,Gm19668,ENSMUSG00000096380,protein_coding -34442,Gm11168,ENSMUSG00000096385,protein_coding -22870,Vmn1r100,ENSMUSG00000096386,protein_coding -36977,Fam240a,ENSMUSG00000096393,protein_coding -22270,Vmn2r35,ENSMUSG00000096399,protein_coding -32617,4930474N05Rik,ENSMUSG00000096405,protein_coding -35126,Olfr890,ENSMUSG00000096409,protein_coding -28016,Gm10100,ENSMUSG00000096421,protein_coding -35121,Olfr885,ENSMUSG00000096424,protein_coding -7252,Btbd35f24,ENSMUSG00000096426,protein_coding -35133,Olfr895,ENSMUSG00000096427,protein_coding -49175,Zfp994,ENSMUSG00000096433,protein_coding -54559,Olfr1437,ENSMUSG00000096436,protein_coding -27086,Taar8a,ENSMUSG00000096442,protein_coding -49250,Dcpp1,ENSMUSG00000096445,protein_coding -32752,Gm8104,ENSMUSG00000096446,protein_coding -7671,Gm10147,ENSMUSG00000096457,protein_coding -44570,Moap1,ENSMUSG00000096458,protein_coding -33911,Gm21750,ENSMUSG00000096463,protein_coding -25744,Olfr488,ENSMUSG00000096465,protein_coding -23519,Scgb2b19,ENSMUSG00000096467,protein_coding -8050,Gm16405,ENSMUSG00000096468,protein_coding -32015,Gm9603,ENSMUSG00000096470,protein_coding -34783,Cdkn2d,ENSMUSG00000096472,protein_coding -50055,Olfr92,ENSMUSG00000096477,protein_coding -28024,Gm3250,ENSMUSG00000096481,protein_coding -4983,Gm13696,ENSMUSG00000096484,protein_coding -54609,Olfr1466,ENSMUSG00000096485,protein_coding -28624,Gm5426,ENSMUSG00000096486,protein_coding 
-31923,Gm10409,ENSMUSG00000096488,protein_coding -29302,Olfr787,ENSMUSG00000096497,protein_coding -55523,CR974586.4,ENSMUSG00000096506,protein_coding -9263,Gm7903,ENSMUSG00000096508,protein_coding -22926,Gm4175,ENSMUSG00000096513,protein_coding -25499,Olfr628,ENSMUSG00000096516,protein_coding -34443,Gm10721,ENSMUSG00000096519,protein_coding -51152,Gm3376,ENSMUSG00000096520,protein_coding -19433,Gm2663,ENSMUSG00000096525,protein_coding -18735,Gm6370,ENSMUSG00000096527,protein_coding -13774,Gm11758,ENSMUSG00000096530,protein_coding -48536,Krtap16-3,ENSMUSG00000096534,protein_coding -41810,Fam240b,ENSMUSG00000096537,protein_coding -27115,Smlr1,ENSMUSG00000096546,protein_coding -50347,Prickle4,ENSMUSG00000096549,protein_coding -55491,Gm16367,ENSMUSG00000096550,protein_coding -5670,Olfr1282,ENSMUSG00000096554,protein_coding -35199,Olfr944,ENSMUSG00000096555,protein_coding -5700,Olfr1306,ENSMUSG00000096566,protein_coding -11997,Amy2a2,ENSMUSG00000096569,protein_coding -31920,Gm2956,ENSMUSG00000096574,protein_coding -44291,Oog1,ENSMUSG00000096576,protein_coding -13926,Gm13289,ENSMUSG00000096582,protein_coding -25481,Olfr611,ENSMUSG00000096584,protein_coding -13927,Gm13272,ENSMUSG00000096591,protein_coding -22478,Vmn2r54,ENSMUSG00000096593,protein_coding -13271,Gm10591,ENSMUSG00000096596,protein_coding -53895,Gm21886,ENSMUSG00000096597,protein_coding -22986,Gm8720,ENSMUSG00000096601,protein_coding -25286,Tpbgl,ENSMUSG00000096606,protein_coding -46637,7530416G11Rik,ENSMUSG00000096607,protein_coding -44316,Gm2035,ENSMUSG00000096619,protein_coding -7640,Gm5169,ENSMUSG00000096620,protein_coding -11262,Gm5849,ENSMUSG00000096621,protein_coding -52094,Gm21861,ENSMUSG00000096626,protein_coding -32012,Gm3383,ENSMUSG00000096629,protein_coding -21319,Vmn2r26,ENSMUSG00000096630,protein_coding -28709,Gm4312,ENSMUSG00000096640,protein_coding -41439,Gm904,ENSMUSG00000096641,protein_coding -9666,Magea1,ENSMUSG00000096644,protein_coding -7702,Spin2e,ENSMUSG00000096645,protein_coding 
-55522,CR974586.5,ENSMUSG00000096646,protein_coding -51739,Gm20896,ENSMUSG00000096650,protein_coding -22304,Vmn2r39,ENSMUSG00000096658,protein_coding -22873,Vmn1r103,ENSMUSG00000096663,protein_coding -18739,Gm3409,ENSMUSG00000096664,protein_coding -52222,Gm20852,ENSMUSG00000096666,protein_coding -13640,Mup15,ENSMUSG00000096674,protein_coding -25758,Olfr498,ENSMUSG00000096679,protein_coding -55537,AC164084.3,ENSMUSG00000096680,protein_coding -13942,Ifna5,ENSMUSG00000096682,protein_coding -32056,Gm3642,ENSMUSG00000096685,protein_coding -51225,Gm20830,ENSMUSG00000096686,protein_coding -27328,Mfsd4b4,ENSMUSG00000096687,protein_coding -13644,Mup17,ENSMUSG00000096688,protein_coding -22264,Vmn2r33,ENSMUSG00000096691,protein_coding -48237,Olfr196,ENSMUSG00000096695,protein_coding -49025,Zfp960,ENSMUSG00000096696,protein_coding -50156,Esp23,ENSMUSG00000096697,protein_coding -13768,Gm11236,ENSMUSG00000096700,protein_coding -5674,Olfr1286,ENSMUSG00000096703,protein_coding -14499,Olfr1329,ENSMUSG00000096705,protein_coding -51195,Gm21854,ENSMUSG00000096706,protein_coding -54619,Olfr1475,ENSMUSG00000096708,protein_coding -25675,Olfr699,ENSMUSG00000096714,protein_coding -49070,Vmn2r98,ENSMUSG00000096717,protein_coding -28289,Zfp781,ENSMUSG00000096718,protein_coding -24117,Mrgpra2b,ENSMUSG00000096719,protein_coding -49793,Psmb9,ENSMUSG00000096727,protein_coding -55543,AC140325.4,ENSMUSG00000096728,protein_coding -4982,Gm13695,ENSMUSG00000096729,protein_coding -55571,Vmn2r122,ENSMUSG00000096730,protein_coding -15790,Gm8897,ENSMUSG00000096732,protein_coding -37295,Gm21836,ENSMUSG00000096734,protein_coding -22454,Vmn1r79,ENSMUSG00000096735,protein_coding -34448,Gm17535,ENSMUSG00000096736,protein_coding -22890,Vmn1r117,ENSMUSG00000096737,protein_coding -54399,Lbhd1,ENSMUSG00000096740,protein_coding -17302,Gm6367,ENSMUSG00000096742,protein_coding -22261,Vmn2r32,ENSMUSG00000096743,protein_coding -28704,Gm4307,ENSMUSG00000096744,protein_coding 
-29345,Olfr823,ENSMUSG00000096747,protein_coding -13775,Gm11757,ENSMUSG00000096750,protein_coding -44585,Fam181a,ENSMUSG00000096753,protein_coding -55535,AC165294.3,ENSMUSG00000096756,protein_coding -35139,Olfr251,ENSMUSG00000096757,protein_coding -22927,Gm4177,ENSMUSG00000096760,protein_coding -22875,Gm5726,ENSMUSG00000096761,protein_coding -5720,Gm21985,ENSMUSG00000096764,protein_coding -51952,Gm21117,ENSMUSG00000096769,protein_coding -11995,Amy2a4,ENSMUSG00000096770,protein_coding -25599,Olfr675,ENSMUSG00000096773,protein_coding -31948,Gm5796,ENSMUSG00000096775,protein_coding -55553,Vmn1r186,ENSMUSG00000096776,protein_coding -7803,Rhox2e,ENSMUSG00000096788,protein_coding -31938,Gm3002,ENSMUSG00000096793,protein_coding -35145,Olfr905,ENSMUSG00000096794,protein_coding -28276,Zfp433,ENSMUSG00000096795,protein_coding -18725,Gm6309,ENSMUSG00000096798,protein_coding -44324,Gm6803,ENSMUSG00000096803,protein_coding -38254,Olfr312,ENSMUSG00000096806,protein_coding -55532,AC132444.6,ENSMUSG00000096808,protein_coding -22907,Vmn1r126,ENSMUSG00000096813,protein_coding -51421,Srsy,ENSMUSG00000096819,protein_coding -51353,Gm21425,ENSMUSG00000096820,protein_coding -4353,Olfr344,ENSMUSG00000096822,protein_coding -13273,Ccl27b,ENSMUSG00000096826,protein_coding -9516,Gm15127,ENSMUSG00000096834,protein_coding -32049,Gm3460,ENSMUSG00000096839,protein_coding -50132,Olfr136,ENSMUSG00000096840,protein_coding -15784,Gm8890,ENSMUSG00000096846,protein_coding -50258,Tmem151b,ENSMUSG00000096847,protein_coding -52655,Gm21748,ENSMUSG00000096850,protein_coding -46549,Cyp2d12,ENSMUSG00000096852,protein_coding -13928,Ifnz,ENSMUSG00000096854,protein_coding -28273,Gm10778,ENSMUSG00000096856,protein_coding -29322,Olfr805,ENSMUSG00000096858,protein_coding -23019,Vmn1r176,ENSMUSG00000096859,protein_coding -25016,Vmn2r68,ENSMUSG00000096861,protein_coding -9639,Gm10057,ENSMUSG00000096867,protein_coding -32065,Gm6676,ENSMUSG00000096869,protein_coding -22958,Gm10665,ENSMUSG00000096871,protein_coding 
-54427,Scgb2a2,ENSMUSG00000096872,protein_coding -55520,Ccl21b,ENSMUSG00000096873,protein_coding -15863,Gm21083,ENSMUSG00000096878,protein_coding -11339,Gm4858,ENSMUSG00000096879,protein_coding -46517,Shisa8,ENSMUSG00000096883,protein_coding -51827,Gm21797,ENSMUSG00000096885,protein_coding -20425,Gm20594,ENSMUSG00000096887,protein_coding -52424,Gm20867,ENSMUSG00000096898,protein_coding -31944,Gm3029,ENSMUSG00000096901,protein_coding -52329,Gm20843,ENSMUSG00000096902,protein_coding -22874,Vmn1r104,ENSMUSG00000096903,protein_coding -32045,Gm3591,ENSMUSG00000096904,protein_coding -49727,Zfp955b,ENSMUSG00000096910,protein_coding -30398,Galntl6,ENSMUSG00000096914,protein_coding -7732,Btbd35f6,ENSMUSG00000096915,protein_coding -23223,Zfp850,ENSMUSG00000096916,protein_coding -37413,A730071L15Rik,ENSMUSG00000096923,protein_coding -13546,Gm26657,ENSMUSG00000096930,protein_coding -17704,Gm26897,ENSMUSG00000096949,protein_coding -8331,Gm18336,ENSMUSG00000096966,protein_coding -2879,Gm26620,ENSMUSG00000096968,protein_coding -47063,Gm9918,ENSMUSG00000097050,protein_coding -26172,Cdiptos,ENSMUSG00000097075,protein_coding -13914,Gm26566,ENSMUSG00000097078,protein_coding -31664,Foxl1,ENSMUSG00000097084,protein_coding -32122,Gm3839,ENSMUSG00000097148,protein_coding -5006,Gm19426,ENSMUSG00000097187,protein_coding -45431,1810049J17Rik,ENSMUSG00000097221,protein_coding -39681,Gm27029,ENSMUSG00000097239,protein_coding -16257,Gm9903,ENSMUSG00000097271,protein_coding -27037,E030030I06Rik,ENSMUSG00000097327,protein_coding -38577,Tnfsf12,ENSMUSG00000097328,protein_coding -41975,Zfp87,ENSMUSG00000097333,protein_coding -23027,Vmn1r181,ENSMUSG00000097425,protein_coding -28753,Gm6763,ENSMUSG00000097427,protein_coding -39683,Ptges3l,ENSMUSG00000097487,protein_coding -28755,Gm21304,ENSMUSG00000097550,protein_coding -41974,Gm26965,ENSMUSG00000097565,protein_coding -27565,Gm26741,ENSMUSG00000097704,protein_coding -24993,Gm2115,ENSMUSG00000097789,protein_coding 
-32009,Gm3532,ENSMUSG00000097853,protein_coding -28137,Gm26602,ENSMUSG00000097854,protein_coding -28754,Gm8764,ENSMUSG00000097878,protein_coding -38492,Gsg1l2,ENSMUSG00000097886,protein_coding -2261,Ascl5,ENSMUSG00000097918,protein_coding -31649,Gm27021,ENSMUSG00000097919,protein_coding -44663,Tunar,ENSMUSG00000097929,protein_coding -23498,Scgb2b15,ENSMUSG00000097972,protein_coding -23476,Scgb2b12,ENSMUSG00000097982,protein_coding -5459,Gm27027,ENSMUSG00000098004,protein_coding -23331,Zfp82,ENSMUSG00000098022,protein_coding -8852,Gm26992,ENSMUSG00000098078,protein_coding -23509,Scgb2b17,ENSMUSG00000098094,protein_coding -46980,Bin2,ENSMUSG00000098112,protein_coding -25864,Rassf10,ENSMUSG00000098132,protein_coding -44184,Rnf113a2,ENSMUSG00000098134,protein_coding -18558,Gm26938,ENSMUSG00000098140,protein_coding -46217,Ccdc166,ENSMUSG00000098176,protein_coding -27663,Sowahc,ENSMUSG00000098188,protein_coding -10845,Arl14,ENSMUSG00000098207,protein_coding -48160,Rpl24,ENSMUSG00000098274,protein_coding -2171,Gm28040,ENSMUSG00000098306,protein_coding -7828,Btg1-ps2,ENSMUSG00000098348,protein_coding -49589,Gm28043,ENSMUSG00000098374,protein_coding -6300,Pet117,ENSMUSG00000098387,protein_coding -37789,Gm12117,ENSMUSG00000098456,protein_coding -32391,Cphx2,ENSMUSG00000098463,protein_coding -21344,C1rb,ENSMUSG00000098470,protein_coding -5866,Pla2g4b,ENSMUSG00000098488,protein_coding -7250,Gm14496,ENSMUSG00000098505,protein_coding -2392,Rgs21,ENSMUSG00000098509,protein_coding -44567,Gm28051,ENSMUSG00000098530,protein_coding -703,Gm5269,ENSMUSG00000098549,protein_coding -34215,Kctd12,ENSMUSG00000098557,protein_coding -9375,Gm15013,ENSMUSG00000098559,protein_coding -35027,Gm1113,ENSMUSG00000098590,protein_coding -6564,Gm14214,ENSMUSG00000098640,protein_coding -37622,Gm28048,ENSMUSG00000098650,protein_coding -46208,Mroh6,ENSMUSG00000098678,protein_coding -41962,Gm28041,ENSMUSG00000098692,protein_coding -19675,Gm28053,ENSMUSG00000098715,protein_coding 
-6164,Prn,ENSMUSG00000098754,protein_coding -33832,Gm27179,ENSMUSG00000098773,protein_coding -41960,Gm28044,ENSMUSG00000098781,protein_coding -5864,Jmjd7,ENSMUSG00000098789,protein_coding -4149,Gm28038,ENSMUSG00000098794,protein_coding -35038,Gm27235,ENSMUSG00000098847,protein_coding -41961,Zfp953,ENSMUSG00000098905,protein_coding -1936,Tmem185b,ENSMUSG00000098923,protein_coding -6613,Gm28036,ENSMUSG00000098950,protein_coding -29183,Rdh16f1,ENSMUSG00000099009,protein_coding -116,Tcf24,ENSMUSG00000099032,protein_coding -4147,Gm28035,ENSMUSG00000099041,protein_coding -47086,Atf7,ENSMUSG00000099083,protein_coding -37449,Gm11983,ENSMUSG00000099102,protein_coding -48869,Gm17087,ENSMUSG00000099104,protein_coding -42393,Gm17190,ENSMUSG00000099115,protein_coding -22555,Obox8,ENSMUSG00000099216,protein_coding -40332,Calm5,ENSMUSG00000099269,protein_coding -13706,Gm11214,ENSMUSG00000099294,protein_coding -54496,Ms4a14,ENSMUSG00000099398,protein_coding -44029,Gm6657,ENSMUSG00000099418,protein_coding -13935,Gm13279,ENSMUSG00000099420,protein_coding -7471,H2al1h,ENSMUSG00000099443,protein_coding -25380,Xndc1,ENSMUSG00000099481,protein_coding -5308,Olfr1228,ENSMUSG00000099486,protein_coding -40838,Hist1h3g,ENSMUSG00000099517,protein_coding -13933,Gm13275,ENSMUSG00000099518,protein_coding -52292,Gm29644,ENSMUSG00000099530,protein_coding -52316,Gm20869,ENSMUSG00000099531,protein_coding -51595,Gm21488,ENSMUSG00000099541,protein_coding -13930,Gm13276,ENSMUSG00000099545,protein_coding -52287,Gm28079,ENSMUSG00000099550,protein_coding -36502,Gm28729,ENSMUSG00000099564,protein_coding -23468,Scgb1b10,ENSMUSG00000099581,protein_coding -40850,Hist1h3d,ENSMUSG00000099583,protein_coding -40737,Vmn1r189,ENSMUSG00000099611,protein_coding -55218,2310034G01Rik,ENSMUSG00000099655,protein_coding -25493,Olfr623,ENSMUSG00000099687,protein_coding -23324,Zfp383,ENSMUSG00000099689,protein_coding -7710,Btbd35f21,ENSMUSG00000099711,protein_coding -23474,Scgb2b11,ENSMUSG00000099729,protein_coding 
-52480,Gm28897,ENSMUSG00000099740,protein_coding -15859,Gm21149,ENSMUSG00000099762,protein_coding -52348,Gm20903,ENSMUSG00000099782,protein_coding -40745,Vmn1r192,ENSMUSG00000099787,protein_coding -52431,Gm29564,ENSMUSG00000099792,protein_coding -5083,Olfr1036,ENSMUSG00000099820,protein_coding -52546,Gm21409,ENSMUSG00000099840,protein_coding -52602,Gm20906,ENSMUSG00000099856,protein_coding -51251,Gm29049,ENSMUSG00000099861,protein_coding -52487,Gm20911,ENSMUSG00000099894,protein_coding -23510,Scgb1b17,ENSMUSG00000099898,protein_coding -23528,Scgb2b21,ENSMUSG00000099900,protein_coding -47510,Gm28539,ENSMUSG00000099908,protein_coding -803,Gm28551,ENSMUSG00000099913,protein_coding -40804,Vmn1r215,ENSMUSG00000099917,protein_coding -52516,Gm28827,ENSMUSG00000099925,protein_coding -36367,Bcl2a1d,ENSMUSG00000099974,protein_coding -27088,Taar8c,ENSMUSG00000100004,protein_coding -5290,Olfr1215,ENSMUSG00000100016,protein_coding -52265,Gm20817,ENSMUSG00000100032,protein_coding -52009,Gm20929,ENSMUSG00000100045,protein_coding -51579,Gm20890,ENSMUSG00000100055,protein_coding -23517,Scgb2b18,ENSMUSG00000100058,protein_coding -13910,Ifnab,ENSMUSG00000100079,protein_coding -9077,Tgif2lx1,ENSMUSG00000100133,protein_coding -27087,Taar8b,ENSMUSG00000100186,protein_coding -1374,Krtap28-10,ENSMUSG00000100190,protein_coding -9076,Tgif2lx2,ENSMUSG00000100194,protein_coding -7436,H2al1m,ENSMUSG00000100200,protein_coding -40840,Hist1h3f,ENSMUSG00000100210,protein_coding -51341,Gm29554,ENSMUSG00000100231,protein_coding -13931,Gm13277,ENSMUSG00000100234,protein_coding -41944,Gm28557,ENSMUSG00000100235,protein_coding -52576,Gm20820,ENSMUSG00000100240,protein_coding -32537,Slc18a3,ENSMUSG00000100241,protein_coding -7251,Btbd35f23,ENSMUSG00000100249,protein_coding -25382,Trpc2,ENSMUSG00000100254,protein_coding -1897,Gm28363,ENSMUSG00000100265,protein_coding -40772,Vmn1r205,ENSMUSG00000100296,protein_coding -1901,Gm28360,ENSMUSG00000100305,protein_coding 
-51934,Gm28870,ENSMUSG00000100338,protein_coding -7440,H2al1k,ENSMUSG00000100448,protein_coding -51648,Gm29582,ENSMUSG00000100467,protein_coding -51968,Gm20920,ENSMUSG00000100485,protein_coding -33672,Gm4131,ENSMUSG00000100486,protein_coding -13920,Gm13283,ENSMUSG00000100505,protein_coding -23347,Ovol3,ENSMUSG00000100512,protein_coding -52320,Gm20870,ENSMUSG00000100535,protein_coding -13940,Ifna11,ENSMUSG00000100549,protein_coding -22564,Vmn1r90,ENSMUSG00000100586,protein_coding -52642,Gm21996,ENSMUSG00000100608,protein_coding -1905,Gm7145,ENSMUSG00000100617,protein_coding -54262,Gm28374,ENSMUSG00000100621,protein_coding -42312,Gm20379,ENSMUSG00000100622,protein_coding -7464,H2al1a,ENSMUSG00000100626,protein_coding -51199,Gm28171,ENSMUSG00000100634,protein_coding -4859,Gm28230,ENSMUSG00000100642,protein_coding -51244,Gm28576,ENSMUSG00000100645,protein_coding -52121,1700040F15Rik,ENSMUSG00000100667,protein_coding -801,Gm28778,ENSMUSG00000100679,protein_coding -27083,Taar7e,ENSMUSG00000100689,protein_coding -17142,Gm28434,ENSMUSG00000100704,protein_coding -52398,Gm21173,ENSMUSG00000100708,protein_coding -13939,Ifna7,ENSMUSG00000100713,protein_coding -52131,Gm21497,ENSMUSG00000100726,protein_coding -36388,Gm29094,ENSMUSG00000100838,protein_coding -1061,Gm28845,ENSMUSG00000100846,protein_coding -52160,Gm21627,ENSMUSG00000100856,protein_coding -51790,Gm20897,ENSMUSG00000100892,protein_coding -5244,Olfr1167,ENSMUSG00000100899,protein_coding -52384,Gm20850,ENSMUSG00000100902,protein_coding -24006,Lhb,ENSMUSG00000100916,protein_coding -37975,Olfr10,ENSMUSG00000100923,protein_coding -54213,1700020D05Rik,ENSMUSG00000100937,protein_coding -51232,Gm28998,ENSMUSG00000100939,protein_coding -27085,Taar7f,ENSMUSG00000100950,protein_coding -10904,Gm29133,ENSMUSG00000100962,protein_coding -6108,Gm28372,ENSMUSG00000100963,protein_coding -46622,Gm29666,ENSMUSG00000100967,protein_coding -51914,Gm28553,ENSMUSG00000100972,protein_coding -46155,Ly6g6g,ENSMUSG00000101026,protein_coding 
-54493,Ms4a12,ENSMUSG00000101031,protein_coding -51275,Gm20815,ENSMUSG00000101053,protein_coding -40760,Vmn1r200,ENSMUSG00000101073,protein_coding -5243,Olfr1166,ENSMUSG00000101078,protein_coding -46312,1700025B11Rik,ENSMUSG00000101113,protein_coding -52375,Gm20814,ENSMUSG00000101146,protein_coding -36683,Gm28305,ENSMUSG00000101152,protein_coding -52073,Sly,ENSMUSG00000101155,protein_coding -51769,Gm20905,ENSMUSG00000101157,protein_coding -51610,Gm20894,ENSMUSG00000101158,protein_coding -13932,Gm13278,ENSMUSG00000101163,protein_coding -4857,Hoxd4,ENSMUSG00000101174,protein_coding -23499,Scgb1b15,ENSMUSG00000101232,protein_coding -51219,Gm28919,ENSMUSG00000101243,protein_coding -13941,Ifna6,ENSMUSG00000101252,protein_coding -52501,Gm21317,ENSMUSG00000101286,protein_coding -7725,Btbd35f15,ENSMUSG00000101294,protein_coding -19762,Gm28308,ENSMUSG00000101298,protein_coding -1900,B020011L13Rik,ENSMUSG00000101303,protein_coding -48966,Gm7356,ENSMUSG00000101307,protein_coding -1377,Krtap28-13,ENSMUSG00000101315,protein_coding -714,Dnah7c,ENSMUSG00000101337,protein_coding -40688,Hist1h3h,ENSMUSG00000101355,protein_coding -48967,Gm7358,ENSMUSG00000101361,protein_coding -7268,Btbd35f16,ENSMUSG00000101381,protein_coding -54499,Ms4a4a,ENSMUSG00000101389,protein_coding -5292,Olfr1217,ENSMUSG00000101391,protein_coding -51311,Gm28510,ENSMUSG00000101396,protein_coding -52082,Gm28961,ENSMUSG00000101399,protein_coding -23518,Scgb1b18,ENSMUSG00000101401,protein_coding -1904,Gm28168,ENSMUSG00000101415,protein_coding -22902,Gm5157,ENSMUSG00000101436,protein_coding -51845,Gm29276,ENSMUSG00000101471,protein_coding -5295,Olfr1220,ENSMUSG00000101480,protein_coding -7717,Btbd35f12,ENSMUSG00000101500,protein_coding -23477,Scgb1b12,ENSMUSG00000101520,protein_coding -2573,Gm10031,ENSMUSG00000101523,protein_coding -52362,Gm29110,ENSMUSG00000101528,protein_coding -40773,Vmn1r206,ENSMUSG00000101578,protein_coding -39842,Ace3,ENSMUSG00000101605,protein_coding 
-23475,Scgb1b11,ENSMUSG00000101638,protein_coding -4995,Gm28635,ENSMUSG00000101645,protein_coding -51905,Gm20792,ENSMUSG00000101653,protein_coding -51141,Gm29289,ENSMUSG00000101667,protein_coding -16176,Gm29609,ENSMUSG00000101678,protein_coding -52293,Gm29423,ENSMUSG00000101725,protein_coding -37974,Olfr1392,ENSMUSG00000101750,protein_coding -52181,Gm20908,ENSMUSG00000101766,protein_coding -7609,Gm28269,ENSMUSG00000101818,protein_coding -7465,H2al1b,ENSMUSG00000101819,protein_coding -10657,Mindy4b-ps,ENSMUSG00000101860,protein_coding -37976,Olfr1391,ENSMUSG00000101874,protein_coding -45940,9130401M01Rik,ENSMUSG00000101892,protein_coding -2084,Gm29427,ENSMUSG00000101904,protein_coding -52612,Gm28102,ENSMUSG00000101915,protein_coding -5300,Olfr1225,ENSMUSG00000101918,protein_coding -51260,Gm20824,ENSMUSG00000101928,protein_coding -51756,Gm20835,ENSMUSG00000101933,protein_coding -48788,Ldhal6b,ENSMUSG00000101959,protein_coding -40699,Hist1h3i,ENSMUSG00000101972,protein_coding -46220,Iqank1,ENSMUSG00000102018,protein_coding -1910,Gm29106,ENSMUSG00000102030,protein_coding -36379,Bcl2a1a,ENSMUSG00000102037,protein_coding -52634,Gm21294,ENSMUSG00000102045,protein_coding -2184,Zbed6,ENSMUSG00000102049,protein_coding -51132,Gm4064,ENSMUSG00000102053,protein_coding -5081,Olfr1034,ENSMUSG00000102091,protein_coding -51943,Gm20883,ENSMUSG00000102122,protein_coding -31898,Gm3952,ENSMUSG00000102141,protein_coding -53164,Pcdha11,ENSMUSG00000102206,protein_coding -53220,Pcdhga10,ENSMUSG00000102222,protein_coding -24500,Snrpn,ENSMUSG00000102252,protein_coding -11303,2310046K23Rik,ENSMUSG00000102308,protein_coding -53149,Pcdha3,ENSMUSG00000102312,protein_coding -52275,Gm33815,ENSMUSG00000102388,protein_coding -527,4933424G06Rik,ENSMUSG00000102416,protein_coding -2868,Sh2d1b1,ENSMUSG00000102418,protein_coding -10817,Iqschfp,ENSMUSG00000102422,protein_coding -53224,Pcdhga12,ENSMUSG00000102428,protein_coding -11345,Flg,ENSMUSG00000102439,protein_coding 
-53217,Pcdhga9,ENSMUSG00000102440,protein_coding -53228,Pcdhgc5,ENSMUSG00000102543,protein_coding -24501,Snurf,ENSMUSG00000102627,protein_coding -51465,Gm29866,ENSMUSG00000102668,protein_coding -10994,Dchs2,ENSMUSG00000102692,protein_coding -53169,Pcdhac2,ENSMUSG00000102697,protein_coding -51418,Gm20772,ENSMUSG00000102739,protein_coding -53222,Pcdhga11,ENSMUSG00000102742,protein_coding -53208,Pcdhgb2,ENSMUSG00000102748,protein_coding -2869,Gm7694,ENSMUSG00000102752,protein_coding -10112,Naaladl2,ENSMUSG00000102758,protein_coding -19424,Mgam2-ps,ENSMUSG00000102802,protein_coding -11012,Gm37240,ENSMUSG00000102805,protein_coding -53156,Gm37013,ENSMUSG00000102836,protein_coding -53226,Pcdhgc3,ENSMUSG00000102918,protein_coding -2181,Zc3h11a,ENSMUSG00000102976,protein_coding -9900,Gm8797,ENSMUSG00000103034,protein_coding -53206,Pcdhgb1,ENSMUSG00000103037,protein_coding -11317,Gm38119,ENSMUSG00000103084,protein_coding -53218,Pcdhgb6,ENSMUSG00000103088,protein_coding -53153,Pcdha5,ENSMUSG00000103092,protein_coding -9959,Gm37389,ENSMUSG00000103124,protein_coding -53158,Gm37388,ENSMUSG00000103125,protein_coding -53202,Pcdhga1,ENSMUSG00000103144,protein_coding -11313,Lce1d,ENSMUSG00000103243,protein_coding -53168,Pcdhac1,ENSMUSG00000103255,protein_coding -53166,Pcdha12,ENSMUSG00000103310,protein_coding -53204,Pcdhga2,ENSMUSG00000103332,protein_coding -11368,Gm37596,ENSMUSG00000103350,protein_coding -11365,Tdpoz2,ENSMUSG00000103362,protein_coding -51448,Gm35134,ENSMUSG00000103371,protein_coding -9969,Gm38303,ENSMUSG00000103392,protein_coding -36799,Lsmem2,ENSMUSG00000103409,protein_coding -2170,Golt1a,ENSMUSG00000103421,protein_coding -53146,Pcdha1,ENSMUSG00000103442,protein_coding -53152,Gm42416,ENSMUSG00000103458,protein_coding -51356,Gm21440,ENSMUSG00000103468,protein_coding -53213,Pcdhga7,ENSMUSG00000103472,protein_coding -10723,Strit1,ENSMUSG00000103476,protein_coding -11306,2210017I01Rik,ENSMUSG00000103523,protein_coding -51483,Gm31571,ENSMUSG00000103528,protein_coding 
-53209,Pcdhga5,ENSMUSG00000103567,protein_coding -53214,Pcdhgb4,ENSMUSG00000103585,protein_coding -53207,Pcdhga4,ENSMUSG00000103677,protein_coding -53155,Pcdha6,ENSMUSG00000103707,protein_coding -2919,Tstd1,ENSMUSG00000103711,protein_coding -53216,Pcdhgb5,ENSMUSG00000103749,protein_coding -11105,Gm38392,ENSMUSG00000103766,protein_coding -53161,Pcdha9,ENSMUSG00000103770,protein_coding -53211,Pcdhga6,ENSMUSG00000103793,protein_coding -53160,Pcdha8,ENSMUSG00000103800,protein_coding -53215,Pcdhga8,ENSMUSG00000103897,protein_coding -46211,Tigd5,ENSMUSG00000103906,protein_coding -51497,Gm21366,ENSMUSG00000103919,protein_coding -11008,Gm6525,ENSMUSG00000104043,protein_coding -53221,Pcdhgb7,ENSMUSG00000104063,protein_coding -53148,Pcdha2,ENSMUSG00000104148,protein_coding -3057,Gm38100,ENSMUSG00000104158,protein_coding -51380,Gm30737,ENSMUSG00000104191,protein_coding -34,Gm37988,ENSMUSG00000104217,protein_coding -53150,Pcdha4,ENSMUSG00000104252,protein_coding -51349,Gm20807,ENSMUSG00000104267,protein_coding -10896,Wdr49,ENSMUSG00000104301,protein_coding -53157,Pcdha7,ENSMUSG00000104318,protein_coding -53205,Pcdhga3,ENSMUSG00000104346,protein_coding -1376,A030005K14Rik,ENSMUSG00000104423,protein_coding -11104,Rhbg,ENSMUSG00000104445,protein_coding -36928,Fbxw27,ENSMUSG00000104614,protein_coding -18658,Gm42421,ENSMUSG00000104633,protein_coding -17499,Gbp6,ENSMUSG00000104713,protein_coding -18724,Gm5565,ENSMUSG00000104752,protein_coding -16060,Gm21663,ENSMUSG00000104824,protein_coding -11765,Gm43064,ENSMUSG00000105053,protein_coding -17387,Vamp9,ENSMUSG00000105078,protein_coding -17498,Gbp10,ENSMUSG00000105096,protein_coding -12049,Gm43191,ENSMUSG00000105103,protein_coding -11156,Gm43738,ENSMUSG00000105204,protein_coding -18002,Gm42878,ENSMUSG00000105340,protein_coding -12493,Gbp5,ENSMUSG00000105504,protein_coding -11250,Gm42674,ENSMUSG00000105518,protein_coding -36908,Fbxw14,ENSMUSG00000105589,protein_coding -11476,4930558C23Rik,ENSMUSG00000105734,protein_coding 
-11541,Hist2h2bb,ENSMUSG00000105827,protein_coding -16757,Gm43552,ENSMUSG00000105835,protein_coding -17588,Gm42517,ENSMUSG00000105867,protein_coding -18109,Gm43518,ENSMUSG00000105875,protein_coding -7887,Tex13c2,ENSMUSG00000105993,protein_coding -18509,Gm43720,ENSMUSG00000106247,protein_coding -18576,Gm4869,ENSMUSG00000106350,protein_coding -15964,Lhfpl3,ENSMUSG00000106379,protein_coding -15861,Gm21190,ENSMUSG00000106445,protein_coding -11576,Gm42957,ENSMUSG00000106447,protein_coding -16063,Gm21680,ENSMUSG00000106627,protein_coding -17555,Gm42669,ENSMUSG00000106631,protein_coding -17118,Ugt2a1,ENSMUSG00000106677,protein_coding -26226,Tmem265,ENSMUSG00000106715,protein_coding -30556,Gm7697,ENSMUSG00000106824,protein_coding -16177,Gtf3c2,ENSMUSG00000106864,protein_coding -18795,Gm42791,ENSMUSG00000106892,protein_coding -16202,Mrpl33,ENSMUSG00000106918,protein_coding -47791,0610012G03Rik,ENSMUSG00000107002,protein_coding -18870,Gm42906,ENSMUSG00000107011,protein_coding -26223,Gm42715,ENSMUSG00000107023,protein_coding -26177,Gm42742,ENSMUSG00000107068,protein_coding -19397,Gm42420,ENSMUSG00000107071,protein_coding -4298,Gm34653,ENSMUSG00000107167,protein_coding -17119,Gm43638,ENSMUSG00000107180,protein_coding -47253,Gm5767,ENSMUSG00000107252,protein_coding -30553,Gm9495,ENSMUSG00000107280,protein_coding -16175,Mpv17,ENSMUSG00000107283,protein_coding -17281,Gm7792,ENSMUSG00000107392,protein_coding -37985,Olfr1383,ENSMUSG00000107417,protein_coding -21357,Gm45234,ENSMUSG00000107478,protein_coding -23819,Etfbl,ENSMUSG00000107482,protein_coding -20481,Ccdc142,ENSMUSG00000107499,protein_coding -37236,Gm35549,ENSMUSG00000107504,protein_coding -20607,Gm44790,ENSMUSG00000107539,protein_coding -37979,Olfr1389,ENSMUSG00000107573,protein_coding -19664,Gm45021,ENSMUSG00000107588,protein_coding -37988,Olfr1380,ENSMUSG00000107645,protein_coding -29277,Olfr764-ps1,ENSMUSG00000107648,protein_coding -29286,Olfr772,ENSMUSG00000107662,protein_coding 
-38249,Olfr316,ENSMUSG00000107677,protein_coding -18986,Gm45062,ENSMUSG00000107705,protein_coding -38244,Olfr320,ENSMUSG00000107711,protein_coding -29279,Olfr766-ps1,ENSMUSG00000107748,protein_coding -19666,Gm44965,ENSMUSG00000107789,protein_coding -28069,Olfr1358,ENSMUSG00000107822,protein_coding -21512,Gm44511,ENSMUSG00000107872,protein_coding -21652,Prpmp5,ENSMUSG00000107874,protein_coding -38702,Gm43951,ENSMUSG00000107877,protein_coding -21139,Gm5580,ENSMUSG00000107906,protein_coding -20662,Gm45140,ENSMUSG00000107928,protein_coding -21487,Gm44596,ENSMUSG00000108011,protein_coding -21246,Gm7298,ENSMUSG00000108022,protein_coding -29316,Olfr800,ENSMUSG00000108114,protein_coding -14451,Gm12845,ENSMUSG00000108127,protein_coding -37982,Olfr1386,ENSMUSG00000108167,protein_coding -38247,Olfr318,ENSMUSG00000108265,protein_coding -22697,Gm42372,ENSMUSG00000108348,protein_coding -23086,Gm4881,ENSMUSG00000108367,protein_coding -14955,Gm30191,ENSMUSG00000108398,protein_coding -41828,Olfr465-ps1,ENSMUSG00000108426,protein_coding -40683,Olfr1360,ENSMUSG00000108534,protein_coding -26269,Gm49368,ENSMUSG00000108596,protein_coding -23896,Gm36864,ENSMUSG00000108622,protein_coding -40684,Olfr1359,ENSMUSG00000108674,protein_coding -46857,OR5BS1P,ENSMUSG00000108728,protein_coding -46844,Gm44579,ENSMUSG00000108748,protein_coding -47958,Gm36028,ENSMUSG00000108763,protein_coding -24603,Gm45213,ENSMUSG00000108793,protein_coding -26242,Gm49388,ENSMUSG00000108815,protein_coding -5706,Olfr1310,ENSMUSG00000108827,protein_coding -32559,Frmpd2,ENSMUSG00000108841,protein_coding -54547,Olfr1555-ps1,ENSMUSG00000108889,protein_coding -5679,Olfr1291-ps1,ENSMUSG00000108891,protein_coding -30657,Ccdc194,ENSMUSG00000108900,protein_coding -5681,Olfr1293-ps,ENSMUSG00000108908,protein_coding -5684,Olfr1295,ENSMUSG00000108919,protein_coding -55060,Cc2d2b,ENSMUSG00000108929,protein_coding -5672,Olfr1284,ENSMUSG00000108931,protein_coding -25671,Olfr695,ENSMUSG00000108948,protein_coding 
-7238,Gm29797,ENSMUSG00000108976,protein_coding -9751,Gm15262,ENSMUSG00000108981,protein_coding -25740,Olfr485,ENSMUSG00000108995,protein_coding -54550,Olfr1432,ENSMUSG00000109022,protein_coding -5711,Olfr1315-ps1,ENSMUSG00000109033,protein_coding -25682,Olfr705,ENSMUSG00000109058,protein_coding -22877,Gm4513,ENSMUSG00000109060,protein_coding -29423,Gm49320,ENSMUSG00000109061,protein_coding -22149,Gm44973,ENSMUSG00000109129,protein_coding -9002,Gm45194,ENSMUSG00000109156,protein_coding -22864,Gm5890,ENSMUSG00000109166,protein_coding -46249,Gm35339,ENSMUSG00000109179,protein_coding -50110,Olfr126,ENSMUSG00000109212,protein_coding -5688,Olfr1299,ENSMUSG00000109219,protein_coding -42151,Fam81b,ENSMUSG00000109228,protein_coding -53021,Gm35060,ENSMUSG00000109238,protein_coding -11187,Dcst2,ENSMUSG00000109293,protein_coding -26732,1810010D01Rik,ENSMUSG00000109305,protein_coding -5671,Olfr1283,ENSMUSG00000109322,protein_coding -23956,Prmt1,ENSMUSG00000109324,protein_coding -54560,Olfr1438-ps1,ENSMUSG00000109328,protein_coding -23248,Samd4b,ENSMUSG00000109336,protein_coding -24550,A26c2,ENSMUSG00000109344,protein_coding -22825,Gm44805,ENSMUSG00000109350,protein_coding -25683,Olfr706,ENSMUSG00000109354,protein_coding -8329,Gm45015,ENSMUSG00000109368,protein_coding -30096,Gm19410,ENSMUSG00000109372,protein_coding -50118,Olfr761,ENSMUSG00000109376,protein_coding -23380,Gm49396,ENSMUSG00000109378,protein_coding -26012,Gm5737,ENSMUSG00000109392,protein_coding -22964,Gm4565,ENSMUSG00000109396,protein_coding -22206,Gm3854,ENSMUSG00000109398,protein_coding -5707,Olfr1311,ENSMUSG00000109403,protein_coding -33890,Gm9195,ENSMUSG00000109446,protein_coding -5668,Olfr1280,ENSMUSG00000109449,protein_coding -43934,Gm4756,ENSMUSG00000109482,protein_coding -5687,Olfr1298,ENSMUSG00000109487,protein_coding -7355,Gm45208,ENSMUSG00000109493,protein_coding -25750,Olfr492,ENSMUSG00000109497,protein_coding -518,Gm42417,ENSMUSG00000109510,protein_coding 
-23942,Nup62,ENSMUSG00000109511,protein_coding -22913,Gm6882,ENSMUSG00000109516,protein_coding -54641,Olfr1493-ps1,ENSMUSG00000109520,protein_coding -30594,Gdf1,ENSMUSG00000109523,protein_coding -5704,Olfr1309,ENSMUSG00000109528,protein_coding -25720,Olfr470,ENSMUSG00000109542,protein_coding -5683,Olfr1294,ENSMUSG00000109547,protein_coding -42409,Ankrd31,ENSMUSG00000109561,protein_coding -34661,Muc16,ENSMUSG00000109564,protein_coding -16287,Cfap99,ENSMUSG00000109572,protein_coding -48180,Lnp1,ENSMUSG00000109588,protein_coding -25752,Olfr494,ENSMUSG00000109631,protein_coding -32090,Gm45521,ENSMUSG00000109649,protein_coding -26621,Gm29735,ENSMUSG00000109655,protein_coding -25475,Olfr605,ENSMUSG00000109659,protein_coding -18512,Pvrig,ENSMUSG00000109713,protein_coding -30482,Trim61,ENSMUSG00000109718,protein_coding -38585,Gm39566,ENSMUSG00000109737,protein_coding -50548,Gm17949,ENSMUSG00000109739,protein_coding -8830,1700018G05Rik,ENSMUSG00000109745,protein_coding -30215,Klkb1,ENSMUSG00000109764,protein_coding -17644,Gm35315,ENSMUSG00000109771,protein_coding -5712,Olfr1316,ENSMUSG00000109801,protein_coding -5667,Olfr1279,ENSMUSG00000109813,protein_coding -25527,Olfr643,ENSMUSG00000109824,protein_coding -38644,Gm40193,ENSMUSG00000109833,protein_coding -33038,Olfr730,ENSMUSG00000109835,protein_coding -29942,Gm45692,ENSMUSG00000109850,protein_coding -8691,Pfn5,ENSMUSG00000109858,protein_coding -26617,Gm45618,ENSMUSG00000109859,protein_coding -28323,Eid3,ENSMUSG00000109864,protein_coding -3460,Hspa14,ENSMUSG00000109865,protein_coding -25747,Olfr490,ENSMUSG00000109884,protein_coding -53718,Chmp1b,ENSMUSG00000109901,protein_coding -15883,Gm28710,ENSMUSG00000109903,protein_coding -24013,Gm45808,ENSMUSG00000109926,protein_coding -31515,Exosc6,ENSMUSG00000109941,protein_coding -4068,Brd3os,ENSMUSG00000109946,protein_coding -25470,Olfr601,ENSMUSG00000109951,protein_coding -22414,Zscan4-ps3,ENSMUSG00000109969,protein_coding -25422,Olfr560,ENSMUSG00000110008,protein_coding 
-25533,Olfr644,ENSMUSG00000110012,protein_coding -26639,Gm49369,ENSMUSG00000110040,protein_coding -26619,Gm45337,ENSMUSG00000110061,protein_coding -41587,Gm45623,ENSMUSG00000110086,protein_coding -26618,Gm39115,ENSMUSG00000110091,protein_coding -1330,Gm45261,ENSMUSG00000110100,protein_coding -22377,Zscan4-ps1,ENSMUSG00000110103,protein_coding -26552,Gm45717,ENSMUSG00000110104,protein_coding -22249,Gm45844,ENSMUSG00000110105,protein_coding -26537,Gm45785,ENSMUSG00000110136,protein_coding -22257,Gm45783,ENSMUSG00000110139,protein_coding -40101,St6galnac2,ENSMUSG00000110170,protein_coding -25739,Olfr484,ENSMUSG00000110171,protein_coding -53120,Igip,ENSMUSG00000110185,protein_coding -22408,Zscan4-ps2,ENSMUSG00000110190,protein_coding -25355,Pde2a,ENSMUSG00000110195,protein_coding -23980,Flt3l,ENSMUSG00000110206,protein_coding -50524,Gm20219,ENSMUSG00000110218,protein_coding -22138,Gm36210,ENSMUSG00000110221,protein_coding -25639,Gm45799,ENSMUSG00000110234,protein_coding -25754,Olfr495,ENSMUSG00000110253,protein_coding -25516,Olfr65,ENSMUSG00000110259,protein_coding -35569,Gm32742,ENSMUSG00000110266,protein_coding -54054,Gm45871,ENSMUSG00000110277,protein_coding -47687,Gm37419,ENSMUSG00000110291,protein_coding -26629,Gm40460,ENSMUSG00000110324,protein_coding -31063,Gm19935,ENSMUSG00000110332,protein_coding -29988,Gm45861,ENSMUSG00000110333,protein_coding -39263,Gm45716,ENSMUSG00000110344,protein_coding -28617,Gm33543,ENSMUSG00000110353,protein_coding -48457,A730009L09Rik,ENSMUSG00000110358,protein_coding -15620,Rnf223,ENSMUSG00000110404,protein_coding -31293,Cmtm1,ENSMUSG00000110430,protein_coding -13633,Mup22,ENSMUSG00000110439,protein_coding -30757,Gm10358,ENSMUSG00000110469,protein_coding -41575,Gm3045,ENSMUSG00000110477,protein_coding -34677,Olfr839-ps1,ENSMUSG00000110519,protein_coding -48098,Gm5485,ENSMUSG00000110573,protein_coding -53603,Gm36368,ENSMUSG00000110576,protein_coding -29733,Gm31371,ENSMUSG00000110591,protein_coding 
-34675,Olfr837,ENSMUSG00000110621,protein_coding -30616,Iqcn,ENSMUSG00000110622,protein_coding -53341,Gm37797,ENSMUSG00000110628,protein_coding -29705,Gm45826,ENSMUSG00000110641,protein_coding -27169,Gm49353,ENSMUSG00000110723,protein_coding -5135,Olfr1084,ENSMUSG00000110804,protein_coding -5331,Olfr1252,ENSMUSG00000110819,protein_coding -5131,Olfr1080,ENSMUSG00000110912,protein_coding -14495,Olfr1333,ENSMUSG00000110947,protein_coding -54098,Nudt8,ENSMUSG00000110949,protein_coding -13332,Olfr156,ENSMUSG00000110970,protein_coding -4359,Olfr50,ENSMUSG00000111021,protein_coding -14492,Olfr1337,ENSMUSG00000111159,protein_coding -5319,Olfr1240,ENSMUSG00000111174,protein_coding -5039,Olfr996,ENSMUSG00000111179,protein_coding -30957,Gm49661,ENSMUSG00000111184,protein_coding -5327,Olfr1248,ENSMUSG00000111239,protein_coding -14500,Olfr1328,ENSMUSG00000111259,protein_coding -29304,Olfr789,ENSMUSG00000111273,protein_coding -5117,Olfr1065,ENSMUSG00000111306,protein_coding -17553,Btbd8,ENSMUSG00000111375,protein_coding -35352,Gm49380,ENSMUSG00000111409,protein_coding -14385,Gm49337,ENSMUSG00000111410,protein_coding -35152,Olfr912,ENSMUSG00000111448,protein_coding -5041,Olfr998,ENSMUSG00000111454,protein_coding -5324,Olfr1245,ENSMUSG00000111456,protein_coding -34769,Gm38431,ENSMUSG00000111497,protein_coding -5317,Olfr1238,ENSMUSG00000111517,protein_coding -5330,Olfr1251,ENSMUSG00000111567,protein_coding -5271,Olfr1192-ps1,ENSMUSG00000111590,protein_coding -13333,Olfr157,ENSMUSG00000111611,protein_coding -5133,Olfr1082,ENSMUSG00000111689,protein_coding -34768,Gm49373,ENSMUSG00000111692,protein_coding -36173,Gm3776,ENSMUSG00000111709,protein_coding -5140,Olfr1089,ENSMUSG00000111711,protein_coding -5325,Olfr1246,ENSMUSG00000111715,protein_coding -29300,Olfr785,ENSMUSG00000111732,protein_coding -5207,Olfr1134,ENSMUSG00000111747,protein_coding -5122,Olfr228,ENSMUSG00000111772,protein_coding -35134,Olfr896-ps1,ENSMUSG00000111814,protein_coding 
-34821,Gm49318,ENSMUSG00000111842,protein_coding -4356,Olfr347,ENSMUSG00000111863,protein_coding -4350,Olfr342,ENSMUSG00000111869,protein_coding -28034,Gm3285,ENSMUSG00000111915,protein_coding -27539,Lilr4b,ENSMUSG00000112023,protein_coding -28983,Gm9045,ENSMUSG00000112027,protein_coding -28815,Gm5136,ENSMUSG00000112039,protein_coding -27768,Pbld1,ENSMUSG00000112129,protein_coding -27541,Lilrb4a,ENSMUSG00000112148,protein_coding -28298,BC024063,ENSMUSG00000112160,protein_coding -28010,Gm9508,ENSMUSG00000112170,protein_coding -43336,Gm6993,ENSMUSG00000112187,protein_coding -28975,Gm32717,ENSMUSG00000112216,protein_coding -28031,Krtap10-10,ENSMUSG00000112223,protein_coding -28003,Gm49325,ENSMUSG00000112241,protein_coding -28978,Gm32802,ENSMUSG00000112252,protein_coding -28020,Gm9736,ENSMUSG00000112380,protein_coding -28982,Gm9044,ENSMUSG00000112419,protein_coding -43646,Srp54b,ENSMUSG00000112449,protein_coding -28985,Gm9048,ENSMUSG00000112495,protein_coding -28029,Gm49918,ENSMUSG00000112600,protein_coding -28871,Glipr1l3,ENSMUSG00000112611,protein_coding -28285,Gm32687,ENSMUSG00000112640,protein_coding -28032,Gm36176,ENSMUSG00000112653,protein_coding -28426,Gm49358,ENSMUSG00000112743,protein_coding -28984,Gm9046,ENSMUSG00000112781,protein_coding -28981,Gm9040,ENSMUSG00000112814,protein_coding -28702,Gm4305,ENSMUSG00000112856,protein_coding -33428,Rnf212b,ENSMUSG00000112858,protein_coding -28018,Gm18596,ENSMUSG00000112864,protein_coding -28986,Gm9049,ENSMUSG00000112919,protein_coding -38747,Spata22,ENSMUSG00000112920,protein_coding -28701,Gm4303,ENSMUSG00000112931,protein_coding -40965,Gm11361,ENSMUSG00000113061,protein_coding -1367,A030003K21Rik,ENSMUSG00000113084,protein_coding -1370,Gm47959,ENSMUSG00000113097,protein_coding -43844,Gm49383,ENSMUSG00000113149,protein_coding -44303,Gm21936,ENSMUSG00000113201,protein_coding -41397,Tes3-ps,ENSMUSG00000113255,protein_coding -28249,Gm48551,ENSMUSG00000113262,protein_coding -1375,Gm47969,ENSMUSG00000113267,protein_coding 
-16359,Jakmip1,ENSMUSG00000113373,protein_coding -41736,Gm49359,ENSMUSG00000113450,protein_coding -44164,Gm49366,ENSMUSG00000113475,protein_coding -44295,Gm16381,ENSMUSG00000113489,protein_coding -46989,Gm47841,ENSMUSG00000113558,protein_coding -28188,Adat3,ENSMUSG00000113640,protein_coding -43847,Gm49384,ENSMUSG00000113786,protein_coding -44284,Gm5788,ENSMUSG00000113805,protein_coding -1366,Gm6217,ENSMUSG00000113846,protein_coding -1364,A030005L19Rik,ENSMUSG00000113880,protein_coding -1368,Gm7544,ENSMUSG00000113925,protein_coding -28187,Scamp4,ENSMUSG00000113949,protein_coding -44309,Gm2001,ENSMUSG00000113951,protein_coding -44314,Gm2022,ENSMUSG00000113971,protein_coding -1365,A030014E15Rik,ENSMUSG00000113973,protein_coding -28251,Gm48552,ENSMUSG00000114004,protein_coding -1369,Gm47955,ENSMUSG00000114011,protein_coding -43962,Snapc1l,ENSMUSG00000114046,protein_coding -41408,Gm30302,ENSMUSG00000114073,protein_coding -44336,Gm16368,ENSMUSG00000114075,protein_coding -42392,Gm20075,ENSMUSG00000114133,protein_coding -2458,Gm47985,ENSMUSG00000114212,protein_coding -49337,Gm38655,ENSMUSG00000114245,protein_coding -18094,Gm49027,ENSMUSG00000114278,protein_coding -40691,Hist1h2bm,ENSMUSG00000114279,protein_coding -1357,Gm47791,ENSMUSG00000114299,protein_coding -32135,Gm49355,ENSMUSG00000114378,protein_coding -41677,Gm49391,ENSMUSG00000114432,protein_coding -40839,Hist1h2bh,ENSMUSG00000114456,protein_coding -42725,Gm49395,ENSMUSG00000114470,protein_coding -41691,Gm49398,ENSMUSG00000114473,protein_coding -41676,Gm49393,ENSMUSG00000114487,protein_coding -26156,Aldoa,ENSMUSG00000114515,protein_coding -41434,Gm47429,ENSMUSG00000114559,protein_coding -2461,3110040M04Rik,ENSMUSG00000114582,protein_coding -2462,Gm47996,ENSMUSG00000114591,protein_coding -41675,Gm49392,ENSMUSG00000114635,protein_coding -2460,Gm47995,ENSMUSG00000114694,protein_coding -46382,Galr3,ENSMUSG00000114755,protein_coding -41648,Gm49354,ENSMUSG00000114763,protein_coding 
-33621,Gm49336,ENSMUSG00000114797,protein_coding -32693,Gm47189,ENSMUSG00000114852,protein_coding -33424,Gm29776,ENSMUSG00000114865,protein_coding -41701,Gm49352,ENSMUSG00000114904,protein_coding -41979,Gm49345,ENSMUSG00000114923,protein_coding -33576,Gm49361,ENSMUSG00000114942,protein_coding -2459,Gm8947,ENSMUSG00000114943,protein_coding -19880,Vmn1r14,ENSMUSG00000114982,protein_coding -3944,AL732309.1,ENSMUSG00000115018,protein_coding -40811,Vmn1r218,ENSMUSG00000115020,protein_coding -23013,Vmn1r173,ENSMUSG00000115021,protein_coding -32509,Gm49387,ENSMUSG00000115022,protein_coding -22462,Vmn1r81,ENSMUSG00000115027,protein_coding -31401,Dpep2,ENSMUSG00000115067,protein_coding -12763,Vmn1r2,ENSMUSG00000115072,protein_coding -3946,Ndor1,ENSMUSG00000115074,protein_coding -19905,Vmn1r22,ENSMUSG00000115091,protein_coding -49750,Smim40-ps,ENSMUSG00000115113,protein_coding -14543,AL607142.1,ENSMUSG00000115115,protein_coding -20036,Vmn1r38,ENSMUSG00000115170,protein_coding -19875,Vmn1r10,ENSMUSG00000115181,protein_coding -19881,Vmn1r15,ENSMUSG00000115199,protein_coding -47579,Eef1akmt4,ENSMUSG00000115219,protein_coding -33483,Gm49378,ENSMUSG00000115232,protein_coding -19876,Vmn1r11,ENSMUSG00000115236,protein_coding -19888,Vmn1r20,ENSMUSG00000115253,protein_coding -22432,Vmn1r76,ENSMUSG00000115267,protein_coding -47580,Gm49333,ENSMUSG00000115293,protein_coding -26655,Gm49394,ENSMUSG00000115302,protein_coding -33076,Pnp,ENSMUSG00000115338,protein_coding -19901,Vmn1r21,ENSMUSG00000115343,protein_coding -1024,Gm39653,ENSMUSG00000115378,protein_coding -46228,Eppk1,ENSMUSG00000115388,protein_coding -19932,Vmn1r31,ENSMUSG00000115404,protein_coding -6228,AL731706.1,ENSMUSG00000115423,protein_coding -45962,Gm49356,ENSMUSG00000115463,protein_coding -12764,Vmn1r3,ENSMUSG00000115466,protein_coding -20032,Vmn1r37,ENSMUSG00000115467,protein_coding -20031,Vmn1r36,ENSMUSG00000115482,protein_coding -32763,Gm9732,ENSMUSG00000115483,protein_coding 
-19908,Vmn1r24,ENSMUSG00000115507,protein_coding -19885,Vmn1r17,ENSMUSG00000115644,protein_coding -19909,Vmn1r25,ENSMUSG00000115668,protein_coding -40805,Vmn1r216,ENSMUSG00000115697,protein_coding -19871,Vmn1r6,ENSMUSG00000115701,protein_coding -19923,Vmn1r28,ENSMUSG00000115705,protein_coding -32568,Gm30083,ENSMUSG00000115726,protein_coding -22456,Vmn1r80,ENSMUSG00000115744,protein_coding -31404,Dpep2nb,ENSMUSG00000115768,protein_coding -40809,Vmn1r217,ENSMUSG00000115791,protein_coding -19884,Vmn1r16,ENSMUSG00000115792,protein_coding -46447,AC140267.1,ENSMUSG00000115798,protein_coding -19887,Vmn1r19,ENSMUSG00000115799,protein_coding -2172,Gm28040,ENSMUSG00000115958,protein_coding -46268,Vps28,ENSMUSG00000115987,protein_coding -42638,Gm49496,ENSMUSG00000116016,protein_coding -46284,Gm49527,ENSMUSG00000116024,protein_coding -1679,Sept2,ENSMUSG00000116048,protein_coding -40806,Vmn1r216,ENSMUSG00000116057,protein_coding -46368,Gm49510,ENSMUSG00000116069,protein_coding -46314,Gm36245,ENSMUSG00000116079,protein_coding -46394,Gm49486,ENSMUSG00000116121,protein_coding -46279,C030006K11Rik,ENSMUSG00000116138,protein_coding -2173,Kiss1,ENSMUSG00000116158,protein_coding -46371,Pdxp,ENSMUSG00000116165,protein_coding -54400,Gm49403,ENSMUSG00000116166,protein_coding -25005,Olfr290,ENSMUSG00000116179,protein_coding -22152,4632433K11Rik,ENSMUSG00000116184,protein_coding -42770,Nnt,ENSMUSG00000116207,protein_coding -2182,Zc3h11a,ENSMUSG00000116275,protein_coding -47017,Gm49425,ENSMUSG00000116336,protein_coding -54404,Gm49416,ENSMUSG00000116347,protein_coding -46890,Gm49450,ENSMUSG00000116358,protein_coding -46260,Tmem249,ENSMUSG00000116376,protein_coding -46381,Gcat,ENSMUSG00000116378,protein_coding -29156,Ddit3,ENSMUSG00000116429,protein_coding -46716,Gm44502,ENSMUSG00000116461,protein_coding -48063,Gm17783,ENSMUSG00000116542,protein_coding -6229,AL731706.2,ENSMUSG00000116563,protein_coding -49031,Riok2,ENSMUSG00000116564,protein_coding 
-47503,Gm49601,ENSMUSG00000116594,protein_coding -48526,Gm20741,ENSMUSG00000116636,protein_coding -47462,B830017H08Rik,ENSMUSG00000116652,protein_coding -48840,Gm49630,ENSMUSG00000116672,protein_coding -48753,A630089N07Rik,ENSMUSG00000116673,protein_coding -48984,Gm3417,ENSMUSG00000116780,protein_coding -48833,Gm49721,ENSMUSG00000116876,protein_coding -48987,Gm3435,ENSMUSG00000116895,protein_coding -47484,Gm49776,ENSMUSG00000116925,protein_coding -48635,Gm49711,ENSMUSG00000116930,protein_coding -48623,Atp5o,ENSMUSG00000116933,protein_coding -48985,9030025P20Rik,ENSMUSG00000116953,protein_coding -48852,Gm49673,ENSMUSG00000116988,protein_coding -48728,Gm49948,ENSMUSG00000117081,protein_coding -394,Gm5698,ENSMUSG00000117091,protein_coding -50723,Gm49909,ENSMUSG00000117098,protein_coding -50177,Gm49835,ENSMUSG00000117136,protein_coding -49540,Gm4356,ENSMUSG00000117145,protein_coding -49112,Vmn1r229,ENSMUSG00000117148,protein_coding -50198,Esp5,ENSMUSG00000117190,protein_coding -49176,Gm7072,ENSMUSG00000117284,protein_coding -16357,Gm1043,ENSMUSG00000117286,protein_coding -392,Ptp4a1,ENSMUSG00000117310,protein_coding -50157,Esp22,ENSMUSG00000117311,protein_coding -49488,Gm49804,ENSMUSG00000117338,protein_coding -49273,Ntn3,ENSMUSG00000117406,protein_coding -23059,Gm50092,ENSMUSG00000117477,protein_coding -53399,Hspe1-rs1,ENSMUSG00000117621,protein_coding -54266,AC131692.1,ENSMUSG00000117666,protein_coding -53131,Apbb3,ENSMUSG00000117679,protein_coding -53087,Gm28285,ENSMUSG00000117694,protein_coding -53844,Hdhd2,ENSMUSG00000117732,protein_coding -22175,Vmn1r59,ENSMUSG00000117744,protein_coding -31426,Chtf8,ENSMUSG00000117748,protein_coding -54001,C18orf63,ENSMUSG00000117781,protein_coding -54169,AC124502.3,ENSMUSG00000117789,protein_coding -806,Asdurf,ENSMUSG00000117809,protein_coding -49453,Ggnbp1,ENSMUSG00000117819,protein_coding -22514,Vmn1r88,ENSMUSG00000117853,protein_coding -22179,Vmn1r61,ENSMUSG00000117863,protein_coding 
-54127,AC109138.7,ENSMUSG00000117873,protein_coding -53098,AC121821.1,ENSMUSG00000117874,protein_coding -54384,Tmem223,ENSMUSG00000117924,protein_coding -53128,AC115631.1,ENSMUSG00000117942,protein_coding -27973,AC141477.1,ENSMUSG00000117965,protein_coding -55266,Itprip,ENSMUSG00000117975,protein_coding -54393,AC129217.1,ENSMUSG00000118124,protein_coding -54230,Sssca1,ENSMUSG00000118125,protein_coding -49381,AC134908.4,ENSMUSG00000118135,protein_coding -53260,Arhgap26,ENSMUSG00000118193,protein_coding -22164,Vmn1r55,ENSMUSG00000118215,protein_coding -2140,AC124108.1,ENSMUSG00000118219,protein_coding -481,Arhgef4,ENSMUSG00000118272,protein_coding -22177,Vmn1r60,ENSMUSG00000118298,protein_coding -23029,Vmn1r183,ENSMUSG00000118318,protein_coding -18647,Fam220a,ENSMUSG00000118332,protein_coding -54385,Tmem179b,ENSMUSG00000118346,protein_coding -22184,Vmn1r64,ENSMUSG00000118388,protein_coding -36751,IQCF2,ENSMUSG00000118396,protein_coding -2641,Gpr52,ENSMUSG00000118401,protein_coding -7705,CT868697.1,ENSMUSG00000118402,protein_coding -7711,FQ976806.1,ENSMUSG00000118405,protein_coding -53452,Ancv1r,ENSMUSG00000118407,protein_coding -7713,Btbd35f5,ENSMUSG00000118409,protein_coding -9116,BX088531.1,ENSMUSG00000118415,protein_coding -7734,CU019598.1,ENSMUSG00000118429,protein_coding -7703,Btbd35f25,ENSMUSG00000118431,protein_coding -7736,Btbd35f8,ENSMUSG00000118438,protein_coding -7697,Btbd35f26,ENSMUSG00000118441,protein_coding -23600,AC150683.1,ENSMUSG00000118454,protein_coding -7708,Btbd35f1,ENSMUSG00000118459,protein_coding -24005,AC151602.1,ENSMUSG00000118462,protein_coding -7700,CT867961.1,ENSMUSG00000118465,protein_coding -50333,AC112683.2,ENSMUSG00000118471,protein_coding -9127,BX571804.1,ENSMUSG00000118483,protein_coding diff --git a/sfaira/versions/genome_versions/mouse/__init__.py b/sfaira/versions/genome_versions/mouse/__init__.py deleted file mode 100644 index 90be3cbe7..000000000 --- a/sfaira/versions/genome_versions/mouse/__init__.py +++ /dev/null @@ -1,2 
+0,0 @@ -from .genome_sizes import GENOME_SIZE_DICT -from .genome_container import GenomeContainer diff --git a/sfaira/versions/genome_versions/mouse/genome_container.py b/sfaira/versions/genome_versions/mouse/genome_container.py deleted file mode 100644 index 13c341234..000000000 --- a/sfaira/versions/genome_versions/mouse/genome_container.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import pandas - -from .genome_sizes import GENOME_SIZE_DICT - - -class GenomeContainer: - available_genomes = ["Mus_musculus_GRCm38_97"] - - def __init__(self): - self.genomes = { - "Mus_musculus_GRCm38_97": "Mus_musculus_GRCm38_97.csv" - } - self.genome_sizes = { - "Mus_musculus_GRCm38_97": GENOME_SIZE_DICT["Mus_musculus_GRCm38_97"] - } - - def read_local_csv(self, genome): - return pandas.read_csv(os.path.join(str(os.path.dirname(__file__)), self.genomes[genome])) diff --git a/sfaira/versions/genome_versions/mouse/genome_sizes.py b/sfaira/versions/genome_versions/mouse/genome_sizes.py deleted file mode 100644 index 63cf95ff0..000000000 --- a/sfaira/versions/genome_versions/mouse/genome_sizes.py +++ /dev/null @@ -1,3 +0,0 @@ -GENOME_SIZE_DICT = { - "Mus_musculus_GRCm38_97": (21900, ) -} diff --git a/sfaira/versions/genomes.py b/sfaira/versions/genomes.py new file mode 100644 index 000000000..872fd0063 --- /dev/null +++ b/sfaira/versions/genomes.py @@ -0,0 +1,139 @@ +""" +Functionalities to interact with gene sets defined in an assembly and gene-annotation (such as protein-coding). 
+""" + +import gzip +import os +from typing import Union +import pandas +import pathlib +import urllib.request + +KEY_SYMBOL = "gene_name" +KEY_ID = "gene_id" +KEY_TYPE = "gene_biotype" +VALUE_GTF_GENE = "gene" +KEY_GTF_REGION_TYPE = 2 +KEY_GTF_REGION_DETAIL_FIELD = 8 +IDX_GTF_REGION_DETAIL_FIELD_ID = 0 +IDX_GTF_REGION_DETAIL_FIELD_SYMBOL = 2 +IDX_GTF_REGION_DETAIL_FIELD_TYPE = 4 + + +class GtfInterface: + + def __init__(self, assembly: str): + self.assembly = assembly + + @property + def cache_dir(self): + """ + The cache dir is in a cache directory in the sfaira installation that is excempt from git versioning. + """ + cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "cache", "genomes") + cache_dir_path = pathlib.Path(cache_dir) + cache_dir_path.mkdir(parents=True, exist_ok=True) + return cache_dir + + @property + def cache_fn(self): + return os.path.join(self.cache_dir, self.assembly + ".csv") + + @property + def release(self) -> str: + return self.assembly.split(".")[-1] + + @property + def organism(self): + return self.assembly.split(".")[0].lower() + + @property + def url_ensembl_ftp(self): + return f"ftp://ftp.ensembl.org/pub/release-{self.release}/gtf/{self.organism}/{self.assembly}.gtf.gz" + + def download_gtf_ensembl(self): + """ + Download .gtf file from ensembl FTP server and turn into reduced, gene-centric cache .csv. + """ + temp_file = os.path.join(self.cache_dir, self.assembly + ".gtf.gz") + print(f"downloading {self.url_ensembl_ftp} into a temporary file {temp_file}") + _ = urllib.request.urlretrieve(url=self.url_ensembl_ftp, filename=temp_file) + with gzip.open(temp_file) as f: + tab = pandas.read_csv(f, sep="\t", comment="#", header=None) + os.remove(temp_file) # Delete temporary file .gtf.gz. 
+ tab = tab.loc[tab[KEY_GTF_REGION_TYPE].values == VALUE_GTF_GENE, :] + conversion_tab = pandas.DataFrame({ + "gene_id": [ + x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_ID].split(" ")[-1].strip("\"") + for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], + "gene_name": [ + x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_SYMBOL].split(" ")[-1].strip("\"") + for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], + "gene_biotype": [ + x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_TYPE].split(" ")[-1].strip("\"") + for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], + }).sort_values("gene_id") + conversion_tab.to_csv(self.cache_fn) + + @property + def cache(self) -> pandas.DataFrame: + if not os.path.exists(self.cache_fn): + self.download_gtf_ensembl() + return pandas.read_csv(self.cache_fn) + + +class GenomeContainer: + genome_tab: pandas.DataFrame + assembly: str + organism: str + + def __init__( + self, + organism: str, + assembly: Union[None, str], + ): + self.organism = organism + # Set defaults: + if self.organism == "human": + self.assembly = assembly if assembly is not None else "Homo_sapiens.GRCh38.102" + elif self.organism == "mouse": + self.assembly = assembly if assembly is not None else "Mus_musculus.GRCm38.102" + else: + raise ValueError(f"organism {organism} not found") + self.gc = GtfInterface(assembly=self.assembly) + self.load_genome() + + def load_genome(self): + self.genome_tab = self.gc.cache + + def subset(self, gene_biotype: str): + self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_TYPE].values == gene_biotype, :].copy() + + @property + def names(self): + return self.genome_tab[KEY_SYMBOL].values.tolist() + + @property + def ensembl(self): + return self.genome_tab[KEY_ID].values.tolist() + + @property + def type(self): + return self.genome_tab[KEY_TYPE].values.tolist() + + @property + def ngenes(self) -> int: + return self.genome_tab.shape[0] + + @property + def names_to_id_dict(self): + return dict(zip(self.genome_tab[KEY_SYMBOL].values.tolist(), 
self.genome_tab[KEY_ID].values.tolist())) + + @property + def id_to_names_dict(self): + return dict(zip(self.genome_tab[KEY_ID].values.tolist(), self.genome_tab[KEY_SYMBOL].values.tolist())) + + @property + def strippednames_to_id_dict(self): + return dict(zip([i.split(".")[0] for i in self.genome_tab[KEY_SYMBOL]], + self.genome_tab[KEY_ID].values.tolist())) diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index 349707146..3aa0d973a 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py @@ -1,4 +1,4 @@ from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyObo, \ - OntologyCelltypes, OntologyUberon, OntologyHancestro, OntologyHsapdv, OntologyMmusdv, \ + OntologyCelltypes, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 3a0e73d35..22b04144e 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -142,7 +142,9 @@ def recursive_search(iri): terms = requests.get(get_url(iri=iri)).json()["_embedded"]["terms"] nodes_new = {} for x in terms: - nodes_new[x["iri"].split("/")[-1]] = { + k = x["iri"].split("/")[-1] + k = ":".join(k.split("_")) + nodes_new[k] = { "name": x["label"], "description": x["description"], "synonyms": x["synonyms"], @@ -159,6 +161,10 @@ def recursive_search(iri): def node_names(self) -> List[str]: return [v["name"] for k, v in self.nodes.items()] + def id_from_name(self, x: str) -> str: + self.validate_node(x=x) + return [k for k, v in self.nodes.items() if v["name"] == x][0] + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): """ Map free text node name to ontology node names via fuzzy string matching. 
@@ -332,10 +338,8 @@ class OntologyExtendedObo(OntologyObo): def __init__(self, obo, **kwargs): super().__init__(obo=obo, **kwargs) - # ToDo distinguish here: - self.add_extension(dict_ontology=ONTOLOGIY_EXTENSION_HUMAN) - def add_extension(self, dict_ontology: Dict[str, List[str]]): + def add_extension(self, dict_ontology: Dict[str, List[Dict[str, dict]]]): """ Extend ontology by additional edges and nodes defined in a dictionary. @@ -344,21 +348,22 @@ def add_extension(self, dict_ontology: Dict[str, List[str]]): :param dict_ontology: Dictionary of nodes and edges to add to ontology. Parsing: - keys: parent nodes (which must be in ontology) - - values: children nodes (which can be in ontology), must be given as list of stringd. + - values: children nodes (which can be in ontology), must be given as a dictionary in which keys are + ontology IDs and values are node values.. If these are in the ontology, an edge is added, otherwise, an edge and the node are added. :return: """ for k, v in dict_ontology.items(): - assert isinstance(v, list), "dictionary values should be list of strings" + assert isinstance(v, dict), "dictionary values should be dictionaries" # Check that parent node is present: - if k not in self.nodes: + if k not in self.node_ids: raise ValueError(f"key {k} was not in reference ontology") # Check if edge is added only, or edge and node. - for child_node in v: - if child_node not in self.nodes: # Add node. - self.graph.add_node(child_node) + for child_node_k, child_node_v in v.items(): + if child_node_k not in self.node_ids: # Add node + self.graph.add_node(node_for_adding=child_node_k, **child_node_v) # Add edge. 
- self.graph.add_edge(k, child_node) + self.graph.add_edge(k, child_node_k) # Check that DAG was not broken: self._check_graph() @@ -593,13 +598,16 @@ def synonym_node_properties(self) -> List[str]: return ["synonym"] -class OntologyHancestro(OntologyExtendedObo): +# use OWL for OntologyHancestro + + +class OntologyHsapdv(OntologyExtendedObo): def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/hancestro.obo") + super().__init__(obo="http://purl.obolibrary.org/obo/hsapdv.obo") # Clean up nodes: nodes_to_delete = [] @@ -609,32 +617,18 @@ def __init__( for k in nodes_to_delete: self.graph.remove_node(k) - # Clean up edges: - # The graph object can hold different types of edges, - # and multiple types are loaded from the obo, not all of which are relevant for us: - # All edge types (based on previous download, assert below that this is not extended): - edge_types = [] # ToDo - edges_to_delete = [] - for i, x in enumerate(self.graph.edges): - assert x[2] in edge_types, x - if x[2] not in []: - edges_to_delete.append((x[0], x[1])) - for x in edges_to_delete: - self.graph.remove_edge(u=x[0], v=x[1]) - self._check_graph() - @property def synonym_node_properties(self) -> List[str]: return ["synonym"] -class OntologyHsapdv(OntologyExtendedObo): +class OntologyMmusdv(OntologyExtendedObo): def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/hsapdv.obo") + super().__init__(obo="http://purl.obolibrary.org/obo/mmusdv.obo") # Clean up nodes: nodes_to_delete = [] @@ -649,13 +643,13 @@ def synonym_node_properties(self) -> List[str]: return ["synonym"] -class OntologyMmusdv(OntologyExtendedObo): +class OntologyMondo(OntologyExtendedObo): def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/mmusdv.obo") + super().__init__(obo="http://purl.obolibrary.org/obo/mondo.obo") # Clean up nodes: nodes_to_delete = [] @@ -665,6 +659,15 @@ def __init__( for k in nodes_to_delete: 
self.graph.remove_node(k) + # add healthy property + # Add node "healthy" under root node "MONDO:0000001": "quality". + # We use a PATO node for this label: PATO:0000461. + self.add_extension(dict_ontology={ + "MONDO:0000001": { + "PATO:0000461": {"name": "healthy"} + }, + }) + @property def synonym_node_properties(self) -> List[str]: return ["synonym"] diff --git a/sfaira/versions/metadata/universe.py b/sfaira/versions/metadata/universe.py index 448095415..69a52bd63 100644 --- a/sfaira/versions/metadata/universe.py +++ b/sfaira/versions/metadata/universe.py @@ -66,7 +66,7 @@ def target_universe_ids(self): :return: """ - return [self.onto_cl.map_class_to_id(x) for x in self._target_universe] + return [self.onto_cl.id_from_name(x) for x in self._target_universe] @property def ntypes(self): @@ -79,21 +79,10 @@ def __validate_target_universe_table(self, tab: pd.DataFrame): assert len(tab.columns) == 2 assert tab.columns[0] == "name" and tab.columns[1] == "id" - def load_target_universe(self, organ): + def load_target_universe(self, fn): """ - :param organ: Anatomic structure to load target universe for. - :return: - """ - # ToDo: Use pydoc based query of universes stored in ./target_universes/.. - tab = None - self.__validate_target_universe_table(tab=tab) - self.target_universe = None # ToDo - - def read_target_universe_csv(self, fn): - """ - - :param fn: File containing target universe. + :param fn: .csv file containing target universe. 
:return: """ tab = pd.read_csv(fn) diff --git a/sfaira/versions/topology_versions/__init__.py b/sfaira/versions/topologies/__init__.py similarity index 100% rename from sfaira/versions/topology_versions/__init__.py rename to sfaira/versions/topologies/__init__.py diff --git a/sfaira/versions/topology_versions/class_interface.py b/sfaira/versions/topologies/class_interface.py similarity index 94% rename from sfaira/versions/topology_versions/class_interface.py rename to sfaira/versions/topologies/class_interface.py index 0aa186c53..30f824a73 100644 --- a/sfaira/versions/topology_versions/class_interface.py +++ b/sfaira/versions/topologies/class_interface.py @@ -1,4 +1,4 @@ -from .external import SuperGenomeContainer +from sfaira.versions.genomes import GenomeContainer from . import human from . import mouse @@ -59,7 +59,7 @@ def __init__( assert topology_id in list(self.topologies[organism][model_class][model_type].keys()), \ "topology_id %s not found in %s" % \ (topology_id, list(self.topologies[organism][model_class][model_type].keys())) - self.genome_container = SuperGenomeContainer(organism=organism, genome=self.topology["genome"]) + self.genome_container = GenomeContainer(organism=organism, assembly=self.topology["genome"]) @property def topology(self): diff --git a/sfaira/versions/topologies/human/__init__.py b/sfaira/versions/topologies/human/__init__.py new file mode 100644 index 000000000..6630987ef --- /dev/null +++ b/sfaira/versions/topologies/human/__init__.py @@ -0,0 +1,2 @@ +from sfaira.versions.topologies.human import celltype +from sfaira.versions.topologies.human import embedding diff --git a/sfaira/versions/topologies/human/celltype/__init__.py b/sfaira/versions/topologies/human/celltype/__init__.py new file mode 100644 index 000000000..e657e4e78 --- /dev/null +++ b/sfaira/versions/topologies/human/celltype/__init__.py @@ -0,0 +1,2 @@ +from sfaira.versions.topologies.human.celltype.celltypemarker import CELLTYPEMARKER_TOPOLOGIES +from 
sfaira.versions.topologies.human.celltype.celltypemlp import CELLTYPEMLP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py b/sfaira/versions/topologies/human/celltype/celltypemarker.py similarity index 89% rename from sfaira/versions/topology_versions/human/celltype/celltypemarker.py rename to sfaira/versions/topologies/human/celltype/celltypemarker.py index a31807448..c99b1025d 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemarker.py +++ b/sfaira/versions/topologies/human/celltype/celltypemarker.py @@ -1,6 +1,7 @@ CELLTYPEMARKER_TOPOLOGIES = { "0.0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "l1_coef": 0., "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py b/sfaira/versions/topologies/human/celltype/celltypemlp.py similarity index 86% rename from sfaira/versions/topology_versions/human/celltype/celltypemlp.py rename to sfaira/versions/topologies/human/celltype/celltypemlp.py index 827ffba91..1d5cd616b 100644 --- a/sfaira/versions/topology_versions/human/celltype/celltypemlp.py +++ b/sfaira/versions/topologies/human/celltype/celltypemlp.py @@ -1,6 +1,7 @@ CELLTYPEMLP_TOPOLOGIES = { "0.0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [], "activation": None, @@ -16,7 +17,8 @@ } }, "0.1.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [128], "activation": "selu", @@ -32,7 +34,8 @@ } }, "0.1.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [256, 128], "activation": "selu", @@ -48,7 +51,8 @@ } }, "0.1.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + 
"genes": ["protein_coding"], "hyper_parameters": { "units": [512, 256, 128], "activation": "selu", diff --git a/sfaira/versions/topologies/human/embedding/__init__.py b/sfaira/versions/topologies/human/embedding/__init__.py new file mode 100644 index 000000000..829c1623e --- /dev/null +++ b/sfaira/versions/topologies/human/embedding/__init__.py @@ -0,0 +1,6 @@ +from sfaira.versions.topologies.human.embedding.ae import AE_TOPOLOGIES +from sfaira.versions.topologies.human.embedding.linear import LINEAR_TOPOLOGIES +from sfaira.versions.topologies.human.embedding.nmf import NMF_TOPOLOGIES +from sfaira.versions.topologies.human.embedding.vae import VAE_TOPOLOGIES +from sfaira.versions.topologies.human.embedding.vaeiaf import VAEIAF_TOPOLOGIES +from sfaira.versions.topologies.human.embedding.vaevamp import VAEVAMP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/human/embedding/ae.py b/sfaira/versions/topologies/human/embedding/ae.py similarity index 84% rename from sfaira/versions/topology_versions/human/embedding/ae.py rename to sfaira/versions/topologies/human/embedding/ae.py index fd449a8c1..d190585d6 100644 --- a/sfaira/versions/topology_versions/human/embedding/ae.py +++ b/sfaira/versions/topologies/human/embedding/ae.py @@ -1,6 +1,7 @@ AE_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -15,7 +16,8 @@ }, "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -30,7 +32,8 @@ }, "0.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -45,7 +48,8 @@ }, "0.4": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": 
"Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/human/embedding/linear.py b/sfaira/versions/topologies/human/embedding/linear.py similarity index 80% rename from sfaira/versions/topology_versions/human/embedding/linear.py rename to sfaira/versions/topologies/human/embedding/linear.py index ef1bc2c53..ca2721129 100644 --- a/sfaira/versions/topology_versions/human/embedding/linear.py +++ b/sfaira/versions/topologies/human/embedding/linear.py @@ -1,6 +1,7 @@ LINEAR_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -11,7 +12,8 @@ }, "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -22,7 +24,8 @@ }, "0.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topology_versions/human/embedding/nmf.py b/sfaira/versions/topologies/human/embedding/nmf.py similarity index 80% rename from sfaira/versions/topology_versions/human/embedding/nmf.py rename to sfaira/versions/topologies/human/embedding/nmf.py index 7ab548d78..2efd21f9c 100644 --- a/sfaira/versions/topology_versions/human/embedding/nmf.py +++ b/sfaira/versions/topologies/human/embedding/nmf.py @@ -1,6 +1,7 @@ NMF_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -11,7 +12,8 @@ }, "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, 
"l1_coef": 0., @@ -22,7 +24,8 @@ }, "0.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topology_versions/human/embedding/vae.py b/sfaira/versions/topologies/human/embedding/vae.py similarity index 83% rename from sfaira/versions/topology_versions/human/embedding/vae.py rename to sfaira/versions/topologies/human/embedding/vae.py index 8ba9d4199..4ec8370c7 100644 --- a/sfaira/versions/topology_versions/human/embedding/vae.py +++ b/sfaira/versions/topologies/human/embedding/vae.py @@ -1,6 +1,7 @@ VAE_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -14,7 +15,8 @@ }, "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -28,7 +30,8 @@ }, "0.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -42,7 +45,8 @@ }, "0.4": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py b/sfaira/versions/topologies/human/embedding/vaeiaf.py similarity index 86% rename from sfaira/versions/topology_versions/human/embedding/vaeiaf.py rename to sfaira/versions/topologies/human/embedding/vaeiaf.py index 5ad4cf9ea..d732fd1e4 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaeiaf.py +++ b/sfaira/versions/topologies/human/embedding/vaeiaf.py @@ -1,6 +1,7 @@ VAEIAF_TOPOLOGIES = { "0.1": { 
- "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "n_iaf": 2, @@ -14,7 +15,8 @@ } }, "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "n_iaf": 2, diff --git a/sfaira/versions/topology_versions/human/embedding/vaevamp.py b/sfaira/versions/topologies/human/embedding/vaevamp.py similarity index 86% rename from sfaira/versions/topology_versions/human/embedding/vaevamp.py rename to sfaira/versions/topologies/human/embedding/vaevamp.py index d4fff9f69..a94131783 100644 --- a/sfaira/versions/topology_versions/human/embedding/vaevamp.py +++ b/sfaira/versions/topologies/human/embedding/vaevamp.py @@ -1,6 +1,7 @@ VAEVAMP_TOPOLOGIES = { "0.2": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, (32, 32), 128, 256), "l1_coef": 0., @@ -13,7 +14,8 @@ } }, "0.3": { - "genome": "Homo_sapiens_GRCh38_97", + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, (64, 64), 256, 512), "l1_coef": 0., diff --git a/sfaira/versions/topologies/mouse/__init__.py b/sfaira/versions/topologies/mouse/__init__.py new file mode 100644 index 000000000..4105b813a --- /dev/null +++ b/sfaira/versions/topologies/mouse/__init__.py @@ -0,0 +1,2 @@ +from sfaira.versions.topologies.mouse import celltype +from sfaira.versions.topologies.mouse import embedding diff --git a/sfaira/versions/topologies/mouse/celltype/__init__.py b/sfaira/versions/topologies/mouse/celltype/__init__.py new file mode 100644 index 000000000..d98e272bf --- /dev/null +++ b/sfaira/versions/topologies/mouse/celltype/__init__.py @@ -0,0 +1,2 @@ +from sfaira.versions.topologies.mouse.celltype.celltypemarker import 
CELLTYPEMARKER_TOPOLOGIES +from sfaira.versions.topologies.mouse.celltype.celltypemlp import CELLTYPEMLP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py similarity index 89% rename from sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py rename to sfaira/versions/topologies/mouse/celltype/celltypemarker.py index 8043c48ad..0625f7636 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemarker.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py @@ -1,6 +1,7 @@ CELLTYPEMARKER_TOPOLOGIES = { "0.0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "l1_coef": 0., "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py similarity index 86% rename from sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py rename to sfaira/versions/topologies/mouse/celltype/celltypemlp.py index 97d029fb9..1f85bc78a 100644 --- a/sfaira/versions/topology_versions/mouse/celltype/celltypemlp.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py @@ -1,6 +1,7 @@ CELLTYPEMLP_TOPOLOGIES = { "0.0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [], "activation": None, @@ -16,7 +17,8 @@ } }, "0.1.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [128], "activation": "selu", @@ -32,7 +34,8 @@ } }, "0.1.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [256, 128], "activation": "selu", @@ -48,7 +51,8 @@ } }, "0.1.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": 
"Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "units": [512, 256, 128], "activation": "selu", diff --git a/sfaira/versions/topologies/mouse/embedding/__init__.py b/sfaira/versions/topologies/mouse/embedding/__init__.py new file mode 100644 index 000000000..d36c96479 --- /dev/null +++ b/sfaira/versions/topologies/mouse/embedding/__init__.py @@ -0,0 +1,6 @@ +from sfaira.versions.topologies.mouse.embedding.ae import AE_TOPOLOGIES +from sfaira.versions.topologies.mouse.embedding.linear import LINEAR_TOPOLOGIES +from sfaira.versions.topologies.mouse.embedding.nmf import NMF_TOPOLOGIES +from sfaira.versions.topologies.mouse.embedding.vae import VAE_TOPOLOGIES +from sfaira.versions.topologies.mouse.embedding.vaeiaf import VAEIAF_TOPOLOGIES +from sfaira.versions.topologies.mouse.embedding.vaevamp import VAEVAMP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/mouse/embedding/ae.py b/sfaira/versions/topologies/mouse/embedding/ae.py similarity index 84% rename from sfaira/versions/topology_versions/mouse/embedding/ae.py rename to sfaira/versions/topologies/mouse/embedding/ae.py index 12b092138..d837520d2 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/ae.py +++ b/sfaira/versions/topologies/mouse/embedding/ae.py @@ -1,6 +1,7 @@ AE_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -15,7 +16,8 @@ }, "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -30,7 +32,8 @@ }, "0.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -45,7 +48,8 @@ }, "0.4": { - "genome": "Mus_musculus_GRCm38_97", + 
"genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/mouse/embedding/linear.py b/sfaira/versions/topologies/mouse/embedding/linear.py similarity index 80% rename from sfaira/versions/topology_versions/mouse/embedding/linear.py rename to sfaira/versions/topologies/mouse/embedding/linear.py index f073b42a2..1eaecc63a 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/linear.py +++ b/sfaira/versions/topologies/mouse/embedding/linear.py @@ -1,6 +1,7 @@ LINEAR_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -11,7 +12,8 @@ }, "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -22,7 +24,8 @@ }, "0.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topology_versions/mouse/embedding/nmf.py b/sfaira/versions/topologies/mouse/embedding/nmf.py similarity index 80% rename from sfaira/versions/topology_versions/mouse/embedding/nmf.py rename to sfaira/versions/topologies/mouse/embedding/nmf.py index 9283ae40f..4817cd588 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/nmf.py +++ b/sfaira/versions/topologies/mouse/embedding/nmf.py @@ -1,6 +1,7 @@ NMF_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -11,7 +12,8 @@ }, "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { 
"latent_dim": 128, "l1_coef": 0., @@ -22,7 +24,8 @@ }, "0.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topology_versions/mouse/embedding/vae.py b/sfaira/versions/topologies/mouse/embedding/vae.py similarity index 83% rename from sfaira/versions/topology_versions/mouse/embedding/vae.py rename to sfaira/versions/topologies/mouse/embedding/vae.py index aaeab8e76..cacaf76c7 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vae.py +++ b/sfaira/versions/topologies/mouse/embedding/vae.py @@ -1,6 +1,7 @@ VAE_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -14,7 +15,8 @@ }, "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -28,7 +30,8 @@ }, "0.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -42,7 +45,8 @@ }, "0.4": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py similarity index 86% rename from sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py rename to sfaira/versions/topologies/mouse/embedding/vaeiaf.py index 28989d580..b23b3675f 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py @@ -1,6 +1,7 @@ 
VAEIAF_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "n_iaf": 2, @@ -14,7 +15,8 @@ } }, "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "n_iaf": 2, diff --git a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py b/sfaira/versions/topologies/mouse/embedding/vaevamp.py similarity index 86% rename from sfaira/versions/topology_versions/mouse/embedding/vaevamp.py rename to sfaira/versions/topologies/mouse/embedding/vaevamp.py index 33e488224..13553a77d 100644 --- a/sfaira/versions/topology_versions/mouse/embedding/vaevamp.py +++ b/sfaira/versions/topologies/mouse/embedding/vaevamp.py @@ -1,6 +1,7 @@ VAEVAMP_TOPOLOGIES = { "0.2": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, (32, 32), 128, 256), "l1_coef": 0., @@ -13,7 +14,8 @@ } }, "0.3": { - "genome": "Mus_musculus_GRCm38_97", + "genome": "Mus_musculus.GRCm38.102", + "genes": ["protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, (64, 64), 256, 512), "l1_coef": 0., diff --git a/sfaira/versions/topology_versions/external.py b/sfaira/versions/topology_versions/external.py deleted file mode 100644 index 86fafa27f..000000000 --- a/sfaira/versions/topology_versions/external.py +++ /dev/null @@ -1 +0,0 @@ -from sfaira.versions.genome_versions import SuperGenomeContainer # noqa: W292 diff --git a/sfaira/versions/topology_versions/human/__init__.py b/sfaira/versions/topology_versions/human/__init__.py deleted file mode 100644 index ff96b16d3..000000000 --- a/sfaira/versions/topology_versions/human/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.versions.topology_versions.human import celltype -from 
sfaira.versions.topology_versions.human import embedding diff --git a/sfaira/versions/topology_versions/human/celltype/__init__.py b/sfaira/versions/topology_versions/human/celltype/__init__.py deleted file mode 100644 index 23e55e86e..000000000 --- a/sfaira/versions/topology_versions/human/celltype/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.versions.topology_versions.human.celltype.celltypemarker import CELLTYPEMARKER_TOPOLOGIES -from sfaira.versions.topology_versions.human.celltype.celltypemlp import CELLTYPEMLP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/human/embedding/__init__.py b/sfaira/versions/topology_versions/human/embedding/__init__.py deleted file mode 100644 index bf21871a0..000000000 --- a/sfaira/versions/topology_versions/human/embedding/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from sfaira.versions.topology_versions.human.embedding.ae import AE_TOPOLOGIES -from sfaira.versions.topology_versions.human.embedding.linear import LINEAR_TOPOLOGIES -from sfaira.versions.topology_versions.human.embedding.nmf import NMF_TOPOLOGIES -from sfaira.versions.topology_versions.human.embedding.vae import VAE_TOPOLOGIES -from sfaira.versions.topology_versions.human.embedding.vaeiaf import VAEIAF_TOPOLOGIES -from sfaira.versions.topology_versions.human.embedding.vaevamp import VAEVAMP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/mouse/__init__.py b/sfaira/versions/topology_versions/mouse/__init__.py deleted file mode 100644 index 63a125972..000000000 --- a/sfaira/versions/topology_versions/mouse/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.versions.topology_versions.mouse import celltype -from sfaira.versions.topology_versions.mouse import embedding diff --git a/sfaira/versions/topology_versions/mouse/celltype/__init__.py b/sfaira/versions/topology_versions/mouse/celltype/__init__.py deleted file mode 100644 index b555ab76a..000000000 --- a/sfaira/versions/topology_versions/mouse/celltype/__init__.py +++ /dev/null @@ 
-1,2 +0,0 @@ -from sfaira.versions.topology_versions.mouse.celltype.celltypemarker import CELLTYPEMARKER_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.celltype.celltypemlp import CELLTYPEMLP_TOPOLOGIES diff --git a/sfaira/versions/topology_versions/mouse/embedding/__init__.py b/sfaira/versions/topology_versions/mouse/embedding/__init__.py deleted file mode 100644 index a9404744f..000000000 --- a/sfaira/versions/topology_versions/mouse/embedding/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from sfaira.versions.topology_versions.mouse.embedding.ae import AE_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.embedding.linear import LINEAR_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.embedding.nmf import NMF_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.embedding.vae import VAE_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.embedding.vaeiaf import VAEIAF_TOPOLOGIES -from sfaira.versions.topology_versions.mouse.embedding.vaevamp import VAEVAMP_TOPOLOGIES From ad390791149647015673973ee55a29f640c8e72b Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 12 Apr 2021 11:03:46 +0200 Subject: [PATCH 100/161] fixed various bugs (#218) --- sfaira/consts/adata_fields.py | 19 ++- sfaira/consts/ontologies.py | 4 +- sfaira/data/__init__.py | 2 +- sfaira/data/base/dataset.py | 139 +++++++++++------- sfaira/data/base/dataset_group.py | 119 ++++++++++++++- sfaira/data/dataloaders/__init__.py | 2 +- .../anatomical_groups/human/human_adipose.py | 4 +- .../human/human_adrenalgland.py | 4 +- .../anatomical_groups/human/human_artery.py | 4 +- .../anatomical_groups/human/human_bladder.py | 4 +- .../anatomical_groups/human/human_blood.py | 4 +- .../anatomical_groups/human/human_bone.py | 4 +- .../anatomical_groups/human/human_brain.py | 4 +- .../anatomical_groups/human/human_calvaria.py | 4 +- .../anatomical_groups/human/human_cervix.py | 4 +- .../human/human_chorionicvillus.py | 4 +- .../anatomical_groups/human/human_colon.py | 4 +- .../anatomical_groups/human/human_duodenum.py | 4 +- .../human/human_epityphlon.py | 4 +- .../human/human_esophagus.py | 4 +- .../anatomical_groups/human/human_eye.py | 4 +- .../human/human_fallopiantube.py | 4 +- .../human/human_femalegonad.py | 4 +- .../human/human_gallbladder.py | 4 +- .../anatomical_groups/human/human_heart.py | 4 +- .../anatomical_groups/human/human_hesc.py | 4 +- .../anatomical_groups/human/human_ileum.py | 4 +- .../anatomical_groups/human/human_jejunum.py | 4 +- .../anatomical_groups/human/human_kidney.py | 4 +- .../anatomical_groups/human/human_liver.py | 4 +- .../anatomical_groups/human/human_lung.py | 4 +- .../human/human_malegonad.py | 4 +- .../anatomical_groups/human/human_muscle.py | 4 +- .../anatomical_groups/human/human_omentum.py | 4 +- .../anatomical_groups/human/human_pancreas.py | 4 +- .../anatomical_groups/human/human_placenta.py | 4 +- .../anatomical_groups/human/human_pleura.py | 4 +- .../anatomical_groups/human/human_prostate.py | 4 +- .../anatomical_groups/human/human_rectum.py | 4 +- .../anatomical_groups/human/human_rib.py | 4 +- 
.../anatomical_groups/human/human_skin.py | 4 +- .../human/human_spinalcord.py | 4 +- .../anatomical_groups/human/human_spleen.py | 4 +- .../anatomical_groups/human/human_stomach.py | 4 +- .../anatomical_groups/human/human_thymus.py | 4 +- .../anatomical_groups/human/human_thyroid.py | 4 +- .../anatomical_groups/human/human_trachea.py | 4 +- .../anatomical_groups/human/human_ureter.py | 4 +- .../anatomical_groups/human/human_uterus.py | 4 +- .../anatomical_groups/mouse/mouse_adipose.py | 4 +- .../anatomical_groups/mouse/mouse_bladder.py | 4 +- .../anatomical_groups/mouse/mouse_blood.py | 4 +- .../anatomical_groups/mouse/mouse_bone.py | 4 +- .../anatomical_groups/mouse/mouse_brain.py | 4 +- .../anatomical_groups/mouse/mouse_colon.py | 4 +- .../mouse/mouse_diaphragm.py | 4 +- .../mouse/mouse_femalegonad.py | 4 +- .../anatomical_groups/mouse/mouse_heart.py | 4 +- .../anatomical_groups/mouse/mouse_ileum.py | 4 +- .../anatomical_groups/mouse/mouse_kidney.py | 4 +- .../anatomical_groups/mouse/mouse_liver.py | 4 +- .../anatomical_groups/mouse/mouse_lung.py | 4 +- .../mouse/mouse_malegonad.py | 4 +- .../mouse/mouse_mammarygland.py | 4 +- .../anatomical_groups/mouse/mouse_muscle.py | 4 +- .../anatomical_groups/mouse/mouse_pancreas.py | 4 +- .../anatomical_groups/mouse/mouse_placenta.py | 4 +- .../anatomical_groups/mouse/mouse_prostate.py | 4 +- .../anatomical_groups/mouse/mouse_rib.py | 4 +- .../anatomical_groups/mouse/mouse_skin.py | 4 +- .../anatomical_groups/mouse/mouse_spleen.py | 4 +- .../anatomical_groups/mouse/mouse_stomach.py | 4 +- .../anatomical_groups/mouse/mouse_thymus.py | 4 +- .../anatomical_groups/mouse/mouse_tongue.py | 4 +- .../anatomical_groups/mouse/mouse_trachea.py | 4 +- .../anatomical_groups/mouse/mouse_uterus.py | 4 +- sfaira/data/dataloaders/super_group.py | 2 +- .../create_anatomical_configs.py | 4 +- sfaira/data/utils_scripts/create_meta.py | 2 +- .../utils_scripts/create_meta_and_cache.py | 2 +- .../data/utils_scripts/streamline_selected.py | 2 +- 
.../data/utils_scripts/write_backed_human.py | 2 +- .../data/utils_scripts/write_backed_mouse.py | 2 +- sfaira/train/summaries.py | 8 +- sfaira/train/train_model.py | 4 +- sfaira/unit_tests/data/test_dataset.py | 26 ++-- sfaira/versions/metadata/__init__.py | 2 +- sfaira/versions/metadata/base.py | 45 +++++- 88 files changed, 435 insertions(+), 231 deletions(-) diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index afa095805..a768ade0e 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -1,9 +1,8 @@ -from typing import List - """ The classes in this file are containers of field names and element entries that are used in streamlined adata objects in sfaira and in associated data bases. """ +from typing import List class AdataIds: @@ -154,7 +153,7 @@ class AdataIdsCellxgene(AdataIds): def __init__(self): self.assay_sc = "assay" - self.cell_types_original = "free_annotation" + self.cell_types_original = "cell_type" # TODO "free_annotation" not always given. 
self.cell_ontology_class = "cell_type" self.cell_ontology_id = "cell_type_ontology_term_id" self.default_embedding = "default_embedding" @@ -178,7 +177,10 @@ def __init__(self): # selected element entries used for parsing: self.author_names = "names" + self.unknown_celltype_identifier = None self.unknown_metadata_identifier = "unknown" + self.invalid_metadata_identifier = "na" + self.unknown_metadata_ontology_id_identifier = "" # accepted file names self.accepted_file_names = [ @@ -206,3 +208,14 @@ def __init__(self): "id", "title", ] + # These attributes related to obs and uns keys above are also in the data set attributes that can be + # inquired before download via the REST API: + self.dataset_keys = [ + "assay_sc", + "development_stage", + "disease", + "ethnicity", + "organ", + "organism", + "sex", + ] diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 6c7623245..7c1fd2af0 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -17,11 +17,12 @@ def __init__(self): self.cell_line = OntologyCellosaurus() self.cellontology_class = "v2021-02-01" self.cellontology_original = None + self.collection_id = None self.default_embedding = None self.development_stage = None # OntologyHsapdv() # TODO allow for other organisms here too. 
self.disease = OntologyMondo() self.doi = None - self.ethnicity = None # OntologyHancestro() + self.ethnicity = None # OntologyHancestro() # TODO self.id = None self.individual = None self.normalization = None @@ -30,6 +31,7 @@ def __init__(self): self.primary_data = OntologyList(terms=[True, False]) self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "tumor"]) self.sex = OntologyList(terms=["female", "male", "mixed", "unknown", "other"]) + self.supplier = OntologyList(terms=["cellxgene", "sfaira"]) self.tech_sample = None self.title = None self.year = OntologyList(terms=list(range(2000, 3000))) diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 921814a8d..5b9c26f76 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -2,6 +2,6 @@ DatasetGroup, DatasetGroupDirectoryOriented, \ DatasetSuperGroup from . import dataloaders -from .dataloaders import DatasetSuperGroupSfaira +from .dataloaders import Universe from .interactive import DatasetInteractive from . 
import utils diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 845d886c2..2e3d07dc5 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -18,7 +18,7 @@ import ssl from sfaira.versions.genomes import GenomeContainer -from sfaira.versions.metadata import Ontology, CelltypeUniverse +from sfaira.versions.metadata import Ontology, OntologyHierarchical, CelltypeUniverse from sfaira.consts import AdataIds, AdataIdsSfaira, META_DATA_FIELDS, OCS from sfaira.data.utils import collapse_matrix, read_yaml @@ -72,6 +72,7 @@ class DatasetBase(abc.ABC): cache_path: Union[None, str] id: Union[None, str] genome: Union[None, str] + supplier: str _assay_sc: Union[None, str] _assay_differentiation: Union[None, str] @@ -252,6 +253,8 @@ def __init__( self.dict_load_func_annotation = dict_load_func_annotation self._additional_annotation_key = additional_annotation_key + self.supplier = "sfaira" + @property def _directory_formatted_id(self) -> str: return "_".join("_".join(self.id.split("/")).split(".")) @@ -319,9 +322,8 @@ def download(self, **kwargs): fn = cgi.parse_header(urllib.request.urlopen(url).info()['Content-Disposition'])[1]["filename"] else: fn = url.split("/")[-1] - if os.path.isfile(os.path.join(self.data_dir, fn)): - print(f"File {fn} already found on disk, skipping download.") - else: + # Only download if file not already downloaded: + if not os.path.isfile(os.path.join(self.data_dir, fn)): print(f"Downloading: {fn}") urllib.request.urlretrieve(url, os.path.join(self.data_dir, fn)) @@ -368,6 +370,7 @@ def _load_cached( self, load_raw: bool, allow_caching: bool, + **kwargs ): """ Wraps data set specific load and allows for caching. 
@@ -381,7 +384,9 @@ def _load_cached( """ def _assembly_wrapper(): - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + if self.load_func is None: + raise ValueError(f"Tried to access load_func for {self.id} but did not find any.") + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn, **kwargs) # Enable loading of additional annotation, e.g. secondary cell type annotation # The additional annotation `obs2 needs to be on a subset of the original annotation `self.adata.obs`. if self.dict_load_func_annotation is not None: @@ -429,6 +434,7 @@ def load( load_raw: bool = False, allow_caching: bool = True, set_metadata: bool = True, + **kwargs ): if match_to_reference and not remove_gene_version: warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" @@ -441,7 +447,7 @@ def load( raise ValueError("No sfaira data repo path provided in constructor.") # Run data set-specific loading script: - self._load_cached(load_raw=load_raw, allow_caching=allow_caching) + self._load_cached(load_raw=load_raw, allow_caching=allow_caching, **kwargs) # Set loading hyper-parameter-specific meta data: self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference @@ -1139,19 +1145,20 @@ def meta_fn(self): meta = self.data_dir else: meta = os.path.join(self.meta_path, self.directory_formatted_doi) - - return os.path.join(meta, "meta", self.doi_cleaned_id + "_meta.csv") + if meta is None: + return None + else: + return os.path.join(meta, "meta", self.doi_cleaned_id + "_meta.csv") def load_meta(self, fn: Union[PathLike, str, None]): if fn is None: - if self.meta_fn is None: - raise ValueError("provide either fn in load or path in constructor") - fn = self.meta_fn + if self.meta_fn is not None: + fn = self.meta_fn else: if isinstance(fn, str): fn = os.path.normpath(fn) # Only load meta data if file exists: - if os.path.isfile(fn): 
+ if fn is not None and os.path.isfile(fn): meta = pandas.read_csv( fn, usecols=list(META_DATA_FIELDS.keys()), @@ -1309,7 +1316,7 @@ def assay_sc(self) -> Union[None, str]: @assay_sc.setter def assay_sc(self, x: str): self.__erasing_protection(attr="assay_sc", val_old=self._assay_sc, val_new=x) - self._value_protection(attr="assay_sc", allowed=self.ontology_container_sfaira.assay_sc, attempted=x) + x = self._value_protection(attr="assay_sc", allowed=self.ontology_container_sfaira.assay_sc, attempted=x) self._assay_sc = x @property @@ -1327,8 +1334,8 @@ def assay_differentiation(self) -> Union[None, str]: @assay_differentiation.setter def assay_differentiation(self, x: str): self.__erasing_protection(attr="assay_differentiation", val_old=self._assay_differentiation, val_new=x) - self._value_protection(attr="assay_differentiation", - allowed=self.ontology_container_sfaira.assay_differentiation, attempted=x) + x = self._value_protection(attr="assay_differentiation", + allowed=self.ontology_container_sfaira.assay_differentiation, attempted=x) self._assay_differentiation = x @property @@ -1346,8 +1353,8 @@ def assay_type_differentiation(self) -> Union[None, str]: @assay_type_differentiation.setter def assay_type_differentiation(self, x: str): self.__erasing_protection(attr="assay_type_differentiation", val_old=self._assay_type_differentiation, val_new=x) - self._value_protection(attr="assay_type_differentiation", - allowed=self.ontology_container_sfaira.assay_type_differentiation, attempted=x) + x = self._value_protection(attr="assay_type_differentiation", + allowed=self.ontology_container_sfaira.assay_type_differentiation, attempted=x) self._assay_type_differentiation = x @property @@ -1388,11 +1395,14 @@ def cell_line(self, x: str): def data_dir(self): # Data is either directly in user supplied directory or in a sub directory if the overall directory is managed # by sfaira: In this case, the sub directory is named after the doi of the data set. 
- sfaira_path = os.path.join(self.data_dir_base, self.directory_formatted_doi) - if os.path.exists(sfaira_path): - return sfaira_path + if self.data_dir_base is None: + return None else: - return self.data_dir_base + sfaira_path = os.path.join(self.data_dir_base, self.directory_formatted_doi) + if os.path.exists(sfaira_path): + return sfaira_path + else: + return self.data_dir_base @property def default_embedding(self) -> Union[None, str]: @@ -1409,8 +1419,8 @@ def default_embedding(self) -> Union[None, str]: @default_embedding.setter def default_embedding(self, x: str): self.__erasing_protection(attr="default_embedding", val_old=self._development_stage, val_new=x) - self._value_protection(attr="default_embedding", allowed=self.ontology_container_sfaira.default_embedding, - attempted=x) + x = self._value_protection(attr="default_embedding", allowed=self.ontology_container_sfaira.default_embedding, + attempted=x) self._default_embedding = x @property @@ -1428,8 +1438,8 @@ def development_stage(self) -> Union[None, str]: @development_stage.setter def development_stage(self, x: str): self.__erasing_protection(attr="development_stage", val_old=self._development_stage, val_new=x) - self._value_protection(attr="development_stage", allowed=self.ontology_container_sfaira.development_stage, - attempted=x) + x = self._value_protection(attr="development_stage", allowed=self.ontology_container_sfaira.development_stage, + attempted=x) self._development_stage = x @property @@ -1447,8 +1457,8 @@ def disease(self) -> Union[None, str]: @disease.setter def disease(self, x: str): self.__erasing_protection(attr="disease", val_old=self._disease, val_new=x) - self._value_protection(attr="disease", allowed=self.ontology_container_sfaira.disease, - attempted=x) + x = self._value_protection(attr="disease", allowed=self.ontology_container_sfaira.disease, + attempted=x) self._disease = x @property @@ -1557,7 +1567,7 @@ def ethnicity(self) -> Union[None, str]: @ethnicity.setter def 
ethnicity(self, x: str): self.__erasing_protection(attr="ethnicity", val_old=self._ethnicity, val_new=x) - self._value_protection(attr="ethnicity", allowed=self._adata_ids_sfaira.ontology_ethnicity, attempted=x) + x = self._value_protection(attr="ethnicity", allowed=self.ontology_container_sfaira.ethnicity, attempted=x) self._ethnicity = x @property @@ -1645,8 +1655,8 @@ def normalization(self) -> Union[None, str]: @normalization.setter def normalization(self, x: str): self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) - self._value_protection(attr="normalization", allowed=self.ontology_container_sfaira.normalization, - attempted=x) + x = self._value_protection(attr="normalization", allowed=self.ontology_container_sfaira.normalization, + attempted=x) self._normalization = x @property @@ -1664,8 +1674,8 @@ def primary_data(self) -> Union[None, bool]: @primary_data.setter def primary_data(self, x: bool): self.__erasing_protection(attr="primary_data", val_old=self._primary_data, val_new=x) - self._value_protection(attr="primary_data", allowed=self.ontology_container_sfaira.primary_data, - attempted=x) + x = self._value_protection(attr="primary_data", allowed=self.ontology_container_sfaira.primary_data, + attempted=x) self._primary_data = x @property @@ -1844,7 +1854,7 @@ def organ(self) -> Union[None, str]: @organ.setter def organ(self, x: str): self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) - self._value_protection(attr="organ", allowed=self.ontology_container_sfaira.organ, attempted=x) + x = self._value_protection(attr="organ", allowed=self.ontology_container_sfaira.organ, attempted=x) self._organ = x @property @@ -1862,7 +1872,7 @@ def organism(self) -> Union[None, str]: @organism.setter def organism(self, x: str): self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) - self._value_protection(attr="organism", allowed=self.ontology_container_sfaira.organism, attempted=x) + x = 
self._value_protection(attr="organism", allowed=self.ontology_container_sfaira.organism, attempted=x) self._organism = x @property @@ -1880,7 +1890,8 @@ def sample_source(self) -> Union[None, str]: @sample_source.setter def sample_source(self, x: str): self.__erasing_protection(attr="sample_source", val_old=self._sample_source, val_new=x) - self._value_protection(attr="sample_source", allowed=self.ontology_container_sfaira.sample_source, attempted=x) + x = self._value_protection(attr="sample_source", allowed=self.ontology_container_sfaira.sample_source, + attempted=x) self._sample_source = x @property @@ -1898,7 +1909,7 @@ def sex(self) -> Union[None, str]: @sex.setter def sex(self, x: str): self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) - self._value_protection(attr="sex", allowed=self.ontology_container_sfaira.sex, attempted=x) + x = self._value_protection(attr="sex", allowed=self.ontology_container_sfaira.sex, attempted=x) self._sex = x @property @@ -1977,7 +1988,7 @@ def year(self) -> Union[None, int]: @year.setter def year(self, x: int): self.__erasing_protection(attr="year", val_old=self._year, val_new=x) - self._value_protection(attr="year", allowed=self.ontology_container_sfaira.year, attempted=x) + x = self._value_protection(attr="year", allowed=self.ontology_container_sfaira.year, attempted=x) self._year = x @property @@ -2097,13 +2108,14 @@ def __erasing_protection(self, attr, val_old, val_new): def _value_protection( self, attr: str, - allowed: Union[Ontology, bool, int, float, str, List[bool], List[int], List[float], List[str]], + allowed: Union[Ontology, None], attempted ): """ Check whether value is from set of allowed values. Does not check if allowed is None. + Cleans entry to term name if ontology ID is provided. :param attr: Attribute to set. :param allowed: Constraint for values of `attr`. @@ -2111,15 +2123,36 @@ def _value_protection( :param attempted: Value(s) to attempt to set in `attr`. 
:return: """ - if isinstance(attempted, np.ndarray): - attempted = attempted.tolist() - if isinstance(attempted, tuple): - attempted = list(attempted) if not isinstance(attempted, list): - attempted = [attempted] - for x in attempted: - if not is_child(query=x, ontology=allowed): - raise ValueError(f"'{x}' is not a valid entry for {attr}.") + if isinstance(attempted, np.ndarray): + attempted_ls = attempted.tolist() + elif isinstance(attempted, tuple): + attempted_ls = list(attempted) + else: + attempted_ls = [attempted] + else: + attempted_ls = attempted + attempted_clean = [] + for x in attempted_ls: + if allowed is None: + attempted_clean.append(x) + elif isinstance(allowed, Ontology): + if attr == "disease" and (x.lower() == "normal" or x.lower() == "healthy"): + # TODO required because of missing streamlining between sfaira and 10x, remove in future. + attempted_clean.append("healthy") + elif x in allowed.node_names: + attempted_clean.append(x) + else: + if isinstance(allowed, OntologyHierarchical) and x in allowed.node_ids: + attempted_clean.append(allowed.name_from_id(x)) + else: + raise ValueError(f"'{x}' is not a valid entry for {attr} in {self.id}.") + else: + raise ValueError(f"allowed of type {type(allowed)} is not a valid entry for {attr}.") + # Flatten attempts if only one was made: + if len(attempted_clean) == 1: + attempted_clean = attempted_clean[0] + return attempted_clean def subset_cells(self, key, values): """ @@ -2151,17 +2184,18 @@ def subset_cells(self, key, values): def get_subset_idx(samplewise_key, cellwise_key): try: sample_attr = getattr(self, samplewise_key) + if not isinstance(sample_attr, list): + sample_attr = [sample_attr] except AttributeError: sample_attr = None obs_key = getattr(self, cellwise_key) - if sample_attr is not None and obs_key is None: - if not isinstance(sample_attr, list): - sample_attr = [sample_attr] + if sample_attr is not None and len(sample_attr) == 1: + # Only use sample-wise subsetting if the sample-wise 
attribute is unique (not mixed). if np.any([x in values for x in sample_attr]): idx = np.arange(1, self.ncells) else: idx = np.array([]) - elif sample_attr is None and obs_key is not None: + elif obs_key is not None: assert self.adata is not None, "adata was not yet loaded" values_found = self.adata.obs[obs_key].values values_found_unique = np.unique(values_found) @@ -2179,11 +2213,12 @@ def get_subset_idx(samplewise_key, cellwise_key): # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {self.id}") idx = np.where([x in values_found_unique_matched for x in values_found])[0] - elif sample_attr is not None and obs_key is not None: - assert False, f"both cell-wise and sample-wise attribute {samplewise_key} given" else: assert False, "no subset chosen" return idx idx_keep = get_subset_idx(samplewise_key=key, cellwise_key=key + "_obs_key") self.adata = self.adata[idx_keep, :].copy() # if len(idx_keep) > 0 else None + + def show_summary(self): + print(f"{(self.supplier, self.organism, self.organ, self.assay_sc, self.disease)}") diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 39273065e..6fefeed41 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -27,7 +27,8 @@ def map_fn(inputs): :param inputs: :return: None if function ran, error report otherwise """ - ds, remove_gene_version, match_to_reference, load_raw, allow_caching, set_metadata, func, kwargs_func = inputs + ds, remove_gene_version, match_to_reference, load_raw, allow_caching, set_metadata, kwargs, func, kwargs_func = \ + inputs try: ds.load( remove_gene_version=remove_gene_version, @@ -35,6 +36,7 @@ def map_fn(inputs): load_raw=load_raw, allow_caching=allow_caching, set_metadata=set_metadata, + **kwargs ) if func is not None: x = func(ds, **kwargs_func) @@ -90,6 +92,7 @@ def load( processes: int = 1, 
func=None, kwargs_func: Union[None, dict] = None, + **kwargs ): """ Load all datasets in group (option for temporary loading). @@ -115,6 +118,7 @@ def func(dataset, **kwargs_func): load_raw, allow_caching, set_metadata, + kwargs, func, kwargs_func ] @@ -470,6 +474,32 @@ def additional_annotation_key(self, x: Dict[str, Union[None, str]]): for k, v in x.items(): self.datasets[k].additional_annotation_key = v + @property + def doi(self) -> List[str]: + """ + Propagates DOI annotation from contained datasets. + """ + dois = [] + for _, v in self.datasets.items(): + vdoi = v.doi + if isinstance(vdoi, str): + vdoi = [vdoi] + dois.extend(vdoi) + return np.sort(np.unique(vdoi)).tolist() + + @property + def supplier(self) -> List[str]: + """ + Propagates supplier annotation from contained datasets. + """ + supplier = [v.supplier for _, v in self.datasets.items()] + return np.sort(np.unique(supplier)).tolist() + + def show_summary(self): + for k, v in self.datasets.items(): + print(k) + print(f"\t {(v.supplier, v.organism, v.organ, v.assay_sc, v.disease)}") + class DatasetGroupDirectoryOriented(DatasetGroup): @@ -708,6 +738,20 @@ def ncells_bydataset_flat(self, annotated_only: bool = False): def ncells(self, annotated_only: bool = False): return np.sum(self.ncells_bydataset_flat(annotated_only=annotated_only)) + @property + def datasets(self) -> Dict[str, DatasetBase]: + """ + Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. + + :return: + """ + ds = {} + for x in self.dataset_groups: + for k, v in x.datasets.items(): + assert k not in ds.keys(), f"{k} was duplicated in super group, remove duplicates before flattening" + ds[k] = v + return ds + def flatten(self) -> DatasetGroup: """ Returns DatasetGroup (rather than self = DatasetSuperGroup) containing all listed data sets. 
@@ -734,6 +778,7 @@ def load( set_metadata: bool = True, allow_caching: bool = True, processes: int = 1, + **kwargs ): """ Loads data set human into anndata object. @@ -757,6 +802,7 @@ def load( allow_caching=allow_caching, set_metadata=set_metadata, processes=processes, + **kwargs ) def subset_genes(self, subset_type: Union[None, str, List[str]] = None): @@ -788,6 +834,13 @@ def adata(self): warnings.warn("no anndata instances to concatenate") return self._adata + @property + def adata_ls(self): + adata_ls = [] + for k, v in self.datasets.items(): + adata_ls.append(v.adata) + return adata_ls + def load_tobacked( self, fn_backed: PathLike, @@ -951,6 +1004,65 @@ def subset(self, key, values): x.subset(key=key, values=values) self.dataset_groups = [x for x in self.dataset_groups if x.datasets] # Delete empty DatasetGroups + def remove_duplicates( + self, + supplier_hierarchy: str = "cellxgene,sfaira" + ): + """ + Remove duplicate data loaders from super group, e.g. loaders that map to the same DOI. + + Any DOI match is removed (pre-print or journal publication). + Data sets without DOI are removed, too. + Loaders are kept in the hierarchy indicated in supplier_hierarchy. + Requires a super group with homogenous suppliers across DatasetGroups, throws an error otherwise. + This is given for sfaira maintained libraries but may not be the case if custom assembled DatasetGroups are + used. + + :param supplier_hierarchy: Hierarchy to resolve duplications by. + Comma separated string that indicates which data provider takes priority. + Choose "cellxgene,sfaira" to prioritise use of data sets downloaded from cellxgene. + Choose "sfaira,cellxgene" to prioritise use of raw data processing pipelines locally. 
+ + - cellxgene: cellxgene downloads + - sfaira: local raw file processing + :return: + """ + # Build a pairing of provider and DOI: + report_list = [] + idx_tokeep = [] + supplier_hierarchy = supplier_hierarchy.split(",") + for i, (x, y) in enumerate([(xx.supplier, xx.doi) for xx in self.dataset_groups]): + if len(x) > 1: + raise ValueError(f"found more than one supplier for DOI {str(y)}") + else: + x = x[0] + if x not in supplier_hierarchy: + raise ValueError(f"could not associate supplier {x} with hierarchy {supplier_hierarchy} in " + f"data set {y}") + if len(report_list) > 0: + matched_idx = np.where([ + np.any([ + zz in y + for zz in z[1] + ]) + for z in report_list + ])[0] + assert len(matched_idx) < 1, f"more matches than expected for {(x, y)}" + else: + matched_idx = [] + if len(matched_idx) > 0: + # Establish which entry takes priority + supplier_old = report_list[matched_idx[0]][0] + priority = supplier_hierarchy.index(supplier_old) > supplier_hierarchy.index(x) + print(f"removing duplicate data set {y} from supplier: {supplier_old if priority else x}") + if priority: + idx_tokeep.append(i) + del idx_tokeep[matched_idx[0]] + else: + report_list.append([x, y]) + idx_tokeep.append(i) + self.dataset_groups = [self.dataset_groups[i] for i in idx_tokeep] + def subset_cells(self, key, values: Union[str, List[str]]): """ Subset list of adata objects based on cell-wise properties. 
@@ -1040,3 +1152,8 @@ def additional_annotation_key(self, x: Dict[str, Union[None, str]]): warnings.warn(f"did not data set matching ID {k}") elif counter > 1: warnings.warn(f"found more than one ({counter}) data set matching ID {k}") + + def show_summary(self): + for k, v in self.datasets.items(): + print(k) + print(f"\t {(v.supplier, v.organism, v.organ, v.assay_sc, v.disease)}") diff --git a/sfaira/data/dataloaders/__init__.py b/sfaira/data/dataloaders/__init__.py index 1df580fb0..92bc3fe4d 100644 --- a/sfaira/data/dataloaders/__init__.py +++ b/sfaira/data/dataloaders/__init__.py @@ -1,4 +1,4 @@ from . import anatomical_groups from . import databases from . import loaders -from .super_group import DatasetSuperGroupSfaira +from .super_group import Universe diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py index a6a152d07..ad4534a7e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupAdipose(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py index 2e06f3448..5b20e9c89 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupAdrenalgland(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py index 39c91c2cc..0a02eefc6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupArtery(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py index 5ba646162..05330d0f7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBladder(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py index 5d71f1ff5..697c84f36 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBlood(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_blood_2018_10x_ica_001", "human_blood_2019_10x_10xGenomics_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py index 2f0425a5b..e44fe29ae 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBone(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_bone_2018_10x_ica_001", "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py index 6720f55ac..bf26299fb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBrain(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_brain_2017_DroNcSeq_habib_001", "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py index b8bf79613..b5ae45cc3 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupCalvaria(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py index ee3c3f7d7..c993cf859 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupCervix(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py index 8a2af277e..7cba1c705 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ 
b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupChorionicvillus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py index 01c6fbc26..ac3065342 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupColon(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_colon_2019_10x_kinchen_001", "human_colon_2019_10x_smilie_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py index 9564a3f29..a78ec322d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py 
@@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupDuodenum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py index 48041903f..08833bd57 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupEpityphlon(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py index 98009e77d..e69858791 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data 
import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupEsophagus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_esophagus_2019_10x_madissoon_001", "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py index 2311943e9..2e6ab6323 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupEye(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_eye_2019_10x_lukowski_001", "human_eye_2019_10x_menon_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py index a2b28d3da..c3b7dbf0f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from 
sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupFallopiantube(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py index 9a658b5be..f3de9e48d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupFemalegonad(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py index 5914ddd2f..96c2268d4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py @@ -1,7 +1,7 @@ from typing import Union from 
sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupGallbladder(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py index 1e118813d..2f01a4471 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupHeart(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py index 77f282d46..1e4ee5ec6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from 
sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupHesc(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py index 5d2699c4f..94183416a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupIleum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_ileum_2019_10x_martin_001", "human_ileum_2019_10x_wang_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py index f098857eb..c4f92d8c6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from 
sfaira.data.dataloaders.super_group import Universe class DatasetGroupJejunum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py index ef7c715f8..d19b6c928 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupKidney(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_kidney_2019_10xSn_lake_001", "human_kidney_2019_10x_stewart_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py index 29d4a6bbb..7cccc5a31 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupLiver(DatasetGroup): @@ 
-12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_liver_2018_10x_macparland_001", "human_liver_2019_10x_popescu_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py index f690aac3c..49e2ac34e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupLung(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_lung_2019_10x_braga_001", "human_lung_2019_10x_braga_002", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py index 2ab0527ab..f378d4e9c 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupMalegonad(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, 
None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_malegonad_2018_10x_guo_001", "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py index 5774d3f73..4b0e6f012 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupMuscle(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py index 882ffcde1..7dbe75dee 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupOmentum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: 
Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py index 4892cc93e..d8361db6a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupPancreas(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_pancreas_2016_indrop_baron_001", "human_pancreas_2016_smartseq2_segerstolpe_001", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py index 4aeac2457..2265e882d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupPlacenta(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, 
cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_placenta_2018_smartseq2_ventotormo_001", "human_placenta_2018_10x_ventotormo_002", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py index e77a55a62..848acdc26 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupPleura(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py index 00a805ca4..1ee625b28 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupProstate(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_prostate_2018_10x_henry_001", "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py index f674b0384..10eba378e 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupRectum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_rectum_2019_10x_wang_001", "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py index fbd0c0b86..364364bdd 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupRib(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py index b9b848057..149439ac3 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupSkin(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py index b6369cc1d..9e0f5a3fc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupSpinalcord(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py index 386772100..cc595af74 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupSpleen(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_spleen_2019_10x_madissoon_001", "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py index 3f68fac80..930285d1d 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupStomach(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py index f47ce5203..a2a447539 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupThymus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_thymus_2020_10x_park_001", "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py index fe5839f09..d6771c563 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupThyroid(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4", "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4", diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py index fef634fa2..0de665fea 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupTrachea(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py index 4873e3440..8dcb14570 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupUreter(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py index 542969abc..e15f5eca0 100644 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupUterus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py index f8db234d4..8ebc35dbf 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupAdipose(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_adipose_2019_10x_pisco_001_10.1101/661728", "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py index bfa86db4d..ba15353da 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBladder(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_bladder_2019_10x_pisco_001_10.1101/661728", "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py index 86baebf1a..67cd97d08 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBlood (DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py index 534445900..a4cea83e7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBone(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_bone_2019_10x_pisco_001_10.1101/661728", "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py index 132026f9d..881eb02bb 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupBrain(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728", "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py index d9e90d128..2b3888dc7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupColon(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_colon_2019_10x_pisco_001_10.1101/661728", "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py index 933f412f1..0df183699 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupDiaphragm(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py index 56237bbf0..2d55d7bb9 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupFemalegonad(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py index f3b434758..cef991f69 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupHeart(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_heart_2019_10x_pisco_001_10.1101/661728", "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py index 5f78d124e..752f69c4a 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupIleum(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py index b5b236f09..706b5d948 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupKidney(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_kidney_2019_10x_pisco_001_10.1101/661728", "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py index c67e8f82c..62418f9b4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupLiver(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_liver_2019_10x_pisco_001_10.1101/661728", "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py index ceef059d0..89e4dabd6 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupLung(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_lung_2019_10x_pisco_001_10.1101/661728", "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py index 52e9daaf4..f391cdc54 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupMalegonad(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py index 1a73b0d39..540f9d30f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupMammaryGland(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + 
dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728", "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py index c0535f364..f39a576cf 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupMuscle(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_muscle_2019_10x_pisco_001_10.1101/661728", "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py index 788c7b508..e06876965 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupPancreas(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + 
dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_pancreas_2019_10x_pisco_001_10.1101/661728", "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py index 92c4816d3..bf084bd94 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupPlacenta(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py index 9bdf31cdc..f8fb99636 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupProstate(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py index ec80bc7e6..df3f32238 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupRib(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py index 77512772c..8df3d82b5 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupSkin(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_skin_2019_10x_pisco_001_10.1101/661728", "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py index dc49ce938..aa0b26a1b 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupSpleen(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_spleen_2019_10x_pisco_001_10.1101/661728", "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py index 310278f8a..7721f9b25 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupStomach(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = 
DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", ]) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py index 14eb435d8..e5894ecf4 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupThymus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_thymus_2019_10x_pisco_001_10.1101/661728", "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py index cfe9995ab..564d0a1cc 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupTongue(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_tongue_2019_10x_pisco_001_10.1101/661728", "mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py index 30fcef445..c27fdcf3f 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupTrachea(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_trachea_2019_10x_pisco_001_10.1101/661728", "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728", diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py index 85a214b96..277a6dcb7 100644 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py @@ -1,7 +1,7 @@ from typing import Union from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import DatasetSuperGroupSfaira +from sfaira.data.dataloaders.super_group import Universe class DatasetGroupUterus(DatasetGroup): @@ -12,7 +12,7 @@ def __init__( meta_path: Union[str, None] = None, cache_path: Union[str, None] = None ): - dsg = DatasetSuperGroupSfaira(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) + dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dsg.subset(key="id", values=[ "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" diff --git a/sfaira/data/dataloaders/super_group.py b/sfaira/data/dataloaders/super_group.py index e49093535..316560f8a 100644 --- a/sfaira/data/dataloaders/super_group.py +++ b/sfaira/data/dataloaders/super_group.py @@ -10,7 +10,7 @@ from sfaira.data import DatasetSuperGroup -class DatasetSuperGroupSfaira(DatasetSuperGroup): +class Universe(DatasetSuperGroup): def __init__( self, diff --git a/sfaira/data/utils_scripts/create_anatomical_configs.py b/sfaira/data/utils_scripts/create_anatomical_configs.py index 4663f8ab6..294563e9d 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs.py @@ -3,7 +3,7 @@ import tensorflow as tf # Any data loader here to extract path: -from sfaira.data import DatasetSuperGroupSfaira +from sfaira.data import Universe print(tf.__version__) @@ -92,7 +92,7 @@ def clean(s): for organism, organs in configs_to_write.items(): for organ in organs: print(f"Writing {organism} {organ}") - dsgs = DatasetSuperGroupSfaira( + dsgs = Universe( data_path=".", meta_path=".", cache_path="." 
diff --git a/sfaira/data/utils_scripts/create_meta.py b/sfaira/data/utils_scripts/create_meta.py index f14c714ca..97930cdaf 100644 --- a/sfaira/data/utils_scripts/create_meta.py +++ b/sfaira/data/utils_scripts/create_meta.py @@ -17,7 +17,7 @@ def write_meta(args0, args1): path_meta = str(sys.argv[2]) processes = int(str(sys.argv[3])) -ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( +ds = sfaira.data.dataloaders.Universe( data_path=data_path, meta_path=path_meta, cache_path=path_meta ) ds.load( diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 4ad33c888..a9750b0dd 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -10,7 +10,7 @@ path_cache = str(sys.argv[3]) processes = int(str(sys.argv[4])) -ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( +ds = sfaira.data.dataloaders.Universe( data_path=data_path, meta_path=path_meta, cache_path=path_cache ) # Write meta data, cache and test load from cache: diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py index fdbed37d9..25b0bf7ad 100644 --- a/sfaira/data/utils_scripts/streamline_selected.py +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -15,7 +15,7 @@ path_cache = path_cache if path_cache != "None" else None for x in dois.split(","): - ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( + ds = sfaira.data.dataloaders.Universe( data_path=data_path, meta_path=path_meta, cache_path=path_cache ) ds.subset(key="doi", values=[x]) diff --git a/sfaira/data/utils_scripts/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py index acc12a185..4d762d767 100644 --- a/sfaira/data/utils_scripts/write_backed_human.py +++ b/sfaira/data/utils_scripts/write_backed_human.py @@ -13,7 +13,7 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") -ds = 
sfaira.data.dataloaders.DatasetSuperGroupSfaira( +ds = sfaira.data.dataloaders.Universe( data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["human"]) diff --git a/sfaira/data/utils_scripts/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py index 8f33dadd4..9ed4417df 100644 --- a/sfaira/data/utils_scripts/write_backed_mouse.py +++ b/sfaira/data/utils_scripts/write_backed_mouse.py @@ -13,7 +13,7 @@ genome = str(sys.argv[3]) path_meta = os.path.join(path, "meta") -ds = sfaira.data.dataloaders.DatasetSuperGroupSfaira( +ds = sfaira.data.dataloaders.Universe( data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["mouse"]) diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index ac9e10f0e..55c566b1b 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -9,7 +9,7 @@ from sfaira.versions.metadata import CelltypeUniverse from sfaira.estimators import EstimatorKerasEmbedding -from sfaira.data import DatasetSuperGroupSfaira +from sfaira.data import Universe def _tp(yhat, ytrue): @@ -875,7 +875,7 @@ def plot_best_classwise_heatmap( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset = Universe(data_path=datapath) dataset.subset(key="organism", values=[organism]) dataset.subset(key="organ", values=[organ]) if not dataset.flatten().datasets: @@ -1035,7 +1035,7 @@ def plot_best_classwise_scatter( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset = Universe(data_path=datapath) dataset.subset(key="organism", values=[organism]) dataset.subset(key="organ", values=[organ]) if not dataset.flatten().datasets: @@ -1372,7 +1372,7 @@ def get_gradients_by_celltype( else: print('Compute gradients (1/3): load data') # load data - dataset = DatasetSuperGroupSfaira(data_path=datapath) + dataset = Universe(data_path=datapath) 
dataset.subset(key="organism", values=[organism]) dataset.subset(key="organ", values=[organ]) dataset.subset(key="annotated", values=[True]) diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index 97020d5d6..5d554d6d1 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,7 +5,7 @@ import pickle from typing import Union -from sfaira.data import DatasetSuperGroupSfaira +from sfaira.data import Universe from sfaira.estimators import EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.interface import ModelZooEmbedding, ModelZooCelltype @@ -26,7 +26,7 @@ def __init__( fn_backed_obs = ".".join(data_path.split(".")[:-1]) + "_obs.csv" self.data.obs = pd.read_csv(fn_backed_obs) else: - dataset = DatasetSuperGroupSfaira(data_path=data_path, meta_path=meta_path, cache_path=cache_path) + dataset = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) dataset.load_config(config_path) self.set_data(dataset) diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index e8e1b0b3d..72c563846 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -4,14 +4,14 @@ import scipy.sparse from sfaira.data import DatasetSuperGroup -from sfaira.data import DatasetSuperGroupSfaira +from sfaira.data import Universe dir_data = "../test_data" dir_meta = "../test_data/meta" def test_dsgs_instantiate(): - _ = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + _ = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) @pytest.mark.parametrize("organ", ["intestine", "ileum"]) @@ -19,7 +19,7 @@ def test_dsgs_subset_dataset_wise(organ: str): """ Tests if subsetting results only in datasets of the desired characteristics. 
""" - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=[organ]) for x in ds.dataset_groups: @@ -30,11 +30,11 @@ def test_dsgs_subset_dataset_wise(organ: str): def test_dsgs_config_write_load(): fn = dir_data + "/config.csv" - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.write_config(fn=fn) - ds2 = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds2 = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds2.load_config(fn=fn) assert np.all(ds.ids == ds2.ids) @@ -45,7 +45,7 @@ def test_dsgs_config_write_load(): def test_dsgs_adata(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load(remove_gene_version=True) @@ -53,7 +53,7 @@ def test_dsgs_adata(): def test_dsgs_load(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load(remove_gene_version=False) @@ -65,7 +65,7 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): """ Tests if subsetting results only in datasets of the desired characteristics. 
""" - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=[organ]) ds.load(remove_gene_version=False) @@ -83,7 +83,7 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): @pytest.mark.parametrize("clean_var", [True, False]) @pytest.mark.parametrize("clean_uns", [True, False]) def test_dsgs_streamline(out_format: str, clean_obs: bool, clean_var: bool, clean_uns: bool): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load(remove_gene_version=True) @@ -91,7 +91,7 @@ def test_dsgs_streamline(out_format: str, clean_obs: bool, clean_var: bool, clea def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) @@ -106,7 +106,7 @@ def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) @@ -121,7 +121,7 @@ def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): def test_dsg_load(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, 
meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) @@ -129,7 +129,7 @@ def test_dsg_load(): def test_dsg_adata(): - ds = DatasetSuperGroupSfaira(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index 3aa0d973a..80f9367e1 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py @@ -1,4 +1,4 @@ -from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyObo, \ +from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyHierarchical, OntologyObo, \ OntologyCelltypes, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 22b04144e..dbd03dea0 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -60,6 +60,7 @@ class OntologyList(Ontology): """ Basic unordered ontology container """ + nodes: list def __init__( self, @@ -107,7 +108,30 @@ def is_a(self, query: str, reference: str) -> bool: return query == reference -class OntologyEbi(Ontology): +class OntologyHierarchical(Ontology): + """ + Basic ordered ontology container + """ + nodes: dict + + @abc.abstractmethod + def id_from_name(self, x: str) -> str: + pass + + @abc.abstractmethod + def name_from_id(self, x: str) -> str: + pass + + @property + def node_names(self) -> List[str]: + pass + + @property + def node_ids(self) -> List[str]: + pass + + +class OntologyEbi(OntologyHierarchical): 
""" Recursively assembles ontology by querying EBI web interface. @@ -161,10 +185,18 @@ def recursive_search(iri): def node_names(self) -> List[str]: return [v["name"] for k, v in self.nodes.items()] + @property + def node_ids(self) -> List[str]: + return list(self.nodes.keys()) + def id_from_name(self, x: str) -> str: self.validate_node(x=x) return [k for k, v in self.nodes.items() if v["name"] == x][0] + def name_from_id(self, x: str) -> str: + assert x in self.nodes.keys(), f"node {x} not found" + return self.nodes[x]["name"] + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): """ Map free text node name to ontology node names via fuzzy string matching. @@ -194,7 +226,7 @@ def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: def synonym_node_properties(self) -> List[str]: return ["synonyms"] -# class OntologyOwl(Ontology): +# class OntologyOwl(OntologyHierarchical): # # onto: owlready2.Ontology # @@ -212,7 +244,7 @@ def synonym_node_properties(self) -> List[str]: # pass -class OntologyObo(Ontology): +class OntologyObo(OntologyHierarchical): graph: networkx.MultiDiGraph leaves: List[str] @@ -248,6 +280,10 @@ def id_from_name(self, x: str) -> str: self.validate_node(x=x) return [k for k, v in self.graph.nodes.items() if v["name"] == x][0] + def name_from_id(self, x: str) -> str: + assert x in self.graph.nodes.keys(), f"node {x} not found" + return self.graph.nodes[x]["name"] + def set_leaves(self, nodes: list = None): # ToDo check that these are not include parents of each other! 
if nodes is not None: @@ -726,6 +762,7 @@ def __init__( ontology=ontology, root_term=root_term, additional_terms={ - "microwell-seq": {"name": "microwell-seq"} + "microwell-seq": {"name": "microwell-seq"}, + "sci-plex": {"name": "sci-plex"} } ) From d04360a677d33d09a09864e8b35e247609b6808c Mon Sep 17 00:00:00 2001 From: Laura Martens Date: Fri, 16 Apr 2021 13:00:00 +0200 Subject: [PATCH 101/161] updated azimuth dataloder to new structure (#217) * added 10.1038/s41593-019-0393-4 Co-authored-by: david.seb.fischer Co-authored-by: lauradmartens --- sfaira/data/base/dataset_group.py | 3 +- .../d10_1101_2020_10_12_335331/__init__.py | 1 + .../human_blood_2020_10x_hao_001.py | 35 +++++++++++ .../human_blood_2020_10x_hao_001.tsv | 59 +++++++++++++++++++ .../human_blood_2020_10x_hao_001.yaml | 51 ++++++++++++++++ ...man_x_2019_10xsequencing_madissoon_001.tsv | 2 +- .../data_contribution/test_data_template.py | 13 ++-- 7 files changed, 156 insertions(+), 8 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 6fefeed41..c036b611f 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -641,7 +641,8 @@ def clean_ontology_class_map(self): # Adds a third column with the corresponding ontology IDs into the file. 
tab[self._adata_ids_sfaira.classmap_target_id_key] = [ self.ontology_celltypes.id_from_name(x) - if x != self._adata_ids_sfaira.unknown_celltype_identifier + if x != self._adata_ids_sfaira.unknown_celltype_identifier and + x != self._adata_ids_sfaira.not_a_cell_celltype_identifier else self._adata_ids_sfaira.unknown_celltype_identifier for x in tab[self._adata_ids_sfaira.classmap_target_key].values ] diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/__init__.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.py new file mode 100644 index 000000000..58c8fcae6 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.py @@ -0,0 +1,35 @@ +import anndata +import gzip +import os +import pandas as pd +import scipy.io +import tarfile + + +def load(data_dir, **kwargs): + fn = os.path.join(data_dir, "GSE164378_RAW.tar") + adatas = [] + with tarfile.open(fn) as tar: + samples = ['GSM5008737_RNA_3P', 'GSM5008738_ADT_3P'] + for sample in samples: + with gzip.open(tar.extractfile(sample + '-matrix.mtx.gz'), 'rb') as mm: + x = scipy.io.mmread(mm).T.tocsr() + obs = pd.read_csv(tar.extractfile(sample + '-barcodes.tsv.gz'), compression='gzip', + header=None, sep='\t', index_col=0) + obs.index.name = None + var = pd.read_csv(tar.extractfile(sample + '-features.tsv.gz'), compression='gzip', + header=None, sep='\t').iloc[:, :1] + var.columns = ['names'] + var.index = var['names'].values + adata = anndata.AnnData(X=x, obs=obs, var=var) + adata.var_names_make_unique() + adatas.append(adata) + tar.close() + + adata = adatas[0] + 
protein = adatas[1] + meta = pd.read_csv(os.path.join(data_dir, 'GSE164378_sc.meta.data_3P.csv.gz'), index_col=0) + adata.obs = adata.obs.join(meta) + adata.obsm['protein_expression'] = pd.DataFrame(protein.X.A, columns=protein.var_names, index=protein.obs_names) + + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.tsv b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.tsv new file mode 100644 index 000000000..93516d4d3 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.tsv @@ -0,0 +1,59 @@ +source target target_id +ASDC_mDC myeloid dendritic cell CL:0000782 +ASDC_pDC plasmacytoid dendritic cell CL:0000784 +B intermediate kappa B cell CL:0000236 +B intermediate lambda B cell CL:0000236 +B memory kappa memory B cell CL:0000787 +B memory lambda memory B cell CL:0000787 +B naive kappa naive B cell CL:0000788 +B naive lambda naive B cell CL:0000788 +CD14 Mono CD14-positive monocyte CL:0001054 +CD16 Mono CD14-low, CD16-positive monocyte CL:0002396 +CD4 CTL CD4-positive, alpha-beta cytotoxic T cell CL:0000934 +CD4 Naive naive thymus-derived CD4-positive, alpha-beta T cell CL:0000895 +CD4 Proliferating CD4-positive, alpha-beta T cell CL:0000624 +CD4 TCM_1 central memory CD4-positive, alpha-beta T cell CL:0000904 +CD4 TCM_2 central memory CD4-positive, alpha-beta T cell CL:0000904 +CD4 TCM_3 central memory CD4-positive, alpha-beta T cell CL:0000904 +CD4 TEM_1 effector memory CD4-positive, alpha-beta T cell CL:0000905 +CD4 TEM_2 effector memory CD4-positive, alpha-beta T cell CL:0000905 +CD4 TEM_3 effector memory CD4-positive, alpha-beta T cell CL:0000905 +CD4 TEM_4 effector memory CD4-positive, alpha-beta T cell CL:0000905 +CD8 Naive naive thymus-derived CD8-positive, alpha-beta T cell CL:0000900 +CD8 Naive_2 naive thymus-derived CD8-positive, alpha-beta T cell CL:0000900 +CD8 Proliferating CD8-positive, alpha-beta 
T cell CL:0000625 +CD8 TCM_1 central memory CD8-positive, alpha-beta T cell CL:0000907 +CD8 TCM_2 central memory CD8-positive, alpha-beta T cell CL:0000907 +CD8 TCM_3 central memory CD8-positive, alpha-beta T cell CL:0000907 +CD8 TEM_1 effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8 TEM_2 effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8 TEM_3 effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8 TEM_4 effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8 TEM_5 effector memory CD8-positive, alpha-beta T cell CL:0000913 +CD8 TEM_6 effector memory CD8-positive, alpha-beta T cell CL:0000913 +Doublet UNKNOWN UNKNOWN +Eryth erythrocyte CL:0000232 +HSPC hematopoietic multipotent progenitor cell CL:0000837 +ILC innate lymphoid cell CL:0001065 +MAIT mucosal invariant T cell CL:0000940 +NK Proliferating natural killer cell CL:0000623 +NK_1 natural killer cell CL:0000623 +NK_2 natural killer cell CL:0000623 +NK_3 natural killer cell CL:0000623 +NK_4 natural killer cell CL:0000623 +NK_CD56bright CD16-negative, CD56-bright natural killer cell, human CL:0000938 +Plasma plasma cell CL:0000786 +Plasmablast plasmablast CL:0000980 +Platelet platelet CL:0000233 +Treg Memory memory regulatory T cell CL:0002678 +Treg Naive naive regulatory T cell CL:0002677 +cDC1 conventional dendritic cell CL:0000990 +cDC2_1 conventional dendritic cell CL:0000990 +cDC2_2 conventional dendritic cell CL:0000990 +dnT_1 double negative thymocyte CL:0002489 +dnT_2 double negative thymocyte CL:0002489 +gdT_1 gamma-delta T cell CL:0000798 +gdT_2 gamma-delta T cell CL:0000798 +gdT_3 gamma-delta T cell CL:0000798 +gdT_4 gamma-delta T cell CL:0000798 +pDC plasmacytoid dendritic cell CL:0000784 diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml new file mode 100644 index 000000000..7fe522a91 --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml @@ -0,0 +1,51 @@ +dataset_structure: + dataset_index: 1 + sample_fns: +dataset_wise: + author: + - "Hao, Yuhan" + default_embedding: + doi: "10.1101/2020.10.12.335331" + download_url_data: "https://atlas.fredhutch.org/nygc/multimodal-pbmc/" + download_url_meta: "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE164378&format=file&file=GSE164378%5Fsc%2Emeta%2Edata%5F3P%2Ecsv%2Egz" + normalization: "raw" + primary_data: + year: 2020 +dataset_or_observation_wise: + assay_sc: "10X sequencing" + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: + bio_sample: + bio_sample_obs_key: + cell_line: + cell_line_obs_key: + development_stage: "human adult stage" + development_stage_obs_key: + disease: "healthy" + disease_obs_key: + ethnicity: + ethnicity_obs_key: + individual: + individual_obs_key: "donor" + organ: "blood" + organ_obs_key: + organism: "human" + organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: + sex: + sex_obs_key: + state_exact: + state_exact_obs_key: 'time' + tech_sample: + tech_sample_obs_key: 'Batch' +observation_wise: + cellontology_original_obs_key: "celltype.l3" +feature_wise: + var_ensembl_col: + var_symbol_col: "names" +meta: + version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv index d6c319350..7a90af98a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.tsv @@ -4,7 +4,7 @@ Alveolar_Type2 type II pneumocyte CL:0002063 B_CD27neg B cell CL:0000236 B_CD27pos B cell CL:0000236 B_Hypermutation 
B cell CL:0000236 -B_T_doublet NOT_A_CELL NOT_A_CELL +B_T_doublet NOT_A_CELL UNKNOWN B_cells B cell CL:0000236 B_follicular follicular B cell CL:0000843 B_mantle B cell CL:0000236 diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index ad87bc788..bacf8ca50 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -55,15 +55,16 @@ def test_load(doi_sfaira_repr: str, test_data: str): else: raise ValueError("data loader not found in sfaira and also not in sfaira_extension") file_path = pydoc.locate(dir_loader + ".FILE_PATH") - cache_path = os.path.join(test_data, "cache") + meta_path = None + cache_path = None # Clear dataset cache shutil.rmtree(cache_path, ignore_errors=True) ds = DatasetGroupDirectoryOriented( file_base=file_path, data_path=test_data, - meta_path=test_data, - cache_path=cache_path + meta_path=None, + cache_path=None ) # Test raw loading and caching: # You can set load_raw to True while debugging when caching works already to speed the test up, @@ -122,7 +123,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): datasets_f = [ DatasetBase( data_path=test_data, - meta_path=test_data, + meta_path=meta_path, cache_path=cache_path, load_func=load_func, dict_load_func_annotation=load_func_annotation, @@ -135,7 +136,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): datasets_f = [ DatasetFound( data_path=test_data, - meta_path=test_data, + meta_path=meta_path, cache_path=cache_path, load_func=load_func, load_func_annotation=load_func_annotation, @@ -158,7 +159,7 @@ def test_load(doi_sfaira_repr: str, test_data: str): ds = DatasetGroupDirectoryOriented( file_base=file_path, data_path=test_data, - meta_path=test_data, + meta_path=meta_path, cache_path=cache_path ) ds.load( From 8336361ae92b847d68dfee430f1ae8dfc17ddcf2 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 16 Apr 2021 15:02:34 +0200 Subject: [PATCH 102/161] Distributed store (#213) * fixed a few loader bugs * added distributed store * improved estimator tests * adapated trainers and estimators to new data structures --- requirements.txt | 1 + sfaira/data/__init__.py | 2 +- sfaira/data/base/__init__.py | 1 + sfaira/data/base/dataset.py | 80 +++- sfaira/data/base/dataset_group.py | 66 ++- sfaira/data/base/distributed_store.py | 314 ++++++++++++ .../__init__.py | 0 .../human_x_2020_scirnaseq_cao_001.py | 0 .../human_x_2020_scirnaseq_cao_001.tsv | 0 .../human_x_2020_scirnaseq_cao_001.yaml | 0 .../create_anatomical_configs_store.py | 99 ++++ .../utils_scripts/create_meta_and_cache.py | 3 +- .../utils_scripts/create_target_universes.py | 30 ++ .../data/utils_scripts/write_backed_human.py | 2 +- .../data/utils_scripts/write_backed_mouse.py | 2 +- sfaira/data/utils_scripts/write_store.py | 32 ++ sfaira/estimators/keras.py | 446 ++++++++++-------- sfaira/interface/model_zoo.py | 264 +++-------- sfaira/models/celltype/marker.py | 13 +- sfaira/models/celltype/mlp.py | 13 +- sfaira/models/embedding/ae.py | 16 +- sfaira/models/embedding/linear.py | 16 +- sfaira/models/embedding/vae.py | 14 +- sfaira/models/embedding/vaeiaf.py | 16 +- sfaira/models/embedding/vaevamp.py | 14 +- sfaira/train/train_model.py | 91 ++-- sfaira/unit_tests/data/test_dataset.py | 4 +- .../unit_tests/estimators/test_estimator.py | 168 ++++--- sfaira/unit_tests/models/__init__.py | 0 sfaira/unit_tests/models/test_models.py | 236 --------- sfaira/versions/genomes.py | 50 +- sfaira/versions/metadata/base.py | 9 +- sfaira/versions/metadata/universe.py | 28 +- sfaira/versions/topologies/__init__.py | 33 +- sfaira/versions/topologies/class_interface.py | 78 +-- .../human/celltype/celltypemarker.py | 11 +- .../topologies/human/celltype/celltypemlp.py | 44 +- .../versions/topologies/human/embedding/ae.py | 17 +- .../topologies/human/embedding/linear.py | 18 +- 
.../topologies/human/embedding/nmf.py | 14 +- .../topologies/human/embedding/vae.py | 27 +- .../topologies/human/embedding/vaeiaf.py | 16 +- .../topologies/human/embedding/vaevamp.py | 16 +- .../mouse/celltype/celltypemarker.py | 11 +- .../topologies/mouse/celltype/celltypemlp.py | 36 +- .../versions/topologies/mouse/embedding/ae.py | 17 +- .../topologies/mouse/embedding/linear.py | 24 +- .../topologies/mouse/embedding/nmf.py | 24 +- .../topologies/mouse/embedding/vae.py | 32 +- .../topologies/mouse/embedding/vaeiaf.py | 16 +- .../topologies/mouse/embedding/vaevamp.py | 16 +- 51 files changed, 1469 insertions(+), 1011 deletions(-) create mode 100644 sfaira/data/base/distributed_store.py rename sfaira/data/dataloaders/loaders/{d10_1126_science_aba7721 => _d10_1126_science_aba7721}/__init__.py (100%) rename sfaira/data/dataloaders/loaders/{d10_1126_science_aba7721 => _d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.py (100%) rename sfaira/data/dataloaders/loaders/{d10_1126_science_aba7721 => _d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.tsv (100%) rename sfaira/data/dataloaders/loaders/{d10_1126_science_aba7721 => _d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.yaml (100%) create mode 100644 sfaira/data/utils_scripts/create_anatomical_configs_store.py create mode 100644 sfaira/data/utils_scripts/create_target_universes.py create mode 100644 sfaira/data/utils_scripts/write_store.py delete mode 100644 sfaira/unit_tests/models/__init__.py delete mode 100644 sfaira/unit_tests/models/test_models.py diff --git a/requirements.txt b/requirements.txt index 131f41f98..ae48f17ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ requests versioneer h5py xlrd==1.* +zarr rich>=9.10.0 click>=7.1.2 questionary>=1.8.1 diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index 5b9c26f76..ed5540905 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,6 +1,6 @@ from sfaira.data.base import DatasetBase, \ 
DatasetGroup, DatasetGroupDirectoryOriented, \ - DatasetSuperGroup + DatasetSuperGroup, DistributedStore from . import dataloaders from .dataloaders import Universe from .interactive import DatasetInteractive diff --git a/sfaira/data/base/__init__.py b/sfaira/data/base/__init__.py index d2e2df6fc..9f9e490a6 100644 --- a/sfaira/data/base/__init__.py +++ b/sfaira/data/base/__init__.py @@ -1,2 +1,3 @@ from sfaira.data.base.dataset import DatasetBase from sfaira.data.base.dataset_group import DatasetGroup, DatasetGroupDirectoryOriented, DatasetSuperGroup +from sfaira.data.base.distributed_store import DistributedStore diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 2e3d07dc5..7bb82660e 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -566,27 +566,33 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep - data_ids = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values + data_ids_ensg = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values if subset_type is None: - subset_ids = self.genome_container.ensembl + subset_ids_ensg = self.genome_container.ensembl + subset_ids_symbol = self.genome_container.names else: if isinstance(subset_type, str): subset_type = [subset_type] keys = np.unique(self.genome_container.type) if subset_type not in keys: raise ValueError(f"subset type {subset_type} not available in list {keys}") - subset_ids = [ - x for x, y in zip(self.genome_container.ensembl, self.genome_container.type) + subset_ids_ensg = [ + x.upper() for x, y in zip(self.genome_container.ensembl, self.genome_container.type) + if y in subset_type + ] + subset_ids_symbol = [ + x.upper() for x, y in zip(self.genome_container.names, self.genome_container.type) if y in subset_type ] - idx_feature_kept = np.where([x in subset_ids for x in data_ids])[0] - idx_feature_map = 
np.array([subset_ids.index(x) for x in data_ids[idx_feature_kept]]) # Remove unmapped genes + idx_feature_kept = np.where([x.upper() in subset_ids_ensg for x in data_ids_ensg])[0] + data_ids_kept = data_ids_ensg[idx_feature_kept] x = x[:, idx_feature_kept] - + # Build map of subset_ids to features in x: + idx_feature_map = np.array([subset_ids_symbol.index(x) for x in data_ids_kept]) # Create reordered feature matrix based on reference and convert to csr - x_new = scipy.sparse.csc_matrix((x.shape[0], self.genome_container.ngenes), dtype=x.dtype) + x_new = scipy.sparse.csc_matrix((x.shape[0], len(subset_ids_symbol)), dtype=x.dtype) # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: # ... scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) # ValueError: could not convert integer scalar @@ -605,9 +611,9 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): X=x_new, obs=self.adata.obs, obsm=self.adata.obsm, - var=pd.DataFrame(data={'names': self.genome_container.names, - self._adata_ids_sfaira.gene_id_ensembl: self.genome_container.ensembl}, - index=self.genome_container.ensembl), + var=pd.DataFrame(data={self._adata_ids_sfaira.gene_id_names: subset_ids_symbol, + self._adata_ids_sfaira.gene_id_ensembl: subset_ids_ensg}, + index=subset_ids_ensg), uns=self.adata.uns ) @@ -674,8 +680,7 @@ def _set_metadata_in_adata(self, allow_uns: bool): # Include flag in .uns that this attribute is in .obs: self.adata.uns[y] = UNS_STRING_META_IN_OBS # Remove potential pd.Categorical formatting: - self._value_protection( - attr=y, allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) + self._value_protection(attr=y, allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) self.adata.obs[y] = self.adata.obs[z].values.tolist() else: assert False, "switch option should not occur" @@ -763,6 +768,9 @@ def streamline( uns_new = dict([ (getattr(adata_fields, k), 
self.adata.uns[getattr(self._adata_ids_sfaira, k)]) if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() + else (getattr(adata_fields, k), + np.unique(self.adata.obs[getattr(self._adata_ids_sfaira, k)].values).tolist()) + if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() else (getattr(adata_fields, k), None) for k in adata_fields.uns_keys ]) @@ -806,8 +814,11 @@ def streamline( self.adata.obs = pd.DataFrame( data=dict([ (getattr(adata_fields, k), self.adata.obs[getattr(self._adata_ids_sfaira, k)]) - for k in adata_fields.obs_keys if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() + else (getattr(adata_fields, k), list(self.adata.uns[getattr(self._adata_ids_sfaira, k)])) + if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() + else (getattr(adata_fields, k), adata_fields.unknown_metadata_identifier) + for k in adata_fields.obs_keys ]), index=self.adata.obs.index ) @@ -890,7 +901,42 @@ def streamline( if k in self.adata.uns.keys(): del self.adata.uns[k] - def load_tobacked( + def write_distributed_store( + self, + dir_cache: Union[str, os.PathLike], + store: str = "backed", + chunks: Union[int, None] = None, + ): + """ + Write data set into a format that allows distributed access to data set on disk. + + Writes to a zarr-backed h5ad. + Load data set and streamline before calling this method. + + :param dir_cache: Directory to write cache in. + :param store: Disk format for objects in cache: + + - "h5ad": Allows access via backed .h5ad. + On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives + fast row-wise access if the files are csr. + - "zarr": Allows access as zarr array. + :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="zarr". 
+ """ + self.__assert_loaded() + if store == "h5ad": + if not isinstance(self.adata.X, scipy.sparse.csr_matrix): + print(f"WARNING: high-perfomances caches based on .h5ad work better with .csr formatted expression " + f"data, found {type(self.adata.X)}") + fn = os.path.join(dir_cache, self.doi_cleaned_id + ".h5ad") + self.adata.write_h5ad(filename=fn, compression=None, force_dense=False) + elif store == "zarr": + fn = os.path.join(dir_cache, self.doi_cleaned_id) + self.adata.write_zarr(store=fn, chunks=chunks) + else: + raise ValueError() + + def write_backed( self, adata_backed: anndata.AnnData, genome: str, @@ -2222,3 +2268,7 @@ def get_subset_idx(samplewise_key, cellwise_key): def show_summary(self): print(f"{(self.supplier, self.organism, self.organ, self.assay_sc, self.disease)}") + + def __assert_loaded(self): + if self.adata is None: + raise ValueError("adata was not loaded, this is necessary for this operation") diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index c036b611f..8d753e8d5 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -189,7 +189,32 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): for x in self.ids: self.datasets[x].subset_genes(subset_type=subset_type) - def load_tobacked( + def write_distributed_store( + self, + dir_cache: Union[str, os.PathLike], + store: str = "backed", + chunks: Union[int, None] = None, + ): + """ + Write data set into a format that allows distributed access to data set on disk. + + Writes every data set contained to a zarr-backed h5ad. + Load data set and streamline before calling this method. + + :param dir_cache: Directory to write cache in. + :param store: Disk format for objects in cache: + + - "h5ad": Allows access via backed .h5ad. + On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives + fast row-wise access if the files are csr. 
+ - "zarr": Allows access as zarr array. + :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="zarr". + """ + for _, v in self.datasets.items(): + v.write_distributed_store(dir_cache=dir_cache, store=store, chunks=chunks) + + def write_backed( self, adata_backed: anndata.AnnData, genome: str, @@ -218,7 +243,7 @@ def load_tobacked( # if this is for celltype prediction, only load the data with have celltype annotation try: if self.datasets[x].annotated or not annotated_only: - self.datasets[x].load_tobacked( + self.datasets[x].write_backed( adata_backed=adata_backed, genome=genome, idx=idx[i], @@ -842,7 +867,33 @@ def adata_ls(self): adata_ls.append(v.adata) return adata_ls - def load_tobacked( + def write_distributed_store( + self, + dir_cache: Union[str, os.PathLike], + store: str = "backed", + chunks: Union[int, None] = None, + ): + """ + Write data set into a format that allows distributed access to data set on disk. + + Writes every data set contained to a zarr-backed h5ad. + The group structure of the super group is lost during this process. + Load data set and streamline before calling this method. + + :param dir_cache: Directory to write cache in. + :param store: Disk format for objects in cache: + + - "h5ad": Allows access via backed .h5ad. + On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives + fast row-wise access if the files are csr. + - "zarr": Allows access as zarr array. + :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="zarr". + """ + for x in self.dataset_groups: + x.write_distributed_store(dir_cache=dir_cache, store=store, chunks=chunks) + + def write_backed( self, fn_backed: PathLike, genome: str, @@ -855,6 +906,8 @@ def load_tobacked( """ Loads data set human into backed anndata object. + TODO replace streamlining in here by required call to .streamline() before. 
+ Example usage: ds = DatasetSuperGroup([...]) @@ -880,7 +933,7 @@ def load_tobacked( self.fn_backed = fn_backed n_cells = self.ncells(annotated_only=annotated_only) gc = self.get_gc(genome=genome) - n_genes = gc.ngenes + n_genes = gc.n_var if scatter_update: self.adata = anndata.AnnData( scipy.sparse.csr_matrix((n_cells, n_genes), dtype=np.float32) @@ -938,7 +991,7 @@ def load_tobacked( print(self.ncells_bydataset(annotated_only=annotated_only)) print([[len(x) for x in xx] for xx in idx_ls]) for i, x in enumerate(self.dataset_groups): - x.load_tobacked( + x.write_backed( adata_backed=self.adata, genome=genome, idx=idx_ls[i], @@ -988,7 +1041,8 @@ def streamline( """ for x in self.dataset_groups: for xx in x.ids: - x.datasets[xx].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns) + x.datasets[xx].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, + clean_var=clean_var, clean_uns=clean_uns) def subset(self, key, values): """ diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py new file mode 100644 index 000000000..92ccba3af --- /dev/null +++ b/sfaira/data/base/distributed_store.py @@ -0,0 +1,314 @@ +import anndata +import numpy as np +import os +import pandas as pd +import pickle +import scipy.sparse +from typing import Dict, List, Union + +from sfaira.consts import AdataIdsSfaira, OCS +from sfaira.data.base.dataset import is_child +from sfaira.versions.metadata import CelltypeUniverse + + +class DistributedStore: + """ + Data set group class tailored to data access requirements common in high-performance computing (HPC). + + This class does not inherit from DatasetGroup because it entirely relies on the cached objects. 
+ """ + + indices: Dict[str, np.ndarray] + + def __init__(self, cache_path: Union[str, None] = None): + """ + This class is instantiated on a cache directory which contains pre-processed files in rapid access format. + + Supported and automatically identifed are the formats: + + - h5ad, + - zarr + + :param cache_path: Directory in which pre-processed .h5ad files lie. + """ + # Collect all data loaders from files in directory: + adatas = {} + indices = {} + for f in os.listdir(cache_path): + if os.path.isfile(os.path.join(cache_path, f)): # only files + # Narrow down to supported file types: + if f.split(".")[-1] == "h5ad": + adata = anndata.read_h5ad( + filename=os.path.join(cache_path, f), + backed=True, + ) + elif f.split(".")[-1] == "zarr": + # TODO this reads into memory! Might need to directly interface the zarr arrays to work with dask. + adata = anndata.read_zarr(os.path.join(cache_path, f)) + else: + adata = None + if adata is not None: + adatas[adata.uns["id"]] = adata + indices[adata.uns["id"]] = np.arange(0, adata.n_obs) + self.adatas = adatas + self.indices = indices + self.ontology_container = OCS + self._adata_ids_sfaira = AdataIdsSfaira() + self._celltype_universe = None + + def generator( + self, + batch_size: int = 1, + obs_keys: List[str] = [], + continuous_batches: bool = True, + ) -> iter: + """ + Yields an unbiased generator over observations in the contained data sets. + + :param batch_size: Number of observations in each batch (generator invocation). + :param obs_keys: .obs columns to return in the generator. These have to be a subset of the columns available + in self.adatas. + :param continuous_batches: Whether to build batches of batch_size across data set boundaries if end of one + data set is reached. + :return: Generator function which yields batch_size at every invocation. 
+ The generator returns a tuple of (.X, .obs) with types: + + - if store format is h5ad: (scipy.sparse.csr_matrix, pandas.DataFrame) + """ + + def generator() -> tuple: + n_datasets = len(list(self.adatas.keys())) + x_last = None + obs_last = None + for i, (k, v) in enumerate(self.adatas.items()): + # Define batch partitions: + if continuous_batches and x_last is not None: + # Prepend data set with residual data from last data set. + remainder_start = x_last.shape[0] + n_obs = v.n_obs + remainder_start + else: + # Partition into equally sized batches up to last batch. + remainder_start = 0 + n_obs = v.n_obs + remainder = n_obs % batch_size + batch_starts = [ + np.min([0, int(x * batch_size - remainder_start)]) + for x in np.arange(1, n_obs // batch_size + int(remainder > 0)) + ] + n_batches = len(batch_starts) + # Iterate over batches: + for j, x in enumerate(batch_starts): + batch_end = int(x + batch_size) + x = v.X[x:batch_end, :] + obs = v.obs[obs_keys].iloc[x:batch_end, :] + assert isinstance(x, scipy.sparse.csr_matrix), f"{type(x)}" + assert isinstance(obs, pd.DataFrame), f"{type(obs)}" + if continuous_batches and remainder > 0 and i < (n_datasets - 1) and j == (n_batches - 1): + # Cache incomplete last batch to append to next first batch of next data set. + x_last = x + obs_last = obs + elif continuous_batches and x_last is not None: + # Append last incomplete batch current batch. + x = scipy.sparse.hstack(blocks=[x_last, x], format="csr") + obs = pd.concat(objs=[obs_last, obs], axis=0) + yield x, obs + else: + # Yield current batch. + yield x, obs + + return generator + + @property + def celltypes_universe(self) -> CelltypeUniverse: + if self._celltype_universe is None: + self._celltype_universe = CelltypeUniverse( + cl=self.ontology_container.cellontology_class, + uberon=self.ontology_container.organ, + organism=None, # TODO Does not load extensions! 
+ ) + return self._celltype_universe + + def subset(self, attr_key, values): + """ + Subset list of adata objects based on match to values in key property. + + Keys need to be available in adata.uns + + :param attr_key: Property to subset by. + :param values: Classes to overlap to. + :return: + """ + if isinstance(values, np.ndarray): + values = values.tolist() + if isinstance(values, tuple): + values = list(values) + if not isinstance(values, list): + values = [values] + # Get ontology container to be able to do relational reasoning: + ontology = getattr(self.ontology_container, attr_key) + for k in list(self.adatas.keys()): + if getattr(self._adata_ids_sfaira, attr_key) in self.adatas.uns.keys(): + values_found = self.adatas.uns[getattr(self._adata_ids_sfaira, attr_key)] + if not isinstance(values_found, list): + values_found = [values_found] + if not np.any([ + np.any([ + is_child(query=x, ontology=ontology, ontology_parent=y) + for y in values + ]) for x in values_found + ]): + # Delete entries which a non-matching meta data value associated with this item. + del self.adatas[k] + else: + # Delete entries which did not have this key annotated. + del self.adatas[k] + + def subset_cells_idx(self, attr_key, values: Union[str, List[str]]): + """ + Get indices of subset list of adata objects based on cell-wise properties. + + :param attr_key: Property to subset by. 
Options: + + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key + :param values: Classes to overlap to. + :return dictionary of files and observation indices by file. + """ + if not isinstance(values, list): + values = [values] + + def get_subset_idx(adata, k): + values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values + values_found_unique = np.unique(values_found) + try: + ontology = getattr(self.ontology_container, k) + except AttributeError: + raise ValueError(f"{k} not a valid property of ontology_container object") + # Test only unique elements found in ontology to save time. + values_found_unique_matched = [ + x for x in values_found_unique if np.any([ + is_child(query=x, ontology=ontology, ontology_parent=y) + for y in values + ]) + ] + # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. + print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {self.id}") + idx = np.where([x in values_found_unique_matched for x in values_found])[0] + return idx + + indices = {} + for k, v in self.adatas.items(): + idx_old = self.indices[k].tolist() + idx_new = get_subset_idx(adata=v, k=attr_key) + # Keep intersection of old and new hits. 
+ indices[k] = np.array(list(set(idx_old).intersection(set(idx_new)))) + return indices + + def subset_cells(self, attr_key, values: Union[str, List[str]]): + """ + Subset list of adata objects based on cell-wise properties. + + Subsetting is done based on index vectors, the objects remain untouched. + + :param attr_key: Property to subset by. Options: + + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key + :param values: Classes to overlap to. + """ + self.indices = self.subset_cells_idx(attr_key=attr_key, values=values) + + for k, v in self.indices.items(): + if v.shape[0] == 0: # No observations (cells) left. + del self.adatas[k] + + def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]): + """ + Get indices of subset list of adata objects based on cell-wise properties treating instance as single array. + + The indices are continuous across all data sets as if they were one array. + + :param attr_key: Property to subset by. 
Options: + + - "assay_differentiation" points to self.assay_differentiation_obs_key + - "assay_sc" points to self.assay_sc_obs_key + - "assay_type_differentiation" points to self.assay_type_differentiation_obs_key + - "cell_line" points to self.cell_line + - "cellontology_class" points to self.cellontology_class_obs_key + - "developmental_stage" points to self.developmental_stage_obs_key + - "ethnicity" points to self.ethnicity_obs_key + - "organ" points to self.organ_obs_key + - "organism" points to self.organism_obs_key + - "sample_source" points to self.sample_source_obs_key + - "sex" points to self.sex_obs_key + - "state_exact" points to self.state_exact_obs_key + :param values: Classes to overlap to. + :return Index vector + """ + # Get indices of of cells in target set by file. + idx_by_dataset = self.subset_cells_idx(attr_key=attr_key, values=values) + # Translate file-wise indices into global index list across all data sets. + idx = [] + counter = 0 + for k, v in self.adatas.items(): + idx_k = np.arange(counter, counter + v.n_obs) + idx.extend(idx_k[idx_by_dataset[k]]) + counter += v.n_obs + return idx + + def write_config(self, fn: Union[str, os.PathLike]): + """ + Writes a config file that describes the current data sub-setting. + + This config file can be loaded later to recreate a sub-setting. + This config file contains observation-wise subsetting information. + + :param fn: Output file without file type extension. + """ + with open(fn + '.pickle', 'w') as f: + pickle.dump(self.indices, f) + + def load_config(self, fn: Union[str, os.PathLike]): + """ + Load a config file and recreates a data sub-setting. + This config file contains observation-wise subsetting information. + + :param fn: Output file without file type extension. 
+ """ + with open(fn + '.pickle', 'rb') as f: + self.indices = pickle.load(f) + # Subset to described data sets: + for x in self.indices.keys(): + if x not in self.adatas.keys(): + raise ValueError(f"did not find object with name {x} in currently loaded universe") + # Only retain data sets with which are mentioned in config file. + self.subset(attr_key="id", values=list(self.indices.keys())) + + @property + def n_vars(self): + # assumes that all adata + return list(self.adatas.values())[0].n_vars + + @property + def n_obs(self): + return np.sum([len(v) for _, v in self.indices]) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/__init__.py similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py rename to sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/__init__.py diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py rename to sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv similarity index 100% rename from sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv rename to sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml similarity index 
100% rename from sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml rename to sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml diff --git a/sfaira/data/utils_scripts/create_anatomical_configs_store.py b/sfaira/data/utils_scripts/create_anatomical_configs_store.py new file mode 100644 index 000000000..8bc724ada --- /dev/null +++ b/sfaira/data/utils_scripts/create_anatomical_configs_store.py @@ -0,0 +1,99 @@ +import os +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data import DistributedStore + +print(tf.__version__) + +# Set global variables. +print("sys.argv", sys.argv) + +store_path = str(sys.argv[1]) +config_path = str(sys.argv[2]) + + +def clean(s): + if s is not None: + s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() + return s + + +configs_to_write = { + "human": [ + "adipose tissue", + "adrenal gland", + "artery", + "blood", + "bone marrow", + "brain", + "chorionic villus", + "diaphragm", + "esophagus", + "eye", + "gall bladder", + "heart", + "intestine", + "kidney", + "liver", + "lung", + "muscle organ", + "ovary", + "pancreas", + "placenta", + "pleura", + "prostate gland", + "rib", + "skeleton", + "skin of body", + "spinal cord", + "spleen", + "stomach", + "testis", + "tongue", + "thymus", + "thyroid gland", + "trachea", + "ureter", + "urinary bladder", + "uterine cervix", + "uterus", + "vault of skull", + ], + "mouse": [ + "adipose tissue", + "blood", + "bone marrow", + "brain", + "diaphragm", + "heart", + "intestine", + "kidney", + "liver", + "lung", + "mammary gland", + "muscle organ", + "ovary", + "pancreas", + "placenta", + "prostate gland", + "skin of body", + "spleen", + "stomach", + "testis", + "thymus", + "tongue", + "trachea", + "urinary bladder", + "uterus", + ] +} + +for organism, organs in configs_to_write.items(): + for organ in organs: + print(f"Writing {organism} {organ}") + 
store = DistributedStore(cache_path=store_path) + store.subset(attr_key="organism", values=[organism]) + store.subset(attr_key="organ", values=[organ]) + store.write_config(os.path.join(config_path, f"config_{clean(organism)}_{clean(organ)}.csv")) diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index a9750b0dd..8f18aa08a 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -16,6 +16,7 @@ # Write meta data, cache and test load from cache: for x in ds.dataset_groups: for k, v in x.datasets.items(): + print(f"SCRIPT: loading {x} {k}") try: # Initial load and cache writing: # Only run this if data set was not already cached to speed up resumed jobs. @@ -37,4 +38,4 @@ v.clear() except ValueError as e: # Do not abort upon ValueErrors, such as from cell type map bugs. - print(f"WARNING: TO-FIX: ValueError in {k}: {e}") + print(f"SCRIPT WARNING: TO-FIX: ValueError in {k}: {e}") diff --git a/sfaira/data/utils_scripts/create_target_universes.py b/sfaira/data/utils_scripts/create_target_universes.py new file mode 100644 index 000000000..0efd698ff --- /dev/null +++ b/sfaira/data/utils_scripts/create_target_universes.py @@ -0,0 +1,30 @@ +import os +import sfaira +import sys +import tensorflow as tf + +# Any data loader here to extract path: +from sfaira.data import DistributedStore + +print(tf.__version__) + + +# Set global variables. 
+print("sys.argv", sys.argv) + +store_path = str(sys.argv[1]) +config_path = str(sys.argv[2]) +out_path = str(sys.argv[3]) + + +for f in os.listdir(config_path): + fn = os.path.join(config_path, f) + if os.path.isfile(fn): # only files + # Narrow down to supported file types: + if f.split(".")[-1] == "csv" and f.startswith("config_"): + print(f"Writing {f}") + organism = f.split("_")[1] + organ = f.split("_")[2] + store = DistributedStore(cache_path=store_path) + store.load_config(fn=fn) + store.write_config(os.path.join(config_path, f"targets_{organism}_{organ}.csv")) diff --git a/sfaira/data/utils_scripts/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py index 4d762d767..2a5e81720 100644 --- a/sfaira/data/utils_scripts/write_backed_human.py +++ b/sfaira/data/utils_scripts/write_backed_human.py @@ -17,7 +17,7 @@ data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["human"]) -ds.load_tobacked( +ds.write_backed( fn_backed=fn, genome=genome, shuffled=False, diff --git a/sfaira/data/utils_scripts/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py index 9ed4417df..bd4e647d7 100644 --- a/sfaira/data/utils_scripts/write_backed_mouse.py +++ b/sfaira/data/utils_scripts/write_backed_mouse.py @@ -17,7 +17,7 @@ data_path=path, meta_path=path_meta, cache_path=path_meta ) ds.subset(key="organism", values=["mouse"]) -ds.load_tobacked( +ds.write_backed( fn_backed=fn, genome=genome, shuffled=False, diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py new file mode 100644 index 000000000..adac37fb1 --- /dev/null +++ b/sfaira/data/utils_scripts/write_store.py @@ -0,0 +1,32 @@ +import sfaira +import sys + +# Set global variables. 
+print("sys.argv", sys.argv) + +data_path = str(sys.argv[1]) +path_meta = str(sys.argv[2]) +path_cache = str(sys.argv[3]) +path_store = str(sys.argv[4]) + +universe = sfaira.data.dataloaders.Universe(data_path=data_path, meta_path=path_meta, cache_path=path_cache) + +for k, ds in universe.datasets.items(): + print(f"SCRIPT loading {k}") + ds.load( + match_to_reference=None, + remove_gene_version=True, + load_raw=False, + allow_caching=True, + set_metadata=False, + ) + ds.streamline( + format="sfaira", + allow_uns_sfaira=True, + clean_obs=True, + clean_var=True, + clean_uns=True, + ) + ds.subset_genes(subset_type="protein_coding") + ds.write_distributed_store(dir_cache=path_store, store="h5ad") + ds.clear() diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 1b81eebc7..5d0f7822b 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -2,7 +2,6 @@ import anndata import hashlib import numpy as np -import pandas import scipy.sparse try: import tensorflow as tf @@ -13,23 +12,32 @@ import warnings from tqdm import tqdm -from sfaira.consts import AdataIdsSfaira +from sfaira.consts import AdataIdsSfaira, OCS +from sfaira.data import DistributedStore from sfaira.models import BasicModel -from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topologies import Topologies +from sfaira.versions.metadata import CelltypeUniverse, OntologyCelltypes +from sfaira.versions.topologies import TopologyContainer from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ CustomAccAgg, CustomF1Classwise, CustomFprClasswise, CustomTprClasswise, custom_cce_agg +def prepare_sf(x): + if len(x.shape) == 2: + sf = np.asarray(x.sum(axis=1)).flatten() + elif len(x.shape) == 1: + sf = np.asarray(x.sum()).flatten() + else: + raise ValueError("x.shape > 2") + sf = np.log(sf / 1e4 + 1e-10) + return sf + + class 
EstimatorKeras: """ Estimator base class for keras models. """ - data: Union[anndata.AnnData] - obs_train: Union[pandas.DataFrame, None] - obs_eval: Union[pandas.DataFrame, None] - obs_test: Union[pandas.DataFrame, None] + data: Union[anndata.AnnData, DistributedStore] model: Union[BasicModel, None] model_topology: Union[str, None] model_id: Union[str, None] @@ -43,35 +51,20 @@ class EstimatorKeras: def __init__( self, - data: Union[anndata.AnnData, np.ndarray], + data: Union[anndata.AnnData, np.ndarray, DistributedStore], model_dir: Union[str, None], + model_class: str, model_id: Union[str, None], - model_class: Union[str, None], - organism: Union[str, None], - organ: Union[str, None], - model_type: Union[str, None], - model_topology: Union[str, None], + model_topology: TopologyContainer, weights_md5: Union[str, None] = None, cache_path: str = os.path.join('cache', '') ): self.data = data - self.obs_train = None - self.obs_eval = None - self.obs_test = None self.model = None self.model_dir = model_dir self.model_id = model_id - self.model_class = model_class.lower() - self.organism = organism.lower() - self.organ = organ.lower() - self.model_type = model_type.lower() - self.model_topology = model_topology - self.topology_container = Topologies( - organism=organism, - model_class=model_class, - model_type=model_type, - topology_id=model_topology - ) + self.model_class = model_class + self.topology_container = model_topology self.history = None self.train_hyperparam = None @@ -82,6 +75,14 @@ def __init__( self.cache_path = cache_path self._adata_ids_sfaira = AdataIdsSfaira() + @property + def model_type(self): + return self.topology_container.model_type + + @property + def organism(self): + return self.topology_container.organism + def load_pretrained_weights(self): """ Loads model weights from local directory or zenodo. 
@@ -217,14 +218,15 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): # If the feature space is already mapped to the right reference, return the data matrix immediately if 'mapped_features' in self.data.uns_keys(): - if self.data.uns[self._adata_ids_sfaira.mapped_features] == self.topology_container.genome_container.assembly: + if self.data.uns[self._adata_ids_sfaira.mapped_features] == \ + self.topology_container.gc.assembly: print(f"found {x.shape[0]} observations") return x # Compute indices of genes to keep data_ids = self.data.var[self._adata_ids_sfaira.gene_id_ensembl].values - idx_feature_kept = np.where([x in self.topology_container.genome_container.ensembl for x in data_ids])[0] - idx_feature_map = np.array([self.topology_container.genome_container.ensembl.index(x) + idx_feature_kept = np.where([x in self.topology_container.gc.ensembl for x in data_ids])[0] + idx_feature_map = np.array([self.topology_container.gc.ensembl.index(x) for x in data_ids[idx_feature_kept]]) # Convert to csc and remove unmapped genes @@ -232,7 +234,7 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): x = x[:, idx_feature_kept] # Create reordered feature matrix based on reference and convert to csr - x_new = scipy.sparse.csc_matrix((x.shape[0], self.topology_container.ngenes), dtype=x.dtype) + x_new = scipy.sparse.csc_matrix((x.shape[0], self.topology_container.n_var), dtype=x.dtype) # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: # ... 
scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) # ValueError: could not convert integer scalar @@ -247,21 +249,11 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): x_new = x_new.tocsr() print(f"found {len(idx_feature_kept)} intersecting features between {x.shape[1]} " - f"features in input data set and {self.topology_container.ngenes} features in reference genome") + f"features in input data set and {self.topology_container.n_var} features in reference genome") print(f"found {x_new.shape[0]} observations") return x_new - def _prepare_sf(self, x): - if len(x.shape) == 2: - sf = np.asarray(x.sum(axis=1)).flatten() - elif len(x.shape) == 1: - sf = np.asarray(x.sum()).flatten() - else: - raise ValueError("x.shape > 2") - sf = np.log(sf / 1e4 + 1e-10) - return sf - @abc.abstractmethod def _get_loss(self): pass @@ -395,7 +387,7 @@ def train( # Split training and evaluation data. np.random.seed(1) - all_idx = np.arange(0, self.data.shape[0]) + all_idx = np.arange(0, self.data.n_obs) # n_obs is both a property of AnnData and DistributedStore if isinstance(test_split, float) or isinstance(test_split, int): self.idx_test = np.random.choice( a=all_idx, @@ -403,15 +395,20 @@ def train( replace=False, ) elif isinstance(test_split, dict): - in_test = np.ones((self.data.obs.shape[0],), dtype=int) == 1 - for k, v in test_split.items(): - if isinstance(v, list): - in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) - else: - in_test = np.logical_and(in_test, self.data.obs[k].values == v) - self.idx_test = np.where(in_test)[0] - print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") - print(self.idx_test) + if isinstance(self.data, anndata.AnnData): + in_test = np.ones((self.data.obs.shape[0],), dtype=int) == 1 + for k, v in test_split.items(): + if isinstance(v, list): + in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) + else: 
+ in_test = np.logical_and(in_test, self.data.obs[k].values == v) + self.idx_test = np.where(in_test)[0] + print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") + print(self.idx_test) + else: + assert len(test_split.values()) == 1 + self.idx_test = self.data.subset_cells_idx_global(attr_key=list(test_split.keys())[0], + values=list(test_split.values())[0]) else: raise ValueError("type of test_split %s not recognized" % type(test_split)) idx_train_eval = np.array([x for x in all_idx if x not in self.idx_test]) @@ -431,10 +428,6 @@ def train( if not len(self.idx_train): raise ValueError("The train dataset is empty.") - self.obs_train = self.data.obs.iloc[self.idx_train, :].copy() - self.obs_eval = self.data.obs.iloc[self.idx_eval, :].copy() - self.obs_test = self.data.obs.iloc[self.idx_test, :].copy() - self._compile_models(optimizer=optim) train_dataset = self._get_dataset( idx=self.idx_train, @@ -472,6 +465,10 @@ def get_citations(self): """ raise NotImplementedError() + @property + def using_store(self) -> bool: + return isinstance(self.data, DistributedStore) + class EstimatorKerasEmbedding(EstimatorKeras): """ @@ -483,21 +480,15 @@ def __init__( data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - organism: Union[str, None], - organ: Union[str, None], - model_type: Union[str, None], - model_topology: Union[str, None], + model_topology: TopologyContainer, weights_md5: Union[str, None] = None, cache_path: str = os.path.join('cache', '') ): super(EstimatorKerasEmbedding, self).__init__( data=data, model_dir=model_dir, - model_id=model_id, model_class="embedding", - organism=organism, - organ=organ, - model_type=model_type, + model_id=model_id, model_topology=model_topology, weights_md5=weights_md5, cache_path=cache_path @@ -524,7 +515,7 @@ def init_model( elif self.model_type == 'vaevamp': from sfaira.models.embedding import ModelVaeVampVersioned as Model else: - 
raise ValueError('unknown model type %s for EstimatorKerasEmbedding' % self.model_type) + raise ValueError(f'unknown model type {self.model_type} for EstimatorKerasEmbedding') self.model = Model( topology_container=self.topology_container, override_hyperpar=override_hyperpar @@ -533,8 +524,8 @@ def init_model( @staticmethod def _get_output_dim(n_features, model_type, mode='train'): if mode == 'predict': # Output shape is same for predict mode regardless of model type - output_types = (tf.float32, tf.float32) - output_shapes = (n_features, ()) + output_types = (tf.float32, tf.float32), + output_shapes = (n_features, ()), elif model_type == "vae": output_types = ((tf.float32, tf.float32), (tf.float32, tf.float32)) output_shapes = ((n_features, ()), (n_features, ())) @@ -569,24 +560,46 @@ def _get_dataset( idx = np.arange(0, self.data.n_obs) if mode in ['train', 'train_val', 'eval', 'predict']: + def generator_helper(x_sample): + sf_sample = prepare_sf(x=x_sample)[0] + if mode == 'predict': + return (x_sample, sf_sample), + elif model_type == "vae": + return (x_sample, sf_sample), (x_sample, sf_sample) + else: + return (x_sample, sf_sample), x_sample + # Prepare data reading according to whether anndata is backed or not: - x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + if self.using_store: + generator_raw = self.data.generator( + batch_size=1, + obs_keys=[], + continuous_batches=True, + ) - def generator(): - is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) - indices = idx if self.data.isbacked else range(x.shape[0]) - for i in indices: - x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() - sf = self._prepare_sf(x=x_sample)[0] - if mode == 'predict': # If predicting, only return X regardless of model type - yield x_sample, sf - elif model_type == "vae": - yield (x_sample, sf), (x_sample, sf) - else: - yield (x_sample, sf), x_sample + def generator(): + counter = -1 + for z in generator_raw: + counter += 1 
+ if counter in idx: + x_sample = z[0].toarray().flatten() + yield generator_helper(x_sample=x_sample) + + n_features = self.data.n_vars + n_samples = self.data.n_obs + else: + x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + + def generator(): + is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) + indices = idx if self.data.isbacked else range(x.shape[0]) + for i in indices: + x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() + yield generator_helper(x_sample=x_sample) + + n_features = x.shape[1] + n_samples = x.shape[0] - n_features = x.shape[1] - n_samples = x.shape[0] output_types, output_shapes = self._get_output_dim(n_features, model_type, mode=mode) dataset = tf.data.Dataset.from_generator( @@ -607,30 +620,47 @@ def generator(): elif mode == 'gradient_method': # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: + cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) + if self.using_store: + n_features = self.data.n_vars + generator_raw = self.data.generator( + batch_size=1, + obs_keys=["cell_ontology_class"], + continuous_batches=True, + ) + + def generator(): + counter = -1 + for z in generator_raw: + counter += 1 + if counter in idx: + x_sample = z[0].toarray().flatten() + sf_sample = prepare_sf(x=x_sample)[0] + y_sample = z[1]["cell_ontology_class"].values[0] + yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) + + elif isinstance(self.data, anndata.AnnData) and self.data.isbacked: n_features = self.data.X.shape[1] - cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) - output_types, output_shapes = self._get_output_dim(n_features, 'vae') def generator(): sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) for i in idx: - x = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() - sf = self._prepare_sf(x=x)[0] - y = 
self.data.obs[self._adata_ids_sfaira.cell_ontology_class][i] - yield (x, sf), (x, cell_to_class[y]) + x_sample = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() + sf_sample = prepare_sf(x=x_sample)[0] + y_sample = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][i] + yield (x_sample, sf_sample), (x, cell_to_class[y_sample]) else: x = self._prepare_data_matrix(idx=idx) - sf = self._prepare_sf(x=x) - cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) - y = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][idx] # for gradients per celltype in compute_gradients_input() + sf = prepare_sf(x=x) + y = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][idx] + # for gradients per celltype in compute_gradients_input() n_features = x.shape[1] - output_types, output_shapes = self._get_output_dim(n_features, 'vae') def generator(): for i in range(x.shape[0]): yield (x[i, :].toarray().flatten(), sf[i]), (x[i, :].toarray().flatten(), cell_to_class[y[i]]) + output_types, output_shapes = self._get_output_dim(n_features, 'vae') dataset = tf.data.Dataset.from_generator( generator=generator, output_types=output_types, @@ -745,9 +775,7 @@ def predict(self): batch_size=64, mode='predict' ) - return self.model.predict_reconstructed( - x=x - ) + return self.model.predict_reconstructed(x=x) else: return np.array([]) @@ -880,17 +908,14 @@ class EstimatorKerasCelltype(EstimatorKeras): Estimator class for the cell type model. 
""" - celltypes_version: CelltypeUniverse + celltype_universe: CelltypeUniverse def __init__( self, data: Union[anndata.AnnData, np.ndarray], model_dir: Union[str, None], model_id: Union[str, None], - organism: Union[str, None], - organ: Union[str, None], - model_type: Union[str, None], - model_topology: Union[str, None], + model_topology: TopologyContainer, weights_md5: Union[str, None] = None, cache_path: str = os.path.join('cache', ''), max_class_weight: float = 1e3 @@ -898,17 +923,21 @@ def __init__( super(EstimatorKerasCelltype, self).__init__( data=data, model_dir=model_dir, - model_id=model_id, model_class="celltype", - organism=organism, - organ=organ, - model_type=model_type, + model_id=model_id, model_topology=model_topology, weights_md5=weights_md5, cache_path=cache_path ) + assert "cl" in self.topology_container.output.keys(), self.topology_container.output.keys() + assert "targets" in self.topology_container.output.keys(), self.topology_container.output.keys() self.max_class_weight = max_class_weight - self.celltypes_version = CelltypeUniverse(organism=organism) + self.celltype_universe = CelltypeUniverse( + cl=OntologyCelltypes(branch=self.topology_container.output["cl"]), + uberon=OCS.organ, + organism=self.organism, + ) + self.celltype_universe.target_universe = self.topology_container.output["targets"] def init_model( self, @@ -928,27 +957,39 @@ def init_model( raise ValueError('unknown topology %s for EstimatorKerasCelltype' % self.model_type) self.model = Model( - celltypes_version=self.celltypes_version, + celltypes_version=self.celltype_universe, topology_container=self.topology_container, override_hyperpar=override_hyperpar ) @property def ids(self): - return self.celltypes_version.target_universe + return self.celltype_universe.target_universe @property def ntypes(self): - return self.celltypes_version.ntypes + return self.celltype_universe.ntypes @property def ontology_ids(self): - return self.celltypes_version.target_universe + return 
self.celltype_universe.target_universe + + def _one_hot_encoder(self): + + def encoder(x): + idx = self.celltype_universe.map_to_target_leaves( + nodes=[x], + return_type="idx" + )[0] + y = np.zeros((self.ntypes,), dtype="float32") + y[idx] = 1. / len(idx) + return y + + return encoder def _get_celltype_out( self, idx: Union[np.ndarray, None], - lookup_ontology="names" ): """ Build one hot encoded cell type output tensor and observation-wise weight matrix. @@ -959,20 +1000,11 @@ def _get_celltype_out( if idx is None: idx = np.arange(0, self.data.n_obs) # One whether "unknown" is already included, otherwise add one extra column. - if np.any([x.lower() == "unknown" for x in self.ids]): - type_classes = self.ntypes - else: - type_classes = self.ntypes + 1 - y = np.zeros((len(idx), type_classes), dtype="float32") - celltype_idx = self.model.celltypes_version.map_to_target_leaves( - nodes=self.data.obs[self._adata_ids_sfaira.cell_ontology_class].values[idx].tolist(), - ontology="custom", - ontology_id=lookup_ontology, - return_type="idx" - ) - for i, x in enumerate(celltype_idx): - # Distribute probability mass uniformly across classes if multiple classes match: - y[i, x] = 1. / len(x) + onehot_encoder = self._one_hot_encoder() + y = np.concatenate([ + np.expand_dims(onehot_encoder(z), axis=0) + for z in self.data.obs[self._adata_ids_sfaira.cell_ontology_class].values[idx].tolist() + ], axis=0) # Distribute aggregated class weight for computation of weights: freq = np.mean(y / np.sum(y, axis=1, keepdims=True), axis=0, keepdims=True) weights = 1. / np.matmul(y, freq.T) # observation wise weight matrix @@ -1001,94 +1033,130 @@ def _get_dataset( :param weighted: Whether to use weights. 
:return: """ - if mode == 'train' or mode == 'train_val': - weights, y = self._get_celltype_out(idx=idx) - if not weighted: - weights = np.ones_like(weights) - - if self.data.isbacked: - n_features = self.data.X.shape[1] - - def generator(): - sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) - for i, ii in enumerate(idx): - x = self.data.X[ii, :].toarray().flatten() if sparse else self.data.X[ii, :].flatten() - yield x, y[i, :], weights[i] - else: - x = self._prepare_data_matrix(idx=idx) - n_features = x.shape[1] + if self.using_store: + if weighted: + raise ValueError("using weights with store is not supported yet") + n_obs = self.data.n_obs + n_features = self.data.n_vars + n_labels = len(self.data.celltypes_universe.target_universe) + generator_raw = self.data.generator( + batch_size=1, + obs_keys=["cell_ontology_class"], + continuous_batches=True, + ) + onehot_encoder = self._one_hot_encoder() - def generator(): - for i, ii in enumerate(idx): - yield x[i, :].toarray().flatten(), y[i, :], weights[i] + def generator(): + counter = -1 + for z in generator_raw: + counter += 1 + if counter in idx: + x_sample = z[0].toarray().flatten() + y = onehot_encoder(z[0]["cell_ontology_class"].values[0]) + yield x_sample, y, 1. 
dataset = tf.data.Dataset.from_generator( generator=generator, output_types=(tf.float32, tf.float32, tf.float32), output_shapes=( (tf.TensorShape([n_features])), - tf.TensorShape([y.shape[1]]), + tf.TensorShape([n_labels]), tf.TensorShape([]) ) ) - if mode == 'train': + if mode == 'train' or mode == 'train_val': dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(x.shape[0], shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True - ).batch(batch_size).prefetch(prefetch) + dataset = dataset.shuffle( + buffer_size=min(n_obs, shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True + ) + dataset = dataset.batch(batch_size).prefetch(prefetch) return dataset + else: + if mode != 'predict': + weights, y = self._get_celltype_out(idx=idx) + if not weighted: + weights = np.ones_like(weights) + if mode == 'train' or mode == 'train_val': + if isinstance(self.data, anndata.AnnData) and self.data.isbacked: + n_features = self.data.X.shape[1] + n_labels = y.shape[1] + + def generator(): + sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) + for i, ii in enumerate(idx): + x = self.data.X[ii, :].toarray().flatten() if sparse else self.data.X[ii, :].flatten() + yield x, y[i, :], weights[i] + else: + x = self._prepare_data_matrix(idx=idx) + n_features = x.shape[1] + n_labels = y.shape[1] + + def generator(): + for i, ii in enumerate(idx): + yield x[i, :].toarray().flatten(), y[i, :], weights[i] + + dataset = tf.data.Dataset.from_generator( + generator=generator, + output_types=(tf.float32, tf.float32, tf.float32), + output_shapes=( + (tf.TensorShape([n_features])), + tf.TensorShape([n_labels]), + tf.TensorShape([]) + ) + ) + if mode == 'train': + dataset = dataset.repeat() + dataset = dataset.shuffle( + buffer_size=min(x.shape[0], shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True + ).batch(batch_size).prefetch(prefetch) + + return dataset + + elif mode == 'eval': + # Prepare data reading according to whether 
anndata is backed or not: + if isinstance(self.data, anndata.AnnData) and self.data.isbacked: + # Need to supply sorted indices to backed anndata: + x = self.data.X[np.sort(idx), :] + # Sort back in original order of indices. + x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] + else: + x = self._prepare_data_matrix(idx=idx) + x = x.toarray() + + return x, y, weights + + elif mode == 'predict': + # Prepare data reading according to whether anndata is backed or not: + if self.data.isbacked: + # Need to supply sorted indices to backed anndata: + x = self.data.X[np.sort(idx), :] + # Sort back in original order of indices. + x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] + else: + x = self._prepare_data_matrix(idx=idx) + x = x.toarray() - elif mode == 'eval': - weights, y = self._get_celltype_out(idx=idx) - if not weighted: - weights = np.ones_like(weights) - - # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: - # Need to supply sorted indices to backed anndata: - x = self.data.X[np.sort(idx), :] - # Sort back in original order of indices. - x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] - else: - x = self._prepare_data_matrix(idx=idx) - x = x.toarray() - - return x, y, weights + return x, None, None - elif mode == 'predict': - # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: - # Need to supply sorted indices to backed anndata: - x = self.data.X[np.sort(idx), :] - # Sort back in original order of indices. - x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] else: - x = self._prepare_data_matrix(idx=idx) - x = x.toarray() - - return x, None, None - - else: - raise ValueError(f'Mode {mode} not recognised. Should be "train", "eval" or" predict"') + raise ValueError(f'Mode {mode} not recognised. 
Should be "train", "eval" or" predict"') def _get_loss(self): return LossCrossentropyAgg() def _metrics(self): - if np.any([x.lower() == "unknown" for x in self.ids]): - ntypes = self.ntypes - else: - ntypes = self.ntypes + 1 return [ "accuracy", custom_cce_agg, CustomAccAgg(), - CustomF1Classwise(k=ntypes), - CustomFprClasswise(k=ntypes), - CustomTprClasswise(k=ntypes) + CustomF1Classwise(k=self.ntypes), + CustomFprClasswise(k=self.ntypes), + CustomTprClasswise(k=self.ntypes) ] def predict(self): diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 3f047416a..1267b153f 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -9,19 +9,17 @@ from sfaira.versions.metadata import CelltypeUniverse from sfaira.consts import OntologyContainerSfaira -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer class ModelZoo(abc.ABC): """ Model ontology base class. """ - topology_container: Topologies + topology_container: TopologyContainer ontology: dict model_id: Union[str, None] model_class: Union[str, None] - organism: Union[str, None] - organ: Union[str, None] model_class: Union[str, None] model_type: Union[str, None] model_topology: Union[str, None] @@ -40,8 +38,6 @@ def __init__( self.ontology = self.load_ontology_from_model_ids(model_lookuptable['model_id'].values) self.model_id = None self.model_class = None - self.organism = None - self.organ = None self.model_type = None self.organisation = None self.model_topology = None @@ -79,24 +75,16 @@ def set_model_id( :param model_id: Model ID to set. Format: pipeline_genome_organ_model_organisation_topology_version """ - if len(model_id.split('_')) < 7: - raise RuntimeError(f'Model ID {model_id} is invalid! 
Must follow the format: pipeline_genome_organ_model_organisation_topology_version') + if len(model_id.split('_')) < 6: + raise RuntimeError(f'Model ID {model_id} is invalid!') self.model_id = model_id ixs = self.model_id.split('_') self.model_class = ixs[0] - self.organism = ixs[1] - self.organ = ixs[2] - self.model_type = ixs[3] - self.organisation = ixs[4] - self.model_topology = ixs[5] - self.model_version = ixs[6] - - self.topology_container = Topologies( - organism=self.organism, - model_class=self.model_class, - model_type=self.model_type, - topology_id=self.model_topology - ) + self.model_id = ixs[1] + self.model_type = ixs[2] + self.organisation = ixs[3] + self.model_topology = ixs[4] + self.model_version = ixs[5] def save_weights_to_remote(self, path=None): """ @@ -123,99 +111,35 @@ def call_kipoi(self): :return: Predictions """ - return kipoi.get_model( - self.model_id, - source='kipoi_experimental', - with_dataloader=True - ) # TODO make sure that this is in line with kipoi_experimental model names - # alternatively: - # return kipoi_experimental.get_model("https://github.com/kipoi/models/tree/7d3ea7800184de414aac16811deba6c8eefef2b6/pwm_HOCOMOCO/human/CTCF", - # source='github-permalink') - - def organism(self) -> List[str]: - """ - Return list of available organism. - - :return: List of organism available. - """ - return self.ontology.keys() - - def organs( - self, - organism: str - ) -> List[str]: - """ - Return list of available organs for a given organism. - - :param organism: Identifier of organism to show organs for. - :return: List of organs available. - """ - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - return self.ontology[organism].keys() + raise NotImplementedError() - def models( - self, - organism: str, - organ: str - ) -> List[str]: + def models(self) -> List[str]: """ - Return list of available models for a given organism, organ. + Return list of available models. 
- :param organism: Identifier of organism to show organs for. - :param organ: Identifier of organ to show versions for. :return: List of models available. """ - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - return self.ontology[organism][organ].keys() - - def organisation( - self, - organism: str, - organ: str, - model_type: str - ) -> List[str]: - """ - Return list of available organisation that trained a given model for a given organism and organ - - :param organism: Identifier of organism to show versions for. - :param organ: Identifier of organ to show versions for. - :param model_type: Identifier of model to show versions for. - :return: List of versions available. - """ - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" - return self.ontology[organism][organ][model_type] + return self.ontology.keys() def topology( self, - organism: str, - organ: str, model_type: str, organisation: str ) -> List[str]: """ - Return list of available model topologies that trained by a given organisation, - a given model for a given organism and organ + Return list of available model topologies that trained by a given organisation, and a given model - :param organism: Identifier of organism to show versions for. - :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :return: List of versions available. 
""" - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[organism][organ][model_type].keys(), \ + assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[model_type].keys(), \ "organisation requested was not found in ontology" - return self.ontology[organism][organ][model_type][organisation] + return self.ontology[model_type][organisation] def versions( self, - organism: str, - organ: str, model_type: str, organisation: str, model_topology: str @@ -223,33 +147,17 @@ def versions( """ Return list of available model versions of a given organisation for a given organism and organ and model. - :param organism: Identifier of organism to show versions for. - :param organ: Identifier of organ to show versions for. :param model_type: Identifier of model_type to show versions for. :param organisation: Identifier of organisation to show versions for. :param model_topology: Identifier of model_topology to show versions for. :return: List of versions available. 
""" - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[organism][organ][model_type].keys(), \ + assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[model_type][organisation].keys(), \ "model_topology requested was not found in ontology" - return self.ontology[organism][organ][model_type][organisation][model_topology] - - @property - def genome(self): - return self.model_hyperparameters["genome"] - - @property - def gene_names(self): - return self.topology_container.genome_container.names - - @property - def ensemble_names(self): - return self.topology_container.genome_container.ensembl + return self.ontology[model_type][organisation][model_topology] @property def model_hyperparameters(self) -> dict: @@ -258,6 +166,7 @@ def model_hyperparameters(self) -> dict: class ModelZooEmbedding(ModelZoo): + """ The supported model ontology is: @@ -279,37 +188,27 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'embedding'] id_df = pd.DataFrame( - [i.split('_')[1:7] for i in ids], - columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + [i.split('_')[1:6] for i in ids], + columns=['id', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - organism = np.unique(id_df['organism']) - ontology = dict.fromkeys(organism) - for g in organism: - id_df_g = id_df[id_df.organism == g] - organ = np.unique(id_df_g['organ']) - ontology[g] 
= dict.fromkeys(organ) - for o in organ: - id_df_o = id_df_g[id_df_g.organ == o] - model = np.unique(id_df_o['model_type']) - ontology[g][o] = dict.fromkeys(model) - for m in model: - id_df_m = id_df_o[id_df_o.model_type == m] - orga = np.unique(id_df_m['organisation']) - ontology[g][o][m] = dict.fromkeys(orga) - for org in orga: - id_df_org = id_df_m[id_df_m.organisation == org] - topo = np.unique(id_df_org['model_topology']) - ontology[g][o][m][org] = dict.fromkeys(topo) - for t in topo: - id_df_t = id_df_org[id_df_org.model_topology == t] - ontology[g][o][m][org][t] = id_df_t.model_version.tolist() + model = np.unique(id_df['model_type']) + ontology = dict.fromkeys(model) + for m in model: + id_df_m = id_df[id_df.model_type == m] + orga = np.unique(id_df_m['organisation']) + ontology[m] = dict.fromkeys(orga) + for org in orga: + id_df_org = id_df_m[id_df_m.organisation == org] + topo = np.unique(id_df_org['model_topology']) + ontology[m][org] = dict.fromkeys(topo) + for t in topo: + id_df_t = id_df_org[id_df_org.model_topology == t] + ontology[m][org][t] = id_df_t.model_version.tolist() return ontology def set_latest( self, - organism: str, - organ: str, model_type: str, organisation: str, model_topology: str @@ -317,30 +216,22 @@ def set_latest( """ Set model ID to latest model in given ontology group. - :param organism: Identifier of organism to select. - :param organ: Identifier of organ to select. :param model_type: Identifier of model_type to select. :param organisation: Identifier of organisation to select. 
:param model_topology: Identifier of model_topology to select :return: """ - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[organism][organ][model_type].keys(), \ + assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - organism=organism, - organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.organism = organism - self.organ = organ self.model_type = model_type self.organisation = organisation self.model_topology = model_topology # set to model for now, could be organism/organ specific later @@ -348,19 +239,12 @@ def set_latest( self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'embedding', - self.organism, - self.organ, + self.id, self.model_type, self.organisation, self.model_topology, self.model_version ]) - self.topology_container = Topologies( - organism=self.organism, - model_class="embedding", - model_type=self.model_type, - topology_id=self.model_topology - ) class ModelZooCelltype(ModelZoo): @@ -388,37 +272,27 @@ def load_ontology_from_model_ids( ids = [i for i in model_ids if i.split('_')[0] == 'celltype'] id_df = pd.DataFrame( - [i.split('_')[1:7] for i in ids], - columns=['organism', 'organ', 'model_type', 'organisation', 'model_topology', 'model_version'] + [i.split('_')[1:6] for i in ids], 
+ columns=['id', 'model_type', 'organisation', 'model_topology', 'model_version'] ) - organism = np.unique(id_df['organism']) - ontology = dict.fromkeys(organism) - for g in organism: - id_df_g = id_df[id_df.organism == g] - organ = np.unique(id_df_g['organ']) - ontology[g] = dict.fromkeys(organ) - for o in organ: - id_df_o = id_df_g[id_df_g.organ == o] - model = np.unique(id_df_o['model_type']) - ontology[g][o] = dict.fromkeys(model) - for m in model: - id_df_m = id_df_o[id_df_o.model_type == m] - orga = np.unique(id_df_m['organisation']) - ontology[g][o][m] = dict.fromkeys(orga) - for org in orga: - id_df_org = id_df_m[id_df_m.organisation == org] - topo = np.unique(id_df_org['model_topology']) - ontology[g][o][m][org] = dict.fromkeys(topo) - for t in topo: - id_df_t = id_df_org[id_df_org.model_topology == t] - ontology[g][o][m][org][t] = id_df_t.model_version.tolist() + model = np.unique(id_df['model_type']) + ontology = dict.fromkeys(model) + for m in model: + id_df_m = id_df[id_df.model_type == m] + orga = np.unique(id_df_m['organisation']) + ontology[m] = dict.fromkeys(orga) + for org in orga: + id_df_org = id_df_m[id_df_m.organisation == org] + topo = np.unique(id_df_org['model_topology']) + ontology[m][org] = dict.fromkeys(topo) + for t in topo: + id_df_t = id_df_org[id_df_org.model_topology == t] + ontology[m][org][t] = id_df_t.model_version.tolist() return ontology def set_latest( self, - organism: str, - organ: str, model_type: str, organisation: str, model_topology: str @@ -433,24 +307,18 @@ def set_latest( :param model_topology: Identifier of model_topology to select :return: """ - assert organism in self.ontology.keys(), "organism requested was not found in ontology" - assert organ in self.ontology[organism].keys(), "organ requested was not found in ontology" - assert model_type in self.ontology[organism][organ].keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[organism][organ][model_type].keys(), \ + 
assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" + assert organisation in self.ontology[model_type].keys(), \ "organisation requested was not found in ontology" - assert model_topology in self.ontology[organism][organ][model_type][organisation].keys(), \ + assert model_topology in self.ontology[model_type][organisation].keys(), \ "model_topology requested was not found in ontology" versions = self.versions( - organism=organism, - organ=organ, model_type=model_type, organisation=organisation, model_topology=model_topology ) - self.organism = organism - self.organ = organ self.model_type = model_type self.organisation = organisation self.model_topology = model_topology # set to model for now, could be organism/organ specific later @@ -458,21 +326,9 @@ def set_latest( self.model_version = self._order_versions(versions=versions)[0] self.model_id = '_'.join([ 'celltype', - self.organism, - self.organ, + self.id, self.model_type, self.organisation, self.model_topology, self.model_version ]) - self.topology_container = Topologies( - organism=self.organism, - model_class="celltype", - model_type=self.model_type, - topology_id=self.model_topology - ) - self.celltypes = CelltypeUniverse( - cl=self._ontology_container_sfaira.cellontology_class, - uberon=self._ontology_container_sfaira.organ, - organism=self.organism - ).load_target_universe(organ=self.organ) diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index 523f37c3c..a1e6c5470 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -6,7 +6,7 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -102,7 +102,7 @@ class CellTypeMarkerVersioned(CellTypeMarker): def __init__( self, 
celltypes_version: CelltypeUniverse, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): """ @@ -114,20 +114,19 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. - unkown_already_included = np.any([x.lower() == "unknown" for x in celltypes_version.target_universe]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, - out_dim=celltypes_version.ntypes if unkown_already_included else celltypes_version.ntypes + 1, + in_dim=topology_container.n_var, + out_dim=celltypes_version.ntypes, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "celltype" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index f8a38f8e1..7586e76d6 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -6,7 +6,7 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -77,7 +77,7 @@ class CellTypeMlpVersioned(CellTypeMlp): def __init__( self, celltypes_version: CelltypeUniverse, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): """ @@ -89,20 
+89,19 @@ def __init__( dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. """ # Get cell type version instance based on topology ID, organism and organ. - unkown_already_included = np.any([x.lower() == "unknown" for x in celltypes_version.target_universe]) hyperpar = topology_container.topology["hyper_parameters"] if override_hyperpar is not None: for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, - out_dim=celltypes_version.ntypes if unkown_already_included else celltypes_version.ntypes + 1, + in_dim=topology_container.n_var, + out_dim=celltypes_version.ntypes, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "celltype" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index 099a385ec..af719740f 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -188,7 +188,7 @@ def __init__( output_decoder_expfamily_concat = tf.keras.layers.Concatenate(axis=1, name="neg_ll")(output_decoder_expfamily) self.encoder_model = tf.keras.Model( - inputs=inputs_encoder, + inputs=[inputs_encoder, inputs_sf], outputs=output_encoder, name="encoder_model" ) @@ -198,10 +198,10 @@ def __init__( 
name="autoencoder" ) - def predict_reconstructed(self, x: np.ndarray): + def predict_reconstructed(self, x): return np.split(self.training_model.predict(x), indices_or_sections=2, axis=1)[0] - def predict_embedding(self, x: np.ndarray, variational=False): + def predict_embedding(self, x, variational=False): if variational: raise ValueError("Cannot predict variational embedding on AE model.topo") return self.encoder_model.predict(x) @@ -210,7 +210,7 @@ def predict_embedding(self, x: np.ndarray, variational=False): class ModelAeVersioned(ModelAe): def __init__( self, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): hyperpar = topology_container.topology["hyper_parameters"] @@ -218,13 +218,13 @@ def __init__( for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, + in_dim=topology_container.n_var, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "embedding" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 423cf915b..140b72f9e 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -85,7 +85,7 @@ def __init__( output_decoder_expfamily_concat = 
tf.keras.layers.Concatenate(axis=1, name="neg_ll")(output_decoder_expfamily) self.encoder_model = tf.keras.Model( - inputs=inputs_encoder, + inputs=[inputs_encoder, inputs_sf], outputs=output_encoder, name="encoder" ) @@ -95,17 +95,17 @@ def __init__( name="autoencoder" ) - def predict_reconstructed(self, x: np.ndarray): + def predict_reconstructed(self, x): return np.split(self.training_model.predict(x), indices_or_sections=2, axis=1)[0] - def predict_embedding(self, x: np.ndarray, **kwargs): + def predict_embedding(self, x, **kwargs): return self.encoder_model.predict(x) class ModelLinearVersioned(ModelLinear): def __init__( self, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): hyperpar = topology_container.topology["hyper_parameters"] @@ -113,13 +113,13 @@ def __init__( for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, + in_dim=topology_container.n_var, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "embedding" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index 1c36084ec..7e379e567 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ 
-212,7 +212,7 @@ def __init__( output_decoder_expfamily_concat = tf.keras.layers.Concatenate(axis=1, name="neg_ll")(output_decoder_expfamily) self.encoder_model = tf.keras.Model( - inputs=inputs_encoder, + inputs=[inputs_encoder, inputs_sf], outputs=[z, z_mean, z_log_var], name="encoder_model" ) @@ -222,7 +222,7 @@ def __init__( name="autoencoder" ) - def predict_embedding(self, x: np.ndarray, variational=False): + def predict_embedding(self, x, variational=False): if variational: return self.encoder_model.predict(x) else: @@ -232,7 +232,7 @@ def predict_embedding(self, x: np.ndarray, variational=False): class ModelVaeVersioned(ModelVae): def __init__( self, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): hyperpar = topology_container.topology["hyper_parameters"] @@ -240,13 +240,13 @@ def __init__( for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, + in_dim=topology_container.n_var, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "embedding" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 55530e132..4ee2e5c6f 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from 
sfaira.models.pp_layer import PreprocInput from sfaira.models.made import MaskingDense @@ -319,7 +319,7 @@ def __init__( output_decoder_expfamily_concat = tf.keras.layers.Concatenate(axis=1, name="neg_ll")(output_decoder_expfamily) self.encoder_model = tf.keras.Model( - inputs=inputs_encoder, + inputs=[inputs_encoder, inputs_sf], outputs=[z_t, z_t_mean, z_0], name="encoder_model" ) @@ -329,10 +329,10 @@ def __init__( name="autoencoder" ) - def predict_reconstructed(self, x: np.ndarray): + def predict_reconstructed(self, x): return np.split(self.training_model.predict(x)[0], indices_or_sections=2, axis=1)[0] - def predict_embedding(self, x: np.ndarray, variational=False, return_z0=False): + def predict_embedding(self, x, variational=False, return_z0=False): if return_z0 and variational: z_t, z_t_mean, z_0 = self.encoder_model.predict(x) return z_t, z_t_mean, z_0 @@ -350,7 +350,7 @@ def predict_embedding(self, x: np.ndarray, variational=False, return_z0=False): class ModelVaeIAFVersioned(ModelVaeIAF): def __init__( self, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): hyperpar = topology_container.topology["hyper_parameters"] @@ -358,13 +358,13 @@ def __init__( for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, + in_dim=topology_container.n_var, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "embedding" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index fbd0fc579..fd06ce0e8 100644 --- a/sfaira/models/embedding/vaevamp.py +++ 
b/sfaira/models/embedding/vaevamp.py @@ -7,7 +7,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput -from sfaira.versions.topologies import Topologies +from sfaira.versions.topologies import TopologyContainer from sfaira.models.base import BasicModel from sfaira.models.pp_layer import PreprocInput @@ -274,7 +274,7 @@ def __init__( z_log_var = tf.keras.layers.Concatenate(axis=1, name="z_log_var")([q_z1_log_var, q_z2_log_var]) self.encoder_model = tf.keras.Model( - inputs=inputs_encoder, + inputs=[inputs_encoder, inputs_sf], outputs=[z, z_mean, z_log_var], name="encoder_model" ) @@ -284,7 +284,7 @@ def __init__( name="autoencoder" ) - def predict_embedding(self, x: np.ndarray, variational=False): + def predict_embedding(self, x, variational=False): if variational: return self.encoder_model.predict(x) else: @@ -294,7 +294,7 @@ def predict_embedding(self, x: np.ndarray, variational=False): class ModelVaeVampVersioned(ModelVaeVamp): def __init__( self, - topology_container: Topologies, + topology_container: TopologyContainer, override_hyperpar: Union[dict, None] = None ): hyperpar = topology_container.topology["hyper_parameters"] @@ -302,14 +302,14 @@ def __init__( for k in list(override_hyperpar.keys()): hyperpar[k] = override_hyperpar[k] super().__init__( - in_dim=topology_container.ngenes, + in_dim=topology_container.n_var, **hyperpar ) print('passed hyperpar: \n', hyperpar) self._topology_id = topology_container.topology_id - self.genome_size = topology_container.ngenes - self.model_class = topology_container.model_class + self.genome_size = topology_container.n_var + self.model_class = "embedding" self.model_type = topology_container.model_type self.hyperparam = dict( list(hyperpar.items()) + # noqa: W504 diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index 5d554d6d1..4462e99b1 100644 --- 
a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,53 +5,39 @@ import pickle from typing import Union -from sfaira.data import Universe -from sfaira.estimators import EstimatorKerasCelltype, EstimatorKerasEmbedding +from sfaira.data import DistributedStore, Universe +from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.interface import ModelZooEmbedding, ModelZooCelltype class TrainModel: + data: Union[anndata.AnnData, DistributedStore] + estimator: EstimatorKeras + def __init__( self, - config_path: str, - data_path: str, - meta_path: str, - cache_path: str, + data: Union[str, anndata.AnnData, Universe, DistributedStore], ): # Check if handling backed anndata or base path to directory of raw files: - if data_path.split(".")[-1] == "h5ad": - self.data = anndata.read(data_path, backed='r') + if isinstance(data, str) and data.split(".")[-1] == "h5ad": + self.data = anndata.read(data, backed='r') if len(self.data.obs.columns) == 0: - fn_backed_obs = ".".join(data_path.split(".")[:-1]) + "_obs.csv" + fn_backed_obs = ".".join(data.split(".")[:-1]) + "_obs.csv" self.data.obs = pd.read_csv(fn_backed_obs) + elif isinstance(data, anndata.AnnData): + self.data = data + elif isinstance(data, Universe): + self.data = data.adata + elif isinstance(data, DistributedStore): + self.data = data else: - dataset = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dataset.load_config(config_path) - self.set_data(dataset) - - @abc.abstractmethod - def set_data(self, dataset): - pass + raise ValueError(f"did not recongize data of type {type(data)}") @abc.abstractmethod def init_estim(self): pass - @property - def adata(self): - """ - Get adata object depending on whether backed or a property of a container class. 
- - :return: - """ - if self.data is None: - raise ValueError("self.data not set yet") - elif isinstance(self.data, anndata.AnnData): - return self.data - else: - raise ValueError(f"self.data type not recognized: {type(self.data)}") - @abc.abstractmethod def _save_specific( self, @@ -84,35 +70,27 @@ def save( class TrainModelEmbedding(TrainModel): + estimator: EstimatorKerasEmbedding + def __init__( self, - config_path: str, - data_path: str, - meta_path: str, - cache_path: str, model_path: str, + data: Union[str, anndata.AnnData, Universe, DistributedStore], ): - super(TrainModelEmbedding, self).__init__(config_path=config_path, data_path=data_path, meta_path=meta_path, cache_path=cache_path) + super(TrainModelEmbedding, self).__init__(data=data) self.zoo = ModelZooEmbedding(model_lookuptable=None) self.estimator = None self.model_dir = model_path - def set_data(self, dataset): - dataset.load(match_to_reference=True) - self.data = dataset.adata - def init_estim( self, override_hyperpar: Union[dict, None] = None ): assert self.zoo.model_id is not None, "choose model in zoo first" self.estimator = EstimatorKerasEmbedding( - data=self.adata, + data=self.data, model_dir=self.model_dir, model_id=self.zoo.model_id, - organism=self.zoo.organism, - organ=self.zoo.organ, - model_type=self.zoo.model_type, model_topology=self.zoo.model_topology ) self.estimator.init_model(override_hyperpar=override_hyperpar) @@ -158,23 +136,19 @@ def _save_specific( class TrainModelCelltype(TrainModel): + estimator: EstimatorKerasCelltype + def __init__( self, - config_path: str, - data_path: str, - meta_path: str, - cache_path: str, model_path: str, + data: Union[str, anndata.AnnData, Universe, DistributedStore], + fn_target_universe: str, ): - super(TrainModelCelltype, self).__init__(config_path=config_path, data_path=data_path, meta_path=meta_path, cache_path=cache_path) + super(TrainModelCelltype, self).__init__(data=data) self.zoo = ModelZooCelltype(model_lookuptable=None) 
self.estimator = None self.model_dir = model_path - - def set_data(self, dataset): - dataset.subset("annotated", True) - dataset.load(match_to_reference=True) - self.data = dataset.adata + self.data.celltypes_universe.load_target_universe(fn=fn_target_universe) def init_estim( self, @@ -182,12 +156,9 @@ def init_estim( ): assert self.zoo.model_id is not None, "choose model in zoo first" self.estimator = EstimatorKerasCelltype( - data=self.adata, + data=self.data, model_dir=self.model_dir, model_id=self.zoo.model_id, - organism=self.zoo.organism, - organ=self.zoo.organ, - model_type=self.zoo.model_type, model_topology=self.zoo.model_topology ) self.estimator.init_model(override_hyperpar=override_hyperpar) @@ -240,12 +211,12 @@ def _save_specific( cell_counts_leaf = cell_counts.copy() for k in cell_counts.keys(): if k not in self.estimator.ids: - if k not in self.estimator.celltypes_version.ontology.node_ids: + if k not in self.estimator.celltype_universe.ontology.node_ids: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in self.estimator.celltypes_version.ontology.node_ids: + for leaf in self.estimator.celltype_universe.ontology.node_ids: if leaf not in cell_counts_leaf.keys(): cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1 / len(self.estimator.celltypes_version.ontology.node_ids) + cell_counts_leaf[leaf] += 1 / len(self.estimator.celltype_universe.ontology.node_ids) del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 72c563846..bb0e43ab3 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -95,7 +95,7 @@ def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = 
DatasetSuperGroup(dataset_groups=[ds]) - ds.load_tobacked( + ds.write_backed( fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), genome=genome, shuffled=True, @@ -110,7 +110,7 @@ def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.load_tobacked( + ds.write_backed( fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), genome=genome, shuffled=False, diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index d3e59d778..01ccada5b 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -1,23 +1,50 @@ import abc import anndata import numpy as np -import tensorflow as tf from typing import Union -import unittest from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.versions.topologies import Topologies - - -class _TestEstimator: +from sfaira.versions.topologies import TopologyContainer + +GENES = ["ENSMUSG00000000003", "ENSMUSG00000000028"] +TARGETS = ["T cell", "stromal cell"] + +TOPOLOGY_EMBEDDING_MODEL = { + "model_type": None, + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["ensg", GENES], + }, + "output": {}, + "hyper_parameters": { + "latent_dim": None, + "l2_coef": 0., + "l1_coef": 0., + "output_layer": "nb_const_disp" + } +} + +TOPOLOGY_CELLTYPE_MODEL = { + "model_type": None, + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["ensg", GENES], + }, + "output": { + "cl": "v2021-02-01", + "targets": TARGETS + }, + "hyper_parameters": { + "latent_dim": None, + "l1_coef": 0., + "l2_coef": 0., + } +} + + +class TestEstimatorBase: estimator: Union[EstimatorKeras] data: Union[anndata.AnnData] - model: Union[tf.keras.models.Model, None] - topology_container: Union[Topologies, None] - optimizer: Union[str, None] - 
model_id: Union[str, None] - weights: Union[np.ndarray, None] - model_dir: Union[str, None] """ Contains functions _test* to test individual functions and attributes of estimator class. @@ -26,10 +53,6 @@ class _TestEstimator: basic_estimator_test(). See _test_call() for an example. """ - @abc.abstractmethod - def set_topology(self, model_type): - pass - def simulate(self): """ Simulate basic data example used for unit test. @@ -39,19 +62,17 @@ def simulate(self): :return: """ nobs = 100 - ngenes = self.topology_container.ngenes self.data = anndata.AnnData( - np.random.randint(low=0, high=100, size=(nobs, ngenes)).astype(np.float32) + np.random.randint(low=0, high=100, size=(nobs, len(GENES))).astype(np.float32) ) self.data.obs["cell_ontology_class"] = [ - ["vein endothelial cell", "glial cell"][np.random.randint(0, 2)] + TARGETS[np.random.randint(0, len(TARGETS))] for i in range(nobs) ] - self.data.var["ensembl"] = self.topology_container.genome_container.ensembl - self.data.var["names"] = self.topology_container.genome_container.names + self.data.var["ensembl"] = GENES @abc.abstractmethod - def init_estimator(self): + def init_estimator(self, model_type: str): """ Initialise target estimator as .estimator attribute. 
""" @@ -61,33 +82,31 @@ def init_estimator(self): def basic_estimator_test(self): pass - def _test_for_fatal(self): + def test_for_fatal(self, model_type): np.random.seed(1) self.simulate() - self.init_estimator() + self.init_estimator(model_type=model_type) self.basic_estimator_test() return True -class TestEstimatorKerasEmbedding(unittest.TestCase, _TestEstimator): +class TestEstimatorKerasEmbedding(TestEstimatorBase): - def set_topology(self, model_type): - self.topology_container = Topologies( - organism="mouse", - model_class="embedding", - model_type=model_type, - topology_id="0.1" - ) + estimator: EstimatorKerasEmbedding - def init_estimator(self): + def init_estimator(self, model_type): + topology = TOPOLOGY_EMBEDDING_MODEL.copy() + topology["model_type"] = model_type + if model_type == "linear": + topology["hyper_parameters"]["latent_dim"] = 2 + else: + topology["hyper_parameters"]["latent_dim"] = (len(GENES), 2, len(GENES)) + self.model_type = model_type self.estimator = EstimatorKerasEmbedding( data=self.data, model_dir=None, - model_id=None, - organism="mouse", - organ="lung", - model_type=self.topology_container.model_type, - model_topology=self.topology_container.topology_id + model_id="testid", + model_topology=TopologyContainer(topology=topology, topology_id="0.1") ) def basic_estimator_test(self): @@ -113,43 +132,27 @@ def basic_estimator_test(self): new_weights = self.estimator.model.training_model.get_weights() for i in range(len(weights)): assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) - if self.topology_container.model_type != 'vae': + if self.model_type != 'vae': assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) assert np.allclose(prediction_embed, new_prediction_embed, rtol=1e-6, atol=1e-6) - def test_for_fatal_vae(self): - self.set_topology(model_type="vae") - self._test_for_fatal() - - def test_for_fatal_ae(self): - self.set_topology(model_type="ae") - self._test_for_fatal() - - def 
test_for_fatal_linear(self): - self.set_topology(model_type="linear") - self._test_for_fatal() +class TestEstimatorKerasCelltype(TestEstimatorBase): -class TestEstimatorKerasCelltype(unittest.TestCase, _TestEstimator): - - def set_topology(self, model_type): - self.topology_container = Topologies( - organism="mouse", - model_class="celltype", - model_type=model_type, - topology_id="0.0.1" - ) + estimator: EstimatorKerasCelltype - def init_estimator(self): + def init_estimator(self, model_type: str): + topology = TOPOLOGY_CELLTYPE_MODEL.copy() + topology["model_type"] = model_type + topology["hyper_parameters"]["latent_dim"] = (len(GENES), 2) + self.model_type = model_type self.estimator = EstimatorKerasCelltype( data=self.data, model_dir=None, - model_id=None, - organism="mouse", - organ="lung", - model_type=self.topology_container.model_type, - model_topology=self.topology_container.topology_id + model_id="testid", + model_topology=TopologyContainer(topology=topology, topology_id="0.1"), ) + self.estimator.celltype_universe.target_universe = TARGETS def basic_estimator_test(self): self.estimator.init_model() @@ -175,14 +178,33 @@ def basic_estimator_test(self): assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) - def test_for_fatal_mlp(self): - self.set_topology(model_type="mlp") - self._test_for_fatal() - def test_for_fatal_marker(self): - self.set_topology(model_type="marker") - self._test_for_fatal() +# Test embedding models: + + +def test_for_fatal_linear(): + test_estim = TestEstimatorKerasEmbedding() + test_estim.test_for_fatal(model_type="linear") + + +def test_for_fatal_ae(): + test_estim = TestEstimatorKerasEmbedding() + test_estim.test_for_fatal(model_type="ae") + + +def test_for_fatal_vae(): + test_estim = TestEstimatorKerasEmbedding() + test_estim.test_for_fatal(model_type="vae") + + +# Test cell type predictor models: + + +def test_for_fatal_mlp(): + 
test_estim = TestEstimatorKerasCelltype() + test_estim.test_for_fatal(model_type="mlp") -if __name__ == '__main__': - unittest.main() +def test_for_fatal_marker(): + test_estim = TestEstimatorKerasCelltype() + test_estim.test_for_fatal(model_type="marker") diff --git a/sfaira/unit_tests/models/__init__.py b/sfaira/unit_tests/models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sfaira/unit_tests/models/test_models.py b/sfaira/unit_tests/models/test_models.py deleted file mode 100644 index a2762fd9d..000000000 --- a/sfaira/unit_tests/models/test_models.py +++ /dev/null @@ -1,236 +0,0 @@ -import abc -import numpy as np -import tensorflow as tf -import unittest - -from sfaira.estimators.losses import LossLoglikelihoodNb -from sfaira.estimators.metrics import custom_mse - -import sfaira.models as models -from sfaira.models.base import BasicModel - - -class _TestModel: - model: BasicModel - data: np.ndarray - - @abc.abstractmethod - def init_model(self): - """ - Initialise target model as .model attribute. - - :return: - """ - pass - - def simulate(self): - """ - Simulate basic data example used for unit test. - - Sets attribute .data with simulated data. 
- - :return: - """ - self.data = np.random.uniform(low=0, high=100, size=(1000, 100)).astype('float32') - self.sf = np.zeros((1000, 1)) - - -class TestModelAe(unittest.TestCase, _TestModel): - - def init_model(self): - tf.compat.v1.set_random_seed(0) - self.dataset = tf.data.Dataset.from_tensor_slices( - ((self.data, self.sf), self.data) - ) - self.model = models.embedding.ModelAe(in_dim=self.data.shape[1]) - - def compile_models(self): - self.model.training_model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=LossLoglikelihoodNb(), - metrics=[custom_mse] - ) - - def train(self): - self.model.training_model.fit( - self.dataset.repeat().batch(128), - epochs=2, steps_per_epoch=100 - ) - - def test_for_fatal(self): - print(tf.__version__) - np.random.seed(1) - self.simulate() - self.init_model() - self.compile_models() - self.train() - _ = self.model.training_model.evaluate(x=(self.data, self.sf), y=self.data) - embedding = self.model.predict_embedding(x=(self.data, self.sf)) - assert embedding.shape[0] == self.data.shape[0], embedding.shape - denoised = self.model.predict_reconstructed(x=(self.data, self.sf)) - assert denoised.shape == self.data.shape, (denoised.shape, self.data.shape) - return True - - -class TestModelVae(unittest.TestCase, _TestModel): - - def init_model(self): - # (_,_), (_,sf) is dummy for kl loss - self.dataset = tf.data.Dataset.from_tensor_slices( - ((self.data, self.sf), (self.data, self.sf)) - ) - self.model = models.embedding.ModelVae(in_dim=self.data.shape[1]) - tf.compat.v1.set_random_seed(0) - - def compile_models(self): - self.model.training_model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=LossLoglikelihoodNb(), - metrics=[custom_mse] - ) - - def train(self): - self.model.training_model.fit( - self.dataset.repeat().batch(128), - epochs=2, steps_per_epoch=100 - ) - - def test_for_fatal(self): - print(tf.__version__) - np.random.seed(1) - self.simulate() - self.init_model() - 
self.compile_models() - self.train() - # (_,_), (_,sf) is dummy for kl loss - _ = self.model.training_model.evaluate(x=(self.data, self.sf), y=(self.data, self.sf)) - embedding = self.model.predict_embedding(x=(self.data, self.sf)) - assert embedding.shape[0] == self.data.shape[0], embedding.shape - denoised = self.model.predict_reconstructed(x=(self.data, self.sf)) - assert denoised.shape == self.data.shape, (denoised.shape, self.data.shape) - return True - - -class TestModelLinear(unittest.TestCase, _TestModel): - - def init_model(self): - self.dataset = tf.data.Dataset.from_tensor_slices( - ((self.data, self.sf), self.data) - ) - self.model = models.embedding.ModelLinear( - in_dim=self.data.shape[1], - output_layer="nb_shared_disp" - ) - tf.compat.v1.set_random_seed(0) - - def compile_models(self): - self.model.training_model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=LossLoglikelihoodNb(), - metrics=[custom_mse] - ) - - def train(self): - self.model.training_model.fit( - self.dataset.repeat().batch(128), - epochs=2, steps_per_epoch=100 - ) - - def test_for_fatal(self): - print(tf.__version__) - np.random.seed(1) - self.simulate() - self.init_model() - self.compile_models() - self.train() - _ = self.model.training_model.evaluate(x=(self.data, self.sf), y=self.data) - embedding = self.model.predict_embedding(x=(self.data, self.sf)) - assert embedding.shape[0] == self.data.shape[0], (embedding.shape, self.data.shape) - denoised = self.model.predict_reconstructed(x=(self.data, self.sf)) - assert denoised.shape == self.data.shape, (denoised.shape, self.data.shape) - return True - - -class TestCelltypeMlp(unittest.TestCase, _TestModel): - - def init_model(self): - self.out_dim = 20 - self.dataset = tf.data.Dataset.from_tensor_slices( - (self.data, np.ones((self.data.shape[0], self.out_dim))) - ) - self.model = models.celltype.CellTypeMlp( - in_dim=self.data.shape[1], - out_dim=self.out_dim, - units=[30] - ) - - def compile_model(self): 
- self.model.training_model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=tf.keras.losses.CategoricalCrossentropy(), - metrics=["accuracy"] - ) - - def train(self): - train_dataset = self.dataset - train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64) - self.model.training_model.fit(train_dataset, epochs=5, steps_per_epoch=2) - - def test_for_fatal(self): - print(tf.__version__) - np.random.seed(1) - self.simulate() - self.init_model() - self.compile_model() - self.train() - self.model.training_model.evaluate( - x=self.data, y=np.ones((self.data.shape[0], self.out_dim)) - ) - self.model.training_model.predict( - x=self.data - ) - return True - - -class TestCelltypeMarker(unittest.TestCase, _TestModel): - - def init_model(self): - self.out_dim = 20 - self.dataset = tf.data.Dataset.from_tensor_slices( - (self.data, np.ones((self.data.shape[0], self.out_dim))) - ) - self.model = models.celltype.CellTypeMarker( - in_dim=self.data.shape[1], - out_dim=self.out_dim, - ) - - def compile_model(self): - self.model.training_model.compile( - optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), - loss=tf.keras.losses.CategoricalCrossentropy(), - metrics=["accuracy"] - ) - - def train(self): - train_dataset = self.dataset - train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64) - self.model.training_model.fit(train_dataset, epochs=5, steps_per_epoch=2) - - def test_for_fatal(self): - print(tf.__version__) - np.random.seed(1) - self.simulate() - self.init_model() - self.compile_model() - self.train() - self.model.training_model.evaluate( - x=self.data, y=np.ones((self.data.shape[0], self.out_dim)) - ) - self.model.training_model.predict( - x=self.data - ) - return True - - -if __name__ == '__main__': - unittest.main() diff --git a/sfaira/versions/genomes.py b/sfaira/versions/genomes.py index 872fd0063..4583763ab 100644 --- a/sfaira/versions/genomes.py +++ b/sfaira/versions/genomes.py @@ -83,31 +83,55 @@ def cache(self) -> 
pandas.DataFrame: class GenomeContainer: + genome_tab: pandas.DataFrame assembly: str - organism: str def __init__( self, - organism: str, - assembly: Union[None, str], + organism: Union[None, str] = None, + assembly: Union[None, str] = None, ): - self.organism = organism - # Set defaults: - if self.organism == "human": - self.assembly = assembly if assembly is not None else "Homo_sapiens.GRCh38.102" - elif self.organism == "mouse": - self.assembly = assembly if assembly is not None else "Mus_musculus.GRCm38.102" + if assembly is None: + # Set defaults based on organism if assembly is not given. + if self.organism == "human": + self.assembly = "Homo_sapiens.GRCh38.102" + elif self.organism == "mouse": + self.assembly = "Mus_musculus.GRCm38.102" + else: + raise ValueError(f"organism {organism} not found") else: - raise ValueError(f"organism {organism} not found") + self.assembly = assembly self.gc = GtfInterface(assembly=self.assembly) self.load_genome() + @property + def organism(self): + return self.gc.organism + def load_genome(self): self.genome_tab = self.gc.cache - def subset(self, gene_biotype: str): - self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_TYPE].values == gene_biotype, :].copy() + def subset( + self, + biotype: Union[None, str] = None, + symbols: Union[None, str] = None, + ensg: Union[None, str] = None, + ): + """ + Subset by gene biotype or to gene list defined by identifiers (symbol or ensemble ID). 
+ + :param biotype: + :param symbols: + :param ensg: + :return: + """ + if biotype is None: + self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_TYPE].values == biotype, :].copy() + if symbols is None: + self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_SYMBOL].values == symbols, :].copy() + if ensg is None: + self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_ID].values == ensg, :].copy() @property def names(self): @@ -122,7 +146,7 @@ def type(self): return self.genome_tab[KEY_TYPE].values.tolist() @property - def ngenes(self) -> int: + def n_var(self) -> int: return self.genome_tab.shape[0] @property diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index dbd03dea0..279aa1c22 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -266,7 +266,7 @@ def nodes(self) -> List[Tuple[str, dict]]: @property def nodes_dict(self) -> dict: - return self.graph.nodes.items() + return dict(list(self.graph.nodes.items())) @property def node_names(self) -> List[str]: @@ -288,13 +288,14 @@ def set_leaves(self, nodes: list = None): # ToDo check that these are not include parents of each other! 
if nodes is not None: for x in nodes: - assert x in self.graph.nodes, f"{x} not found" + assert x in self.node_names, f"{x} not found" self.leaves = nodes else: self.leaves = self.get_all_roots() def get_all_roots(self) -> List[str]: - return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] + return [v["name"] for v, x in zip(self.graph.nodes.values(), self.graph.nodes()) + if self.graph.in_degree(x) == 0] def get_ancestors(self, node: str) -> List[str]: if node not in self.node_ids: @@ -334,7 +335,7 @@ def map_to_leaves(self, node: str, return_type: str = "elements", include_self: if return_type == "elements": return [x for x in self.leaves if x in ancestors] if return_type == "idx": - return np.array([i for i, (x, y) in enumerate(self.leaves) if x in ancestors]) + return np.array([i for i, x in enumerate(self.leaves) if x in ancestors]) @abc.abstractmethod def synonym_node_properties(self) -> List[str]: diff --git a/sfaira/versions/metadata/universe.py b/sfaira/versions/metadata/universe.py index 69a52bd63..f7f2a408e 100644 --- a/sfaira/versions/metadata/universe.py +++ b/sfaira/versions/metadata/universe.py @@ -17,7 +17,7 @@ class CelltypeUniverse: onto_uberon: OntologyUberon _target_universe: Union[List[str], None] - def __init__(self, cl: OntologyCelltypes, uberon: OntologyUberon, organism: str, **kwargs): + def __init__(self, cl: OntologyCelltypes, uberon: OntologyUberon, organism: Union[str, None] = None, **kwargs): """ :param organism: Organism, defines ontology extension used. @@ -28,20 +28,20 @@ def __init__(self, cl: OntologyCelltypes, uberon: OntologyUberon, organism: str, self._target_universe = None self._set_extension(organism=organism) - def _set_extension(self, organism): + def _set_extension(self, organism: Union[str, None]): """ - :param organism: Organism, defines ontology extension used. 
""" - if organism == "human": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) - elif organism == "mouse": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) - else: - raise ValueError(f"organism {organism} not found") + if organism is not None: + if organism == "human" or organism.lower() == "homo_sapiens": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) + elif organism == "mouse" or organism.lower() == "mus_musculus": + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) + else: + raise ValueError(f"organism {organism} not found") @property - def target_universe(self): + def target_universe(self) -> List[str]: """ Ontology classes of target universe (understandable cell type names). @@ -53,11 +53,11 @@ def target_universe(self): def target_universe(self, x: List[str]): # Check that all nodes are valid: for xx in x: - if xx not in self.onto_cl.nodes: + if xx not in self.onto_cl.node_names: raise ValueError(f"cell type {xx} was not in ontology") # Default universe is the full set of leave nodes of ontology: - self.target_universe = self.onto_cl.leaves - self.onto_cl.set_leaves(self.target_universe) + self._target_universe = x + self.onto_cl.set_leaves(self._target_universe) @property def target_universe_ids(self): @@ -93,7 +93,7 @@ def map_to_target_leaves( self, nodes: List[str], return_type: str = "elements" - ): + ) -> list: """ Map a given list of nodes to leave nodes defined for this ontology. :param nodes: diff --git a/sfaira/versions/topologies/__init__.py b/sfaira/versions/topologies/__init__.py index 31c062410..16095b0e5 100644 --- a/sfaira/versions/topologies/__init__.py +++ b/sfaira/versions/topologies/__init__.py @@ -1,3 +1,34 @@ from . import human from . 
import mouse -from .class_interface import Topologies +from .class_interface import TopologyContainer + +TOPOLOGIES = { + "mouse": { + "celltype": { + "marker": mouse.celltype.celltypemarker.CELLTYPEMARKER_TOPOLOGIES, + "mlp": mouse.celltype.celltypemlp.CELLTYPEMLP_TOPOLOGIES + }, + "embedding": { + "ae": mouse.embedding.ae.AE_TOPOLOGIES, + "linear": mouse.embedding.linear.LINEAR_TOPOLOGIES, + "nmf": mouse.embedding.nmf.NMF_TOPOLOGIES, + "vae": mouse.embedding.vae.VAE_TOPOLOGIES, + "vaeiaf": mouse.embedding.vaeiaf.VAEIAF_TOPOLOGIES, + "vaevamp": mouse.embedding.vaevamp.VAEVAMP_TOPOLOGIES + } + }, + "human": { + "celltype": { + "marker": human.celltype.celltypemarker.CELLTYPEMARKER_TOPOLOGIES, + "mlp": human.celltype.celltypemlp.CELLTYPEMLP_TOPOLOGIES + }, + "embedding": { + "ae": human.embedding.ae.AE_TOPOLOGIES, + "linear": human.embedding.linear.LINEAR_TOPOLOGIES, + "nmf": human.embedding.nmf.NMF_TOPOLOGIES, + "vae": human.embedding.vae.VAE_TOPOLOGIES, + "vaeiaf": human.embedding.vaeiaf.VAEIAF_TOPOLOGIES, + "vaevamp": human.embedding.vaevamp.VAEVAMP_TOPOLOGIES + } + } +} diff --git a/sfaira/versions/topologies/class_interface.py b/sfaira/versions/topologies/class_interface.py index 30f824a73..7513b01b0 100644 --- a/sfaira/versions/topologies/class_interface.py +++ b/sfaira/versions/topologies/class_interface.py @@ -1,70 +1,34 @@ from sfaira.versions.genomes import GenomeContainer -from . import human -from . import mouse +class TopologyContainer: -class Topologies: + """ + Class interface for a YAML-style defined model topology that loads a genome container tailored to the model. 
+ """ def __init__( self, - organism: str, - model_class: str, - model_type: str, - topology_id: str + topology: dict, + topology_id: str, ): - self.topologies = { - "mouse": { - "celltype": { - "marker": mouse.celltype.celltypemarker.CELLTYPEMARKER_TOPOLOGIES, - "mlp": mouse.celltype.celltypemlp.CELLTYPEMLP_TOPOLOGIES - }, - "embedding": { - "ae": mouse.embedding.ae.AE_TOPOLOGIES, - "linear": mouse.embedding.linear.LINEAR_TOPOLOGIES, - "nmf": mouse.embedding.nmf.NMF_TOPOLOGIES, - "vae": mouse.embedding.vae.VAE_TOPOLOGIES, - "vaeiaf": mouse.embedding.vaeiaf.VAEIAF_TOPOLOGIES, - "vaevamp": mouse.embedding.vaevamp.VAEVAMP_TOPOLOGIES - } - }, - "human": { - "celltype": { - "marker": human.celltype.celltypemarker.CELLTYPEMARKER_TOPOLOGIES, - "mlp": human.celltype.celltypemlp.CELLTYPEMLP_TOPOLOGIES - }, - "embedding": { - "ae": human.embedding.ae.AE_TOPOLOGIES, - "linear": human.embedding.linear.LINEAR_TOPOLOGIES, - "nmf": human.embedding.nmf.NMF_TOPOLOGIES, - "vae": human.embedding.vae.VAE_TOPOLOGIES, - "vaeiaf": human.embedding.vaeiaf.VAEIAF_TOPOLOGIES, - "vaevamp": human.embedding.vaevamp.VAEVAMP_TOPOLOGIES - } - } - } - self.organism = organism - self.model_class = model_class - self.model_type = model_type + self.topology = topology + self.gc = GenomeContainer(assembly=self.topology["input"]["genome"]) + self.gc.subset(**dict([tuple(self.topology["input"]["genes"])])) self.topology_id = topology_id - assert organism in list(self.topologies.keys()), \ - "organism %s not found in %s" % \ - (organism, list(self.topologies.keys())) - assert model_class in list(self.topologies[organism].keys()), \ - "model_class %s not found in %s" % \ - (model_type, list(self.topologies[organism].keys())) - assert model_type in list(self.topologies[organism][model_class].keys()), \ - "model_type %s not found in %s" % \ - (model_type, list(self.topologies[organism][model_class].keys())) - assert topology_id in list(self.topologies[organism][model_class][model_type].keys()), \ - 
"topology_id %s not found in %s" % \ - (topology_id, list(self.topologies[organism][model_class][model_type].keys())) - self.genome_container = GenomeContainer(organism=organism, assembly=self.topology["genome"]) @property - def topology(self): - return self.topologies[self.organism][self.model_class][self.model_type][self.topology_id] + def model_type(self): + return self.topology["model_type"] @property - def ngenes(self): - return self.genome_container.ngenes + def output(self): + return self.topology["output"] + + @property + def n_var(self): + return self.gc.n_var + + @property + def organism(self): + return self.gc.organism diff --git a/sfaira/versions/topologies/human/celltype/celltypemarker.py b/sfaira/versions/topologies/human/celltype/celltypemarker.py index c99b1025d..b67211892 100644 --- a/sfaira/versions/topologies/human/celltype/celltypemarker.py +++ b/sfaira/versions/topologies/human/celltype/celltypemarker.py @@ -1,7 +1,14 @@ CELLTYPEMARKER_TOPOLOGIES = { "0.0.1": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "marker", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "l1_coef": 0., "l2_coef": 0., diff --git a/sfaira/versions/topologies/human/celltype/celltypemlp.py b/sfaira/versions/topologies/human/celltype/celltypemlp.py index 1d5cd616b..fec0c2c93 100644 --- a/sfaira/versions/topologies/human/celltype/celltypemlp.py +++ b/sfaira/versions/topologies/human/celltype/celltypemlp.py @@ -1,7 +1,14 @@ CELLTYPEMLP_TOPOLOGIES = { "0.0.1": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [], "activation": None, @@ -17,8 +24,15 @@ } }, "0.1.1": { - 
"genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [128], "activation": "selu", @@ -34,8 +48,15 @@ } }, "0.1.2": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [256, 128], "activation": "selu", @@ -51,8 +72,15 @@ } }, "0.1.3": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [512, 256, 128], "activation": "selu", diff --git a/sfaira/versions/topologies/human/embedding/ae.py b/sfaira/versions/topologies/human/embedding/ae.py index d190585d6..59f958c36 100644 --- a/sfaira/versions/topologies/human/embedding/ae.py +++ b/sfaira/versions/topologies/human/embedding/ae.py @@ -1,7 +1,8 @@ AE_TOPOLOGIES = { "0.1": { + "model_type": "ae", "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -16,8 +17,9 @@ }, "0.2": { + "model_type": "ae", "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -32,8 +34,9 @@ }, "0.3": { + "model_type": "ae", "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., 
@@ -48,8 +51,12 @@ }, "0.4": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "ae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/linear.py b/sfaira/versions/topologies/human/embedding/linear.py index ca2721129..06947132e 100644 --- a/sfaira/versions/topologies/human/embedding/linear.py +++ b/sfaira/versions/topologies/human/embedding/linear.py @@ -1,7 +1,11 @@ LINEAR_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -12,8 +16,12 @@ }, "0.2": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -25,7 +33,7 @@ "0.3": { "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/nmf.py b/sfaira/versions/topologies/human/embedding/nmf.py index 2efd21f9c..3c49a8189 100644 --- a/sfaira/versions/topologies/human/embedding/nmf.py +++ b/sfaira/versions/topologies/human/embedding/nmf.py @@ -1,7 +1,8 @@ NMF_TOPOLOGIES = { "0.1": { + "model_type": "linear", "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -12,8 +13,9 @@ }, "0.2": { + "model_type": "linear", "genome": "Homo_sapiens.GRCh38.102", - 
"genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -24,8 +26,12 @@ }, "0.3": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/vae.py b/sfaira/versions/topologies/human/embedding/vae.py index 4ec8370c7..7d6bc28e4 100644 --- a/sfaira/versions/topologies/human/embedding/vae.py +++ b/sfaira/versions/topologies/human/embedding/vae.py @@ -1,7 +1,11 @@ VAE_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -15,8 +19,9 @@ }, "0.2": { + "model_type": "vae", "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -30,8 +35,12 @@ }, "0.3": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -45,8 +54,12 @@ }, "0.4": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/vaeiaf.py 
b/sfaira/versions/topologies/human/embedding/vaeiaf.py index d732fd1e4..ea1d415a9 100644 --- a/sfaira/versions/topologies/human/embedding/vaeiaf.py +++ b/sfaira/versions/topologies/human/embedding/vaeiaf.py @@ -1,7 +1,11 @@ VAEIAF_TOPOLOGIES = { "0.1": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vaeiaf", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "n_iaf": 2, @@ -15,8 +19,12 @@ } }, "0.2": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vaeiaf", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "n_iaf": 2, diff --git a/sfaira/versions/topologies/human/embedding/vaevamp.py b/sfaira/versions/topologies/human/embedding/vaevamp.py index a94131783..ea47b47ff 100644 --- a/sfaira/versions/topologies/human/embedding/vaevamp.py +++ b/sfaira/versions/topologies/human/embedding/vaevamp.py @@ -1,7 +1,11 @@ VAEVAMP_TOPOLOGIES = { "0.2": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vaevamp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, (32, 32), 128, 256), "l1_coef": 0., @@ -14,8 +18,12 @@ } }, "0.3": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["protein_coding"], + "model_type": "vaevamp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, (64, 64), 256, 512), "l1_coef": 0., diff --git a/sfaira/versions/topologies/mouse/celltype/celltypemarker.py b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py index 0625f7636..082adbf45 100644 --- 
a/sfaira/versions/topologies/mouse/celltype/celltypemarker.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py @@ -1,7 +1,14 @@ CELLTYPEMARKER_TOPOLOGIES = { "0.0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "marker", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "l1_coef": 0., "l2_coef": 0., diff --git a/sfaira/versions/topologies/mouse/celltype/celltypemlp.py b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py index 1f85bc78a..ec302ce46 100644 --- a/sfaira/versions/topologies/mouse/celltype/celltypemlp.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py @@ -1,7 +1,14 @@ CELLTYPEMLP_TOPOLOGIES = { "0.0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [], "activation": None, @@ -17,8 +24,15 @@ } }, "0.1.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, "hyper_parameters": { "units": [128], "activation": "selu", @@ -34,8 +48,9 @@ } }, "0.1.2": { + "model_type": "mlp", "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "units": [256, 128], "activation": "selu", @@ -51,8 +66,15 @@ } }, "0.1.3": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "mlp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + 
"targets": None, + }, "hyper_parameters": { "units": [512, 256, 128], "activation": "selu", diff --git a/sfaira/versions/topologies/mouse/embedding/ae.py b/sfaira/versions/topologies/mouse/embedding/ae.py index d837520d2..3f54884a7 100644 --- a/sfaira/versions/topologies/mouse/embedding/ae.py +++ b/sfaira/versions/topologies/mouse/embedding/ae.py @@ -1,7 +1,8 @@ AE_TOPOLOGIES = { "0.1": { + "model_type": "ae", "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -16,8 +17,9 @@ }, "0.2": { + "model_type": "ae", "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -32,8 +34,9 @@ }, "0.3": { + "model_type": "ae", "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "genes": ["biotype", "protein_coding"], "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -48,8 +51,12 @@ }, "0.4": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "ae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topologies/mouse/embedding/linear.py b/sfaira/versions/topologies/mouse/embedding/linear.py index 1eaecc63a..228e8cd32 100644 --- a/sfaira/versions/topologies/mouse/embedding/linear.py +++ b/sfaira/versions/topologies/mouse/embedding/linear.py @@ -1,7 +1,11 @@ LINEAR_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -12,8 +16,12 
@@ }, "0.2": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -24,8 +32,12 @@ }, "0.3": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/mouse/embedding/nmf.py b/sfaira/versions/topologies/mouse/embedding/nmf.py index 4817cd588..d8777708c 100644 --- a/sfaira/versions/topologies/mouse/embedding/nmf.py +++ b/sfaira/versions/topologies/mouse/embedding/nmf.py @@ -1,7 +1,11 @@ NMF_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -12,8 +16,12 @@ }, "0.2": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., @@ -24,8 +32,12 @@ }, "0.3": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "linear", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/mouse/embedding/vae.py b/sfaira/versions/topologies/mouse/embedding/vae.py index cacaf76c7..2650cb2b6 100644 --- a/sfaira/versions/topologies/mouse/embedding/vae.py +++ 
b/sfaira/versions/topologies/mouse/embedding/vae.py @@ -1,7 +1,11 @@ VAE_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -15,8 +19,12 @@ }, "0.2": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -30,8 +38,12 @@ }, "0.3": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -45,8 +57,12 @@ }, "0.4": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vae", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 64, 128, 256, 512), "l2_coef": 0., diff --git a/sfaira/versions/topologies/mouse/embedding/vaeiaf.py b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py index b23b3675f..0a998af56 100644 --- a/sfaira/versions/topologies/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py @@ -1,7 +1,11 @@ VAEIAF_TOPOLOGIES = { "0.1": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vaeiaf", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "n_iaf": 2, @@ -15,8 +19,12 @@ } }, "0.2": { - "genome": 
"Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vaeiaf", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "n_iaf": 2, diff --git a/sfaira/versions/topologies/mouse/embedding/vaevamp.py b/sfaira/versions/topologies/mouse/embedding/vaevamp.py index 13553a77d..dbe3c9620 100644 --- a/sfaira/versions/topologies/mouse/embedding/vaevamp.py +++ b/sfaira/versions/topologies/mouse/embedding/vaevamp.py @@ -1,7 +1,11 @@ VAEVAMP_TOPOLOGIES = { "0.2": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vaevamp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, (32, 32), 128, 256), "l1_coef": 0., @@ -14,8 +18,12 @@ } }, "0.3": { - "genome": "Mus_musculus.GRCm38.102", - "genes": ["protein_coding"], + "model_type": "vaevamp", + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, (64, 64), 256, 512), "l1_coef": 0., From 1a8cbe853aa20ee9eaf049c8fd7550d567388c46 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 16 Apr 2021 20:16:09 +0200 Subject: [PATCH 103/161] finished target universe building and caching (#222) - fixed interface to estimator - added unit tests - removed developmental connections from defaults ontology --- sfaira/consts/ontologies.py | 6 +- sfaira/data/base/dataset.py | 5 +- sfaira/data/base/dataset_group.py | 2 +- .../utils_scripts/create_target_universes.py | 16 +- sfaira/estimators/keras.py | 34 ++-- sfaira/models/celltype/mlp.py | 4 +- sfaira/train/summaries.py | 4 +- .../unit_tests/estimators/test_estimator.py | 2 +- sfaira/unit_tests/versions/test_ontologies.py | 66 ++++++- sfaira/unit_tests/versions/test_universe.py | 20 ++ sfaira/versions/metadata/__init__.py | 2 +- sfaira/versions/metadata/base.py | 183 ++++++++++++++---- .../versions/metadata/extensions/__init__.py | 3 +- .../metadata/extensions/obo_extension.py | 1 + .../extensions/obo_extension_human.py | 1 - .../extensions/obo_extension_mouse.py | 1 - sfaira/versions/metadata/universe.py | 94 +++------ 17 files changed, 293 insertions(+), 151 deletions(-) create mode 100644 sfaira/unit_tests/versions/test_universe.py create mode 100644 sfaira/versions/metadata/extensions/obo_extension.py delete mode 100644 sfaira/versions/metadata/extensions/obo_extension_human.py delete mode 100644 sfaira/versions/metadata/extensions/obo_extension_mouse.py diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 7c1fd2af0..78d5e9e39 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -1,11 +1,11 @@ -from sfaira.versions.metadata import OntologyList, OntologyCelltypes +from sfaira.versions.metadata import OntologyList, OntologyCl from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus class OntologyContainerSfaira: - _cellontology_class: OntologyCelltypes + _cellontology_class: OntologyCl def __init__(self): self.annotated = 
OntologyList(terms=[True, False]) @@ -42,4 +42,4 @@ def cellontology_class(self): @cellontology_class.setter def cellontology_class(self, x: str): - self._cellontology_class = OntologyCelltypes(branch=x) + self._cellontology_class = OntologyCl(branch=x) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 7bb82660e..3df1a66d9 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -1162,7 +1162,7 @@ def __project_name_to_id_obs( """ ontology = getattr(self.ontology_container_sfaira, ontology) map_vals = dict([ - (x, ontology.id_from_name(x)) + (x, ontology.convert_to_id(x)) for x in np.unique([ xx for xx in self.adata.obs[key_in].values if (xx not in map_exceptions and xx is not None) @@ -2051,7 +2051,6 @@ def celltypes_universe(self): self._celltype_universe = CelltypeUniverse( cl=self.ontology_celltypes, uberon=self.ontology_container_sfaira.organ, - organism=self.organism, ) return self._celltype_universe @@ -2190,7 +2189,7 @@ def _value_protection( attempted_clean.append(x) else: if isinstance(allowed, OntologyHierarchical) and x in allowed.node_ids: - attempted_clean.append(allowed.name_from_id(x)) + attempted_clean.append(allowed.convert_to_name(x)) else: raise ValueError(f"'{x}' is not a valid entry for {attr} in {self.id}.") else: diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 8d753e8d5..46cadc4c3 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -665,7 +665,7 @@ def clean_ontology_class_map(self): ) # Adds a third column with the corresponding ontology IDs into the file. 
tab[self._adata_ids_sfaira.classmap_target_id_key] = [ - self.ontology_celltypes.id_from_name(x) + self.ontology_celltypes.convert_to_id(x) if x != self._adata_ids_sfaira.unknown_celltype_identifier and x != self._adata_ids_sfaira.not_a_cell_celltype_identifier else self._adata_ids_sfaira.unknown_celltype_identifier diff --git a/sfaira/data/utils_scripts/create_target_universes.py b/sfaira/data/utils_scripts/create_target_universes.py index 0efd698ff..cae6643aa 100644 --- a/sfaira/data/utils_scripts/create_target_universes.py +++ b/sfaira/data/utils_scripts/create_target_universes.py @@ -1,5 +1,5 @@ +import numpy as np import os -import sfaira import sys import tensorflow as tf @@ -16,7 +16,6 @@ config_path = str(sys.argv[2]) out_path = str(sys.argv[3]) - for f in os.listdir(config_path): fn = os.path.join(config_path, f) if os.path.isfile(fn): # only files @@ -27,4 +26,15 @@ organ = f.split("_")[2] store = DistributedStore(cache_path=store_path) store.load_config(fn=fn) - store.write_config(os.path.join(config_path, f"targets_{organism}_{organ}.csv")) + celltypes_found = {} + for adata in store.adatas: + celltypes_found = celltypes_found.union(set(adata.obs["cell_ontology_class"].values)) + celltypes_found = np.sort(list(celltypes_found - { + store._adata_ids_sfaira.unknown_celltype_identifier, + store._adata_ids_sfaira.not_a_cell_celltype_identifier + })).tolist() + celltypes_found = store.celltypes_universe.onto_cl.get_effective_leaves(x=celltypes_found) + store.celltypes_universe.write_target_universe( + fn=os.path.join(config_path, f"targets_{organism}_{organ}.csv"), + x=celltypes_found, + ) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 5d0f7822b..43b8ca69d 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -15,7 +15,7 @@ from sfaira.consts import AdataIdsSfaira, OCS from sfaira.data import DistributedStore from sfaira.models import BasicModel -from sfaira.versions.metadata import CelltypeUniverse, 
OntologyCelltypes +from sfaira.versions.metadata import CelltypeUniverse, OntologyCl from sfaira.versions.topologies import TopologyContainer from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ @@ -874,15 +874,14 @@ def get_gradients(x_batch): tape.watch(x) model_out = model((x, sf)) if abs_gradients: - def f(x): - return abs(x) + def f(xx): + return abs(xx) else: - def f(x): - return x + def f(xx): + return xx # marginalize on batch level and then accumulate batches # batch_jacobian gives output of size: (batch_size, latent_dim, input_dim) - batch_gradients = f(tape.batch_jacobian(model_out, x)) - return batch_gradients + return f(tape.batch_jacobian(model_out, x)) for step, (x_batch, y_batch) in tqdm(enumerate(ds), total=np.ceil(n_obs / batch_size)): batch_gradients = get_gradients(x_batch).numpy() @@ -933,11 +932,11 @@ def __init__( assert "targets" in self.topology_container.output.keys(), self.topology_container.output.keys() self.max_class_weight = max_class_weight self.celltype_universe = CelltypeUniverse( - cl=OntologyCelltypes(branch=self.topology_container.output["cl"]), + cl=OntologyCl(branch=self.topology_container.output["cl"]), uberon=OCS.organ, organism=self.organism, ) - self.celltype_universe.target_universe = self.topology_container.output["targets"] + self.celltype_universe.leaves = self.topology_container.output["targets"] def init_model( self, @@ -964,23 +963,24 @@ def init_model( @property def ids(self): - return self.celltype_universe.target_universe + return self.celltype_universe.leaves @property def ntypes(self): - return self.celltype_universe.ntypes + return self.celltype_universe.onto_cl.n_leaves @property def ontology_ids(self): - return self.celltype_universe.target_universe + return self.celltype_universe.leaves def _one_hot_encoder(self): def encoder(x): - idx = 
self.celltype_universe.map_to_target_leaves( - nodes=[x], - return_type="idx" - )[0] + idx = self.celltype_universe.onto_cl.map_to_leaves( + node=x, + return_type="idx", + include_self=True, + ) y = np.zeros((self.ntypes,), dtype="float32") y[idx] = 1. / len(idx) return y @@ -1038,7 +1038,7 @@ def _get_dataset( raise ValueError("using weights with store is not supported yet") n_obs = self.data.n_obs n_features = self.data.n_vars - n_labels = len(self.data.celltypes_universe.target_universe) + n_labels = self.data.celltypes_universe.onto_cl.n_leaves generator_raw = self.data.generator( batch_size=1, obs_keys=["cell_ontology_class"], diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index 7586e76d6..c54d2624b 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -95,7 +95,7 @@ def __init__( hyperpar[k] = override_hyperpar[k] super().__init__( in_dim=topology_container.n_var, - out_dim=celltypes_version.ntypes, + out_dim=celltypes_version.onto_cl.n_leaves, **hyperpar ) print('passed hyperpar: \n', hyperpar) @@ -110,6 +110,6 @@ def __init__( ("genome_size", self.genome_size), ("model_class", self.model_class), ("model_type", self.model_type), - ("ntypes", celltypes_version.ntypes), + ("ntypes", celltypes_version.onto_cl.n_leaves), ] ) diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 55c566b1b..7a36780da 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -888,7 +888,7 @@ def plot_best_classwise_heatmap( cu = CelltypeUniverse(organism=organism) # TODO set target universe. for k in celltypelist: - if k not in cu.target_universe: + if k not in cu.leaves: if k not in cu.ontology.node_names: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) for leaf in cu[k]: # TODO get leaves @@ -1048,7 +1048,7 @@ def plot_best_classwise_scatter( cu = CelltypeUniverse(organism=organism) # TODO set target universe. 
for k in celltypelist: - if k not in cu.target_universe: + if k not in cu.leaves: if k not in cu.ontology.node_names: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) for leaf in cu[k]: # TODO get leaves diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index 01ccada5b..a8637a404 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -152,7 +152,7 @@ def init_estimator(self, model_type: str): model_id="testid", model_topology=TopologyContainer(topology=topology, topology_id="0.1"), ) - self.estimator.celltype_universe.target_universe = TARGETS + self.estimator.celltype_universe.leaves = TARGETS def basic_estimator_test(self): self.estimator.init_model() diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index 02f3d7dbd..cf13f5e31 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -1,21 +1,77 @@ -from sfaira.versions.metadata import OntologyUberon, OntologyCelltypes, OntologyMondo, OntologyMmusdv, OntologyHsapdv +import numpy as np +from sfaira.versions.metadata import OntologyUberon, OntologyCl, OntologyMondo, OntologyMmusdv, OntologyHsapdv """ -CL +OntologyCelltypes """ def test_cl_loading(): - _ = OntologyCelltypes(branch="v2021-02-01") + """ + Tests if ontology can be initialised. + """ + _ = OntologyCl(branch="v2021-02-01") -def test_cl_subsetting(): - oc = OntologyCelltypes(branch="v2021-02-01") +def test_cl_is_a(): + """ + Tests if is-a relationships work correctly. 
+ """ + oc = OntologyCl(branch="v2021-02-01") assert oc.is_a(query="T cell", reference="lymphocyte") assert oc.is_a(query="lymphocyte", reference="lymphocyte") assert not oc.is_a(query="lymphocyte", reference="T cell") +def test_cl_effective_leaves(): + """ + Tests if node sets can be mapped to effective leaf sets via `OntologyCelltypes.get_effective_leaves()` + """ + oc = OntologyCl(branch="v2021-02-01") + x = oc.get_effective_leaves(x=[ + "CD4-positive helper T cell", "lymphocyte", "stromal cell", "T cell", "T-helper 1 cell", + "T-helper 17 cell" + ]) + x = oc.convert_to_name(x) + assert set(x) == {"stromal cell", "T-helper 1 cell", "T-helper 17 cell"}, x + + +def test_cl_map_leaves(): + """ + Tests if nodes can be mapped to leave nodes in ontology. + """ + oc = OntologyCl(branch="v2021-02-01") + leaf_map_1 = oc.convert_to_name(oc.map_to_leaves(node="CD4-positive helper T cell", include_self=True)) + leaf_map_2 = oc.map_to_leaves(node="CD4-positive helper T cell", include_self=True, return_type="idx") + assert len(leaf_map_1) == 7 + assert np.all(leaf_map_2 == np.sort([oc.convert_to_name(oc.leaves).index(x) for x in list(leaf_map_1)])) + + +def test_cl_set_leaves(): + """ + Tests if ontology behaves correctly if leaf nodes were reset. 
+ """ + oc = OntologyCl(branch="v2021-02-01", use_developmental_relationships=False) + targets = ["stromal cell", "T-helper 1 cell", "T-helper 17 cell"] + oc.leaves = targets + leaves = oc.convert_to_name(oc.leaves) + assert set(leaves) == set(targets), leaves + assert len(oc.node_ids) == 22 + assert np.all([x in oc.convert_to_name(oc.node_ids) for x in targets]), oc.convert_to_name(oc.node_ids) + leaf_map_1 = oc.convert_to_name(oc.map_to_leaves(node="lymphocyte", include_self=True)) + leaf_map_2 = oc.convert_to_name(oc.map_to_leaves(node="lymphocyte", include_self=False)) + leaf_map_3 = oc.map_to_leaves(node="lymphocyte", include_self=True, return_type="idx") + leaf_map_4 = oc.convert_to_name(oc.map_to_leaves(node="T-helper 1 cell", include_self=True)) + leaf_map_5 = oc.map_to_leaves(node="T-helper 1 cell", include_self=False) + leaf_map_6 = oc.map_to_leaves(node="T-helper 1 cell", include_self=True, return_type="idx") + assert set(leaf_map_1) == {"T-helper 1 cell", "T-helper 17 cell"} + assert set(leaf_map_2) == {"T-helper 1 cell", "T-helper 17 cell"} + assert np.all(leaf_map_3 == np.sort([oc.convert_to_name(oc.leaves).index(x) for x in list(leaf_map_1)])) + assert set(leaf_map_4) == {"T-helper 1 cell"} + assert leaf_map_5 == [] + assert np.all(leaf_map_6 == np.sort([oc.convert_to_name(oc.leaves).index(x) for x in list(leaf_map_4)])) + + """ Hancestro """ diff --git a/sfaira/unit_tests/versions/test_universe.py b/sfaira/unit_tests/versions/test_universe.py new file mode 100644 index 000000000..560eec945 --- /dev/null +++ b/sfaira/unit_tests/versions/test_universe.py @@ -0,0 +1,20 @@ +import os + +from sfaira.versions.metadata import CelltypeUniverse, OntologyCl, OntologyUberon + +""" +CelltypeUniverse +""" + + +def test_universe_io(): + tmp_fn = "./universe_tempp.csv" + targets = ["stromal cell", "lymphocyte", "T-helper 1 cell", "T-helper 17 cell"] + cl = OntologyCl(branch="v2021-02-01") + uberon = OntologyUberon() + cu = CelltypeUniverse(cl=cl, uberon=uberon) + 
cu.write_target_universe(fn=tmp_fn, x=targets) + cu.load_target_universe(fn=tmp_fn) + os.remove(tmp_fn) + leaves = cu.leaves + assert set(leaves) == set(targets), (leaves, targets) diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index 80f9367e1..e16e54767 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py @@ -1,4 +1,4 @@ from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyHierarchical, OntologyObo, \ - OntologyCelltypes, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ + OntologyCl, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 279aa1c22..849d22c77 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -8,7 +8,7 @@ import warnings from sfaira.consts.adata_fields import AdataIdsSfaira -from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE +from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION FILE_PATH = __file__ @@ -115,11 +115,11 @@ class OntologyHierarchical(Ontology): nodes: dict @abc.abstractmethod - def id_from_name(self, x: str) -> str: + def convert_to_id(self, x: str) -> str: pass @abc.abstractmethod - def name_from_id(self, x: str) -> str: + def convert_to_name(self, x: str) -> str: pass @property @@ -189,11 +189,11 @@ def node_names(self) -> List[str]: def node_ids(self) -> List[str]: return list(self.nodes.keys()) - def id_from_name(self, x: str) -> str: + def convert_to_id(self, x: str) -> str: self.validate_node(x=x) return [k for k, v in self.nodes.items() if v["name"] == x][0] - def name_from_id(self, x: str) -> str: + def convert_to_name(self, x: str) -> str: assert x in self.nodes.keys(), f"node {x} not found" 
return self.nodes[x]["name"] @@ -247,7 +247,6 @@ def synonym_node_properties(self) -> List[str]: class OntologyObo(OntologyHierarchical): graph: networkx.MultiDiGraph - leaves: List[str] def __init__( self, @@ -260,6 +259,22 @@ def _check_graph(self): if not networkx.is_directed_acyclic_graph(self.graph): warnings.warn("DAG was broken") + def __validate_node_ids(self, x: Union[str, List[str]]): + if isinstance(x, str): + x = [x] + node_ids = self.node_ids + for y in x: + if y not in node_ids: + raise ValueError(f"queried node id {y} is not in graph") + + def __validate_node_names(self, x: Union[str, List[str]]): + if isinstance(x, str): + x = [x] + node_names = self.node_names + for y in x: + if y not in node_names: + raise ValueError(f"queried node name {y} is not in graph") + @property def nodes(self) -> List[Tuple[str, dict]]: return list(self.graph.nodes.items()) @@ -276,32 +291,101 @@ def node_names(self) -> List[str]: def node_ids(self) -> List[str]: return list(self.graph.nodes()) - def id_from_name(self, x: str) -> str: - self.validate_node(x=x) - return [k for k, v in self.graph.nodes.items() if v["name"] == x][0] - - def name_from_id(self, x: str) -> str: - assert x in self.graph.nodes.keys(), f"node {x} not found" - return self.graph.nodes[x]["name"] - - def set_leaves(self, nodes: list = None): - # ToDo check that these are not include parents of each other! 
- if nodes is not None: - for x in nodes: - assert x in self.node_names, f"{x} not found" - self.leaves = nodes + def is_a_node_id(self, x: str) -> bool: + return x in self.node_ids + + def is_a_node_name(self, x: str) -> bool: + return x in self.node_names + + def convert_to_name(self, x: Union[str, List[str]]) -> Union[str, List[str]]: + was_str = isinstance(x, str) + if was_str: + x = [x] + if self.is_a_node_id(x[0]): + self.__validate_node_ids(x=x) + x = [ + [v["name"] for k, v in self.graph.nodes.items() if k == z][0] + for z in x + ] + elif self.is_a_node_name(x[0]): + self.__validate_node_names(x=x) + else: + raise ValueError(f"node {x[0]} not recognized") + self.__validate_node_names(x=x) + if was_str: + return x[0] else: - self.leaves = self.get_all_roots() + return x + + def convert_to_id(self, x: Union[str, List[str]]) -> Union[str, List[str]]: + was_str = isinstance(x, str) + if was_str: + x = [x] + if self.is_a_node_id(x[0]): + self.__validate_node_ids(x=x) + elif self.is_a_node_name(x[0]): + self.__validate_node_names(x=x) + x = [ + [k for k, v in self.graph.nodes.items() if v["name"] == z][0] + for z in x + ] + else: + raise ValueError(f"node {x[0]} not recognized") + self.__validate_node_ids(x=x) + if was_str: + return x[0] + else: + return x + + @property + def leaves(self) -> List[str]: + return [x for x in self.graph.nodes() if self.graph.in_degree(x) == 0] + + @leaves.setter + def leaves(self, x: List[str]): + """ + Sets new leaf-space for graph. + + This clips nodes that are not upstream of defined leaves. + :param x: New set of leaves nodes, identified as IDs. 
+ """ + x = self.convert_to_id(x=x) + nodes_to_remove = [] + for y in self.graph.nodes(): + if not np.any([self.is_a(query=z, reference=y) for z in x]): + nodes_to_remove.append(y) + self.graph.remove_nodes_from(nodes_to_remove) + + @property + def n_leaves(self) -> int: + return len(self.leaves) - def get_all_roots(self) -> List[str]: - return [v["name"] for v, x in zip(self.graph.nodes.values(), self.graph.nodes()) - if self.graph.in_degree(x) == 0] + def get_effective_leaves(self, x: List[str]) -> List[str]: + """ + Get effective leaves in ontology given set of observed nodes. + + The effective leaves are the minimal set of nodes such that all nodes in x are ancestors of this set, ie the + observed nodes which represent leaves of a sub-DAG of the ontology DAG, which captures all observed nodes. + + :param x: Observed node IDs. + :return: Effective leaves. + """ + x = np.unique(x).tolist() + x = self.convert_to_id(x=x) + leaves = [] + for y in x: + if not np.any([self.is_a(query=z, reference=y) for z in list(set(x) - {y})]): + leaves.append(y) + return leaves def get_ancestors(self, node: str) -> List[str]: - if node not in self.node_ids: - node = self.id_from_name(node) + node = self.convert_to_id(node) return list(networkx.ancestors(self.graph, node)) + def get_descendants(self, node: str) -> List[str]: + node = self.convert_to_id(node) + return list(networkx.descendants(self.graph, node)) + def is_a(self, query: str, reference: str) -> bool: """ Checks if query node is reference node or an ancestor thereof. @@ -310,32 +394,38 @@ def is_a(self, query: str, reference: str) -> bool: :param reference: Reference node name. Node ID or name. :return: If query node is reference node or an ancestor thereof. 
""" - if query not in self.node_ids: - query = self.id_from_name(query) - if reference not in self.node_ids: - reference = self.id_from_name(reference) + query = self.convert_to_id(query) + reference = self.convert_to_id(reference) return query in self.get_ancestors(node=reference) or query == reference - def map_to_leaves(self, node: str, return_type: str = "elements", include_self: bool = True): + def map_to_leaves( + self, + node: str, + return_type: str = "ids", + include_self: bool = True + ) -> Union[List[str], np.ndarray]: """ - Map a given list of nodes to leave nodes. + Map a given node to leave nodes. :param node: :param return_type: - "elements": names of mapped leave nodes - "idx": indicies in leave note list of of mapped leave nodes + "ids": IDs of mapped leave nodes + "idx": indicies in leave note list of mapped leave nodes :param include_self: whether to include node itself :return: """ - assert self.leaves is not None + node = self.convert_to_id(node) ancestors = self.get_ancestors(node) if include_self: ancestors = ancestors + [node] - if return_type == "elements": - return [x for x in self.leaves if x in ancestors] + if len(ancestors) > 0: + ancestors = self.convert_to_id(ancestors) + leaves = self.convert_to_id(self.leaves) + if return_type == "ids": + return [x for x in leaves if x in ancestors] if return_type == "idx": - return np.array([i for i, x in enumerate(self.leaves) if x in ancestors]) + return np.sort([i for i, x in enumerate(leaves) if x in ancestors]) @abc.abstractmethod def synonym_node_properties(self) -> List[str]: @@ -564,13 +654,22 @@ def synonym_node_properties(self) -> List[str]: return ["synonym", "latin term", "has relational adjective"] -class OntologyCelltypes(OntologyExtendedObo): +class OntologyCl(OntologyExtendedObo): def __init__( self, branch: str, + use_developmental_relationships: bool = False, **kwargs ): + """ + + Developmental edges are not desired in all interactions with this ontology, double-negative 
thymocytes are for + example not an intuitive parent node for a fine grained T cell label in a non-thymic tissue. + :param branch: + :param use_developmental_relationships: Whether to keep developmental relationships. + :param kwargs: + """ if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet obofile = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" else: @@ -622,9 +721,13 @@ def download_cl(): 'lacks_plasma_membrane_part', # ? ] edges_to_delete = [] + if use_developmental_relationships: + edges_allowed = ["is_a", "develops_from"] + else: + edges_allowed = ["is_a"] for i, x in enumerate(self.graph.edges): assert x[2] in edge_types, x - if x[2] not in ["is_a", "develops_from"]: + if x[2] not in edges_allowed: edges_to_delete.append((x[0], x[1])) for x in edges_to_delete: self.graph.remove_edge(u=x[0], v=x[1]) diff --git a/sfaira/versions/metadata/extensions/__init__.py b/sfaira/versions/metadata/extensions/__init__.py index ff1f8ff55..6fe32c7de 100644 --- a/sfaira/versions/metadata/extensions/__init__.py +++ b/sfaira/versions/metadata/extensions/__init__.py @@ -1,2 +1 @@ -from .obo_extension_human import ONTOLOGIY_EXTENSION_HUMAN -from .obo_extension_mouse import ONTOLOGIY_EXTENSION_MOUSE +from .obo_extension import ONTOLOGIY_EXTENSION diff --git a/sfaira/versions/metadata/extensions/obo_extension.py b/sfaira/versions/metadata/extensions/obo_extension.py new file mode 100644 index 000000000..2a54299ae --- /dev/null +++ b/sfaira/versions/metadata/extensions/obo_extension.py @@ -0,0 +1 @@ +ONTOLOGIY_EXTENSION = {} diff --git a/sfaira/versions/metadata/extensions/obo_extension_human.py b/sfaira/versions/metadata/extensions/obo_extension_human.py deleted file mode 100644 index 8a4b683e7..000000000 --- a/sfaira/versions/metadata/extensions/obo_extension_human.py +++ /dev/null @@ -1 +0,0 @@ -ONTOLOGIY_EXTENSION_HUMAN = {} diff --git a/sfaira/versions/metadata/extensions/obo_extension_mouse.py 
b/sfaira/versions/metadata/extensions/obo_extension_mouse.py deleted file mode 100644 index af93a79db..000000000 --- a/sfaira/versions/metadata/extensions/obo_extension_mouse.py +++ /dev/null @@ -1 +0,0 @@ -ONTOLOGIY_EXTENSION_MOUSE = {} diff --git a/sfaira/versions/metadata/universe.py b/sfaira/versions/metadata/universe.py index f7f2a408e..41e30ce9a 100644 --- a/sfaira/versions/metadata/universe.py +++ b/sfaira/versions/metadata/universe.py @@ -2,8 +2,11 @@ import pandas as pd from typing import Dict, List, Tuple, Union -from sfaira.versions.metadata import OntologyCelltypes, OntologyUberon -from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION_HUMAN, ONTOLOGIY_EXTENSION_MOUSE +from sfaira.versions.metadata import OntologyCl, OntologyUberon +from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION + +TARGET_UNIVERSE_KEY_NAME = "name" +TARGET_UNIVERSE_KEY_ID = "id" class CelltypeUniverse: @@ -13,67 +16,18 @@ class CelltypeUniverse: Basic checks on the organ specific instance are performed in the constructor. """ - onto_cl: OntologyCelltypes + onto_cl: OntologyCl onto_uberon: OntologyUberon _target_universe: Union[List[str], None] - def __init__(self, cl: OntologyCelltypes, uberon: OntologyUberon, organism: Union[str, None] = None, **kwargs): - """ - - :param organism: Organism, defines ontology extension used. - :param kwargs: - """ + def __init__(self, cl: OntologyCl, uberon: OntologyUberon, **kwargs): self.onto_cl = cl self.onto_uberon = uberon self._target_universe = None - self._set_extension(organism=organism) - - def _set_extension(self, organism: Union[str, None]): - """ - :param organism: Organism, defines ontology extension used. 
- """ - if organism is not None: - if organism == "human" or organism.lower() == "homo_sapiens": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_HUMAN) - elif organism == "mouse" or organism.lower() == "mus_musculus": - self.onto_cl.add_extension(ONTOLOGIY_EXTENSION_MOUSE) - else: - raise ValueError(f"organism {organism} not found") - - @property - def target_universe(self) -> List[str]: - """ - Ontology classes of target universe (understandable cell type names). - - :return: - """ - return self._target_universe + self._set_extension() - @target_universe.setter - def target_universe(self, x: List[str]): - # Check that all nodes are valid: - for xx in x: - if xx not in self.onto_cl.node_names: - raise ValueError(f"cell type {xx} was not in ontology") - # Default universe is the full set of leave nodes of ontology: - self._target_universe = x - self.onto_cl.set_leaves(self._target_universe) - - @property - def target_universe_ids(self): - """ - Ontology IDs of target universe (codified cell type names). - - :return: - """ - return [self.onto_cl.id_from_name(x) for x in self._target_universe] - - @property - def ntypes(self): - """ - Number of different cell types in target universe. - """ - return len(self.target_universe) + def _set_extension(self): + self.onto_cl.add_extension(ONTOLOGIY_EXTENSION) def __validate_target_universe_table(self, tab: pd.DataFrame): assert len(tab.columns) == 2 @@ -85,25 +39,27 @@ def load_target_universe(self, fn): :param fn: .csv file containing target universe. :return: """ - tab = pd.read_csv(fn) + tab = pd.read_csv(fn, sep="\t", index_col=None) self.__validate_target_universe_table(tab=tab) - self.target_universe = tab["name"].values + self.leaves = tab["name"].values - def map_to_target_leaves( + def write_target_universe( self, - nodes: List[str], - return_type: str = "elements" - ) -> list: + fn, + x: List[str], + ): """ - Map a given list of nodes to leave nodes defined for this ontology. 
- :param nodes: - :param return_type: - "elements": names of mapped leave nodes - "idx": indices in leave note list of of mapped leave nodes + :param fn: .csv file containing target universe. + :param x: Nodes that make up target universe. :return: """ - return [self.onto_cl.map_to_leaves(x, return_type=return_type) for x in nodes] + tab = pd.DataFrame({ + TARGET_UNIVERSE_KEY_NAME: self.onto_cl.convert_to_name(x), + TARGET_UNIVERSE_KEY_ID: self.onto_cl.convert_to_id(x), + }) + self.__validate_target_universe_table(tab=tab) + tab.to_csv(path_or_buf=fn, sep="\t", index=False) def prepare_celltype_map_fuzzy( self, @@ -253,7 +209,7 @@ def synonym_string_processing(y): # 1. Select cell types that are in the correct ontology. # Check that anatomical constraint is a term in UBERON and get UBERON ID: - anatomical_constraint_id = self.onto_uberon.id_from_name(anatomical_constraint) + anatomical_constraint_id = self.onto_uberon.convert_to_id(anatomical_constraint) # Select up to 5 nodes which match the anatomical constraint: # The entries look as follows: # node.value['relationship'] = ['part_of UBERON:0001885'] From e6585a4f8fbd4e760c9b992958d678bb8f0b4b4d Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 16 Apr 2021 21:27:51 +0200 Subject: [PATCH 104/161] Fix GenomeContainer (#223) * fixed bugs in GenomeContainer and added unit tests for this class * fixed interface bug from genomecontainer to estimatortest --- sfaira/data/base/dataset.py | 8 +- sfaira/estimators/keras.py | 4 +- sfaira/models/__init__.py | 2 +- sfaira/models/base.py | 12 ++ sfaira/models/celltype/marker.py | 8 +- sfaira/models/celltype/mlp.py | 4 +- sfaira/models/embedding/__init__.py | 10 +- sfaira/models/embedding/ae.py | 6 +- sfaira/models/embedding/linear.py | 8 +- sfaira/models/embedding/vae.py | 8 +- sfaira/models/embedding/vaeiaf.py | 8 +- sfaira/models/embedding/vaevamp.py | 8 +- .../unit_tests/estimators/test_estimator.py | 28 ++--- sfaira/unit_tests/versions/test_genomes.py | 40 +++++++ sfaira/versions/genomes.py | 104 ++++++++++++++---- 15 files changed, 184 insertions(+), 74 deletions(-) create mode 100644 sfaira/unit_tests/versions/test_genomes.py diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 3df1a66d9..6fe2475b6 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -569,19 +569,19 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): data_ids_ensg = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values if subset_type is None: subset_ids_ensg = self.genome_container.ensembl - subset_ids_symbol = self.genome_container.names + subset_ids_symbol = self.genome_container.symbols else: if isinstance(subset_type, str): subset_type = [subset_type] - keys = np.unique(self.genome_container.type) + keys = np.unique(self.genome_container.biotype) if subset_type not in keys: raise ValueError(f"subset type {subset_type} not available in list {keys}") subset_ids_ensg = [ - x.upper() for x, y in zip(self.genome_container.ensembl, self.genome_container.type) + x.upper() for x, y in zip(self.genome_container.ensembl, self.genome_container.biotype) if y in subset_type ] subset_ids_symbol = [ - 
x.upper() for x, y in zip(self.genome_container.names, self.genome_container.type) + x.upper() for x, y in zip(self.genome_container.symbols, self.genome_container.biotype) if y in subset_type ] diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 43b8ca69d..18f23ae96 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -14,7 +14,7 @@ from sfaira.consts import AdataIdsSfaira, OCS from sfaira.data import DistributedStore -from sfaira.models import BasicModel +from sfaira.models import BasicModelKeras from sfaira.versions.metadata import CelltypeUniverse, OntologyCl from sfaira.versions.topologies import TopologyContainer from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss @@ -38,7 +38,7 @@ class EstimatorKeras: Estimator base class for keras models. """ data: Union[anndata.AnnData, DistributedStore] - model: Union[BasicModel, None] + model: Union[BasicModelKeras, None] model_topology: Union[str, None] model_id: Union[str, None] weights: Union[np.ndarray, None] diff --git a/sfaira/models/__init__.py b/sfaira/models/__init__.py index a31eeec41..2a104881b 100644 --- a/sfaira/models/__init__.py +++ b/sfaira/models/__init__.py @@ -1,4 +1,4 @@ -from .base import BasicModel +from .base import BasicModelKeras from .pp_layer import PreprocInput from .made import MaskingDense from sfaira.models import celltype diff --git a/sfaira/models/base.py b/sfaira/models/base.py index cd5a47ed4..73b7c2ad4 100644 --- a/sfaira/models/base.py +++ b/sfaira/models/base.py @@ -1,4 +1,8 @@ import abc +try: + import tensorflow as tf +except ImportError: + tf = None class BasicModel(abc.ABC): @@ -15,3 +19,11 @@ class BasicModel(abc.ABC): @property def version(self): return self._version + + +class BasicModelKeras(BasicModel): + """ + This base class defines model attributes shared across all tf.keras models. 
+ """ + + training_model: tf.keras.Model diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index a1e6c5470..de3d9292b 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -7,7 +7,7 @@ from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput @@ -41,7 +41,7 @@ def call(self, inputs): return tf.nn.sigmoid(x) -class CellTypeMarker(BasicModel): +class CellTypeMarker(BasicModelKeras): """ Marker gene-based cell type classifier: Learns whether or not each gene exceeds requires threshold and learns cell type assignment as linear combination of these marker gene presence probabilities. @@ -120,7 +120,7 @@ def __init__( hyperpar[k] = override_hyperpar[k] super().__init__( in_dim=topology_container.n_var, - out_dim=celltypes_version.ntypes, + out_dim=celltypes_version.onto_cl.n_leaves, **hyperpar ) print('passed hyperpar: \n', hyperpar) @@ -135,6 +135,6 @@ def __init__( ("genome_size", self.genome_size), ("model_class", self.model_class), ("model_type", self.model_type), - ("ntypes", celltypes_version.ntypes), + ("ntypes", celltypes_version.onto_cl.n_leaves), ] ) diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index c54d2624b..1ca9754f6 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -7,11 +7,11 @@ from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput -class CellTypeMlp(BasicModel): +class CellTypeMlp(BasicModelKeras): """ Multi-layer perceptron to predict cell type. 
diff --git a/sfaira/models/embedding/__init__.py b/sfaira/models/embedding/__init__.py index c07882edd..f206e5be5 100644 --- a/sfaira/models/embedding/__init__.py +++ b/sfaira/models/embedding/__init__.py @@ -1,5 +1,5 @@ -from sfaira.models.embedding.ae import ModelAe, ModelAeVersioned -from sfaira.models.embedding.vae import ModelVae, ModelVaeVersioned -from sfaira.models.embedding.linear import ModelLinear, ModelLinearVersioned -from sfaira.models.embedding.vaeiaf import ModelVaeIAF, ModelVaeIAFVersioned -from sfaira.models.embedding.vaevamp import ModelVaeVamp, ModelVaeVampVersioned +from sfaira.models.embedding.ae import ModelKerasAe, ModelAeVersioned +from sfaira.models.embedding.vae import ModelKerasVae, ModelVaeVersioned +from sfaira.models.embedding.linear import ModelKerasLinear, ModelLinearVersioned +from sfaira.models.embedding.vaeiaf import ModelKerasVaeIAF, ModelVaeIAFVersioned +from sfaira.models.embedding.vaevamp import ModelKerasVaeVamp, ModelVaeVampVersioned diff --git a/sfaira/models/embedding/ae.py b/sfaira/models/embedding/ae.py index af719740f..08ad9a396 100644 --- a/sfaira/models/embedding/ae.py +++ b/sfaira/models/embedding/ae.py @@ -8,7 +8,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput @@ -113,7 +113,7 @@ def call(self, x, **kwargs): return x -class ModelAe(BasicModel): +class ModelKerasAe(BasicModelKeras): """Combines the encoder and decoder into an end-to-end model for training.""" # Note: Original DCA implementation uses l1_l2 regularisation also on last layer (nb) - missing here # Note: Original DCA implementation uses softplus function instead of exponential as dispersion activation @@ -207,7 +207,7 @@ def 
predict_embedding(self, x, variational=False): return self.encoder_model.predict(x) -class ModelAeVersioned(ModelAe): +class ModelAeVersioned(ModelKerasAe): def __init__( self, topology_container: TopologyContainer, diff --git a/sfaira/models/embedding/linear.py b/sfaira/models/embedding/linear.py index 140b72f9e..3004006be 100644 --- a/sfaira/models/embedding/linear.py +++ b/sfaira/models/embedding/linear.py @@ -8,7 +8,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput @@ -39,7 +39,7 @@ def call(self, inputs, **kwargs): return x -class ModelLinear(BasicModel): +class ModelKerasLinear(BasicModelKeras): def __init__( self, @@ -51,7 +51,7 @@ def __init__( dropout_rate=None, output_layer="nb" ): - super(ModelLinear, self).__init__() + super(ModelKerasLinear, self).__init__() self.in_dim = in_dim self.latent_dim = latent_dim @@ -102,7 +102,7 @@ def predict_embedding(self, x, **kwargs): return self.encoder_model.predict(x) -class ModelLinearVersioned(ModelLinear): +class ModelLinearVersioned(ModelKerasLinear): def __init__( self, topology_container: TopologyContainer, diff --git a/sfaira/models/embedding/vae.py b/sfaira/models/embedding/vae.py index 7e379e567..47fafa498 100644 --- a/sfaira/models/embedding/vae.py +++ b/sfaira/models/embedding/vae.py @@ -8,7 +8,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput @@ -140,7 +140,7 
@@ def call(self, inputs, **kwargs): return x -class ModelVae(BasicModel): +class ModelKerasVae(BasicModelKeras): def predict_reconstructed(self, x: np.ndarray): return np.split(self.training_model.predict(x)[0], indices_or_sections=2, axis=1)[0] @@ -157,7 +157,7 @@ def __init__( init='glorot_uniform', output_layer="nb" ): - super(ModelVae, self).__init__() + super(ModelKerasVae, self).__init__() # Check length of latent dim to divide encoder-decoder stack: if len(latent_dim) % 2 == 1: n_layers_enc = len(latent_dim) // 2 + 1 @@ -229,7 +229,7 @@ def predict_embedding(self, x, variational=False): return self.encoder_model.predict(x)[1] -class ModelVaeVersioned(ModelVae): +class ModelVaeVersioned(ModelKerasVae): def __init__( self, topology_container: TopologyContainer, diff --git a/sfaira/models/embedding/vaeiaf.py b/sfaira/models/embedding/vaeiaf.py index 4ee2e5c6f..4db1875a6 100644 --- a/sfaira/models/embedding/vaeiaf.py +++ b/sfaira/models/embedding/vaeiaf.py @@ -8,7 +8,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput from sfaira.models.made import MaskingDense @@ -221,7 +221,7 @@ def call(self, inputs, **kwargs): return x -class ModelVaeIAF(BasicModel): +class ModelKerasVaeIAF(BasicModelKeras): def __init__( self, @@ -237,7 +237,7 @@ def __init__( init='glorot_uniform', output_layer="nb" ): - super(ModelVaeIAF, self).__init__() + super(ModelKerasVaeIAF, self).__init__() # Check length of latent dim to divide encoder-decoder stack: if len(latent_dim) % 2 == 1: n_layers_enc = len(latent_dim) // 2 + 1 @@ -347,7 +347,7 @@ def predict_embedding(self, x, variational=False, return_z0=False): return z_t_mean -class 
ModelVaeIAFVersioned(ModelVaeIAF): +class ModelVaeIAFVersioned(ModelKerasVaeIAF): def __init__( self, topology_container: TopologyContainer, diff --git a/sfaira/models/embedding/vaevamp.py b/sfaira/models/embedding/vaevamp.py index fd06ce0e8..88062b1fc 100644 --- a/sfaira/models/embedding/vaevamp.py +++ b/sfaira/models/embedding/vaevamp.py @@ -8,7 +8,7 @@ from sfaira.models.embedding.output_layers import NegBinOutput, NegBinSharedDispOutput, NegBinConstDispOutput, \ GaussianOutput, GaussianSharedStdOutput, GaussianConstStdOutput from sfaira.versions.topologies import TopologyContainer -from sfaira.models.base import BasicModel +from sfaira.models.base import BasicModelKeras from sfaira.models.pp_layer import PreprocInput @@ -200,7 +200,7 @@ def call(self, inputs, **kwargs): return (p_z1_mean, p_z1_log_var), (p_z2_mean, p_z2_log_var), out -class ModelVaeVamp(BasicModel): +class ModelKerasVaeVamp(BasicModelKeras): def predict_reconstructed(self, x: np.ndarray): return np.split(self.training_model.predict(x)[0], indices_or_sections=2, axis=1)[0] @@ -218,7 +218,7 @@ def __init__( init='glorot_uniform', output_layer="nb" ): - super(ModelVaeVamp, self).__init__() + super(ModelKerasVaeVamp, self).__init__() config = ( latent_dim, l1_coef, @@ -291,7 +291,7 @@ def predict_embedding(self, x, variational=False): return self.encoder_model.predict(x)[1] -class ModelVaeVampVersioned(ModelVaeVamp): +class ModelVaeVampVersioned(ModelKerasVaeVamp): def __init__( self, topology_container: TopologyContainer, diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index a8637a404..2fe831ec4 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -42,7 +42,7 @@ } -class TestEstimatorBase: +class HelperEstimatorBase: estimator: Union[EstimatorKeras] data: Union[anndata.AnnData] @@ -82,7 +82,7 @@ def init_estimator(self, model_type: str): def basic_estimator_test(self): pass - 
def test_for_fatal(self, model_type): + def fatal_estimator_test(self, model_type): np.random.seed(1) self.simulate() self.init_estimator(model_type=model_type) @@ -90,7 +90,7 @@ def test_for_fatal(self, model_type): return True -class TestEstimatorKerasEmbedding(TestEstimatorBase): +class HelperEstimatorKerasEmbedding(HelperEstimatorBase): estimator: EstimatorKerasEmbedding @@ -137,7 +137,7 @@ def basic_estimator_test(self): assert np.allclose(prediction_embed, new_prediction_embed, rtol=1e-6, atol=1e-6) -class TestEstimatorKerasCelltype(TestEstimatorBase): +class HelperEstimatorKerasCelltype(HelperEstimatorBase): estimator: EstimatorKerasCelltype @@ -183,28 +183,28 @@ def basic_estimator_test(self): def test_for_fatal_linear(): - test_estim = TestEstimatorKerasEmbedding() - test_estim.test_for_fatal(model_type="linear") + test_estim = HelperEstimatorKerasEmbedding() + test_estim.fatal_estimator_test(model_type="linear") def test_for_fatal_ae(): - test_estim = TestEstimatorKerasEmbedding() - test_estim.test_for_fatal(model_type="ae") + test_estim = HelperEstimatorKerasEmbedding() + test_estim.fatal_estimator_test(model_type="ae") def test_for_fatal_vae(): - test_estim = TestEstimatorKerasEmbedding() - test_estim.test_for_fatal(model_type="vae") + test_estim = HelperEstimatorKerasEmbedding() + test_estim.fatal_estimator_test(model_type="vae") # Test cell type predictor models: def test_for_fatal_mlp(): - test_estim = TestEstimatorKerasCelltype() - test_estim.test_for_fatal(model_type="mlp") + test_estim = HelperEstimatorKerasCelltype() + test_estim.fatal_estimator_test(model_type="mlp") def test_for_fatal_marker(): - test_estim = TestEstimatorKerasCelltype() - test_estim.test_for_fatal(model_type="marker") + test_estim = HelperEstimatorKerasCelltype() + test_estim.fatal_estimator_test(model_type="marker") diff --git a/sfaira/unit_tests/versions/test_genomes.py b/sfaira/unit_tests/versions/test_genomes.py new file mode 100644 index 000000000..512ca2417 --- /dev/null 
+++ b/sfaira/unit_tests/versions/test_genomes.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest +from typing import Tuple, Union + +from sfaira.versions.genomes import GenomeContainer + +""" +GenomeContainer +""" + + +@pytest.mark.parametrize("organism", ["mouse"]) +@pytest.mark.parametrize("assembly", [None, "Mus_musculus.GRCm38.102"]) +def test_gc_init(organism: Union[str, None], assembly: Union[str, None]): + """ + Tests different modes of initialisation for fatal errors. + """ + gc = GenomeContainer(organism=organism, assembly=assembly) + assert gc.organism == "mus_musculus" + + +@pytest.mark.parametrize("subset", [ + ({"biotype": "protein_coding"}, 21936), + ({"biotype": "lincRNA"}, 5629), + ({"biotype": "protein_coding,lincRNA"}, 21936 + 5629), + ({"symbols": "Gnai3,Pbsn,Cdc45"}, 3), + ({"ensg": "ENSMUSG00000000003,ENSMUSG00000000028"}, 2) +]) +def test_gc_subsetting(subset: Tuple[dict, int]): + """ + Tests if genome container is subsetted correctly. + """ + gc = GenomeContainer(organism=None, assembly="Mus_musculus.GRCm38.102") + gc.subset(**subset[0]) + assert gc.n_var == subset[1] + assert len(gc.ensembl) == subset[1] + assert len(gc.symbols) == subset[1] + assert len(gc.biotype) == subset[1] + if list(subset[0].keys())[0] == "protein_coding": + assert np.all(gc.biotype == "protein_coding") diff --git a/sfaira/versions/genomes.py b/sfaira/versions/genomes.py index 4583763ab..cb575e68f 100644 --- a/sfaira/versions/genomes.py +++ b/sfaira/versions/genomes.py @@ -3,10 +3,12 @@ """ import gzip +import numpy as np import os -from typing import Union +from typing import List, Union import pandas import pathlib +import urllib.error import urllib.request KEY_SYMBOL = "gene_name" @@ -44,7 +46,7 @@ def release(self) -> str: return self.assembly.split(".")[-1] @property - def organism(self): + def organism(self) -> str: return self.assembly.split(".")[0].lower() @property @@ -57,7 +59,10 @@ def download_gtf_ensembl(self): """ temp_file = 
os.path.join(self.cache_dir, self.assembly + ".gtf.gz") print(f"downloading {self.url_ensembl_ftp} into a temporary file {temp_file}") - _ = urllib.request.urlretrieve(url=self.url_ensembl_ftp, filename=temp_file) + try: + _ = urllib.request.urlretrieve(url=self.url_ensembl_ftp, filename=temp_file) + except urllib.error.URLError as e: + raise ValueError(f"Could not download gtf from {self.url_ensembl_ftp} with urllib.error.URLError: {e}") with gzip.open(temp_file) as f: tab = pandas.read_csv(f, sep="\t", comment="#", header=None) os.remove(temp_file) # Delete temporary file .gtf.gz. @@ -94,47 +99,85 @@ def __init__( ): if assembly is None: # Set defaults based on organism if assembly is not given. - if self.organism == "human": + if organism is None: + raise ValueError("Supply either organism or assembly to GenomeContainer().") + if organism == "human": self.assembly = "Homo_sapiens.GRCh38.102" - elif self.organism == "mouse": + elif organism == "mouse": self.assembly = "Mus_musculus.GRCm38.102" else: raise ValueError(f"organism {organism} not found") else: self.assembly = assembly - self.gc = GtfInterface(assembly=self.assembly) + self.gtfi = GtfInterface(assembly=self.assembly) self.load_genome() @property def organism(self): - return self.gc.organism + return self.gtfi.organism def load_genome(self): - self.genome_tab = self.gc.cache + self.genome_tab = self.gtfi.cache def subset( self, - biotype: Union[None, str] = None, - symbols: Union[None, str] = None, - ensg: Union[None, str] = None, + biotype: Union[None, str, List[str]] = None, + symbols: Union[None, str, List[str]] = None, + ensg: Union[None, str, List[str]] = None, ): """ Subset by gene biotype or to gene list defined by identifiers (symbol or ensemble ID). - :param biotype: - :param symbols: - :param ensg: - :return: + Will subset by multiple factors if more than one parameter is not None. + + :param biotype: Gene biotype(s) of gene(s) to subset genome to. Elements have to appear in genome. 
+ Separate in string via "," if choosing multiple or supply as list of string. + :param symbols: Gene symbol(s) of gene(s) to subset genome to. Elements have to appear in genome. + Separate in string via "," if choosing multiple or supply as list of string. + :param ensg: Ensemble gene ID(s) of gene(s) to subset genome to. Elements have to appear in genome. + Separate in string via "," if choosing multiple or supply as list of string. """ - if biotype is None: - self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_TYPE].values == biotype, :].copy() - if symbols is None: - self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_SYMBOL].values == symbols, :].copy() - if ensg is None: - self.genome_tab = self.genome_tab.loc[self.genome_tab[KEY_ID].values == ensg, :].copy() + subset = np.ones((self.n_var,), "int") == 1 + if biotype is not None: + if isinstance(biotype, list): + pass + elif isinstance(biotype, str): + biotype = biotype.split(",") + else: + raise ValueError(f"Supply biotype as string, see also function annotation. Supplied {biotype}.") + self.__validate_types(x=biotype) + subset = np.logical_and( + subset, + [x in biotype for x in self.genome_tab[KEY_TYPE].values] + ) + if symbols is not None: + if isinstance(symbols, list): + pass + elif isinstance(symbols, str): + symbols = symbols.split(",") + else: + raise ValueError(f"Supply symbols as string, see also function annotation. Supplied {symbols}.") + self.__validate_symbols(x=symbols) + subset = np.logical_and( + subset, + [x in symbols for x in self.genome_tab[KEY_SYMBOL].values] + ) + if ensg is not None: + if isinstance(ensg, list): + pass + elif isinstance(ensg, str): + ensg = ensg.split(",") + else: + raise ValueError(f"Supply ensg as string, see also function annotation. 
Supplied {ensg}.") + self.__validate_ensembl(x=ensg) + subset = np.logical_and( + subset, + [x in ensg for x in self.genome_tab[KEY_ID].values] + ) + self.genome_tab = self.genome_tab.loc[subset, :].copy() @property - def names(self): + def symbols(self): return self.genome_tab[KEY_SYMBOL].values.tolist() @property @@ -142,9 +185,24 @@ def ensembl(self): return self.genome_tab[KEY_ID].values.tolist() @property - def type(self): + def biotype(self): return self.genome_tab[KEY_TYPE].values.tolist() + def __validate_ensembl(self, x: List[str]): + not_found = [y for y in x if y not in self.ensembl] + if len(not_found) > 0: + raise ValueError(f"Could not find ensembl: {not_found}") + + def __validate_symbols(self, x: List[str]): + not_found = [y for y in x if y not in self.symbols] + if len(not_found) > 0: + raise ValueError(f"Could not find names: {not_found}") + + def __validate_types(self, x: List[str]): + not_found = [y for y in x if y not in self.biotype] + if len(not_found) > 0: + raise ValueError(f"Could not find type: {not_found}") + @property def n_var(self) -> int: return self.genome_tab.shape[0] From 967986130bf80d202f5bceb607ac6340a13a2641 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 19 Apr 2021 12:40:05 +0200 Subject: [PATCH 105/161] fixed EFO 10X labels and moved EBI ontology interface over to networkx, allowing relational reasoning (#224) --- sfaira/data/base/dataset.py | 2 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 2 +- ...pithelium_2019_10xsequencing_smilie_001.py | 2 +- ...man_ileum_2019_10xsequencing_martin_001.py | 2 +- ...stategland_2018_10xsequencing_henry_001.py | 2 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 2 +- ...uman_lung_2020_10xsequencing_miller_001.py | 2 +- ...human_testis_2018_10xsequencing_guo_001.py | 2 +- ...liver_2018_10xsequencing_macparland_001.py | 2 +- .../human_x_2019_10xsequencing_szabo_001.py | 2 +- ...man_retina_2019_10xsequencing_menon_001.py | 2 +- .../human_placenta_2018_x_ventotormo_001.py | 2 +- ...ver_2019_10xsequencing_ramachandran_001.py | 2 +- ...an_liver_2019_10xsequencing_popescu_001.py | 2 +- ...rain_2019_10x3v2sequencing_kanton_001.yaml | 2 +- .../human_lung_2020_x_travaglini_001.yaml | 2 +- ...uman_colon_2020_10xsequencing_james_001.py | 2 +- .../human_x_2019_10xsequencing_braga_x.py | 2 +- .../mouse_x_2019_10xsequencing_hove_001.py | 2 +- ...uman_kidney_2020_10xsequencing_liao_001.py | 2 +- ...man_retina_2019_10xsequencing_voigt_001.py | 2 +- .../human_x_2019_10xsequencing_wang_001.py | 2 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 2 +- .../human_blood_2020_10x_hao_001.yaml | 2 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 2 +- ...nchyma_2020_10xsequencing_habermann_001.py | 2 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 2 +- ...uman_thymus_2020_10xsequencing_park_001.py | 2 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 2 +- ..._retina_2019_10xsequencing_lukowski_001.py | 2 +- ...lood_2019_10xsequencing_10xgenomics_001.py | 2 +- .../human_x_2018_10xsequencing_regev_001.py | 2 +- sfaira/unit_tests/versions/test_ontologies.py | 39 ++- sfaira/versions/metadata/base.py | 297 +++++++++--------- 34 files changed, 218 insertions(+), 182 
deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 6fe2475b6..4d6deb21b 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -2191,7 +2191,7 @@ def _value_protection( if isinstance(allowed, OntologyHierarchical) and x in allowed.node_ids: attempted_clean.append(allowed.convert_to_name(x)) else: - raise ValueError(f"'{x}' is not a valid entry for {attr} in {self.id}.") + raise ValueError(f"'{x}' is not a valid entry for {attr}.") else: raise ValueError(f"allowed of type {type(allowed)} is not a valid entry for {attr}.") # Flatten attempts if only one was made: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index a3276d4c8..4cf0acbcf 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -19,7 +19,7 @@ dataset_wise: primary_data: year: 2019 dataset_or_observation_wise: - assay_sc: "10X sequencing" + assay_sc: "10x sequencing" assay_sc_obs_key: assay_differentiation: assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index d865d427f..de9a1b696 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -13,7 +13,7 @@ def __init__(self, 
**kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Smilie" self.disease = "healthy" self.doi = "10.1016/j.cell.2019.06.029" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index 2e81a828d..00b8f66c3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Martin" self.disease = "healthy" self.doi = "10.1016/j.cell.2019.08.008" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index fbd5b242a..9acd6481e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -18,7 +18,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Henry" self.disease = "healthy" self.doi = "10.1016/j.celrep.2018.11.086" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 94b53b1f7..35dd76d5b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -33,7 +33,7 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.state_exact = "diabetic" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index e3ff49bef..aebbfd373 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Miller" self.disease = "healthy" self.doi = "10.1016/j.devcel.2020.01.033" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 586c78391..3aa52ea6d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Guo" self.disease = "healthy" self.doi = "10.1038/s41422-018-0099-2" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index 6d394eef6..dda02bcc0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "private,GSE115469.csv.gz" self.download_url_meta = "private,GSE115469_labels.txt" - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "MacParland" self.disease = "healthy" self.doi = "10.1038/s41467-018-06318-7" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index 7fced312b..6a60fd6bd 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -58,7 +58,7 @@ def __init__(self, **kwargs): "private,donor2.annotation.txt" ] - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Szabo" self.doi = "10.1038/s41467-019-12464-3" self.individual = 
SAMPLE_DICT[self.sample_fn][1] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 6d912ca2d..db310b697 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -11,7 +11,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Menon" self.disease = "healthy" self.doi = "10.1038/s41467-019-12780-8" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index be7fb5d14..e9ee21afe 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs): self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.2.zip" - self.assay_sc = "10X sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" + self.assay_sc = "10x sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" self.author = "Ventotormo" self.disease = "healthy" self.doi = "10.1038/s41586-018-0698-6" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 
18561b807..9daa11115 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -14,7 +14,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Ramachandran" self.doi = "10.1038/s41586-019-1631-3" self.normalization = "raw" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index 735b25a32..51d3cd39d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -11,7 +11,7 @@ def __init__(self, **kwargs): self.download_url_data = "private,fetal_liver_alladata_.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Popescu" self.disease = "healthy" self.doi = "10.1038/s41586-019-1652-y" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index 06a779d14..ef013c9bb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -14,7 +14,7 @@ dataset_wise: normalization: "raw" year: 2019 dataset_or_observation_wise: - assay_sc: "10X 3' v2 
sequencing" + assay_sc: "10x 3' v2 sequencing" assay_sc_obs_key: assay_differentiation: "Lancaster, 2014 (doi: 10.1038/nprot.2014.158)" assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index c34567f29..7a3c09343 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -18,7 +18,7 @@ dataset_wise: year: 2020 dataset_or_observation_wise: assay_sc: - droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10X sequencing" + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10x sequencing" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "Smart-seq2" assay_sc_obs_key: assay_differentiation: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 8b1828f6a..8259699b9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "James" self.disease = "healthy" self.doi = "10.1038/s41590-020-0602-z" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index d3fd7db16..e37f5c9d0 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Braga" self.disease = "healthy" self.doi = "10.1038/s41591-019-0468-5" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index 485a13c88..592dda3da 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -16,7 +16,7 @@ def __init__(self, **kwargs): self.download_url_meta = \ "https://www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Hove" self.disease = "healthy" self.doi = "10.1038/s41593-019-0393-4" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index 372e0e2b7..3deb38f2c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -15,7 +15,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x 
sequencing" self.author = "Liao" self.disease = "healthy" self.normalization = "raw" diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index e6dce3ace..3c5b42096 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Voigt" self.disease = "healthy" self.doi = "10.1073/pnas.1914143116" diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 7adab9a37..da2c79b58 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -21,7 +21,7 @@ def __init__(self, **kwargs): organ = self.sample_fn.split("_")[1].split(".")[0] - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Wang" self.disease = "healthy" self.doi = "10.1084/jem.20191130" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 2fd7f759e..208fcc876 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ 
b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -18,7 +18,7 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Lukassen" self.disease = "healthy" self.doi = "10.1101/2020.03.13.991455" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml index 7fe522a91..77f5fd1ee 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml @@ -12,7 +12,7 @@ dataset_wise: primary_data: year: 2020 dataset_or_observation_wise: - assay_sc: "10X sequencing" + assay_sc: "10x sequencing" assay_sc_obs_key: assay_differentiation: assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 9d33ec9a7..92dd5eced 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -87,7 +87,7 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organism = "mouse" self.organ = organ - self.assay_sc = "10X sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" + self.assay_sc = "10x sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py 
b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index ed4ed0511..9cc9a77b8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung parenchyma" self.organism = "human" - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.year = 2020 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index 97b9527f0..b4b101c26 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -15,7 +15,7 @@ def __init__(self, **kwargs): ] self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Stewart" self.disease = "healthy" self.doi = "10.1126/science.aat5031" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index d1a625acd..ad29f080f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + 
self.assay_sc = "10x sequencing" self.author = "Park" self.disease = "healthy" self.doi = "10.1126/science.aay3224" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index e911a971b..6996c7900 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -33,7 +33,7 @@ def __init__(self, **kwargs): self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Madissoon" self.disease = "healthy" self.doi = "10.1186/s13059-019-1906-x" diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index b905467fe..a75aa2f59 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -14,7 +14,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Lukowski" self.disease = "healthy" self.doi = "10.15252/embj.2018100811" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index 79765a904..53c515cdb 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py 
+++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -15,7 +15,7 @@ def __init__(self, **kwargs): "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "10x Genomics" self.disease = "healthy" self.doi = "no_doi_10x_genomics" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index b2be24f84..96a8d1119 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" self.download_url_meta = None - self.assay_sc = "10X sequencing" + self.assay_sc = "10x sequencing" self.author = "Regev" self.disease = "healthy" self.doi = "no_doi_regev" diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index cf13f5e31..c4e15599f 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -1,5 +1,6 @@ import numpy as np -from sfaira.versions.metadata import OntologyUberon, OntologyCl, OntologyMondo, OntologyMmusdv, OntologyHsapdv +from sfaira.versions.metadata import OntologyUberon, OntologyCl, OntologyMondo, OntologyMmusdv, OntologyHsapdv, \ + OntologySinglecellLibraryConstruction """ OntologyCelltypes @@ -106,6 +107,42 @@ def test_mmusdv_loading(): _ = OntologyMmusdv() +""" +OntologySinglecellLibraryConstruction +""" + + +def test_sclc_loading(): + """ + Tests if ontology can be initialised. 
+ """ + _ = OntologySinglecellLibraryConstruction() + + +def test_sclc_nodes(): + """ + Tests for presence and absence of a few commonly mistaken nodes. + """ + sclc = OntologySinglecellLibraryConstruction() + assert "10x sequencing" in sclc.node_names + assert "10x 5' v3 sequencing" in sclc.node_names + assert "Smart-like" in sclc.node_names + assert "Smart-seq2" in sclc.node_names + assert "single cell library construction" in sclc.node_names + + +def test_sclc_is_a(): + """ + Tests if is-a relationships work correctly. + """ + sclc = OntologySinglecellLibraryConstruction() + assert sclc.is_a(query="10x v1 sequencing", reference="10x sequencing") + assert sclc.is_a(query="10x 5' v3 sequencing", reference="10x sequencing") + assert sclc.is_a(query="10x 5' v3 sequencing", reference="10x v3 sequencing") + assert not sclc.is_a(query="10x sequencing", reference="10x v1 sequencing") + assert sclc.is_a(query="10x 5' v3 sequencing", reference="single cell library construction") + + """ UBERON """ diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 849d22c77..90d59229f 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -108,153 +108,12 @@ def is_a(self, query: str, reference: str) -> bool: return query == reference -class OntologyHierarchical(Ontology): +class OntologyHierarchical(Ontology, abc.ABC): """ Basic ordered ontology container """ - nodes: dict - - @abc.abstractmethod - def convert_to_id(self, x: str) -> str: - pass - - @abc.abstractmethod - def convert_to_name(self, x: str) -> str: - pass - - @property - def node_names(self) -> List[str]: - pass - - @property - def node_ids(self) -> List[str]: - pass - - -class OntologyEbi(OntologyHierarchical): - """ - Recursively assembles ontology by querying EBI web interface. - - Not recommended for large ontologies. - Yields unstructured list of terms. 
- """ - - def __init__( - self, - ontology: str, - root_term: str, - additional_terms: Union[Dict[str, Dict[str, str]], None] = None, - **kwargs - ): - """ - - :param ontology: - :param root_term: - :param additional_terms: Dictionary with additional terms, values should be - - - "name" necessary - - "description" optional - - "synonyms" optional - - "has_children" optional - :param kwargs: - """ - def get_url(iri): - return f"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/" \ - f"http%253A%252F%252Fwww.ebi.ac.uk%252F{ontology}%252F{iri}/children" - - def recursive_search(iri): - terms = requests.get(get_url(iri=iri)).json()["_embedded"]["terms"] - nodes_new = {} - for x in terms: - k = x["iri"].split("/")[-1] - k = ":".join(k.split("_")) - nodes_new[k] = { - "name": x["label"], - "description": x["description"], - "synonyms": x["synonyms"], - "has_children": x["has_children"], - } - if x["has_children"]: - nodes_new.update(recursive_search(iri=x["iri"].split("/")[-1])) - return nodes_new - - self.nodes = recursive_search(iri=root_term) - self.nodes.update(additional_terms) - - @property - def node_names(self) -> List[str]: - return [v["name"] for k, v in self.nodes.items()] - - @property - def node_ids(self) -> List[str]: - return list(self.nodes.keys()) - - def convert_to_id(self, x: str) -> str: - self.validate_node(x=x) - return [k for k, v in self.nodes.items() if v["name"] == x][0] - - def convert_to_name(self, x: str) -> str: - assert x in self.nodes.keys(), f"node {x} not found" - return self.nodes[x]["name"] - - def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): - """ - Map free text node name to ontology node names via fuzzy string matching. - - :param x: Free text node label which is to be matched to ontology nodes. - :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. - :return List of proposed matches in ontology. 
- """ - from fuzzywuzzy import fuzz - scores = np.array([ - np.max( - [ - fuzz.partial_ratio(x.lower(), v["name"].lower()) - ] + [ - fuzz.partial_ratio(x.lower(), yyy.lower()) - for yy in self.synonym_node_properties if yy in v.keys() for yyy in v[yy] - ] - ) if include_synonyms else - np.max([ - fuzz.partial_ratio(x.lower(), v["name"].lower()) - ]) - for k, v in self.nodes.items() - ]) - # Suggest top n_suggest hits by string match: - return [self.node_names[i] for i in np.argsort(scores)[-n_suggest:]][::-1] - - def synonym_node_properties(self) -> List[str]: - return ["synonyms"] - -# class OntologyOwl(OntologyHierarchical): -# -# onto: owlready2.Ontology -# -# def __init__( -# self, -# owl: str, -# **kwargs -# ): -# self.onto = owlready2.get_ontology(owl) -# self.onto.load() -# # ToDo build support here -# -# @property -# def node_names(self): -# pass - - -class OntologyObo(OntologyHierarchical): - graph: networkx.MultiDiGraph - def __init__( - self, - obo: str, - **kwargs - ): - self.graph = obonet.read_obo(obo) - def _check_graph(self): if not networkx.is_directed_acyclic_graph(self.graph): warnings.warn("DAG was broken") @@ -431,6 +290,150 @@ def map_to_leaves( def synonym_node_properties(self) -> List[str]: pass + +class OntologyEbi(OntologyHierarchical): + """ + Recursively assembles ontology by querying EBI web interface. + + Not recommended for large ontologies because of the iterative query of the web API. 
+ """ + + def __init__( + self, + ontology: str, + root_term: str, + **kwargs + ): + def get_url_self(iri): + return f"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/" \ + f"http%253A%252F%252Fwww.ebi.ac.uk%252F{ontology}%252F{iri}" + + def get_url_children(iri): + return f"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/" \ + f"http%253A%252F%252Fwww.ebi.ac.uk%252F{ontology}%252F{iri}/children" + + def get_iri_from_node(x): + return x["iri"].split("/")[-1] + + def get_id_from_iri(x): + x = ":".join(x.split("_")) + return x + + def get_id_from_node(x): + x = get_iri_from_node(x) + x = get_id_from_iri(x) + return x + + def recursive_search(iri): + """ + This function queries all nodes that are children of a given node at one time. This is faster than querying + the characteristics of each node separately but leads to slightly awkward code, the root node has to be + queried separately for example below. + + :param iri: Root node IRI. + :return: Tuple of + + - nodes (dictionaries of node ID and node values) and + - edges (node ID of parent and child). + """ + terms_children = requests.get(get_url_children(iri=iri)).json()["_embedded"]["terms"] + nodes_new = {} + edges_new = [] + direct_children = [] + k_self = get_id_from_iri(iri) + # Define root node if this is the first iteration, this node is otherwise not defined through values. 
+ if k_self == "EFO:0010183": + terms_self = requests.get(get_url_self(iri=iri)).json() + nodes_new[k_self] = { + "name": terms_self["label"], + "description": terms_self["description"], + "synonyms": terms_self["synonyms"], + "has_children": terms_self["has_children"], + } + for c in terms_children: + k_c = get_id_from_node(c) + nodes_new[k_c] = { + "name": c["label"], + "description": c["description"], + "synonyms": c["synonyms"], + "has_children": c["has_children"], + } + direct_children.append(k_c) + if c["has_children"]: + nodes_x, edges_x = recursive_search(iri=get_iri_from_node(c)) + nodes_new.update(nodes_x) + # Update nested edges of between children: + edges_new.extend(edges_x) + # Update edges to children: + edges_new.extend([(k_self, k_c) for k_c in direct_children]) + return nodes_new, edges_new + + self.graph = networkx.MultiDiGraph() + nodes, edges = recursive_search(iri=root_term) + for k, v in nodes.items(): + self.graph.add_node(node_for_adding=k, **v) + for x in edges: + parent, child = x + self.graph.add_edge(child, parent) + + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): + """ + Map free text node name to ontology node names via fuzzy string matching. + + :param x: Free text node label which is to be matched to ontology nodes. + :param include_synonyms: Whether to search for meaches in synonyms field of node instances, too. + :return List of proposed matches in ontology. 
+ """ + from fuzzywuzzy import fuzz + scores = np.array([ + np.max( + [ + fuzz.partial_ratio(x.lower(), v["name"].lower()) + ] + [ + fuzz.partial_ratio(x.lower(), yyy.lower()) + for yy in self.synonym_node_properties if yy in v.keys() for yyy in v[yy] + ] + ) if include_synonyms else + np.max([ + fuzz.partial_ratio(x.lower(), v["name"].lower()) + ]) + for k, v in self.graph.nodes.items() + ]) + # Suggest top n_suggest hits by string match: + return [self.node_names[i] for i in np.argsort(scores)[-n_suggest:]][::-1] + + @property + def synonym_node_properties(self) -> List[str]: + return ["synonyms"] + + +# class OntologyOwl(OntologyHierarchical): +# +# onto: owlready2.Ontology +# +# def __init__( +# self, +# owl: str, +# **kwargs +# ): +# self.onto = owlready2.get_ontology(owl) +# self.onto.load() +# # ToDo build support here +# +# @property +# def node_names(self): +# pass + + +class OntologyObo(OntologyHierarchical, abc.ABC): + + def __init__( + self, + obo: str, + **kwargs + ): + self.graph = obonet.read_obo(obo) + def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): """ Map free text node name to ontology node names via fuzzy string matching. @@ -857,14 +860,10 @@ def synonym_node_properties(self) -> List[str]: class OntologySinglecellLibraryConstruction(OntologyEbi): - def __init__( - self, - ontology: str = "efo", - root_term: str = "EFO_0010183", - ): + def __init__(self): super().__init__( - ontology=ontology, - root_term=root_term, + ontology="efo", + root_term="EFO_0010183", additional_terms={ "microwell-seq": {"name": "microwell-seq"}, "sci-plex": {"name": "sci-plex"} From ef8eece14089a1b4a75f110b0a5e96651888ff05 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 19 Apr 2021 18:33:42 +0200 Subject: [PATCH 106/161] Generator restructuring (#225) * restructure generators in keras estimators to yield more useful templates --- sfaira/estimators/keras.py | 403 +++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 200 deletions(-) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 18f23ae96..373d20ff8 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -39,7 +39,7 @@ class EstimatorKeras: """ data: Union[anndata.AnnData, DistributedStore] model: Union[BasicModelKeras, None] - model_topology: Union[str, None] + topology_container: Union[TopologyContainer, None] model_id: Union[str, None] weights: Union[np.ndarray, None] model_dir: Union[str, None] @@ -535,6 +535,58 @@ def _get_output_dim(n_features, model_type, mode='train'): return output_types, output_shapes + def _get_base_generator( + self, + generator_helper, + idx: Union[np.ndarray, None], + ): + """ + Yield a basic generator based on which a tf dataset can be built. + + The signature of this generator can be modified through generator_helper. + + :param generator_helper: Python function that should take (x_sample,) as an input: + + - x_sample is a gene expression vector of a cell + :param idx: Indicies of data set to include in generator. 
+ :return: + """ + if idx is None: + idx = np.arange(0, self.data.n_obs) + + # Prepare data reading according to whether anndata is backed or not: + if self.using_store: + generator_raw = self.data.generator( + batch_size=1, + obs_keys=[], + continuous_batches=True, + ) + + def generator(): + counter = -1 + for z in generator_raw: + counter += 1 + if counter in idx: + x_sample = z[0].toarray().flatten() + yield generator_helper(x_sample=x_sample) + + n_features = self.data.n_vars + n_samples = self.data.n_obs + else: + x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + + def generator(): + is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) + indices = idx if self.data.isbacked else range(x.shape[0]) + for i in indices: + x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() + yield generator_helper(x_sample=x_sample) + + n_features = x.shape[1] + n_samples = x.shape[0] + + return generator, n_samples, n_features + def _get_dataset( self, idx: Union[np.ndarray, None], @@ -556,9 +608,6 @@ def _get_dataset( # Determine model type [ae, vae(iaf, vamp)] model_type = "vae" if self.model_type[:3] == "vae" else "ae" - if idx is None: - idx = np.arange(0, self.data.n_obs) - if mode in ['train', 'train_val', 'eval', 'predict']: def generator_helper(x_sample): sf_sample = prepare_sf(x=x_sample)[0] @@ -569,39 +618,11 @@ def generator_helper(x_sample): else: return (x_sample, sf_sample), x_sample - # Prepare data reading according to whether anndata is backed or not: - if self.using_store: - generator_raw = self.data.generator( - batch_size=1, - obs_keys=[], - continuous_batches=True, - ) - - def generator(): - counter = -1 - for z in generator_raw: - counter += 1 - if counter in idx: - x_sample = z[0].toarray().flatten() - yield generator_helper(x_sample=x_sample) - - n_features = self.data.n_vars - n_samples = self.data.n_obs - else: - x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) - - def 
generator(): - is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) - indices = idx if self.data.isbacked else range(x.shape[0]) - for i in indices: - x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() - yield generator_helper(x_sample=x_sample) - - n_features = x.shape[1] - n_samples = x.shape[0] - - output_types, output_shapes = self._get_output_dim(n_features, model_type, mode=mode) - + generator, n_samples, n_features = self._get_base_generator( + generator_helper=generator_helper, + idx=idx, + ) + output_types, output_shapes = self._get_output_dim(n_features=n_features, model_type=model_type, mode=mode) dataset = tf.data.Dataset.from_generator( generator=generator, output_types=output_types, @@ -715,10 +736,12 @@ def _metrics(self): return {"neg_ll": [custom_mse, custom_negll]} - def evaluate_any(self, idx, batch_size=64, max_steps=20): + def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): """ Evaluate the custom model on any local data. + Defaults to run on full data if idx is None. + :param idx: Indices of observations to evaluate on. Evaluates on all observations if None. :param batch_size: Batch size for evaluation. :param max_steps: Maximum steps before evaluation round is considered complete. @@ -732,35 +755,22 @@ def evaluate_any(self, idx, batch_size=64, max_steps=20): mode='eval' ) steps = min(max(len(idx) // batch_size, 1), max_steps) - results = self.model.training_model.evaluate( - x=dataset, steps=steps - ) + results = self.model.training_model.evaluate(x=dataset, steps=steps) return dict(zip(self.model.training_model.metrics_names, results)) else: return {} - def evaluate(self, batch_size=64, max_steps=20): + def evaluate(self, batch_size: int = 1, max_steps: int = np.inf): """ - Evaluate the custom model on local data. + Evaluate the custom model on test data. Defaults to run on full data if idx_test was not set before, ie. train() has not been called before. 
+ :param batch_size: Batch size for evaluation. + :param max_steps: Maximum steps before evaluation round is considered complete. :return: Dictionary of metric names and values. """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - idx = np.arange(0, self.data.n_obs) if self.idx_test is None else self.idx_test - dataset = self._get_dataset( - idx=idx, - batch_size=batch_size, - mode='eval' - ) - steps = min(max(len(idx) // batch_size, 1), max_steps) - results = self.model.training_model.evaluate( - x=dataset, steps=steps - ) - return dict(zip(self.model.training_model.metrics_names, results)) - else: - return {} + return self.evaluate_any(idx=self.idx_test, batch_size=batch_size, max_steps=max_steps) def predict(self): """ @@ -770,12 +780,12 @@ def predict(self): prediction """ if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x = self._get_dataset( + dataset = self._get_dataset( idx=self.idx_test, batch_size=64, mode='predict' ) - return self.model.predict_reconstructed(x=x) + return self.model.predict_reconstructed(x=dataset) else: return np.array([]) @@ -787,15 +797,12 @@ def predict_embedding(self): latent space """ if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x = self._get_dataset( + dataset = self._get_dataset( idx=self.idx_test, batch_size=64, mode='predict' ) - return self.model.predict_embedding( - x=x, - variational=False - ) + return self.model.predict_embedding(x=dataset, variational=False) else: return np.array([]) @@ -807,15 +814,12 @@ def predict_embedding_variational(self): sample of latent space, mean of latent space, variance of latent space """ if self.idx_test is None or self.idx_test: # true if the array is not empty or if the passed value is None - x = self._get_dataset( + dataset = self._get_dataset( idx=self.idx_test, batch_size=64, 
mode='predict' ) - return self.model.predict_embedding( - x=x, - variational=True - ) + return self.model.predict_embedding(x=dataset, variational=True) else: return np.array([]) @@ -1015,30 +1019,47 @@ def _get_celltype_out( ).flatten() return weights, y - def _get_dataset( + @staticmethod + def _get_output_dim(n_features, n_labels, mode): + if mode == 'predict': + output_types = (tf.float32,) + output_shapes = (tf.TensorShape([n_features]),) + else: + output_types = (tf.float32, tf.float32, tf.float32) + output_shapes = ( + (tf.TensorShape([n_features])), + tf.TensorShape([n_labels]), + tf.TensorShape([]) + ) + + return output_types, output_shapes + + def _get_base_generator( self, + generator_helper, idx: Union[np.ndarray, None], - batch_size: Union[int, None], - mode: str, - shuffle_buffer_size: int = int(1e7), - prefetch: int = 10, - weighted: bool = True, + weighted: bool = False, ): """ + Yield a basic generator based on which a tf dataset can be built. - :param idx: - :param batch_size: - :param mode: - :param shuffle_buffer_size: - :param weighted: Whether to use weights. + The signature of this generator can be modified through generator_helper. + + :param generator_helper: Python function that should take (x_sample, y_sample, w_sample) as an input: + + - x_sample is a gene expression vector of a cell + - y_sample is a one-hot encoded label vector of a cell + - w_sample is a weight scalar of a cell + :param idx: Indicies of data set to include in generator. 
:return: """ + if idx is None: + idx = np.arange(0, self.data.n_obs) + + # Prepare data reading according to whether anndata is backed or not: if self.using_store: if weighted: raise ValueError("using weights with store is not supported yet") - n_obs = self.data.n_obs - n_features = self.data.n_vars - n_labels = self.data.celltypes_universe.onto_cl.n_leaves generator_raw = self.data.generator( batch_size=1, obs_keys=["cell_ontology_class"], @@ -1052,99 +1073,79 @@ def generator(): counter += 1 if counter in idx: x_sample = z[0].toarray().flatten() - y = onehot_encoder(z[0]["cell_ontology_class"].values[0]) - yield x_sample, y, 1. + y_sample = onehot_encoder(z[0]["cell_ontology_class"].values[0]) + yield generator_helper(x_sample, y_sample, 1.) - dataset = tf.data.Dataset.from_generator( - generator=generator, - output_types=(tf.float32, tf.float32, tf.float32), - output_shapes=( - (tf.TensorShape([n_features])), - tf.TensorShape([n_labels]), - tf.TensorShape([]) - ) - ) - if mode == 'train' or mode == 'train_val': - dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(n_obs, shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True - ) - dataset = dataset.batch(batch_size).prefetch(prefetch) - - return dataset + n_features = self.data.n_vars + n_samples = self.data.n_obs + n_labels = self.data.celltypes_universe.onto_cl.n_leaves else: - if mode != 'predict': - weights, y = self._get_celltype_out(idx=idx) - if not weighted: - weights = np.ones_like(weights) - if mode == 'train' or mode == 'train_val': - if isinstance(self.data, anndata.AnnData) and self.data.isbacked: - n_features = self.data.X.shape[1] - n_labels = y.shape[1] - - def generator(): - sparse = isinstance(self.data.X[0, :], scipy.sparse.spmatrix) - for i, ii in enumerate(idx): - x = self.data.X[ii, :].toarray().flatten() if sparse else self.data.X[ii, :].flatten() - yield x, y[i, :], weights[i] - else: - x = self._prepare_data_matrix(idx=idx) - n_features = x.shape[1] - 
n_labels = y.shape[1] - - def generator(): - for i, ii in enumerate(idx): - yield x[i, :].toarray().flatten(), y[i, :], weights[i] - - dataset = tf.data.Dataset.from_generator( - generator=generator, - output_types=(tf.float32, tf.float32, tf.float32), - output_shapes=( - (tf.TensorShape([n_features])), - tf.TensorShape([n_labels]), - tf.TensorShape([]) - ) - ) - if mode == 'train': - dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(x.shape[0], shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True - ).batch(batch_size).prefetch(prefetch) - - return dataset - - elif mode == 'eval': - # Prepare data reading according to whether anndata is backed or not: - if isinstance(self.data, anndata.AnnData) and self.data.isbacked: - # Need to supply sorted indices to backed anndata: - x = self.data.X[np.sort(idx), :] - # Sort back in original order of indices. - x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] - else: - x = self._prepare_data_matrix(idx=idx) - x = x.toarray() - - return x, y, weights - - elif mode == 'predict': - # Prepare data reading according to whether anndata is backed or not: - if self.data.isbacked: - # Need to supply sorted indices to backed anndata: - x = self.data.X[np.sort(idx), :] - # Sort back in original order of indices. 
- x = x[[np.where(np.sort(idx) == i)[0][0] for i in idx], :] - else: - x = self._prepare_data_matrix(idx=idx) - x = x.toarray() + weights, y = self._get_celltype_out(idx=idx) + if not weighted: + weights = np.ones_like(weights) + x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + + def generator(): + is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) + indices = idx if self.data.isbacked else range(x.shape[0]) + for i in indices: + x_sample = np.asarray(x[i, :].todense()).flatten() if is_sparse else x[i, :].flatten() + y_sample = y[i, :] + w_sample = weights[i] + yield generator_helper(x_sample, y_sample, w_sample) + + n_features = x.shape[1] + n_samples = x.shape[0] + n_labels = y.shape[1] - return x, None, None + return generator, n_samples, n_features, n_labels + + def _get_dataset( + self, + idx: Union[np.ndarray, None], + batch_size: Union[int, None], + mode: str, + shuffle_buffer_size: int = int(1e7), + prefetch: int = 10, + weighted: bool = True, + ): + """ + :param idx: + :param batch_size: + :param mode: + :param shuffle_buffer_size: + :param weighted: Whether to use weights. + :return: + """ + # This is a basic cell type prediction model estimator class, the standard generator is fine. + def generator_helper(x_sample, y_sample, w_sample): + if mode in ['train', 'train_val', 'eval']: + return x_sample, y_sample, w_sample else: - raise ValueError(f'Mode {mode} not recognised. 
Should be "train", "eval" or" predict"') + return x_sample, + + generator, n_samples, n_features, n_labels = self._get_base_generator( + generator_helper=generator_helper, + idx=idx, + weighted=weighted, + ) + output_types, output_shapes = self._get_output_dim(n_features=n_features, n_labels=n_labels, mode=mode) + dataset = tf.data.Dataset.from_generator( + generator=generator, + output_types=output_types, + output_shapes=output_shapes + ) + if mode == 'train' or mode == 'train_val': + dataset = dataset.repeat() + dataset = dataset.shuffle( + buffer_size=min(n_samples, shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True + ) + dataset = dataset.batch(batch_size).prefetch(prefetch) + + return dataset def _get_loss(self): return LossCrossentropyAgg() @@ -1159,22 +1160,27 @@ def _metrics(self): CustomTprClasswise(k=self.ntypes) ] - def predict(self): + def predict( + self, + batch_size: int = 1, + max_steps: int = np.inf, + ): """ Return the prediction of the model - :return: - prediction + :param batch_size: Batch size for evaluation. + :param max_steps: Maximum steps before evaluation round is considered complete. + :return: Prediction tensor. """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, _, _ = self._get_dataset( - idx=self.idx_test, - batch_size=None, + idx = self.idx_test + if idx is None or idx.any(): # true if the array is not empty or if the passed value is None + dataset = self._get_dataset( + idx=idx, + batch_size=batch_size, mode='predict' ) - return self.model.training_model.predict( - x=x - ) + steps = min(max(len(idx) // batch_size, 1), max_steps) + return self.model.training_model.predict(x=dataset, steps=steps) else: return np.array([]) @@ -1197,50 +1203,47 @@ def ytrue(self): def evaluate_any( self, idx, + batch_size: int = 1, + max_steps: int = np.inf, weighted: bool = True ): """ Evaluate the custom model on any local data. 
+ Defaults to run on full data if idx is None. + :param idx: Indices of observations to evaluate on. Evaluates on all observations if None. + :param batch_size: Batch size for evaluation. + :param max_steps: Maximum steps before evaluation round is considered complete. :param weighted: Whether to use class weights in evaluation. :return: Dictionary of metric names and values. """ if idx is None or idx.any(): # true if the array is not empty or if the passed value is None - x, y, w = self._get_dataset( + idx = np.arange(0, self.data.n_obs) if idx is None else idx + dataset = self._get_dataset( idx=idx, - batch_size=None, + batch_size=batch_size, mode='eval', weighted=weighted ) - results = self.model.training_model.evaluate( - x=x, y=y, sample_weight=w - ) + steps = min(max(len(idx) // batch_size, 1), max_steps) + results = self.model.training_model.evaluate(x=dataset, steps=steps) return dict(zip(self.model.training_model.metrics_names, results)) else: return {} - def evaluate(self, weighted: bool = True): + def evaluate(self, batch_size: int = 1, max_steps: int = np.inf, weighted: bool = True): """ Evaluate the custom model on local data. Defaults to run on full data if idx_test was not set before, ie. train() has not been called before. + :param batch_size: Batch size for evaluation. + :param max_steps: Maximum steps before evaluation round is considered complete. :param weighted: Whether to use class weights in evaluation. - :return: model.evaluate + :return: Dictionary of metric names and values. 
""" - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y, w = self._get_dataset( - idx=self.idx_test, - batch_size=None, - mode='eval', - weighted=weighted - ) - return self.model.training_model.evaluate( - x=x, y=y, sample_weight=w - ) - else: - return np.array([]) + return self.evaluate_any(idx=self.idx_test, batch_size=batch_size, max_steps=max_steps, weighted=weighted) def compute_gradients_input( self, From 56a2c1ec72941dd855377b7e99b0818088dba8b5 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 19 Apr 2021 19:21:54 +0200 Subject: [PATCH 107/161] Ebi fix (#226) * fixed ommitted custom EFO terms --- sfaira/unit_tests/versions/test_ontologies.py | 3 +++ sfaira/versions/metadata/base.py | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index c4e15599f..8eccb2903 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -128,6 +128,7 @@ def test_sclc_nodes(): assert "10x 5' v3 sequencing" in sclc.node_names assert "Smart-like" in sclc.node_names assert "Smart-seq2" in sclc.node_names + assert "sci-plex" in sclc.node_names assert "single cell library construction" in sclc.node_names @@ -141,6 +142,8 @@ def test_sclc_is_a(): assert sclc.is_a(query="10x 5' v3 sequencing", reference="10x v3 sequencing") assert not sclc.is_a(query="10x sequencing", reference="10x v1 sequencing") assert sclc.is_a(query="10x 5' v3 sequencing", reference="single cell library construction") + assert sclc.is_a(query="sci-plex", reference="single cell library construction") + assert not sclc.is_a(query="sci-plex", reference="10x sequencing") """ diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 90d59229f..15dd7edaf 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ 
-302,6 +302,8 @@ def __init__( self, ontology: str, root_term: str, + additional_terms: dict, + additional_edges: List[Tuple[str, str]], **kwargs ): def get_url_self(iri): @@ -370,6 +372,8 @@ def recursive_search(iri): self.graph = networkx.MultiDiGraph() nodes, edges = recursive_search(iri=root_term) + nodes.update(additional_terms) + edges.extend(additional_edges) for k, v in nodes.items(): self.graph.add_node(node_for_adding=k, **v) for x in edges: @@ -867,5 +871,9 @@ def __init__(self): additional_terms={ "microwell-seq": {"name": "microwell-seq"}, "sci-plex": {"name": "sci-plex"} - } + }, + additional_edges=[ + ("EFO:0010183", "microwell-seq"), + ("EFO:0010183", "sci-plex"), + ] ) From 5a24c5a9fe3b81b15adcf89ff51fd90a0cfb3abd Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 22 Apr 2021 17:58:30 +0200 Subject: [PATCH 108/161] Refactor streamlining (#219) * remove set_metadata_in_adata from Dataset.load() and add docstring * rename property _adata_ids_sfaira to _adata_ids * merge _set_metadata and streamline functions * globally rename cellontology_original_obs_key to cell_types_original_obs_key and cellontology_original to cell_types_original * globally rename var_ensembl_col to gene_id_ensembl_var_key * globally rename var_symbol_col to gene_id_symbols_var_key and gene_id_names to gene_id_symbols * WIP - refactoring streamline (incomplete) * WIP - refactoring streamline (incomplete) handling of cell-type annotations now done * rename .streamline() to .streamline_metadata() * rename .subset_genes() to .streamline_features() * move call of _set_genome(), _convert_and_set_var_names() and _collapse_gene_versions() from Dataset.load() to Dataset.streamline_features() * merge _load_cached() into load() as load() was not doing anything apart from calling _load_cached() * clean up feature streamlining * move applying of schema to var columns from streamline_features() to streamline_meta() * fix flake8 * adatapt methods and 
properties of Dataset group classes to new structure * fix flake8 * fix ontology [skip ci] * fix bugs * fix bugs * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs * fix bugs * fix bugs [skip ci] * remove __erasing_protection() method * fix bugs * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs * fix bugs * fix bugs [skip ci] * fix bugs [skip ci] * fix bugs * fix bugs * add unknown cell placeholder conversion in streamline_metadata * fix bug * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * fix bug [skip ci] * add comment that original celltype field cannot be the same as cell ontology class field in cellxgene schema [skip ci] * fix bug [skip ci] * fix bug * fix f8 --- docs/adding_dataset_classes.rst | 4 +- docs/adding_datasets.rst | 24 +- docs/api/sfaira.data.DatasetBase.rst | 6 +- docs/api/sfaira.data.DatasetInteractive.rst | 6 +- docs/using_data.rst | 10 +- .../{{ cookiecutter.id_without_doi }}.yaml | 6 +- .../{{ cookiecutter.id_without_doi }}.yaml | 6 +- sfaira/consts/adata_fields.py | 30 +- sfaira/consts/ontologies.py | 2 +- sfaira/data/base/dataset.py | 1080 ++++++++--------- sfaira/data/base/dataset_group.py | 337 +++-- .../databases/cellxgene/cellxgene_loader.py | 6 +- .../human_x_2020_scirnaseq_cao_001.yaml | 6 +- ...letoflangerhans_2017_smartseq2_enge_001.py | 4 +- .../mouse_x_2018_microwellseq_han_x.py | 4 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 6 +- ...pithelium_2019_10xsequencing_smilie_001.py | 4 +- ...man_ileum_2019_10xsequencing_martin_001.py | 6 +- ...stategland_2018_10xsequencing_henry_001.py | 4 +- .../human_pancreas_2016_indrop_baron_001.py | 4 +- ...pancreas_2016_smartseq2_segerstolpe_001.py | 4 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 4 +- ...uman_lung_2020_10xsequencing_miller_001.py | 4 +- .../human_brain_2017_droncseq_habib_001.py | 4 +- 
...human_testis_2018_10xsequencing_guo_001.py | 4 +- ...liver_2018_10xsequencing_macparland_001.py | 4 +- .../human_kidney_2019_droncseq_lake_001.py | 4 +- .../human_x_2019_10xsequencing_szabo_001.py | 6 +- ...man_retina_2019_10xsequencing_menon_001.py | 4 +- .../human_placenta_2018_x_ventotormo_001.py | 6 +- .../human_liver_2019_celseq2_aizarani_001.py | 4 +- ...ver_2019_10xsequencing_ramachandran_001.py | 4 +- ...an_liver_2019_10xsequencing_popescu_001.py | 4 +- ...rain_2019_10x3v2sequencing_kanton_001.yaml | 6 +- .../human_x_2020_microwellseq_han_x.py | 4 +- .../human_lung_2020_x_travaglini_001.yaml | 6 +- ...uman_colon_2020_10xsequencing_james_001.py | 6 +- .../human_lung_2019_dropseq_braga_001.py | 4 +- .../human_x_2019_10xsequencing_braga_x.py | 4 +- .../mouse_x_2019_10xsequencing_hove_001.py | 6 +- ...uman_kidney_2020_10xsequencing_liao_001.py | 4 +- ...man_retina_2019_10xsequencing_voigt_001.py | 4 +- .../human_x_2019_10xsequencing_wang_001.py | 4 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 4 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 6 +- ...nchyma_2020_10xsequencing_habermann_001.py | 4 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 6 +- ...uman_thymus_2020_10xsequencing_park_001.py | 4 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 10 +- ..._retina_2019_10xsequencing_lukowski_001.py | 6 +- ...lood_2019_10xsequencing_10xgenomics_001.py | 4 +- .../human_x_2018_10xsequencing_regev_001.py | 4 +- sfaira/data/interactive/loader.py | 6 +- sfaira/data/utils.py | 19 +- .../utils_scripts/create_meta_and_cache.py | 13 +- .../data/utils_scripts/streamline_selected.py | 4 +- sfaira/estimators/keras.py | 16 +- sfaira/interface/user_interface.py | 9 +- sfaira/unit_tests/data/test_data_utils.py | 11 +- sfaira/unit_tests/data/test_dataset.py | 7 +- 60 files changed, 899 insertions(+), 893 deletions(-) diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst index 8924fd8e1..cb499949d 100644 --- 
a/docs/adding_dataset_classes.rst +++ b/docs/adding_dataset_classes.rst @@ -62,7 +62,7 @@ In this scenario, meta data is described in a constructor of a class in the same self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cellontology_original = x # (optional) + self.obs_key_cell_types_original = x # (optional) # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and # directory as the python file of this data loader (see below). @@ -104,7 +104,7 @@ In summary, a python file for a mouse lung data set could look like this: self.year = "2020" self.sample_source = "primary_tissue" - self.obs_key_cellontology_original = "louvain_named" # i save my cell type names in here + self.obs_key_cell_types_original = "louvain_named" # i save my cell type names in here def load(data_dir, fn=None) -> anndata.AnnData: fn = os.path.join(data_dir, "my.h5ad") diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index 31ad2652f..daeed9f91 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -106,7 +106,7 @@ by `_`, below referred to as `--DOI-folder--`: 10. Mitigate automated cell type maps. Sfaira creates a cell type mapping `.tsv` file in the directory in which your data loaders is located if you - indicated that annotation is present by filling `cellontology_original_obs_key`. + indicated that annotation is present by filling `cell_types_original_obs_key`. This file is: `NA_NA_2021_NA_Einstein_001.tsv`. This file contains two columns with one row for each unique cell type label. 
The free text identifiers in the first column "source", @@ -224,10 +224,10 @@ before it is loaded into memory: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: + cell_types_original_obs_key: feature_wise: - var_ensembl_col: - var_symbol_col: + gene_id_ensembl_var_key: + gene_id_symbols_var_key: meta: version: "1.0" @@ -295,10 +295,10 @@ In summary, a the dataloader for a mouse lung data set could look like this: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: "louvain_named" + cell_types_original_obs_key: "louvain_named" feature_wise: - var_ensembl_col: - var_symbol_col: + gene_id_ensembl_var_key: + gene_id_symbols_var_key: meta: version: "1.0" @@ -475,7 +475,7 @@ All tests must pass! If any of the tests fail please revisit your dataloader and Map cell type labels to ontology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The entries in `self.cellontology_original_obs_key` are free text but are mapped to an ontology via a .tsv file with +The entries in `self.cell_types_original_obs_key` are free text but are mapped to an ontology via a .tsv file with the same name and directory as the python file in which the data loader is located. This .tsv contains two columns with one row for each unique cell type label. The free text identifiers in the first column "source", @@ -518,7 +518,7 @@ Required are: - dataset_wise: author, doi, download_url_data, normalisation and year are required. - dataset_or_observation_wise: organism is required. - observation_wise: None are required. -- feature_wise: var_ensembl_col or var_symbol_col is required. +- feature_wise: gene_id_ensembl_var_key or gene_id_symbols_var_key is required. - misc: None are required. Field descriptions @@ -630,15 +630,15 @@ outlined below. 
Meta-data which are strictly observation-wise are in the section `observation_wise` in the `.yaml` file: -- cellontology_original_obs_key [string] +- cell_types_original_obs_key [string] Column name in `adata.obs` emitted by the `load()` function which contains free text cell type labels. Meta-data which are feature-wise are in the section `feature_wise` in the `.yaml` file: -- var_ensembl_col [string] +- gene_id_ensembl_var_key [string] Name of the column in `adata.var` emitted by the `load()` which contains ENSEMBL gene IDs. This can also be "index" if the ENSEMBL gene names are in the index of the `adata.var` data frame. -- var_symbol_col:.[string] +- gene_id_symbols_var_key:.[string] Name of the column in `adata.var` emitted by the `load()` which contains gene symbol: HGNC for human and MGI for mouse. This can also be "index" if the gene symbol are in the index of the `adata.var` data frame. diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst index 6237f0786..8f83a7c06 100644 --- a/docs/api/sfaira.data.DatasetBase.rst +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -64,7 +64,7 @@ ~DatasetBase.obs_key_assay_type_differentiation ~DatasetBase.obs_key_cell_line ~DatasetBase.obs_key_cellontology_id - ~DatasetBase.obs_key_cellontology_original + ~DatasetBase.obs_key_cell_types_original ~DatasetBase.obs_key_dev_stage ~DatasetBase.obs_key_ethnicity ~DatasetBase.obs_key_healthy @@ -82,8 +82,8 @@ ~DatasetBase.sample_source ~DatasetBase.source ~DatasetBase.state_exact - ~DatasetBase.var_ensembl_col - ~DatasetBase.var_symbol_col + ~DatasetBase.gene_id_ensembl_var_key + ~DatasetBase.gene_id_symbols_var_key ~DatasetBase.year \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst index ca64fd27a..680496e67 100644 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -64,7 +64,7 @@ 
~DatasetInteractive.obs_key_assay_type_differentiation ~DatasetInteractive.obs_key_cell_line ~DatasetInteractive.obs_key_cellontology_id - ~DatasetInteractive.obs_key_cellontology_original + ~DatasetInteractive.obs_key_cell_types_original ~DatasetInteractive.obs_key_dev_stage ~DatasetInteractive.obs_key_ethnicity ~DatasetInteractive.obs_key_healthy @@ -82,8 +82,8 @@ ~DatasetInteractive.sample_source ~DatasetInteractive.source ~DatasetInteractive.state_exact - ~DatasetInteractive.var_ensembl_col - ~DatasetInteractive.var_symbol_col + ~DatasetInteractive.gene_id_ensembl_var_key + ~DatasetInteractive.gene_id_symbols_var_key ~DatasetInteractive.year \ No newline at end of file diff --git a/docs/using_data.rst b/docs/using_data.rst index 1d8bbb944..24f0a1cbb 100644 --- a/docs/using_data.rst +++ b/docs/using_data.rst @@ -74,9 +74,9 @@ Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): - .download_url_meta: Download link to metadata. Assumes that meta data is defined in .download_url_data if not specified. Example: self.download_meta = "some URL" - - .var_symbol_col, .var_ensembl_col: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var + - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. - Example: self.var_symbol_col = 'index', self.var_ensembl_col = “GeneID” + Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = “GeneID” - .author: First author of publication (or list of all authors). 
self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] - .doi: Doi of publication @@ -109,8 +109,8 @@ Optional (if available): Example: self.sex = “male” - .state_exact (or .obs_key_state_exact): Exact disease state self.state_exact = free text - - .obs_key_cellontology_original: Column in .obs in which free text cell type names are stored. - Example: self.obs_key_cellontology_original = 'CellType' + - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. + Example: self.obs_key_cell_types_original = 'CellType' - .year: Year of publication: Example: self.year = 2019 - .cell_line: Which cell line was used for the experiment (for cell culture samples) @@ -138,7 +138,7 @@ Setting of class metadata such as `.doi`, `.id` etc. should be done in the const How do I tell sfaira where the gene names are? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By setting the attributes `.var_symbol_col` or `.var_ensembl_col` in the constructor. +By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. If the gene names are in the index of this data frame, you can set “index” as the value of these attributes. I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? 
diff --git a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml index 5f73dc4ce..0a10f0d0a 100644 --- a/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml +++ b/sfaira/commands/templates/multiple_datasets/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -49,10 +49,10 @@ dataset_or_observation_wise: {% for fn in cookiecutter.sample_fns.fns %} {{ fn }}: {% endfor %} tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: + cell_types_original_obs_key: feature_wise: - var_ensembl_col: - var_symbol_col: + gene_id_ensembl_var_key: + gene_id_symbols_var_key: misc: meta: version: "1.0" diff --git a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml index 5a505dccd..1beb00a25 100644 --- a/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml +++ b/sfaira/commands/templates/single_dataset/{{ cookiecutter.doi_sfaira_repr }}/{{ cookiecutter.id_without_doi }}.yaml @@ -35,10 +35,10 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: + cell_types_original_obs_key: feature_wise: - var_ensembl_col: - var_symbol_col: + gene_id_ensembl_var_key: + gene_id_symbols_var_key: misc: meta: version: "1.0" diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index a768ade0e..91caf9c96 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -2,7 +2,7 @@ The classes in this file are containers of field names and element entries that are used in 
streamlined adata objects in sfaira and in associated data bases. """ -from typing import List +from typing import List, Union class AdataIds: @@ -25,7 +25,7 @@ class AdataIds: ethnicity: str gene_id_ensembl: str gene_id_index: str - gene_id_names: str + gene_id_symbols: str id: str individual: str ncells: str @@ -42,6 +42,14 @@ class AdataIds: var_keys: List[str] uns_keys: List[str] + classmap_source_key: str + classmap_target_key: str + classmap_target_id_key: str + + unknown_celltype_identifier: Union[str, None] + not_a_cell_celltype_identifier: Union[str, None] + unknown_metadata_identifier: Union[str, None] + class AdataIdsSfaira(AdataIds): """ @@ -73,8 +81,8 @@ def __init__(self): self.download_url_data = "download_url_data" self.download_url_meta = "download_url_meta" self.gene_id_ensembl = "ensembl" - self.gene_id_index = "ensembl" - self.gene_id_names = "names" + self.gene_id_index = self.gene_id_ensembl + self.gene_id_symbols = "names" self.id = "id" self.individual = "individual" self.ncells = "ncells" @@ -126,7 +134,7 @@ def __init__(self): ] self.var_keys = [ "gene_id_ensembl", - "gene_id_names", + "gene_id_symbols", ] self.uns_keys = [ "annotated", @@ -141,6 +149,9 @@ def __init__(self): "primary_data", "title", "year", + "load_raw", + "mapped_features", + "remove_gene_version", ] @@ -153,13 +164,15 @@ class AdataIdsCellxgene(AdataIds): def __init__(self): self.assay_sc = "assay" - self.cell_types_original = "cell_type" # TODO "free_annotation" not always given. + self.cell_types_original = "free_annotation" # TODO "free_annotation" not always given + # TODO: -> This will break streamlining though if self.cell_types_original is the same value as self.cell_ontology_class!! 
self.cell_ontology_class = "cell_type" self.cell_ontology_id = "cell_type_ontology_term_id" self.default_embedding = "default_embedding" self.doi = "preprint_doi" self.disease = "disease" - self.gene_id_names = "gene_symbol" + self.gene_id_symbols = "gene_symbol" + self.gene_id_index = self.gene_id_symbols self.id = "id" self.ncells = "ncells" self.organ = "tissue" @@ -178,6 +191,7 @@ def __init__(self): self.author_names = "names" self.unknown_celltype_identifier = None + self.not_a_cell_celltype_identifier = self.unknown_celltype_identifier self.unknown_metadata_identifier = "unknown" self.invalid_metadata_identifier = "na" self.unknown_metadata_ontology_id_identifier = "" @@ -201,7 +215,7 @@ def __init__(self): "tech_sample", ] self.var_keys = [ - "gene_id_names", + "gene_id_symbols", ] self.uns_keys = [ "default_embedding", diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 78d5e9e39..78394b20e 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -16,7 +16,7 @@ def __init__(self): self.bio_sample = None self.cell_line = OntologyCellosaurus() self.cellontology_class = "v2021-02-01" - self.cellontology_original = None + self.cell_types_original = None self.collection_id = None self.default_embedding = None self.development_stage = None # OntologyHsapdv() # TODO allow for other organisms here too. 
diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 4d6deb21b..199e6cc66 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -2,6 +2,7 @@ import abc import anndata +from anndata.utils import make_index_unique import h5py import numpy as np import pandas as pd @@ -9,7 +10,7 @@ from os import PathLike import pandas import scipy.sparse -from typing import Dict, List, Tuple, Union +from typing import List, Tuple, Union import warnings import urllib.request import urllib.parse @@ -19,7 +20,7 @@ from sfaira.versions.genomes import GenomeContainer from sfaira.versions.metadata import Ontology, OntologyHierarchical, CelltypeUniverse -from sfaira.consts import AdataIds, AdataIdsSfaira, META_DATA_FIELDS, OCS +from sfaira.consts import AdataIds, AdataIdsCellxgene, AdataIdsSfaira, META_DATA_FIELDS, OCS from sfaira.data.utils import collapse_matrix, read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -108,7 +109,7 @@ class DatasetBase(abc.ABC): _assay_cell_line_obs_key: Union[None, str] _cellontology_class_obs_key: Union[None, str] _cellontology_id_obs_key: Union[None, str] - _cellontology_original_obs_key: Union[None, str] + _cell_types_original_obs_key: Union[None, str] _development_stage_obs_key: Union[None, str] _disease_obs_key: Union[None, str] _ethnicity_obs_key: Union[None, str] @@ -121,12 +122,18 @@ class DatasetBase(abc.ABC): _state_exact_obs_key: Union[None, str] _tech_sample_obs_key: Union[None, str] - _var_symbol_col: Union[None, str] - _var_ensembl_col: Union[None, str] + _gene_id_symbols_var_key: Union[None, str] + _gene_id_ensembl_var_key: Union[None, str] _celltype_universe: Union[None, CelltypeUniverse] _ontology_class_map: Union[None, dict] + load_raw: Union[None, bool] + mapped_features: Union[None, str, bool] + remove_gene_version: Union[None, bool] + subset_gene_type: Union[None, str] + streamlined_meta: bool + sample_fn: Union[None, str] _sample_fns: Union[None, List[str]] @@ -165,7 +172,7 @@ def __init__( is 
to be loaded. :param kwargs: """ - self._adata_ids_sfaira = AdataIdsSfaira() + self._adata_ids = AdataIdsSfaira() self.ontology_container_sfaira = OCS # Using a pre-instantiated version of this yields drastic speed-ups. self.adata = None @@ -210,7 +217,7 @@ def __init__( self._cell_line_obs_key = None self._cellontology_class_obs_key = None self._cellontology_id_obs_key = None - self._cellontology_original_obs_key = None + self._cell_types_original_obs_key = None self._development_stage_obs_key = None self._disease_obs_key = None self._ethnicity_obs_key = None @@ -223,15 +230,21 @@ def __init__( self._state_exact_obs_key = None self._tech_sample_obs_key = None - self._var_symbol_col = None - self._var_ensembl_col = None + self._gene_id_symbols_var_key = None + self._gene_id_ensembl_var_key = None self.class_maps = {"0": {}} - self._unknown_celltype_identifiers = self._adata_ids_sfaira.unknown_celltype_identifier + self._unknown_celltype_identifiers = self._adata_ids.unknown_celltype_identifier self._celltype_universe = None self._ontology_class_map = None + self.load_raw = None + self.mapped_features = None + self.remove_gene_version = None + self.subset_gene_type = None + self.streamlined_meta = False + self.sample_fn = sample_fn self._sample_fns = sample_fns @@ -366,23 +379,27 @@ def cache_fn(self): cache = os.path.join(self.cache_path, self.directory_formatted_doi) return os.path.join(cache, "cache", self._directory_formatted_id + ".h5ad") - def _load_cached( + def load( self, - load_raw: bool, - allow_caching: bool, + load_raw: bool = False, + allow_caching: bool = True, **kwargs ): """ - Wraps data set specific load and allows for caching. - + Load the selected datasets into memory. Cache is written into director named after doi and h5ad named after data set id. Cache is not over-written. - :param load_raw: Loads unprocessed version of data if available in data loader. - :param allow_caching: Whether to allow method to cache adata object for faster re-loading. 
- :return: + :param load_raw: Force reading the raw object even when a cached one is present. + :param allow_caching: Write the object to cache after loading if no cache exists yet. """ + # Sanity checks + if self.adata is not None: + raise ValueError(f"adata of {self.id} already loaded.") + if self.data_dir is None: + raise ValueError("No sfaira data repo path provided in constructor.") + # Run data set-specific loading script: def _assembly_wrapper(): if self.load_func is None: raise ValueError(f"Tried to access load_func for {self.id} but did not find any.") @@ -427,133 +444,126 @@ def _cached_writing(filename): else: # not load_raw and not allow_caching _cached_reading(self.cache_fn) - def load( - self, - remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, - load_raw: bool = False, - allow_caching: bool = True, - set_metadata: bool = True, - **kwargs - ): - if match_to_reference and not remove_gene_version: - warnings.warn("it is not recommended to enable matching the feature space to a genomes reference" - "while not removing gene versions. 
this can lead to very poor matching results") - if not (isinstance(match_to_reference, bool) and not match_to_reference): - self._set_genome(organism=self.organism, assembly=match_to_reference) - - # Set path to dataset directory - if self.data_dir is None: - raise ValueError("No sfaira data repo path provided in constructor.") - - # Run data set-specific loading script: - self._load_cached(load_raw=load_raw, allow_caching=allow_caching, **kwargs) - # Set loading hyper-parameter-specific meta data: - self.adata.uns[self._adata_ids_sfaira.load_raw] = load_raw - self.adata.uns[self._adata_ids_sfaira.mapped_features] = match_to_reference - self.adata.uns[self._adata_ids_sfaira.remove_gene_version] = remove_gene_version - if set_metadata: - # Set data-specific meta data in .adata: - self._set_metadata_in_adata(allow_uns=True) - # Streamline feature space: - self._convert_and_set_var_names(match_to_reference=match_to_reference) - self._collapse_genes(remove_gene_version=remove_gene_version) + # Set loading-specific metadata: + self.load_raw = load_raw load.__doc__ = load_doc - def _convert_and_set_var_names( + def _add_missing_featurenames( self, match_to_reference: Union[str, bool, None], - symbol_col: str = None, - ensembl_col: str = None, ): - # Use defaults defined in data loader if none given to this function. - if symbol_col is None: - symbol_col = self.var_symbol_col - if ensembl_col is None: - ensembl_col = self.var_ensembl_col - if not ensembl_col and not symbol_col: - raise ValueError('Please provide the name of at least the name of the var column containing ensembl ids or' - 'the name of the var column containing gene symbols') - # Process given gene names: Full gene names ("symbol") or ENSEMBL IDs ("ensembl"). - # Below the .var column that contain the target IDs are renamed to follow streamlined naming. - # If the IDs were contained in the index, a new column is added to .var. 
- if symbol_col: - if symbol_col == 'index': - self.adata.var[self._adata_ids_sfaira.gene_id_names] = self.adata.var.index.values.tolist() - else: - assert symbol_col in self.adata.var.columns, f"symbol_col {symbol_col} not found in .var" - self.adata.var = self.adata.var.rename( - {symbol_col: self._adata_ids_sfaira.gene_id_names}, - axis='columns' - ) - if ensembl_col: - if ensembl_col == 'index': - self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = self.adata.var.index.values.tolist() - else: - assert ensembl_col in self.adata.var.columns, f"ensembl_col {ensembl_col} not found in .var" - self.adata.var = self.adata.var.rename( - {ensembl_col: self._adata_ids_sfaira.gene_id_ensembl}, - axis='columns' - ) - # If only symbol or ensembl was supplied, the other one is inferred from a genome mapping dictionary. - if not ensembl_col and not (isinstance(match_to_reference, bool) and not match_to_reference): - id_dict = self.genome_container.names_to_id_dict - id_strip_dict = self.genome_container.strippednames_to_id_dict - # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, - # match it straight away, if it is not in there we try to match everything in front of the first period in - # the gene name with a dictionary that was modified in the same way, if there is still no match we append na - ensids = [] - for n in self.adata.var[self._adata_ids_sfaira.gene_id_names]: - if n in id_dict.keys(): - ensids.append(id_dict[n]) - elif n.split(".")[0] in id_strip_dict.keys(): - ensids.append(id_strip_dict[n.split(".")[0]]) - else: - ensids.append('n/a') - self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] = ensids - - if not symbol_col and not (isinstance(match_to_reference, bool) and not match_to_reference): - id_dict = self.genome_container.id_to_names_dict - self.adata.var[self._adata_ids_sfaira.gene_id_names] = [ - id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in 
self.adata.var[self._adata_ids_sfaira.gene_id_ensembl] - ] - - if match_to_reference: - # Lastly, the index of .var is set to ensembl IDs. - try: # debugging - self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_index].values.tolist() - except KeyError as e: - raise KeyError(e) - self.adata.var_names_make_unique() + # If schema does not include symbols or ensebl ids, add them to the schema so we can do the conversion + if hasattr(self._adata_ids, "gene_id_symbols"): + gene_id_symbols = self._adata_ids.gene_id_symbols + else: + gene_id_symbols = "gene_symbol" # add some default name if not in schema + self._adata_ids.gene_id_symbols = gene_id_symbols + if hasattr(self._adata_ids, "gene_id_ensembl"): + gene_id_ensembl = self._adata_ids.gene_id_ensembl + else: + gene_id_ensembl = "ensembl" # add some default name if not in schema + self._adata_ids.gene_id_ensembl = gene_id_ensembl + + if match_to_reference is not False: + if not self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: + raise ValueError("Either gene_id_symbols_var_key or gene_id_ensembl_var_key needs to be provided in the" + " dataloader") + elif not self.gene_id_symbols_var_key and self.gene_id_ensembl_var_key: + # Convert ensembl ids to gene symbols + id_dict = self.genome_container.id_to_names_dict + ensids = self.adata.var.index if self.gene_id_ensembl_var_key == "index" else self.adata.var[self.gene_id_ensembl_var_key] + self.adata.var[gene_id_symbols] = [ + id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' + for n in ensids + ] + self.gene_id_symbols_var_key = gene_id_symbols + elif self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: + # Convert gene symbols to ensembl ids + id_dict = self.genome_container.names_to_id_dict + id_strip_dict = self.genome_container.strippednames_to_id_dict + # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, + # match it straight away, if it is 
not in there we try to match everything in front of the first period in + # the gene name with a dictionary that was modified in the same way, if there is still no match we append na + ensids = [] + symbs = self.adata.var.index if self.gene_id_symbols_var_key == "index" else self.adata.var[self.gene_id_symbols_var_key] + for n in symbs: + if n in id_dict.keys(): + ensids.append(id_dict[n]) + elif n.split(".")[0] in id_strip_dict.keys(): + ensids.append(id_strip_dict[n.split(".")[0]]) + else: + ensids.append('n/a') + self.adata.var[gene_id_ensembl] = ensids + self.gene_id_ensembl_var_key = gene_id_ensembl - def _collapse_genes(self, remove_gene_version): + def _collapse_ensembl_gene_id_versions(self): """ Remove version tag on ensembl gene ID so that different versions are superimposed downstream. - :param remove_gene_version: :return: """ - if remove_gene_version: - self.adata.var_names = [ - x.split(".")[0] for x in self.adata.var[self._adata_ids_sfaira.gene_id_index].values + if not self.gene_id_ensembl_var_key: + raise ValueError( + "Cannot remove gene version when gene_id_ensembl_var_key is not set in dataloader and " + "match_to_reference is False" + ) + elif self.gene_id_ensembl_var_key == "index": + self.adata.index = [ + x.split(".")[0] for x in self.adata.var.index + ] + else: + self.adata.var[self.gene_id_ensembl_var_key] = [ + x.split(".")[0] for x in self.adata.var[self.gene_id_ensembl_var_key].values ] # Collapse if necessary: - self.adata = collapse_matrix(adata=self.adata) - - self.adata.var[self._adata_ids_sfaira.gene_id_index] = self.adata.var_names - self.adata.var.index = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values + self.adata = collapse_matrix(adata=self.adata, var_column=self.gene_id_ensembl_var_key) - def subset_genes(self, subset_type: Union[None, str, List[str]] = None): + def streamline_features( + self, + remove_gene_version: bool = True, + match_to_reference: Union[str, bool, None] = None, + subset_genes_to_type: 
Union[None, str, List[str]] = None, + ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - - :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - + This also adds missing ensid or gene symbol columns if match_to_reference is not set to False and removes all + adata.var columns that are not defined as gene_id_ensembl_var_key or gene_id_symbol_var_key in the dataloader. + + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. + :param match_to_reference: Whether to map gene names to a given annotation. Can be: + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - None: use the default annotation for this organism in sfaira. + - False: no mapping of gene labels will be done. + :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. """ + # TODO: think about workflow when featurespace should nt be sreamlined. can we still apply a metadata schema? 
+ assert match_to_reference is not False, "feature_streamlining is not possible when match_to_reference is False" + + # Set genome container if mapping of gene labels is requested + if match_to_reference is not False: # Testing this explicitly makes sure False is treated separately from None + self._set_genome(organism=self.organism, assembly=match_to_reference) + self.mapped_features = self.genome_container.assembly + else: + self.mapped_features = False + self.remove_gene_version = remove_gene_version + self.subset_gene_type = subset_genes_to_type + # Streamline feature space: + self._add_missing_featurenames(match_to_reference=match_to_reference) + for key in [self.gene_id_ensembl_var_key, self.gene_id_symbols_var_key]: + # Make features unique (to avoid na-matches in converted columns to be collapsed by + # _collapse_ensembl_gene_id_versions() below. + if not key: + pass + elif key == "index": + self.adata.var.index = make_index_unique(self.adata.var.index).tolist() + else: + self.adata.var[key] = make_index_unique(self.adata.var[key]).tolist() + if remove_gene_version: + self._collapse_ensembl_gene_id_versions() + # Convert data matrix to csc matrix if isinstance(self.adata.X, np.ndarray): # Change NaN to zero. This occurs for example in concatenation of anndata instances. 
@@ -566,23 +576,24 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): raise ValueError(f"Data type {type(self.adata.X)} not recognized.") # Compute indices of genes to keep - data_ids_ensg = self.adata.var[self._adata_ids_sfaira.gene_id_ensembl].values - if subset_type is None: + data_ids_ensg = self.adata.var.index.values if self.gene_id_ensembl_var_key == "index" \ + else self.adata.var[self.gene_id_ensembl_var_key].values + if subset_genes_to_type is None: subset_ids_ensg = self.genome_container.ensembl subset_ids_symbol = self.genome_container.symbols else: - if isinstance(subset_type, str): - subset_type = [subset_type] + if isinstance(subset_genes_to_type, str): + subset_genes_to_type = [subset_genes_to_type] keys = np.unique(self.genome_container.biotype) - if subset_type not in keys: - raise ValueError(f"subset type {subset_type} not available in list {keys}") + if subset_genes_to_type not in keys: + raise ValueError(f"subset type {subset_genes_to_type} not available in list {keys}") subset_ids_ensg = [ x.upper() for x, y in zip(self.genome_container.ensembl, self.genome_container.biotype) - if y in subset_type + if y in subset_genes_to_type ] subset_ids_symbol = [ x.upper() for x, y in zip(self.genome_container.symbols, self.genome_container.biotype) - if y in subset_type + if y in subset_genes_to_type ] # Remove unmapped genes @@ -590,9 +601,9 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): data_ids_kept = data_ids_ensg[idx_feature_kept] x = x[:, idx_feature_kept] # Build map of subset_ids to features in x: - idx_feature_map = np.array([subset_ids_symbol.index(x) for x in data_ids_kept]) + idx_feature_map = np.array([subset_ids_ensg.index(x) for x in data_ids_kept]) # Create reordered feature matrix based on reference and convert to csr - x_new = scipy.sparse.csc_matrix((x.shape[0], len(subset_ids_symbol)), dtype=x.dtype) + x_new = scipy.sparse.csc_matrix((x.shape[0], len(subset_ids_ensg)), 
dtype=x.dtype) # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: # ... scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) # ValueError: could not convert integer scalar @@ -604,241 +615,205 @@ def subset_genes(self, subset_type: Union[None, str, List[str]] = None): x_new[:, idx_feature_map[i + step:]] = x[:, i + step:] else: x_new[:, idx_feature_map] = x - x_new = x_new.tocsr() + # Create new var dataframe + if self.gene_id_symbols_var_key == "index": + var_index = subset_ids_symbol + var_data = {self.gene_id_ensembl_var_key: subset_ids_ensg} + elif self.gene_id_ensembl_var_key == "index": + var_index = subset_ids_ensg + var_data = {self.gene_id_symbols_var_key: subset_ids_symbol} + else: + var_index = None + var_data = {self.gene_id_symbols_var_key: subset_ids_symbol, + self.gene_id_ensembl_var_key: subset_ids_ensg} + var_new = pd.DataFrame(data=var_data, index=var_index) + self.adata = anndata.AnnData( X=x_new, obs=self.adata.obs, obsm=self.adata.obsm, - var=pd.DataFrame(data={self._adata_ids_sfaira.gene_id_names: subset_ids_symbol, - self._adata_ids_sfaira.gene_id_ensembl: subset_ids_ensg}, - index=subset_ids_ensg), + var=var_new, uns=self.adata.uns ) - def _set_metadata_in_adata(self, allow_uns: bool): - """ - Copy meta data from dataset class in .anndata. - - :param allow_uns: Allow writing of constant meta data into uns rather than .obs. 
- :return: - """ - # Set data set-wide attributes (.uns) (write to .obs if .uns is not allowed): - if allow_uns: - for k in self._adata_ids_sfaira.uns_keys: - if k not in self.adata.uns.keys(): - self.adata.uns[getattr(self._adata_ids_sfaira, k)] = getattr(self, k) - else: - for k in self._adata_ids_sfaira.uns_keys: - if k in self.adata.uns.keys(): - val = self.adata.uns[k] - else: - val = getattr(self, k) - while hasattr(val, '__len__') and not isinstance(val, str) and len(val) == 1: # unpack nested lists - val = val[0] - self.adata.obs[getattr(self._adata_ids_sfaira, k)] = [val for i in range(len(self.adata.obs))] - - # Set cell-wise or data set-wide attributes (.uns / .obs): - # These are saved in .uns if they are data set wide to save memory if allow_uns is True. - for x, y, z, v in ( - [self.assay_sc, self._adata_ids_sfaira.assay_sc, self.assay_sc_obs_key, self.ontology_container_sfaira.assay_sc], - [self.assay_differentiation, self._adata_ids_sfaira.assay_differentiation, self.assay_differentiation_obs_key, - self.ontology_container_sfaira.assay_differentiation], - [self.assay_type_differentiation, self._adata_ids_sfaira.assay_type_differentiation, - self.assay_type_differentiation_obs_key, self.ontology_container_sfaira.assay_type_differentiation], - [self.cell_line, self._adata_ids_sfaira.cell_line, self.cell_line_obs_key, - self.ontology_container_sfaira.cell_line], - [self.development_stage, self._adata_ids_sfaira.development_stage, self.development_stage_obs_key, - self.ontology_container_sfaira.development_stage], - [self.disease, self._adata_ids_sfaira.disease, self.disease_obs_key, - self.ontology_container_sfaira.disease], - [self.ethnicity, self._adata_ids_sfaira.ethnicity, self.ethnicity_obs_key, - self.ontology_container_sfaira.ethnicity], - [self.organ, self._adata_ids_sfaira.organ, self.organ_obs_key, self.ontology_container_sfaira.organ], - [self.organism, self._adata_ids_sfaira.organism, self.organism_obs_key, - 
self.ontology_container_sfaira.organism], - [self.sample_source, self._adata_ids_sfaira.sample_source, self.sample_source_obs_key, - self.ontology_container_sfaira.sample_source], - [self.sex, self._adata_ids_sfaira.sex, self.sex_obs_key, self.ontology_container_sfaira.sex], - [self.state_exact, self._adata_ids_sfaira.state_exact, self.state_exact_obs_key, None], - ): - if z is None and allow_uns: - self.adata.uns[y] = None - elif z is None and not allow_uns: - self.adata.obs[y] = x - elif z is not None: - # Attribute supplied per cell: Write into .obs. - # Search for direct match of the sought-after column name or for attribute specific obs key. - if z not in self.adata.obs.keys(): - # This should not occur in single data set loaders (see warning below) but can occur in - # streamlined data loaders if not all instances of the streamlined data sets have all columns - # in .obs set. - self.adata.uns[y] = None - print(f"WARNING: attribute {y} of data set {self.id} was not found in column {z}") # debugging - else: - # Include flag in .uns that this attribute is in .obs: - self.adata.uns[y] = UNS_STRING_META_IN_OBS - # Remove potential pd.Categorical formatting: - self._value_protection(attr=y, allowed=v, attempted=np.unique(self.adata.obs[z].values).tolist()) - self.adata.obs[y] = self.adata.obs[z].values.tolist() - else: - assert False, "switch option should not occur" - # Add batch annotation which can be rule-based - for x, y, z in ( - [self.bio_sample, self._adata_ids_sfaira.bio_sample, self.bio_sample_obs_key], - [self.individual, self._adata_ids_sfaira.individual, self.individual_obs_key], - [self.tech_sample, self._adata_ids_sfaira.tech_sample, self.tech_sample_obs_key], - ): - if z is None and allow_uns: - self.adata.uns[y] = x - elif z is None and not allow_uns: - self.adata.uns[y] = UNS_STRING_META_IN_OBS - self.adata.obs[y] = x - elif z is not None: - self.adata.uns[y] = UNS_STRING_META_IN_OBS - zs = z.split("*") # Separator for indicate multiple columns. 
- keys_to_use = [] - for zz in zs: - if zz not in self.adata.obs.keys(): - # This should not occur in single data set loaders (see warning below) but can occur in - # streamlined data loaders if not all instances of the streamlined data sets have all columns - # in .obs set. - print(f"WARNING: attribute {y} of data set {self.id} was not found in column {zz}") # debugging - else: - keys_to_use.append(zz) - if len(keys_to_use) > 0: - # Build a combination label out of all columns used to describe this group. - self.adata.obs[y] = [ - "_".join([str(xxx) for xxx in xx]) - for xx in zip(*[self.adata.obs[k].values.tolist() for k in keys_to_use]) - ] - else: - assert False, "switch option should not occur" - # Set cell-wise attributes (.obs): - # None so far other than celltypes. - # Set cell types: - # Map cell type names from raw IDs to ontology maintained ones: - if self.cellontology_original_obs_key is not None: - self.project_celltypes_to_ontology() - - def streamline( + def streamline_metadata( self, - format: str = "sfaira", - allow_uns_sfaira: bool = True, + schema: str = "sfaira", + uns_to_obs: bool = False, clean_obs: bool = True, clean_var: bool = True, - clean_uns: bool = True + clean_uns: bool = True, + clean_obs_names: bool = True, ): """ - Streamline the adata instance to output format. + Streamline the adata instance to a defined output schema. Output format are saved in ADATA_FIELDS* classes. - :param format: Export format. - + :param schema: Export format. - "sfaira" - "cellxgene" - :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param uns_to_obs: Whether to move metadata in .uns to .obs to make sure it's not lost when concatenating multiple objects. :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. :param clean_uns: Whether to delete non-streamlined fields in .uns. 
+ :param clean_obs_names: Whether to replace obs_names with a string comprised of dataset id and an increasing integer. :return: """ - if format == "sfaira": - adata_fields = self._adata_ids_sfaira - self._set_metadata_in_adata(allow_uns=allow_uns_sfaira) - elif format == "cellxgene": - from sfaira.consts import AdataIdsCellxgene - adata_fields = AdataIdsCellxgene() - self._set_metadata_in_adata(allow_uns=False) + + # Set schema as provided by the user + if schema == "sfaira": + adata_target_ids = AdataIdsSfaira() + elif schema == "cellxgene": + adata_target_ids = AdataIdsCellxgene() else: - raise ValueError(f"did not recognize format {format}") + raise ValueError(f"did not recognize schema {schema}") + + if hasattr(adata_target_ids, "gene_id_ensembl") and not hasattr(self._adata_ids, "gene_id_ensembl"): + raise ValueError(f"Cannot convert this object to schema {schema}, as the currently applied schema does not " + f"have an ensembl gene ID annotation. Please run .streamline_features() first.") + + # Creating new var annotation + var_new = pd.DataFrame() + for k in adata_target_ids.var_keys: + if k == "gene_id_ensembl": + if not self.gene_id_ensembl_var_key: + raise ValueError("gene_id_ensembl_var_key not set in dataloader despite being required by the " + "selected meta data schema. please run streamline_features() first to create the " + "missing annotation") + elif self.gene_id_ensembl_var_key == "index": + var_new[getattr(adata_target_ids, k)] = self.adata.var.index.tolist() + else: + var_new[getattr(adata_target_ids, k)] = self.adata.var[self.gene_id_ensembl_var_key].tolist() + del self.adata.var[self.gene_id_ensembl_var_key] + self.gene_id_ensembl_var_key = getattr(adata_target_ids, k) + elif k == "gene_id_symbols": + if not self.gene_id_symbols_var_key: + raise ValueError("gene_id_symbols_var_key not set in dataloader despite being required by the " + "selected meta data schema. 
please run streamline_features() first to create the " + "missing annotation") + elif self.gene_id_symbols_var_key == "index": + var_new[getattr(adata_target_ids, k)] = self.adata.var.index.tolist() + else: + var_new[getattr(adata_target_ids, k)] = self.adata.var[self.gene_id_symbols_var_key].tolist() + del self.adata.var[self.gene_id_symbols_var_key] + self.gene_id_symbols_var_key = getattr(adata_target_ids, k) + else: + val = getattr(self, k) + while hasattr(val, '__len__') and not isinstance(val, str) and len(val) == 1: # unpack nested lists/tuples + val = val[0] + var_new[getattr(adata_target_ids, k)] = val + # set var index + var_new.index = var_new[adata_target_ids.gene_id_index].tolist() + + per_cell_labels = ["cell_types_original", "cell_ontology_class", "cell_ontology_id"] + experiment_batch_labels = ["bio_sample", "individual", "tech_sample"] + + # Prepare .obs column name dict (process keys below with other .uns keys if they're set dataset-wide) + obs_cols = {} + for k in adata_target_ids.obs_keys: + # Skip any per-cell labels for now and process them in the next code block + if k in per_cell_labels: + continue + else: + if hasattr(self, f"{k}_obs_key") and getattr(self, f"{k}_obs_key") is not None: + obs_cols[k] = (getattr(self, f"{k}_obs_key"), getattr(adata_target_ids, k)) + else: + adata_target_ids.uns_keys.append(k) + + # Prepare new .uns dict: + uns_new = {} + for k in adata_target_ids.uns_keys: + val = getattr(self, k) + while hasattr(val, '__len__') and not isinstance(val, str) and len(val) == 1: # unpack nested lists/tuples + val = val[0] + uns_new[getattr(adata_target_ids, k)] = val + + # Prepare new .obs dataframe + obs_new = pd.DataFrame(index=self.adata.obs.index) + for k, (old_col, new_col) in obs_cols.items(): + # Handle batch-annotation columns which can be provided as a combination of columns separated by an asterisk + if k in experiment_batch_labels and "*" in old_col: + batch_cols = [] + for batch_col in old_col.split("*"): + if 
batch_col in self.adata.obs_keys(): + batch_cols.append(batch_col) + else: + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. + print(f"WARNING: attribute {new_col} of data set {self.id} was not found in column {batch_col}") + # Build a combination label out of all columns used to describe this group. + obs_new[new_col] = [ + "_".join([str(xxx) for xxx in xx]) + for xx in zip(*[self.adata.obs[batch_col].values.tolist() for batch_col in batch_cols]) + ] + setattr(self, f"{k}_obs_key", new_col) # update _obs_column attribute of this class to match the new column + # All other .obs fields are interpreted below as provided + else: + # Search for direct match of the sought-after column name or for attribute specific obs key. + if old_col in self.adata.obs_keys(): + # Include flag in .uns that this attribute is in .obs: + uns_new[new_col] = UNS_STRING_META_IN_OBS + # Remove potential pd.Categorical formatting: + ontology = getattr(self.ontology_container_sfaira, k) if hasattr(self.ontology_container_sfaira, k) else None + self._value_protection(attr=new_col, allowed=ontology, attempted=np.unique(self.adata.obs[old_col].values).tolist()) + obs_new[new_col] = self.adata.obs[old_col].values.tolist() + del self.adata.obs[old_col] + setattr(self, f"{k}_obs_key", new_col) # update _obs_column attribute of this class to match the new column + else: + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. + uns_new[new_col] = None + print(f"WARNING: attribute {new_col} of data set {self.id} was not found in column {old_col}") + + # Set cell-wise attributes (.obs): (None so far other than celltypes.) 
+ # Set cell types: + # Map cell type names from raw IDs to ontology maintained ones: + if self.cell_types_original_obs_key is not None: + obs_cl = self.project_celltypes_to_ontology(copy=True, adata_fields=adata_target_ids) + obs_new = pd.concat([obs_new, obs_cl], axis=1) + + # Add new annotation to adata and delete old fields if requested if clean_var: if self.adata.varm is not None: del self.adata.varm if self.adata.varp is not None: del self.adata.varp + self.adata.var = var_new + if "gene_id_ensembl" not in adata_target_ids.var_keys: + self.gene_id_ensembl_var_key = None + if "gene_id_symbols" not in adata_target_ids.var_keys: + self.gene_id_symbols_var_key = None + else: + self.adata.var = pd.concat([var_new, self.adata.var], axis=1, ignore_index=True) + self.adata.var.index = var_new.index if clean_obs: if self.adata.obsm is not None: del self.adata.obsm if self.adata.obsp is not None: del self.adata.obsp - # Only retain target elements in adata.uns: - uns_new = dict([ - (getattr(adata_fields, k), self.adata.uns[getattr(self._adata_ids_sfaira, k)]) - if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() - else (getattr(adata_fields, k), - np.unique(self.adata.obs[getattr(self._adata_ids_sfaira, k)].values).tolist()) - if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() - else (getattr(adata_fields, k), None) - for k in adata_fields.uns_keys - ]) + self.adata.obs = obs_new + else: + self.adata.obs = pd.concat([obs_new, self.adata.obs], axis=1, ignore_index=True) + self.adata.obs.index = obs_new.index + if clean_obs_names: + self.adata.obs.index = [f"{self.id}_{i}" for i in range(1, self.adata.n_obs + 1)] if clean_uns: - del self.adata.uns - # Remove old keys in sfaira scheme: - for k in adata_fields.uns_keys: - if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys(): - del self.adata.uns[getattr(self._adata_ids_sfaira, k)] - # Add new keys in new scheme: - for k, v in uns_new.items(): - self.adata.uns[k] = v - # Catch issues with 
data structures in uns that cannot be written to h5ad: - for k, v in self.adata.uns.items(): - replace = False - if isinstance(v, tuple) and len(v) == 1 and (isinstance(v[0], tuple) or isinstance(v[0], list)): - v = v[0] - replace = True - if isinstance(v, tuple) and len(v) == 1 and (isinstance(v[0], tuple) or isinstance(v[0], list)): - v = v[0] - replace = True - if replace: - if v == self._adata_ids_sfaira.unknown_metadata_identifier: - self.adata.uns[k] = adata_fields.unknown_metadata_identifier - else: - self.adata.uns[k] = v - # Only retain target elements in adata.var: - var_old = self.adata.var.copy() - self.adata.var = pd.DataFrame(dict([ - (getattr(adata_fields, k), self.adata.var[getattr(self._adata_ids_sfaira, k)]) - for k in adata_fields.var_keys - if getattr(self._adata_ids_sfaira, k) in self.adata.var.keys() - ])) - # Add old columns in if they are not overwritten and object is not cleaned: - if not clean_var: - for k, v in var_old.items(): - if k not in self.adata.var.keys(): - self.adata.var[k] = v - # Only retain target columns in adata.obs: - obs_old = self.adata.obs.copy() - self.adata.obs = pd.DataFrame( - data=dict([ - (getattr(adata_fields, k), self.adata.obs[getattr(self._adata_ids_sfaira, k)]) - if getattr(self._adata_ids_sfaira, k) in self.adata.obs.keys() - else (getattr(adata_fields, k), list(self.adata.uns[getattr(self._adata_ids_sfaira, k)])) - if getattr(self._adata_ids_sfaira, k) in self.adata.uns.keys() - else (getattr(adata_fields, k), adata_fields.unknown_metadata_identifier) - for k in adata_fields.obs_keys - ]), - index=self.adata.obs.index - ) - # Add old columns in if they are not overwritten and object is not cleaned: - if not clean_obs: - for k, v in obs_old.items(): - if k not in self.adata.obs.keys() and \ - k not in [getattr(self._adata_ids_sfaira, k) for k in adata_fields.obs_keys] and \ - k not in self._adata_ids_sfaira.obs_keys: - self.adata.obs[k] = v - # Add additional constant description changes based on output 
format: - if format == "cellxgene": + self.adata.uns = uns_new + else: + self.adata.uns = {**self.adata.uns, **uns_new} + + # Add additional hard-coded description changes for cellxgene schema: + if schema == "cellxgene": self.adata.uns["layer_descriptions"] = {"X": "raw"} self.adata.uns["version"] = { "corpora_encoding_version": "0.1.0", "corpora_schema_version": "1.1.0", } - for k in ["author", "doi", "download_url_data", "download_url_meta", "id", "year"]: - if k in self.adata.uns.keys(): - del self.adata.uns[k] # TODO port this into organism ontology handling. if self.organism == "mouse": self.adata.uns["organism"] = "Mus musculus" @@ -847,59 +822,50 @@ def streamline( self.adata.uns["organism"] = "Homo sapiens" self.adata.uns["organism_ontology_term_id"] = "NCBITaxon:9606" else: - assert False, self.organism + raise ValueError(f"organism {self.organism} currently not supported by cellxgene schema") # Add ontology IDs where necessary (note that human readable terms are also kept): - for k in [ - "organ", - "assay_sc", - "disease", - "ethnicity", - "development_stage", - ]: - if getattr(adata_fields, k) in self.adata.obs.columns: + for k in ["organ", "assay_sc", "disease", "ethnicity", "development_stage"]: + if getattr(adata_target_ids, k) in self.adata.obs.columns: self.__project_name_to_id_obs( - ontology=getattr(self._adata_ids_sfaira, k), - key_in=getattr(adata_fields, k), - key_out=getattr(adata_fields, k) + "_ontology_term_id", + ontology=getattr(adata_target_ids, k), + key_in=getattr(adata_target_ids, k), + key_out=getattr(adata_target_ids, k) + "_ontology_term_id", map_exceptions=[], - map_exceptions_value="", + map_exceptions_value=adata_target_ids.unknown_metadata_ontology_id_identifier, ) else: - self.adata.obs[getattr(adata_fields, k)] = adata_fields.unknown_metadata_identifier - self.adata.obs[getattr(adata_fields, k) + "_ontology_term_id"] = "" - # Clean up readable fields. 
- for k in [ - "organ", - "assay_sc", - "disease", - "ethnicity", - "development_stage", - "sex", - ]: - self.adata.obs[getattr(adata_fields, k)] = [ - x if x is not None else adata_fields.unknown_metadata_identifier - for x in self.adata.obs[getattr(adata_fields, k)].values - ] + self.adata.obs[getattr(adata_target_ids, k)] = adata_target_ids.unknown_metadata_identifier + self.adata.obs[getattr(adata_target_ids, k) + "_ontology_term_id"] = \ + adata_target_ids.unknown_metadata_ontology_id_identifier # Adapt var columns naming. - if self.organism == "mouse": + if self.organism == "human": gene_id_new = "hgnc_gene_symbol" - elif self.organism == "human": + elif self.organism == "mouse": gene_id_new = "mgi_gene_symbol" else: - assert False, self.organism - self.adata.var[gene_id_new] = self.adata.var[getattr(adata_fields, "gene_id_names")] - self.adata.var.index = self.adata.var[gene_id_new].values - if gene_id_new != getattr(adata_fields, "gene_id_names"): - del self.adata.var[getattr(adata_fields, "gene_id_names")] - if format != "sfaira": - # Remove sfaira intrinsic .uns fields: - keys_to_delete = ["load_raw", "mapped_features", "remove_gene_version", "annotated"] + raise ValueError(f"organism {self.organism} currently not supported") + self.adata.var[gene_id_new] = self.adata.var[getattr(adata_target_ids, "gene_id_symbols")] + self.adata.var.index = self.adata.var[gene_id_new].tolist() + if gene_id_new != self.gene_id_symbols_var_key: + del self.adata.var[self.gene_id_symbols_var_key] + self.gene_id_symbols_var_key = gene_id_new + + # Make sure that correct unknown_metadata_identifier is used in .uns, .obs and .var metadata + self.adata.obs = self.adata.obs.replace({None: adata_target_ids.unknown_metadata_identifier}) + self.adata.var = self.adata.var.replace({None: adata_target_ids.unknown_metadata_identifier}) + for k in self.adata.uns_keys(): + if self.adata.uns[k] is None: + self.adata.uns[k] = adata_target_ids.unknown_metadata_identifier + + # Move all uns 
annotation to obs columns if requested + if uns_to_obs: for k, v in self.adata.uns.items(): - if isinstance(v, str) and v == UNS_STRING_META_IN_OBS: - keys_to_delete.append(k) - for k in np.unique(keys_to_delete): - if k in self.adata.uns.keys(): - del self.adata.uns[k] + if k not in self.adata.obs_keys(): + self.adata.obs[k] = [v for i in range(self.adata.n_obs)] + self.adata.uns = {} + + self._adata_ids = adata_target_ids # set new adata fields to class after conversion + self.streamlined_meta = True def write_distributed_store( self, @@ -958,12 +924,7 @@ def write_backed( :param allow_caching: See .load(). :return: New row index for next element to be written into backed anndata. """ - self.load( - remove_gene_version=True, - match_to_reference=genome, - load_raw=load_raw, - allow_caching=allow_caching - ) + self.load(load_raw=load_raw, allow_caching=allow_caching) # Check if writing to sparse or dense matrix: if isinstance(adata_backed.X, np.ndarray) or \ isinstance(adata_backed.X, h5py._hl.dataset.Dataset): # backed dense @@ -977,8 +938,8 @@ def write_backed( adata_backed.X[np.sort(idx), :] = x_new[np.argsort(idx), :] for k in adata_backed.obs.columns: - if k == self._adata_ids_sfaira.dataset: - adata_backed.obs.loc[np.sort(idx), self._adata_ids_sfaira.dataset] = [ + if k == self._adata_ids.dataset: + adata_backed.obs.loc[np.sort(idx), self._adata_ids.dataset] = [ self.id for _ in range(len(idx))] elif k in self.adata.obs.columns: adata_backed.obs.loc[np.sort(idx), k] = self.adata.obs[k].values[np.argsort(idx)] @@ -999,7 +960,7 @@ def write_backed( adata_backed._n_obs = adata_backed.X.shape[0] # not automatically updated after append adata_backed.obs = adata_backed.obs.append( # .obs was not broadcasted to the right shape! 
pandas.DataFrame(dict([ - (k, [self.id for i in range(len(idx))]) if k == self._adata_ids_sfaira.dataset + (k, [self.id for i in range(len(idx))]) if k == self._adata_ids.dataset else (k, self.adata.obs[k].values[np.argsort(idx)]) if k in self.adata.obs.columns else (k, [self.adata.uns[k] for _ in range(len(idx))]) if k in list(self.adata.uns.keys()) else (k, ["key_not_found" for _ in range(len(idx))]) @@ -1041,7 +1002,7 @@ def write_ontology_class_map( if not self.annotated: warnings.warn(f"attempted to write ontology classmaps for data set {self.id} without annotation") else: - labels_original = np.sort(np.unique(self.adata.obs[self._adata_ids_sfaira.cell_types_original].values)) + labels_original = np.sort(np.unique(self.adata.obs[self._adata_ids.cell_types_original].values)) tab = self.celltypes_universe.prepare_celltype_map_tab( source=labels_original, match_only=False, @@ -1088,10 +1049,10 @@ def load_ontology_class_map(self, fn): if os.path.exists(fn): self.cell_ontology_map = self._read_class_map(fn=fn) else: - if self.cellontology_original_obs_key is not None: - warnings.warn(f"file {fn} does not exist but cellontology_original_obs_key is given") + if self.cell_types_original_obs_key is not None: + warnings.warn(f"file {fn} does not exist but cell_types_original_obs_key is given") - def project_celltypes_to_ontology(self): + def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = None, copy=False): """ Project free text cell type names to ontology based on mapping table. 
@@ -1099,79 +1060,102 @@ def project_celltypes_to_ontology(self): :return: """ - labels_original = self.adata.obs[self.cellontology_original_obs_key].values + adata_fields = adata_fields if adata_fields is not None else self._adata_ids + results = {} + labels_original = self.adata.obs[self.cell_types_original_obs_key].values if self.cell_ontology_map is not None: # only if this was defined labels_mapped = [ self.cell_ontology_map[x] if x in self.cell_ontology_map.keys() else x for x in labels_original ] - else: - labels_mapped = labels_original - # Validate mapped IDs based on ontology: - # This aborts with a readable error if there was a target in the mapping file that does not match the - # ontology. - if self.cell_ontology_map is not None: + # Convert unknown celltype placeholders (needs to be hardcoded here as placeholders are also hardcoded in + # conversion tsv files + placeholder_conversion = { + "UNKNOWN": adata_fields.unknown_celltype_identifier, + "NOT_A_CELL": adata_fields.not_a_cell_celltype_identifier, + } + labels_mapped = [ + placeholder_conversion[x] if x in placeholder_conversion.keys() + else x for x in labels_mapped + ] + # Validate mapped IDs based on ontology: + # This aborts with a readable error if there was a target in the mapping file that doesnt match the ontology # This protection blocks progression in the unit test if not deactivated. 
self._value_protection( attr="celltypes", allowed=self.ontology_celltypes, attempted=[ - x for x in np.unique(labels_mapped).tolist() + x for x in list(set(labels_mapped)) if x not in [ - self._adata_ids_sfaira.unknown_celltype_identifier, - self._adata_ids_sfaira.not_a_cell_celltype_identifier + adata_fields.unknown_celltype_identifier, + adata_fields.not_a_cell_celltype_identifier ] ] ) - self.adata.obs[self._adata_ids_sfaira.cell_ontology_class] = labels_mapped - self.cellontology_class_obs_key = self._adata_ids_sfaira.cell_ontology_class - self.adata.obs[self._adata_ids_sfaira.cell_types_original] = labels_original - # Add cell type IDs into object: - # The IDs are not read from a source file but inferred based on the class name. - # TODO this could be changed in the future, this allows this function to be used both on cell type name mapping - # files with and without the ID in the third column. - if self.cell_ontology_map is not None: + # Add cell type IDs into object: + # The IDs are not read from a source file but inferred based on the class name. + # TODO this could be changed in the future, this allows this function to be used both on cell type name + # mapping files with and without the ID in the third column. # This mapping blocks progression in the unit test if not deactivated. 
- self.__project_name_to_id_obs( + ids_mapped = self.__project_name_to_id_obs( ontology="cellontology_class", - key_in=self._adata_ids_sfaira.cell_ontology_class, - key_out=self._adata_ids_sfaira.cell_ontology_id, + key_in=labels_mapped, + key_out=None, map_exceptions=[ - self._adata_ids_sfaira.unknown_celltype_identifier, - self._adata_ids_sfaira.not_a_cell_celltype_identifier + adata_fields.unknown_celltype_identifier, + adata_fields.not_a_cell_celltype_identifier ], ) + results[adata_fields.cell_ontology_class] = labels_mapped + results[adata_fields.cell_ontology_id] = ids_mapped + else: + results[adata_fields.cell_ontology_class] = labels_original + results[adata_fields.cell_types_original] = labels_original + self.cellontology_class_obs_key = adata_fields.cell_ontology_class + self.cell_types_original_obs_key = adata_fields.cell_types_original + if copy: + return pd.DataFrame(results, index=self.adata.obs.index) + else: + for k, v in results.items(): + self.adata.obs[k] = v def __project_name_to_id_obs( self, ontology: str, - key_in: str, - key_out: str, + key_in: Union[str, list], + key_out: Union[str, None], map_exceptions: list, map_exceptions_value=None, ): """ Project ontology names to IDs for a given ontology in .obs entries. - :param ontology: - :param key_in: - :param key_out: - :param map_exceptions: - :param map_exceptions_value: + :param ontology: name of the ontology to use when converting to IDs + :param key_in: name of obs_column containing names to convert or python list containing these values + :param key_out: name of obs_column to write the IDs or None. If None, a python list with the new values will be returned + :param map_exceptions: list of values that should not be mapped + :param map_exceptions_value: placeholder target value for values excluded from mapping :return: """ ontology = getattr(self.ontology_container_sfaira, ontology) + assert isinstance(key_in, (str, list)), f"argument key_in needs to be of type str or list. 
Supplied" \ + f"type: {type(key_in)}" + input_values = self.adata.obs[key_in].values if isinstance(key_in, str) else key_in map_vals = dict([ (x, ontology.convert_to_id(x)) for x in np.unique([ - xx for xx in self.adata.obs[key_in].values + xx for xx in input_values if (xx not in map_exceptions and xx is not None) ]) ]) - self.adata.obs[key_out] = [ + output_values = [ map_vals[x] if x in map_vals.keys() else map_exceptions_value - for x in self.adata.obs[key_in].values + for x in input_values ] + if isinstance(key_out, str): + self.adata.obs[key_out] = output_values + else: + return output_values @property def citation(self): @@ -1244,52 +1228,47 @@ def write_meta( assert False, "bug in switch" if self.adata is None: - self.load( - remove_gene_version=False, - match_to_reference=None, - load_raw=True, - allow_caching=False, - ) + self.load(load_raw=True, allow_caching=False) # Add data-set wise meta data into table: meta = pandas.DataFrame({ - self._adata_ids_sfaira.annotated: self.adata.uns[self._adata_ids_sfaira.annotated], - self._adata_ids_sfaira.author: self.adata.uns[self._adata_ids_sfaira.author], - self._adata_ids_sfaira.doi: self.adata.uns[self._adata_ids_sfaira.doi], - self._adata_ids_sfaira.download_url_data: self.adata.uns[self._adata_ids_sfaira.download_url_data], - self._adata_ids_sfaira.download_url_meta: self.adata.uns[self._adata_ids_sfaira.download_url_meta], - self._adata_ids_sfaira.id: self.adata.uns[self._adata_ids_sfaira.id], - self._adata_ids_sfaira.ncells: self.adata.n_obs, - self._adata_ids_sfaira.normalization: self.adata.uns[self._adata_ids_sfaira.normalization], - self._adata_ids_sfaira.year: self.adata.uns[self._adata_ids_sfaira.year], + self._adata_ids.annotated: self.adata.uns[self._adata_ids.annotated], + self._adata_ids.author: self.adata.uns[self._adata_ids.author], + self._adata_ids.doi: self.adata.uns[self._adata_ids.doi], + self._adata_ids.download_url_data: self.adata.uns[self._adata_ids.download_url_data], + 
self._adata_ids.download_url_meta: self.adata.uns[self._adata_ids.download_url_meta], + self._adata_ids.id: self.adata.uns[self._adata_ids.id], + self._adata_ids.ncells: self.adata.n_obs, + self._adata_ids.normalization: self.adata.uns[self._adata_ids.normalization], + self._adata_ids.year: self.adata.uns[self._adata_ids.year], }, index=range(1)) # Expand table by variably cell-wise or data set-wise meta data: for x in [ - self._adata_ids_sfaira.assay_sc, - self._adata_ids_sfaira.assay_differentiation, - self._adata_ids_sfaira.assay_type_differentiation, - self._adata_ids_sfaira.bio_sample, - self._adata_ids_sfaira.cell_line, - self._adata_ids_sfaira.development_stage, - self._adata_ids_sfaira.ethnicity, - self._adata_ids_sfaira.individual, - self._adata_ids_sfaira.organ, - self._adata_ids_sfaira.organism, - self._adata_ids_sfaira.sample_source, - self._adata_ids_sfaira.sex, - self._adata_ids_sfaira.state_exact, - self._adata_ids_sfaira.tech_sample, + self._adata_ids.assay_sc, + self._adata_ids.assay_differentiation, + self._adata_ids.assay_type_differentiation, + self._adata_ids.bio_sample, + self._adata_ids.cell_line, + self._adata_ids.development_stage, + self._adata_ids.ethnicity, + self._adata_ids.individual, + self._adata_ids.organ, + self._adata_ids.organism, + self._adata_ids.sample_source, + self._adata_ids.sex, + self._adata_ids.state_exact, + self._adata_ids.tech_sample, ]: if self.adata.uns[x] == UNS_STRING_META_IN_OBS: meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) else: meta[x] = self.adata.uns[x] # Add cell types into table if available: - if self._adata_ids_sfaira.cell_ontology_class in self.adata.obs.keys(): - meta[self._adata_ids_sfaira.cell_ontology_class] = str(( - np.sort(np.unique(self.adata.obs[self._adata_ids_sfaira.cell_ontology_class].values)), + if self._adata_ids.cell_ontology_class in self.adata.obs.keys(): + meta[self._adata_ids.cell_ontology_class] = str(( + 
np.sort(np.unique(self.adata.obs[self._adata_ids.cell_ontology_class].values)), )) else: - meta[self._adata_ids_sfaira.cell_ontology_class] = " " + meta[self._adata_ids.cell_ontology_class] = " " meta.to_csv(fn_meta) def set_dataset_id( @@ -1332,13 +1311,13 @@ def additional_annotation_key(self, x: str): @property def annotated(self) -> Union[bool, None]: - if self.cellontology_id_obs_key is not None or self.cellontology_original_obs_key is not None: + if self.cellontology_id_obs_key is not None or self.cell_types_original_obs_key is not None: return True else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.annotated in self.meta.columns: - return self.meta[self._adata_ids_sfaira.annotated].values[0] + if self.meta is not None and self._adata_ids.annotated in self.meta.columns: + return self.meta[self._adata_ids.annotated].values[0] elif self.loaded: # If data set was loaded and there is still no annotation indicated, it is declared unannotated. 
return False @@ -1354,14 +1333,13 @@ def assay_sc(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.assay_sc in self.meta.columns: - return self.meta[self._adata_ids_sfaira.assay_sc] + if self.meta is not None and self._adata_ids.assay_sc in self.meta.columns: + return self.meta[self._adata_ids.assay_sc] else: return None @assay_sc.setter def assay_sc(self, x: str): - self.__erasing_protection(attr="assay_sc", val_old=self._assay_sc, val_new=x) x = self._value_protection(attr="assay_sc", allowed=self.ontology_container_sfaira.assay_sc, attempted=x) self._assay_sc = x @@ -1372,14 +1350,13 @@ def assay_differentiation(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.assay_differentiation in self.meta.columns: - return self.meta[self._adata_ids_sfaira.assay_differentiation] + if self.meta is not None and self._adata_ids.assay_differentiation in self.meta.columns: + return self.meta[self._adata_ids.assay_differentiation] else: return None @assay_differentiation.setter def assay_differentiation(self, x: str): - self.__erasing_protection(attr="assay_differentiation", val_old=self._assay_differentiation, val_new=x) x = self._value_protection(attr="assay_differentiation", allowed=self.ontology_container_sfaira.assay_differentiation, attempted=x) self._assay_differentiation = x @@ -1391,14 +1368,13 @@ def assay_type_differentiation(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.assay_type_differentiation in self.meta.columns: - return self.meta[self._adata_ids_sfaira.assay_type_differentiation] + if self.meta is not None and self._adata_ids.assay_type_differentiation in self.meta.columns: + return self.meta[self._adata_ids.assay_type_differentiation] else: return None @assay_type_differentiation.setter def 
assay_type_differentiation(self, x: str): - self.__erasing_protection(attr="assay_type_differentiation", val_old=self._assay_type_differentiation, val_new=x) x = self._value_protection(attr="assay_type_differentiation", allowed=self.ontology_container_sfaira.assay_type_differentiation, attempted=x) self._assay_type_differentiation = x @@ -1410,14 +1386,13 @@ def bio_sample(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.bio_sample in self.meta.columns: - return self.meta[self._adata_ids_sfaira.bio_sample] + if self.meta is not None and self._adata_ids.bio_sample in self.meta.columns: + return self.meta[self._adata_ids.bio_sample] else: return None @bio_sample.setter def bio_sample(self, x: str): - self.__erasing_protection(attr="bio_sample", val_old=self._bio_sample, val_new=x) self._bio_sample = x @property @@ -1427,14 +1402,13 @@ def cell_line(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.cell_line in self.meta.columns: - return self.meta[self._adata_ids_sfaira.cell_line] + if self.meta is not None and self._adata_ids.cell_line in self.meta.columns: + return self.meta[self._adata_ids.cell_line] else: return None @cell_line.setter def cell_line(self, x: str): - self.__erasing_protection(attr="cell_line", val_old=self._cell_line, val_new=x) self._cell_line = x @property @@ -1457,14 +1431,13 @@ def default_embedding(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.default_embedding in self.meta.columns: - return self.meta[self._adata_ids_sfaira.default_embedding] + if self.meta is not None and self._adata_ids.default_embedding in self.meta.columns: + return self.meta[self._adata_ids.default_embedding] else: return None @default_embedding.setter def default_embedding(self, x: str): - 
self.__erasing_protection(attr="default_embedding", val_old=self._development_stage, val_new=x) x = self._value_protection(attr="default_embedding", allowed=self.ontology_container_sfaira.default_embedding, attempted=x) self._default_embedding = x @@ -1476,14 +1449,13 @@ def development_stage(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.development_stage in self.meta.columns: - return self.meta[self._adata_ids_sfaira.development_stage] + if self.meta is not None and self._adata_ids.development_stage in self.meta.columns: + return self.meta[self._adata_ids.development_stage] else: return None @development_stage.setter def development_stage(self, x: str): - self.__erasing_protection(attr="development_stage", val_old=self._development_stage, val_new=x) x = self._value_protection(attr="development_stage", allowed=self.ontology_container_sfaira.development_stage, attempted=x) self._development_stage = x @@ -1495,14 +1467,13 @@ def disease(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.disease in self.meta.columns: - return self.meta[self._adata_ids_sfaira.disease] + if self.meta is not None and self._adata_ids.disease in self.meta.columns: + return self.meta[self._adata_ids.disease] else: return None @disease.setter def disease(self, x: str): - self.__erasing_protection(attr="disease", val_old=self._disease, val_new=x) x = self._value_protection(attr="disease", allowed=self.ontology_container_sfaira.disease, attempted=x) self._disease = x @@ -1514,13 +1485,12 @@ def doi(self) -> Union[str, List[str]]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is None or self._adata_ids_sfaira.doi not in self.meta.columns: + if self.meta is None or self._adata_ids.doi not in self.meta.columns: raise ValueError("doi must be set but was neither set in constructor nor in meta data") - return 
self.meta[self._adata_ids_sfaira.doi] + return self.meta[self._adata_ids.doi] @doi.setter def doi(self, x: Union[str, List[str]]): - self.__erasing_protection(attr="doi", val_old=self._doi, val_new=x) self._doi = x @property @@ -1548,7 +1518,7 @@ def download_url_data(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[self._adata_ids_sfaira.download_url_data] + x = self.meta[self._adata_ids.download_url_data] if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): @@ -1557,7 +1527,6 @@ def download_url_data(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: @download_url_data.setter def download_url_data(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): - self.__erasing_protection(attr="download_url_data", val_old=self._download_url_data, val_new=x) # Formats to tuple with single element, which is a list of all download websites relevant to dataset, # which can be used as a single element column in a pandas data frame. if isinstance(x, str) or x is None: @@ -1580,7 +1549,7 @@ def download_url_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: # else: # if self.meta is None: # self.load_meta(fn=None) - # x = self.meta[self._ADATA_IDS_SFAIRA.download_url_meta] + # x = self.meta[self._adata_ids.download_url_meta] if isinstance(x, str) or x is None: x = [x] if isinstance(x, list): @@ -1589,7 +1558,6 @@ def download_url_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: @download_url_meta.setter def download_url_meta(self, x: Union[str, None, List[str], Tuple[str], List[None], Tuple[None]]): - self.__erasing_protection(attr="download_url_meta", val_old=self._download_url_meta, val_new=x) # Formats to tuple with single element, which is a list of all download websites relevant to dataset, # which can be used as a single element column in a pandas data frame. 
if isinstance(x, str) or x is None: @@ -1605,14 +1573,13 @@ def ethnicity(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.ethnicity in self.meta.columns: - return self.meta[self._adata_ids_sfaira.ethnicity] + if self.meta is not None and self._adata_ids.ethnicity in self.meta.columns: + return self.meta[self._adata_ids.ethnicity] else: return None @ethnicity.setter def ethnicity(self, x: str): - self.__erasing_protection(attr="ethnicity", val_old=self._ethnicity, val_new=x) x = self._value_protection(attr="ethnicity", allowed=self.ontology_container_sfaira.ethnicity, attempted=x) self._ethnicity = x @@ -1626,7 +1593,6 @@ def id(self) -> str: @id.setter def id(self, x: str): - self.__erasing_protection(attr="id", val_old=self._id, val_new=x) self._id = x @property @@ -1636,14 +1602,13 @@ def individual(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.individual in self.meta.columns: - return self.meta[self._adata_ids_sfaira.individual] + if self.meta is not None and self._adata_ids.individual in self.meta.columns: + return self.meta[self._adata_ids.individual] else: return None @individual.setter def individual(self, x: str): - self.__erasing_protection(attr="bio_sample", val_old=self._individual, val_new=x) self._individual = x @property @@ -1683,7 +1648,7 @@ def ncells(self) -> int: else: if self.meta is None: self.load_meta(fn=None) - x = self.meta[self._adata_ids_sfaira.ncells] + x = self.meta[self._adata_ids.ncells] return int(x) @property @@ -1693,14 +1658,13 @@ def normalization(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.normalization in self.meta.columns: - return self.meta[self._adata_ids_sfaira.normalization] + if self.meta is not None and self._adata_ids.normalization in self.meta.columns: + return 
self.meta[self._adata_ids.normalization] else: return None @normalization.setter def normalization(self, x: str): - self.__erasing_protection(attr="normalization", val_old=self._normalization, val_new=x) x = self._value_protection(attr="normalization", allowed=self.ontology_container_sfaira.normalization, attempted=x) self._normalization = x @@ -1712,14 +1676,13 @@ def primary_data(self) -> Union[None, bool]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.primary_data in self.meta.columns: - return self.meta[self._adata_ids_sfaira.primary_data] + if self.meta is not None and self._adata_ids.primary_data in self.meta.columns: + return self.meta[self._adata_ids.primary_data] else: return None @primary_data.setter def primary_data(self, x: bool): - self.__erasing_protection(attr="primary_data", val_old=self._primary_data, val_new=x) x = self._value_protection(attr="primary_data", allowed=self.ontology_container_sfaira.primary_data, attempted=x) self._primary_data = x @@ -1730,7 +1693,6 @@ def assay_sc_obs_key(self) -> str: @assay_sc_obs_key.setter def assay_sc_obs_key(self, x: str): - self.__erasing_protection(attr="assay_sc_obs_key", val_old=self._assay_sc_obs_key, val_new=x) self._assay_sc_obs_key = x @property @@ -1739,7 +1701,6 @@ def assay_differentiation_obs_key(self) -> str: @assay_differentiation_obs_key.setter def assay_differentiation_obs_key(self, x: str): - self.__erasing_protection(attr="assay_differentiation_obs_key", val_old=self._assay_differentiation_obs_key, val_new=x) self._assay_differentiation_obs_key = x @property @@ -1748,7 +1709,6 @@ def assay_type_differentiation_obs_key(self) -> str: @assay_type_differentiation_obs_key.setter def assay_type_differentiation_obs_key(self, x: str): - self.__erasing_protection(attr="assay_type_differentiation_otype_bs_key", val_old=self._assay_differentiation_obs_key, val_new=x) self._assay_type_differentiation_obs_key = x @property @@ -1757,7 +1717,6 @@ 
def bio_sample_obs_key(self) -> str: @bio_sample_obs_key.setter def bio_sample_obs_key(self, x: str): - self.__erasing_protection(attr="bio_sample_obs_key", val_old=self._bio_sample_obs_key, val_new=x) self._bio_sample_obs_key = x @property @@ -1766,7 +1725,6 @@ def cell_line_obs_key(self) -> str: @cell_line_obs_key.setter def cell_line_obs_key(self, x: str): - self.__erasing_protection(attr="cell_line_obs_key", val_old=self._cell_line_obs_key, val_new=x) self._cell_line_obs_key = x @property @@ -1786,14 +1744,12 @@ def cellontology_id_obs_key(self, x: str): self._cellontology_id_obs_key = x @property - def cellontology_original_obs_key(self) -> str: - return self._cellontology_original_obs_key + def cell_types_original_obs_key(self) -> str: + return self._cell_types_original_obs_key - @cellontology_original_obs_key.setter - def cellontology_original_obs_key(self, x: str): - self.__erasing_protection(attr="cellontology_original_obs_key", val_old=self._cellontology_original_obs_key, - val_new=x) - self._cellontology_original_obs_key = x + @cell_types_original_obs_key.setter + def cell_types_original_obs_key(self, x: str): + self._cell_types_original_obs_key = x @property def development_stage_obs_key(self) -> str: @@ -1801,7 +1757,6 @@ def development_stage_obs_key(self) -> str: @development_stage_obs_key.setter def development_stage_obs_key(self, x: str): - self.__erasing_protection(attr="development_stage_obs_key", val_old=self._development_stage_obs_key, val_new=x) self._development_stage_obs_key = x @property @@ -1810,7 +1765,6 @@ def disease_obs_key(self) -> str: @disease_obs_key.setter def disease_obs_key(self, x: str): - self.__erasing_protection(attr="disease_obs_key", val_old=self._disease_obs_key, val_new=x) self._disease_obs_key = x @property @@ -1819,7 +1773,6 @@ def ethnicity_obs_key(self) -> str: @ethnicity_obs_key.setter def ethnicity_obs_key(self, x: str): - self.__erasing_protection(attr="ethnicity_obs_key", val_old=self._ethnicity_obs_key, 
val_new=x) self._ethnicity_obs_key = x @property @@ -1828,7 +1781,6 @@ def individual_obs_key(self) -> str: @individual_obs_key.setter def individual_obs_key(self, x: str): - self.__erasing_protection(attr="individual_obs_key", val_old=self._individual_obs_key, val_new=x) self._individual_obs_key = x @property @@ -1837,7 +1789,6 @@ def organ_obs_key(self) -> str: @organ_obs_key.setter def organ_obs_key(self, x: str): - self.__erasing_protection(attr="organ_obs_key", val_old=self._organ_obs_key, val_new=x) self._organ_obs_key = x @property @@ -1846,7 +1797,6 @@ def organism_obs_key(self) -> str: @organism_obs_key.setter def organism_obs_key(self, x: str): - self.__erasing_protection(attr="organism_obs_key", val_old=self._organism_obs_key, val_new=x) self._organism_obs_key = x @property @@ -1855,7 +1805,6 @@ def sample_source_obs_key(self) -> str: @sample_source_obs_key.setter def sample_source_obs_key(self, x: str): - self.__erasing_protection(attr="sample_source_obs_key", val_old=self._sample_source_obs_key, val_new=x) self._sample_source_obs_key = x @property @@ -1864,7 +1813,6 @@ def sex_obs_key(self) -> str: @sex_obs_key.setter def sex_obs_key(self, x: str): - self.__erasing_protection(attr="sex_obs_key", val_old=self._sex_obs_key, val_new=x) self._sex_obs_key = x @property @@ -1873,7 +1821,6 @@ def state_exact_obs_key(self) -> str: @state_exact_obs_key.setter def state_exact_obs_key(self, x: str): - self.__erasing_protection(attr="state_exact_obs_key", val_old=self._state_exact_obs_key, val_new=x) self._state_exact_obs_key = x @property @@ -1882,7 +1829,6 @@ def tech_sample_obs_key(self) -> str: @tech_sample_obs_key.setter def tech_sample_obs_key(self, x: str): - self.__erasing_protection(attr="tech_sample_obs_key", val_old=self._tech_sample_obs_key, val_new=x) self._tech_sample_obs_key = x @property @@ -1892,14 +1838,13 @@ def organ(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and 
self._adata_ids_sfaira.organ in self.meta.columns: - return self.meta[self._adata_ids_sfaira.organ] + if self.meta is not None and self._adata_ids.organ in self.meta.columns: + return self.meta[self._adata_ids.organ] else: return None @organ.setter def organ(self, x: str): - self.__erasing_protection(attr="organ", val_old=self._organ, val_new=x) x = self._value_protection(attr="organ", allowed=self.ontology_container_sfaira.organ, attempted=x) self._organ = x @@ -1910,14 +1855,13 @@ def organism(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.organism in self.meta.columns: - return self.meta[self._adata_ids_sfaira.organism] + if self.meta is not None and self._adata_ids.organism in self.meta.columns: + return self.meta[self._adata_ids.organism] else: return None @organism.setter def organism(self, x: str): - self.__erasing_protection(attr="organism", val_old=self._organism, val_new=x) x = self._value_protection(attr="organism", allowed=self.ontology_container_sfaira.organism, attempted=x) self._organism = x @@ -1928,14 +1872,13 @@ def sample_source(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.sample_source in self.meta.columns: - return self.meta[self._adata_ids_sfaira.sample_source] + if self.meta is not None and self._adata_ids.sample_source in self.meta.columns: + return self.meta[self._adata_ids.sample_source] else: return None @sample_source.setter def sample_source(self, x: str): - self.__erasing_protection(attr="sample_source", val_old=self._sample_source, val_new=x) x = self._value_protection(attr="sample_source", allowed=self.ontology_container_sfaira.sample_source, attempted=x) self._sample_source = x @@ -1947,14 +1890,13 @@ def sex(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.sex in 
self.meta.columns: - return self.meta[self._adata_ids_sfaira.sex] + if self.meta is not None and self._adata_ids.sex in self.meta.columns: + return self.meta[self._adata_ids.sex] else: return None @sex.setter def sex(self, x: str): - self.__erasing_protection(attr="sex", val_old=self._sex, val_new=x) x = self._value_protection(attr="sex", allowed=self.ontology_container_sfaira.sex, attempted=x) self._sex = x @@ -1964,7 +1906,6 @@ def source(self) -> str: @source.setter def source(self, x: Union[str, None]): - self.__erasing_protection(attr="source", val_old=self._source, val_new=x) self._source = x @property @@ -1974,14 +1915,13 @@ def state_exact(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.state_exact in self.meta.columns: - return self.meta[self._adata_ids_sfaira.state_exact] + if self.meta is not None and self._adata_ids.state_exact in self.meta.columns: + return self.meta[self._adata_ids.state_exact] else: return None @state_exact.setter def state_exact(self, x: str): - self.__erasing_protection(attr="state_exact", val_old=self._state_exact, val_new=x) self._state_exact = x @property @@ -1991,33 +1931,30 @@ def tech_sample(self) -> Union[None, str]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.tech_sample in self.meta.columns: - return self.meta[self._adata_ids_sfaira.tech_sample] + if self.meta is not None and self._adata_ids.tech_sample in self.meta.columns: + return self.meta[self._adata_ids.tech_sample] else: return None @tech_sample.setter def tech_sample(self, x: str): - self.__erasing_protection(attr="tech_sample", val_old=self._tech_sample, val_new=x) self._tech_sample = x @property - def var_ensembl_col(self) -> str: - return self._var_ensembl_col + def gene_id_ensembl_var_key(self) -> str: + return self._gene_id_ensembl_var_key - @var_ensembl_col.setter - def var_ensembl_col(self, x: str): - 
self.__erasing_protection(attr="var_ensembl_col", val_old=self._var_ensembl_col, val_new=x) - self._var_ensembl_col = x + @gene_id_ensembl_var_key.setter + def gene_id_ensembl_var_key(self, x: str): + self._gene_id_ensembl_var_key = x @property - def var_symbol_col(self) -> str: - return self._var_symbol_col + def gene_id_symbols_var_key(self) -> str: + return self._gene_id_symbols_var_key - @var_symbol_col.setter - def var_symbol_col(self, x: str): - self.__erasing_protection(attr="var_symbol_col", val_old=self._var_symbol_col, val_new=x) - self._var_symbol_col = x + @gene_id_symbols_var_key.setter + def gene_id_symbols_var_key(self, x: str): + self._gene_id_symbols_var_key = x @property def year(self) -> Union[None, int]: @@ -2026,14 +1963,13 @@ def year(self) -> Union[None, int]: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.year in self.meta.columns: - return self.meta[self._adata_ids_sfaira.year] + if self.meta is not None and self._adata_ids.year in self.meta.columns: + return self.meta[self._adata_ids.year] else: return None @year.setter def year(self, x: int): - self.__erasing_protection(attr="year", val_old=self._year, val_new=x) x = self._value_protection(attr="year", allowed=self.ontology_container_sfaira.year, attempted=x) self._year = x @@ -2060,22 +1996,21 @@ def cell_ontology_map(self) -> dict: @cell_ontology_map.setter def cell_ontology_map(self, x: pd.DataFrame): - self.__erasing_protection(attr="ontology_class_map", val_old=self._ontology_class_map, val_new=x) assert x.shape[1] in [2, 3], f"{x.shape} in {self.id}" - assert x.columns[0] == self._adata_ids_sfaira.classmap_source_key - assert x.columns[1] == self._adata_ids_sfaira.classmap_target_key + assert x.columns[0] == self._adata_ids.classmap_source_key + assert x.columns[1] == self._adata_ids.classmap_target_key # Check for weird entries: # nan arises if columns was empty in that row. 
nan_vals = np.where([ False if isinstance(x, str) else (np.isnan(x) or x is None) - for x in x[self._adata_ids_sfaira.classmap_target_key].values.tolist() + for x in x[self._adata_ids.classmap_target_key].values.tolist() ])[0] assert len(nan_vals) == 0, \ - f"Found nan target values in {self.id} for {x[self._adata_ids_sfaira.classmap_target_key].values[nan_vals]}" + f"Found nan target values in {self.id} for {x[self._adata_ids.classmap_target_key].values[nan_vals]}" # Transform data frame into a mapping dictionary: self._ontology_class_map = dict(list(zip( - x[self._adata_ids_sfaira.classmap_source_key].values.tolist(), - x[self._adata_ids_sfaira.classmap_target_key].values.tolist() + x[self._adata_ids.classmap_source_key].values.tolist(), + x[self._adata_ids.classmap_target_key].values.tolist() ))) def __crossref_query(self, k): @@ -2115,13 +2050,12 @@ def author(self) -> str: else: if self.meta is None: self.load_meta(fn=None) - if self.meta is None or self._adata_ids_sfaira.author not in self.meta.columns: + if self.meta is None or self._adata_ids.author not in self.meta.columns: raise ValueError("author must be set but was neither set in constructor nor in meta data") - return self.meta[self._adata_ids_sfaira.author] + return self.meta[self._adata_ids.author] @author.setter def author(self, x: str): - self.__erasing_protection(attr="author", val_old=self._author, val_new=x) self._author = x @property @@ -2131,25 +2065,13 @@ def title(self): else: if self.meta is None: self.load_meta(fn=None) - if self.meta is not None and self._adata_ids_sfaira.title in self.meta.columns: - return self.meta[self._adata_ids_sfaira.title] + if self.meta is not None and self._adata_ids.title in self.meta.columns: + return self.meta[self._adata_ids.title] else: return self.__crossref_query(k="title") # Private methods: - def __erasing_protection(self, attr, val_old, val_new): - """ - This is called when a erasing protected attribute is set to check whether it was set before. 
- - :param attr: Attribute to be set. - :param val_old: Old value for attribute to be set. - :param val_new: New value for attribute to be set. - """ - if val_old is not None: - raise ValueError(f"attempted to set erasing protected attribute {attr}: " - f"previously was {str(val_old)}, attempted to set {str(val_new)}") - def _value_protection( self, attr: str, diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 46cadc4c3..fc710b894 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -14,7 +14,7 @@ from sfaira.data.base.dataset import is_child, DatasetBase from sfaira.versions.genomes import GenomeContainer -from sfaira.consts import AdataIdsSfaira +from sfaira.consts import AdataIds, AdataIdsSfaira from sfaira.data.utils import read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -27,17 +27,10 @@ def map_fn(inputs): :param inputs: :return: None if function ran, error report otherwise """ - ds, remove_gene_version, match_to_reference, load_raw, allow_caching, set_metadata, kwargs, func, kwargs_func = \ + ds, load_raw, allow_caching, kwargs, func, kwargs_func = \ inputs try: - ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=load_raw, - allow_caching=allow_caching, - set_metadata=set_metadata, - **kwargs - ) + ds.load(load_raw=load_raw, allow_caching=allow_caching, **kwargs) if func is not None: x = func(ds, **kwargs_func) ds.clear() @@ -74,7 +67,7 @@ class DatasetGroup: datasets: Dict[str, DatasetBase] def __init__(self, datasets: dict): - self._adata_ids_sfaira = AdataIdsSfaira() + self._adata_ids = AdataIdsSfaira() self.datasets = datasets @property @@ -84,11 +77,8 @@ def _unknown_celltype_identifiers(self): def load( self, annotated_only: bool = False, - remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, load_raw: bool = False, allow_caching: bool = True, - set_metadata: bool = True, processes: int = 1, 
func=None, kwargs_func: Union[None, dict] = None, @@ -103,6 +93,8 @@ def load( In this setting, datasets are removed from memory after the function has been executed. :param annotated_only: + :param load_raw: + :param allow_caching: :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. :param func: Function to run on loaded datasets. map_fun should only take one argument, which is a Dataset instance. The return can be empty: @@ -113,11 +105,8 @@ def func(dataset, **kwargs_func): :param kwargs_func: Kwargs of func. """ args = [ - remove_gene_version, - match_to_reference, load_raw, allow_caching, - set_metadata, kwargs, func, kwargs_func @@ -150,44 +139,62 @@ def func(dataset, **kwargs_func): load.__doc__ += load_doc - def streamline( + def streamline_metadata( self, - format: str = "sfaira", - allow_uns_sfaira: bool = False, + schema: str = "sfaira", + uns_to_obs: bool = False, clean_obs: bool = True, clean_var: bool = True, - clean_uns: bool = True + clean_uns: bool = True, + clean_obs_names: bool = True ): """ Streamline the adata instance in each data set to output format. - Output format are saved in ADATA_FIELDS* classes. - :param format: Export format. - + :param schema: Export format. - "sfaira" - "cellxgene" - :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param uns_to_obs: Whether to move metadata in .uns to .obs to make sure it's not lost when concatenating multiple objects. :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. :param clean_uns: Whether to delete non-streamlined fields in .uns. + :param clean_obs_names: Whether to replace obs_names with a string comprised of dataset id and an increasing integer. 
:return: """ for x in self.ids: - self.datasets[x].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, - clean_var=clean_var, clean_uns=clean_uns) + self.datasets[x].streamline_metadata( + schema=schema, + uns_to_obs=uns_to_obs, + clean_obs=clean_obs, + clean_var=clean_var, + clean_uns=clean_uns, + clean_obs_names=clean_obs_names + ) - def subset_genes(self, subset_type: Union[None, str, List[str]] = None): + def streamline_features( + self, + remove_gene_version: bool = True, + match_to_reference: Union[str, bool, None] = None, + subset_genes_to_type: Union[None, str, List[str]] = None, + ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - - :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. + :param match_to_reference: Whether to map gene names to a given annotation. Can be: + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - None: use the default annotation for this organism in sfaira. + - False: no mapping of gene labels will be done. + :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. 
""" for x in self.ids: - self.datasets[x].subset_genes(subset_type=subset_type) + self.datasets[x].streamline_features( + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + subset_genes_to_type=subset_genes_to_type, + ) def write_distributed_store( self, @@ -270,7 +277,7 @@ def write_ontology_class_map( for k, v in self.datasets.items(): if v.annotated: labels_original = np.sort(np.unique(np.concatenate([ - v.adata.obs[self._adata_ids_sfaira.cell_types_original].values + v.adata.obs[self._adata_ids.cell_types_original].values ]))) tab.append(v.celltypes_universe.prepare_celltype_map_tab( source=labels_original, @@ -286,7 +293,7 @@ def write_ontology_class_map( tab = pandas.concat(tab, axis=0) # Take out columns with the same source: tab = tab.loc[[x not in tab.iloc[:i, 0].values for i, x in enumerate(tab.iloc[:, 0].values)], :].copy() - tab = tab.sort_values(self._adata_ids_sfaira.classmap_source_key) + tab = tab.sort_values(self._adata_ids.classmap_source_key) if not os.path.exists(fn) or not protected_writing: tab.to_csv(fn, index=False, sep="\t") @@ -312,43 +319,63 @@ def adata(self): adata_ls = self.adata_ls if not adata_ls: return None - self.streamline(format="sfaira", allow_uns_sfaira=False, clean_obs=True, clean_var=True, clean_uns=True) + # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata + match_ref_list = [] + rm_gene_ver_list = [] + gene_type_list = [] + for d_id in self.ids: + if self.datasets[d_id].adata is not None: + assert self.datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ + f"featurespace. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have a " \ + f"streamlined featurespace. Run .streamline_features()" \ + f" first." + assert self.datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ + f"metadata. 
To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have " \ + f"streamlined metadata. Run .streamline_metadata() first." + match_ref_list.append(self.datasets[d_id].mapped_features) + rm_gene_ver_list.append(self.datasets[d_id].remove_gene_version) + gene_type_list.append(self.datasets[d_id].subset_gene_type) + assert len(set(match_ref_list)) == 1, \ + "Not all datasets in this group had their features matched to the same reference (argument " \ + "'match_to_reference' of method .streamline_features())." \ + "This is however a prerequisite for creating a combined adata object." + assert len(set(rm_gene_ver_list)) == 1, \ + "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ + "method .streamline_features()). This is however a prerequisite for creating a combined adata object." + assert len(set(gene_type_list)) == 1, \ + "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ + "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." - # .var entries are renamed and copied upon concatenation. - # To preserve gene names in .var, the target gene names are copied into var_names and are then copied - # back into .var. - for adata in adata_ls: - adata.var.index = adata.var[self._adata_ids_sfaira.gene_id_ensembl].tolist() if len(adata_ls) > 1: + var_original = adata_ls[0].var.copy() + for a in adata_ls: + a.var_names_make_unique() # TODO: need to keep this? -> yes, still catching errors here (March 2020) # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. 
try: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=self._adata_ids_sfaira.dataset, - batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] + batch_key=self._adata_ids.dataset, + batch_categories=[i for i in self.ids if self.datasets[i].adata is not None], + index_unique=None ) except ValueError: adata_concat = adata_ls[0].concatenate( *adata_ls[1:], join="outer", - batch_key=self._adata_ids_sfaira.dataset, - batch_categories=[i for i in self.ids if self.datasets[i].adata is not None] + batch_key=self._adata_ids.dataset, + batch_categories=[i for i in self.ids if self.datasets[i].adata is not None], + index_unique=None ) - - adata_concat.var[self._adata_ids_sfaira.gene_id_ensembl] = adata_concat.var.index - - if len(set([a.uns[self._adata_ids_sfaira.mapped_features] for a in adata_ls])) == 1: - adata_concat.uns[self._adata_ids_sfaira.mapped_features] = \ - adata_ls[0].uns[self._adata_ids_sfaira.mapped_features] - else: - adata_concat.uns[self._adata_ids_sfaira.mapped_features] = False + adata_concat.var = var_original + adata_concat.uns[self._adata_ids.mapped_features] = match_ref_list[0] else: adata_concat = adata_ls[0] - adata_concat.obs[self._adata_ids_sfaira.dataset] = self.ids[0] - - adata_concat.var_names_make_unique() + adata_concat.obs[self._adata_ids.dataset] = adata_ls[0].uns['id'] return adata_concat def obs_concat(self, keys: Union[list, None] = None): @@ -367,7 +394,7 @@ def obs_concat(self, keys: Union[list, None] = None): (k, self.datasets[x].adata.obs[k]) if k in self.datasets[x].adata.obs.columns else (k, ["nan" for _ in range(self.datasets[x].adata.obs.shape[0])]) for k in keys - ] + [(self._adata_ids_sfaira.dataset, [x for _ in range(self.datasets[x].adata.obs.shape[0])])] + ] + [(self._adata_ids.dataset, [x for _ in range(self.datasets[x].adata.obs.shape[0])])] )) for x in self.ids if self.datasets[x].adata is not None]) return obs_concat @@ -395,13 +422,13 @@ def ontology_celltypes(self): 
"type ontology. Using only the ontology of the first data set in the group.") return self.datasets[self.ids[0]].ontology_celltypes - def project_celltypes_to_ontology(self): + def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = None, copy=False): """ Project free text cell type names to ontology based on mapping table. :return: """ for _, v in self.datasets.items(): - v.project_celltypes_to_ontology() + v.project_celltypes_to_ontology(adata_fields=adata_fields, copy=copy) def subset(self, key, values: Union[list, tuple, np.ndarray]): """ @@ -656,20 +683,20 @@ def clean_ontology_class_map(self): attr="celltypes", allowed=self.ontology_celltypes, attempted=[ - x for x in np.unique(tab[self._adata_ids_sfaira.classmap_target_key].values).tolist() + x for x in np.unique(tab[self._adata_ids.classmap_target_key].values).tolist() if x not in [ - self._adata_ids_sfaira.unknown_celltype_identifier, - self._adata_ids_sfaira.not_a_cell_celltype_identifier + self._adata_ids.unknown_celltype_identifier, + self._adata_ids.not_a_cell_celltype_identifier ] ] ) # Adds a third column with the corresponding ontology IDs into the file. 
- tab[self._adata_ids_sfaira.classmap_target_id_key] = [ + tab[self._adata_ids.classmap_target_id_key] = [ self.ontology_celltypes.convert_to_id(x) - if x != self._adata_ids_sfaira.unknown_celltype_identifier and - x != self._adata_ids_sfaira.not_a_cell_celltype_identifier - else self._adata_ids_sfaira.unknown_celltype_identifier - for x in tab[self._adata_ids_sfaira.classmap_target_key].values + if x != self._adata_ids.unknown_celltype_identifier and + x != self._adata_ids.not_a_cell_celltype_identifier + else self._adata_ids.unknown_celltype_identifier + for x in tab[self._adata_ids.classmap_target_key].values ] list(self.datasets.values())[0]._write_class_map(fn=fn_map, tab=tab) @@ -690,7 +717,7 @@ def __init__(self, dataset_groups: Union[None, List[DatasetGroup], List[DatasetS self.fn_backed = None self.set_dataset_groups(dataset_groups=dataset_groups) - self._adata_ids_sfaira = AdataIdsSfaira() + self._adata_ids = AdataIdsSfaira() def set_dataset_groups(self, dataset_groups: Union[DatasetGroup, DatasetSuperGroup, List[DatasetGroup], List[DatasetSuperGroup]]): @@ -798,10 +825,7 @@ def download(self, **kwargs): def load( self, annotated_only: bool = False, - match_to_reference: Union[str, bool, None] = None, - remove_gene_version: bool = True, load_raw: bool = False, - set_metadata: bool = True, allow_caching: bool = True, processes: int = 1, **kwargs @@ -810,10 +834,7 @@ def load( Loads data set human into anndata object. :param annotated_only: - :param match_to_reference: See .load(). - :param remove_gene_version: See .load(). :param load_raw: See .load(). - :param set_metadata: See .load(). :param allow_caching: See .load(). :param processes: Processes to parallelise loading over. Uses python multiprocessing if > 1, for loop otherwise. Note: parallelises loading of each dataset group, but not across groups. 
@@ -822,51 +843,109 @@ def load( for x in self.dataset_groups: x.load( annotated_only=annotated_only, - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, load_raw=load_raw, allow_caching=allow_caching, - set_metadata=set_metadata, processes=processes, **kwargs ) - def subset_genes(self, subset_type: Union[None, str, List[str]] = None): + def streamline_features( + self, + remove_gene_version: bool = True, + match_to_reference: Union[str, bool, None] = None, + subset_genes_to_type: Union[None, str, List[str]] = None, + ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - - :param subset_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. + :param match_to_reference: Whether to map gene names to a given annotation. Can be: + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - None: use the default annotation for this organism in sfaira. + - False: no mapping of gene labels will be done. + :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. 
""" for x in self.dataset_groups: - x.subset_genes(subset_type=subset_type) - - @property - def adata(self): - if self._adata is None: - # Make sure that concatenate is not used on a None adata object: - adatas = [x.adata for x in self.dataset_groups if x.adata_ls] - if len(adatas) > 1: - self._adata = adatas[0].concatenate( - *adatas[1:], - join="outer", - batch_key=self._adata_ids_sfaira.dataset_group - ) - elif len(adatas) == 1: - self._adata = adatas[0] - else: - warnings.warn("no anndata instances to concatenate") - return self._adata + x.streamline_features( + remove_gene_version=remove_gene_version, + match_to_reference=match_to_reference, + subset_genes_to_type=subset_genes_to_type + ) @property def adata_ls(self): adata_ls = [] - for k, v in self.datasets.items(): - adata_ls.append(v.adata) + for k, v in self.flatten().datasets.items(): + if v.adata is not None: + adata_ls.append(v.adata) return adata_ls + @property + def adata(self): + adata_ls = self.adata_ls + if not adata_ls: + return None + + # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata + match_ref_list = [] + rm_gene_ver_list = [] + gene_type_list = [] + for d_id in self.flatten().ids: + if self.flatten().datasets[d_id].adata is not None: + assert self.flatten().datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ + f"featurespace. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have a " \ + f"streamlined featurespace. Run .streamline_features()" \ + f" first." + assert self.flatten().datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ + f"metadata. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have " \ + f"streamlined metadata. Run .streamline_metadata() first." 
+ match_ref_list.append(self.flatten().datasets[d_id].mapped_features) + rm_gene_ver_list.append(self.flatten().datasets[d_id].remove_gene_version) + gene_type_list.append(self.flatten().datasets[d_id].subset_gene_type) + assert len(set(match_ref_list)) == 1, \ + "Not all datasets in this group had their features matched to the same reference (argument " \ + "'match_to_reference' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." + assert len(set(rm_gene_ver_list)) == 1, \ + "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ + "method .streamline_features()). This is however a prerequisite for creating a combined adata object." + assert len(set(gene_type_list)) == 1, \ + "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ + "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." + + if len(adata_ls) > 1: + var_original = adata_ls[0].var.copy() + for a in adata_ls: + a.var_names_make_unique() + # TODO: need to keep this? -> yes, still catching errors here (March 2020) + # Fix for loading bug: sometime concatenating sparse matrices fails the first time but works on second try. 
+ try: + adata_concat = adata_ls[0].concatenate( + *adata_ls[1:], + join="outer", + batch_key=self._adata_ids.dataset, + batch_categories=[i for i in self.ids if self.flatten().datasets[i].adata is not None], + index_unique=None + ) + except ValueError: + adata_concat = adata_ls[0].concatenate( + *adata_ls[1:], + join="outer", + batch_key=self._adata_ids.dataset, + batch_categories=[i for i in self.ids if self.flatten().datasets[i].adata is not None], + index_unique=None + ) + adata_concat.var = var_original + adata_concat.uns[self._adata_ids.mapped_features] = match_ref_list[0] + else: + adata_concat = adata_ls[0] + adata_concat.obs[self._adata_ids.dataset] = adata_ls[0].uns['id'] + return adata_concat + def write_distributed_store( self, dir_cache: Union[str, os.PathLike], @@ -950,20 +1029,20 @@ def write_backed( X.indptr = X.indptr.astype(np.int64) self.adata.X = X keys = [ - self._adata_ids_sfaira.annotated, - self._adata_ids_sfaira.assay_sc, - self._adata_ids_sfaira.assay_differentiation, - self._adata_ids_sfaira.assay_type_differentiation, - self._adata_ids_sfaira.author, - self._adata_ids_sfaira.cell_line, - self._adata_ids_sfaira.dataset, - self._adata_ids_sfaira.cell_ontology_class, - self._adata_ids_sfaira.development_stage, - self._adata_ids_sfaira.normalization, - self._adata_ids_sfaira.organ, - self._adata_ids_sfaira.sample_type, - self._adata_ids_sfaira.state_exact, - self._adata_ids_sfaira.year, + self._adata_ids.annotated, + self._adata_ids.assay_sc, + self._adata_ids.assay_differentiation, + self._adata_ids.assay_type_differentiation, + self._adata_ids.author, + self._adata_ids.cell_line, + self._adata_ids.dataset, + self._adata_ids.cell_ontology_class, + self._adata_ids.development_stage, + self._adata_ids.normalization, + self._adata_ids.organ, + self._adata_ids.sample_type, + self._adata_ids.state_exact, + self._adata_ids.year, ] if scatter_update: self.adata.obs = pandas.DataFrame({ @@ -1016,33 +1095,39 @@ def delete_backed(self): def 
load_cached_backed(self, fn: PathLike): self.adata = anndata.read(fn, backed='r') - def streamline( + def streamline_metadata( self, - format: str = "sfaira", - allow_uns_sfaira: bool = False, + schema: str = "sfaira", + uns_to_obs: bool = False, clean_obs: bool = True, clean_var: bool = True, - clean_uns: bool = True + clean_uns: bool = True, + clean_obs_names: bool = True, ): """ Streamline the adata instance in each group and each data set to output format. - Output format are saved in ADATA_FIELDS* classes. - :param format: Export format. - + :param schema: Export format. - "sfaira" - "cellxgene" - :param allow_uns_sfaira: When using sfaira format: Whether to keep metadata in uns or move it to obs instead. + :param uns_to_obs: Whether to move metadata in .uns to .obs to make sure it's not lost when concatenating multiple objects. :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. :param clean_uns: Whether to delete non-streamlined fields in .uns. + :param clean_obs_names: Whether to replace obs_names with a string comprised of dataset id and an increasing integer. 
:return: """ for x in self.dataset_groups: for xx in x.ids: - x.datasets[xx].streamline(format=format, allow_uns_sfaira=allow_uns_sfaira, clean_obs=clean_obs, - clean_var=clean_var, clean_uns=clean_uns) + x.datasets[xx].streamline_metadata( + schema=schema, + uns_to_obs=uns_to_obs, + clean_obs=clean_obs, + clean_var=clean_var, + clean_uns=clean_uns, + clean_obs_names=clean_obs_names + ) def subset(self, key, values): """ @@ -1145,13 +1230,13 @@ def subset_cells(self, key, values: Union[str, List[str]]): for i in range(len(self.dataset_groups)): self.dataset_groups[i].subset_cells(key=key, values=values) - def project_celltypes_to_ontology(self): + def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = None, copy=False): """ Project free text cell type names to ontology based on mapping table. :return: """ for _, v in self.dataset_groups: - v.project_celltypes_to_ontology() + v.project_celltypes_to_ontology(adata_fields=adata_fields, copy=copy) def write_config(self, fn: Union[str, os.PathLike]): """ diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index 5e8857f47..f2970cf7d 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -28,7 +28,7 @@ def __init__( self.cellontology_class_obs_key = self._adata_ids_cellxgene.cell_ontology_class self.cellontology_id_obs_key = self._adata_ids_cellxgene.cell_ontology_id - self.cellontology_original_obs_key = self._adata_ids_cellxgene.cell_types_original + self.cell_types_original_obs_key = self._adata_ids_cellxgene.cell_types_original self.development_stage_obs_key = self._adata_ids_cellxgene.development_stage self.disease_obs_key = self._adata_ids_cellxgene.disease self.ethnicity_obs_key = self._adata_ids_cellxgene.ethnicity @@ -36,8 +36,8 @@ def __init__( self.organ_obs_key = self._adata_ids_cellxgene.organism 
self.state_exact_obs_key = self._adata_ids_cellxgene.state_exact - self.var_ensembl_col = self._adata_ids_cellxgene.gene_id_ensembl - self.var_symbol_col = self._adata_ids_cellxgene.gene_id_names + self.gene_id_ensembl_var_key = self._adata_ids_cellxgene.gene_id_ensembl + self.gene_id_symbols_var_key = self._adata_ids_cellxgene.gene_id_symbols def _load(self): """ diff --git a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml index 3db231a7f..cee3e1910 100644 --- a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml +++ b/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml @@ -44,9 +44,9 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: "Experiment_batch" observation_wise: - cellontology_original_obs_key: "Main_cluster_name" + cell_types_original_obs_key: "Main_cluster_name" feature_wise: - var_ensembl_col: "gene_id" - var_symbol_col: "gene_short_name" + gene_id_ensembl_var_key: "gene_id" + gene_id_symbols_var_key: "gene_short_name" meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py index e0c4f6e65..e89f75cac 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2017_09_004/human_isletoflangerhans_2017_smartseq2_enge_001.py @@ -24,8 +24,8 @@ def __init__(self, **kwargs): self.organ = "islet of Langerhans" self.organism = "human" self.year = 2017 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "celltype" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = 
"celltype" self.sample_source = "primary_tissue" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py index c338040d9..24e8c3103 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_02_001/mouse_x_2018_microwellseq_han_x.py @@ -308,10 +308,10 @@ def __init__(self, **kwargs): self.year = 2018 self.sample_source = "primary_tissue" - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" # Only adult and neonatal samples are annotated: - self.cellontology_original_obs_key = "Annotation" \ + self.cell_types_original_obs_key = "Annotation" \ if sample_dev_stage_dict[self.sample_fn] in ["adult", "neonatal"] and \ self.sample_fn not in [ "NeontalBrain1_dge.txt.gz", diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index 4cf0acbcf..826736b3a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -52,9 +52,9 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: "Cluster" + cell_types_original_obs_key: "Cluster" feature_wise: - var_ensembl_col: - var_symbol_col: "index" + gene_id_ensembl_var_key: + gene_id_symbols_var_key: "index" meta: version: "1.0" diff --git 
a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index de9a1b696..7f978325a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -23,9 +23,9 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" - self.cellontology_original_obs_key = "CellType" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index 00b8f66c3..fde1b559b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -23,10 +23,10 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "gene_ids" - self.cellontology_original_obs_key = "CellType" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index 9acd6481e..b16ceb030 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -29,9 +29,9 @@ def __init__(self, **kwargs): self.organism = "human" self.year = 2018 - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" - self.cellontology_original_obs_key = "CellType" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py index 5e4ee0daf..d7e33841b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cels_2016_08_011/human_pancreas_2016_indrop_baron_001.py @@ -27,8 +27,8 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2016 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py index dcee6e1e3..6abb1637c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2016_08_020/human_pancreas_2016_smartseq2_segerstolpe_001.py @@ -25,9 +25,9 @@ def __init__(self, **kwargs): self.year = 2016 self.sample_source = "primary_tissue" - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" - self.cellontology_original_obs_key = 
"Characteristics[cell type]" + self.cell_types_original_obs_key = "Characteristics[cell type]" self.state_exact_obs_key = "Characteristics[disease]" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 35dd76d5b..5317bf313 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -38,8 +38,8 @@ def __init__(self, **kwargs): self.year = 2019 self.sample_source = "primary_tissue" - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "celltypes" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "celltypes" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index aebbfd373..ccd9bdca3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -24,8 +24,8 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2020 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "Cell_type" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "Cell_type" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py index ae6097663..27ef2f609 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_nmeth_4407/human_brain_2017_droncseq_habib_001.py @@ -23,8 +23,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2017 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index 3aa52ea6d..c3d9af7a9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -23,8 +23,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2018 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index dda02bcc0..aa459169b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2018 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "celltype" + 
self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py index 8fdb37558..5bb21b453 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_10861_2/human_kidney_2019_droncseq_lake_001.py @@ -24,8 +24,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "celltype" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index 6a60fd6bd..60448ed1f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -69,10 +69,10 @@ def __init__(self, **kwargs): self.state_exact = SAMPLE_DICT[self.sample_fn][2] self.year = 2019 - self.var_symbol_col = "Gene" - self.var_ensembl_col = "Accession" + self.gene_id_symbols_var_key = "Gene" + self.gene_id_ensembl_var_key = "Accession" - self.cellontology_original_obs_key = "cell_ontology_class" + self.cell_types_original_obs_key = "cell_ontology_class" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index 
db310b697..cfb4a1155 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -21,8 +21,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index e9ee21afe..84b0d677e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -29,9 +29,9 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2018 - self.var_symbol_col = "names" - self.var_ensembl_col = "ensembl" - self.cellontology_original_obs_key = "annotation" + self.gene_id_symbols_var_key = "names" + self.gene_id_ensembl_var_key = "ensembl" + self.cell_types_original_obs_key = "annotation" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py index a9ed3eb01..ef2597f3b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1373_2/human_liver_2019_celseq2_aizarani_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.organism = "human" self.year = 2019 - self.var_symbol_col = "index" - 
self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index 9daa11115..d4487a62f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -23,9 +23,9 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" - self.cellontology_original_obs_key = "annotation_lineage" + self.cell_types_original_obs_key = "annotation_lineage" self.state_exact_obs_key = "condition" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index 51d3cd39d..d36bc6554 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "cell.labels" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "cell.labels" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index ef013c9bb..7b8fe8990 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -45,9 +45,9 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: observation_wise: - cellontology_original_obs_key: + cell_types_original_obs_key: feature_wise: - var_ensembl_col: "ensembl" - var_symbol_col: "index" + gene_id_ensembl_var_key: "ensembl" + gene_id_symbols_var_key: "index" meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 2aa629412..d7e392209 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -29,13 +29,13 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.bio_sample_obs_key = "sample" - self.cellontology_original_obs_key = "celltype_specific" + self.cell_types_original_obs_key = "celltype_specific" self.development_stage_obs_key = "dev_stage" self.organ_obs_key = "organ" self.sex_obs_key = "sex" self.age_obs_key = "age" - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index 7a3c09343..ab082177f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ 
b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -54,9 +54,9 @@ dataset_or_observation_wise: droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "channel" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "plate.barcode" observation_wise: - cellontology_original_obs_key: "free_annotation" + cell_types_original_obs_key: "free_annotation" feature_wise: - var_ensembl_col: - var_symbol_col: "index" + gene_id_ensembl_var_key: + gene_id_symbols_var_key: "index" meta: version: "1.0" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 8259699b9..6ae1b3135 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -24,9 +24,9 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2020 - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids" - self.cellontology_original_obs_key = "cell_type" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "gene_ids" + self.cell_types_original_obs_key = "cell_type" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py index aacfdd85b..b7ce94249 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_lung_2019_dropseq_braga_001.py @@ -23,8 +23,8 @@ def __init__(self, **kwargs): self.state_exact = "uninvolved areas of tumour resection material" self.year = 2019 - self.var_symbol_col = "index" - 
self.cellontology_original_obs_key = "celltype" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index e37f5c9d0..ad68bd449 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -27,8 +27,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index 592dda3da..f4c7413ba 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -27,11 +27,11 @@ def __init__(self, **kwargs): self.year = 2019 self.bio_sample_obs_key = "sample" - self.cellontology_original_obs_key = "cluster" + self.cell_types_original_obs_key = "cluster" self.organ_obs_key = "organ" - self.var_ensembl_col = "ensembl" - self.var_symbol_col = "name" + self.gene_id_ensembl_var_key = "ensembl" + self.gene_id_symbols_var_key = "name" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index 
3deb38f2c..8256e3e73 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -25,8 +25,8 @@ def __init__(self, **kwargs): self.year = 2020 self.doi = "10.1038/s41597-019-0351-8" - self.var_symbol_col = "names" - self.var_ensembl_col = "ensembl" + self.gene_id_symbols_var_key = "names" + self.gene_id_ensembl_var_key = "ensembl" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index 3c5b42096..feaf2d55b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index da2c79b58..84b179723 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -31,8 +31,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = 
"CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 208fcc876..1352418da 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -28,8 +28,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2020 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 92dd5eced..198aaccf5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -76,7 +76,7 @@ def __init__(self, **kwargs): f"{self.sample_fn}" self.download_url_meta = None - self.cellontology_original_obs_key = "cell_ontology_class" + self.cell_types_original_obs_key = "cell_ontology_class" self.development_stage_obs_key = "development_stage" # not given in all data sets, TODO maybe infer as age? self.sex_obs_key = "sex" # ToDo: further anatomical information for subtissue in "subtissue"? 
@@ -91,8 +91,8 @@ def __init__(self, **kwargs): self.year = 2019 self.sample_source = "primary_tissue" - self.var_ensembl_col = None - self.var_symbol_col = "index" + self.gene_id_ensembl_var_key = None + self.gene_id_symbols_var_key = "index" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 9cc9a77b8..4cee92c69 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -28,9 +28,9 @@ def __init__(self, **kwargs): self.year = 2020 self.sample_source = "primary_tissue" - self.var_symbol_col = "index" + self.gene_id_symbols_var_key = "index" - self.cellontology_original_obs_key = "celltype" + self.cell_types_original_obs_key = "celltype" self.state_exact_obs_key = "Diagnosis" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index b4b101c26..97df2730f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -26,9 +26,9 @@ def __init__(self, **kwargs): self.state_exact = "healthy" self.year = 2019 - self.var_symbol_col = "index" - self.var_ensembl_col = "ID" - self.cellontology_original_obs_key = "celltype" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "ID" + self.cell_types_original_obs_key = "celltype" self.set_dataset_id(idx=1) diff --git 
a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index ad29f080f..07de38e1c 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2020 - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "Anno_level_fig1" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "Anno_level_fig1" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index 6996c7900..d6e7200a9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -20,16 +20,16 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if self.sample_fn == "madissoon19_lung.processed.h5ad": self.download_url_data = "https://covid19.cog.sanger.ac.uk/madissoon19_lung.processed.h5ad" - self.var_ensembl_col = "gene.ids.HCATisStab7509734" + self.gene_id_ensembl_var_key = "gene.ids.HCATisStab7509734" elif self.sample_fn == "oesophagus.cellxgene.h5ad": self.download_url_data = \ "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/oesophagus.cellxgene.h5ad" # Associated DCP: https://data.humancellatlas.org/explore/projects/c4077b3c-5c98-4d26-a614-246d12c2e5d7 - self.var_ensembl_col = "gene_ids-HCATisStab7413619" + self.gene_id_ensembl_var_key = "gene_ids-HCATisStab7413619" else: 
self.download_url_data = \ "https://cellgeni.cog.sanger.ac.uk/tissue-stability/tissue-stability/spleen.cellxgene.h5ad" - self.var_ensembl_col = "gene_ids-HCATisStab7463846" + self.gene_id_ensembl_var_key = "gene_ids-HCATisStab7463846" self.download_url_meta = None @@ -44,8 +44,8 @@ def __init__(self, **kwargs): self.year = 2019 self.sample_source = "primary_tissue" - self.var_symbol_col = "index" - self.cellontology_original_obs_key = "Celltypes" + self.gene_id_symbols_var_key = "index" + self.cell_types_original_obs_key = "Celltypes" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index a75aa2f59..5385e38fd 100644 --- a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -24,9 +24,9 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2019 - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids" - self.cellontology_original_obs_key = "CellType" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "gene_ids" + self.cell_types_original_obs_key = "CellType" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index 53c515cdb..b4bd78bc8 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -25,8 +25,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" 
self.year = 2019 - self.var_symbol_col = "index" - self.var_ensembl_col = "gene_ids" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "gene_ids" self.set_dataset_id(idx=1) diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 96a8d1119..2d8ad20a1 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -22,8 +22,8 @@ def __init__(self, **kwargs): self.sample_source = "primary_tissue" self.year = 2018 - self.var_symbol_col = "index" - self.var_ensembl_col = "Accession" + self.gene_id_symbols_var_key = "index" + self.gene_id_ensembl_var_key = "Accession" self.set_dataset_id(idx=1) diff --git a/sfaira/data/interactive/loader.py b/sfaira/data/interactive/loader.py index 1eecb32eb..9df419c7a 100644 --- a/sfaira/data/interactive/loader.py +++ b/sfaira/data/interactive/loader.py @@ -60,7 +60,7 @@ def __init__( # self.state_exact # not currently supported # self.year # not currently supported - self.obs_key_cellontology_original = obs_key_celltypes + self.obs_key_cell_types_original = obs_key_celltypes # self.obs_key_age # not currently supported # self.obs_key_assay_sc # not currently supported @@ -76,8 +76,8 @@ def __init__( # self.obs_key_sex # not currently supported # self.obs_key_state_exact # not currently supported - self.var_symbol_col = gene_symbol_col - self.var_ensembl_col = gene_ens_col + self.gene_id_symbols_var_key = gene_symbol_col + self.gene_id_ensembl_var_key = gene_ens_col self.class_maps = class_maps diff --git a/sfaira/data/utils.py b/sfaira/data/utils.py index e229976c6..a9108fb19 100644 --- a/sfaira/data/utils.py +++ b/sfaira/data/utils.py @@ -122,19 +122,21 @@ def read_yaml(fn) -> Dict[str, Dict[str, Union[str, int, bool]]]: return {"attr": attr_dict, 
"meta": meta_dict} -def collapse_matrix(adata: anndata.AnnData) -> anndata.AnnData: +def collapse_matrix(adata: anndata.AnnData, var_column: str) -> anndata.AnnData: """ - Collapses (sum) features with the same var_name. + Collapses (sum) features with the same var_name in a provided var column. Does not retain .varm if duplicated var_names are found. keeps .var column of first occurrence of duplicated variables. - :param adata: Input anndata instance with potential duplicated var_names. - :return: Processed anndata instance without duplicated var_names. + :param adata: Input anndata instance with potential duplicated var names. + :param var_column: column name in .var that contains the duplicated features of interest + :return: Processed anndata instance without duplicated var names. """ - new_index = np.unique(adata.var_names).tolist() - if len(new_index) < adata.n_vars: - idx_map = np.array([np.where(x == adata.var_names)[0] for x in new_index]) + old_index = adata.var.index.tolist() if var_column == "index" else adata.var[var_column].tolist() + new_index = list(np.unique(old_index)) + if len(new_index) < len(old_index): + idx_map = np.array([np.where(x == np.array(old_index))[0] for x in new_index]) # Build initial matrix from first match. 
data = adata.X[:, np.array([x[0] for x in idx_map])].copy() # Add additional matched (duplicates) on top: @@ -148,9 +150,8 @@ def collapse_matrix(adata: anndata.AnnData) -> anndata.AnnData: X=data, obs=adata.obs, obsm=adata.obsm, - var=adata.var.iloc[[adata.var_names.tolist().index(x) for x in new_index]], + var=adata.var.iloc[[old_index.index(x) for x in new_index]], uns=adata.uns ) adata.obs_names = obs_names - adata.var_names = new_index return adata diff --git a/sfaira/data/utils_scripts/create_meta_and_cache.py b/sfaira/data/utils_scripts/create_meta_and_cache.py index 8f18aa08a..f73d54377 100644 --- a/sfaira/data/utils_scripts/create_meta_and_cache.py +++ b/sfaira/data/utils_scripts/create_meta_and_cache.py @@ -21,20 +21,11 @@ # Initial load and cache writing: # Only run this if data set was not already cached to speed up resumed jobs. if not os.path.exists(v.cache_fn): - v.load( - match_to_reference=None, - remove_gene_version=True, - load_raw=False, - allow_caching=True, - ) + v.load(load_raw=False, allow_caching=True) # Write meta data, cache. v.write_meta(fn_meta=None, dir_out=path_meta) # Test load from cache. - v.load( - remove_gene_version=False, # speed this up - load_raw=False, - allow_caching=False, - ) + v.load(load_raw=False, allow_caching=False) v.clear() except ValueError as e: # Do not abort upon ValueErrors, such as from cell type map bugs. 
diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py index 25b0bf7ad..528c1a929 100644 --- a/sfaira/data/utils_scripts/streamline_selected.py +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -27,8 +27,8 @@ set_metadata=False, ) if schema == "cellxgene": - ds.subset_genes(subset_type=None) - ds.streamline(format=schema.lower(), allow_uns_sfaira=True, clean_obs=False, clean_var=True, clean_uns=False) + ds.streamline_features(remove_gene_version=True, match_to_reference=True, subset_genes_to_type=None) + ds.streamline_metadata(schema=schema.lower(), uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, clean_obs_names=True) assert len(ds.dataset_groups) == 1, len(ds.dataset_groups) dsg = ds.dataset_groups[0] for k, v in dsg.datasets.items(): diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 373d20ff8..07cddb790 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -73,7 +73,7 @@ def __init__( self.idx_test = None self.md5 = weights_md5 self.cache_path = cache_path - self._adata_ids_sfaira = AdataIdsSfaira() + self._adata_ids = AdataIdsSfaira() @property def model_type(self): @@ -218,13 +218,13 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): # If the feature space is already mapped to the right reference, return the data matrix immediately if 'mapped_features' in self.data.uns_keys(): - if self.data.uns[self._adata_ids_sfaira.mapped_features] == \ + if self.data.uns[self._adata_ids.mapped_features] == \ self.topology_container.gc.assembly: print(f"found {x.shape[0]} observations") return x # Compute indices of genes to keep - data_ids = self.data.var[self._adata_ids_sfaira.gene_id_ensembl].values + data_ids = self.data.var[self._adata_ids.gene_id_ensembl].values idx_feature_kept = np.where([x in self.topology_container.gc.ensembl for x in data_ids])[0] idx_feature_map = np.array([self.topology_container.gc.ensembl.index(x) 
for x in data_ids[idx_feature_kept]]) @@ -641,7 +641,7 @@ def generator_helper(x_sample): elif mode == 'gradient_method': # Prepare data reading according to whether anndata is backed or not: - cell_to_class = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) + cell_to_class = self._get_class_dict(obs_key=self._adata_ids.cell_ontology_class) if self.using_store: n_features = self.data.n_vars generator_raw = self.data.generator( @@ -668,12 +668,12 @@ def generator(): for i in idx: x_sample = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() sf_sample = prepare_sf(x=x_sample)[0] - y_sample = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][i] + y_sample = self.data.obs[self._adata_ids.cell_ontology_class][i] yield (x_sample, sf_sample), (x, cell_to_class[y_sample]) else: x = self._prepare_data_matrix(idx=idx) sf = prepare_sf(x=x) - y = self.data.obs[self._adata_ids_sfaira.cell_ontology_class][idx] + y = self.data.obs[self._adata_ids.cell_ontology_class][idx] # for gradients per celltype in compute_gradients_input() n_features = x.shape[1] @@ -847,7 +847,7 @@ def compute_gradients_input( ) if per_celltype: - cell_to_id = self._get_class_dict(obs_key=self._adata_ids_sfaira.cell_ontology_class) + cell_to_id = self._get_class_dict(obs_key=self._adata_ids.cell_ontology_class) cell_names = cell_to_id.keys() cell_id = cell_to_id.values() id_to_cell = dict([(key, value) for (key, value) in zip(cell_id, cell_names)]) @@ -1007,7 +1007,7 @@ def _get_celltype_out( onehot_encoder = self._one_hot_encoder() y = np.concatenate([ np.expand_dims(onehot_encoder(z), axis=0) - for z in self.data.obs[self._adata_ids_sfaira.cell_ontology_class].values[idx].tolist() + for z in self.data.obs[self._adata_ids.cell_ontology_class].values[idx].tolist() ], axis=0) # Distribute aggregated class weight for computation of weights: freq = np.mean(y / np.sum(y, axis=1, keepdims=True), axis=0, keepdims=True) diff --git 
a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index c98ebf1c0..c6f88b018 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -314,14 +314,7 @@ def load_data( gene_symbol_col=gene_symbol_col, gene_ens_col=gene_ens_col ) - dataset.load( - celltype_version=None, - data_dir=None, - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=False, - allow_caching=False, - ) + dataset.load(load_raw=False, allow_caching=False, celltype_version=None, data_dir=None) self.data = dataset.adata def filter_cells(self): diff --git a/sfaira/unit_tests/data/test_data_utils.py b/sfaira/unit_tests/data/test_data_utils.py index 352c59462..5e766fa01 100644 --- a/sfaira/unit_tests/data/test_data_utils.py +++ b/sfaira/unit_tests/data/test_data_utils.py @@ -74,17 +74,16 @@ def test_collapse_matrix( else: assert False if duplications: - index = ["g" + str(i) for i in range(x.shape[1])] - else: # Create triplicate and duplicate gene names: index = ["g" + str(i) for i in range(2)] + ["g" + str(i) for i in range(3)] + \ ["g" + str(i) for i in range(x.shape[1] - 3 - 2)] - adata = anndata.AnnData(x, var=pd.DataFrame(index=index)) - adata.var_names = index - adata2 = collapse_matrix(adata=adata) + else: + index = ["g" + str(i) for i in range(x.shape[1])] + adata = anndata.AnnData(x, var=pd.DataFrame({"var_column": index})) + adata2 = collapse_matrix(adata=adata, var_column="var_column") assert adata.X.shape[0] == adata2.X.shape[0], "observation dimension mismatch" assert adata.X.dtype == adata2.X.dtype, "type mismatch" - assert adata2.X.shape[1] == len(np.unique(adata.var_names)), "feature dimension mismatch" + assert adata2.X.shape[1] == len(np.unique(adata.var["var_column"])), "feature dimension mismatch" assert np.all(np.asarray(adata.X.sum()).flatten() == np.asarray(adata2.X.sum().flatten())), \ "total count mismatch" assert np.all(np.asarray(adata.X.sum(axis=1)).flatten() == 
np.asarray(adata2.X.sum(axis=1).flatten())), \ diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index bb0e43ab3..e0d5b3fe4 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -74,7 +74,7 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): for k, v in x.datasets.items(): assert v.organism == "mouse", v.id assert v.ontology_container_sfaira.organ.is_a(query=v.organ, reference=organ), v.organ - for y in np.unique(v.adata.obs[v._adata_ids_sfaira.cell_ontology_class].values): + for y in np.unique(v.adata.obs[v._adata_ids.cell_ontology_class].values): assert v.ontology_container_sfaira.cellontology_class.is_a(query=y, reference=celltype), y @@ -82,12 +82,13 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): @pytest.mark.parametrize("clean_obs", [True, False]) @pytest.mark.parametrize("clean_var", [True, False]) @pytest.mark.parametrize("clean_uns", [True, False]) -def test_dsgs_streamline(out_format: str, clean_obs: bool, clean_var: bool, clean_uns: bool): +@pytest.mark.parametrize("clean_obs_names", [True, False]) +def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: bool, clean_uns: bool, clean_obs_names: bool): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load(remove_gene_version=True) - ds.streamline(format=out_format, allow_uns_sfaira=False, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns) + ds.streamline_metadata(schema=out_format, uns_to_obs=True, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns, clean_obs_names=clean_obs_names) def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): From f3a2a3ecfa8602e5b6a776d441ccc574fa87854d Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 10:01:44 +0200 Subject: [PATCH 109/161] updated streamlining in store script 
--- sfaira/data/utils_scripts/write_store.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py index adac37fb1..831c53649 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -20,13 +20,8 @@ allow_caching=True, set_metadata=False, ) - ds.streamline( - format="sfaira", - allow_uns_sfaira=True, - clean_obs=True, - clean_var=True, - clean_uns=True, - ) - ds.subset_genes(subset_type="protein_coding") + ds.streamline_features(remove_gene_version=True, match_to_reference=True, subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, + clean_obs_names=True) ds.write_distributed_store(dir_cache=path_store, store="h5ad") ds.clear() From cb6b4284c80a66bbf9b079878a7669c2d995402d Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 10:37:41 +0200 Subject: [PATCH 110/161] added unit test for feature streamlining and fixed store script added option to supply reference assembly as dictionary --- sfaira/data/base/dataset.py | 22 ++++++++++++++-------- sfaira/data/base/dataset_group.py | 20 +++++++++++--------- sfaira/data/utils_scripts/write_store.py | 9 +++++---- sfaira/unit_tests/data/test_dataset.py | 20 ++++++++++++++++---- 4 files changed, 46 insertions(+), 25 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 199e6cc66..f0323e3d7 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -10,7 +10,7 @@ from os import PathLike import pandas import scipy.sparse -from typing import List, Tuple, Union +from typing import Dict, List, Tuple, Union import warnings import urllib.request import urllib.parse @@ -486,7 +486,8 @@ def _add_missing_featurenames( # match it straight away, if it is not in there we try to match everything in front of the first 
period in # the gene name with a dictionary that was modified in the same way, if there is still no match we append na ensids = [] - symbs = self.adata.var.index if self.gene_id_symbols_var_key == "index" else self.adata.var[self.gene_id_symbols_var_key] + symbs = self.adata.var.index if self.gene_id_symbols_var_key == "index" else \ + self.adata.var[self.gene_id_symbols_var_key] for n in symbs: if n in id_dict.keys(): ensids.append(id_dict[n]) @@ -521,8 +522,8 @@ def _collapse_ensembl_gene_id_versions(self): def streamline_features( self, + match_to_reference: Union[str, Dict[str, str], None] = None, remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, subset_genes_to_type: Union[None, str, List[str]] = None, ): """ @@ -530,20 +531,24 @@ def streamline_features( This also adds missing ensid or gene symbol columns if match_to_reference is not set to False and removes all adata.var columns that are not defined as gene_id_ensembl_var_key or gene_id_symbol_var_key in the dataloader. - :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param match_to_reference: Whether to map gene names to a given annotation. Can be: - - str: Provide the name of the annotation in the format Organism.Assembly.Release - - None: use the default annotation for this organism in sfaira. - - False: no mapping of gene labels will be done. + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set + based on organism annotation. + - False: no mapping of gene labels will be done. + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. 
- "protein_coding": All protein coding genes in assembly. """ - # TODO: think about workflow when featurespace should nt be sreamlined. can we still apply a metadata schema? + # TODO: think about workflow when featurespace should nt be streamlined. can we still apply a metadata schema? assert match_to_reference is not False, "feature_streamlining is not possible when match_to_reference is False" + self.__assert_loaded() # Set genome container if mapping of gene labels is requested if match_to_reference is not False: # Testing this explicitly makes sure False is treated separately from None + if isinstance(match_to_reference, dict): + match_to_reference = match_to_reference[self.organism] self._set_genome(organism=self.organism, assembly=match_to_reference) self.mapped_features = self.genome_container.assembly else: @@ -662,6 +667,7 @@ def streamline_metadata( :param clean_obs_names: Whether to replace obs_names with a string comprised of dataset id and an increasing integer. :return: """ + self.__assert_loaded() # Set schema as provided by the user if schema == "sfaira": diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index fc710b894..5df10881c 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -174,17 +174,18 @@ def streamline_metadata( def streamline_features( self, + match_to_reference: Union[str, Dict[str, str], None] = None, remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, subset_genes_to_type: Union[None, str, List[str]] = None, ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param match_to_reference: Whether to map gene names to a given annotation. 
Can be: - - str: Provide the name of the annotation in the format Organism.Assembly.Release - - None: use the default annotation for this organism in sfaira. - - False: no mapping of gene labels will be done. + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set + based on organism annotation. + - False: no mapping of gene labels will be done. + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. @@ -851,17 +852,18 @@ def load( def streamline_features( self, + match_to_reference: Union[str, Dict[str, str], None] = None, remove_gene_version: bool = True, - match_to_reference: Union[str, bool, None] = None, subset_genes_to_type: Union[None, str, List[str]] = None, ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param match_to_reference: Whether to map gene names to a given annotation. Can be: - - str: Provide the name of the annotation in the format Organism.Assembly.Release - - None: use the default annotation for this organism in sfaira. - - False: no mapping of gene labels will be done. + - str: Provide the name of the annotation in the format Organism.Assembly.Release + - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set + based on organism annotation. + - False: no mapping of gene labels will be done. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. 
Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py index 831c53649..b51331d11 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -14,13 +14,14 @@ for k, ds in universe.datasets.items(): print(f"SCRIPT loading {k}") ds.load( - match_to_reference=None, - remove_gene_version=True, load_raw=False, allow_caching=True, - set_metadata=False, ) - ds.streamline_features(remove_gene_version=True, match_to_reference=True, subset_genes_to_type="protein_coding") + ds.streamline_features( + remove_gene_version=True, + match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, + subset_genes_to_type="protein_coding" + ) ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, clean_obs_names=True) ds.write_distributed_store(dir_cache=path_store, store="h5ad") diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index e0d5b3fe4..45df9f8bf 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -48,7 +48,7 @@ def test_dsgs_adata(): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) - ds.load(remove_gene_version=True) + ds.load() _ = ds.adata @@ -56,7 +56,7 @@ def test_dsgs_load(): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) - ds.load(remove_gene_version=False) + ds.load() @pytest.mark.parametrize("organ", ["lung"]) @@ -68,7 +68,7 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", 
values=["mouse"]) ds.subset(key="organ", values=[organ]) - ds.load(remove_gene_version=False) + ds.load() ds.subset_cells(key="cellontology_class", values=celltype) for x in ds.dataset_groups: for k, v in x.datasets.items(): @@ -87,10 +87,22 @@ def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: b ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) - ds.load(remove_gene_version=True) + ds.load() ds.streamline_metadata(schema=out_format, uns_to_obs=True, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns, clean_obs_names=clean_obs_names) +@pytest.mark.parametrize("match_to_reference", ["Mus_musculus.GRCm38.102", {"mouse": "Mus_musculus.GRCm38.102"}]) +@pytest.mark.parametrize("remove_gene_version", [False, True]) +@pytest.mark.parametrize("subset_genes_to_type", [None, "protein_coding"]) +def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: str, subset_genes_to_type: str): + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds.load() + ds.streamline_features(remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, + subset_genes_to_type=subset_genes_to_type) + + def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) From ec3aca32e8f940d3a1849ec2de9964859f137342 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 11:13:46 +0200 Subject: [PATCH 111/161] updated store writing and added a unit test --- requirements.txt | 2 +- sfaira/data/base/dataset.py | 14 ++++++++--- sfaira/data/base/dataset_group.py | 28 +++++++++++++++------ sfaira/data/utils_scripts/write_store.py | 2 +- sfaira/unit_tests/data/test_dataset.py | 
32 +++++------------------- 5 files changed, 38 insertions(+), 40 deletions(-) diff --git a/requirements.txt b/requirements.txt index ae48f17ff..9676a97f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -anndata>=0.7 +anndata>=0.7.6 crossref_commons docutils fuzzywuzzy diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index f0323e3d7..65c4df5aa 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -876,7 +876,9 @@ def streamline_metadata( def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "backed", + store: str = "h5ad", + dense: bool = False, + compression_kwargs: dict = {}, chunks: Union[int, None] = None, ): """ @@ -889,9 +891,12 @@ def write_distributed_store( :param store: Disk format for objects in cache: - "h5ad": Allows access via backed .h5ad. - On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives - fast row-wise access if the files are csr. + Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise + access if the files are csr, so further compression potentially not necessary. - "zarr": Allows access as zarr array. + :param dense: Whether to write sparse or dense store, this will be homogenously enforced. + :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: + compression, compression_opts. :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. Only relevant for store=="zarr". 
""" @@ -901,7 +906,8 @@ def write_distributed_store( print(f"WARNING: high-perfomances caches based on .h5ad work better with .csr formatted expression " f"data, found {type(self.adata.X)}") fn = os.path.join(dir_cache, self.doi_cleaned_id + ".h5ad") - self.adata.write_h5ad(filename=fn, compression=None, force_dense=False) + as_dense = ("X",) if dense else () + self.adata.write_h5ad(filename=fn, as_dense=as_dense, **compression_kwargs) elif store == "zarr": fn = os.path.join(dir_cache, self.doi_cleaned_id) self.adata.write_zarr(store=fn, chunks=chunks) diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 5df10881c..170902596 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -200,7 +200,9 @@ def streamline_features( def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "backed", + store: str = "h5ad", + dense: bool = False, + compression_kwargs: dict = {}, chunks: Union[int, None] = None, ): """ @@ -213,14 +215,18 @@ def write_distributed_store( :param store: Disk format for objects in cache: - "h5ad": Allows access via backed .h5ad. - On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives - fast row-wise access if the files are csr. + Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise + access if the files are csr, so further compression potentially not necessary. - "zarr": Allows access as zarr array. + :param dense: Whether to write sparse or dense store, this will be homogenously enforced. + :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: + compression, compression_opts. :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. Only relevant for store=="zarr". 
""" for _, v in self.datasets.items(): - v.write_distributed_store(dir_cache=dir_cache, store=store, chunks=chunks) + v.write_distributed_store(dir_cache=dir_cache, store=store, dense=dense, + compression_kwargs=compression_kwargs, chunks=chunks) def write_backed( self, @@ -951,7 +957,9 @@ def adata(self): def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "backed", + store: str = "h5ad", + dense: bool = False, + compression_kwargs: dict = {}, chunks: Union[int, None] = None, ): """ @@ -965,14 +973,18 @@ def write_distributed_store( :param store: Disk format for objects in cache: - "h5ad": Allows access via backed .h5ad. - On disk data will not be compressed as .h5ad supports sparse data with is a good compression that gives - fast row-wise access if the files are csr. + Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise + access if the files are csr, so further compression potentially not necessary. - "zarr": Allows access as zarr array. + :param dense: Whether to write sparse or dense store, this will be homogenously enforced. + :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: + compression, compression_opts. :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. Only relevant for store=="zarr". 
""" for x in self.dataset_groups: - x.write_distributed_store(dir_cache=dir_cache, store=store, chunks=chunks) + x.write_distributed_store(dir_cache=dir_cache, store=store, dense=dense, + compression_kwargs=compression_kwargs, chunks=chunks) def write_backed( self, diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py index b51331d11..e104ffa13 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -24,5 +24,5 @@ ) ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, clean_obs_names=True) - ds.write_distributed_store(dir_cache=path_store, store="h5ad") + ds.write_distributed_store(dir_cache=path_store, store="h5ad", dense=False) ds.clear() diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 45df9f8bf..0f8f4d5d8 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -94,7 +94,7 @@ def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: b @pytest.mark.parametrize("match_to_reference", ["Mus_musculus.GRCm38.102", {"mouse": "Mus_musculus.GRCm38.102"}]) @pytest.mark.parametrize("remove_gene_version", [False, True]) @pytest.mark.parametrize("subset_genes_to_type", [None, "protein_coding"]) -def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: str, subset_genes_to_type: str): +def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: bool, subset_genes_to_type: str): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) @@ -103,34 +103,14 @@ def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: subset_genes_to_type=subset_genes_to_type) -def test_dsg_load_backed_dense(genome="Mus_musculus_GRCm38_97"): +@pytest.mark.parametrize("store", 
["h5ad"]) +@pytest.mark.parametrize("dense", [False]) +def test_dsg_write_store(store: str, dense: bool): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.write_backed( - fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), - genome=genome, - shuffled=True, - as_dense=True, - annotated_only=False - ) - assert isinstance(ds.adata.X[:], np.ndarray), "%s" % type(ds.adata.X) - - -def test_dsg_load_backed_sparse(genome="Mus_musculus_GRCm38_97"): - ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds = DatasetSuperGroup(dataset_groups=[ds]) - ds.write_backed( - fn_backed=os.path.join(dir_data, 'test_backed_data.h5ad'), - genome=genome, - shuffled=False, - as_dense=False, - annotated_only=False - ) - assert isinstance(ds.adata.X[:], scipy.sparse.csr_matrix), "%s" % type(ds.adata.X) + ds.load() + ds.write_distributed_store(dir_cache=os.path.join(dir_data, "store"), store=store, dense=dense) def test_dsg_load(): From 310a37372c9795699900ce4248627a6bd5dc7b99 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 11:18:27 +0200 Subject: [PATCH 112/161] updated write_distrbuted_store documentation --- sfaira/data/base/dataset.py | 4 ++-- sfaira/data/base/dataset_group.py | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 65c4df5aa..d06969d6c 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -884,8 +884,8 @@ def write_distributed_store( """ Write data set into a format that allows distributed access to data set on disk. - Writes to a zarr-backed h5ad. - Load data set and streamline before calling this method. 
+ Stores are useful for distributed access to data sets, in many settings this requires some streamlining of the + data sets that are accessed. Use .streamline_* before calling this method to streamline the data sets. :param dir_cache: Directory to write cache in. :param store: Disk format for objects in cache: diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 170902596..774ac384b 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -208,8 +208,9 @@ def write_distributed_store( """ Write data set into a format that allows distributed access to data set on disk. - Writes every data set contained to a zarr-backed h5ad. - Load data set and streamline before calling this method. + Stores are useful for distributed access to data sets, in many settings this requires some streamlining of the + data sets that are accessed. Use .streamline_* before calling this method to streamline the data sets. + This method writes a separate file for each data set in this object. :param dir_cache: Directory to write cache in. :param store: Disk format for objects in cache: @@ -965,9 +966,9 @@ def write_distributed_store( """ Write data set into a format that allows distributed access to data set on disk. - Writes every data set contained to a zarr-backed h5ad. - The group structure of the super group is lost during this process. - Load data set and streamline before calling this method. + Stores are useful for distributed access to data sets, in many settings this requires some streamlining of the + data sets that are accessed. Use .streamline_* before calling this method to streamline the data sets. + This method writes a separate file for each data set in this object. :param dir_cache: Directory to write cache in. 
:param store: Disk format for objects in cache: From 3ae850f88eb4186129e33abc93e6768e9e885cc5 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 11:30:35 +0200 Subject: [PATCH 113/161] removed genome annotation defaults --- sfaira/data/base/dataset.py | 7 +++---- sfaira/data/base/dataset_group.py | 6 +++--- .../data/utils_scripts/streamline_selected.py | 12 +++++++----- sfaira/versions/genomes.py | 18 ++++-------------- 4 files changed, 17 insertions(+), 26 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index d06969d6c..753f4b749 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -546,10 +546,10 @@ def streamline_features( self.__assert_loaded() # Set genome container if mapping of gene labels is requested - if match_to_reference is not False: # Testing this explicitly makes sure False is treated separately from None + if match_to_reference is not None: # Testing this explicitly makes sure False is treated separately from None if isinstance(match_to_reference, dict): match_to_reference = match_to_reference[self.organism] - self._set_genome(organism=self.organism, assembly=match_to_reference) + self._set_genome(assembly=match_to_reference) self.mapped_features = self.genome_container.assembly else: self.mapped_features = False @@ -983,9 +983,8 @@ def write_backed( else: raise ValueError(f"Did not recognize backed AnnData.X format {type(adata_backed.X)}") - def _set_genome(self, organism: str, assembly: Union[str, None]): + def _set_genome(self, assembly: Union[str, None]): self.genome_container = GenomeContainer( - organism=organism, assembly=assembly, ) diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 774ac384b..1e8516614 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -192,8 +192,8 @@ def streamline_features( """ for x in self.ids: self.datasets[x].streamline_features( - 
remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, + remove_gene_version=remove_gene_version, subset_genes_to_type=subset_genes_to_type, ) @@ -877,9 +877,9 @@ def streamline_features( """ for x in self.dataset_groups: x.streamline_features( - remove_gene_version=remove_gene_version, match_to_reference=match_to_reference, - subset_genes_to_type=subset_genes_to_type + remove_gene_version=remove_gene_version, + subset_genes_to_type=subset_genes_to_type, ) @property diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py index 528c1a929..f50724b31 100644 --- a/sfaira/data/utils_scripts/streamline_selected.py +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -20,15 +20,17 @@ ) ds.subset(key="doi", values=[x]) ds.load( - match_to_reference=None, - remove_gene_version=True, load_raw=False, allow_caching=True, - set_metadata=False, ) if schema == "cellxgene": - ds.streamline_features(remove_gene_version=True, match_to_reference=True, subset_genes_to_type=None) - ds.streamline_metadata(schema=schema.lower(), uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, clean_obs_names=True) + ds.streamline_features( + match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, + remove_gene_version=True, + subset_genes_to_type=None + ) + ds.streamline_metadata(schema=schema.lower(), uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, + clean_obs_names=True) assert len(ds.dataset_groups) == 1, len(ds.dataset_groups) dsg = ds.dataset_groups[0] for k, v in dsg.datasets.items(): diff --git a/sfaira/versions/genomes.py b/sfaira/versions/genomes.py index cb575e68f..04ec9a42c 100644 --- a/sfaira/versions/genomes.py +++ b/sfaira/versions/genomes.py @@ -94,21 +94,11 @@ class GenomeContainer: def __init__( self, - organism: Union[None, str] = None, - assembly: Union[None, str] = None, + assembly: str = None, ): - if assembly is None: - # 
Set defaults based on organism if assembly is not given. - if organism is None: - raise ValueError("Supply either organism or assembly to GenomeContainer().") - if organism == "human": - self.assembly = "Homo_sapiens.GRCh38.102" - elif organism == "mouse": - self.assembly = "Mus_musculus.GRCm38.102" - else: - raise ValueError(f"organism {organism} not found") - else: - self.assembly = assembly + if not isinstance(assembly, str): + raise ValueError(f"supplied assembly {assembly} was not a string") + self.assembly = assembly self.gtfi = GtfInterface(assembly=self.assembly) self.load_genome() From 9902d6ee30d7d07372ac11cb6a1f98205b2cbbfd Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 11:46:54 +0200 Subject: [PATCH 114/161] updated store defaults --- sfaira/data/utils_scripts/streamline_selected.py | 2 +- sfaira/data/utils_scripts/write_store.py | 2 +- sfaira/unit_tests/data/test_dataset.py | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py index f50724b31..62a6329b4 100644 --- a/sfaira/data/utils_scripts/streamline_selected.py +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -30,7 +30,7 @@ subset_genes_to_type=None ) ds.streamline_metadata(schema=schema.lower(), uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, - clean_obs_names=True) + clean_obs_names=False) assert len(ds.dataset_groups) == 1, len(ds.dataset_groups) dsg = ds.dataset_groups[0] for k, v in dsg.datasets.items(): diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py index e104ffa13..2c4adfaea 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -22,7 +22,7 @@ match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, subset_genes_to_type="protein_coding" ) - ds.streamline_metadata(schema="sfaira", 
uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, clean_obs_names=True) ds.write_distributed_store(dir_cache=path_store, store="h5ad", dense=False) ds.clear() diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 0f8f4d5d8..82685836d 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -6,6 +6,8 @@ from sfaira.data import DatasetSuperGroup from sfaira.data import Universe +MOUSE_GENOME_ANNOTATION = "Mus_musculus.GRCm38.102" + dir_data = "../test_data" dir_meta = "../test_data/meta" @@ -91,7 +93,7 @@ def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: b ds.streamline_metadata(schema=out_format, uns_to_obs=True, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns, clean_obs_names=clean_obs_names) -@pytest.mark.parametrize("match_to_reference", ["Mus_musculus.GRCm38.102", {"mouse": "Mus_musculus.GRCm38.102"}]) +@pytest.mark.parametrize("match_to_reference", ["Mus_musculus.GRCm38.102", {"mouse": MOUSE_GENOME_ANNOTATION}]) @pytest.mark.parametrize("remove_gene_version", [False, True]) @pytest.mark.parametrize("subset_genes_to_type", [None, "protein_coding"]) def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: bool, subset_genes_to_type: str): @@ -110,6 +112,10 @@ def test_dsg_write_store(store: str, dense: bool): ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load() + ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, + subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) ds.write_distributed_store(dir_cache=os.path.join(dir_data, "store"), store=store, dense=dense) From 
3986c3338afcb36a07739d46f06ec10093c7c802 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 11:49:15 +0200 Subject: [PATCH 115/161] removed backed object writing scripts --- .../data/utils_scripts/write_backed_human.py | 26 ------------------- .../data/utils_scripts/write_backed_mouse.py | 26 ------------------- 2 files changed, 52 deletions(-) delete mode 100644 sfaira/data/utils_scripts/write_backed_human.py delete mode 100644 sfaira/data/utils_scripts/write_backed_mouse.py diff --git a/sfaira/data/utils_scripts/write_backed_human.py b/sfaira/data/utils_scripts/write_backed_human.py deleted file mode 100644 index 2a5e81720..000000000 --- a/sfaira/data/utils_scripts/write_backed_human.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import sfaira -import sys -import tensorflow as tf - -print(tf.__version__) - -# Set global variables. -print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -fn = str(sys.argv[2]) -genome = str(sys.argv[3]) - -path_meta = os.path.join(path, "meta") -ds = sfaira.data.dataloaders.Universe( - data_path=path, meta_path=path_meta, cache_path=path_meta -) -ds.subset(key="organism", values=["human"]) -ds.write_backed( - fn_backed=fn, - genome=genome, - shuffled=False, - as_dense=False, - annotated_only=False -) diff --git a/sfaira/data/utils_scripts/write_backed_mouse.py b/sfaira/data/utils_scripts/write_backed_mouse.py deleted file mode 100644 index bd4e647d7..000000000 --- a/sfaira/data/utils_scripts/write_backed_mouse.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import sfaira -import sys -import tensorflow as tf - -print(tf.__version__) - -# Set global variables. 
-print("sys.argv", sys.argv) - -path = str(sys.argv[1]) -fn = str(sys.argv[2]) -genome = str(sys.argv[3]) - -path_meta = os.path.join(path, "meta") -ds = sfaira.data.dataloaders.Universe( - data_path=path, meta_path=path_meta, cache_path=path_meta -) -ds.subset(key="organism", values=["mouse"]) -ds.write_backed( - fn_backed=fn, - genome=genome, - shuffled=False, - as_dense=False, - annotated_only=False -) From e4d3b0273206b43e23c00687a0193728daba96d4 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 12:02:55 +0200 Subject: [PATCH 116/161] fixed input to make_index_unique --- sfaira/data/base/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 753f4b749..e81c3003a 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -565,7 +565,7 @@ def streamline_features( elif key == "index": self.adata.var.index = make_index_unique(self.adata.var.index).tolist() else: - self.adata.var[key] = make_index_unique(self.adata.var[key]).tolist() + self.adata.var[key] = make_index_unique(pd.Index(self.adata.var[key].values.tolist())).tolist() if remove_gene_version: self._collapse_ensembl_gene_id_versions() From 95ef15af17409222f4bb395d13e361c190163293 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 26 Apr 2021 13:23:17 +0200 Subject: [PATCH 117/161] fixed column name retaining in obs and var cleaning added removal of old columns if names are duplicated in added columns --- sfaira/data/base/dataset.py | 18 ++++++++++++++---- sfaira/unit_tests/data/test_dataset.py | 5 +++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index e81c3003a..67a8d92d1 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -795,8 +795,13 @@ def streamline_metadata( if "gene_id_symbols" not in adata_target_ids.var_keys: self.gene_id_symbols_var_key = None else: - 
self.adata.var = pd.concat([var_new, self.adata.var], axis=1, ignore_index=True) - self.adata.var.index = var_new.index + index_old = self.adata.var.index.copy() + # Add old columns in if they are not duplicated: + self.adata.var = pd.concat([ + var_new, + pd.DataFrame(dict([(k, v) for k, v in self.adata.var.items() if k not in var_new.columns])) + ], axis=1) + self.adata.var.index = index_old if clean_obs: if self.adata.obsm is not None: del self.adata.obsm @@ -804,8 +809,13 @@ def streamline_metadata( del self.adata.obsp self.adata.obs = obs_new else: - self.adata.obs = pd.concat([obs_new, self.adata.obs], axis=1, ignore_index=True) - self.adata.obs.index = obs_new.index + index_old = self.adata.obs.index.copy() + # Add old columns in if they are not duplicated: + self.adata.obs = pd.concat([ + obs_new, + pd.DataFrame(dict([(k, v) for k, v in self.adata.obs.items() if k not in obs_new.columns])) + ], axis=1) + self.adata.obs.index = index_old if clean_obs_names: self.adata.obs.index = [f"{self.id}_{i}" for i in range(1, self.adata.n_obs + 1)] if clean_uns: diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 82685836d..35082887f 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -107,14 +107,15 @@ def test_dsgs_streamline_features(match_to_reference: str, remove_gene_version: @pytest.mark.parametrize("store", ["h5ad"]) @pytest.mark.parametrize("dense", [False]) -def test_dsg_write_store(store: str, dense: bool): +@pytest.mark.parametrize("clean_obs", [False, True]) +def test_dsg_write_store(store: str, dense: bool, clean_obs: bool): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load() ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, subset_genes_to_type="protein_coding") - 
ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=clean_obs, clean_var=True, clean_uns=True, clean_obs_names=True) ds.write_distributed_store(dir_cache=os.path.join(dir_data, "store"), store=store, dense=dense) From dd75ae80cce2feb065d35408bb79768c74ff84ef Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Mon, 26 Apr 2021 13:35:42 +0200 Subject: [PATCH 118/161] Bugfix (#233) * fix bugs and add cae dataset back into automated loading * remove bool option for match_to_reference * allow returning of adata without any streamlining if Dataset(Super)group only has 1 dataset loaded * make match_to_reference a mandatory argument of streamline_features() (can no longer be None) * fix bug --- sfaira/data/base/dataset.py | 82 +++++---- sfaira/data/base/dataset_group.py | 157 +++++++++--------- .../__init__.py | 0 .../human_x_2020_scirnaseq_cao_001.py | 0 .../human_x_2020_scirnaseq_cao_001.tsv | 0 .../human_x_2020_scirnaseq_cao_001.yaml | 0 6 files changed, 120 insertions(+), 119 deletions(-) rename sfaira/data/dataloaders/loaders/{_d10_1126_science_aba7721 => d10_1126_science_aba7721}/__init__.py (100%) rename sfaira/data/dataloaders/loaders/{_d10_1126_science_aba7721 => d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.py (100%) rename sfaira/data/dataloaders/loaders/{_d10_1126_science_aba7721 => d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.tsv (100%) rename sfaira/data/dataloaders/loaders/{_d10_1126_science_aba7721 => d10_1126_science_aba7721}/human_x_2020_scirnaseq_cao_001.yaml (100%) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 67a8d92d1..251e50683 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -465,38 +465,37 @@ def _add_missing_featurenames( gene_id_ensembl = "ensembl" # add some default name if not in schema 
self._adata_ids.gene_id_ensembl = gene_id_ensembl - if match_to_reference is not False: - if not self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: - raise ValueError("Either gene_id_symbols_var_key or gene_id_ensembl_var_key needs to be provided in the" - " dataloader") - elif not self.gene_id_symbols_var_key and self.gene_id_ensembl_var_key: - # Convert ensembl ids to gene symbols - id_dict = self.genome_container.id_to_names_dict - ensids = self.adata.var.index if self.gene_id_ensembl_var_key == "index" else self.adata.var[self.gene_id_ensembl_var_key] - self.adata.var[gene_id_symbols] = [ - id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' - for n in ensids - ] - self.gene_id_symbols_var_key = gene_id_symbols - elif self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: - # Convert gene symbols to ensembl ids - id_dict = self.genome_container.names_to_id_dict - id_strip_dict = self.genome_container.strippednames_to_id_dict - # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, - # match it straight away, if it is not in there we try to match everything in front of the first period in - # the gene name with a dictionary that was modified in the same way, if there is still no match we append na - ensids = [] - symbs = self.adata.var.index if self.gene_id_symbols_var_key == "index" else \ - self.adata.var[self.gene_id_symbols_var_key] - for n in symbs: - if n in id_dict.keys(): - ensids.append(id_dict[n]) - elif n.split(".")[0] in id_strip_dict.keys(): - ensids.append(id_strip_dict[n.split(".")[0]]) - else: - ensids.append('n/a') - self.adata.var[gene_id_ensembl] = ensids - self.gene_id_ensembl_var_key = gene_id_ensembl + if not self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: + raise ValueError("Either gene_id_symbols_var_key or gene_id_ensembl_var_key needs to be provided in the" + " dataloader") + elif not self.gene_id_symbols_var_key and 
self.gene_id_ensembl_var_key: + # Convert ensembl ids to gene symbols + id_dict = self.genome_container.id_to_names_dict + ensids = self.adata.var.index if self.gene_id_ensembl_var_key == "index" else self.adata.var[self.gene_id_ensembl_var_key] + self.adata.var[gene_id_symbols] = [ + id_dict[n.split(".")[0]] if n.split(".")[0] in id_dict.keys() else 'n/a' + for n in ensids + ] + self.gene_id_symbols_var_key = gene_id_symbols + elif self.gene_id_symbols_var_key and not self.gene_id_ensembl_var_key: + # Convert gene symbols to ensembl ids + id_dict = self.genome_container.names_to_id_dict + id_strip_dict = self.genome_container.strippednames_to_id_dict + # Matching gene names to ensembl ids in the following way: if the gene is present in the ensembl dictionary, + # match it straight away, if it is not in there we try to match everything in front of the first period in + # the gene name with a dictionary that was modified in the same way, if there is still no match we append na + ensids = [] + symbs = self.adata.var.index if self.gene_id_symbols_var_key == "index" else \ + self.adata.var[self.gene_id_symbols_var_key] + for n in symbs: + if n in id_dict.keys(): + ensids.append(id_dict[n]) + elif n.split(".")[0] in id_strip_dict.keys(): + ensids.append(id_strip_dict[n.split(".")[0]]) + else: + ensids.append('n/a') + self.adata.var[gene_id_ensembl] = ensids + self.gene_id_ensembl_var_key = gene_id_ensembl def _collapse_ensembl_gene_id_versions(self): """ @@ -522,7 +521,7 @@ def _collapse_ensembl_gene_id_versions(self): def streamline_features( self, - match_to_reference: Union[str, Dict[str, str], None] = None, + match_to_reference: Union[str, Dict[str, str], None], remove_gene_version: bool = True, subset_genes_to_type: Union[None, str, List[str]] = None, ): @@ -531,28 +530,23 @@ def streamline_features( This also adds missing ensid or gene symbol columns if match_to_reference is not set to False and removes all adata.var columns that are not defined as 
gene_id_ensembl_var_key or gene_id_symbol_var_key in the dataloader. - :param match_to_reference: Whether to map gene names to a given annotation. Can be: + :param match_to_reference: Which annotation to map the feature space to. Can be: - str: Provide the name of the annotation in the format Organism.Assembly.Release - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set based on organism annotation. - - False: no mapping of gene labels will be done. :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. """ - # TODO: think about workflow when featurespace should nt be streamlined. can we still apply a metadata schema? - assert match_to_reference is not False, "feature_streamlining is not possible when match_to_reference is False" self.__assert_loaded() # Set genome container if mapping of gene labels is requested - if match_to_reference is not None: # Testing this explicitly makes sure False is treated separately from None - if isinstance(match_to_reference, dict): - match_to_reference = match_to_reference[self.organism] - self._set_genome(assembly=match_to_reference) - self.mapped_features = self.genome_container.assembly - else: - self.mapped_features = False + if isinstance(match_to_reference, dict): + match_to_reference = match_to_reference[self.organism] + self._set_genome(assembly=match_to_reference) + self.mapped_features = self.genome_container.assembly + self.remove_gene_version = remove_gene_version self.subset_gene_type = subset_genes_to_type # Streamline feature space: diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 1e8516614..f595713fb 100644 --- a/sfaira/data/base/dataset_group.py +++ 
b/sfaira/data/base/dataset_group.py @@ -174,17 +174,16 @@ def streamline_metadata( def streamline_features( self, - match_to_reference: Union[str, Dict[str, str], None] = None, + match_to_reference: Union[str, Dict[str, str], None], remove_gene_version: bool = True, subset_genes_to_type: Union[None, str, List[str]] = None, ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - :param match_to_reference: Whether to map gene names to a given annotation. Can be: + :param match_to_reference: Which annotation to map the feature space to. Can be: - str: Provide the name of the annotation in the format Organism.Assembly.Release - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set based on organism annotation. - - False: no mapping of gene labels will be done. :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. @@ -327,37 +326,44 @@ def adata(self): adata_ls = self.adata_ls if not adata_ls: return None - # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata - match_ref_list = [] - rm_gene_ver_list = [] - gene_type_list = [] - for d_id in self.ids: - if self.datasets[d_id].adata is not None: - assert self.datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ - f"featurespace. To obtain an adata object from this " \ - f"DatasetGroup, all contained Datasets need to have a " \ - f"streamlined featurespace. Run .streamline_features()" \ - f" first." - assert self.datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ - f"metadata. 
To obtain an adata object from this " \ - f"DatasetGroup, all contained Datasets need to have " \ - f"streamlined metadata. Run .streamline_metadata() first." - match_ref_list.append(self.datasets[d_id].mapped_features) - rm_gene_ver_list.append(self.datasets[d_id].remove_gene_version) - gene_type_list.append(self.datasets[d_id].subset_gene_type) - assert len(set(match_ref_list)) == 1, \ - "Not all datasets in this group had their features matched to the same reference (argument " \ - "'match_to_reference' of method .streamline_features())." \ - "This is however a prerequisite for creating a combined adata object." - assert len(set(rm_gene_ver_list)) == 1, \ - "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ - "method .streamline_features()). This is however a prerequisite for creating a combined adata object." - assert len(set(gene_type_list)) == 1, \ - "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ - "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ - "combined adata object." - - if len(adata_ls) > 1: + if len(adata_ls) == 1: + for i in self.ids: + if self.datasets[i] is not None: + if self.datasets[i].adata is not None: + ds_id = i + adata_concat = adata_ls[0] + adata_concat.obs[self._adata_ids.dataset] = ds_id + else: + # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata + match_ref_list = [] + rm_gene_ver_list = [] + gene_type_list = [] + for d_id in self.ids: + if self.datasets[d_id].adata is not None: + assert self.datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ + f"featurespace. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have a " \ + f"streamlined featurespace. Run .streamline_features()" \ + f" first." 
+ assert self.datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ + f"metadata. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have " \ + f"streamlined metadata. Run .streamline_metadata() first." + match_ref_list.append(self.datasets[d_id].mapped_features) + rm_gene_ver_list.append(self.datasets[d_id].remove_gene_version) + gene_type_list.append(self.datasets[d_id].subset_gene_type) + assert len(set(match_ref_list)) == 1, \ + "Not all datasets in this group had their features matched to the same reference (argument " \ + "'match_to_reference' of method .streamline_features())." \ + "This is however a prerequisite for creating a combined adata object." + assert len(set(rm_gene_ver_list)) == 1, \ + "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ + "method .streamline_features()). This is however a prerequisite for creating a combined adata object." + assert len(set(gene_type_list)) == 1, \ + "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ + "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." 
+ var_original = adata_ls[0].var.copy() for a in adata_ls: a.var_names_make_unique() @@ -381,9 +387,7 @@ def adata(self): ) adata_concat.var = var_original adata_concat.uns[self._adata_ids.mapped_features] = match_ref_list[0] - else: - adata_concat = adata_ls[0] - adata_concat.obs[self._adata_ids.dataset] = adata_ls[0].uns['id'] + return adata_concat def obs_concat(self, keys: Union[list, None] = None): @@ -859,18 +863,17 @@ def load( def streamline_features( self, - match_to_reference: Union[str, Dict[str, str], None] = None, + match_to_reference: Union[str, Dict[str, str], None], remove_gene_version: bool = True, subset_genes_to_type: Union[None, str, List[str]] = None, ): """ Subset and sort genes to genes defined in an assembly or genes of a particular type, such as protein coding. - :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. - :param match_to_reference: Whether to map gene names to a given annotation. Can be: + :param match_to_reference: Which annotation to map the feature space to. Can be: - str: Provide the name of the annotation in the format Organism.Assembly.Release - dict: Mapping of organism to name of the annotation (see str format). Chooses annotation for each data set based on organism annotation. - - False: no mapping of gene labels will be done. + :param remove_gene_version: Whether to remove the version number after the colon sometimes found in ensembl gene ids. :param subset_genes_to_type: Type(s) to subset to. Can be a single type or a list of types or None. Types can be: - None: All genes in assembly. - "protein_coding": All protein coding genes in assembly. 
@@ -895,38 +898,44 @@ def adata(self): adata_ls = self.adata_ls if not adata_ls: return None + if len(adata_ls) == 1: + for i in self.ids: + if self.datasets[i] is not None: + if self.datasets[i].adata is not None: + ds_id = i + adata_concat = adata_ls[0] + adata_concat.obs[self._adata_ids.dataset] = ds_id + else: + # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata + match_ref_list = [] + rm_gene_ver_list = [] + gene_type_list = [] + for d_id in self.flatten().ids: + if self.flatten().datasets[d_id].adata is not None: + assert self.flatten().datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ + f"featurespace. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have a " \ + f"streamlined featurespace. Run .streamline_features()" \ + f" first." + assert self.flatten().datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ + f"metadata. To obtain an adata object from this " \ + f"DatasetGroup, all contained Datasets need to have " \ + f"streamlined metadata. Run .streamline_metadata() first." + match_ref_list.append(self.flatten().datasets[d_id].mapped_features) + rm_gene_ver_list.append(self.flatten().datasets[d_id].remove_gene_version) + gene_type_list.append(self.flatten().datasets[d_id].subset_gene_type) + assert len(set(match_ref_list)) == 1, \ + "Not all datasets in this group had their features matched to the same reference (argument " \ + "'match_to_reference' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." + assert len(set(rm_gene_ver_list)) == 1, \ + "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ + "method .streamline_features()). This is however a prerequisite for creating a combined adata object." 
+ assert len(set(gene_type_list)) == 1, \ + "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ + "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ + "combined adata object." - # Check that all individual adata objects in linked Dataset instances have identicall streamlined features and metadata - match_ref_list = [] - rm_gene_ver_list = [] - gene_type_list = [] - for d_id in self.flatten().ids: - if self.flatten().datasets[d_id].adata is not None: - assert self.flatten().datasets[d_id].mapped_features, f"Dataset {d_id} does not seem to have a streamlined " \ - f"featurespace. To obtain an adata object from this " \ - f"DatasetGroup, all contained Datasets need to have a " \ - f"streamlined featurespace. Run .streamline_features()" \ - f" first." - assert self.flatten().datasets[d_id].streamlined_meta, f"Dataset {d_id} does not seem to have streamlined " \ - f"metadata. To obtain an adata object from this " \ - f"DatasetGroup, all contained Datasets need to have " \ - f"streamlined metadata. Run .streamline_metadata() first." - match_ref_list.append(self.flatten().datasets[d_id].mapped_features) - rm_gene_ver_list.append(self.flatten().datasets[d_id].remove_gene_version) - gene_type_list.append(self.flatten().datasets[d_id].subset_gene_type) - assert len(set(match_ref_list)) == 1, \ - "Not all datasets in this group had their features matched to the same reference (argument " \ - "'match_to_reference' of method .streamline_features()). This is however a prerequisite for creating a " \ - "combined adata object." - assert len(set(rm_gene_ver_list)) == 1, \ - "Not all datasets in this group have had their gene version removed (argument 'remove_gene_version' of " \ - "method .streamline_features()). This is however a prerequisite for creating a combined adata object." 
- assert len(set(gene_type_list)) == 1, \ - "Not all datasets in this group had their featurespace subsetted to the same gene type (argument " \ - "'subset_gene_type' of method .streamline_features()). This is however a prerequisite for creating a " \ - "combined adata object." - - if len(adata_ls) > 1: var_original = adata_ls[0].var.copy() for a in adata_ls: a.var_names_make_unique() @@ -950,9 +959,7 @@ def adata(self): ) adata_concat.var = var_original adata_concat.uns[self._adata_ids.mapped_features] = match_ref_list[0] - else: - adata_concat = adata_ls[0] - adata_concat.obs[self._adata_ids.dataset] = adata_ls[0].uns['id'] + return adata_concat def write_distributed_store( diff --git a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/__init__.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/__init__.py rename to sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/__init__.py diff --git a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py rename to sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.py diff --git a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv rename to sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.tsv diff --git a/sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml 
b/sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml similarity index 100% rename from sfaira/data/dataloaders/loaders/_d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml rename to sfaira/data/dataloaders/loaders/d10_1126_science_aba7721/human_x_2020_scirnaseq_cao_001.yaml From 507264cd3d42b1e1f32b8fe562f28aa221d8580a Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 26 Apr 2021 13:52:16 +0200 Subject: [PATCH 119/161] Ontology call for cellxgene (#235) * fixed usage of ontology in cellxgene output streamlining * fixed streamlining unit test for cellxgene output --- sfaira/data/base/dataset.py | 2 +- sfaira/unit_tests/data/test_dataset.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 251e50683..87eb91cb1 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -837,7 +837,7 @@ def streamline_metadata( for k in ["organ", "assay_sc", "disease", "ethnicity", "development_stage"]: if getattr(adata_target_ids, k) in self.adata.obs.columns: self.__project_name_to_id_obs( - ontology=getattr(adata_target_ids, k), + ontology=k, key_in=getattr(adata_target_ids, k), key_out=getattr(adata_target_ids, k) + "_ontology_term_id", map_exceptions=[], diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 35082887f..650055d7c 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -90,6 +90,8 @@ def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: b ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load() + ds.streamline_features(remove_gene_version=False, match_to_reference=MOUSE_GENOME_ANNOTATION, + subset_genes_to_type=None) ds.streamline_metadata(schema=out_format, uns_to_obs=True, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns, 
clean_obs_names=clean_obs_names) From 2b3616905b5192f37aec08c8b1ebec52a6075dca Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 26 Apr 2021 15:17:05 +0200 Subject: [PATCH 120/161] Ontology call for cellxgene (#234) * fixed usage of ontology in cellxgene output streamlining * fixed streamlining unit test for cellxgene output * skip writing of obs columns that are already in adata object but are controlled by the meta data streamlining vocabulary * enabled orgnism wise ontologies * fixed var naming in d10_1101_2020_10_12_335331 and updated string cleaning --- sfaira/consts/adata_fields.py | 4 ++ sfaira/consts/ontologies.py | 14 +++-- sfaira/data/__init__.py | 2 +- sfaira/data/base/__init__.py | 2 +- sfaira/data/base/dataset.py | 52 ++++++++++++------- .../human_blood_2020_10x_hao_001.yaml | 6 +-- sfaira/unit_tests/data/test_dataset.py | 7 ++- 7 files changed, 57 insertions(+), 30 deletions(-) diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 91caf9c96..7759e5706 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -50,6 +50,10 @@ class AdataIds: not_a_cell_celltype_identifier: Union[str, None] unknown_metadata_identifier: Union[str, None] + @property + def controlled_meta_keys(self): + return [getattr(self, k) for k in self.obs_keys + self.uns_keys] + class AdataIdsSfaira(AdataIds): """ diff --git a/sfaira/consts/ontologies.py b/sfaira/consts/ontologies.py index 78394b20e..8f18526ce 100644 --- a/sfaira/consts/ontologies.py +++ b/sfaira/consts/ontologies.py @@ -1,6 +1,6 @@ from sfaira.versions.metadata import OntologyList, OntologyCl -from sfaira.versions.metadata import OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ - OntologySinglecellLibraryConstruction, OntologyCellosaurus +from sfaira.versions.metadata import OntologyCellosaurus, OntologyHsapdv, OntologyMondo, \ + OntologyMmusdv, OntologySinglecellLibraryConstruction, OntologyUberon class OntologyContainerSfaira: @@ -19,10 
+19,16 @@ def __init__(self): self.cell_types_original = None self.collection_id = None self.default_embedding = None - self.development_stage = None # OntologyHsapdv() # TODO allow for other organisms here too. + self.development_stage = { + "human": OntologyHsapdv(), + "mouse": OntologyMmusdv(), + } self.disease = OntologyMondo() self.doi = None - self.ethnicity = None # OntologyHancestro() # TODO + self.ethnicity = { + "human": None, # TODO OntologyHancestro + "mouse": None, + } self.id = None self.individual = None self.normalization = None diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index ed5540905..b13e572d6 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,4 +1,4 @@ -from sfaira.data.base import DatasetBase, \ +from sfaira.data.base import clean_string, DatasetBase, \ DatasetGroup, DatasetGroupDirectoryOriented, \ DatasetSuperGroup, DistributedStore from . import dataloaders diff --git a/sfaira/data/base/__init__.py b/sfaira/data/base/__init__.py index 9f9e490a6..7e3dacc49 100644 --- a/sfaira/data/base/__init__.py +++ b/sfaira/data/base/__init__.py @@ -1,3 +1,3 @@ -from sfaira.data.base.dataset import DatasetBase +from sfaira.data.base.dataset import DatasetBase, clean_string from sfaira.data.base.dataset_group import DatasetGroup, DatasetGroupDirectoryOriented, DatasetSuperGroup from sfaira.data.base.distributed_store import DistributedStore diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 87eb91cb1..5ea7f5905 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -64,6 +64,12 @@ def is_child( raise ValueError(f"did not recognize ontology type {type(ontology)}") +def clean_string(s): + if s is not None: + s = s.replace(',', '').replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() + return s + + class DatasetBase(abc.ABC): adata: Union[None, anndata.AnnData] class_maps: dict @@ -252,8 +258,12 @@ def __init__( if yaml_path is not None: assert 
os.path.exists(yaml_path), f"did not find yaml {yaml_path}" yaml_vals = read_yaml(fn=yaml_path) + # Set organism first as this is required to disambiguate valid entries for other meta data. + k = "organism" + v = yaml_vals["attr"]["organism"] + setattr(self, k, v) for k, v in yaml_vals["attr"].items(): - if v is not None and k not in ["sample_fns", "dataset_index"]: + if v is not None and k not in ["organism", "sample_fns", "dataset_index"]: if isinstance(v, dict): # v is a dictionary over file-wise meta-data items assert self.sample_fn in v.keys(), f"did not find key {self.sample_fn} in yamls keys for {k}" setattr(self, k, v[self.sample_fn]) @@ -804,10 +814,12 @@ def streamline_metadata( self.adata.obs = obs_new else: index_old = self.adata.obs.index.copy() - # Add old columns in if they are not duplicated: + # Add old columns in if they are not duplicated in target obs column space, even if this column is not + # defined. This would result in the instance accessing this column assuming it was streamlined. 
self.adata.obs = pd.concat([ obs_new, - pd.DataFrame(dict([(k, v) for k, v in self.adata.obs.items() if k not in obs_new.columns])) + pd.DataFrame(dict([(k, v) for k, v in self.adata.obs.items() + if k not in adata_target_ids.controlled_meta_keys])) ], axis=1) self.adata.obs.index = index_old if clean_obs_names: @@ -836,8 +848,12 @@ def streamline_metadata( # Add ontology IDs where necessary (note that human readable terms are also kept): for k in ["organ", "assay_sc", "disease", "ethnicity", "development_stage"]: if getattr(adata_target_ids, k) in self.adata.obs.columns: + ontology = getattr(self.ontology_container_sfaira, k) + # Disambiguate organism-dependent ontologies: + if isinstance(ontology, dict): + ontology = ontology[self.organism] self.__project_name_to_id_obs( - ontology=k, + ontology=ontology, key_in=getattr(adata_target_ids, k), key_out=getattr(adata_target_ids, k) + "_ontology_term_id", map_exceptions=[], @@ -1112,8 +1128,9 @@ def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = No # TODO this could be changed in the future, this allows this function to be used both on cell type name # mapping files with and without the ID in the third column. # This mapping blocks progression in the unit test if not deactivated. + ontology = getattr(self.ontology_container_sfaira, "cellontology_class") ids_mapped = self.__project_name_to_id_obs( - ontology="cellontology_class", + ontology=ontology, key_in=labels_mapped, key_out=None, map_exceptions=[ @@ -1136,7 +1153,7 @@ def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = No def __project_name_to_id_obs( self, - ontology: str, + ontology: Ontology, key_in: Union[str, list], key_out: Union[str, None], map_exceptions: list, @@ -1145,14 +1162,14 @@ def __project_name_to_id_obs( """ Project ontology names to IDs for a given ontology in .obs entries. 
- :param ontology: name of the ontology to use when converting to IDs + :param ontology: ontology to use when converting to IDs :param key_in: name of obs_column containing names to convert or python list containing these values :param key_out: name of obs_column to write the IDs or None. If None, a python list with the new values will be returned :param map_exceptions: list of values that should not be mapped :param map_exceptions_value: placeholder target value for values excluded from mapping :return: """ - ontology = getattr(self.ontology_container_sfaira, ontology) + assert ontology is not None, f"cannot project value for {key_in} because ontology is None" assert isinstance(key_in, (str, list)), f"argument key_in needs to be of type str or list. Supplied" \ f"type: {type(key_in)}" input_values = self.adata.obs[key_in].values if isinstance(key_in, str) else key_in @@ -1290,11 +1307,6 @@ def set_dataset_id( self, idx: int = 1 ): - def clean(s): - if s is not None: - s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() - return s - if self.sample_fn is not None: idx += self._sample_fns.index(self.sample_fn) idx = str(idx).zfill(3) @@ -1306,11 +1318,11 @@ def clean(s): # Note: access private attributes here, e.g. _organism, to avoid loading of content via meta data, which would # invoke call to self.id before it is set. 
- self.id = f"{clean(self._organism)}_" \ - f"{clean(self._organ)}_" \ + self.id = f"{clean_string(self._organism)}_" \ + f"{clean_string(self._organ)}_" \ f"{self._year}_" \ - f"{clean(self._assay_sc)}_" \ - f"{clean(author)}_" \ + f"{clean_string(self._assay_sc)}_" \ + f"{clean_string(author)}_" \ f"{idx}_" \ f"{self.doi_main}" @@ -1471,7 +1483,8 @@ def development_stage(self) -> Union[None, str]: @development_stage.setter def development_stage(self, x: str): - x = self._value_protection(attr="development_stage", allowed=self.ontology_container_sfaira.development_stage, + x = self._value_protection(attr="development_stage", + allowed=self.ontology_container_sfaira.development_stage[self.organism], attempted=x) self._development_stage = x @@ -1595,7 +1608,8 @@ def ethnicity(self) -> Union[None, str]: @ethnicity.setter def ethnicity(self, x: str): - x = self._value_protection(attr="ethnicity", allowed=self.ontology_container_sfaira.ethnicity, attempted=x) + x = self._value_protection(attr="ethnicity", allowed=self.ontology_container_sfaira.ethnicity[self.organism], + attempted=x) self._ethnicity = x @property diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml index 77f5fd1ee..b5c0a6b3f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml @@ -43,9 +43,9 @@ dataset_or_observation_wise: tech_sample: tech_sample_obs_key: 'Batch' observation_wise: - cellontology_original_obs_key: "celltype.l3" + cell_types_original_obs_key: "celltype.l3" feature_wise: - var_ensembl_col: - var_symbol_col: "names" + gene_id_ensembl_var_key: + gene_id_symbols_var_key: "names" meta: version: "1.0" diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 
650055d7c..deab6f1bc 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -81,18 +81,21 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): @pytest.mark.parametrize("out_format", ["sfaira", "cellxgene"]) +@pytest.mark.parametrize("uns_to_obs", [True, False]) @pytest.mark.parametrize("clean_obs", [True, False]) @pytest.mark.parametrize("clean_var", [True, False]) @pytest.mark.parametrize("clean_uns", [True, False]) @pytest.mark.parametrize("clean_obs_names", [True, False]) -def test_dsgs_streamline_metadata(out_format: str, clean_obs: bool, clean_var: bool, clean_uns: bool, clean_obs_names: bool): +def test_dsgs_streamline_metadata(out_format: str, uns_to_obs: bool, clean_obs: bool, clean_var: bool, clean_uns: bool, + clean_obs_names: bool): ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key="organism", values=["mouse"]) ds.subset(key="organ", values=["lung"]) ds.load() ds.streamline_features(remove_gene_version=False, match_to_reference=MOUSE_GENOME_ANNOTATION, subset_genes_to_type=None) - ds.streamline_metadata(schema=out_format, uns_to_obs=True, clean_obs=clean_obs, clean_var=clean_var, clean_uns=clean_uns, clean_obs_names=clean_obs_names) + ds.streamline_metadata(schema=out_format, uns_to_obs=uns_to_obs, clean_obs=clean_obs, clean_var=clean_var, + clean_uns=clean_uns, clean_obs_names=clean_obs_names) @pytest.mark.parametrize("match_to_reference", ["Mus_musculus.GRCm38.102", {"mouse": MOUSE_GENOME_ANNOTATION}]) From 1830168147bce0cc55a7aba0a2e70c5800738fdd Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 26 Apr 2021 15:55:49 +0200 Subject: [PATCH 121/161] Distributed store update (#236) * added new attribute adata to store * fixed bug in value protection in streamlining --- sfaira/data/base/dataset.py | 2 ++ sfaira/data/base/distributed_store.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 5ea7f5905..d9280119f 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -769,6 +769,8 @@ def streamline_metadata( uns_new[new_col] = UNS_STRING_META_IN_OBS # Remove potential pd.Categorical formatting: ontology = getattr(self.ontology_container_sfaira, k) if hasattr(self.ontology_container_sfaira, k) else None + if k in ["development_stage", "ethnicity"]: + ontology = ontology[self.organism] self._value_protection(attr=new_col, allowed=ontology, attempted=np.unique(self.adata.obs[old_col].values).tolist()) obs_new[new_col] = self.adata.obs[old_col].values.tolist() del self.adata.obs[old_col] diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index 92ccba3af..f5f6b0c47 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -56,6 +56,12 @@ def __init__(self, cache_path: Union[str, None] = None): self._adata_ids_sfaira = AdataIdsSfaira() self._celltype_universe = None + @property + def adata(self): + return list(self.adatas.values)[0].concatenate( + list(self.adatas.values)[1:] + ) + def generator( self, batch_size: int = 1, From 1f2ce9d5d5f75873a43a6c6c87d82592b7512060 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 26 Apr 2021 16:13:20 +0200 Subject: [PATCH 122/161] adapted developmental stage label to ontology (#237) --- ...aminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py index c78eb42c6..2efb01ddc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.py @@ -17,5 +17,6 @@ def load(data_dir, sample_fn, **kwargs): adata.obs = obs s_dict = {"F": "female", "M": "male"} adata.obs['Sex'] = [s_dict[i] for i in adata.obs['Sex']] + adata.obs['Age'] = [str(x) + "-year-old human stage" for x in adata.obs['Age'].values] return adata From a50b45f02153bbdbb65017f94c426aba50902530 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 26 Apr 2021 18:32:33 +0200 Subject: [PATCH 123/161] D10 1038 s41586 019 1654 9 dev ontology (#239) * moved unconstrained developmental stage label to state exact @le-ander, I started enforcing the developmental ontologies now, so we have to think about if we can use / build an ontology for organoid stages? 
* added rounding for cellxgene output --- sfaira/data/base/dataset.py | 11 ++++++++++- .../human_brain_2019_10x3v2sequencing_kanton_001.py | 3 --- .../human_brain_2019_10x3v2sequencing_kanton_001.yaml | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index d9280119f..3b9a66083 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -769,7 +769,9 @@ def streamline_metadata( uns_new[new_col] = UNS_STRING_META_IN_OBS # Remove potential pd.Categorical formatting: ontology = getattr(self.ontology_container_sfaira, k) if hasattr(self.ontology_container_sfaira, k) else None - if k in ["development_stage", "ethnicity"]: + if k == "development_stage": + ontology = ontology[self.organism] + if k == "ethnicity": ontology = ontology[self.organism] self._value_protection(attr=new_col, allowed=ontology, attempted=np.unique(self.adata.obs[old_col].values).tolist()) obs_new[new_col] = self.adata.obs[old_col].values.tolist() @@ -877,6 +879,13 @@ def streamline_metadata( if gene_id_new != self.gene_id_symbols_var_key: del self.adata.var[self.gene_id_symbols_var_key] self.gene_id_symbols_var_key = gene_id_new + # Check if .X is counts: The conversion are based on the assumption that .X is csr. + assert isinstance(self.adata.X, scipy.sparse.csr_matrix), type(self.adata.X) + count_values = np.unique(np.asarray(self.adata.X.todense())) + is_counts = np.all(count_values % 1. == 0.) + if not is_counts: + print(f"WARNING: not all count entries were counts {is_counts}. 
rounding.") + self.adata.X.data = np.rint(self.adata.X.data) # Make sure that correct unknown_metadata_identifier is used in .uns, .obs and .var metadata self.adata.obs = self.adata.obs.replace({None: adata_target_ids.unknown_metadata_identifier}) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py index 3e0f9756b..86e461f0b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.py @@ -29,9 +29,6 @@ def load(data_dir, **kwargs): with zipfile.ZipFile(fn[2]) as archive: obs = pandas.read_csv(archive.open('metadata_human_cells.tsv'), sep="\t", index_col=0) adata = anndata.AnnData(X=x, var=var, obs=obs) - adata.obs["Line"] = [cell_line_dict[x] for x in adata.obs["Line"]] - # TODO: remove non-protein coding genes? 
- return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index 7b8fe8990..dde9db146 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -25,7 +25,7 @@ dataset_or_observation_wise: cell_line: cell_line_obs_key: "Line" development_stage: - development_stage_obs_key: "Stage" + development_stage_obs_key: disease: "healthy" disease_obs_key: ethnicity: @@ -40,8 +40,8 @@ dataset_or_observation_wise: sample_source_obs_key: sex: sex_obs_key: - state_exact: "healthy" - state_exact_obs_key: + state_exact: + state_exact_obs_key: "Stage" tech_sample: tech_sample_obs_key: observation_wise: From 101ba8909ea881ff6327c8591c8a8aedc5499b8d Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 26 Apr 2021 22:00:22 +0200 Subject: [PATCH 124/161] D10 1038 s41586 019 1654 9 dev ontology (#238) * moved unconstrained developmental stage label to state exact @le-ander, I started enforcing the developmental ontologies now, so we have to think about if we can use / build an ontology for organoid stages? * added rounding for cellxgene output From be2516df0a964e02299978e2fb99d926fa709015 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 26 Apr 2021 22:01:33 +0200 Subject: [PATCH 125/161] D10 1038 s41586 020 2157 4 dev stage (#240) * added developmental stages to hcl --- .../human_x_2020_microwellseq_han_x.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index d7e392209..5bb11b5a8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -148,6 +148,113 @@ def load(data_dir, **kwargs): 'PeripheralBlood_1': 'blood', 'Placenta_1': 'placenta', } + sample_dev_stage_dict = { + 'AdultAdipose_1': 'human adult stage', + 'AdultAdrenalGland_2': 'human adult stage', + 'AdultAdrenalGland_3': 'human adult stage', + 'AdultArtery_1': 'human adult stage', + 'AdultAscendingColon_1': 'human adult stage', + 'AdultBladder_1': 'human adult stage', + 'AdultBladder_2': 'human adult stage', + 'AdultCerebellum_1': 'human adult stage', + 'AdultCervix_1': 'human adult stage', + 'AdultColon_1': 'human adult stage', + 'AdultDuodenum_1': 'human adult stage', + 'AdultEpityphlon_1': 'human adult stage', + 'AdultEsophagus_1': 'human adult stage', + 'AdultEsophagus_2': 'human adult stage', + 'AdultFallopiantube_1': 'human adult stage', + 'AdultGallbladder_1': 'human adult stage', + 'AdultGallbladder_2': 'gall bladder', + 'AdultHeart_1': 'human adult stage', + 'AdultHeart_2': 'human adult stage', + 'AdultIleum_2': 'human adult stage', + 'AdultJejunum_2': 'human adult stage', + 'AdultKidney_2': 'human adult stage', + 'AdultKidney_3': 'human adult stage', + 'AdultKidney_4': 'human adult stage', + 'AdultLiver_1': 'human adult stage', + 'AdultLiver_2': 'human adult stage', + 'AdultLiver_4': 'human adult stage', + 'AdultLung_1': 'human adult stage', +
'AdultLung_2': 'human adult stage', + 'AdultLung_3': 'human adult stage', + 'AdultMuscle_1': 'human adult stage', + 'AdultOmentum_1': 'human adult stage', + 'AdultOmentum_2': 'human adult stage', + 'AdultOmentum_3': 'human adult stage', + 'AdultPancreas_1': 'human adult stage', + 'AdultPeripheralBlood_3': 'human adult stage', + 'AdultPeripheralBlood_4': 'human adult stage', + 'AdultPleura_1': 'human adult stage', + 'AdultProstate_1': 'human adult stage', + 'AdultRectum_1': 'human adult stage', + 'AdultSigmoidColon_1': 'human adult stage', + 'AdultSpleenParenchyma_1': 'human adult stage', + 'AdultSpleen_1': 'human adult stage', + 'AdultStomach_1': 'human adult stage', + 'AdultStomach_2': 'human adult stage', + 'AdultStomach_3': 'human adult stage', + 'AdultTemporalLobe_1': 'human adult stage', + 'AdultThyroid_1': 'human adult stage', + 'AdultThyroid_2': 'human adult stage', + 'AdultTrachea_2': 'human adult stage', + 'AdultTransverseColon_2': 'human adult stage', + 'AdultUreter_1': 'human adult stage', + 'AdultUterus_1': 'human adult stage', + 'BoneMarrow_1': 'human adult stage', + 'BoneMarrow_2': 'human adult stage', + 'ChorionicVillus_1': 'human adult stage', + 'CordBloodCD34P_1': 'human adult stage', + 'CordBloodCD34P_2': 'human adult stage', + 'CordBlood_1': 'human adult stage', + 'CordBlood_2': 'human adult stage', + 'FetalAdrenalGland_2': 'fetal stage', + 'FetalAdrenalGland_3': 'fetal stage', + 'FetalAdrenalGland_4': 'fetal stage', + 'FetalBrain_3': 'fetal stage', + 'FetalBrain_4': 'fetal stage', + 'FetalBrain_5': 'fetal stage', + 'FetalBrain_6': 'fetal stage', + 'FetalCalvaria_1': 'fetal stage', + 'FetalEyes_1': 'fetal stage', + 'FetalFemaleGonad_1': 'fetal stage', + 'FetalFemaleGonad_2': 'fetal stage', + 'FetalHeart_1': 'fetal stage', + 'FetalHeart_2': 'fetal stage', + 'FetalIntestine_1': 'fetal stage', + 'FetalIntestine_2': 'fetal stage', + 'FetalIntetsine_3': 'fetal stage', + 'FetalIntestine_4': 'fetal stage', + 'FetalIntestine_5': 'fetal stage', + 
'FetalKidney_3': 'fetal stage', + 'FetalKidney_4': 'fetal stage', + 'FetalKidney_5': 'fetal stage', + 'FetalKidney_6': 'fetal stage', + 'FetalLung_1': 'fetal stage', + 'FetalLung_2': 'fetal stage', + 'FetalMaleGonad_1': 'fetal stage', + 'FetalMaleGonad_2': 'fetal stage', + 'FetalMuscle_1': 'fetal stage', + 'FetalPancreas_1': 'fetal stage', + 'FetalPancreas_2': 'fetal stage', + 'FetalPancreas_3': 'fetal stage', + 'FetalRib_2': 'fetal stage', + 'FetalRib_3': 'fetal stage', + 'FetalSkin_2': 'fetal stage', + 'FetalSkin_3': 'fetal stage', + 'FetalSpinalCord_1': 'fetal stage', + 'FetalStomach_1': 'fetal stage', + 'FetalStomach_2': 'fetal stage', + 'FetalThymus_1': 'fetal stage', + 'FetalThymus_2': 'fetal stage', + 'HESC_1': 'blastula stage', + 'Liver_1': 'human adult stage', + 'Liver_2': 'human adult stage', + 'NeonatalAdrenalGland_1': 'newborn human stage', + 'PeripheralBlood_1': 'human adult stage', + 'Placenta_1': 'human adult stage', + } sex_dict = { 'Male': "male", 'Female': "female", @@ -218,5 +325,6 @@ def load(data_dir, **kwargs): ] adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] adata.obs["sex"] = [sex_dict[str(x)] for x in adata.obs["sex"].values] + adata.obs["dev_stage"] = [sample_dev_stage_dict[str(x)] for x in adata.obs["dev_stage"].values] return adata From 2fa39572100da24025bd0d7903eb2734d6b602cd Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Mon, 26 Apr 2021 22:42:13 +0200 Subject: [PATCH 126/161] added feature matrix collapse and uns cleaning (#241) --- sfaira/data/base/dataset.py | 7 +++++++ sfaira/data/base/dataset_group.py | 14 ++++++++++++++ sfaira/data/utils_scripts/streamline_selected.py | 11 +++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 3b9a66083..3cc1ebe0e 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -529,6 +529,13 @@ def _collapse_ensembl_gene_id_versions(self): # Collapse if necessary: self.adata = collapse_matrix(adata=self.adata, var_column=self.gene_id_ensembl_var_key) + def collapse_counts(self): + """ + Collapse count matrix along duplicated index. + """ + if len(np.unique(self.adata.var.index)) < self.adata.var.shape[0]: + self.adata = collapse_matrix(adata=self.adata, var_column="index") + def streamline_features( self, match_to_reference: Union[str, Dict[str, str], None], diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index f595713fb..23bf4b58b 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -196,6 +196,13 @@ def streamline_features( subset_genes_to_type=subset_genes_to_type, ) + def collapse_counts(self): + """ + Collapse count matrix along duplicated index. + """ + for x in self.ids: + self.datasets[x].collapse_counts() + def write_distributed_store( self, dir_cache: Union[str, os.PathLike], @@ -885,6 +892,13 @@ def streamline_features( subset_genes_to_type=subset_genes_to_type, ) + def collapse_counts(self): + """ + Collapse count matrix along duplicated index.
+ """ + for x in self.dataset_groups: + x.collapse_counts() + @property def adata_ls(self): adata_ls = [] diff --git a/sfaira/data/utils_scripts/streamline_selected.py b/sfaira/data/utils_scripts/streamline_selected.py index 62a6329b4..b8b3e2ad5 100644 --- a/sfaira/data/utils_scripts/streamline_selected.py +++ b/sfaira/data/utils_scripts/streamline_selected.py @@ -29,8 +29,15 @@ remove_gene_version=True, subset_genes_to_type=None ) - ds.streamline_metadata(schema=schema.lower(), uns_to_obs=False, clean_obs=False, clean_var=True, clean_uns=False, - clean_obs_names=False) + ds.streamline_metadata( + schema=schema.lower(), + uns_to_obs=False, + clean_obs=False, + clean_var=True, + clean_uns=True, + clean_obs_names=False + ) + ds.collapse_counts() assert len(ds.dataset_groups) == 1, len(ds.dataset_groups) dsg = ds.dataset_groups[0] for k, v in dsg.datasets.items(): From 742b59300efab71218003b725e49ba4ac5cce4a2 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 27 Apr 2021 13:12:31 +0200 Subject: [PATCH 127/161] fixed bug in config writing of store and built unit test (#242) * fixed bug in config writing of store and built unit test * improved error reporting for Dataset --- sfaira/data/base/dataset.py | 11 ++-- sfaira/data/base/distributed_store.py | 6 +-- sfaira/unit_tests/data/test_store.py | 73 +++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 7 deletions(-) create mode 100644 sfaira/unit_tests/data/test_store.py diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 3cc1ebe0e..4650b9b97 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -409,11 +409,14 @@ def load( if self.data_dir is None: raise ValueError("No sfaira data repo path provided in constructor.") + def _error_buffered_reading(**load_kwargs): + self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn, **load_kwargs) + # Run data set-specific loading script: def _assembly_wrapper(): if self.load_func is None:
raise ValueError(f"Tried to access load_func for {self.id} but did not find any.") - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn, **kwargs) + _error_buffered_reading(**kwargs) # Enable loading of additional annotation, e.g. secondary cell type annotation # The additional annotation `obs2 needs to be on a subset of the original annotation `self.adata.obs`. if self.dict_load_func_annotation is not None: @@ -431,16 +434,16 @@ def _cached_reading(filename): if os.path.exists(filename): self.adata = anndata.read_h5ad(filename) else: - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + _error_buffered_reading() else: - self.adata = self.load_func(data_dir=self.data_dir, sample_fn=self.sample_fn) + _error_buffered_reading() def _cached_writing(filename): if filename is not None: dir_cache = os.path.dirname(filename) if not os.path.exists(dir_cache): os.makedirs(dir_cache) - if not os.path.exists(filename): + if not os.path.exists(filename) and self.adata is not None: self.adata.write_h5ad(filename) if load_raw and allow_caching: diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index f5f6b0c47..6a007d9f8 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -153,8 +153,8 @@ def subset(self, attr_key, values): # Get ontology container to be able to do relational reasoning: ontology = getattr(self.ontology_container, attr_key) for k in list(self.adatas.keys()): - if getattr(self._adata_ids_sfaira, attr_key) in self.adatas.uns.keys(): - values_found = self.adatas.uns[getattr(self._adata_ids_sfaira, attr_key)] + if getattr(self._adata_ids_sfaira, attr_key) in self.adatas[k].uns.keys(): + values_found = self.adatas[k].uns[getattr(self._adata_ids_sfaira, attr_key)] if not isinstance(values_found, list): values_found = [values_found] if not np.any([ @@ -291,7 +291,7 @@ def write_config(self, fn: Union[str, os.PathLike]): :param fn: 
Output file without file type extension. """ - with open(fn + '.pickle', 'w') as f: + with open(fn + '.pickle', 'wb') as f: pickle.dump(self.indices, f) def load_config(self, fn: Union[str, os.PathLike]): diff --git a/sfaira/unit_tests/data/test_store.py b/sfaira/unit_tests/data/test_store.py new file mode 100644 index 000000000..8c9c365bc --- /dev/null +++ b/sfaira/unit_tests/data/test_store.py @@ -0,0 +1,73 @@ +import numpy as np +import os +import pytest + +from sfaira.data import DistributedStore +from sfaira.data import Universe + +MOUSE_GENOME_ANNOTATION = "Mus_musculus.GRCm38.102" + +dir_data = "../test_data" +dir_meta = "../test_data/meta" + + +""" +TODO tests from here on down require cached data for mouse lung +""" + + +def test_store_config(): + """ + Test that data set config files can be set, written and recovered. + """ + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds.load() + ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, + subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) + store_path = os.path.join(dir_data, "store") + config_path = os.path.join(store_path, "lung") + ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=True) + store = DistributedStore(cache_path=store_path) + store.subset(attr_key="assay_sc", values=["10x sequencing"]) + store.write_config(fn=config_path) + store2 = DistributedStore(cache_path=store_path) + store2.load_config(fn=config_path) + assert np.all(store.indices.keys() == store2.indices.keys()) + assert np.all([np.all(store.indices[k] == store2.indices[k]) for k in store.indices.keys()]) + + +def test_store_type_targets(): + """ + Test that target leave nodes can be set, written and recovered. 
+ """ + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + ds.load() + ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, + subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) + store_path = os.path.join(dir_data, "store") + target_path = os.path.join(store_path, "lung") + ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=True) + store = DistributedStore(cache_path=store_path) + observed_nodes = np.unique(np.concatenate([ + x.obs[store._adata_ids_sfaira.cell_ontology_class] + for x in store.adatas.values() + ])).tolist() + leaves_all = store.celltypes_universe.onto_cl.leaves + effective_leaves = store.celltypes_universe.onto_cl.get_effective_leaves(x=observed_nodes) + store.celltypes_universe.onto_cl.leaves = effective_leaves + leaves1 = store.celltypes_universe.onto_cl.leaves + store.celltypes_universe.write_target_universe(fn=target_path, x=effective_leaves) + store2 = DistributedStore(cache_path=store_path) + store2.celltypes_universe.load_target_universe(fn=target_path) + leaves2 = store2.celltypes_universe.onto_cl.leaves + assert len(leaves_all) > len(leaves1) + assert len(set(leaves1).union(set(leaves2))) == len(leaves1) + assert np.all([x in leaves1 for x in leaves2]) From 562a668d7fdb19cd3c620dd2c790bc29ec432ce1 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Tue, 27 Apr 2021 13:17:55 +0200 Subject: [PATCH 128/161] fixed dev stage assignment (#243) --- .../human_x_2020_microwellseq_han_x.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 5bb11b5a8..2f969cebc 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -325,6 +325,7 @@ def load(data_dir, **kwargs): ] adata.obs["organ"] = [sample_organ_dict[x] for x in adata.obs["sample"].values] adata.obs["sex"] = [sex_dict[str(x)] for x in adata.obs["sex"].values] - adata.obs["dev_stage"] = [sample_dev_stage_dict[str(x)] for x in adata.obs["dev_stage"].values] + # TODO are the more exact developmental stages in dev_stage? + adata.obs["dev_stage"] = [sample_dev_stage_dict[str(x)] for x in adata.obs["sample"].values] return adata From 5c872d60a80a4a823107afd3f1a0b85ee56d48f7 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Tue, 27 Apr 2021 15:03:03 +0200 Subject: [PATCH 129/161] fixed subsetting (#244) --- sfaira/data/base/distributed_store.py | 33 ++++++++++++++++++++++----- sfaira/unit_tests/data/test_store.py | 1 + 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index 6a007d9f8..15aa18d96 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -7,7 +7,7 @@ from typing import Dict, List, Union from sfaira.consts import AdataIdsSfaira, OCS -from sfaira.data.base.dataset import is_child +from sfaira.data.base.dataset import is_child, UNS_STRING_META_IN_OBS from sfaira.versions.metadata import CelltypeUniverse @@ -154,7 +154,10 @@ def subset(self, attr_key, values): ontology = getattr(self.ontology_container, attr_key) for k in list(self.adatas.keys()): if getattr(self._adata_ids_sfaira, attr_key) in self.adatas[k].uns.keys(): - values_found = self.adatas[k].uns[getattr(self._adata_ids_sfaira, attr_key)] + if getattr(self._adata_ids_sfaira, attr_key) != UNS_STRING_META_IN_OBS: + values_found = self.adatas[k].uns[getattr(self._adata_ids_sfaira, attr_key)] + else: + values_found = self.adatas[k].obs[getattr(self._adata_ids_sfaira, attr_key)].values.tolist() if not isinstance(values_found, list): values_found = [values_found] if not np.any([ @@ -193,8 +196,26 @@ def subset_cells_idx(self, attr_key, values: Union[str, List[str]]): if not isinstance(values, list): values = [values] - def get_subset_idx(adata, k): - values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values + def get_subset_idx(adata, k, dataset): + # Try to look first in cell wise annotation to use cell-wise map if data set-wide maps are ambiguous: + # This can happen if the different cell-wise annotations are summarised as a union in .uns. 
+ if getattr(self._adata_ids_sfaira, k) in adata.obs.keys(): + values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values + elif getattr(self._adata_ids_sfaira, k) in adata.uns.keys(): + values_found = adata.uns[getattr(self._adata_ids_sfaira, k)] + if isinstance(values_found, np.ndarray): + values_found = values_found.tolist() + elif not isinstance(values_found, list): + values_found = [values_found] + if len(values_found) > 1: + print(f"WARNING: subsetting not exact for attribute {k}: {values_found}," + f" discarding data set {dataset}.") + values_found = [] + else: + # Replicate unique property along cell dimension. + values_found = [values_found[0] for i in range(adata.n_obs)] + else: + raise ValueError(f"did not find attribute {k} in data set {dataset}") values_found_unique = np.unique(values_found) try: ontology = getattr(self.ontology_container, k) @@ -208,14 +229,14 @@ def get_subset_idx(adata, k): ]) ] # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. - print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {self.id}") + print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {dataset}") idx = np.where([x in values_found_unique_matched for x in values_found])[0] return idx indices = {} for k, v in self.adatas.items(): idx_old = self.indices[k].tolist() - idx_new = get_subset_idx(adata=v, k=attr_key) + idx_new = get_subset_idx(adata=v, k=attr_key, dataset=k) # Keep intersection of old and new hits. 
indices[k] = np.array(list(set(idx_old).intersection(set(idx_new)))) return indices diff --git a/sfaira/unit_tests/data/test_store.py b/sfaira/unit_tests/data/test_store.py index 8c9c365bc..2d50aa9e6 100644 --- a/sfaira/unit_tests/data/test_store.py +++ b/sfaira/unit_tests/data/test_store.py @@ -33,6 +33,7 @@ def test_store_config(): ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=True) store = DistributedStore(cache_path=store_path) store.subset(attr_key="assay_sc", values=["10x sequencing"]) + store.subset_cells(attr_key="assay_sc", values=["10x sequencing"]) store.write_config(fn=config_path) store2 = DistributedStore(cache_path=store_path) store2.load_config(fn=config_path) From b7ddeda46c6cfaece0e9b9edd26f310858486f83 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 27 Apr 2021 21:13:27 +0200 Subject: [PATCH 130/161] added skipping to store writing and fixed hcl bug (#247) --- .../human_x_2020_microwellseq_han_x.py | 2 +- sfaira/data/utils_scripts/write_store.py | 33 +++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py index 2f969cebc..d01646a31 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2157_4/human_x_2020_microwellseq_han_x.py @@ -165,7 +165,7 @@ def load(data_dir, **kwargs): 'AdultEsophagus_2': 'human adult stage', 'AdultFallopiantube_1': 'human adult stage', 'AdultGallbladder_1': 'human adult stage', - 'AdultGallbladder_2': 'gall bladder', + 'AdultGallbladder_2': 'human adult stage', 'AdultHeart_1': 'human adult stage', 'AdultHeart_2': 'human adult stage', 'AdultIleum_2': 'human adult stage', diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py 
index 2c4adfaea..f99e13c73 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -1,3 +1,4 @@ +import os import sfaira import sys @@ -12,17 +13,21 @@ universe = sfaira.data.dataloaders.Universe(data_path=data_path, meta_path=path_meta, cache_path=path_cache) for k, ds in universe.datasets.items(): - print(f"SCRIPT loading {k}") - ds.load( - load_raw=False, - allow_caching=True, - ) - ds.streamline_features( - remove_gene_version=True, - match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, - subset_genes_to_type="protein_coding" - ) - ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, - clean_obs_names=True) - ds.write_distributed_store(dir_cache=path_store, store="h5ad", dense=False) - ds.clear() + fn_store = os.path.join(path_store, ds.doi_cleaned_id + ".h5ad") + if os.path.exists(fn_store): + print(f"SCRIPT skipping {k}") + else: + print(f"SCRIPT loading {k}") + ds.load( + load_raw=False, + allow_caching=True, + ) + ds.streamline_features( + remove_gene_version=True, + match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, + subset_genes_to_type="protein_coding" + ) + ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) + ds.write_distributed_store(dir_cache=path_store, store="h5ad", dense=False) + ds.clear() From ca82c32aeefa8de5dc92aa2f945f663ecc86b27a Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Tue, 27 Apr 2021 22:03:04 +0200 Subject: [PATCH 131/161] fixed script (#248) --- .../utils_scripts/create_anatomical_configs_store.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sfaira/data/utils_scripts/create_anatomical_configs_store.py b/sfaira/data/utils_scripts/create_anatomical_configs_store.py index 8bc724ada..94ca0e507 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs_store.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs_store.py @@ -3,7 +3,7 @@ import tensorflow as tf # Any data loader here to extract path: -from sfaira.data import DistributedStore +from sfaira.data import DistributedStore, clean_string print(tf.__version__) @@ -14,12 +14,6 @@ config_path = str(sys.argv[2]) -def clean(s): - if s is not None: - s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() - return s - - configs_to_write = { "human": [ "adipose tissue", @@ -87,7 +81,7 @@ def clean(s): "trachea", "urinary bladder", "uterus", - ] + ], } for organism, organs in configs_to_write.items(): @@ -96,4 +90,4 @@ def clean(s): store = DistributedStore(cache_path=store_path) store.subset(attr_key="organism", values=[organism]) store.subset(attr_key="organ", values=[organ]) - store.write_config(os.path.join(config_path, f"config_{clean(organism)}_{clean(organ)}.csv")) + store.write_config(os.path.join(config_path, f"config_{clean_string(organism)}_{clean_string(organ)}.csv")) From 25ade5402dd305f6928dbb1b85ee293547522926 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Tue, 27 Apr 2021 22:49:52 +0200 Subject: [PATCH 132/161] fix development stage annotation in pisco mouse data. 
closes #245 (#249) --- .../loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 198aaccf5..10170393a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -77,7 +77,7 @@ def __init__(self, **kwargs): self.download_url_meta = None self.cell_types_original_obs_key = "cell_ontology_class" - self.development_stage_obs_key = "development_stage" # not given in all data sets, TODO maybe infer as age? + self.development_stage_obs_key = "development_stage" self.sex_obs_key = "sex" # ToDo: further anatomical information for subtissue in "subtissue"? @@ -98,6 +98,14 @@ def __init__(self, **kwargs): def load(data_dir, sample_fn, **kwargs): + dev_stage_dict = { + "18m": "18 month-old stage", + "1m": "4 weeks", + "21m": "20 month-old stage and over", + "24m": "20 month-old stage and over", + "30m": "20 month-old stage and over", + "3m": "3 month-old stage", + } fn = os.path.join(data_dir, sample_fn) adata = anndata.read_h5ad(fn) adata.X = adata.raw.X @@ -106,5 +114,6 @@ def load(data_dir, sample_fn, **kwargs): adata.obsm = {} adata.varm = {} adata.uns = {} + adata.obs['development_stage'] = [dev_stage_dict[i] for i in adata.obs['age']] return adata From 5b0a7b984017bb02a2bf1f1186c82a94481826a6 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Tue, 27 Apr 2021 22:51:10 +0200 Subject: [PATCH 133/161] Fix meta writing (#246) * adapt meta_writing to new streamlining backend * fix handling of celltype labels in write_meta * fix donload_url_meta property setter * handle combinatorial batch keys correctly in write_meta * handle combinatorial batch keys correctly in write_meta * handle combinatorial batch 
keys correctly in write_meta --- sfaira/consts/adata_fields.py | 5 ++ sfaira/data/base/dataset.py | 91 ++++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index 7759e5706..f2d11cab1 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -54,6 +54,10 @@ class AdataIds: def controlled_meta_keys(self): return [getattr(self, k) for k in self.obs_keys + self.uns_keys] + @property + def controlled_meta_fields(self): + return [k for k in self.obs_keys + self.uns_keys] + class AdataIdsSfaira(AdataIds): """ @@ -149,6 +153,7 @@ def __init__(self): "download_url_meta", "id", "mapped_features", + "ncells", "normalization", "primary_data", "title", diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 4650b9b97..c8f183de2 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -1104,14 +1104,24 @@ def load_ontology_class_map(self, fn): if self.cell_types_original_obs_key is not None: warnings.warn(f"file {fn} does not exist but cell_types_original_obs_key is given") - def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = None, copy=False): + def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = None, copy=False, update_fields=True): """ Project free text cell type names to ontology based on mapping table. ToDo: add ontology ID setting here. + :param adata_fields: AdataIds instance that holds the column names to use for the annotation + :param copy: If True, a dataframe with the celltype annotation is returned, otherwise self.adata.obs is updated + inplace. + :param update_fields: If True, the celltype-related attributes of this Dataset instance are updated. Basically, + this should always be true, unless self.adata.obs is not updated by (or with the output of) this function. 
+ This includes the following fields: self.cellontology_class_obs_key, self.cell_types_original_obs_key, + self.cellontology_id_obs_key + :return: """ + assert copy or update_fields, "when copy is set to False, update_fields cannot be False" + adata_fields = adata_fields if adata_fields is not None else self._adata_ids results = {} labels_original = self.adata.obs[self.cell_types_original_obs_key].values @@ -1161,11 +1171,14 @@ def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = No ) results[adata_fields.cell_ontology_class] = labels_mapped results[adata_fields.cell_ontology_id] = ids_mapped + if update_fields: + self.cellontology_id_obs_key = adata_fields.cell_ontology_id else: results[adata_fields.cell_ontology_class] = labels_original results[adata_fields.cell_types_original] = labels_original - self.cellontology_class_obs_key = adata_fields.cell_ontology_class - self.cell_types_original_obs_key = adata_fields.cell_types_original + if update_fields: + self.cellontology_class_obs_key = adata_fields.cell_ontology_class + self.cell_types_original_obs_key = adata_fields.cell_types_original if copy: return pd.DataFrame(results, index=self.adata.obs.index) else: @@ -1174,7 +1187,7 @@ def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = No def __project_name_to_id_obs( self, - ontology: Ontology, + ontology: OntologyHierarchical, key_in: Union[str, list], key_out: Union[str, None], map_exceptions: list, @@ -1283,45 +1296,43 @@ def write_meta( if self.adata is None: self.load(load_raw=True, allow_caching=False) # Add data-set wise meta data into table: - meta = pandas.DataFrame({ - self._adata_ids.annotated: self.adata.uns[self._adata_ids.annotated], - self._adata_ids.author: self.adata.uns[self._adata_ids.author], - self._adata_ids.doi: self.adata.uns[self._adata_ids.doi], - self._adata_ids.download_url_data: self.adata.uns[self._adata_ids.download_url_data], - self._adata_ids.download_url_meta: 
self.adata.uns[self._adata_ids.download_url_meta], - self._adata_ids.id: self.adata.uns[self._adata_ids.id], - self._adata_ids.ncells: self.adata.n_obs, - self._adata_ids.normalization: self.adata.uns[self._adata_ids.normalization], - self._adata_ids.year: self.adata.uns[self._adata_ids.year], - }, index=range(1)) + meta = pandas.DataFrame(index=range(1)) # Expand table by variably cell-wise or data set-wise meta data: - for x in [ - self._adata_ids.assay_sc, - self._adata_ids.assay_differentiation, - self._adata_ids.assay_type_differentiation, - self._adata_ids.bio_sample, - self._adata_ids.cell_line, - self._adata_ids.development_stage, - self._adata_ids.ethnicity, - self._adata_ids.individual, - self._adata_ids.organ, - self._adata_ids.organism, - self._adata_ids.sample_source, - self._adata_ids.sex, - self._adata_ids.state_exact, - self._adata_ids.tech_sample, - ]: - if self.adata.uns[x] == UNS_STRING_META_IN_OBS: - meta[x] = (np.sort(np.unique(self.adata.obs[x].values)),) + for x in self._adata_ids.controlled_meta_fields: + if x in ["cell_types_original", "cell_ontology_class", "cell_ontology_id"]: + continue + elif x in ["bio_sample", "individual", "tech_sample"] and \ + hasattr(self, f"{x}_obs_key") and \ + getattr(self, f"{x}_obs_key") is not None and \ + "*" in getattr(self, f"{x}_obs_key"): + batch_cols = [] + for batch_col in getattr(self, f"{x}_obs_key").split("*"): + if batch_col in self.adata.obs_keys(): + batch_cols.append(batch_col) + else: + # This should not occur in single data set loaders (see warning below) but can occur in + # streamlined data loaders if not all instances of the streamlined data sets have all columns + # in .obs set. + print(f"WARNING: attribute {x} of data set {self.id} was not found in column {batch_col}") + # Build a combination label out of all columns used to describe this group. 
+ meta[getattr(self._adata_ids, x)] = (list(set([ + "_".join([str(xxx) for xxx in xx]) + for xx in zip(*[self.adata.obs[batch_col].values.tolist() for batch_col in batch_cols]) + ])),) + elif hasattr(self, f"{x}_obs_key") and getattr(self, f"{x}_obs_key") is not None: + meta[getattr(self._adata_ids, x)] = (self.adata.obs[getattr(self, f"{x}_obs_key")].unique(),) else: - meta[x] = self.adata.uns[x] + meta[getattr(self._adata_ids, x)] = getattr(self, x) # Add cell types into table if available: - if self._adata_ids.cell_ontology_class in self.adata.obs.keys(): - meta[self._adata_ids.cell_ontology_class] = str(( - np.sort(np.unique(self.adata.obs[self._adata_ids.cell_ontology_class].values)), - )) + if self.cell_types_original_obs_key is not None: + mappings = self.project_celltypes_to_ontology(copy=True, update_fields=False) + meta[self._adata_ids.cell_ontology_class] = (mappings[self._adata_ids.cell_ontology_class].unique(),) + meta[self._adata_ids.cell_ontology_id] = (mappings[self._adata_ids.cell_ontology_id].unique(),) + meta[self._adata_ids.cell_types_original] = (mappings[self._adata_ids.cell_types_original].unique(),) else: meta[self._adata_ids.cell_ontology_class] = " " + meta[self._adata_ids.cell_ontology_id] = " " + meta[self._adata_ids.cell_types_original] = " " meta.to_csv(fn_meta) def set_dataset_id( @@ -1582,7 +1593,7 @@ def download_url_data(self, x: Union[str, None, List[str], Tuple[str], List[None x = [x] if isinstance(x, list): x = (x,) - self._download_url_data = (x,) + self._download_url_data = x @property def download_url_meta(self) -> Union[Tuple[List[str]], Tuple[List[None]]]: @@ -1613,7 +1624,7 @@ def download_url_meta(self, x: Union[str, None, List[str], Tuple[str], List[None x = [x] if isinstance(x, list): x = (x,) - self._download_url_meta = (x,) + self._download_url_meta = x @property def ethnicity(self) -> Union[None, str]: From 8a202607fdb080a479f73a270abe83ddb414a0f6 Mon Sep 17 00:00:00 2001 From: Leander 
<20015434+le-ander@users.noreply.github.com> Date: Wed, 28 Apr 2021 09:34:32 +0200 Subject: [PATCH 134/161] reduce step size in matrix copying to get tid of "cannot convert integer scalar" scipy error (#251) --- sfaira/data/base/dataset.py | 2 +- sfaira/estimators/keras.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index c8f183de2..79cdba1bf 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -626,7 +626,7 @@ def streamline_features( # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: # ... scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) # ValueError: could not convert integer scalar - step = 2000 + step = 500 if step < len(idx_feature_map): i = 0 for i in range(0, len(idx_feature_map), step): diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 07cddb790..2ccac6b21 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -238,7 +238,7 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: # ... 
scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) # ValueError: could not convert integer scalar - step = 2000 + step = 500 if step < len(idx_feature_map): for i in range(0, len(idx_feature_map), step): x_new[:, idx_feature_map[i:i + step]] = x[:, i:i + step] From 63f559dff755e378e34bbce256be898211856f01 Mon Sep 17 00:00:00 2001 From: Karin Hrovatin <47607471+Hrovatin@users.noreply.github.com> Date: Wed, 28 Apr 2021 16:40:18 +0200 Subject: [PATCH 135/161] change read from backed=True (r+) to r so that read permission suffices for loading data (#256) --- sfaira/data/base/distributed_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index 15aa18d96..19ca1a14c 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -40,7 +40,7 @@ def __init__(self, cache_path: Union[str, None] = None): if f.split(".")[-1] == "h5ad": adata = anndata.read_h5ad( filename=os.path.join(cache_path, f), - backed=True, + backed="r", ) elif f.split(".")[-1] == "zarr": # TODO this reads into memory! Might need to directly interface the zarr arrays to work with dask. From ca70c5bb23b60e32d348547181957b6339eeee81 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Thu, 29 Apr 2021 21:02:18 +0200 Subject: [PATCH 136/161] =?UTF-8?q?fixed=20store=20to=20estimator=20interf?= =?UTF-8?q?ace=20and=20added=20further=20unit=20tests=20on=20st=E2=80=A6?= =?UTF-8?q?=20(#255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fixed store to estimator interface and added further unit tests on store and estimators * fixed zoo TopologyContainer interface and added .obs into store * adapated zoo handling and model ontology switched to modelclass_name_provider to be more generalisable modelclass is the only constant beyond provider that is really essential because it maps to a unique estimator class * updated store subsetting * fixed cached_store_writing for testing * fixed store-estimator bugs * stabilised size factor computation, fixed store unit tests and added dense conversion for feature indexing in store generator * improved efficiency of generator queries of store and generator usage by estimator keras * id is retained if uns_to_obs is True in streamlining * removed conitnuous batches option from store generator --- sfaira/data/base/dataset.py | 8 +- sfaira/data/base/distributed_store.py | 252 +++++++++------ sfaira/estimators/keras.py | 304 +++++++++-------- sfaira/interface/__init__.py | 2 +- sfaira/interface/model_zoo.py | 306 ++++++------------ sfaira/interface/user_interface.py | 10 +- sfaira/train/train_model.py | 65 ++-- sfaira/unit_tests/data/test_dataset.py | 5 +- sfaira/unit_tests/data/test_store.py | 82 +++-- .../unit_tests/estimators/test_estimator.py | 296 +++++++++++++---- .../interface/test_userinterface.py | 9 +- sfaira/unit_tests/interface/test_zoo.py | 31 ++ sfaira/unit_tests/trainer/__init__.py | 0 sfaira/unit_tests/trainer/test_trainer.py | 78 +++++ sfaira/unit_tests/utils.py | 53 +++ 15 files changed, 916 insertions(+), 585 deletions(-) create mode 100644 sfaira/unit_tests/interface/test_zoo.py create mode 100644 sfaira/unit_tests/trainer/__init__.py create 
mode 100644 sfaira/unit_tests/trainer/test_trainer.py create mode 100644 sfaira/unit_tests/utils.py diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 79cdba1bf..cc335a10b 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -674,7 +674,8 @@ def streamline_metadata( :param schema: Export format. - "sfaira" - "cellxgene" - :param uns_to_obs: Whether to move metadata in .uns to .obs to make sure it's not lost when concatenating multiple objects. + :param uns_to_obs: Whether to move metadata in .uns to .obs to make sure it's not lost when concatenating + multiple objects. Retains .id in .uns. :param clean_obs: Whether to delete non-streamlined fields in .obs, .obsm and .obsp. :param clean_var: Whether to delete non-streamlined fields in .var, .varm and .varp. :param clean_uns: Whether to delete non-streamlined fields in .uns. @@ -909,7 +910,9 @@ def streamline_metadata( for k, v in self.adata.uns.items(): if k not in self.adata.obs_keys(): self.adata.obs[k] = [v for i in range(self.adata.n_obs)] - self.adata.uns = {} + # Retain only target uns keys in .uns. 
+ self.adata.uns = dict([(k, v) for k, v in self.adata.uns.items() + if k in [getattr(adata_target_ids, kk) for kk in ["id"]]]) self._adata_ids = adata_target_ids # set new adata fields to class after conversion self.streamlined_meta = True @@ -948,6 +951,7 @@ def write_distributed_store( f"data, found {type(self.adata.X)}") fn = os.path.join(dir_cache, self.doi_cleaned_id + ".h5ad") as_dense = ("X",) if dense else () + print(f"writing {self.adata.shape} into {fn}") self.adata.write_h5ad(filename=fn, as_dense=as_dense, **compression_kwargs) elif store == "zarr": fn = os.path.join(dir_cache, self.doi_cleaned_id) diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index 19ca1a14c..7d6796e63 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -8,6 +8,7 @@ from sfaira.consts import AdataIdsSfaira, OCS from sfaira.data.base.dataset import is_child, UNS_STRING_META_IN_OBS +from sfaira.versions.genomes import GenomeContainer from sfaira.versions.metadata import CelltypeUniverse @@ -20,7 +21,7 @@ class DistributedStore: indices: Dict[str, np.ndarray] - def __init__(self, cache_path: Union[str, None] = None): + def __init__(self, cache_path: Union[str, os.PathLike, None] = None): """ This class is instantiated on a cache directory which contains pre-processed files in rapid access format. @@ -30,6 +31,7 @@ def __init__(self, cache_path: Union[str, None] = None): - zarr :param cache_path: Directory in which pre-processed .h5ad files lie. + :param genome_container: GenomeContainer with target features space defined. 
""" # Collect all data loaders from files in directory: adatas = {} @@ -53,72 +55,122 @@ def __init__(self, cache_path: Union[str, None] = None): self.adatas = adatas self.indices = indices self.ontology_container = OCS + self._genome_container = None self._adata_ids_sfaira = AdataIdsSfaira() self._celltype_universe = None @property def adata(self): - return list(self.adatas.values)[0].concatenate( - list(self.adatas.values)[1:] + return self.adatas[list(self.adatas.keys())[0]].concatenate( + *[self.adatas[k] for k in list(self.adatas.keys())[1:]], + batch_key="dataset_id", + batch_categories=list(self.adatas.keys()), ) + @property + def genome_container(self) -> Union[GenomeContainer, None]: + return self._genome_container + + @genome_container.setter + def genome_container(self, x: GenomeContainer): + var_names = self.__validate_feature_space_homogeneity() + # Validate genome container choice: + # Make sure that all var names defined in genome container are also contained in loaded data sets. + assert np.all([y in var_names for y in x.ensembl]), \ + "did not find variable names from genome container in store" + self._genome_container = x + + def __validate_feature_space_homogeneity(self) -> List[str]: + """ + Assert that the data sets which were kept have the same feature names. + """ + var_names = self.adatas[list(self.adatas.keys())[0]].var_names.tolist() + for k, v in self.adatas.items(): + assert len(var_names) == len(v.var_names), f"number of features in store differed in object {k}" + assert np.all(var_names == v.var_names), f"var_names in store were not matched in object {k}" + return var_names + def generator( self, + idx: Union[np.ndarray, None] = None, batch_size: int = 1, obs_keys: List[str] = [], - continuous_batches: bool = True, + return_dense: bool = True, ) -> iter: """ Yields an unbiased generator over observations in the contained data sets. - :param batch_size: Number of observations in each batch (generator invocation). 
+ :param idx: Global idx to query from store. These is an array with indicies corresponding to a contiuous index + along all observations in self.adatas, ordered along a hypothetical concatenation along the keys of + self.adatas. + :param batch_size: Number of observations in each batch (generator invocation). Increasing this may result in + large speed-ups in query time but removes the ability of upstream generators to fully shuffle cells, as + these batches are the smallest data unit that upstream generators can access. :param obs_keys: .obs columns to return in the generator. These have to be a subset of the columns available in self.adatas. - :param continuous_batches: Whether to build batches of batch_size across data set boundaries if end of one - data set is reached. + :param return_dense: Whether to return count data .X as dense batches. This allows more efficient feature + indexing if the store is sparse (column indexing on csr matrices is slow). :return: Generator function which yields batch_size at every invocation. The generator returns a tuple of (.X, .obs) with types: - - if store format is h5ad: (scipy.sparse.csr_matrix, pandas.DataFrame) + - if store format is h5ad: (Union[scipy.sparse.csr_matrix, np.ndarray], pandas.DataFrame) """ + # Make sure that features are ordered in the same way in each object so that generator yields consistent cell + # vectors. + _ = self.__validate_feature_space_homogeneity() + var_names_store = self.adatas[list(self.adatas.keys())[0]].var_names.tolist() + # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. + if self.genome_container is not None: + var_names_target = self.genome_container.ensembl + var_idx = np.sort([var_names_store.index(x) for x in var_names_target]) + # Check if index vector is just full ordered list of indices, in this case, sub-setting is unnecessary. 
+ if len(var_idx) == len(var_names_store) and np.all(var_idx == np.arange(0, len(var_names_store))): + var_idx = None + else: + var_idx = None def generator() -> tuple: - n_datasets = len(list(self.adatas.keys())) - x_last = None - obs_last = None + global_index_set = dict(list(zip(list(self.adatas.keys()), self.indices_global))) for i, (k, v) in enumerate(self.adatas.items()): # Define batch partitions: - if continuous_batches and x_last is not None: - # Prepend data set with residual data from last data set. - remainder_start = x_last.shape[0] - n_obs = v.n_obs + remainder_start + # Get subset of target indices that fall into this data set. + # Use indices relative to this data (via .index here). + # continuous_slices is evaluated to establish whether slicing can be performed as the potentially + # faster [start:end] or needs to tbe index wise [indices] + if idx is not None: + idx_i = [global_index_set[k].tolist().index(x) for x in idx if x in global_index_set[k]] + idx_i = np.sort(idx_i) + continuous_slices = np.all(idx_i == np.arange(0, v.n_obs)) else: - # Partition into equally sized batches up to last batch. - remainder_start = 0 - n_obs = v.n_obs - remainder = n_obs % batch_size - batch_starts = [ - np.min([0, int(x * batch_size - remainder_start)]) - for x in np.arange(1, n_obs // batch_size + int(remainder > 0)) - ] - n_batches = len(batch_starts) - # Iterate over batches: - for j, x in enumerate(batch_starts): - batch_end = int(x + batch_size) - x = v.X[x:batch_end, :] - obs = v.obs[obs_keys].iloc[x:batch_end, :] - assert isinstance(x, scipy.sparse.csr_matrix), f"{type(x)}" - assert isinstance(obs, pd.DataFrame), f"{type(obs)}" - if continuous_batches and remainder > 0 and i < (n_datasets - 1) and j == (n_batches - 1): - # Cache incomplete last batch to append to next first batch of next data set. - x_last = x - obs_last = obs - elif continuous_batches and x_last is not None: - # Append last incomplete batch current batch. 
- x = scipy.sparse.hstack(blocks=[x_last, x], format="csr") - obs = pd.concat(objs=[obs_last, obs], axis=0) - yield x, obs - else: + idx_i = np.arange(0, v.n_obs) + continuous_slices = True + if len(idx_i) > 0: # Skip data objects without matched cells. + n_obs = len(idx_i) + # Cells left over after batching to batch size, accounting for overhang: + remainder = n_obs % batch_size + batch_starts_ends = [ + (int(x * batch_size), int(x * batch_size) + batch_size) + for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) + ] + # Iterate over batches: + for j, (s, e) in enumerate(batch_starts_ends): + if continuous_slices: + e = idx_i[e] if e < n_obs else n_obs + x = v.X[idx_i[s]:e, :] + else: + x = v.X[idx_i[s:e], :] + # Do dense conversion now so that col-wise indexing is not slow, often, dense conversion + # would be done later anyway. + if return_dense: + x = x.todense() + if var_idx is not None: + x = x[:, var_idx] + if continuous_slices: + e = idx_i[e] if e < n_obs else n_obs + obs = v.obs[obs_keys].iloc[idx_i[s]:e, :] + else: + obs = v.obs[obs_keys].iloc[idx_i[s:e], :] + assert isinstance(obs, pd.DataFrame), f"{type(obs)}" # Yield current batch. yield x, obs @@ -134,45 +186,7 @@ def celltypes_universe(self) -> CelltypeUniverse: ) return self._celltype_universe - def subset(self, attr_key, values): - """ - Subset list of adata objects based on match to values in key property. - - Keys need to be available in adata.uns - - :param attr_key: Property to subset by. - :param values: Classes to overlap to. 
- :return: - """ - if isinstance(values, np.ndarray): - values = values.tolist() - if isinstance(values, tuple): - values = list(values) - if not isinstance(values, list): - values = [values] - # Get ontology container to be able to do relational reasoning: - ontology = getattr(self.ontology_container, attr_key) - for k in list(self.adatas.keys()): - if getattr(self._adata_ids_sfaira, attr_key) in self.adatas[k].uns.keys(): - if getattr(self._adata_ids_sfaira, attr_key) != UNS_STRING_META_IN_OBS: - values_found = self.adatas[k].uns[getattr(self._adata_ids_sfaira, attr_key)] - else: - values_found = self.adatas[k].obs[getattr(self._adata_ids_sfaira, attr_key)].values.tolist() - if not isinstance(values_found, list): - values_found = [values_found] - if not np.any([ - np.any([ - is_child(query=x, ontology=ontology, ontology_parent=y) - for y in values - ]) for x in values_found - ]): - # Delete entries which a non-matching meta data value associated with this item. - del self.adatas[k] - else: - # Delete entries which did not have this key annotated. - del self.adatas[k] - - def subset_cells_idx(self, attr_key, values: Union[str, List[str]]): + def _get_subset_idx(self, attr_key, values: Union[str, List[str]]): """ Get indices of subset list of adata objects based on cell-wise properties. @@ -197,25 +211,27 @@ def subset_cells_idx(self, attr_key, values: Union[str, List[str]]): values = [values] def get_subset_idx(adata, k, dataset): - # Try to look first in cell wise annotation to use cell-wise map if data set-wide maps are ambiguous: + # Use cell-wise annotation if data set-wide maps are ambiguous: # This can happen if the different cell-wise annotations are summarised as a union in .uns. 
- if getattr(self._adata_ids_sfaira, k) in adata.obs.keys(): - values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values - elif getattr(self._adata_ids_sfaira, k) in adata.uns.keys(): + if getattr(self._adata_ids_sfaira, k) in adata.uns.keys() and \ + adata.uns[getattr(self._adata_ids_sfaira, k)] != UNS_STRING_META_IN_OBS: values_found = adata.uns[getattr(self._adata_ids_sfaira, k)] if isinstance(values_found, np.ndarray): values_found = values_found.tolist() elif not isinstance(values_found, list): values_found = [values_found] if len(values_found) > 1: - print(f"WARNING: subsetting not exact for attribute {k}: {values_found}," - f" discarding data set {dataset}.") - values_found = [] + values_found = None # Go to cell-wise annotation. else: # Replicate unique property along cell dimension. values_found = [values_found[0] for i in range(adata.n_obs)] else: - raise ValueError(f"did not find attribute {k} in data set {dataset}") + values_found = None + if values_found is None: + if getattr(self._adata_ids_sfaira, k) in adata.obs.keys(): + values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values + else: + raise ValueError(f"did not find unique attribute {k} in data set {dataset}") values_found_unique = np.unique(values_found) try: ontology = getattr(self.ontology_container, k) @@ -238,10 +254,10 @@ def get_subset_idx(adata, k, dataset): idx_old = self.indices[k].tolist() idx_new = get_subset_idx(adata=v, k=attr_key, dataset=k) # Keep intersection of old and new hits. - indices[k] = np.array(list(set(idx_old).intersection(set(idx_new)))) + indices[k] = np.asarray(list(set(idx_old).intersection(set(idx_new))), dtype="int32") return indices - def subset_cells(self, attr_key, values: Union[str, List[str]]): + def subset(self, attr_key, values: Union[str, List[str]]): """ Subset list of adata objects based on cell-wise properties. 
@@ -263,13 +279,13 @@ def subset_cells(self, attr_key, values: Union[str, List[str]]): - "state_exact" points to self.state_exact_obs_key :param values: Classes to overlap to. """ - self.indices = self.subset_cells_idx(attr_key=attr_key, values=values) + self.indices = self._get_subset_idx(attr_key=attr_key, values=values) for k, v in self.indices.items(): if v.shape[0] == 0: # No observations (cells) left. del self.adatas[k] - def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]): + def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]) -> np.ndarray: """ Get indices of subset list of adata objects based on cell-wise properties treating instance as single array. @@ -293,15 +309,27 @@ def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]): :return Index vector """ # Get indices of of cells in target set by file. - idx_by_dataset = self.subset_cells_idx(attr_key=attr_key, values=values) + idx_by_dataset = self._get_subset_idx(attr_key=attr_key, values=values) # Translate file-wise indices into global index list across all data sets. idx = [] counter = 0 + for k, v in idx_by_dataset.items(): + idx.extend((v + counter).tolist()) + counter += self.adatas[k].n_obs + return np.asarray(idx) + + @property + def indices_global(self): + """ + Increasing indices across data sets which can be concatenated into a single index vector with unique entries + for cells. + """ + counter = 0 + indices = [] for k, v in self.adatas.items(): - idx_k = np.arange(counter, counter + v.n_obs) - idx.extend(idx_k[idx_by_dataset[k]]) + indices.append(np.arange(counter, counter + v.n_obs)) counter += v.n_obs - return idx + return indices def write_config(self, fn: Union[str, os.PathLike]): """ @@ -331,11 +359,37 @@ def load_config(self, fn: Union[str, os.PathLike]): # Only retain data sets with which are mentioned in config file. 
self.subset(attr_key="id", values=list(self.indices.keys())) + @property + def var_names(self): + var_names = self.__validate_feature_space_homogeneity() + # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. + if self.genome_container is None: + return var_names + else: + return self.genome_container.ensembl + @property def n_vars(self): - # assumes that all adata - return list(self.adatas.values())[0].n_vars + var_names = self.__validate_feature_space_homogeneity() + # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. + if self.genome_container is None: + return len(var_names) + else: + return self.genome_container.n_var @property def n_obs(self): - return np.sum([len(v) for _, v in self.indices]) + return np.sum([len(v) for v in self.indices.values()]) + + @property + def shape(self): + return [self.n_obs, self.n_vars] + + @property + def obs(self) -> pd.DataFrame: + """ + Assemble .obs table of subset of full data. + + :return: .obs data frame. 
+ """ + return pd.concat([v.obs for v in self.adatas.values()], axis=0) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 2ccac6b21..0f504bc3c 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -2,6 +2,7 @@ import anndata import hashlib import numpy as np +import pandas as pd import scipy.sparse try: import tensorflow as tf @@ -23,13 +24,16 @@ def prepare_sf(x): + """ + Uses a minimal size factor of 1e-3 for total counts / 1e4 + """ if len(x.shape) == 2: sf = np.asarray(x.sum(axis=1)).flatten() elif len(x.shape) == 1: sf = np.asarray(x.sum()).flatten() else: raise ValueError("x.shape > 2") - sf = np.log(sf / 1e4 + 1e-10) + sf = np.log(np.maximum(sf / 1e4, 1e-3)) return sf @@ -65,6 +69,9 @@ def __init__( self.model_id = model_id self.model_class = model_class self.topology_container = model_topology + # Prepare store with genome container sub-setting: + if isinstance(self.data, DistributedStore): + self.data.genome_container = self.topology_container.gc self.history = None self.train_hyperparam = None @@ -176,8 +183,9 @@ def _get_dataset( batch_size: Union[int, None], mode: str, shuffle_buffer_size: int, - prefetch: int, - weighted: bool + cache_full: bool, + weighted: bool, + retrieval_batch_size: int, ): pass @@ -195,7 +203,14 @@ def _get_class_dict( label_dict.update({label: float(i)}) return label_dict - def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): + def _prepare_data_matrix(self, idx: Union[np.ndarray, None]) -> scipy.sparse.csr_matrix: + """ + Subsets observations x features matrix in .data to observation indices (idx, the split) and features defined + by topology. + + :param idx: Observation index split. + :return: Data matrix + """ # Check that AnnData is not backed. If backed, assume that these processing steps were done before. 
if self.data.isbacked: raise ValueError("tried running backed AnnData object through standard pipeline") @@ -224,35 +239,19 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]): return x # Compute indices of genes to keep - data_ids = self.data.var[self._adata_ids.gene_id_ensembl].values - idx_feature_kept = np.where([x in self.topology_container.gc.ensembl for x in data_ids])[0] - idx_feature_map = np.array([self.topology_container.gc.ensembl.index(x) - for x in data_ids[idx_feature_kept]]) - - # Convert to csc and remove unmapped genes - x = x.tocsc() - x = x[:, idx_feature_kept] - - # Create reordered feature matrix based on reference and convert to csr - x_new = scipy.sparse.csc_matrix((x.shape[0], self.topology_container.n_var), dtype=x.dtype) - # copying this over to the new matrix in chunks of size `steps` prevents a strange scipy error: - # ... scipy/sparse/compressed.py", line 922, in _zero_many i, j, offsets) - # ValueError: could not convert integer scalar - step = 500 - if step < len(idx_feature_map): - for i in range(0, len(idx_feature_map), step): - x_new[:, idx_feature_map[i:i + step]] = x[:, i:i + step] - x_new[:, idx_feature_map[i + step:]] = x[:, i + step:] - else: - x_new[:, idx_feature_map] = x - - x_new = x_new.tocsr() - - print(f"found {len(idx_feature_kept)} intersecting features between {x.shape[1]} " - f"features in input data set and {self.topology_container.n_var} features in reference genome") - print(f"found {x_new.shape[0]} observations") - - return x_new + data_ids = self.data.var[self._adata_ids.gene_id_ensembl].values.tolist() + target_ids = self.topology_container.gc.ensembl + idx_map = np.array([data_ids.index(z) for z in target_ids]) + # Assert that each ID from target IDs appears exactly once in data IDs: + assert np.all([z in data_ids for z in target_ids]), "not all target feature IDs found in data" + assert np.all([np.sum(z == np.array(data_ids)) <= 1. 
for z in target_ids]), \ + "duplicated target feature IDs exist in data" + # Map feature space. + x = x[:, idx_map] + print(f"found {len(idx_map)} intersecting features between {x.shape[1]} features in input data set and" + f" {self.topology_container.n_var} features in reference genome") + print(f"found {x.shape[0]} observations") + return x @abc.abstractmethod def _get_loss(self): @@ -283,6 +282,7 @@ def train( test_split: Union[float, dict] = 0., validation_batch_size: int = 256, max_validation_steps: Union[int, None] = 10, + cache_full: bool = False, patience: int = 20, lr_schedule_min_lr: float = 1e-5, lr_schedule_factor: float = 0.2, @@ -290,7 +290,7 @@ def train( shuffle_buffer_size: int = int(1e4), log_dir: Union[str, None] = None, callbacks: Union[list, None] = None, - weighted: bool = True, + weighted: bool = False, verbose: int = 2 ): """ @@ -350,7 +350,7 @@ def train( } # Set callbacks. - cbs = [] + cbs = [tf.keras.callbacks.TerminateOnNaN()] if patience is not None and patience > 0: cbs.append(tf.keras.callbacks.EarlyStopping( monitor='val_loss', @@ -391,24 +391,19 @@ def train( if isinstance(test_split, float) or isinstance(test_split, int): self.idx_test = np.random.choice( a=all_idx, - size=round(self.data.shape[0] * test_split), + size=round(self.data.n_obs * test_split), replace=False, ) elif isinstance(test_split, dict): - if isinstance(self.data, anndata.AnnData): - in_test = np.ones((self.data.obs.shape[0],), dtype=int) == 1 - for k, v in test_split.items(): - if isinstance(v, list): - in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) - else: - in_test = np.logical_and(in_test, self.data.obs[k].values == v) - self.idx_test = np.where(in_test)[0] - print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") - print(self.idx_test) - else: - assert len(test_split.values()) == 1 - self.idx_test = self.data.subset_cells_idx_global(attr_key=list(test_split.keys())[0], - 
values=list(test_split.values())[0]) + in_test = np.ones((self.data.n_obs,), dtype=int) == 1 + for k, v in test_split.items(): + if isinstance(v, list): + in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) + else: + in_test = np.logical_and(in_test, self.data.obs[k].values == v) + self.idx_test = np.where(in_test)[0] + print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") + print(self.idx_test) else: raise ValueError("type of test_split %s not recognized" % type(test_split)) idx_train_eval = np.array([x for x in all_idx if x not in self.idx_test]) @@ -434,14 +429,16 @@ def train( batch_size=batch_size, mode='train', shuffle_buffer_size=min(shuffle_buffer_size, len(self.idx_train)), - weighted=weighted + weighted=weighted, + cache_full=cache_full, ) eval_dataset = self._get_dataset( idx=self.idx_eval, batch_size=validation_batch_size, mode='train_val', shuffle_buffer_size=min(shuffle_buffer_size, len(self.idx_eval)), - weighted=weighted + weighted=weighted, + cache_full=cache_full, ) steps_per_epoch = min(max(len(self.idx_train) // batch_size, 1), max_steps_per_epoch) @@ -469,6 +466,18 @@ def get_citations(self): def using_store(self) -> bool: return isinstance(self.data, DistributedStore) + @property + def obs_train(self): + return self.data.obs.iloc[self.idx_train, :] + + @property + def obs_eval(self): + return self.data.obs.iloc[self.idx_eval, :] + + @property + def obs_test(self): + return self.data.obs.iloc[self.idx_test, :] + class EstimatorKerasEmbedding(EstimatorKeras): """ @@ -539,6 +548,7 @@ def _get_base_generator( self, generator_helper, idx: Union[np.ndarray, None], + batch_size: int = 1, ): """ Yield a basic generator based on which a tf dataset can be built. 
@@ -557,30 +567,40 @@ def _get_base_generator( # Prepare data reading according to whether anndata is backed or not: if self.using_store: generator_raw = self.data.generator( - batch_size=1, + idx=idx, + batch_size=batch_size, obs_keys=[], - continuous_batches=True, + return_dense=True, ) def generator(): - counter = -1 - for z in generator_raw: - counter += 1 - if counter in idx: - x_sample = z[0].toarray().flatten() - yield generator_helper(x_sample=x_sample) + for z in generator_raw(): + x_sample = z[0] + if isinstance(x_sample, scipy.sparse.csr_matrix): + x_sample = x_sample.todense() + x_sample = np.asarray(x_sample) + for i in range(x_sample.shape[0]): + yield generator_helper(x_sample=x_sample[i]) n_features = self.data.n_vars n_samples = self.data.n_obs else: x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + indices = idx if self.data.isbacked else range(x.shape[0]) + n_obs = len(indices) + remainder = n_obs % batch_size + batch_starts_ends = [ + (int(x * batch_size), int(x * batch_size) + batch_size) + for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) + ] def generator(): is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) - indices = idx if self.data.isbacked else range(x.shape[0]) - for i in indices: - x_sample = x[i, :].toarray().flatten() if is_sparse else x[i, :].flatten() - yield generator_helper(x_sample=x_sample) + for s, e in batch_starts_ends: + x_sample = np.asarray(x[indices[s:e], :].todense()) if is_sparse \ + else x[indices[s:e], :] + for i in range(x_sample.shape[0]): + yield generator_helper(x_sample=x_sample[i]) n_features = x.shape[1] n_samples = x.shape[0] @@ -593,8 +613,9 @@ def _get_dataset( batch_size: Union[int, None], mode: str, shuffle_buffer_size: int = int(1e7), - prefetch: int = 10, + cache_full: bool = False, weighted: bool = False, + retrieval_batch_size: int = 128, ): """ @@ -621,6 +642,7 @@ def generator_helper(x_sample): generator, n_samples, n_features = 
self._get_base_generator( generator_helper=generator_helper, idx=idx, + batch_size=retrieval_batch_size, ) output_types, output_shapes = self._get_output_dim(n_features=n_features, model_type=model_type, mode=mode) dataset = tf.data.Dataset.from_generator( @@ -628,6 +650,8 @@ def generator_helper(x_sample): output_types=output_types, output_shapes=output_shapes ) + if cache_full: + dataset = dataset.cache() # Only shuffle in train modes if mode in ['train', 'train_val']: dataset = dataset.repeat() @@ -635,30 +659,31 @@ def generator_helper(x_sample): buffer_size=min(n_samples, shuffle_buffer_size), seed=None, reshuffle_each_iteration=True) - dataset = dataset.batch(batch_size).prefetch(prefetch) + dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE) return dataset - elif mode == 'gradient_method': + elif mode == 'gradient_method': # TODO depreceate this code # Prepare data reading according to whether anndata is backed or not: cell_to_class = self._get_class_dict(obs_key=self._adata_ids.cell_ontology_class) if self.using_store: n_features = self.data.n_vars generator_raw = self.data.generator( + idx=idx, batch_size=1, obs_keys=["cell_ontology_class"], - continuous_batches=True, + return_dense=True, ) def generator(): - counter = -1 - for z in generator_raw: - counter += 1 - if counter in idx: - x_sample = z[0].toarray().flatten() - sf_sample = prepare_sf(x=x_sample)[0] - y_sample = z[1]["cell_ontology_class"].values[0] - yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) + for z in generator_raw(): + x_sample = z[0] + if isinstance(x_sample, scipy.sparse.csr_matrix): + x_sample = x_sample.todense() + x_sample = np.asarray(x_sample).flatten() + sf_sample = prepare_sf(x=x_sample)[0] + y_sample = z[1]["cell_ontology_class"].values[0] + yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) elif isinstance(self.data, anndata.AnnData) and self.data.isbacked: n_features = self.data.X.shape[1] @@ -691,7 +716,7 @@ def generator(): 
buffer_size=shuffle_buffer_size, seed=None, reshuffle_each_iteration=True - ).batch(batch_size).prefetch(prefetch) + ).batch(batch_size).prefetch(tf.data.AUTOTUNE) return dataset @@ -752,7 +777,8 @@ def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): dataset = self._get_dataset( idx=idx, batch_size=batch_size, - mode='eval' + mode='eval', + retrieval_batch_size=128, ) steps = min(max(len(idx) // batch_size, 1), max_steps) results = self.model.training_model.evaluate(x=dataset, steps=steps) @@ -760,7 +786,7 @@ def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): else: return {} - def evaluate(self, batch_size: int = 1, max_steps: int = np.inf): + def evaluate(self, batch_size: int = 64, max_steps: int = np.inf): """ Evaluate the custom model on test data. @@ -772,7 +798,7 @@ def evaluate(self, batch_size: int = 1, max_steps: int = np.inf): """ return self.evaluate_any(idx=self.idx_test, batch_size=batch_size, max_steps=max_steps) - def predict(self): + def predict(self, batch_size: int = 64, max_steps: int = np.inf): """ return the prediction of the model @@ -782,14 +808,15 @@ def predict(self): if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None dataset = self._get_dataset( idx=self.idx_test, - batch_size=64, - mode='predict' + batch_size=batch_size, + mode='predict', + retrieval_batch_size=128, ) return self.model.predict_reconstructed(x=dataset) else: return np.array([]) - def predict_embedding(self): + def predict_embedding(self, batch_size: int = 64, max_steps: int = np.inf): """ return the prediction in the latent space (z_mean for variational models) @@ -799,14 +826,15 @@ def predict_embedding(self): if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None dataset = self._get_dataset( idx=self.idx_test, - batch_size=64, - mode='predict' + batch_size=batch_size, + mode='predict', + 
retrieval_batch_size=128, ) return self.model.predict_embedding(x=dataset, variational=False) else: return np.array([]) - def predict_embedding_variational(self): + def predict_embedding_variational(self, batch_size: int = 64, max_steps: int = np.inf): """ return the prediction of z, z_mean, z_log_var in the variational latent space @@ -816,8 +844,9 @@ def predict_embedding_variational(self): if self.idx_test is None or self.idx_test: # true if the array is not empty or if the passed value is None dataset = self._get_dataset( idx=self.idx_test, - batch_size=64, - mode='predict' + batch_size=batch_size, + mode='predict', + retrieval_batch_size=128, ) return self.model.predict_embedding(x=dataset, variational=True) else: @@ -979,15 +1008,21 @@ def ontology_ids(self): def _one_hot_encoder(self): - def encoder(x): - idx = self.celltype_universe.onto_cl.map_to_leaves( - node=x, - return_type="idx", - include_self=True, - ) - y = np.zeros((self.ntypes,), dtype="float32") - y[idx] = 1. / len(idx) - return y + def encoder(x) -> np.ndarray: + if isinstance(x, str): + x = [x] + idx = [ + self.celltype_universe.onto_cl.map_to_leaves( + node=y, + return_type="idx", + include_self=True, + ) + for y in x + ] + oh = np.zeros((len(x), self.ntypes,), dtype="float32") + for i, y in enumerate(idx): + oh[i, y] = 1. / len(y) + return oh return encoder @@ -1006,7 +1041,7 @@ def _get_celltype_out( # One whether "unknown" is already included, otherwise add one extra column. onehot_encoder = self._one_hot_encoder() y = np.concatenate([ - np.expand_dims(onehot_encoder(z), axis=0) + onehot_encoder(z) for z in self.data.obs[self._adata_ids.cell_ontology_class].values[idx].tolist() ], axis=0) # Distribute aggregated class weight for computation of weights: @@ -1039,6 +1074,7 @@ def _get_base_generator( generator_helper, idx: Union[np.ndarray, None], weighted: bool = False, + batch_size: int = 1, ): """ Yield a basic generator based on which a tf dataset can be built. 
@@ -1061,20 +1097,22 @@ def _get_base_generator( if weighted: raise ValueError("using weights with store is not supported yet") generator_raw = self.data.generator( - batch_size=1, + idx=idx, + batch_size=batch_size, obs_keys=["cell_ontology_class"], - continuous_batches=True, + return_dense=True, ) onehot_encoder = self._one_hot_encoder() def generator(): - counter = -1 - for z in generator_raw: - counter += 1 - if counter in idx: - x_sample = z[0].toarray().flatten() - y_sample = onehot_encoder(z[0]["cell_ontology_class"].values[0]) - yield generator_helper(x_sample, y_sample, 1.) + for z in generator_raw(): + x_sample = z[0] + if isinstance(x_sample, scipy.sparse.csr_matrix): + x_sample = x_sample.todense() + x_sample = np.asarray(x_sample) + y_sample = onehot_encoder(z[1]["cell_ontology_class"].values) + for i in range(x_sample.shape[0]): + yield generator_helper(x_sample[i], y_sample[i], 1.) n_features = self.data.n_vars n_samples = self.data.n_obs @@ -1084,15 +1122,23 @@ def generator(): if not weighted: weights = np.ones_like(weights) x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) + is_sparse = isinstance(x, scipy.sparse.spmatrix) + indices = idx if self.data.isbacked else range(x.shape[0]) + n_obs = len(indices) + remainder = n_obs % batch_size + batch_starts_ends = [ + (int(x * batch_size), int(x * batch_size) + batch_size) + for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) + ] def generator(): - is_sparse = isinstance(x[0, :], scipy.sparse.spmatrix) - indices = idx if self.data.isbacked else range(x.shape[0]) - for i in indices: - x_sample = np.asarray(x[i, :].todense()).flatten() if is_sparse else x[i, :].flatten() - y_sample = y[i, :] - w_sample = weights[i] - yield generator_helper(x_sample, y_sample, w_sample) + for s, e in batch_starts_ends: + x_sample = np.asarray(x[indices[s:e], :].todense()) if is_sparse \ + else x[indices[s:e], :] + y_sample = y[indices[s:e], :] + w_sample = weights[indices[s:e]] + 
for i in range(x_sample.shape[0]): + yield generator_helper(x_sample[i], y_sample[i], w_sample[i]) n_features = x.shape[1] n_samples = x.shape[0] @@ -1106,8 +1152,9 @@ def _get_dataset( batch_size: Union[int, None], mode: str, shuffle_buffer_size: int = int(1e7), - prefetch: int = 10, - weighted: bool = True, + cache_full: bool = False, + weighted: bool = False, + retrieval_batch_size: int = 128, ): """ @@ -1129,6 +1176,7 @@ def generator_helper(x_sample, y_sample, w_sample): generator_helper=generator_helper, idx=idx, weighted=weighted, + batch_size=retrieval_batch_size, ) output_types, output_shapes = self._get_output_dim(n_features=n_features, n_labels=n_labels, mode=mode) dataset = tf.data.Dataset.from_generator( @@ -1136,6 +1184,8 @@ def generator_helper(x_sample, y_sample, w_sample): output_types=output_types, output_shapes=output_shapes ) + if cache_full: + dataset = dataset.cache() if mode == 'train' or mode == 'train_val': dataset = dataset.repeat() dataset = dataset.shuffle( @@ -1143,7 +1193,7 @@ def generator_helper(x_sample, y_sample, w_sample): seed=None, reshuffle_each_iteration=True ) - dataset = dataset.batch(batch_size).prefetch(prefetch) + dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE) return dataset @@ -1160,11 +1210,7 @@ def _metrics(self): CustomTprClasswise(k=self.ntypes) ] - def predict( - self, - batch_size: int = 1, - max_steps: int = np.inf, - ): + def predict(self, batch_size: int = 64, max_steps: int = np.inf): """ Return the prediction of the model @@ -1177,14 +1223,15 @@ def predict( dataset = self._get_dataset( idx=idx, batch_size=batch_size, - mode='predict' + mode='predict', + retrieval_batch_size=128, ) steps = min(max(len(idx) // batch_size, 1), max_steps) return self.model.training_model.predict(x=dataset, steps=steps) else: return np.array([]) - def ytrue(self): + def ytrue(self, batch_size: int = 64, max_steps: int = np.inf): """ Return the true labels of the test set. 
@@ -1193,7 +1240,7 @@ def ytrue(self): if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None x, y, w = self._get_dataset( idx=self.idx_test, - batch_size=None, + batch_size=batch_size, mode='eval' ) return y @@ -1205,7 +1252,7 @@ def evaluate_any( idx, batch_size: int = 1, max_steps: int = np.inf, - weighted: bool = True + weighted: bool = False ): """ Evaluate the custom model on any local data. @@ -1224,7 +1271,8 @@ def evaluate_any( idx=idx, batch_size=batch_size, mode='eval', - weighted=weighted + weighted=weighted, + retrieval_batch_size=128, ) steps = min(max(len(idx) // batch_size, 1), max_steps) results = self.model.training_model.evaluate(x=dataset, steps=steps) @@ -1232,7 +1280,7 @@ def evaluate_any( else: return {} - def evaluate(self, batch_size: int = 1, max_steps: int = np.inf, weighted: bool = True): + def evaluate(self, batch_size: int = 64, max_steps: int = np.inf, weighted: bool = False): """ Evaluate the custom model on local data. 
diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py index 51dee4b72..7c96cac34 100644 --- a/sfaira/interface/__init__.py +++ b/sfaira/interface/__init__.py @@ -1,2 +1,2 @@ -from sfaira.interface.model_zoo import ModelZoo, ModelZooEmbedding, ModelZooCelltype +from sfaira.interface.model_zoo import ModelZoo from sfaira.interface.user_interface import UserInterface diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 1267b153f..89d3a69a6 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -1,15 +1,11 @@ import abc -try: - import kipoi -except ImportError: - kipoi = None import numpy as np import pandas as pd from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse from sfaira.consts import OntologyContainerSfaira -from sfaira.versions.topologies import TopologyContainer +from sfaira.versions.topologies import TopologyContainer, TOPOLOGIES class ModelZoo(abc.ABC): @@ -18,39 +14,95 @@ class ModelZoo(abc.ABC): """ topology_container: TopologyContainer ontology: dict - model_id: Union[str, None] - model_class: Union[str, None] - model_class: Union[str, None] - model_type: Union[str, None] - model_topology: Union[str, None] - model_version: Union[str, None] + _model_id: Union[str, None] celltypes: Union[CelltypeUniverse, None] def __init__( self, - model_lookuptable: Union[None, pd.DataFrame] = None + model_lookuptable: Union[None, pd.DataFrame] = None, + model_class: Union[str, None] = None, ): """ :param model_lookuptable: model_lookuptable. + :param model_class: Model class to subset to. 
""" self._ontology_container_sfaira = OntologyContainerSfaira() if model_lookuptable is not None: # check if models in repository - self.ontology = self.load_ontology_from_model_ids(model_lookuptable['model_id'].values) - self.model_id = None - self.model_class = None - self.model_type = None - self.organisation = None - self.model_topology = None - self.model_version = None - self.topology_container = None + self.ontology = self.load_ontology_from_model_ids(model_ids=model_lookuptable['model_id'].values, + model_class=model_class) + self._model_id = None self.celltypes = None - @abc.abstractmethod + @property + def model_class(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[0] + + @property + def model_name(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1] + + @property + def model_organism(self): + # TODO: this is a custom name ontology + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[0] + + @property + def model_organ(self): + # TODO: this is a custom name ontology + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[1] + + @property + def model_type(self): + # TODO: this is a custom name ontology + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[2] + + @property + def model_topology(self): + # TODO: this is a custom name ontology + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[3] + + @property + def model_version(self): + # TODO: this is a custom name ontology + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[4] + + @property + def organisation(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[2] + def load_ontology_from_model_ids( self, - 
model_ids - ): - pass + model_ids, + model_class: Union[str, None] = None, + ) -> dict: + """ + Load model ontology based on models available in model lookup tables. + + :param model_ids: Table listing all available model_ids. + :param model_class: Model class to subset to. + :return: Dictionary formatted ontology. + """ + + ids = [x for x in model_ids if (x.split('_')[0] == model_class or model_class is None)] + id_df = pd.DataFrame( + [i.split('_')[1:6] for i in ids], + columns=['name', 'organisation'] + ) + model = np.unique(id_df['name']) + ontology = dict.fromkeys(model) + for m in model: + id_df_m = id_df[id_df.model_type == m] + orga = np.unique(id_df_m['organisation']) + ontology[m] = dict.fromkeys(orga) + return ontology def _order_versions( self, @@ -66,25 +118,19 @@ def _order_versions( return versions - def set_model_id( - self, - model_id: str - ): + @property + def model_id(self): + return self._model_id + + @model_id.setter + def model_id(self, x: str): """ Set model ID to a manually supplied ID. - :param model_id: Model ID to set. Format: pipeline_genome_organ_model_organisation_topology_version + :param x: Model ID to set. Format: pipeline_genome_organ_model_organisation_topology_version """ - if len(model_id.split('_')) < 6: - raise RuntimeError(f'Model ID {model_id} is invalid!') - self.model_id = model_id - ixs = self.model_id.split('_') - self.model_class = ixs[0] - self.model_id = ixs[1] - self.model_type = ixs[2] - self.organisation = ixs[3] - self.model_topology = ixs[4] - self.model_version = ixs[5] + assert len(x.split('_')) == 3, f'model_id {x} is invalid' + self._model_id = x def save_weights_to_remote(self, path=None): """ @@ -113,14 +159,6 @@ def call_kipoi(self): """ raise NotImplementedError() - def models(self) -> List[str]: - """ - Return list of available models. - - :return: List of models available. 
- """ - return self.ontology.keys() - def topology( self, model_type: str, @@ -164,171 +202,11 @@ def model_hyperparameters(self) -> dict: assert self.topology_container is not None return self.topology_container.topology["hyper_parameters"] - -class ModelZooEmbedding(ModelZoo): - - """ - The supported model ontology is: - - organism -> organ -> model -> organisation -> topology -> version -> ID - - Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. - """ - - def load_ontology_from_model_ids( - self, - model_ids - ) -> dict: - """ - Load model ontology based on models available in model lookup tables. - - :param model_ids: Table listing all available model_ids. - :return: Dictionary formatted ontology. - """ - - ids = [i for i in model_ids if i.split('_')[0] == 'embedding'] - id_df = pd.DataFrame( - [i.split('_')[1:6] for i in ids], - columns=['id', 'model_type', 'organisation', 'model_topology', 'model_version'] - ) - model = np.unique(id_df['model_type']) - ontology = dict.fromkeys(model) - for m in model: - id_df_m = id_df[id_df.model_type == m] - orga = np.unique(id_df_m['organisation']) - ontology[m] = dict.fromkeys(orga) - for org in orga: - id_df_org = id_df_m[id_df_m.organisation == org] - topo = np.unique(id_df_org['model_topology']) - ontology[m][org] = dict.fromkeys(topo) - for t in topo: - id_df_t = id_df_org[id_df_org.model_topology == t] - ontology[m][org][t] = id_df_t.model_version.tolist() - - return ontology - - def set_latest( - self, - model_type: str, - organisation: str, - model_topology: str - ): - """ - Set model ID to latest model in given ontology group. - - :param model_type: Identifier of model_type to select. - :param organisation: Identifier of organisation to select. 
- :param model_topology: Identifier of model_topology to select - :return: - """ - assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[model_type].keys(), \ - "organisation requested was not found in ontology" - assert model_topology in self.ontology[model_type][organisation].keys(), \ - "model_topology requested was not found in ontology" - - versions = self.versions( - model_type=model_type, - organisation=organisation, - model_topology=model_topology - ) - self.model_type = model_type - self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be organism/organ specific later - - self.model_version = self._order_versions(versions=versions)[0] - self.model_id = '_'.join([ - 'embedding', - self.id, - self.model_type, - self.organisation, - self.model_topology, - self.model_version - ]) - - -class ModelZooCelltype(ModelZoo): - """ - The supported model ontology is: - - organism -> organ -> model -> organisation -> topology -> version -> ID - - Maybe: include experimental protocol? Ie droplet, full-length, single-nuclei. - - Note on topology id: The topology ID is x.y.z, x is the major cell type version and y.z is the cell type model - topology. Cell type model ontologies do not include the output size as this is set by the cell type version. - """ - - def load_ontology_from_model_ids( - self, - model_ids - ) -> dict: - """ - Load model ontology based on models available in model lookup tables. - - :param model_ids: Table listing all available model_ids. - :return: Dictionary formatted ontology. 
- """ - - ids = [i for i in model_ids if i.split('_')[0] == 'celltype'] - id_df = pd.DataFrame( - [i.split('_')[1:6] for i in ids], - columns=['id', 'model_type', 'organisation', 'model_topology', 'model_version'] - ) - model = np.unique(id_df['model_type']) - ontology = dict.fromkeys(model) - for m in model: - id_df_m = id_df[id_df.model_type == m] - orga = np.unique(id_df_m['organisation']) - ontology[m] = dict.fromkeys(orga) - for org in orga: - id_df_org = id_df_m[id_df_m.organisation == org] - topo = np.unique(id_df_org['model_topology']) - ontology[m][org] = dict.fromkeys(topo) - for t in topo: - id_df_t = id_df_org[id_df_org.model_topology == t] - ontology[m][org][t] = id_df_t.model_version.tolist() - - return ontology - - def set_latest( - self, - model_type: str, - organisation: str, - model_topology: str - ): - """ - Set model ID to latest model in given ontology group. - - :param organism: Identifier of organism to select. - :param organ: Identifier of organ to select. - :param model_type: Identifier of model_type to select. - :param organisation: Identifier of organisation to select. - :param model_topology: Identifier of model_topology to select - :return: - """ - assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[model_type].keys(), \ - "organisation requested was not found in ontology" - assert model_topology in self.ontology[model_type][organisation].keys(), \ - "model_topology requested was not found in ontology" - - versions = self.versions( - model_type=model_type, - organisation=organisation, - model_topology=model_topology + @property + def topology_container(self) -> TopologyContainer: + # TODO: this ID decomposition to organism is custom to the topologies handled in this package. 
+ organism = self.model_name.split("-")[0] + return TopologyContainer( + topology=TOPOLOGIES[organism][self.model_class][self.model_type][self.model_topology], + topology_id=self.model_version ) - - self.model_type = model_type - self.organisation = organisation - self.model_topology = model_topology # set to model for now, could be organism/organ specific later - - self.model_version = self._order_versions(versions=versions)[0] - self.model_id = '_'.join([ - 'celltype', - self.id, - self.model_type, - self.organisation, - self.model_topology, - self.model_version - ]) diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index c6f88b018..db4529678 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -11,7 +11,7 @@ from sfaira.data import DatasetInteractive from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype -from sfaira.interface.model_zoo import ModelZooEmbedding, ModelZooCelltype +from sfaira.interface.model_zoo import ModelZoo class UserInterface: @@ -43,8 +43,8 @@ class UserInterface: estimator_celltype: Union[EstimatorKerasCelltype, None] model_kipoi_embedding: Union[None] model_kipoi_celltype: Union[BaseModel, None] - zoo_embedding: Union[ModelZooEmbedding, None] - zoo_celltype: Union[ModelZooCelltype, None] + zoo_embedding: Union[ModelZoo, None] + zoo_celltype: Union[ModelZoo, None] data: Union[anndata.AnnData] model_lookuptable: Union[pd.DataFrame, None] @@ -87,8 +87,8 @@ def __init__( # TODO: workaround to deal with model ids bearing file endings in model lookuptable (as is the case in first sfaira model repo upload) self.model_lookuptable['model_id'] = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in self.model_lookuptable['model_id']] - self.zoo_embedding = ModelZooEmbedding(self.model_lookuptable) - self.zoo_celltype = ModelZooCelltype(self.model_lookuptable) + self.zoo_embedding = ModelZoo(model_lookuptable=self.model_lookuptable, 
model_class="embedding") + self.zoo_celltype = ModelZoo(model_lookuptable=self.model_lookuptable, model_class="celltype") def _load_lookuptable( self, diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index 4462e99b1..b51ff99e1 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -5,9 +5,10 @@ import pickle from typing import Union +from sfaira.consts import AdataIdsSfaira from sfaira.data import DistributedStore, Universe from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZooEmbedding, ModelZooCelltype +from sfaira.interface import ModelZoo class TrainModel: @@ -33,16 +34,26 @@ def __init__( self.data = data else: raise ValueError(f"did not recongize data of type {type(data)}") + self.zoo = ModelZoo() + + def load_into_memory(self): + """ + Loads backed objects from DistributedStore into single adata object in memory in .data slot. + :return: + """ + if isinstance(self.data, DistributedStore): + self.data = self.data.adata @abc.abstractmethod def init_estim(self): pass @abc.abstractmethod - def _save_specific( - self, - fn: str - ): + def save_eval(self, fn: str): + pass + + @abc.abstractmethod + def _save_specific(self, fn: str): pass def save( @@ -78,7 +89,6 @@ def __init__( data: Union[str, anndata.AnnData, Universe, DistributedStore], ): super(TrainModelEmbedding, self).__init__(data=data) - self.zoo = ModelZooEmbedding(model_lookuptable=None) self.estimator = None self.model_dir = model_path @@ -91,14 +101,11 @@ def init_estim( data=self.data, model_dir=self.model_dir, model_id=self.zoo.model_id, - model_topology=self.zoo.model_topology + model_topology=self.zoo.topology_container ) self.estimator.init_model(override_hyperpar=override_hyperpar) - def save_eval( - self, - fn: str - ): + def save_eval(self, fn: str): evaluation_train = self.estimator.evaluate_any(idx=self.estimator.idx_train) evaluation_val = 
self.estimator.evaluate_any(idx=self.estimator.idx_eval) evaluation_test = self.estimator.evaluate_any(idx=self.estimator.idx_test) @@ -112,10 +119,7 @@ def save_eval( with open(fn + '_evaluation.pickle', 'wb') as f: pickle.dump(obj=evaluation, file=f) - def _save_specific( - self, - fn: str - ): + def _save_specific(self, fn: str): """ Save embedding prediction: @@ -123,10 +127,7 @@ def _save_specific( :return: """ embedding = self.estimator.predict_embedding() - df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "author", "year", "assay_sc", - "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] - ] + df_summary = self.estimator.obs_test[AdataIdsSfaira.obs_keys] df_summary["ncounts"] = np.asarray( self.estimator.data.X[np.sort(self.estimator.idx_test), :].sum(axis=1)[np.argsort(self.estimator.idx_test)] ).flatten() @@ -145,7 +146,6 @@ def __init__( fn_target_universe: str, ): super(TrainModelCelltype, self).__init__(data=data) - self.zoo = ModelZooCelltype(model_lookuptable=None) self.estimator = None self.model_dir = model_path self.data.celltypes_universe.load_target_universe(fn=fn_target_universe) @@ -159,14 +159,11 @@ def init_estim( data=self.data, model_dir=self.model_dir, model_id=self.zoo.model_id, - model_topology=self.zoo.model_topology + model_topology=self.zoo.topology_container ) self.estimator.init_model(override_hyperpar=override_hyperpar) - def save_eval( - self, - fn: str - ): + def save_eval(self, fn: str): evaluation = { 'train': self.estimator.evaluate_any(idx=self.estimator.idx_train, weighted=False), 'val': self.estimator.evaluate_any(idx=self.estimator.idx_eval, weighted=False), @@ -184,10 +181,7 @@ def save_eval( with open(fn + '_evaluation_weighted.pickle', 'wb') as f: pickle.dump(obj=evaluation_weighted, file=f) - def _save_specific( - self, - fn: str - ): + def _save_specific(self, fn: str): """ Save true and predicted labels on test set: @@ -196,10 +190,7 @@ def 
_save_specific( """ ytrue = self.estimator.ytrue() yhat = self.estimator.predict() - df_summary = self.estimator.obs_test[ - ["dataset", "cell_ontology_class", "state_exact", "author", "year", "assay_sc", - "assay_differentiation", "assay_type_differentiation", "cell_line", "sample_source"] - ] + df_summary = self.estimator.obs_test[AdataIdsSfaira.obs_keys] df_summary["ncounts"] = np.asarray(self.estimator.data.X[self.estimator.idx_test, :].sum(axis=1)).flatten() np.save(file=fn + "_ytrue", arr=ytrue) np.save(file=fn + "_yhat", arr=yhat) @@ -207,16 +198,16 @@ def _save_specific( with open(fn + '_ontology_names.pickle', 'wb') as f: pickle.dump(obj=self.estimator.ids, file=f) - cell_counts = self.data.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() + cell_counts = self.data.obs['cell_ontology_class'].value_counts().to_dict() cell_counts_leaf = cell_counts.copy() for k in cell_counts.keys(): if k not in self.estimator.ids: - if k not in self.estimator.celltype_universe.ontology.node_ids: + if k not in self.estimator.celltype_universe.onto_cl.node_ids: raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in self.estimator.celltype_universe.ontology.node_ids: + for leaf in self.estimator.celltype_universe.onto_cl.node_ids: if leaf not in cell_counts_leaf.keys(): cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1 / len(self.estimator.celltype_universe.ontology.node_ids) + cell_counts_leaf[leaf] += 1 / len(self.estimator.celltype_universe.onto_cl.node_ids) del cell_counts_leaf[k] with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index deab6f1bc..940b22f0a 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -1,15 +1,14 @@ import numpy as np import os import pytest -import scipy.sparse 
from sfaira.data import DatasetSuperGroup from sfaira.data import Universe MOUSE_GENOME_ANNOTATION = "Mus_musculus.GRCm38.102" -dir_data = "../test_data" -dir_meta = "../test_data/meta" +dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") def test_dsgs_instantiate(): diff --git a/sfaira/unit_tests/data/test_store.py b/sfaira/unit_tests/data/test_store.py index 2d50aa9e6..b89a29d22 100644 --- a/sfaira/unit_tests/data/test_store.py +++ b/sfaira/unit_tests/data/test_store.py @@ -1,14 +1,18 @@ import numpy as np import os import pytest +import time +from typing import List from sfaira.data import DistributedStore -from sfaira.data import Universe +from sfaira.versions.genomes import GenomeContainer +from sfaira.unit_tests.utils import cached_store_writing + MOUSE_GENOME_ANNOTATION = "Mus_musculus.GRCm38.102" -dir_data = "../test_data" -dir_meta = "../test_data/meta" +dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") """ @@ -16,24 +20,14 @@ """ -def test_store_config(): +def test_config(): """ Test that data set config files can be set, written and recovered. 
""" - ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds.load() - ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, - subset_genes_to_type="protein_coding") - ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, - clean_obs_names=True) - store_path = os.path.join(dir_data, "store") + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) config_path = os.path.join(store_path, "lung") - ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=True) store = DistributedStore(cache_path=store_path) store.subset(attr_key="assay_sc", values=["10x sequencing"]) - store.subset_cells(attr_key="assay_sc", values=["10x sequencing"]) store.write_config(fn=config_path) store2 = DistributedStore(cache_path=store_path) store2.load_config(fn=config_path) @@ -41,21 +35,12 @@ def test_store_config(): assert np.all([np.all(store.indices[k] == store2.indices[k]) for k in store.indices.keys()]) -def test_store_type_targets(): +def test_type_targets(): """ Test that target leave nodes can be set, written and recovered. 
""" - ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) - ds.load() - ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": MOUSE_GENOME_ANNOTATION}, - subset_genes_to_type="protein_coding") - ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, - clean_obs_names=True) - store_path = os.path.join(dir_data, "store") + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) target_path = os.path.join(store_path, "lung") - ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=True) store = DistributedStore(cache_path=store_path) observed_nodes = np.unique(np.concatenate([ x.obs[store._adata_ids_sfaira.cell_ontology_class] @@ -72,3 +57,48 @@ def test_store_type_targets(): assert len(leaves_all) > len(leaves1) assert len(set(leaves1).union(set(leaves2))) == len(leaves1) assert np.all([x in leaves1 for x in leaves2]) + + +@pytest.mark.parametrize("idx", [None, np.concatenate([np.arange(150, 200), np.array([1, 100, 2003, 33])])]) +@pytest.mark.parametrize("batch_size", [1, 10]) +@pytest.mark.parametrize("obs_keys", [[], ["cell_ontology_class"]]) +@pytest.mark.parametrize("gc", [(None, {}), (MOUSE_GENOME_ANNOTATION, {"biotype": "protein_coding"})]) +def test_generator_shapes(idx, batch_size: int, obs_keys: List[str], gc: tuple): + """ + Test generators queries do not throw errors and that output shapes are correct. 
+ """ + assembly, subset = gc + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) + store = DistributedStore(cache_path=store_path) + if assembly is not None: + gc = GenomeContainer(assembly=assembly) + gc.subset(**subset) + store.genome_container = gc + g = store.generator( + idx=idx, + batch_size=batch_size, + obs_keys=obs_keys, + ) + nobs = len(idx) if idx is not None else store.n_obs + batch_sizes = [] + t0 = time.time() + for i, z in enumerate(g()): + x_i, obs_i = z + assert x_i.shape[0] == obs_i.shape[0] + if i == 0: # First batch has correct shape, last batch not necessarily! + x = x_i + obs = obs_i + batch_sizes.append(x_i.shape[0]) + tdelta = time.time() - t0 + print(f"time for iterating over generator:" + f" {tdelta}s for {np.sum(batch_sizes)} cells in {len(batch_sizes)} batches," + f" {tdelta / len(batch_sizes)}s per batch.") + # Only the last batch in each data set can be of different size: + assert np.sum(np.asarray(batch_sizes) != batch_size) <= len(store.adatas.keys()) + assert x.shape[0] == batch_size, (x.shape, batch_size) + assert obs.shape[0] == batch_size, (obs.shape, batch_size) + assert x.shape[1] == store.n_vars, (x.shape, store.n_vars) + assert obs.shape[1] == len(obs_keys), (obs.shape, obs_keys) + assert np.sum(batch_sizes) == nobs, (np.sum(batch_sizes), nobs) + if assembly is not None: + assert x.shape[1] == gc.n_var, (x.shape, gc.n_var) diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index 2fe831ec4..521efd9d0 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -1,18 +1,32 @@ import abc import anndata import numpy as np +import os +import pandas as pd +import pytest +import time from typing import Union +from sfaira.data import DistributedStore from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.versions.topologies import 
TopologyContainer +from sfaira.unit_tests.utils import cached_store_writing, simulate_anndata +dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") +cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "cache", "genomes") + +ASSEMBLY = "Mus_musculus.GRCm38.102" GENES = ["ENSMUSG00000000003", "ENSMUSG00000000028"] TARGETS = ["T cell", "stromal cell"] +ASSAYS = ["10x sequencing", "Smart-seq2"] + TOPOLOGY_EMBEDDING_MODEL = { "model_type": None, "input": { - "genome": "Mus_musculus.GRCm38.102", + "genome": ASSEMBLY, "genes": ["ensg", GENES], }, "output": {}, @@ -27,7 +41,7 @@ TOPOLOGY_CELLTYPE_MODEL = { "model_type": None, "input": { - "genome": "Mus_musculus.GRCm38.102", + "genome": ASSEMBLY, "genes": ["ensg", GENES], }, "output": { @@ -43,8 +57,11 @@ class HelperEstimatorBase: + + data: Union[anndata.AnnData, DistributedStore] estimator: Union[EstimatorKeras] - data: Union[anndata.AnnData] + model_type: str + tc: TopologyContainer """ Contains functions _test* to test individual functions and attributes of estimator class. @@ -53,74 +70,102 @@ class HelperEstimatorBase: basic_estimator_test(). See _test_call() for an example. """ - def simulate(self): + def _simulate(self) -> anndata.AnnData: """ Simulate basic data example used for unit test. - Sets attribute .data with simulated data. + :return: Simulated data set. + """ + return simulate_anndata(n_obs=100, assays=ASSAYS, genes=self.tc.gc.ensembl, targets=TARGETS) - :return: + def load_adata(self): """ - nobs = 100 - self.data = anndata.AnnData( - np.random.randint(low=0, high=100, size=(nobs, len(GENES))).astype(np.float32) - ) - self.data.obs["cell_ontology_class"] = [ - TARGETS[np.random.randint(0, len(TARGETS))] - for i in range(nobs) - ] - self.data.var["ensembl"] = GENES + Sets attribute .data with simulated data. 
+ """ + self.data = self._simulate() + + def load_store(self): + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY) + store = DistributedStore(cache_path=store_path) + self.data = store + + @abc.abstractmethod + def init_topology(self, model_type: str, feature_space: str): + pass @abc.abstractmethod - def init_estimator(self, model_type: str): + def init_estimator(self): """ Initialise target estimator as .estimator attribute. """ pass + def estimator_train(self, test_split): + self.estimator.init_model() + self.estimator.train( + optimizer="adam", + lr=0.005, + epochs=2, + batch_size=4, + validation_split=0.5, + test_split=test_split, + validation_batch_size=4, + max_validation_steps=1, + shuffle_buffer_size=10, + cache_full=False, + ) + @abc.abstractmethod - def basic_estimator_test(self): + def basic_estimator_test(self, test_split): pass - def fatal_estimator_test(self, model_type): + def load_estimator(self, model_type, data_type, feature_space, test_split): + self.init_topology(model_type=model_type, feature_space=feature_space) np.random.seed(1) - self.simulate() - self.init_estimator(model_type=model_type) + if data_type == "adata": + self.load_adata() + else: + self.load_store() + self.init_estimator() + self.estimator_train(test_split=test_split) + + def fatal_estimator_test(self, model_type, data_type, test_split=0.1, feature_space="small"): + self.load_estimator(model_type=model_type, data_type=data_type, feature_space=feature_space, + test_split=test_split) self.basic_estimator_test() - return True class HelperEstimatorKerasEmbedding(HelperEstimatorBase): estimator: EstimatorKerasEmbedding + model_type: str + tc: TopologyContainer - def init_estimator(self, model_type): + def init_topology(self, model_type: str, feature_space: str): topology = TOPOLOGY_EMBEDDING_MODEL.copy() + if feature_space == "full": + # Read 500 genes (not full protein coding) to compromise between being able to distinguish observations + # 
and reducing run time of unit tests. + tab = pd.read_csv(os.path.join(cache_dir, ASSEMBLY + ".csv")) + genes_full = tab.loc[tab["gene_biotype"].values == "protein_coding", "gene_id"].values[:500].tolist() + topology["input"]["genes"] = ["ensg", genes_full] topology["model_type"] = model_type if model_type == "linear": topology["hyper_parameters"]["latent_dim"] = 2 else: - topology["hyper_parameters"]["latent_dim"] = (len(GENES), 2, len(GENES)) + topology["hyper_parameters"]["latent_dim"] = (2, 2, 2) self.model_type = model_type + self.tc = TopologyContainer(topology=topology, topology_id="0.1") + + def init_estimator(self): self.estimator = EstimatorKerasEmbedding( data=self.data, model_dir=None, model_id="testid", - model_topology=TopologyContainer(topology=topology, topology_id="0.1") + model_topology=self.tc ) - def basic_estimator_test(self): - self.estimator.init_model() - self.estimator.train( - optimizer="adam", - lr=0.005, - epochs=2, - batch_size=32, - validation_split=0.1, - test_split=0.1, - validation_batch_size=32, - max_validation_steps=1 - ) + def basic_estimator_test(self, test_split=0.1): _ = self.estimator.evaluate() prediction_output = self.estimator.predict() prediction_embed = self.estimator.predict_embedding() @@ -131,41 +176,37 @@ def basic_estimator_test(self): new_prediction_embed = self.estimator.predict_embedding() new_weights = self.estimator.model.training_model.get_weights() for i in range(len(weights)): - assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) + if not np.any(np.isnan(weights[i])): + assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) if self.model_type != 'vae': - assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) - assert np.allclose(prediction_embed, new_prediction_embed, rtol=1e-6, atol=1e-6) + if not np.any(np.isnan(prediction_output)): + assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) + assert 
np.allclose(prediction_embed, new_prediction_embed, rtol=1e-6, atol=1e-6) class HelperEstimatorKerasCelltype(HelperEstimatorBase): estimator: EstimatorKerasCelltype + model_type: str + tc: TopologyContainer - def init_estimator(self, model_type: str): + def init_topology(self, model_type: str, feature_space: str): topology = TOPOLOGY_CELLTYPE_MODEL.copy() topology["model_type"] = model_type - topology["hyper_parameters"]["latent_dim"] = (len(GENES), 2) + topology["hyper_parameters"]["latent_dim"] = (2,) self.model_type = model_type + self.tc = TopologyContainer(topology=topology, topology_id="0.1") + + def init_estimator(self): self.estimator = EstimatorKerasCelltype( data=self.data, model_dir=None, model_id="testid", - model_topology=TopologyContainer(topology=topology, topology_id="0.1"), + model_topology=self.tc ) self.estimator.celltype_universe.leaves = TARGETS - def basic_estimator_test(self): - self.estimator.init_model() - self.estimator.train( - optimizer="adam", - lr=0.005, - epochs=2, - batch_size=32, - validation_split=0.1, - test_split=0.1, - validation_batch_size=32, - max_validation_steps=1 - ) + def basic_estimator_test(self, test_split=0.1): _ = self.estimator.evaluate() prediction_output = self.estimator.predict() weights = self.estimator.model.training_model.get_weights() @@ -175,36 +216,165 @@ def basic_estimator_test(self): new_weights = self.estimator.model.training_model.get_weights() print(self.estimator.model.training_model.summary()) for i in range(len(weights)): - assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) - assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) + if not np.any(np.isnan(weights[i])): + assert np.allclose(weights[i], new_weights[i], rtol=1e-6, atol=1e-6) + if not np.any(np.isnan(prediction_output)): + assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) # Test embedding models: -def test_for_fatal_linear(): 
+@pytest.mark.parametrize("data_type", ["adata", "store"]) +def test_for_fatal_linear(data_type): test_estim = HelperEstimatorKerasEmbedding() - test_estim.fatal_estimator_test(model_type="linear") + test_estim.fatal_estimator_test(model_type="linear", data_type=data_type) def test_for_fatal_ae(): test_estim = HelperEstimatorKerasEmbedding() - test_estim.fatal_estimator_test(model_type="ae") + test_estim.fatal_estimator_test(model_type="ae", data_type="adata") def test_for_fatal_vae(): test_estim = HelperEstimatorKerasEmbedding() - test_estim.fatal_estimator_test(model_type="vae") + test_estim.fatal_estimator_test(model_type="vae", data_type="adata") # Test cell type predictor models: -def test_for_fatal_mlp(): +@pytest.mark.parametrize("data_type", ["adata", "store"]) +def test_for_fatal_mlp(data_type): test_estim = HelperEstimatorKerasCelltype() - test_estim.fatal_estimator_test(model_type="mlp") + test_estim.fatal_estimator_test(model_type="mlp", data_type=data_type) def test_for_fatal_marker(): test_estim = HelperEstimatorKerasCelltype() - test_estim.fatal_estimator_test(model_type="marker") + test_estim.fatal_estimator_test(model_type="marker", data_type="adata") + + +# Test index sets + + +@pytest.mark.parametrize("data_type", ["adata", "store"]) +@pytest.mark.parametrize("test_split", [0.3, {"assay_sc": "10x sequencing"}]) +def test_split_index_sets(data_type: str, test_split): + """ + Test that train, val, test split index sets are correct: + + 1) complete + 2) non-overlapping + 3) that test indices map to all (float split) or distinct (attribute split) data sets + 4) do not contain duplicated observations within and across splits (defined based on the feature vectors) + """ + test_estim = HelperEstimatorKerasEmbedding() + # Need full feature space here because observations are not necessarily different in small model testing feature + # space with only two genes: + t0 = time.time() + test_estim.load_estimator(model_type="linear", data_type=data_type, 
test_split=test_split, feature_space="full") + print(f"time for running estimator test: {time.time() - t0}s") + idx_train = test_estim.estimator.idx_train + idx_eval = test_estim.estimator.idx_eval + idx_test = test_estim.estimator.idx_test + # 1) Assert that index assignments sum up to full data set: + assert len(idx_train) + len(idx_eval) + len(idx_test) == test_estim.data.n_obs, \ + (len(idx_train), len(idx_eval), len(idx_test), test_estim.data.n_obs) + # 2) Assert that index assignments are exclusive to each split: + assert len(set(idx_train).intersection(set(idx_eval))) == 0 + assert len(set(idx_train).intersection(set(idx_test))) == 0 + assert len(set(idx_test).intersection(set(idx_eval))) == 0 + # 3) Check partition of index vectors over store data sets matches test split scenario: + if isinstance(test_estim.estimator.data, DistributedStore): + # Prepare data set-wise index vectors that are numbered in the same way as global split index vectors. + # See also EstimatorKeras.train and DistributedStore.subset_cells_idx_global + idx_raw = test_estim.estimator.data.indices_global + if isinstance(test_split, float): + # Make sure that indices from each split are in each data set: + for z in [idx_train, idx_eval, idx_test]: + assert np.all([ # in each data set + np.any([y in z for y in x]) # at least one match of data set to split index set + for x in idx_raw + ]) + else: + # Make sure that indices from (train, val) and test split are exclusive: + datasets_train = np.where([ # in each data set + np.any([y in idx_train for y in x]) # at least one match of data set to split index set + for x in idx_raw + ])[0] + datasets_eval = np.where([ # in each data set + np.any([y in idx_eval for y in x]) # at least one match of data set to split index set + for x in idx_raw + ])[0] + datasets_test = np.where([ # in each data set + np.any([y in idx_test for y in x]) # at least one match of data set to split index set + for x in idx_raw + ])[0] + assert np.array_equal(datasets_train, 
datasets_eval), (datasets_train, datasets_eval) + assert len(set(datasets_train).intersection(set(datasets_test))) == 0, (datasets_train, datasets_test) + # 4) Assert that observations mapped to indices are actually unique based on expression vectors: + # Build numpy arrays of expression input data sets from tensorflow data sets directly from estimator. + # These data sets are the most processed transformation of the data and stand directly in contact with the model. + t0 = time.time() + ds_train = test_estim.estimator._get_dataset(idx=idx_train, batch_size=128, mode='eval', shuffle_buffer_size=1, + retrieval_batch_size=128) + print(f"time for building training data set: {time.time() - t0}s") + t0 = time.time() + ds_eval = test_estim.estimator._get_dataset(idx=idx_eval, batch_size=128, mode='eval', shuffle_buffer_size=1, + retrieval_batch_size=128) + print(f"time for building validation data set: {time.time() - t0}s") + t0 = time.time() + ds_test = test_estim.estimator._get_dataset(idx=idx_test, batch_size=128, mode='eval', shuffle_buffer_size=1, + retrieval_batch_size=128) + print(f"time for building test data set: {time.time() - t0}s") + x_train = [] + x_eval = [] + x_test = [] + t0 = time.time() + for x, y in ds_train.as_numpy_iterator(): + x_train.append(x[0]) + x_train = np.concatenate(x_train, axis=0) + print(f"time for iterating over training data set: {time.time() - t0}s") + t0 = time.time() + for x, y in ds_eval.as_numpy_iterator(): + x_eval.append(x[0]) + x_eval = np.concatenate(x_eval, axis=0) + print(f"time for iterating over validation data set: {time.time() - t0}s") + t0 = time.time() + for x, y in ds_test.as_numpy_iterator(): + x_test.append(x[0]) + x_test = np.concatenate(x_test, axis=0) + print(f"time for iterating over test data set: {time.time() - t0}s") + # Validate size of recovered numpy data sets: + print(f"shapes received {(x_train.shape[0], x_eval.shape[0], x_test.shape[0])}") + print(f"shapes expected {(len(idx_train), len(idx_eval), 
len(idx_test))}") + assert x_train.shape[0] == len(idx_train) + assert x_eval.shape[0] == len(idx_eval) + assert x_test.shape[0] == len(idx_test) + # Assert that observations are unique within partition: + assert np.all([ + np.sum([np.all(x_train[i] == x_train[j]) for j in range(x_train.shape[0])]) == 1 + for i in range(x_train.shape[0]) + ]) + assert np.all([ + np.sum([np.all(x_eval[i] == x_eval[j]) for j in range(x_eval.shape[0])]) == 1 + for i in range(x_eval.shape[0]) + ]) + assert np.all([ + np.sum([np.all(x_test[i] == x_test[j]) for j in range(x_test.shape[0])]) == 1 + for i in range(x_test.shape[0]) + ]) + # Assert that observations are not replicated across partitions: + assert not np.any([ + np.any([np.all(x_train[i] == x_eval[j]) for j in range(x_eval.shape[0])]) + for i in range(x_train.shape[0]) + ]) + assert not np.any([ + np.any([np.all(x_train[i] == x_test[j]) for j in range(x_test.shape[0])]) + for i in range(x_train.shape[0]) + ]) + assert not np.any([ + np.any([np.all(x_test[i] == x_eval[j]) for j in range(x_eval.shape[0])]) + for i in range(x_test.shape[0]) + ]) diff --git a/sfaira/unit_tests/interface/test_userinterface.py b/sfaira/unit_tests/interface/test_userinterface.py index 504e91984..613d016fe 100644 --- a/sfaira/unit_tests/interface/test_userinterface.py +++ b/sfaira/unit_tests/interface/test_userinterface.py @@ -1,12 +1,11 @@ import numpy as np import os from typing import Union -import unittest from sfaira.interface import UserInterface -class TestUi(unittest.TestCase): +class TestUi: ui: Union[UserInterface] data: np.ndarray @@ -27,7 +26,7 @@ def simulate(self): """ pass - def test_basic(self): + def _test_basic(self): """ Test all relevant model methods. 
@@ -47,7 +46,3 @@ def _test_kipoi(self): temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), '../test_data') self.ui = UserInterface(custom_repo=temp_fn, sfaira_repo=False) self.ui.compute_embedding_kipoi() - - -if __name__ == '__main__': - unittest.main() diff --git a/sfaira/unit_tests/interface/test_zoo.py b/sfaira/unit_tests/interface/test_zoo.py new file mode 100644 index 000000000..2a8f2bd30 --- /dev/null +++ b/sfaira/unit_tests/interface/test_zoo.py @@ -0,0 +1,31 @@ +import os +from sfaira.interface import ModelZoo + +dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") + + +def test_for_fatal_embedding(): + model_id = "embedding_human-lung-linear-0.1-0.1_mylab" + zoo = ModelZoo() + zoo.model_id = model_id + assert zoo.model_id == model_id + assert zoo.model_class == "embedding" + assert zoo.model_name == "human-lung-linear-0.1-0.1" + assert zoo.organisation == "mylab" + _ = zoo.topology_container + _ = zoo.topology_container.topology + _ = zoo.topology_container.gc + + +def test_for_fatal_celltype(): + model_id = "celltype_human-lung-mlp-0.0.1-0.1_mylab" + zoo = ModelZoo() + zoo.model_id = model_id + assert zoo.model_id == model_id + assert zoo.model_class == "celltype" + assert zoo.model_name == "human-lung-mlp-0.0.1-0.1" + assert zoo.organisation == "mylab" + _ = zoo.topology_container + _ = zoo.topology_container.topology + _ = zoo.topology_container.gc diff --git a/sfaira/unit_tests/trainer/__init__.py b/sfaira/unit_tests/trainer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sfaira/unit_tests/trainer/test_trainer.py b/sfaira/unit_tests/trainer/test_trainer.py new file mode 100644 index 000000000..c9886c5f0 --- /dev/null +++ b/sfaira/unit_tests/trainer/test_trainer.py @@ -0,0 +1,78 @@ +import anndata +import numpy as np +import os +import pytest +from typing import Union + +from sfaira.data 
import DistributedStore +from sfaira.interface import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.train import TrainModelCelltype, TrainModelEmbedding +from sfaira.unit_tests.utils import cached_store_writing, simulate_anndata + +dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") + +ASSEMBLY = "Mus_musculus.GRCm38.102" +TARGETS = ["T cell", "stromal cell"] + + +class HelperTrainerBase: + + data: Union[anndata.AnnData, DistributedStore] + trainer: Union[TrainModelCelltype, TrainModelEmbedding] + zoo: ModelZoo + + def __init__(self, zoo: ModelZoo): + self.model_id = zoo.model_id + self.tc = zoo.topology_container + + def _simulate(self) -> anndata.AnnData: + """ + Simulate basic data example used for unit test. + + :return: Simulated data set. + """ + return simulate_anndata(n_obs=100, genes=self.tc.gc.ensembl, targets=TARGETS) + + def load_adata(self): + """ + Sets attribute .data with simulated data. 
+ """ + self.data = self._simulate() + + def load_store(self): + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY) + store = DistributedStore(cache_path=store_path) + self.data = store + + def load_data(self, data_type): + np.random.seed(1) + if data_type == "adata": + self.load_adata() + else: + self.load_store() + + def test_for_fatal(self, cls): + self.load_data(data_type="adata") + trainer = cls( + data=self.data, + model_path=dir_meta, + ) + trainer.zoo.set_model_id(model_id=self.model_id) + trainer.init_estim(override_hyperpar={}) + + +def test_for_fatal_embedding(): + model_id = "embedding_human-lung_linear_mylab_0.1_0.1" + zoo = ModelZooEmbedding() + zoo.set_model_id(model_id=model_id) + test_trainer = HelperTrainerBase(zoo=zoo) + test_trainer.test_for_fatal(cls=TrainModelEmbedding) + + +def test_for_fatal(): + model_id = "celltype_human-lung_mlp_mylab_0.0.1_0.1" + zoo = ModelZooCelltype() + zoo.set_model_id(model_id=model_id) + test_trainer = HelperTrainerBase(zoo=zoo) + test_trainer.test_for_fatal(cls=TrainModelCelltype) diff --git a/sfaira/unit_tests/utils.py b/sfaira/unit_tests/utils.py new file mode 100644 index 000000000..32d3d03fb --- /dev/null +++ b/sfaira/unit_tests/utils.py @@ -0,0 +1,53 @@ +import anndata +import numpy as np +import os + +from sfaira.data import Universe + + +def simulate_anndata(genes, n_obs, targets=None, assays=None) -> anndata.AnnData: + """ + Simulate basic data example. + + :return: AnnData instance. 
+ """ + data = anndata.AnnData( + np.random.randint(low=0, high=100, size=(n_obs, len(genes))).astype(np.float32) + ) + if assays is not None: + data.obs["assay_sc"] = [ + assays[np.random.randint(0, len(targets))] + for i in range(n_obs) + ] + if targets is not None: + data.obs["cell_ontology_class"] = [ + targets[np.random.randint(0, len(targets))] + for i in range(n_obs) + ] + data.var["ensembl"] = genes + return data + + +def cached_store_writing(dir_data, dir_meta, assembly) -> os.PathLike: + """ + Writes a store if it does not already exist. + + :return: Path to store. + """ + store_path = os.path.join(dir_data, "store") + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key="organism", values=["mouse"]) + ds.subset(key="organ", values=["lung"]) + # Only load files that are not already in cache. + anticipated_files = np.unique([ + v.doi for k, v in ds.datasets.items() + if not os.path.exists(os.path.join(store_path, v.doi_cleaned_id + ".h5ad")) + ]).tolist() + ds.subset(key="doi", values=anticipated_files) + ds.load(allow_caching=True) + ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": assembly}, + subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=True, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) + ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=False) + return store_path From 914920c64ba0cb615bdcddabeffd1af9f2432efd Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 30 Apr 2021 10:48:00 +0200 Subject: [PATCH 137/161] fixed config scripts (#257) --- sfaira/data/utils_scripts/create_anatomical_configs_store.py | 2 +- sfaira/data/utils_scripts/create_target_universes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sfaira/data/utils_scripts/create_anatomical_configs_store.py b/sfaira/data/utils_scripts/create_anatomical_configs_store.py index 94ca0e507..75b157484 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs_store.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs_store.py @@ -90,4 +90,4 @@ store = DistributedStore(cache_path=store_path) store.subset(attr_key="organism", values=[organism]) store.subset(attr_key="organ", values=[organ]) - store.write_config(os.path.join(config_path, f"config_{clean_string(organism)}_{clean_string(organ)}.csv")) + store.write_config(os.path.join(config_path, f"config_{clean_string(organism)}_{clean_string(organ)}")) diff --git a/sfaira/data/utils_scripts/create_target_universes.py b/sfaira/data/utils_scripts/create_target_universes.py index cae6643aa..7cb158cbf 100644 --- a/sfaira/data/utils_scripts/create_target_universes.py +++ b/sfaira/data/utils_scripts/create_target_universes.py @@ -20,7 +20,7 @@ fn = os.path.join(config_path, f) if os.path.isfile(fn): # only files # Narrow down to supported file types: - if f.split(".")[-1] == "csv" and f.startswith("config_"): + if f.split(".")[-1] == "pickle" and f.startswith("config_"): print(f"Writing {f}") organism = f.split("_")[1] organ = f.split("_")[2] From e2c27ff20f9f0f94a243418814cef3b17cba994d Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Fri, 30 Apr 2021 11:01:23 +0200 Subject: [PATCH 138/161] fix filename in config load (#258) --- sfaira/data/base/distributed_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index 
7d6796e63..abb4d5b2c 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -350,7 +350,7 @@ def load_config(self, fn: Union[str, os.PathLike]): :param fn: Output file without file type extension. """ - with open(fn + '.pickle', 'rb') as f: + with open(fn, 'rb') as f: self.indices = pickle.load(f) # Subset to described data sets: for x in self.indices.keys(): From 1353efc49ff1ffdb62fc0481a9bc554057598343 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Fri, 30 Apr 2021 20:23:44 +0200 Subject: [PATCH 139/161] Store fix (#259) * reduced indices in store to non empty indices * fixed unit test * extended store class documentation * added additional edges to UBERON wrapper class * updated DAG break warning in ontology classes * made empty argument error in get_effective_leaves more verbose * warning instead of error if empty sets are passed to target universe writing * removed empty configs * caught non annotated data sets * fix out path in target writing and some syntax * enforced subsetting by selected indices within data set in target universe writing script Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> --- sfaira/data/base/distributed_store.py | 46 +++++++++++++------ .../create_anatomical_configs_store.py | 2 - .../utils_scripts/create_target_universes.py | 39 +++++++++------- sfaira/unit_tests/data/test_store.py | 4 +- sfaira/unit_tests/versions/test_ontologies.py | 4 ++ sfaira/versions/metadata/base.py | 14 +++++- 6 files changed, 74 insertions(+), 35 deletions(-) diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index abb4d5b2c..f86237281 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -17,8 +17,24 @@ class DistributedStore: Data set group class tailored to data access requirements common in high-performance computing (HPC). 
This class does not inherit from DatasetGroup because it entirely relies on the cached objects. + This class is centred around .adatas and .indices. + + .adatas is a dictionary (by id) of backed anndata instances that point to individual h5ads. + This dictionary is intialised with all h5ads in the store. + As the store is subsetted, key-value pairs are deleted from this dictionary. + + .indices have keys that correspond to keys in .adatas and contain index vectors of observations in the anndata + instances in .adatas which are still kept. + These index vectors are a form of lazy slicing that does not require data set loading or re-writing. + As the store is subsetted, key-value pairs are deleted from this dictionary if no observations from a given key + match the subsetting. + If a subset of observations from a key matches the subsetting operation, the index set in the corresponding value is + reduced. + All data retrievel operations work on .indices: Generators run over these indices when retrieving observations for + example. """ + adatas: Dict[str, anndata.AnnData] indices: Dict[str, np.ndarray] def __init__(self, cache_path: Union[str, os.PathLike, None] = None): @@ -130,7 +146,7 @@ def generator( var_idx = None def generator() -> tuple: - global_index_set = dict(list(zip(list(self.adatas.keys()), self.indices_global))) + global_index_set = self.indices_global for i, (k, v) in enumerate(self.adatas.items()): # Define batch partitions: # Get subset of target indices that fall into this data set. @@ -186,7 +202,7 @@ def celltypes_universe(self) -> CelltypeUniverse: ) return self._celltype_universe - def _get_subset_idx(self, attr_key, values: Union[str, List[str]]): + def _get_subset_idx(self, attr_key, values: Union[str, List[str]]) -> dict: """ Get indices of subset list of adata objects based on cell-wise properties. 
@@ -250,11 +266,15 @@ def get_subset_idx(adata, k, dataset): return idx indices = {} - for k, v in self.adatas.items(): - idx_old = self.indices[k].tolist() - idx_new = get_subset_idx(adata=v, k=attr_key, dataset=k) + for k, v in self.indices.items(): + idx_old = v.tolist() + if k not in self.adatas.keys(): + raise ValueError(f"data set {k} queried by indices does not exist in store (.adatas)") + idx_new = get_subset_idx(adata=self.adatas[k], k=attr_key, dataset=k) # Keep intersection of old and new hits. - indices[k] = np.asarray(list(set(idx_old).intersection(set(idx_new))), dtype="int32") + idx_new = list(set(idx_old).intersection(set(idx_new))) + if len(idx_new) > 0: + indices[k] = np.asarray(idx_new, dtype="int32") return indices def subset(self, attr_key, values: Union[str, List[str]]): @@ -281,8 +301,8 @@ def subset(self, attr_key, values: Union[str, List[str]]): """ self.indices = self._get_subset_idx(attr_key=attr_key, values=values) - for k, v in self.indices.items(): - if v.shape[0] == 0: # No observations (cells) left. + for k in list(self.adatas.keys()): + if k not in self.indices or self.indices[k].shape[0] == 0: # No observations (cells) left. del self.adatas[k] def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]) -> np.ndarray: @@ -319,16 +339,16 @@ def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]) -> np return np.asarray(idx) @property - def indices_global(self): + def indices_global(self) -> dict: """ Increasing indices across data sets which can be concatenated into a single index vector with unique entries for cells. 
""" counter = 0 - indices = [] - for k, v in self.adatas.items(): - indices.append(np.arange(counter, counter + v.n_obs)) - counter += v.n_obs + indices = {} + for k, v in self.indices.items(): + indices[k] = np.arange(counter, counter + len(v)) + counter += len(v) return indices def write_config(self, fn: Union[str, os.PathLike]): diff --git a/sfaira/data/utils_scripts/create_anatomical_configs_store.py b/sfaira/data/utils_scripts/create_anatomical_configs_store.py index 75b157484..925a1e23b 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs_store.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs_store.py @@ -23,7 +23,6 @@ "bone marrow", "brain", "chorionic villus", - "diaphragm", "esophagus", "eye", "gall bladder", @@ -45,7 +44,6 @@ "spleen", "stomach", "testis", - "tongue", "thymus", "thyroid gland", "trachea", diff --git a/sfaira/data/utils_scripts/create_target_universes.py b/sfaira/data/utils_scripts/create_target_universes.py index 7cb158cbf..d75c0c80a 100644 --- a/sfaira/data/utils_scripts/create_target_universes.py +++ b/sfaira/data/utils_scripts/create_target_universes.py @@ -6,8 +6,6 @@ # Any data loader here to extract path: from sfaira.data import DistributedStore -print(tf.__version__) - # Set global variables. 
print("sys.argv", sys.argv) @@ -16,25 +14,34 @@ config_path = str(sys.argv[2]) out_path = str(sys.argv[3]) +col_name_annot = "cell_ontology_class" + for f in os.listdir(config_path): fn = os.path.join(config_path, f) if os.path.isfile(fn): # only files # Narrow down to supported file types: if f.split(".")[-1] == "pickle" and f.startswith("config_"): - print(f"Writing {f}") + print(f"Writing target universe for {f}") organism = f.split("_")[1] - organ = f.split("_")[2] + organ = f.split("_")[2].split(".")[0] store = DistributedStore(cache_path=store_path) store.load_config(fn=fn) - celltypes_found = {} - for adata in store.adatas: - celltypes_found = celltypes_found.union(set(adata.obs["cell_ontology_class"].values)) - celltypes_found = np.sort(list(celltypes_found - { - store._adata_ids_sfaira.unknown_celltype_identifier, - store._adata_ids_sfaira.not_a_cell_celltype_identifier - })).tolist() - celltypes_found = store.celltypes_universe.onto_cl.get_effective_leaves(x=celltypes_found) - store.celltypes_universe.write_target_universe( - fn=os.path.join(config_path, f"targets_{organism}_{organ}.csv"), - x=celltypes_found, - ) + celltypes_found = set([]) + for k, idx in store.indices.items(): + if col_name_annot not in store.adatas[k].obs.columns: + print(f"WARNING: annotation column {col_name_annot} not found in {k}, skipping.") + else: + if len(idx) > 0: + celltypes_found = celltypes_found.union( + set(store.adatas[k].obs[col_name_annot].values[idx].tolist()) + ) + celltypes_found = sorted(list(celltypes_found - {store._adata_ids_sfaira.unknown_celltype_identifier, + store._adata_ids_sfaira.not_a_cell_celltype_identifier})) + if len(celltypes_found) == 0: + print(f"WARNING: No cells found for {organism} {organ}, skipping.") + else: + celltypes_found = store.celltypes_universe.onto_cl.get_effective_leaves(x=celltypes_found) + store.celltypes_universe.write_target_universe( + fn=os.path.join(out_path, f"targets_{organism}_{organ}.csv"), + x=celltypes_found, + ) diff 
--git a/sfaira/unit_tests/data/test_store.py b/sfaira/unit_tests/data/test_store.py index b89a29d22..d0befe3c0 100644 --- a/sfaira/unit_tests/data/test_store.py +++ b/sfaira/unit_tests/data/test_store.py @@ -25,12 +25,12 @@ def test_config(): Test that data set config files can be set, written and recovered. """ store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) - config_path = os.path.join(store_path, "lung") + config_path = os.path.join(store_path, "config_lung") store = DistributedStore(cache_path=store_path) store.subset(attr_key="assay_sc", values=["10x sequencing"]) store.write_config(fn=config_path) store2 = DistributedStore(cache_path=store_path) - store2.load_config(fn=config_path) + store2.load_config(fn=config_path + ".pickle") assert np.all(store.indices.keys() == store2.indices.keys()) assert np.all([np.all(store.indices[k] == store2.indices[k]) for k in store.indices.keys()]) diff --git a/sfaira/unit_tests/versions/test_ontologies.py b/sfaira/unit_tests/versions/test_ontologies.py index 8eccb2903..9d568919e 100644 --- a/sfaira/unit_tests/versions/test_ontologies.py +++ b/sfaira/unit_tests/versions/test_ontologies.py @@ -164,3 +164,7 @@ def test_uberon_subsetting(): assert ou.is_a(query="lobar bronchus", reference="lung") assert ou.is_a(query="lobar bronchus", reference="lobar bronchus") assert not ou.is_a(query="lung", reference="lobar bronchus") + + assert ou.is_a(query="adipose tissue of abdominal region", reference="adipose tissue") + assert ou.is_a(query="adipose tissue", reference="adipose tissue") + assert not ou.is_a(query="adipose tissue", reference="adipose tissue of abdominal region") diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 15dd7edaf..4a9aab37a 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -116,7 +116,7 @@ class OntologyHierarchical(Ontology, abc.ABC): def _check_graph(self): if not 
networkx.is_directed_acyclic_graph(self.graph): - warnings.warn("DAG was broken") + warnings.warn(f"DAG was broken in {type(self)}") def __validate_node_ids(self, x: Union[str, List[str]]): if isinstance(x, str): @@ -229,6 +229,13 @@ def get_effective_leaves(self, x: List[str]) -> List[str]: :param x: Observed node IDs. :return: Effective leaves. """ + if isinstance(x, str): + x = [x] + if isinstance(x, np.ndarray): + x = x.tolist() + assert isinstance(x, list), "supply either list or str to get_effective_leaves" + if len(x) == 0: + raise ValueError("x was empty list, get_effective_leaves cannot be called on empty list") x = np.unique(x).tolist() x = self.convert_to_id(x=x) leaves = [] @@ -607,7 +614,7 @@ def __init__( 'innervated_by', 'innervates', 'intersects_midsagittal_plane_of', - 'is_a', + 'is_a', # term DAG -> include because it connect conceptual tissue groups 'layer_part_of', 'located_in', # anatomic DAG -> include because it reflects the anatomic coarseness / hierarchy 'location_of', @@ -648,6 +655,9 @@ def __init__( assert x[2] in edge_types, x if x[2] not in [ "develops_from", + 'develops_from_part_of', + 'directly_develops_from', + "is_a", "located_in", "part_of", ]: From a40fd0a91e753025d9ee4a668b9dc09df19df208 Mon Sep 17 00:00:00 2001 From: le-ander <20015434+le-ander@users.noreply.github.com> Date: Wed, 5 May 2021 15:53:51 +0200 Subject: [PATCH 140/161] fix download function --- sfaira/data/base/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index cc335a10b..2ae3d9330 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -301,7 +301,7 @@ def download(self, **kwargs): if not os.path.exists(os.path.join(self.data_dir_base, self.directory_formatted_doi)): os.makedirs(os.path.join(self.data_dir_base, self.directory_formatted_doi)) - urls = self.download_url_data[0][0] + self.download_url_meta[0][0] + urls = self.download_url_data[0] + 
self.download_url_meta[0] for url in urls: if url is None: From e07f850198a665aeeffa8fac5534ceea48919035 Mon Sep 17 00:00:00 2001 From: Leander <20015434+le-ander@users.noreply.github.com> Date: Thu, 6 May 2021 13:24:21 +0200 Subject: [PATCH 141/161] Add polioudakis data and small fixes (#261) * add polioudakis data * fix download function * add manual download for polioudakis * fix polioudakis * fix maps writing * update polioudakis * add celltype ids * fix cli print * update manual downlaod workflow * fix manual download * fix download url polioudakis --- sfaira/commands/test_dataloader.py | 2 +- sfaira/data/base/dataset.py | 16 ++++-- sfaira/data/base/dataset_group.py | 2 +- .../d10_1016_j_neuron_2019_06_011/__init__.py | 1 + ...uman_brain_2019_dropseq_polioudakis_001.py | 52 +++++++++++++++++++ ...man_brain_2019_dropseq_polioudakis_001.tsv | 17 ++++++ ...an_brain_2019_dropseq_polioudakis_001.yaml | 51 ++++++++++++++++++ 7 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/__init__.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.py create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.tsv create mode 100644 sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.yaml diff --git a/sfaira/commands/test_dataloader.py b/sfaira/commands/test_dataloader.py index 6fa796102..16f9f0a58 100644 --- a/sfaira/commands/test_dataloader.py +++ b/sfaira/commands/test_dataloader.py @@ -39,7 +39,7 @@ def _run_unittest(self): """ print('[bold blue]Conflicts are not automatically resolved.') print('[bold blue]Please go back to [bold]https://www.ebi.ac.uk/ols/ontologies/cl[blue] for every mismatch or conflicts ' - 'and add the correct cell ontology class name into the .csv "target" column.') + 'and add the correct 
cell ontology class name into the .tsv "target" column.') os.chdir(f'{self.path}/sfaira/unit_tests/data_contribution') diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 2ae3d9330..7070a155c 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -306,6 +306,7 @@ def download(self, **kwargs): for url in urls: if url is None: continue + # Special case for data that is not publically available if url.split(",")[0] == 'private': if "," in url: fn = ','.join(url.split(',')[1:]) @@ -318,14 +319,23 @@ def download(self, **kwargs): else: warnings.warn(f"A file for dataset {self.id} is not available for automatic download, please" f"manually copy the associated file to the following location: {self.data_dir}") - + # Special case for data from the synapse portal elif url.split(",")[0].startswith('syn'): fn = ",".join(url.split(",")[1:]) if os.path.isfile(os.path.join(self.data_dir, fn)): print(f"File {fn} already found on disk, skipping download.") else: self._download_synapse(url.split(",")[0], fn, **kwargs) - + # Special case for public data that is labelled as not automatically downloadable + elif url.split(",")[0] == 'manual': + u = ",".join(url.split(",")[2:]) + fn = url.split(",")[1] + if os.path.isfile(os.path.join(self.data_dir, fn)): + print(f"File {fn} already found on disk, skipping download.") + else: + print(f"Data file {fn} for dataset {self.id} cannot be retrieved automatically. 
" + f"Please download it from {u} and copy to {os.path.join(self.data_dir, fn)}") + # All other cases else: url = urllib.parse.unquote(url) try: @@ -1058,7 +1068,7 @@ def write_ontology_class_map( if not self.annotated: warnings.warn(f"attempted to write ontology classmaps for data set {self.id} without annotation") else: - labels_original = np.sort(np.unique(self.adata.obs[self._adata_ids.cell_types_original].values)) + labels_original = np.sort(np.unique(self.adata.obs[self.cell_types_original_obs_key].values)) tab = self.celltypes_universe.prepare_celltype_map_tab( source=labels_original, match_only=False, diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 23bf4b58b..915e7c835 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -291,7 +291,7 @@ def write_ontology_class_map( for k, v in self.datasets.items(): if v.annotated: labels_original = np.sort(np.unique(np.concatenate([ - v.adata.obs[self._adata_ids.cell_types_original].values + v.adata.obs[v.cell_types_original_obs_key].values ]))) tab.append(v.celltypes_universe.prepare_celltype_map_tab( source=labels_original, diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/__init__.py b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/__init__.py new file mode 100644 index 000000000..b1d5b2c2b --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/__init__.py @@ -0,0 +1 @@ +FILE_PATH = __file__ diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.py new file mode 100644 index 000000000..e2d8b8435 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.py @@ -0,0 +1,52 @@ +import os +import pandas +import shutil +import zipfile + + +def 
load(data_dir, **kwargs): + age_dict = { + 17: "17th week post-fertilization human stage", + 18: "18th week post-fertilization human stage", + } + ct_dict = { + "End": "Endothelial", + "ExDp1": "Excitatory deep layer 1", + "ExDp2": "Excitatory deep layer 2", + "ExM": "Maturing excitatory", + "ExM-U": "Maturing excitatory upper enriched", + "ExN": "Migrating excitatory", + "IP": "IP", + "InCGE": "Interneuron CGE", + "InMGE": "Interneuron MGE", + "Mic": "Microglia", + "OPC": "OPC", + "Per": "Pericyte", + "PgG2M": "Cycling Progenitors (G2/M phase)", + "PgS": "Cycling Progenitors (S phase)", + "oRG": "Outer Radial Glia", + "vRG": "Ventricular Radial Glia", + } + + import anndata2ri + from rpy2.robjects import r + fn = os.path.join(data_dir, "sc_dev_cortex_geschwind.zip") + fn_tmp = os.path.join(os.path.expanduser("~"), "sfaira_tmp") + if not os.path.exists(fn_tmp): + os.makedirs(fn_tmp) + with zipfile.ZipFile(fn, 'r') as zip_ref: + zip_ref.extractall(fn_tmp) + anndata2ri.activate() # TODO: remove global activation of anndata2ri and use localconverter once it's fixed + adata = r( + f"library(Seurat)\n" + f"load('{os.path.join(fn_tmp, 'raw_counts_mat.rdata')}')\n" + f"new_obj = CreateSeuratObject(raw_counts_mat)\n" + f"as.SingleCellExperiment(new_obj)\n" + ) + obs = pandas.read_csv(os.path.join(fn_tmp, "cell_metadata.csv"), index_col=0) + adata = adata[obs.index.tolist()].copy() + adata.obs = obs + shutil.rmtree(fn_tmp) + adata.obs['devstage'] = [age_dict[i] for i in adata.obs['Gestation_week']] + adata.obs['celltype'] = [ct_dict[i] for i in adata.obs['Cluster']] + return adata diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.tsv b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.tsv new file mode 100644 index 000000000..bcdfb356b --- /dev/null +++ 
b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.tsv @@ -0,0 +1,17 @@ +source target target_id +Endothelial endothelial cell CL:0000115 +Excitatory deep layer 1 excitatory neuron CL:0008030 +Excitatory deep layer 2 excitatory neuron CL:0008030 +Maturing excitatory excitatory neuron CL:0008030 +Maturing excitatory upper enriched excitatory neuron CL:0008030 +Migrating excitatory excitatory neuron CL:0008030 +IP neural progenitor cell CL:0011020 +Interneuron CGE interneuron CL:0000099 +Interneuron MGE interneuron CL:0000099 +Microglia microglial cell CL:0000129 +OPC oligodendrocyte precursor cell CL:0002453 +Pericyte pericyte cell CL:0000669 +Cycling Progenitors (G2/M phase) neural progenitor cell CL:0011020 +Cycling Progenitors (S phase) neural progenitor cell CL:0011020 +Outer Radial Glia radial glial cell CL:0000681 +Ventricular Radial Glia radial glial cell CL:0000681 diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.yaml new file mode 100644 index 000000000..667f9b4d2 --- /dev/null +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_neuron_2019_06_011/human_brain_2019_dropseq_polioudakis_001.yaml @@ -0,0 +1,51 @@ +dataset_structure: + dataset_index: 1 + sample_fns: +dataset_wise: + author: + - "Polioudakis" + doi: + - "10.1016/j.neuron.2019.06.011" + download_url_data: + - "manual,sc_dev_cortex_geschwind.zip,http://solo.bmap.ucla.edu/shiny/webapp" + download_url_meta: + normalization: "raw" + year: 2019 +dataset_or_observation_wise: + assay_sc: "Drop-seq" + assay_sc_obs_key: + assay_differentiation: + assay_differentiation_obs_key: + assay_type_differentiation: + assay_type_differentiation_obs_key: + bio_sample: + bio_sample_obs_key: "Layer" + cell_line: + cell_line_obs_key: + development_stage: + development_stage_obs_key: "devstage" + 
disease: "healthy" + disease_obs_key: + ethnicity: + ethnicity_obs_key: + individual: + individual_obs_key: "Donor" + organ: "brain" + organ_obs_key: + organism: "human" + organism_obs_key: + sample_source: "primary_tissue" + sample_source_obs_key: + sex: + sex_obs_key: + state_exact: + state_exact_obs_key: + tech_sample: + tech_sample_obs_key: "Index" +observation_wise: + cell_types_original_obs_key: "celltype" +feature_wise: + gene_id_ensembl_var_key: + gene_id_symbols_var_key: "index" +meta: + version: "1.0" From 80641f7df09fc4d7c3f5c26a80bdd41a5eee2257 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Tue, 11 May 2021 16:30:09 +0200 Subject: [PATCH 142/161] renamed sfaira lint to validate (#263) * renamed sfaira lint to validate Signed-off-by: zethson * remove clean from instructions Signed-off-by: zethson --- docs/adding_datasets.rst | 27 +++++++------------ sfaira/cli.py | 8 +++--- ...t_dataloader.py => validate_dataloader.py} | 16 +++++------ 3 files changed, 21 insertions(+), 30 deletions(-) rename sfaira/commands/{lint_dataloader.py => validate_dataloader.py} (90%) diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index daeed9f91..c5c5f13f3 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -76,26 +76,17 @@ by `_`, below referred to as `--DOI-folder--`: 6. Write load function. Fill load function in `sfaira/data/dataloaders/loaders/--DOI-folder--NA_NA_2021_NA_Einstein_001.py`. -7. Clean the dataloader with a supervicial check (lint). - This step is optional. - -.. code-block:: - - # make sure you are in the top-level sfaira directory from step 1 - sfaira clean-dataloader -.. - -8. Validate the dataloader with the CLI. - Next validate the integrity of your dataloader content with ``sfaira lint-dataloader ``. +7. Validate the dataloader with the CLI. + Next validate the integrity of your dataloader content with ``sfaira validate-dataloader ``. All tests must pass! 
If any of the tests fail please revisit your dataloader and add the missing information. .. code-block:: # make sure you are in the top-level sfaira directory from step 1 - sfaira lint-dataloader `` + sfaira validate-dataloader `` .. -9. Create cell type annotation if your data set is annotated. +8. Create cell type annotation if your data set is annotated. Note that this will abort with error if there are bugs in your data loader. .. code-block:: @@ -104,7 +95,7 @@ by `_`, below referred to as `--DOI-folder--`: # sfaira annotate `` TODO .. -10. Mitigate automated cell type maps. +9. Mitigate automated cell type maps. Sfaira creates a cell type mapping `.tsv` file in the directory in which your data loaders is located if you indicated that annotation is present by filling `cell_types_original_obs_key`. This file is: `NA_NA_2021_NA_Einstein_001.tsv`. @@ -121,16 +112,16 @@ by `_`, below referred to as `--DOI-folder--`: Note that you do not have to include the non-human-readable IDs here as they are added later in a fully automated fashion. -11. Test data loader. +10. Test data loader. Note that this will abort with error if there are bugs in your data loader. .. code-block:: # make sure you are in the top-level sfaira directory from step 1 - # sfaira test `` TODO + # sfaira test-dataloader `` TODO .. -12. Make loader public. +11. Make loader public. You can contribute the data loader to public sfaira as code through a pull request. Note that you can also just keep the data loader in your local installation or keep it in sfaira_extensions if you do not want to make it public. @@ -448,7 +439,7 @@ Now simply fill in all missing properties in your dataloader scripts and yaml fi When done optionally run ``sfaira clean-dataloader `` on the just filled out dataloader yaml file. All unused attributes will be removed. -Next validate the integrity of your dataloader content with ``sfaira lint-dataloader ``. 
+Next validate the integrity of your dataloader content with ``sfaira validate-dataloader ``. All tests must pass! If any of the tests fail please revisit your dataloader and add the missing information. Finally, copy your dataloader into the ``sfaira/dataloaders/loaders/`` folder. diff --git a/sfaira/cli.py b/sfaira/cli.py index 596b61d0f..547768616 100644 --- a/sfaira/cli.py +++ b/sfaira/cli.py @@ -10,7 +10,7 @@ from sfaira.commands.test_dataloader import DataloaderTester from sfaira.commands.clean_dataloader import DataloaderCleaner -from sfaira.commands.lint_dataloader import DataloaderLinter +from sfaira.commands.validate_dataloader import DataloaderValidator import sfaira from sfaira.commands.create_dataloader import DataloaderCreator @@ -92,14 +92,14 @@ def clean_dataloader(path) -> None: @sfaira_cli.command() @click.argument('path', type=click.Path(exists=True)) -def lint_dataloader(path) -> None: +def validate_dataloader(path) -> None: """ Verifies the dataloader against sfaira's requirements. PATH to the dataloader script. 
""" - dataloader_linter = DataloaderLinter(path) - dataloader_linter.lint() + dataloader_validator = DataloaderValidator(path) + dataloader_validator.validate() @sfaira_cli.command() diff --git a/sfaira/commands/lint_dataloader.py b/sfaira/commands/validate_dataloader.py similarity index 90% rename from sfaira/commands/lint_dataloader.py rename to sfaira/commands/validate_dataloader.py index 88d343eef..54c1af116 100644 --- a/sfaira/commands/lint_dataloader.py +++ b/sfaira/commands/validate_dataloader.py @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) -class DataloaderLinter: +class DataloaderValidator: def __init__(self, path='.'): self.path: str = path @@ -18,11 +18,11 @@ def __init__(self, path='.'): self.passed: dict = {} self.warned: dict = {} self.failed: dict = {} - self.linting_functions: list = [ - '_lint_required_attributes', + self.validation_functions: list = [ + '_validate_required_attributes', ] - def lint(self) -> None: + def validate(self) -> None: """ Statically verifies a yaml dataloader file against a predefined set of rules. Every rule is a function defined in this class, which must be part of this class' linting_functions. @@ -34,15 +34,15 @@ def lint(self) -> None: "[bold yellow]{task.completed} of {task.total}[reset] [bold green]{task.fields[func_name]}") with progress: lint_progress = progress.add_task("Running lint checks", - total=len(self.linting_functions), - func_name=self.linting_functions) - for fun_name in self.linting_functions: + total=len(self.validation_functions), + func_name=self.validation_functions) + for fun_name in self.validation_functions: progress.update(lint_progress, advance=1, func_name=fun_name) getattr(self, fun_name)() self._print_results() - def _lint_required_attributes(self): + def _validate_required_attributes(self): """ Verifies that all required attributes for every dataloader are present. 
""" From f89eddb780027bbcbf972eae401ec98da2bb1cf2 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Tue, 11 May 2021 19:25:09 +0200 Subject: [PATCH 143/161] add sfaira dataloader schema (#265) Signed-off-by: zethson --- docs/adding_datasets.rst | 8 ++ docs/api/sfaira.data.DatasetBase.rst | 79 +++++++++++-------- docs/api/sfaira.data.DatasetInteractive.rst | 79 +++++++++++-------- docs/api/sfaira.data.DatasetSuperGroup.rst | 23 +++++- docs/api/sfaira.train.TrainModelCelltype.rst | 12 +-- docs/api/sfaira.train.TrainModelEmbedding.rst | 12 +-- 6 files changed, 127 insertions(+), 86 deletions(-) diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index c5c5f13f3..a5099ce39 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -3,6 +3,14 @@ Adding data sets Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. This process requires a couple of steps as outlined in the following sections. + + +.. figure:: https://user-images.githubusercontent.com/21954664/117845386-c6744a00-b280-11eb-9d86-8c47132a3949.png + :alt: sfaira adding datasets + + Overview of contributing dataloaders to sfaira. First, ensure that your data is not yet available as a dataloader. + Next, create a dataloader and validate it. Afterwards, annotate it to finally test it. Finally, submit your dataloader to sfaira. + sfaira features an interactive way of creating, formatting and testing dataloaders through a command line interface (CLI). 
The common workflow using the CLI looks as follows: diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst index 8f83a7c06..b8b86e9c9 100644 --- a/docs/api/sfaira.data.DatasetBase.rst +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -15,14 +15,19 @@ ~DatasetBase.__init__ ~DatasetBase.clear + ~DatasetBase.collapse_counts + ~DatasetBase.download ~DatasetBase.load ~DatasetBase.load_meta ~DatasetBase.load_ontology_class_map - ~DatasetBase.load_tobacked ~DatasetBase.project_celltypes_to_ontology - ~DatasetBase.set_raw_full_group_object - ~DatasetBase.set_unkown_class_id + ~DatasetBase.set_dataset_id + ~DatasetBase.show_summary + ~DatasetBase.streamline_features + ~DatasetBase.streamline_metadata ~DatasetBase.subset_cells + ~DatasetBase.write_backed + ~DatasetBase.write_distributed_store ~DatasetBase.write_meta ~DatasetBase.write_ontology_class_map @@ -34,56 +39,68 @@ .. autosummary:: - ~DatasetBase.age + ~DatasetBase.additional_annotation_key ~DatasetBase.annotated - ~DatasetBase.assay_sc ~DatasetBase.assay_differentiation + ~DatasetBase.assay_differentiation_obs_key + ~DatasetBase.assay_sc + ~DatasetBase.assay_sc_obs_key ~DatasetBase.assay_type_differentiation + ~DatasetBase.assay_type_differentiation_obs_key ~DatasetBase.author + ~DatasetBase.bio_sample + ~DatasetBase.bio_sample_obs_key + ~DatasetBase.cache_fn + ~DatasetBase.cell_line + ~DatasetBase.cell_line_obs_key + ~DatasetBase.cell_ontology_map + ~DatasetBase.cell_types_original_obs_key + ~DatasetBase.cellontology_class_obs_key + ~DatasetBase.cellontology_id_obs_key + ~DatasetBase.celltypes_universe ~DatasetBase.citation - ~DatasetBase.dev_stage + ~DatasetBase.data_dir + ~DatasetBase.default_embedding + ~DatasetBase.development_stage + ~DatasetBase.development_stage_obs_key ~DatasetBase.directory_formatted_doi - ~DatasetBase.cell_line + ~DatasetBase.disease + ~DatasetBase.disease_obs_key ~DatasetBase.doi ~DatasetBase.doi_cleaned_id - ~DatasetBase.download - 
~DatasetBase.download_meta + ~DatasetBase.doi_main + ~DatasetBase.download_url_data + ~DatasetBase.download_url_meta ~DatasetBase.ethnicity + ~DatasetBase.ethnicity_obs_key ~DatasetBase.fn_ontology_class_map_tsv - ~DatasetBase.healthy - ~DatasetBase.healthy_state_healthy + ~DatasetBase.gene_id_ensembl_var_key + ~DatasetBase.gene_id_symbols_var_key ~DatasetBase.id + ~DatasetBase.individual + ~DatasetBase.individual_obs_key ~DatasetBase.loaded ~DatasetBase.meta ~DatasetBase.meta_fn ~DatasetBase.ncells ~DatasetBase.normalization - ~DatasetBase.obs_key_age - ~DatasetBase.obs_key_assay_sc - ~DatasetBase.obs_key_assay_differentiation - ~DatasetBase.obs_key_assay_type_differentiation - ~DatasetBase.obs_key_cell_line - ~DatasetBase.obs_key_cellontology_id - ~DatasetBase.obs_key_cell_types_original - ~DatasetBase.obs_key_dev_stage - ~DatasetBase.obs_key_ethnicity - ~DatasetBase.obs_key_healthy - ~DatasetBase.obs_key_organ - ~DatasetBase.obs_key_organism - ~DatasetBase.obs_key_sample - ~DatasetBase.obs_key_sample_source - ~DatasetBase.obs_key_sex - ~DatasetBase.obs_key_state_exact ~DatasetBase.ontology_celltypes - ~DatasetBase.ontology_class_map + ~DatasetBase.ontology_organ ~DatasetBase.organ + ~DatasetBase.organ_obs_key ~DatasetBase.organism - ~DatasetBase.sex + ~DatasetBase.organism_obs_key + ~DatasetBase.primary_data ~DatasetBase.sample_source + ~DatasetBase.sample_source_obs_key + ~DatasetBase.sex + ~DatasetBase.sex_obs_key ~DatasetBase.source ~DatasetBase.state_exact - ~DatasetBase.gene_id_ensembl_var_key - ~DatasetBase.gene_id_symbols_var_key + ~DatasetBase.state_exact_obs_key + ~DatasetBase.tech_sample + ~DatasetBase.tech_sample_obs_key + ~DatasetBase.title ~DatasetBase.year \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst index 680496e67..daf81f984 100644 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -15,14 +15,19 @@ 
~DatasetInteractive.__init__ ~DatasetInteractive.clear + ~DatasetInteractive.collapse_counts + ~DatasetInteractive.download ~DatasetInteractive.load ~DatasetInteractive.load_meta ~DatasetInteractive.load_ontology_class_map - ~DatasetInteractive.load_tobacked ~DatasetInteractive.project_celltypes_to_ontology - ~DatasetInteractive.set_raw_full_group_object - ~DatasetInteractive.set_unkown_class_id + ~DatasetInteractive.set_dataset_id + ~DatasetInteractive.show_summary + ~DatasetInteractive.streamline_features + ~DatasetInteractive.streamline_metadata ~DatasetInteractive.subset_cells + ~DatasetInteractive.write_backed + ~DatasetInteractive.write_distributed_store ~DatasetInteractive.write_meta ~DatasetInteractive.write_ontology_class_map @@ -34,56 +39,68 @@ .. autosummary:: - ~DatasetInteractive.age + ~DatasetInteractive.additional_annotation_key ~DatasetInteractive.annotated - ~DatasetInteractive.author - ~DatasetInteractive.assay_sc ~DatasetInteractive.assay_differentiation + ~DatasetInteractive.assay_differentiation_obs_key + ~DatasetInteractive.assay_sc + ~DatasetInteractive.assay_sc_obs_key ~DatasetInteractive.assay_type_differentiation + ~DatasetInteractive.assay_type_differentiation_obs_key + ~DatasetInteractive.author + ~DatasetInteractive.bio_sample + ~DatasetInteractive.bio_sample_obs_key + ~DatasetInteractive.cache_fn ~DatasetInteractive.cell_line + ~DatasetInteractive.cell_line_obs_key + ~DatasetInteractive.cell_ontology_map + ~DatasetInteractive.cell_types_original_obs_key + ~DatasetInteractive.cellontology_class_obs_key + ~DatasetInteractive.cellontology_id_obs_key + ~DatasetInteractive.celltypes_universe ~DatasetInteractive.citation - ~DatasetInteractive.dev_stage + ~DatasetInteractive.data_dir + ~DatasetInteractive.default_embedding + ~DatasetInteractive.development_stage + ~DatasetInteractive.development_stage_obs_key ~DatasetInteractive.directory_formatted_doi + ~DatasetInteractive.disease + ~DatasetInteractive.disease_obs_key ~DatasetInteractive.doi 
~DatasetInteractive.doi_cleaned_id - ~DatasetInteractive.download - ~DatasetInteractive.download_meta + ~DatasetInteractive.doi_main + ~DatasetInteractive.download_url_data + ~DatasetInteractive.download_url_meta ~DatasetInteractive.ethnicity + ~DatasetInteractive.ethnicity_obs_key ~DatasetInteractive.fn_ontology_class_map_tsv - ~DatasetInteractive.healthy - ~DatasetInteractive.healthy_state_healthy + ~DatasetInteractive.gene_id_ensembl_var_key + ~DatasetInteractive.gene_id_symbols_var_key ~DatasetInteractive.id + ~DatasetInteractive.individual + ~DatasetInteractive.individual_obs_key ~DatasetInteractive.loaded ~DatasetInteractive.meta ~DatasetInteractive.meta_fn ~DatasetInteractive.ncells ~DatasetInteractive.normalization - ~DatasetInteractive.obs_key_age - ~DatasetInteractive.obs_key_assay_sc - ~DatasetInteractive.obs_key_assay_differentiation - ~DatasetInteractive.obs_key_assay_type_differentiation - ~DatasetInteractive.obs_key_cell_line - ~DatasetInteractive.obs_key_cellontology_id - ~DatasetInteractive.obs_key_cell_types_original - ~DatasetInteractive.obs_key_dev_stage - ~DatasetInteractive.obs_key_ethnicity - ~DatasetInteractive.obs_key_healthy - ~DatasetInteractive.obs_key_organ - ~DatasetInteractive.obs_key_organism - ~DatasetInteractive.obs_key_sample - ~DatasetInteractive.obs_key_sample_source - ~DatasetInteractive.obs_key_sex - ~DatasetInteractive.obs_key_state_exact ~DatasetInteractive.ontology_celltypes - ~DatasetInteractive.ontology_class_map + ~DatasetInteractive.ontology_organ ~DatasetInteractive.organ + ~DatasetInteractive.organ_obs_key ~DatasetInteractive.organism - ~DatasetInteractive.sex + ~DatasetInteractive.organism_obs_key + ~DatasetInteractive.primary_data ~DatasetInteractive.sample_source + ~DatasetInteractive.sample_source_obs_key + ~DatasetInteractive.sex + ~DatasetInteractive.sex_obs_key ~DatasetInteractive.source ~DatasetInteractive.state_exact - ~DatasetInteractive.gene_id_ensembl_var_key - ~DatasetInteractive.gene_id_symbols_var_key + 
~DatasetInteractive.state_exact_obs_key + ~DatasetInteractive.tech_sample + ~DatasetInteractive.tech_sample_obs_key + ~DatasetInteractive.title ~DatasetInteractive.year \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst index e607ab926..5bfa255c8 100644 --- a/docs/api/sfaira.data.DatasetSuperGroup.rst +++ b/docs/api/sfaira.data.DatasetSuperGroup.rst @@ -14,23 +14,42 @@ .. autosummary:: ~DatasetSuperGroup.__init__ + ~DatasetSuperGroup.collapse_counts ~DatasetSuperGroup.delete_backed + ~DatasetSuperGroup.download ~DatasetSuperGroup.extend_dataset_groups ~DatasetSuperGroup.flatten ~DatasetSuperGroup.get_gc - ~DatasetSuperGroup.load_all - ~DatasetSuperGroup.load_all_tobacked + ~DatasetSuperGroup.load ~DatasetSuperGroup.load_cached_backed + ~DatasetSuperGroup.load_config ~DatasetSuperGroup.ncells ~DatasetSuperGroup.ncells_bydataset ~DatasetSuperGroup.ncells_bydataset_flat ~DatasetSuperGroup.project_celltypes_to_ontology + ~DatasetSuperGroup.remove_duplicates ~DatasetSuperGroup.set_dataset_groups + ~DatasetSuperGroup.show_summary + ~DatasetSuperGroup.streamline_features + ~DatasetSuperGroup.streamline_metadata ~DatasetSuperGroup.subset ~DatasetSuperGroup.subset_cells + ~DatasetSuperGroup.write_backed + ~DatasetSuperGroup.write_config + ~DatasetSuperGroup.write_distributed_store + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetSuperGroup.adata + ~DatasetSuperGroup.adata_ls + ~DatasetSuperGroup.additional_annotation_key + ~DatasetSuperGroup.datasets + ~DatasetSuperGroup.ids + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst index cde6646e2..79cf1acc3 100644 --- a/docs/api/sfaira.train.TrainModelCelltype.rst +++ b/docs/api/sfaira.train.TrainModelCelltype.rst @@ -14,23 +14,13 @@ .. 
autosummary:: ~TrainModelCelltype.__init__ - ~TrainModelCelltype.human_target ~TrainModelCelltype.init_estim - ~TrainModelCelltype.mouse_target + ~TrainModelCelltype.load_into_memory ~TrainModelCelltype.save ~TrainModelCelltype.save_eval - ~TrainModelCelltype.set_data - ~TrainModelCelltype.write_celltypes_tocsv_human - ~TrainModelCelltype.write_celltypes_tocsv_mouse - .. rubric:: Attributes - - .. autosummary:: - - ~TrainModelCelltype.adata - \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst index e7c1b6be8..fc3dea8d6 100644 --- a/docs/api/sfaira.train.TrainModelEmbedding.rst +++ b/docs/api/sfaira.train.TrainModelEmbedding.rst @@ -14,23 +14,13 @@ .. autosummary:: ~TrainModelEmbedding.__init__ - ~TrainModelEmbedding.human_target ~TrainModelEmbedding.init_estim - ~TrainModelEmbedding.mouse_target + ~TrainModelEmbedding.load_into_memory ~TrainModelEmbedding.save ~TrainModelEmbedding.save_eval - ~TrainModelEmbedding.set_data - ~TrainModelEmbedding.write_celltypes_tocsv_human - ~TrainModelEmbedding.write_celltypes_tocsv_mouse - .. rubric:: Attributes - - .. autosummary:: - - ~TrainModelEmbedding.adata - \ No newline at end of file From 4abb78db03506190f4e24a2aea8f7431c328e375 Mon Sep 17 00:00:00 2001 From: Abdul Moeed Date: Wed, 12 May 2021 23:10:14 +0200 Subject: [PATCH 144/161] Fix error due to undefined variable. 
(#266) --- sfaira/estimators/keras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 0f504bc3c..297851c30 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -694,7 +694,7 @@ def generator(): x_sample = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() sf_sample = prepare_sf(x=x_sample)[0] y_sample = self.data.obs[self._adata_ids.cell_ontology_class][i] - yield (x_sample, sf_sample), (x, cell_to_class[y_sample]) + yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) else: x = self._prepare_data_matrix(idx=idx) sf = prepare_sf(x=x) From c23b4c82ec12dbef5418244c867b136663e341ee Mon Sep 17 00:00:00 2001 From: Abdul Moeed Date: Mon, 17 May 2021 11:52:35 +0200 Subject: [PATCH 145/161] Fix model_id docstring to suggest correct format. (#267) --- sfaira/interface/model_zoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index 89d3a69a6..c3f4bd61d 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -127,7 +127,7 @@ def model_id(self, x: str): """ Set model ID to a manually supplied ID. - :param x: Model ID to set. Format: pipeline_genome_organ_model_organisation_topology_version + :param x: Model ID to set. 
Format: modelclass_organism-organ-modeltype-topology-version_organisation """ assert len(x.split('_')) == 3, f'model_id {x} is invalid' self._model_id = x From 7372c4f83954a7922b180a063d60d4b559b6f9ee Mon Sep 17 00:00:00 2001 From: zethson Date: Fri, 28 May 2021 14:13:14 +0200 Subject: [PATCH 146/161] partially fix test Signed-off-by: zethson --- .../mouse_pancreas_2019_10xsequencing_thompson_x.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 5317bf313..b0c668d92 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -33,7 +33,7 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" - self.assay_sc = "10x sequencing" + self.assay_sc = "Drop-seq" self.state_exact = "diabetic" self.year = 2019 self.sample_source = "primary_tissue" From f03f8337b708f837cfdcc833ef7e0fd5b87f8f54 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 28 May 2021 16:21:51 +0200 Subject: [PATCH 147/161] Train fixes (#260) * fixes training-related bugs * added zarr-dask distributed access store * fixed indexing bug in h5ad store * fixed store unit test and renamed unknown meta data field in sfaira streamlining * added optional custom cell ontology to estimator constructor (#254) * fix handling of new model_id format * clean up model zoo * fixed grid search summaries * fix celltype handling in UI * include topology files in zenodo deposition Co-authored-by: le-ander <20015434+le-ander@users.noreply.github.com> --- ...ira.train.SummarizeGridsearchEmbedding.rst | 2 +- docs/api/sfaira.ui.UserInterface.rst | 6 +- docs/conf.py | 2 +- requirements.txt | 2 + sfaira/consts/adata_fields.py | 28 +- sfaira/consts/meta_data_files.py | 4 +- sfaira/data/__init__.py | 2 +- sfaira/data/base/__init__.py | 3 +- sfaira/data/base/dataset.py | 67 +- sfaira/data/base/dataset_group.py | 66 +- sfaira/data/base/distributed_store.py | 771 +++++++++++++----- sfaira/data/base/io_dao.py | 133 +++ .../databases/cellxgene/cellxgene_loader.py | 4 +- ..._pancreas_2019_10xsequencing_thompson_x.py | 3 +- .../create_anatomical_configs.py | 102 --- .../create_anatomical_configs_store.py | 6 +- sfaira/data/utils_scripts/create_meta.py | 2 - .../utils_scripts/create_target_universes.py | 17 +- sfaira/data/utils_scripts/write_store.py | 25 +- sfaira/estimators/keras.py | 436 ++++++---- sfaira/interface/model_zoo.py | 240 +++--- sfaira/interface/user_interface.py | 327 ++++---- sfaira/models/celltype/marker.py | 6 +- sfaira/models/celltype/mlp.py | 3 +- sfaira/train/summaries.py | 474 ++++++----- sfaira/train/train_model.py | 130 +-- sfaira/unit_tests/data/test_dataset.py | 4 +- sfaira/unit_tests/data/test_store.py | 144 +++- sfaira/unit_tests/estimators/custom.obo | 16 + .../unit_tests/estimators/test_estimator.py | 300 +++++-- sfaira/unit_tests/trainer/test_trainer.py | 47 +- sfaira/unit_tests/utils.py | 73 +- 
sfaira/versions/genomes.py | 28 +- sfaira/versions/metadata/__init__.py | 2 +- sfaira/versions/metadata/base.py | 194 +++-- sfaira/versions/metadata/universe.py | 2 +- sfaira/versions/topologies/class_interface.py | 9 +- .../versions/topologies/human/embedding/ae.py | 21 +- .../topologies/human/embedding/linear.py | 7 +- .../topologies/human/embedding/nmf.py | 14 +- .../topologies/human/embedding/vae.py | 7 +- .../mouse/celltype/celltypemarker.py | 2 +- .../topologies/mouse/celltype/celltypemlp.py | 16 +- .../versions/topologies/mouse/embedding/ae.py | 23 +- .../topologies/mouse/embedding/linear.py | 6 +- .../topologies/mouse/embedding/nmf.py | 6 +- .../topologies/mouse/embedding/vae.py | 8 +- .../topologies/mouse/embedding/vaeiaf.py | 4 +- .../topologies/mouse/embedding/vaevamp.py | 4 +- 49 files changed, 2433 insertions(+), 1365 deletions(-) create mode 100644 sfaira/data/base/io_dao.py delete mode 100644 sfaira/data/utils_scripts/create_anatomical_configs.py create mode 100644 sfaira/unit_tests/estimators/custom.obo diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst index 5ef0ddf33..5edcfac79 100644 --- a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst +++ b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst @@ -15,7 +15,7 @@ ~SummarizeGridsearchEmbedding.__init__ ~SummarizeGridsearchEmbedding.best_model_by_partition - ~SummarizeGridsearchEmbedding.best_model_embedding + ~SummarizeGridsearchEmbedding.best_model_embedding_latentspace ~SummarizeGridsearchEmbedding.create_summary_tab ~SummarizeGridsearchEmbedding.get_best_model_ids ~SummarizeGridsearchEmbedding.get_gradients_by_celltype diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst index d14d56879..6b8ba31b2 100644 --- a/docs/api/sfaira.ui.UserInterface.rst +++ b/docs/api/sfaira.ui.UserInterface.rst @@ -15,13 +15,13 @@ ~ui.UserInterface.__init__ ~ui.UserInterface.celltype_summary - 
~ui.UserInterface.compute_all + ~ui.UserInterface.predict_all ~ui.UserInterface.compute_all_kipoi - ~ui.UserInterface.compute_celltype + ~ui.UserInterface.predict_celltypes ~ui.UserInterface.compute_celltype_kipoi ~ui.UserInterface.compute_denoised_expression ~ui.UserInterface.compute_denoised_expression_kipoi - ~ui.UserInterface.compute_embedding + ~ui.UserInterface.predict_embedding ~ui.UserInterface.compute_embedding_kipoi ~ui.UserInterface.deposit_zenodo ~ui.UserInterface.filter_cells diff --git a/docs/conf.py b/docs/conf.py index 7ff074f47..64d6ead7e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -121,7 +121,7 @@ def setup(app): - app.add_stylesheet('css/custom.css') + app.add_css_file('css/custom.css') app.connect('autodoc-process-docstring', insert_function_images) app.add_role('pr', autolink(f'{gh_url}/pull/{{}}', 'PR {}')) diff --git a/requirements.txt b/requirements.txt index 9676a97f2..a410be433 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ anndata>=0.7.6 crossref_commons +dask docutils fuzzywuzzy loompy @@ -9,6 +10,7 @@ numpy>=1.16.4 obonet openpyxl pandas +pyarrow pytest>=6.2.2 python-Levenshtein PyYAML diff --git a/sfaira/consts/adata_fields.py b/sfaira/consts/adata_fields.py index f2d11cab1..041fe83e8 100644 --- a/sfaira/consts/adata_fields.py +++ b/sfaira/consts/adata_fields.py @@ -13,8 +13,8 @@ class AdataIds: assay_sc: str author: str cell_types_original: str - cell_ontology_class: str - cell_ontology_id: str + cellontology_class: str + cellontology_id: str development_stage: str disease: str doi: str @@ -38,6 +38,10 @@ class AdataIds: tech_sample: str year: str + load_raw: str + mapped_features: str + remove_gene_version: str + obs_keys: List[str] var_keys: List[str] uns_keys: List[str] @@ -79,8 +83,8 @@ def __init__(self): self.bio_sample = "bio_sample" self.cell_line = "cell_line" self.cell_types_original = "cell_types_original" - self.cell_ontology_class = "cell_ontology_class" - self.cell_ontology_id = 
"cell_ontology_id" + self.cellontology_class = "cell_ontology_class" + self.cellontology_id = "cell_ontology_id" self.default_embedding = "default_embedding" self.disease = "disease" self.doi = "doi" @@ -118,7 +122,7 @@ def __init__(self): self.unknown_celltype_identifier = "UNKNOWN" self.not_a_cell_celltype_identifier = "NOT_A_CELL" - self.unknown_metadata_identifier = None + self.unknown_metadata_identifier = "unknown" self.obs_keys = [ "assay_sc", @@ -127,8 +131,8 @@ def __init__(self): "bio_sample", "cell_line", "cell_types_original", - "cell_ontology_class", - "cell_ontology_id", + "cellontology_class", + "cellontology_id", "development_stage", "disease", "ethnicity", @@ -174,9 +178,9 @@ class AdataIdsCellxgene(AdataIds): def __init__(self): self.assay_sc = "assay" self.cell_types_original = "free_annotation" # TODO "free_annotation" not always given - # TODO: -> This will break streamlining though if self.cell_types_original is the same value as self.cell_ontology_class!! - self.cell_ontology_class = "cell_type" - self.cell_ontology_id = "cell_type_ontology_term_id" + # TODO: -> This will break streamlining though if self.cell_types_original is the same value as self.cellontology_class!! 
+ self.cellontology_class = "cell_type" + self.cellontology_id = "cell_type_ontology_term_id" self.default_embedding = "default_embedding" self.doi = "preprint_doi" self.disease = "disease" @@ -213,8 +217,8 @@ def __init__(self): self.obs_keys = [ "assay_sc", "cell_types_original", - "cell_ontology_class", - "cell_ontology_id", + "cellontology_class", + "cellontology_id", "development_stage", "disease", "ethnicity", diff --git a/sfaira/consts/meta_data_files.py b/sfaira/consts/meta_data_files.py index c5a61e56e..cfc06b7e2 100644 --- a/sfaira/consts/meta_data_files.py +++ b/sfaira/consts/meta_data_files.py @@ -10,8 +10,8 @@ "assay_type_differentiation": str, "bio_sample": str, "cell_line": str, - "cell_ontology_class": str, - "cell_ontology_id": str, + "cellontology_class": str, + "cellontology_id": str, "development_stage": str, "disease": str, "doi": str, diff --git a/sfaira/data/__init__.py b/sfaira/data/__init__.py index b13e572d6..11e56506e 100644 --- a/sfaira/data/__init__.py +++ b/sfaira/data/__init__.py @@ -1,6 +1,6 @@ from sfaira.data.base import clean_string, DatasetBase, \ DatasetGroup, DatasetGroupDirectoryOriented, \ - DatasetSuperGroup, DistributedStore + DatasetSuperGroup, load_store, DistributedStoreBase, DistributedStoreH5ad, DistributedStoreDao from . 
import dataloaders from .dataloaders import Universe from .interactive import DatasetInteractive diff --git a/sfaira/data/base/__init__.py b/sfaira/data/base/__init__.py index 7e3dacc49..e5c92edcd 100644 --- a/sfaira/data/base/__init__.py +++ b/sfaira/data/base/__init__.py @@ -1,3 +1,4 @@ from sfaira.data.base.dataset import DatasetBase, clean_string from sfaira.data.base.dataset_group import DatasetGroup, DatasetGroupDirectoryOriented, DatasetSuperGroup -from sfaira.data.base.distributed_store import DistributedStore +from sfaira.data.base.distributed_store import load_store, DistributedStoreBase, DistributedStoreH5ad, \ + DistributedStoreDao diff --git a/sfaira/data/base/dataset.py b/sfaira/data/base/dataset.py index 7070a155c..ef1236b8a 100644 --- a/sfaira/data/base/dataset.py +++ b/sfaira/data/base/dataset.py @@ -21,6 +21,7 @@ from sfaira.versions.genomes import GenomeContainer from sfaira.versions.metadata import Ontology, OntologyHierarchical, CelltypeUniverse from sfaira.consts import AdataIds, AdataIdsCellxgene, AdataIdsSfaira, META_DATA_FIELDS, OCS +from sfaira.data.base.io_dao import write_dao from sfaira.data.utils import collapse_matrix, read_yaml UNS_STRING_META_IN_OBS = "__obs__" @@ -666,6 +667,7 @@ def streamline_features( var=var_new, uns=self.adata.uns ) + self.adata.uns[self._adata_ids.mapped_features] = match_to_reference def streamline_metadata( self, @@ -739,7 +741,7 @@ def streamline_metadata( # set var index var_new.index = var_new[adata_target_ids.gene_id_index].tolist() - per_cell_labels = ["cell_types_original", "cell_ontology_class", "cell_ontology_id"] + per_cell_labels = ["cell_types_original", "cellontology_class", "cellontology_id"] experiment_batch_labels = ["bio_sample", "individual", "tech_sample"] # Prepare .obs column name dict (process keys below with other .uns keys if they're set dataset-wide) @@ -810,7 +812,13 @@ def streamline_metadata( # Map cell type names from raw IDs to ontology maintained ones: if 
self.cell_types_original_obs_key is not None: obs_cl = self.project_celltypes_to_ontology(copy=True, adata_fields=adata_target_ids) - obs_new = pd.concat([obs_new, obs_cl], axis=1) + else: + obs_cl = pd.DataFrame({ + adata_target_ids.cellontology_class: [adata_target_ids.unknown_metadata_identifier] * self.adata.n_obs, + adata_target_ids.cellontology_id: [adata_target_ids.unknown_metadata_identifier] * self.adata.n_obs, + adata_target_ids.cell_types_original: [adata_target_ids.unknown_metadata_identifier] * self.adata.n_obs, + }, index=self.adata.obs.index) + obs_new = pd.concat([obs_new, obs_cl], axis=1) # Add new annotation to adata and delete old fields if requested if clean_var: @@ -930,7 +938,7 @@ def streamline_metadata( def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "h5ad", + store_format: str = "dao", dense: bool = False, compression_kwargs: dict = {}, chunks: Union[int, None] = None, @@ -942,20 +950,29 @@ def write_distributed_store( data sets that are accessed. Use .streamline_* before calling this method to streamline the data sets. :param dir_cache: Directory to write cache in. - :param store: Disk format for objects in cache: + :param store_format: Disk format for objects in cache. Recommended is "dao". - "h5ad": Allows access via backed .h5ad. Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise access if the files are csr, so further compression potentially not necessary. - - "zarr": Allows access as zarr array. + - "dao": Distributed access optimised format, recommended for batched access in optimisation, for example. :param dense: Whether to write sparse or dense store, this will be homogenously enforced. - :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: - compression, compression_opts. - :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. 
- Only relevant for store=="zarr". + :param compression_kwargs: Compression key word arguments to give to h5py or zarr + For store_format=="h5ad", see also anndata.AnnData.write_h5ad: + - compression, + - compression_opts. + For store_format=="dao", see also sfaira.data.write_dao which relays kwargs to + zarr.hierarchy.create_dataset: + - compressor + - overwrite + - order + and others. + :param chunks: Observation axes of chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="dao". The feature dimension of the chunks is always is the full feature space. + Uses zarr default chunking across both axes if None. """ self.__assert_loaded() - if store == "h5ad": + if store_format == "h5ad": if not isinstance(self.adata.X, scipy.sparse.csr_matrix): print(f"WARNING: high-perfomances caches based on .h5ad work better with .csr formatted expression " f"data, found {type(self.adata.X)}") @@ -963,9 +980,14 @@ def write_distributed_store( as_dense = ("X",) if dense else () print(f"writing {self.adata.shape} into {fn}") self.adata.write_h5ad(filename=fn, as_dense=as_dense, **compression_kwargs) - elif store == "zarr": + elif store_format == "dao": + # Convert data object to sparse / dense as required: + if not dense: + raise ValueError("WARNING: sparse zarr array performance is not be optimal and not supported yet, " + "consider writing as dense and consider that zarr arrays are compressed on disk!") fn = os.path.join(dir_cache, self.doi_cleaned_id) - self.adata.write_zarr(store=fn, chunks=chunks) + chunks = (chunks, self.adata.X.shape[1]) if chunks is not None else True + write_dao(store=fn, adata=self.adata, chunks=chunks, compression_kwargs=compression_kwargs) else: raise ValueError() @@ -1183,15 +1205,16 @@ def project_celltypes_to_ontology(self, adata_fields: Union[AdataIds, None] = No adata_fields.not_a_cell_celltype_identifier ], ) - results[adata_fields.cell_ontology_class] = labels_mapped - 
results[adata_fields.cell_ontology_id] = ids_mapped + results[adata_fields.cellontology_class] = labels_mapped + results[adata_fields.cellontology_id] = ids_mapped if update_fields: - self.cellontology_id_obs_key = adata_fields.cell_ontology_id + self.cellontology_id_obs_key = adata_fields.cellontology_id else: - results[adata_fields.cell_ontology_class] = labels_original + results[adata_fields.cellontology_class] = labels_original + results[adata_fields.cellontology_id] = [adata_fields.unknown_metadata_identifier] * self.adata.n_obs results[adata_fields.cell_types_original] = labels_original if update_fields: - self.cellontology_class_obs_key = adata_fields.cell_ontology_class + self.cellontology_class_obs_key = adata_fields.cellontology_class self.cell_types_original_obs_key = adata_fields.cell_types_original if copy: return pd.DataFrame(results, index=self.adata.obs.index) @@ -1313,7 +1336,7 @@ def write_meta( meta = pandas.DataFrame(index=range(1)) # Expand table by variably cell-wise or data set-wise meta data: for x in self._adata_ids.controlled_meta_fields: - if x in ["cell_types_original", "cell_ontology_class", "cell_ontology_id"]: + if x in ["cell_types_original", "cellontology_class", "cellontology_id"]: continue elif x in ["bio_sample", "individual", "tech_sample"] and \ hasattr(self, f"{x}_obs_key") and \ @@ -1340,12 +1363,12 @@ def write_meta( # Add cell types into table if available: if self.cell_types_original_obs_key is not None: mappings = self.project_celltypes_to_ontology(copy=True, update_fields=False) - meta[self._adata_ids.cell_ontology_class] = (mappings[self._adata_ids.cell_ontology_class].unique(),) - meta[self._adata_ids.cell_ontology_id] = (mappings[self._adata_ids.cell_ontology_id].unique(),) + meta[self._adata_ids.cellontology_class] = (mappings[self._adata_ids.cellontology_class].unique(),) + meta[self._adata_ids.cellontology_id] = (mappings[self._adata_ids.cellontology_id].unique(),) meta[self._adata_ids.cell_types_original] = 
(mappings[self._adata_ids.cell_types_original].unique(),) else: - meta[self._adata_ids.cell_ontology_class] = " " - meta[self._adata_ids.cell_ontology_id] = " " + meta[self._adata_ids.cellontology_class] = " " + meta[self._adata_ids.cellontology_id] = " " meta[self._adata_ids.cell_types_original] = " " meta.to_csv(fn_meta) diff --git a/sfaira/data/base/dataset_group.py b/sfaira/data/base/dataset_group.py index 915e7c835..98f2e2a70 100644 --- a/sfaira/data/base/dataset_group.py +++ b/sfaira/data/base/dataset_group.py @@ -37,7 +37,7 @@ def map_fn(inputs): return x else: return None - except FileNotFoundError as e: + except (FileNotFoundError, OSError) as e: return ds.id, e, @@ -206,7 +206,7 @@ def collapse_counts(self): def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "h5ad", + store_format: str = "dao", dense: bool = False, compression_kwargs: dict = {}, chunks: Union[int, None] = None, @@ -219,20 +219,29 @@ def write_distributed_store( This method writes a separate file for each data set in this object. :param dir_cache: Directory to write cache in. - :param store: Disk format for objects in cache: + :param store_format: Disk format for objects in cache. Recommended is "dao". - "h5ad": Allows access via backed .h5ad. Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise access if the files are csr, so further compression potentially not necessary. - - "zarr": Allows access as zarr array. + - "dao": Distributed access optimised format, recommended for batched access in optimisation, for example. :param dense: Whether to write sparse or dense store, this will be homogenously enforced. - :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: - compression, compression_opts. - :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. - Only relevant for store=="zarr". 
+ :param compression_kwargs: Compression key word arguments to give to h5py or zarr + For store_format=="h5ad", see also anndata.AnnData.write_h5ad: + - compression, + - compression_opts. + For store_format=="dao", see also sfaira.data.write_dao which relays kwargs to + zarr.hierarchy.create_dataset: + - compressor + - overwrite + - order + and others. + :param chunks: Observation axes of chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="dao". The feature dimension of the chunks is always is the full feature space. + Uses zarr default chunking across both axes if None. """ for _, v in self.datasets.items(): - v.write_distributed_store(dir_cache=dir_cache, store=store, dense=dense, + v.write_distributed_store(dir_cache=dir_cache, store_format=store_format, dense=dense, compression_kwargs=compression_kwargs, chunks=chunks) def write_backed( @@ -779,14 +788,8 @@ def get_gc( self, genome: str = None ): - if genome.lower().startswith("homo_sapiens"): + if genome.lower().startswith("homo_sapiens") or genome.lower().startswith("mus_musculus"): g = GenomeContainer( - organism="human", - assembly=genome - ) - elif genome.lower().startswith("mus_musculus"): - g = GenomeContainer( - organism="mouse", assembly=genome ) else: @@ -979,7 +982,7 @@ def adata(self): def write_distributed_store( self, dir_cache: Union[str, os.PathLike], - store: str = "h5ad", + store_format: str = "dao", dense: bool = False, compression_kwargs: dict = {}, chunks: Union[int, None] = None, @@ -992,20 +995,29 @@ def write_distributed_store( This method writes a separate file for each data set in this object. :param dir_cache: Directory to write cache in. - :param store: Disk format for objects in cache: + :param store_format: Disk format for objects in cache. Recommended is "dao". - "h5ad": Allows access via backed .h5ad. 
Note on compression: .h5ad supports sparse data with is a good compression that gives fast row-wise access if the files are csr, so further compression potentially not necessary. - - "zarr": Allows access as zarr array. + - "dao": Distributed access optimised format, recommended for batched access in optimisation, for example. :param dense: Whether to write sparse or dense store, this will be homogenously enforced. - :param compression_kwargs: Compression key word arguments to give to h5py, see also anndata.AnnData.write_h5ad: - compression, compression_opts. - :param chunks: Chunk size of zarr array, see anndata.AnnData.write_zarr documentation. - Only relevant for store=="zarr". + :param compression_kwargs: Compression key word arguments to give to h5py or zarr + For store_format=="h5ad", see also anndata.AnnData.write_h5ad: + - compression, + - compression_opts. + For store_format=="dao", see also sfaira.data.write_dao which relays kwargs to + zarr.hierarchy.create_dataset: + - compressor + - overwrite + - order + and others. + :param chunks: Observation axes of chunk size of zarr array, see anndata.AnnData.write_zarr documentation. + Only relevant for store=="dao". The feature dimension of the chunks is always is the full feature space. + Uses zarr default chunking across both axes if None. 
""" for x in self.dataset_groups: - x.write_distributed_store(dir_cache=dir_cache, store=store, dense=dense, + x.write_distributed_store(dir_cache=dir_cache, store_format=store_format, dense=dense, compression_kwargs=compression_kwargs, chunks=chunks) def write_backed( @@ -1026,7 +1038,7 @@ def write_backed( Example usage: ds = DatasetSuperGroup([...]) - ds.load_all_tobacked( + ds.write_backed( fn_backed="...", target_genome="...", annotated_only=False @@ -1072,11 +1084,11 @@ def write_backed( self._adata_ids.author, self._adata_ids.cell_line, self._adata_ids.dataset, - self._adata_ids.cell_ontology_class, + self._adata_ids.cellontology_class, self._adata_ids.development_stage, self._adata_ids.normalization, self._adata_ids.organ, - self._adata_ids.sample_type, + self._adata_ids.bio_sample, self._adata_ids.state_exact, self._adata_ids.year, ] diff --git a/sfaira/data/base/distributed_store.py b/sfaira/data/base/distributed_store.py index f86237281..310124ef6 100644 --- a/sfaira/data/base/distributed_store.py +++ b/sfaira/data/base/distributed_store.py @@ -1,30 +1,59 @@ +import abc import anndata +import dask.array +import dask.dataframe import numpy as np import os import pandas as pd import pickle import scipy.sparse -from typing import Dict, List, Union +import sys +from typing import Dict, List, Tuple, Union from sfaira.consts import AdataIdsSfaira, OCS from sfaira.data.base.dataset import is_child, UNS_STRING_META_IN_OBS +from sfaira.data.base.io_dao import read_dao from sfaira.versions.genomes import GenomeContainer -from sfaira.versions.metadata import CelltypeUniverse +""" +Distributed stores are array-like classes that sit on groups of on disk representations of anndata instances files. +Depending on the file format of the count matrix on disk, different in memory representations are sensible. 
+In particular, if .X is saved as zarr array, one can use lazy dask arrays to operate across sets of count matrices, +heavily reducing the complexity of the code required here and often increasing access speed. -class DistributedStore: +DistributedStoreBase is base class for any file format on disk. +DistributedStoreZarr is adapted to classes that store an anndata instance as a zarr group. +DistributedStoreH5ad is adapted to classes that store an anndata instance as a h5ad file. + +Note that in all cases, you can use standard anndata reading functions to load a single object into memory. +""" + + +def access_helper(adata, s, e, j, return_dense, obs_keys) -> tuple: + x = adata.X[s:e, :] + # Do dense conversion now so that col-wise indexing is not slow, often, dense conversion + # would be done later anyway. + if return_dense and isinstance(x, scipy.sparse.spmatrix): + x = x.todense() + if j is not None: + x = x[:, j] + obs = adata.obs[obs_keys].iloc[s:e, :] + return x, obs + + +class DistributedStoreBase(abc.ABC): """ Data set group class tailored to data access requirements common in high-performance computing (HPC). This class does not inherit from DatasetGroup because it entirely relies on the cached objects. - This class is centred around .adatas and .indices. + This class is centred around .adata_by_key and .indices. - .adatas is a dictionary (by id) of backed anndata instances that point to individual h5ads. + .adata_by_key is a dictionary (by id) of backed anndata instances that point to individual h5ads. This dictionary is intialised with all h5ads in the store. As the store is subsetted, key-value pairs are deleted from this dictionary. - .indices have keys that correspond to keys in .adatas and contain index vectors of observations in the anndata - instances in .adatas which are still kept. 
+ .indices have keys that correspond to keys in .adata_by_key and contain index vectors of observations in the anndata + instances in .adata_by_key which are still kept. These index vectors are a form of lazy slicing that does not require data set loading or re-writing. As the store is subsetted, key-value pairs are deleted from this dictionary if no observations from a given key match the subsetting. @@ -34,107 +63,53 @@ class DistributedStore: example. """ - adatas: Dict[str, anndata.AnnData] - indices: Dict[str, np.ndarray] - - def __init__(self, cache_path: Union[str, os.PathLike, None] = None): - """ - This class is instantiated on a cache directory which contains pre-processed files in rapid access format. - - Supported and automatically identifed are the formats: - - - h5ad, - - zarr + _adata_by_key: Dict[str, anndata.AnnData] + _indices: Dict[str, np.ndarray] + _obs_by_key: Union[None, Dict[str, dask.dataframe.DataFrame]] - :param cache_path: Directory in which pre-processed .h5ad files lie. - :param genome_container: GenomeContainer with target features space defined. - """ - # Collect all data loaders from files in directory: - adatas = {} - indices = {} - for f in os.listdir(cache_path): - if os.path.isfile(os.path.join(cache_path, f)): # only files - # Narrow down to supported file types: - if f.split(".")[-1] == "h5ad": - adata = anndata.read_h5ad( - filename=os.path.join(cache_path, f), - backed="r", - ) - elif f.split(".")[-1] == "zarr": - # TODO this reads into memory! Might need to directly interface the zarr arrays to work with dask. 
- adata = anndata.read_zarr(os.path.join(cache_path, f)) - else: - adata = None - if adata is not None: - adatas[adata.uns["id"]] = adata - indices[adata.uns["id"]] = np.arange(0, adata.n_obs) - self.adatas = adatas + def __init__(self, adata_by_key: Dict[str, anndata.AnnData], indices: Dict[str, np.ndarray], + obs_by_key: Union[None, Dict[str, dask.dataframe.DataFrame]] = None): + self.adata_by_key = adata_by_key self.indices = indices + self.obs_by_key = obs_by_key self.ontology_container = OCS self._genome_container = None self._adata_ids_sfaira = AdataIdsSfaira() self._celltype_universe = None - @property - def adata(self): - return self.adatas[list(self.adatas.keys())[0]].concatenate( - *[self.adatas[k] for k in list(self.adatas.keys())[1:]], - batch_key="dataset_id", - batch_categories=list(self.adatas.keys()), - ) - - @property - def genome_container(self) -> Union[GenomeContainer, None]: - return self._genome_container - - @genome_container.setter - def genome_container(self, x: GenomeContainer): - var_names = self.__validate_feature_space_homogeneity() - # Validate genome container choice: - # Make sure that all var names defined in genome container are also contained in loaded data sets. 
- assert np.all([y in var_names for y in x.ensembl]), \ - "did not find variable names from genome container in store" - self._genome_container = x + def _validate_idx(self, idx: Union[np.ndarray, list]) -> np.ndarray: + assert np.max(idx) < self.n_obs, f"maximum of supplied index vector {np.max(idx)} exceeds number of modelled " \ + f"observations {self.n_obs}" + assert len(idx) == len(np.unique(idx)), f"there were {len(idx) - len(np.unique(idx))} repeated indices in idx" + if isinstance(idx, np.ndarray): + assert len(idx.shape) == 1, idx.shape + assert idx.dtype == np.int + else: + assert isinstance(idx, list) + assert isinstance(idx[0], int) or isinstance(idx[0], np.int) + idx = np.asarray(idx) + return idx - def __validate_feature_space_homogeneity(self) -> List[str]: + def _validate_feature_space_homogeneity(self) -> List[str]: """ Assert that the data sets which were kept have the same feature names. """ - var_names = self.adatas[list(self.adatas.keys())[0]].var_names.tolist() - for k, v in self.adatas.items(): - assert len(var_names) == len(v.var_names), f"number of features in store differed in object {k}" - assert np.all(var_names == v.var_names), f"var_names in store were not matched in object {k}" + var_names = self._adata_by_key[list(self.indices.keys())[0]].var_names.tolist() + for k, v in self.indices.items(): + assert len(var_names) == len(self._adata_by_key[k].var_names), \ + f"number of features in store differed in object {k} compared to {list(self._adata_by_key.keys())[0]}" + assert np.all(var_names == self._adata_by_key[k].var_names), \ + f"var_names in store were not matched in object {k} compared to {list(self._adata_by_key.keys())[0]}" return var_names - def generator( + def _generator_helper( self, idx: Union[np.ndarray, None] = None, - batch_size: int = 1, - obs_keys: List[str] = [], - return_dense: bool = True, - ) -> iter: - """ - Yields an unbiased generator over observations in the contained data sets. 
- - :param idx: Global idx to query from store. These is an array with indicies corresponding to a contiuous index - along all observations in self.adatas, ordered along a hypothetical concatenation along the keys of - self.adatas. - :param batch_size: Number of observations in each batch (generator invocation). Increasing this may result in - large speed-ups in query time but removes the ability of upstream generators to fully shuffle cells, as - these batches are the smallest data unit that upstream generators can access. - :param obs_keys: .obs columns to return in the generator. These have to be a subset of the columns available - in self.adatas. - :param return_dense: Whether to return count data .X as dense batches. This allows more efficient feature - indexing if the store is sparse (column indexing on csr matrices is slow). - :return: Generator function which yields batch_size at every invocation. - The generator returns a tuple of (.X, .obs) with types: - - - if store format is h5ad: (Union[scipy.sparse.csr_matrix, np.ndarray], pandas.DataFrame) - """ + ) -> Tuple[Union[np.ndarray, None], Union[np.ndarray, None]]: # Make sure that features are ordered in the same way in each object so that generator yields consistent cell # vectors. - _ = self.__validate_feature_space_homogeneity() - var_names_store = self.adatas[list(self.adatas.keys())[0]].var_names.tolist() + _ = self._validate_feature_space_homogeneity() + var_names_store = self.adata_by_key[list(self.indices.keys())[0]].var_names.tolist() # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. 
if self.genome_container is not None: var_names_target = self.genome_container.ensembl @@ -144,65 +119,73 @@ def generator( var_idx = None else: var_idx = None + if idx is not None: + idx = self._validate_idx(idx) + return idx, var_idx - def generator() -> tuple: - global_index_set = self.indices_global - for i, (k, v) in enumerate(self.adatas.items()): - # Define batch partitions: - # Get subset of target indices that fall into this data set. - # Use indices relative to this data (via .index here). - # continuous_slices is evaluated to establish whether slicing can be performed as the potentially - # faster [start:end] or needs to tbe index wise [indices] - if idx is not None: - idx_i = [global_index_set[k].tolist().index(x) for x in idx if x in global_index_set[k]] - idx_i = np.sort(idx_i) - continuous_slices = np.all(idx_i == np.arange(0, v.n_obs)) - else: - idx_i = np.arange(0, v.n_obs) - continuous_slices = True - if len(idx_i) > 0: # Skip data objects without matched cells. - n_obs = len(idx_i) - # Cells left over after batching to batch size, accounting for overhang: - remainder = n_obs % batch_size - batch_starts_ends = [ - (int(x * batch_size), int(x * batch_size) + batch_size) - for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) - ] - # Iterate over batches: - for j, (s, e) in enumerate(batch_starts_ends): - if continuous_slices: - e = idx_i[e] if e < n_obs else n_obs - x = v.X[idx_i[s]:e, :] - else: - x = v.X[idx_i[s:e], :] - # Do dense conversion now so that col-wise indexing is not slow, often, dense conversion - # would be done later anyway. - if return_dense: - x = x.todense() - if var_idx is not None: - x = x[:, var_idx] - if continuous_slices: - e = idx_i[e] if e < n_obs else n_obs - obs = v.obs[obs_keys].iloc[idx_i[s]:e, :] - else: - obs = v.obs[obs_keys].iloc[idx_i[s:e], :] - assert isinstance(obs, pd.DataFrame), f"{type(obs)}" - # Yield current batch. 
- yield x, obs + @property + def adata_by_key(self) -> Dict[str, anndata.AnnData]: + return self._adata_by_key - return generator + @adata_by_key.setter + def adata_by_key(self, x: Dict[str, anndata.AnnData]): + self._adata_by_key = x + + @property + def indices(self) -> Dict[str, np.ndarray]: + return self._indices + + @indices.setter + def indices(self, x: Dict[str, np.ndarray]): + """ + Setter imposes a few constraints on indices: + + 1) checks that keys are contained ._adata_by_key.keys() + 2) checks that indices are contained in size of values of ._adata_by_key + 3) checks that indces are not duplicated + 4) checks that indices are sorted + """ + for k, v in x.items(): + assert k in self._adata_by_key.keys(), f"did not find key {k}" + assert np.max(v) < self._adata_by_key[k].n_obs, f"found index for key {k} that exceeded data set size" + assert len(v) == len(np.unique(v)), f"found duplicated indices for key {k}" + assert np.all(np.diff(v) >= 0), f"indices not sorted for key {k}" + self._indices = x + + @property + def obs_by_key(self) -> Dict[str, Union[pd.DataFrame, dask.dataframe.DataFrame]]: + if self._obs_by_key is not None: + # Run sanity checks to validate that this external obs can be used in the context of .adata_by_key: + assert np.all(list(self._adata_by_key.keys()) == list(self._obs_by_key.keys())) + assert np.all([self._obs_by_key[k].shape[0] == self._adata_by_key[k].shape[0] + for k in self._obs_by_key.keys()]) + return self._obs_by_key + else: + return dict([(k, v.obs) for k, v in self.adata_by_key.items()]) + + @obs_by_key.setter + def obs_by_key(self, x: Union[None, Dict[str, dask.dataframe.DataFrame]]): + if x is not None: + for k, v in x.items(): + if not (isinstance(v, dask.dataframe.DataFrame) or isinstance(v, pd.DataFrame)): + raise ValueError(f"value of entry {k} was not a dask.dataframe.DataFrame but {type(v)}") + self._obs_by_key = x @property - def celltypes_universe(self) -> CelltypeUniverse: - if self._celltype_universe is None: - 
self._celltype_universe = CelltypeUniverse( - cl=self.ontology_container.cellontology_class, - uberon=self.ontology_container.organ, - organism=None, # TODO Does not load extensions! - ) - return self._celltype_universe + def genome_container(self) -> Union[GenomeContainer, None]: + return self._genome_container + + @genome_container.setter + def genome_container(self, x: GenomeContainer): + var_names = self._validate_feature_space_homogeneity() + # Validate genome container choice: + # Make sure that all var names defined in genome container are also contained in loaded data sets. + assert np.all([y in var_names for y in x.ensembl]), \ + "did not find variable names from genome container in store" + self._genome_container = x - def _get_subset_idx(self, attr_key, values: Union[str, List[str]]) -> dict: + def get_subset_idx(self, attr_key, values: Union[str, List[str], None], + excluded_values: Union[str, List[str], None]) -> dict: """ Get indices of subset list of adata objects based on cell-wise properties. @@ -220,13 +203,16 @@ def _get_subset_idx(self, attr_key, values: Union[str, List[str]]) -> dict: - "sample_source" points to self.sample_source_obs_key - "sex" points to self.sex_obs_key - "state_exact" points to self.state_exact_obs_key - :param values: Classes to overlap to. + :param values: Classes to overlap to. Supply either values or excluded_values. + :param excluded_values: Classes to exclude from match list. Supply either values or excluded_values. :return dictionary of files and observation indices by file. 
""" if not isinstance(values, list): values = [values] + assert (values is None or excluded_values is not None) or (values is not None or excluded_values is None), \ + "supply either values or excluded_values" - def get_subset_idx(adata, k, dataset): + def get_idx(adata, obs, k, v, xv, dataset): # Use cell-wise annotation if data set-wide maps are ambiguous: # This can happen if the different cell-wise annotations are summarised as a union in .uns. if getattr(self._adata_ids_sfaira, k) in adata.uns.keys() and \ @@ -240,44 +226,60 @@ def get_subset_idx(adata, k, dataset): values_found = None # Go to cell-wise annotation. else: # Replicate unique property along cell dimension. - values_found = [values_found[0] for i in range(adata.n_obs)] + values_found = [values_found[0] for _ in range(adata.n_obs)] else: values_found = None if values_found is None: - if getattr(self._adata_ids_sfaira, k) in adata.obs.keys(): - values_found = adata.obs[getattr(self._adata_ids_sfaira, k)].values + if getattr(self._adata_ids_sfaira, k) in obs.columns: + values_found = obs[getattr(self._adata_ids_sfaira, k)].values else: - raise ValueError(f"did not find unique attribute {k} in data set {dataset}") + values_found = [] + print(f"WARNING: did not find attribute {k} in data set {dataset}") values_found_unique = np.unique(values_found) try: ontology = getattr(self.ontology_container, k) except AttributeError: raise ValueError(f"{k} not a valid property of ontology_container object") # Test only unique elements found in ontology to save time. - values_found_unique_matched = [ - x for x in values_found_unique if np.any([ - is_child(query=x, ontology=ontology, ontology_parent=y) - for y in values - ]) - ] - # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. 
- print(f"matched cell-wise keys {str(values_found_unique_matched)} in data set {dataset}") + if v is not None: + values_found_unique_matched = [ + x for x in values_found_unique if np.any([ + is_child(query=x, ontology=ontology, ontology_parent=y) + for y in v + ]) + ] + else: + values_found_unique_matched = [ + x for x in values_found_unique if np.all([ + not is_child(query=x, ontology=ontology, ontology_parent=y) + for y in xv + ]) + ] idx = np.where([x in values_found_unique_matched for x in values_found])[0] + if len(idx) > 0: + # TODO keep this logging for now to catch undesired behaviour resulting from loaded edges in ontologies. + print(f"matched keys {str(values_found_unique_matched)} in data set {dataset}") return idx indices = {} - for k, v in self.indices.items(): - idx_old = v.tolist() - if k not in self.adatas.keys(): - raise ValueError(f"data set {k} queried by indices does not exist in store (.adatas)") - idx_new = get_subset_idx(adata=self.adatas[k], k=attr_key, dataset=k) + for key in self.indices.keys(): + if key not in self.adata_by_key.keys(): + raise ValueError(f"data set {key} queried by indices does not exist in store (.adata_by_key)") + # Get indices of idx_old to keep: + adata_k = self.adata_by_key[key] + obs_k = self.obs_by_key[key] + idx_old = self.indices[key] + # Cannot index on view here as indexing on view of views of backed anndata objects is not yet supported. + idx_subset = get_idx(adata=adata_k, obs=obs_k, k=attr_key, v=values, xv=excluded_values, dataset=key) # Keep intersection of old and new hits. 
- idx_new = list(set(idx_old).intersection(set(idx_new))) + idx_new = np.sort(list(set(np.asarray(idx_old).tolist()).intersection( + set(np.asarray(idx_subset).tolist())))) if len(idx_new) > 0: - indices[k] = np.asarray(idx_new, dtype="int32") + indices[key] = np.asarray(idx_new, dtype="int32") return indices - def subset(self, attr_key, values: Union[str, List[str]]): + def subset(self, attr_key, values: Union[str, List[str], None] = None, + excluded_values: Union[str, List[str], None] = None): """ Subset list of adata objects based on cell-wise properties. @@ -297,15 +299,231 @@ def subset(self, attr_key, values: Union[str, List[str]]): - "sample_source" points to self.sample_source_obs_key - "sex" points to self.sex_obs_key - "state_exact" points to self.state_exact_obs_key - :param values: Classes to overlap to. + :param values: Classes to overlap to. Supply either values or excluded_values. + :param excluded_values: Classes to exclude from match list. Supply either values or excluded_values. + """ + self.indices = self.get_subset_idx(attr_key=attr_key, values=values, excluded_values=excluded_values) + if self.n_obs == 0: + print("WARNING: store is now empty.") + + def write_config(self, fn: Union[str, os.PathLike]): + """ + Writes a config file that describes the current data sub-setting. + + This config file can be loaded later to recreate a sub-setting. + This config file contains observation-wise subsetting information. + + :param fn: Output file without file type extension. """ - self.indices = self._get_subset_idx(attr_key=attr_key, values=values) + with open(fn + '.pickle', 'wb') as f: + pickle.dump(self.indices, f) - for k in list(self.adatas.keys()): - if k not in self.indices or self.indices[k].shape[0] == 0: # No observations (cells) left. - del self.adatas[k] + def load_config(self, fn: Union[str, os.PathLike]): + """ + Load a config file and recreates a data sub-setting. + This config file contains observation-wise subsetting information. 
- def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]) -> np.ndarray: + :param fn: Output file without file type extension. + """ + with open(fn, 'rb') as f: + self.indices = pickle.load(f) + # Make sure all declared data sets are found in store: + for x in self.indices.keys(): + if x not in self.adata_by_key.keys(): + raise ValueError(f"did not find object with name {x} in currently loaded universe") + + @property + def var_names(self): + var_names = self._validate_feature_space_homogeneity() + # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. + if self.genome_container is None: + return var_names + else: + return self.genome_container.ensembl + + @property + def n_vars(self): + var_names = self._validate_feature_space_homogeneity() + # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. + if self.genome_container is None: + return len(var_names) + else: + return self.genome_container.n_var + + @property + def n_obs(self): + return np.sum([len(v) for v in self.indices.values()]) + + @property + def shape(self): + return [self.n_obs, self.n_vars] + + @abc.abstractmethod + def generator( + self, + idx: Union[np.ndarray, None] = None, + batch_size: int = 1, + obs_keys: List[str] = [], + return_dense: bool = True, + randomized_batch_access: bool = False, + ) -> iter: + pass + + @property + @abc.abstractmethod + def X(self) -> Union[dask.array.Array, scipy.sparse.csr_matrix]: + pass + + @property + @abc.abstractmethod + def obs(self) -> Union[pd.DataFrame]: + pass + + @abc.abstractmethod + def n_counts(self, idx: Union[np.ndarray, list, None] = None) -> np.ndarray: + pass + + +class DistributedStoreH5ad(DistributedStoreBase): + + def __init__(self, cache_path: Union[str, os.PathLike]): + # Collect all data loaders from files in directory: + adata_by_key = {} + indices = {} + for f in os.listdir(cache_path): + adata = None + trial_path = 
os.path.join(cache_path, f) + if os.path.isfile(trial_path): + # Narrow down to supported file types: + if f.split(".")[-1] == "h5ad": + print(f"Discovered {f} as .h5ad file.") + try: + adata = anndata.read_h5ad( + filename=trial_path, + backed="r", + ) + except OSError as e: + adata = None + print(f"WARNING: for data set {f}: {e}") + if adata is not None: + adata_by_key[adata.uns["id"]] = adata + indices[adata.uns["id"]] = np.arange(0, adata.n_obs) + self._x_as_dask = False + super(DistributedStoreH5ad, self).__init__(adata_by_key=adata_by_key, indices=indices) + + @property + def adata_sliced(self) -> Dict[str, anndata.AnnData]: + """ + Only exposes the subset and slices of the adata instances contained in ._adata_by_key defined in .indices. + """ + return dict([(k, self._adata_by_key[k][v, :]) for k, v in self.indices.items()]) + + @property + def X(self): + assert False + + @property + def obs(self) -> Union[pd.DataFrame]: + """ + Assemble .obs table of subset of selected data. + + :return: .obs data frame. + """ + return pd.concat([ + self._adata_by_key[k].obs.iloc[v, :] + for k, v in self.indices.items() + ], axis=0, join="inner", ignore_index=False, copy=False) + + def n_counts(self, idx: Union[np.ndarray, list, None] = None) -> np.ndarray: + """ + Compute sum over features for each observation in index. + + :param idx: Index vector over observations in object. + :return: Array with sum per observations: (number of observations in index,) + """ + return np.concatenate([ + np.asarray(v.X.sum(axis=1)).flatten() + for v in self.adata_by_key_subset(idx=idx).values() + ], axis=0) + + def generator( + self, + idx: Union[np.ndarray, None] = None, + batch_size: int = 1, + obs_keys: List[str] = [], + return_dense: bool = True, + randomized_batch_access: bool = False, + ) -> iter: + """ + Yields an unbiased generator over observations in the contained data sets. + + :param idx: Global idx to query from store. 
These is an array with indicies corresponding to a contiuous index + along all observations in self.adata_by_key, ordered along a hypothetical concatenation along the keys of + self.adata_by_key. + :param batch_size: Number of observations read from disk in each batched access (generator invocation). + :param obs_keys: .obs columns to return in the generator. These have to be a subset of the columns available + in self.adata_by_key. + :param return_dense: Whether to force return count data .X as dense batches. This allows more efficient feature + indexing if the store is sparse (column indexing on csr matrices is slow). + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. + :return: Generator function which yields batch_size at every invocation. + The generator returns a tuple of (.X, .obs) with types: + + - if store format is h5ad: (Union[scipy.sparse.csr_matrix, np.ndarray], pandas.DataFrame) + """ + idx, var_idx = self._generator_helper(idx=idx) + + def generator(): + adatas_sliced_subset = self.adata_by_key_subset(idx=idx) + key_batch_starts_ends = [] # List of tuples of data set key and (start, end) index set of batches. + for k, adata in adatas_sliced_subset.items(): + n_obs = adata.shape[0] + if n_obs > 0: # Skip data objects without matched cells. 
+ # Cells left over after batching to batch size, accounting for overhang: + remainder = n_obs % batch_size + key_batch_starts_ends_k = [ + (k, (int(x * batch_size), int(np.minimum((x * batch_size) + batch_size, n_obs)))) + for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) + ] + assert np.sum([v2 - v1 for k, (v1, v2) in key_batch_starts_ends_k]) == n_obs + key_batch_starts_ends.extend(key_batch_starts_ends_k) + batch_range = np.arange(0, len(key_batch_starts_ends)) + if randomized_batch_access: + np.random.shuffle(batch_range) + for i in batch_range: + k, (s, e) = key_batch_starts_ends[i] + x, obs = access_helper(adata=adatas_sliced_subset[k], s=s, e=e, j=var_idx, return_dense=return_dense, + obs_keys=obs_keys) + yield x, obs + + return generator + + def adata_by_key_subset(self, idx: Union[np.ndarray, list]) -> Dict[str, anndata.AnnData]: + """ + Subsets adata_by_key as if it was one object, ie behaves the same way as self.adata[idx] without explicitly + concatenating. + """ + if idx is not None: + idx = self._validate_idx(idx) + indices_subsetted = {} + counter = 0 + for k, v in self.indices.items(): + n_obs_k = len(v) + indices_global = np.arange(counter, counter + n_obs_k) + indices_subset_k = [x for x, y in zip(v, indices_global) if y in idx] + if len(indices_subset_k) > 0: + indices_subsetted[k] = indices_subset_k + counter += n_obs_k + assert counter == self.n_obs + return dict([(k, self._adata_by_key[k][v, :]) for k, v in indices_subsetted.items()]) + else: + return self.adata_sliced + + def get_subset_idx_global(self, attr_key, values: Union[str, List[str], None] = None, + excluded_values: Union[str, List[str], None] = None) -> np.ndarray: """ Get indices of subset list of adata objects based on cell-wise properties treating instance as single array. @@ -329,13 +547,15 @@ def subset_cells_idx_global(self, attr_key, values: Union[str, List[str]]) -> np :return Index vector """ # Get indices of of cells in target set by file. 
- idx_by_dataset = self._get_subset_idx(attr_key=attr_key, values=values) + idx_by_dataset = self.get_subset_idx(attr_key=attr_key, values=values, excluded_values=excluded_values) # Translate file-wise indices into global index list across all data sets. idx = [] counter = 0 - for k, v in idx_by_dataset.items(): - idx.extend((v + counter).tolist()) - counter += self.adatas[k].n_obs + for k, v in self.indices.items(): + for x in v: + if k in idx_by_dataset.keys() and x in idx_by_dataset[k]: + idx.append(counter) + counter += 1 return np.asarray(idx) @property @@ -343,6 +563,11 @@ def indices_global(self) -> dict: """ Increasing indices across data sets which can be concatenated into a single index vector with unique entries for cells. + + E.g.: For two data sets of 10 cells each, the return value would be {A:[0..9], B:[10..19]}. + Note that this operates over pre-selected indices, if this store was subsetted before resulting in only the + second half B to be kept, the return value would be {A:[0..9], B:[10..14]}, where .indices would be + {A:[0..9], B:[15..19]}. """ counter = 0 indices = {} @@ -351,65 +576,163 @@ def indices_global(self) -> dict: counter += len(v) return indices - def write_config(self, fn: Union[str, os.PathLike]): - """ - Writes a config file that describes the current data sub-setting. - This config file can be loaded later to recreate a sub-setting. - This config file contains observation-wise subsetting information. +class DistributedStoreDao(DistributedStoreBase): - :param fn: Output file without file type extension. + def __init__(self, cache_path: Union[str, os.PathLike], columns: Union[None, List[str]] = None): """ - with open(fn + '.pickle', 'wb') as f: - pickle.dump(self.indices, f) - def load_config(self, fn: Union[str, os.PathLike]): + :param cache_path: Store directory. + :param columns: Which columns to read into the obs copy in the output, see pandas.read_parquet(). """ - Load a config file and recreates a data sub-setting. 
- This config file contains observation-wise subsetting information. - - :param fn: Output file without file type extension. - """ - with open(fn, 'rb') as f: - self.indices = pickle.load(f) - # Subset to described data sets: - for x in self.indices.keys(): - if x not in self.adatas.keys(): - raise ValueError(f"did not find object with name {x} in currently loaded universe") - # Only retain data sets with which are mentioned in config file. - self.subset(attr_key="id", values=list(self.indices.keys())) + # Collect all data loaders from files in directory: + adata_by_key = {} + indices = {} + for f in os.listdir(cache_path): + adata = None + trial_path = os.path.join(cache_path, f) + if os.path.isdir(trial_path): + # zarr-backed anndata are saved as directories with the elements of the array group as further sub + # directories, e.g. a directory called "X", and a file ".zgroup" which identifies the zarr group. + if [".zgroup" in os.listdir(trial_path)]: + adata = read_dao(trial_path, use_dask=True, columns=columns, obs_separate=False) + print(f"Discovered {f} as zarr group, " + f"sized {round(sys.getsizeof(adata) / np.power(1024, 2), 1)}MB") + if adata is not None: + adata_by_key[adata.uns["id"]] = adata + indices[adata.uns["id"]] = np.arange(0, adata.n_obs) + self._x_as_dask = True + super(DistributedStoreDao, self).__init__(adata_by_key=adata_by_key, indices=indices, obs_by_key=None) @property - def var_names(self): - var_names = self.__validate_feature_space_homogeneity() - # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. 
- if self.genome_container is None: - return var_names - else: - return self.genome_container.ensembl + def X(self) -> Union[dask.array.Array]: + assert np.all([isinstance(self._adata_by_key[k].X, dask.array.Array) for k in self.indices.keys()]) + return dask.array.vstack([ + self._adata_by_key[k].X[v, :] + for k, v in self.indices.items() + ]) @property - def n_vars(self): - var_names = self.__validate_feature_space_homogeneity() - # Use feature space sub-selection based on assembly if provided, will use full feature space otherwise. - if self.genome_container is None: - return len(var_names) - else: - return self.genome_container.n_var + def obs(self) -> Union[pd.DataFrame]: + """ + Assemble .obs table of subset of selected data. - @property - def n_obs(self): - return np.sum([len(v) for v in self.indices.values()]) + Resulting index is increasing integers starting with zero. - @property - def shape(self): - return [self.n_obs, self.n_vars] + :return: .obs data frame. + """ + # TODO Using loc indexing here instead of iloc, this might be faster on larger tables? + return pd.concat([ + self.adata_by_key[k].obs.loc[self.adata_by_key[k].obs.index[v], :] + for k, v in self.indices.items() + ], axis=0, join="inner", ignore_index=True, copy=False) - @property - def obs(self) -> pd.DataFrame: + def n_counts(self, idx: Union[np.ndarray, list, None] = None) -> np.ndarray: """ - Assemble .obs table of subset of full data. + Compute sum over features for each observation in index. - :return: .obs data frame. + :param idx: Index vector over observations in object. 
+ :return: Array with sum per observations: (number of observations in index,) """ - return pd.concat([v.obs for v in self.adatas.values()], axis=0) + return np.asarray(self.X.sum(axis=1)).flatten() + + def generator( + self, + idx: Union[np.ndarray, None] = None, + batch_size: int = 1, + obs_keys: List[str] = [], + return_dense: bool = True, + randomized_batch_access: bool = False, + random_access: bool = False, + ) -> iter: + """ + Yields an unbiased generator over observations in the contained data sets. + + :param idx: Global idx to query from store. These is an array with indicies corresponding to a contiuous index + along all observations in self.adata_by_key, ordered along a hypothetical concatenation along the keys of + self.adata_by_key. + :param batch_size: Number of observations read from disk in each batched access (generator invocation). + :param obs_keys: .obs columns to return in the generator. These have to be a subset of the columns available + in self.adata_by_key. + :param return_dense: Whether to force return count data .X as dense batches. This allows more efficient feature + indexing if the store is sparse (column indexing on csr matrices is slow). + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. + Do not use randomized_batch_access and random_access. + :param random_access: Whether to fully shuffle observations before batched access takes place. May + slow down access compared randomized_batch_access and to no randomization. + Do not use randomized_batch_access and random_access. + :return: Generator function which yields batch_size at every invocation. 
+ The generator returns a tuple of (.X, .obs) with types: + + - if store format is h5ad: (Union[scipy.sparse.csr_matrix, np.ndarray], pandas.DataFrame) + """ + idx, var_idx = self._generator_helper(idx=idx) + if randomized_batch_access and random_access: + raise ValueError("Do not use randomized_batch_access and random_access.") + + def generator(): + # Can treat full data set as a single array because dask keeps expression data and obs out of memory. + x = self.X[idx, :] + obs = self.obs.iloc[idx, :] + # Redefine index so that .loc indexing can be used instead of .iloc indexing: + obs.index = np.arange(0, obs.shape[0]) + n_obs = x.shape[0] + remainder = n_obs % batch_size + assert n_obs == obs.shape[0] + batch_starts_ends = [ + (int(x * batch_size), int(np.minimum((x * batch_size) + batch_size, n_obs))) + for x in np.arange(0, n_obs // batch_size + int(remainder > 0)) + ] + batch_range = np.arange(0, len(batch_starts_ends)) + if randomized_batch_access: + np.random.shuffle(batch_range) + epoch_indices = np.arange(0, n_obs) + if random_access: + np.random.shuffle(epoch_indices) + for i in batch_range: + s, e = batch_starts_ends[i] + # Feature indexing: Run in same operation as observation index so that feature chunking can be + # efficiently used if available. TODO does this make a difference in dask? + if random_access: + if var_idx is not None: + x_i = x[epoch_indices[s:e], var_idx] + else: + x_i = x[epoch_indices[s:e], :] + else: + # Use slicing because observations accessed in batch are ordered in data set: + # Note that epoch_indices[i] == i if not random_access. 
+ if var_idx is not None: + x_i = x[s:e, var_idx] + else: + x_i = x[s:e, :] + # Exploit fact that index of obs is just increasing list of integers, so we can use the .loc[] indexing + # instead of .iloc[]: + obs_i = obs[obs_keys].loc[epoch_indices[s:e].tolist(), :] + yield x_i, obs_i + + return generator + + +def load_store(cache_path: Union[str, os.PathLike], store_format: str = "dao", + columns: Union[None, List[str]] = None) -> Union[DistributedStoreH5ad, DistributedStoreDao]: + """ + Instantiates a distributed store class. + + :param cache_path: Store directory. + :param store_format: Format of store {"h5ad", "dao"}. + + - "h5ad": Returns instance of DistributedStoreH5ad. + - "dao": Returns instance of DistributedStoreDoa (distributed access optimized). + :param columns: Which columns to read into the obs copy in the output, see pandas.read_parquet(). + Only relevant if store_format is "dao". + :return: Instances of a distributed store class. + """ + if store_format == "h5ad": + return DistributedStoreH5ad(cache_path=cache_path) + elif store_format == "dao": + return DistributedStoreDao(cache_path=cache_path, columns=columns) + else: + raise ValueError(f"Did not recognize store_format {store_format}.") diff --git a/sfaira/data/base/io_dao.py b/sfaira/data/base/io_dao.py new file mode 100644 index 000000000..87402d1be --- /dev/null +++ b/sfaira/data/base/io_dao.py @@ -0,0 +1,133 @@ +import anndata +import dask.array +import dask.dataframe +import numpy as np +import os +import pandas as pd +from pathlib import Path +import pickle +import scipy.sparse +from typing import List, Tuple, Union +import zarr + + +def _buffered_path(path_base, path, fn): + path_base = os.path.join(path_base, path) + if not os.path.exists(path_base): + os.makedirs(path_base) + return os.path.join(path_base, fn) + + +def path_obs(path): + return _buffered_path(path_base=path, path="parquet", fn="obs.parquet") + + +def path_var(path): + return _buffered_path(path_base=path, path="parquet", 
fn="var.parquet") + + +def path_uns(path): + return _buffered_path(path_base=path, path="pickle", fn="uns.pickle") + + +def path_x(path): + if not os.path.exists(path): + os.makedirs(path) + return os.path.join(path, "zarr") + + +def write_dao(store: Union[str, Path], adata: anndata.AnnData, chunks: Union[bool, Tuple[int, int]], + compression_kwargs: dict): + """ + Writes a distributed access optimised ("dao") store of a dataset based on an AnnData instance. + + The following components are saved: + - .X: as zarr array which can be interfaced with zarr or dask (or xarray). + - .obs: as parquet table which can be interfaced with pandas.DataFrame (and dask.dataframe.DataFrame). + - .var: as parquet table which can be interfaced with pandas.DataFrame (and dask.dataframe.DataFrame). + - .uns: as a pickle to be flexible with values here. + + TODO: If layers become relevant for this store, they can be added into the zarr group. + TODO: If obsp, varp become relevant for this store, they can be added into the zarr group. + TODO: If obsm, varm become relevant for this store, they can be added into the zarr group. + + :param store: File name of the store (zarr group). + :param adata: Anndata to save. + :param chunks: Chunking of .X for zarr. + :param compression_kwargs: Compression kwargs for zarr. + """ + # Write numeric matrix as zarr array: + f = zarr.open(store=path_x(store), mode="w") + # If adata.X is already a dense array in memory, it can be safely written fully to a zarr array. Otherwise: + # Create empty store and then write in dense chunks to avoid having to load entire adata.X into a dense array in + # memory. 
+ if isinstance(adata.X, np.ndarray) or isinstance(adata.X, np.matrix): + f.create_dataset("X", data=adata.X.todense(), chunks=chunks, dtype=adata.X.dtype, **compression_kwargs) + elif isinstance(adata.X, scipy.sparse.csr_matrix): + # Initialise empty array + dtype = adata.X.data.dtype + shape = adata.X.shape + f.create_dataset("X", shape=shape, dtype=dtype, fill_value=0., chunks=chunks, **compression_kwargs) + batch_size = 128 # Use a batch size that guarantees that the dense batch fits easily into memory. + n_batches = shape[0] // batch_size + int(shape[0] % batch_size > 0) + batches = [(i * batch_size, min(i * batch_size + batch_size, shape[0])) for i in range(n_batches)] + for s, e in batches: + f["X"][s:e, :] = np.asarray(adata.X[s:e, :].todense(), dtype=dtype) + else: + raise ValueError(f"did not recognise array format {type(adata.X)}") + # Write .uns into pickle: + with open(path_uns(store), "wb") as f: + pickle.dump(obj=adata.uns, file=f) + # Write .obs and .var as a separate file as this can be easily interfaced with DataFrames. + adata.obs.to_parquet(path=path_obs(store), engine='pyarrow', compression='snappy', index=None) + adata.var.to_parquet(path=path_var(store), engine='pyarrow', compression='snappy', index=None) + + +def read_dao(store: Union[str, Path], use_dask: bool = True, columns: Union[None, List[str]] = None, + obs_separate: bool = False) -> \ + Union[Tuple[anndata.AnnData, Union[dask.dataframe.DataFrame, pd.DataFrame]], anndata.AnnData]: + """ + Assembles an AnnData instance based on distributed access optimised ("dao") store of a dataset. + + See write_distributed_access_optimised() for the expected format of the store. + In particular, if use_dask is True: + + - .X is interfaced as a dask Array + + Can return representation of .obs separately, which makes sense if a HPC framework is used for this tabular format + which is not supported by AnnData as an .obs entry. + + :param store: Path to zarr group. 
+ :param use_dask: Whether to use lazy dask arrays where appropriate. + :param columns: Which columns to read into the obs copy in the output, see pandas.read_parquet(). + :param obs_separate: Whether to return .obs as a separate return value or in the returned AnnData. + :return: Tuple of: + - AnnData with .X as dask array. + - obs table separately as dataframe + """ + if use_dask: + x = dask.array.from_zarr(url=path_x(store), component="X") + else: + f = zarr.open(path_x(store), mode="r") + x = f["X"] # Select member of group. + x = x[...] # Load into memory. + # Read pickle: + with open(path_uns(store), "rb") as f: + uns = pickle.load(file=f) + # Read tables: + obs = pd.read_parquet(path_obs(store), columns=columns, engine="pyarrow") + var = pd.read_parquet(path_var(store), engine="pyarrow") + # Convert to categorical variables where possible to save memory: + for k, dtype in zip(list(obs.columns), obs.dtypes): + if dtype == "object": + obs[k] = obs[k].astype(dtype="category") + d = {"var": var, "uns": uns} + # Assemble AnnData without obs to save memory: + adata = anndata.AnnData(**d, shape=x.shape) + # Need to add these attributes after initialisation so that they are not evaluated: + adata.X = x + if obs_separate: + return adata, obs + else: + adata.obs = obs + return adata diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py index f2970cf7d..b4cd6daff 100644 --- a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py +++ b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py @@ -26,8 +26,8 @@ def __init__( self._adata_ids_cellxgene = AdataIdsCellxgene() self.fn = fn - self.cellontology_class_obs_key = self._adata_ids_cellxgene.cell_ontology_class - self.cellontology_id_obs_key = self._adata_ids_cellxgene.cell_ontology_id + self.cellontology_class_obs_key = self._adata_ids_cellxgene.cellontology_class + self.cellontology_id_obs_key = 
self._adata_ids_cellxgene.cellontology_id self.cell_types_original_obs_key = self._adata_ids_cellxgene.cell_types_original self.development_stage_obs_key = self._adata_ids_cellxgene.development_stage self.disease_obs_key = self._adata_ids_cellxgene.disease diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index b0c668d92..cba511374 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -38,7 +38,8 @@ def __init__(self, **kwargs): self.year = 2019 self.sample_source = "primary_tissue" - self.gene_id_symbols_var_key = "index" + self.gene_id_symbols_var_key = "names" + self.gene_id_ensembl_var_key = "ensembl" self.cell_types_original_obs_key = "celltypes" self.set_dataset_id(idx=1) diff --git a/sfaira/data/utils_scripts/create_anatomical_configs.py b/sfaira/data/utils_scripts/create_anatomical_configs.py deleted file mode 100644 index 294563e9d..000000000 --- a/sfaira/data/utils_scripts/create_anatomical_configs.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -import sys -import tensorflow as tf - -# Any data loader here to extract path: -from sfaira.data import Universe - -print(tf.__version__) - -# Set global variables. 
-print("sys.argv", sys.argv) - -config_path = str(sys.argv[1]) - - -def clean(s): - if s is not None: - s = s.replace(' ', '').replace('-', '').replace('_', '').replace("'", '').lower() - return s - - -configs_to_write = { - "human": [ - "adipose tissue", - "adrenal gland", - "artery", - "blood", - "bone marrow", - "brain", - "chorionic villus", - "diaphragm", - "esophagus", - "eye", - "gall bladder", - "heart", - "intestine", - "kidney", - "liver", - "lung", - "muscle organ", - "ovary", - "pancreas", - "placenta", - "pleura", - "prostate gland", - "rib", - "skeleton", - "skin of body", - "spinal cord", - "spleen", - "stomach", - "testis", - "tongue", - "thymus", - "thyroid gland", - "trachea", - "ureter", - "urinary bladder", - "uterine cervix", - "uterus", - "vault of skull", - ], - "mouse": [ - "adipose tissue", - "blood", - "bone marrow", - "brain", - "diaphragm", - "heart", - "intestine", - "kidney", - "liver", - "lung", - "mammary gland", - "muscle organ", - "ovary", - "pancreas", - "placenta", - "prostate gland", - "skin of body", - "spleen", - "stomach", - "testis", - "thymus", - "tongue", - "trachea", - "urinary bladder", - "uterus", - ] -} - -for organism, organs in configs_to_write.items(): - for organ in organs: - print(f"Writing {organism} {organ}") - dsgs = Universe( - data_path=".", - meta_path=".", - cache_path="." 
- ) - dsgs.subset(key="organism", values=[organism]) - dsgs.subset(key="organ", values=[organ]) - dsgs.write_config(os.path.join(config_path, f"config_{clean(organism)}_{clean(organ)}.csv")) diff --git a/sfaira/data/utils_scripts/create_anatomical_configs_store.py b/sfaira/data/utils_scripts/create_anatomical_configs_store.py index 925a1e23b..cb2e2ce36 100644 --- a/sfaira/data/utils_scripts/create_anatomical_configs_store.py +++ b/sfaira/data/utils_scripts/create_anatomical_configs_store.py @@ -3,7 +3,7 @@ import tensorflow as tf # Any data loader here to extract path: -from sfaira.data import DistributedStore, clean_string +from sfaira.data import load_store, clean_string print(tf.__version__) @@ -49,7 +49,6 @@ "trachea", "ureter", "urinary bladder", - "uterine cervix", "uterus", "vault of skull", ], @@ -85,7 +84,8 @@ for organism, organs in configs_to_write.items(): for organ in organs: print(f"Writing {organism} {organ}") - store = DistributedStore(cache_path=store_path) + store = load_store(cache_path=store_path) + store.subset(attr_key="sample_source", values=["primary_tissue"]) store.subset(attr_key="organism", values=[organism]) store.subset(attr_key="organ", values=[organ]) store.write_config(os.path.join(config_path, f"config_{clean_string(organism)}_{clean_string(organ)}")) diff --git a/sfaira/data/utils_scripts/create_meta.py b/sfaira/data/utils_scripts/create_meta.py index 97930cdaf..0f227f9ce 100644 --- a/sfaira/data/utils_scripts/create_meta.py +++ b/sfaira/data/utils_scripts/create_meta.py @@ -22,8 +22,6 @@ def write_meta(args0, args1): ) ds.load( annotated_only=False, - match_to_reference=None, - remove_gene_version=True, load_raw=True, allow_caching=False, processes=processes, diff --git a/sfaira/data/utils_scripts/create_target_universes.py b/sfaira/data/utils_scripts/create_target_universes.py index d75c0c80a..ec26c17da 100644 --- a/sfaira/data/utils_scripts/create_target_universes.py +++ b/sfaira/data/utils_scripts/create_target_universes.py @@ 
-1,10 +1,10 @@ -import numpy as np import os import sys -import tensorflow as tf # Any data loader here to extract path: -from sfaira.data import DistributedStore +from sfaira.consts import OCS +from sfaira.data import load_store +from sfaira.versions.metadata import CelltypeUniverse, OntologyCl # Set global variables. @@ -15,6 +15,7 @@ out_path = str(sys.argv[3]) col_name_annot = "cell_ontology_class" +cl_branch = "v2021-02-01" for f in os.listdir(config_path): fn = os.path.join(config_path, f) @@ -24,7 +25,7 @@ print(f"Writing target universe for {f}") organism = f.split("_")[1] organ = f.split("_")[2].split(".")[0] - store = DistributedStore(cache_path=store_path) + store = load_store(cache_path=store_path) store.load_config(fn=fn) celltypes_found = set([]) for k, idx in store.indices.items(): @@ -40,8 +41,12 @@ if len(celltypes_found) == 0: print(f"WARNING: No cells found for {organism} {organ}, skipping.") else: - celltypes_found = store.celltypes_universe.onto_cl.get_effective_leaves(x=celltypes_found) - store.celltypes_universe.write_target_universe( + celltypes_universe = CelltypeUniverse( + cl=OntologyCl(branch=cl_branch), + uberon=OCS.organ, + ) + celltypes_found = celltypes_universe.onto_cl.get_effective_leaves(x=celltypes_found) + celltypes_universe.write_target_universe( fn=os.path.join(out_path, f"targets_{organism}_{organ}.csv"), x=celltypes_found, ) diff --git a/sfaira/data/utils_scripts/write_store.py b/sfaira/data/utils_scripts/write_store.py index f99e13c73..9b3f319f5 100644 --- a/sfaira/data/utils_scripts/write_store.py +++ b/sfaira/data/utils_scripts/write_store.py @@ -9,11 +9,29 @@ path_meta = str(sys.argv[2]) path_cache = str(sys.argv[3]) path_store = str(sys.argv[4]) +store_type = str(sys.argv[5]).lower() + +# On disk format hyperparameters +if store_type == "h5ad": + # Write sparse arrays in h5ad. + kwargs = {"dense": False} + compression_kwargs = {} +elif store_type == "dao": + # Write dense arrays in zarr. 
+ kwargs = {"dense": True, "chunks": 128} + compression_kwargs = {"compressor": "default", "overwrite": True, "order": "C"} +else: + assert False, store_type universe = sfaira.data.dataloaders.Universe(data_path=data_path, meta_path=path_meta, cache_path=path_cache) for k, ds in universe.datasets.items(): - fn_store = os.path.join(path_store, ds.doi_cleaned_id + ".h5ad") + if store_type == "h5ad": + fn_store = os.path.join(path_store, ds.doi_cleaned_id + ".h5ad") + elif store_type == "dao": + fn_store = os.path.join(path_store, ds.doi_cleaned_id) + else: + assert False if os.path.exists(fn_store): print(f"SCRIPT skipping {k}") else: @@ -27,7 +45,8 @@ match_to_reference={"human": "Homo_sapiens.GRCh38.102", "mouse": "Mus_musculus.GRCm38.102"}, subset_genes_to_type="protein_coding" ) - ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=True, clean_var=True, clean_uns=True, + ds.streamline_metadata(schema="sfaira", uns_to_obs=True, clean_obs=True, clean_var=True, clean_uns=True, clean_obs_names=True) - ds.write_distributed_store(dir_cache=path_store, store="h5ad", dense=False) + ds.write_distributed_store(dir_cache=path_store, store_format=store_type, compression_kwargs=compression_kwargs, + **kwargs) ds.clear() diff --git a/sfaira/estimators/keras.py b/sfaira/estimators/keras.py index 297851c30..2ba21fbf5 100644 --- a/sfaira/estimators/keras.py +++ b/sfaira/estimators/keras.py @@ -2,7 +2,6 @@ import anndata import hashlib import numpy as np -import pandas as pd import scipy.sparse try: import tensorflow as tf @@ -13,13 +12,13 @@ import warnings from tqdm import tqdm -from sfaira.consts import AdataIdsSfaira, OCS -from sfaira.data import DistributedStore +from sfaira.consts import AdataIdsSfaira, OCS, AdataIds +from sfaira.data import DistributedStoreBase from sfaira.models import BasicModelKeras -from sfaira.versions.metadata import CelltypeUniverse, OntologyCl +from sfaira.versions.metadata import CelltypeUniverse, OntologyCl, OntologyObo from 
sfaira.versions.topologies import TopologyContainer from .losses import LossLoglikelihoodNb, LossLoglikelihoodGaussian, LossCrossentropyAgg, KLLoss -from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, custom_kl, \ +from .metrics import custom_mse, custom_negll_nb, custom_negll_gaussian, \ CustomAccAgg, CustomF1Classwise, CustomFprClasswise, CustomTprClasswise, custom_cce_agg @@ -41,7 +40,7 @@ class EstimatorKeras: """ Estimator base class for keras models. """ - data: Union[anndata.AnnData, DistributedStore] + data: Union[anndata.AnnData, DistributedStoreBase] model: Union[BasicModelKeras, None] topology_container: Union[TopologyContainer, None] model_id: Union[str, None] @@ -52,16 +51,18 @@ class EstimatorKeras: idx_train: Union[np.ndarray, None] idx_eval: Union[np.ndarray, None] idx_test: Union[np.ndarray, None] + adata_ids: AdataIds def __init__( self, - data: Union[anndata.AnnData, np.ndarray, DistributedStore], + data: Union[anndata.AnnData, np.ndarray, DistributedStoreBase], model_dir: Union[str, None], model_class: str, model_id: Union[str, None], model_topology: TopologyContainer, weights_md5: Union[str, None] = None, - cache_path: str = os.path.join('cache', '') + cache_path: str = os.path.join('cache', ''), + adata_ids: AdataIds = AdataIdsSfaira() ): self.data = data self.model = None @@ -70,7 +71,7 @@ def __init__( self.model_class = model_class self.topology_container = model_topology # Prepare store with genome container sub-setting: - if isinstance(self.data, DistributedStore): + if isinstance(self.data, DistributedStoreBase): self.data.genome_container = self.topology_container.gc self.history = None @@ -80,7 +81,7 @@ def __init__( self.idx_test = None self.md5 = weights_md5 self.cache_path = cache_path - self._adata_ids = AdataIdsSfaira() + self._adata_ids = adata_ids @property def model_type(self): @@ -115,9 +116,10 @@ def load_pretrained_weights(self): fn = os.path.join(self.cache_path, f"{self.model_id}_weights.h5") except 
HTTPError: try: - urllib.request.urlretrieve(urljoin(self.model_dir, f'{self.model_id}_weights.data-00000-of-00001'), - os.path.join(self.cache_path, f'{self.model_id}_weights.data-00000-of-00001') - ) + urllib.request.urlretrieve( + urljoin(self.model_dir, f'{self.model_id}_weights.data-00000-of-00001'), + os.path.join(self.cache_path, f'{self.model_id}_weights.data-00000-of-00001') + ) fn = os.path.join(self.cache_path, f"{self.model_id}_weights.data-00000-of-00001") except HTTPError: raise FileNotFoundError('cannot find remote weightsfile') @@ -166,8 +168,8 @@ def init_model(self, clear_weight_cache=True, override_hyperpar=None): file_path = os.path.join(os.path.join(self.cache_path, 'weights'), file) os.remove(file_path) + @staticmethod def _assert_md5_sum( - self, fn, target_md5 ): @@ -186,7 +188,9 @@ def _get_dataset( cache_full: bool, weighted: bool, retrieval_batch_size: int, - ): + randomized_batch_access: bool, + prefetch: Union[int, None], + ) -> tf.data.Dataset: pass def _get_class_dict( @@ -232,11 +236,10 @@ def _prepare_data_matrix(self, idx: Union[np.ndarray, None]) -> scipy.sparse.csr x = x[idx, :] # If the feature space is already mapped to the right reference, return the data matrix immediately - if 'mapped_features' in self.data.uns_keys(): - if self.data.uns[self._adata_ids.mapped_features] == \ - self.topology_container.gc.assembly: - print(f"found {x.shape[0]} observations") - return x + if self._adata_ids.mapped_features in self.data.uns_keys() and \ + self.data.uns[self._adata_ids.mapped_features] == self.topology_container.gc.assembly: + print(f"found {x.shape[0]} observations") + return x # Compute indices of genes to keep data_ids = self.data.var[self._adata_ids.gene_id_ensembl].values.tolist() @@ -271,6 +274,54 @@ def _compile_models( metrics=self._metrics() ) + def split_train_val_test(self, val_split: float, test_split: Union[float, dict]): + # Split training and evaluation data. 
+ np.random.seed(1) + all_idx = np.arange(0, self.data.n_obs) # n_obs is both a property of AnnData and DistributedStoreBase + if isinstance(test_split, float) or isinstance(test_split, int): + self.idx_test = np.sort(np.random.choice( + a=all_idx, + size=round(self.data.n_obs * test_split), + replace=False, + )) + elif isinstance(test_split, dict): + in_test = np.ones((self.data.n_obs,), dtype=int) == 1 + for k, v in test_split.items(): + if isinstance(v, bool) or isinstance(v, int) or isinstance(v, list): + v = [v] + if isinstance(self.data, anndata.AnnData): + if k not in self.data.obs.columns: + raise ValueError(f"Did not find column {k} used to define test set in self.data.") + in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) + elif isinstance(self.data, DistributedStoreBase): + idx = self.data.get_subset_idx_global(attr_key=k, values=v) + in_test_k = np.ones((self.data.n_obs,), dtype=int) == 0 + in_test_k[idx] = True + in_test = np.logical_and(in_test, in_test_k) + else: + assert False + self.idx_test = np.sort(np.where(in_test)[0]) + print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") + print(self.idx_test) + else: + raise ValueError("type of test_split %s not recognized" % type(test_split)) + idx_train_eval = np.array([x for x in all_idx if x not in self.idx_test]) + np.random.seed(1) + self.idx_eval = np.sort(np.random.choice( + a=idx_train_eval, + size=round(len(idx_train_eval) * val_split), + replace=False + )) + self.idx_train = np.sort([x for x in idx_train_eval if x not in self.idx_eval]) + + # Check that none of the train, test, eval partitions are empty + if not len(self.idx_test): + warnings.warn("Test partition is empty!") + if not len(self.idx_eval): + raise ValueError("The evaluation dataset is empty.") + if not len(self.idx_train): + raise ValueError("The train dataset is empty.") + def train( self, optimizer: str, @@ -282,12 +333,15 @@ def train( 
test_split: Union[float, dict] = 0., validation_batch_size: int = 256, max_validation_steps: Union[int, None] = 10, - cache_full: bool = False, patience: int = 20, lr_schedule_min_lr: float = 1e-5, lr_schedule_factor: float = 0.2, lr_schedule_patience: int = 5, - shuffle_buffer_size: int = int(1e4), + shuffle_buffer_size: Union[int, None] = None, + cache_full: bool = False, + randomized_batch_access: bool = True, + retrieval_batch_size: int = 512, + prefetch: Union[int, None] = 1, log_dir: Union[str, None] = None, callbacks: Union[list, None] = None, weighted: bool = False, @@ -314,6 +368,11 @@ def train( when plateau is reached. :param lr_schedule_patience: Patience for learning rate reduction in learning rate reduction schedule. :param shuffle_buffer_size: tf.Dataset.shuffle(): buffer_size argument. + :param cache_full: Whether to use tensorflow caching on full training and validation data. + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. :param log_dir: Directory to save tensorboard callback to. Disabled if None. :param callbacks: Add additional callbacks to the training call :param weighted: @@ -385,60 +444,36 @@ def train( # callbacks needs to be a list cbs += callbacks - # Split training and evaluation data. 
- np.random.seed(1) - all_idx = np.arange(0, self.data.n_obs) # n_obs is both a property of AnnData and DistributedStore - if isinstance(test_split, float) or isinstance(test_split, int): - self.idx_test = np.random.choice( - a=all_idx, - size=round(self.data.n_obs * test_split), - replace=False, - ) - elif isinstance(test_split, dict): - in_test = np.ones((self.data.n_obs,), dtype=int) == 1 - for k, v in test_split.items(): - if isinstance(v, list): - in_test = np.logical_and(in_test, np.array([x in v for x in self.data.obs[k].values])) - else: - in_test = np.logical_and(in_test, self.data.obs[k].values == v) - self.idx_test = np.where(in_test)[0] - print(f"Found {len(self.idx_test)} out of {self.data.n_obs} cells that correspond to held out data set") - print(self.idx_test) - else: - raise ValueError("type of test_split %s not recognized" % type(test_split)) - idx_train_eval = np.array([x for x in all_idx if x not in self.idx_test]) - np.random.seed(1) - self.idx_eval = np.random.choice( - a=idx_train_eval, - size=round(len(idx_train_eval) * validation_split), - replace=False - ) - self.idx_train = np.array([x for x in idx_train_eval if x not in self.idx_eval]) - - # Check that none of the train, test, eval partitions are empty - if not len(self.idx_test): - warnings.warn("Test partition is empty!") - if not len(self.idx_eval): - raise ValueError("The evaluation dataset is empty.") - if not len(self.idx_train): - raise ValueError("The train dataset is empty.") - + # Check randomisation settings: + if shuffle_buffer_size is not None and shuffle_buffer_size > 0 and randomized_batch_access: + raise ValueError("You are using shuffle_buffer_size and randomized_batch_access, this is likely not " + "intended.") + if cache_full and randomized_batch_access: + raise ValueError("You are using cache_full and randomized_batch_access, this is likely not intended.") + self.split_train_val_test(val_split=validation_split, test_split=test_split) 
self._compile_models(optimizer=optim) + shuffle_buffer_size = shuffle_buffer_size if shuffle_buffer_size is not None else 0 train_dataset = self._get_dataset( idx=self.idx_train, batch_size=batch_size, + retrieval_batch_size=retrieval_batch_size, mode='train', shuffle_buffer_size=min(shuffle_buffer_size, len(self.idx_train)), weighted=weighted, cache_full=cache_full, + randomized_batch_access=randomized_batch_access, + prefetch=prefetch, ) eval_dataset = self._get_dataset( idx=self.idx_eval, batch_size=validation_batch_size, + retrieval_batch_size=retrieval_batch_size, mode='train_val', shuffle_buffer_size=min(shuffle_buffer_size, len(self.idx_eval)), weighted=weighted, cache_full=cache_full, + randomized_batch_access=randomized_batch_access, + prefetch=prefetch, ) steps_per_epoch = min(max(len(self.idx_train) // batch_size, 1), max_steps_per_epoch) @@ -454,17 +489,9 @@ def train( verbose=verbose ).history - def get_citations(self): - """ - Return papers to cite when using this model. - - :return: - """ - raise NotImplementedError() - @property def using_store(self) -> bool: - return isinstance(self.data, DistributedStore) + return isinstance(self.data, DistributedStoreBase) @property def obs_train(self): @@ -486,12 +513,13 @@ class EstimatorKerasEmbedding(EstimatorKeras): def __init__( self, - data: Union[anndata.AnnData, np.ndarray], + data: Union[anndata.AnnData, np.ndarray, DistributedStoreBase], model_dir: Union[str, None], model_id: Union[str, None], model_topology: TopologyContainer, weights_md5: Union[str, None] = None, - cache_path: str = os.path.join('cache', '') + cache_path: str = os.path.join('cache', ''), + adata_ids: AdataIds = AdataIdsSfaira() ): super(EstimatorKerasEmbedding, self).__init__( data=data, @@ -500,7 +528,8 @@ def __init__( model_id=model_id, model_topology=model_topology, weights_md5=weights_md5, - cache_path=cache_path + cache_path=cache_path, + adata_ids=adata_ids ) def init_model( @@ -548,7 +577,8 @@ def _get_base_generator( self, 
generator_helper, idx: Union[np.ndarray, None], - batch_size: int = 1, + batch_size: int, + randomized_batch_access: bool, ): """ Yield a basic generator based on which a tf dataset can be built. @@ -559,6 +589,11 @@ def _get_base_generator( - x_sample is a gene expression vector of a cell :param idx: Indicies of data set to include in generator. + :param batch_size: Number of observations read from disk in each batched access. + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. :return: """ if idx is None: @@ -571,6 +606,7 @@ def _get_base_generator( batch_size=batch_size, obs_keys=[], return_dense=True, + randomized_batch_access=randomized_batch_access, ) def generator(): @@ -616,7 +652,9 @@ def _get_dataset( cache_full: bool = False, weighted: bool = False, retrieval_batch_size: int = 128, - ): + randomized_batch_access: bool = False, + prefetch: Union[int, None] = 1, + ) -> tf.data.Dataset: """ :param idx: @@ -624,6 +662,11 @@ def _get_dataset( :param mode: :param shuffle_buffer_size: :param weighted: Whether to use weights. Not implemented for embedding models yet. + :param retrieval_batch_size: Number of observations read from disk in each batched access. + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. 
:return: """ # Determine model type [ae, vae(iaf, vamp)] @@ -643,6 +686,7 @@ def generator_helper(x_sample): generator_helper=generator_helper, idx=idx, batch_size=retrieval_batch_size, + randomized_batch_access=randomized_batch_access, ) output_types, output_shapes = self._get_output_dim(n_features=n_features, model_type=model_type, mode=mode) dataset = tf.data.Dataset.from_generator( @@ -655,23 +699,26 @@ def generator_helper(x_sample): # Only shuffle in train modes if mode in ['train', 'train_val']: dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(n_samples, shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True) - dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE) + if shuffle_buffer_size is not None and shuffle_buffer_size > 0: + dataset = dataset.shuffle( + buffer_size=min(n_samples, shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True) + if prefetch is None: + prefetch = tf.data.AUTOTUNE + dataset = dataset.batch(batch_size, drop_remainder=False).prefetch(prefetch) return dataset elif mode == 'gradient_method': # TODO depreceate this code # Prepare data reading according to whether anndata is backed or not: - cell_to_class = self._get_class_dict(obs_key=self._adata_ids.cell_ontology_class) + cell_to_class = self._get_class_dict(obs_key=self._adata_ids.cellontology_class) if self.using_store: n_features = self.data.n_vars generator_raw = self.data.generator( idx=idx, batch_size=1, - obs_keys=["cell_ontology_class"], + obs_keys=[self._adata_ids.cellontology_class], return_dense=True, ) @@ -682,10 +729,12 @@ def generator(): x_sample = x_sample.todense() x_sample = np.asarray(x_sample).flatten() sf_sample = prepare_sf(x=x_sample)[0] - y_sample = z[1]["cell_ontology_class"].values[0] + y_sample = z[1][self._adata_ids.cellontology_class].values[0] yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) elif isinstance(self.data, anndata.AnnData) and self.data.isbacked: + if idx is None: + idx 
= np.arange(0, self.data.n_obs) n_features = self.data.X.shape[1] def generator(): @@ -693,12 +742,14 @@ def generator(): for i in idx: x_sample = self.data.X[i, :].toarray().flatten() if sparse else self.data.X[i, :].flatten() sf_sample = prepare_sf(x=x_sample)[0] - y_sample = self.data.obs[self._adata_ids.cell_ontology_class][i] + y_sample = self.data.obs[self._adata_ids.cellontology_id][i] yield (x_sample, sf_sample), (x_sample, cell_to_class[y_sample]) else: + if idx is None: + idx = np.arange(0, self.data.n_obs) x = self._prepare_data_matrix(idx=idx) sf = prepare_sf(x=x) - y = self.data.obs[self._adata_ids.cell_ontology_class][idx] + y = self.data.obs[self._adata_ids.cellontology_class].values[idx] # for gradients per celltype in compute_gradients_input() n_features = x.shape[1] @@ -716,7 +767,7 @@ def generator(): buffer_size=shuffle_buffer_size, seed=None, reshuffle_each_iteration=True - ).batch(batch_size).prefetch(tf.data.AUTOTUNE) + ).batch(batch_size, drop_remainder=False).prefetch(prefetch) return dataset @@ -761,7 +812,7 @@ def _metrics(self): return {"neg_ll": [custom_mse, custom_negll]} - def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): + def evaluate_any(self, idx, batch_size: int = 128, max_steps: int = np.inf): """ Evaluate the custom model on any local data. @@ -779,6 +830,7 @@ def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): batch_size=batch_size, mode='eval', retrieval_batch_size=128, + shuffle_buffer_size=0, ) steps = min(max(len(idx) // batch_size, 1), max_steps) results = self.model.training_model.evaluate(x=dataset, steps=steps) @@ -786,7 +838,7 @@ def evaluate_any(self, idx, batch_size: int = 1, max_steps: int = np.inf): else: return {} - def evaluate(self, batch_size: int = 64, max_steps: int = np.inf): + def evaluate(self, batch_size: int = 128, max_steps: int = np.inf): """ Evaluate the custom model on test data. 
@@ -798,55 +850,58 @@ def evaluate(self, batch_size: int = 64, max_steps: int = np.inf): """ return self.evaluate_any(idx=self.idx_test, batch_size=batch_size, max_steps=max_steps) - def predict(self, batch_size: int = 64, max_steps: int = np.inf): + def predict(self, batch_size: int = 128): """ return the prediction of the model :return: prediction """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None + if self.idx_test is None or self.idx_test.any(): dataset = self._get_dataset( idx=self.idx_test, batch_size=batch_size, mode='predict', retrieval_batch_size=128, + shuffle_buffer_size=0, ) return self.model.predict_reconstructed(x=dataset) else: return np.array([]) - def predict_embedding(self, batch_size: int = 64, max_steps: int = np.inf): + def predict_embedding(self, batch_size: int = 128): """ return the prediction in the latent space (z_mean for variational models) :return: latent space """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None + if self.idx_test is None or self.idx_test.any(): dataset = self._get_dataset( idx=self.idx_test, batch_size=batch_size, mode='predict', retrieval_batch_size=128, + shuffle_buffer_size=0, ) return self.model.predict_embedding(x=dataset, variational=False) else: return np.array([]) - def predict_embedding_variational(self, batch_size: int = 64, max_steps: int = np.inf): + def predict_embedding_variational(self, batch_size: int = 128, max_steps: int = np.inf): """ return the prediction of z, z_mean, z_log_var in the variational latent space :return: sample of latent space, mean of latent space, variance of latent space """ - if self.idx_test is None or self.idx_test: # true if the array is not empty or if the passed value is None + if self.idx_test is None or self.idx_test: dataset = self._get_dataset( idx=self.idx_test, batch_size=batch_size, mode='predict', retrieval_batch_size=128, + 
shuffle_buffer_size=0, ) return self.model.predict_embedding(x=dataset, variational=True) else: @@ -876,7 +931,7 @@ def compute_gradients_input( ) if per_celltype: - cell_to_id = self._get_class_dict(obs_key=self._adata_ids.cell_ontology_class) + cell_to_id = self._get_class_dict(obs_key=self._adata_ids.cellontology_class) cell_names = cell_to_id.keys() cell_id = cell_to_id.values() id_to_cell = dict([(key, value) for (key, value) in zip(cell_id, cell_names)]) @@ -944,13 +999,16 @@ class EstimatorKerasCelltype(EstimatorKeras): def __init__( self, - data: Union[anndata.AnnData, np.ndarray], + data: Union[anndata.AnnData, DistributedStoreBase], model_dir: Union[str, None], model_id: Union[str, None], model_topology: TopologyContainer, weights_md5: Union[str, None] = None, cache_path: str = os.path.join('cache', ''), - max_class_weight: float = 1e3 + celltype_ontology: Union[OntologyObo, None] = None, + max_class_weight: float = 1e3, + remove_unlabeled_cells: bool = True, + adata_ids: AdataIds = AdataIdsSfaira() ): super(EstimatorKerasCelltype, self).__init__( data=data, @@ -959,17 +1017,41 @@ def __init__( model_id=model_id, model_topology=model_topology, weights_md5=weights_md5, - cache_path=cache_path + cache_path=cache_path, + adata_ids=adata_ids ) + if remove_unlabeled_cells: + # Remove cells without type label from store: + if isinstance(self.data, DistributedStoreBase): + self.data.subset(attr_key="cellontology_class", excluded_values=[ + self._adata_ids.unknown_celltype_identifier, + self._adata_ids.not_a_cell_celltype_identifier, + None, # TODO: it may be possible to remove this in the future + np.nan, # TODO: it may be possible to remove this in the future + ]) + elif isinstance(self.data, anndata.AnnData): + self.data = self.data[np.where([ + x not in [ + self._adata_ids.unknown_celltype_identifier, + self._adata_ids.not_a_cell_celltype_identifier, + None, # TODO: it may be possible to remove this in the future + np.nan, # TODO: it may be possible to remove 
this in the future + ] for x in self.data.obs[self._adata_ids.cellontology_class].values + ])[0], :] + else: + assert False assert "cl" in self.topology_container.output.keys(), self.topology_container.output.keys() assert "targets" in self.topology_container.output.keys(), self.topology_container.output.keys() self.max_class_weight = max_class_weight + if celltype_ontology is None: + celltype_ontology = OntologyCl(branch=self.topology_container.output["cl"]) self.celltype_universe = CelltypeUniverse( - cl=OntologyCl(branch=self.topology_container.output["cl"]), + cl=celltype_ontology, uberon=OCS.organ, - organism=self.organism, ) - self.celltype_universe.leaves = self.topology_container.output["targets"] + # Set leaves if they are defined in topology: + if self.topology_container.output["targets"] is not None: + self.celltype_universe.onto_cl.leaves = self.topology_container.output["targets"] def init_model( self, @@ -994,34 +1076,37 @@ def init_model( override_hyperpar=override_hyperpar ) - @property - def ids(self): - return self.celltype_universe.leaves - @property def ntypes(self): return self.celltype_universe.onto_cl.n_leaves @property def ontology_ids(self): - return self.celltype_universe.leaves + return self.celltype_universe.onto_cl.convert_to_id(self.celltype_universe.onto_cl.leaves) + + @property + def ontology_names(self): + return self.celltype_universe.onto_cl.convert_to_name(self.celltype_universe.onto_cl.leaves) def _one_hot_encoder(self): + leave_maps = self.celltype_universe.onto_cl.prepare_maps_to_leaves(include_self=True) def encoder(x) -> np.ndarray: if isinstance(x, str): x = [x] + # Encodes unknowns to empty rows. 
idx = [ - self.celltype_universe.onto_cl.map_to_leaves( - node=y, - return_type="idx", - include_self=True, - ) + leave_maps[y] if y not in [ + self._adata_ids.unknown_celltype_identifier, + self._adata_ids.not_a_cell_celltype_identifier, + ] else np.array([]) for y in x ] oh = np.zeros((len(x), self.ntypes,), dtype="float32") for i, y in enumerate(idx): - oh[i, y] = 1. / len(y) + scale = len(y) + if scale > 0: + oh[i, y] = 1. / scale return oh return encoder @@ -1042,7 +1127,7 @@ def _get_celltype_out( onehot_encoder = self._one_hot_encoder() y = np.concatenate([ onehot_encoder(z) - for z in self.data.obs[self._adata_ids.cell_ontology_class].values[idx].tolist() + for z in self.data.obs[self._adata_ids.cellontology_id].values[idx].tolist() ], axis=0) # Distribute aggregated class weight for computation of weights: freq = np.mean(y / np.sum(y, axis=1, keepdims=True), axis=0, keepdims=True) @@ -1071,10 +1156,11 @@ def _get_output_dim(n_features, n_labels, mode): def _get_base_generator( self, - generator_helper, idx: Union[np.ndarray, None], - weighted: bool = False, - batch_size: int = 1, + yield_labels: bool, + weighted: bool, + batch_size: int, + randomized_batch_access: bool, ): """ Yield a basic generator based on which a tf dataset can be built. @@ -1087,6 +1173,12 @@ def _get_base_generator( - y_sample is a one-hot encoded label vector of a cell - w_sample is a weight scalar of a cell :param idx: Indicies of data set to include in generator. + :param yield_labels: + :param batch_size: Number of observations read from disk in each batched access. + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. 
:return: """ if idx is None: @@ -1099,10 +1191,12 @@ def _get_base_generator( generator_raw = self.data.generator( idx=idx, batch_size=batch_size, - obs_keys=["cell_ontology_class"], + obs_keys=[self._adata_ids.cellontology_id], return_dense=True, + randomized_batch_access=randomized_batch_access, ) - onehot_encoder = self._one_hot_encoder() + if yield_labels: + onehot_encoder = self._one_hot_encoder() def generator(): for z in generator_raw(): @@ -1110,17 +1204,21 @@ def generator(): if isinstance(x_sample, scipy.sparse.csr_matrix): x_sample = x_sample.todense() x_sample = np.asarray(x_sample) - y_sample = onehot_encoder(z[1]["cell_ontology_class"].values) - for i in range(x_sample.shape[0]): - yield generator_helper(x_sample[i], y_sample[i], 1.) - + if yield_labels: + y_sample = onehot_encoder(z[1][self._adata_ids.cellontology_id].values) + for i in range(x_sample.shape[0]): + if y_sample[i].sum() > 0: + yield x_sample[i], y_sample[i], 1. + else: + for i in range(x_sample.shape[0]): + yield x_sample[i], n_features = self.data.n_vars n_samples = self.data.n_obs - n_labels = self.data.celltypes_universe.onto_cl.n_leaves else: - weights, y = self._get_celltype_out(idx=idx) - if not weighted: - weights = np.ones_like(weights) + if yield_labels: + weights, y = self._get_celltype_out(idx=idx) + if not weighted: + weights = np.ones_like(weights) x = self.data.X if self.data.isbacked else self._prepare_data_matrix(idx=idx) is_sparse = isinstance(x, scipy.sparse.spmatrix) indices = idx if self.data.isbacked else range(x.shape[0]) @@ -1133,17 +1231,21 @@ def generator(): def generator(): for s, e in batch_starts_ends: - x_sample = np.asarray(x[indices[s:e], :].todense()) if is_sparse \ - else x[indices[s:e], :] - y_sample = y[indices[s:e], :] - w_sample = weights[indices[s:e]] - for i in range(x_sample.shape[0]): - yield generator_helper(x_sample[i], y_sample[i], w_sample[i]) + x_sample = np.asarray(x[indices[s:e], :].todense()) if is_sparse else x[indices[s:e], :] + if 
yield_labels: + y_sample = y[indices[s:e], :] + w_sample = weights[indices[s:e]] + for i in range(x_sample.shape[0]): + if y_sample[i].sum() > 0: + yield x_sample[i], y_sample[i], w_sample[i] + else: + for i in range(x_sample.shape[0]): + yield x_sample[i], n_features = x.shape[1] n_samples = x.shape[0] - n_labels = y.shape[1] + n_labels = self.celltype_universe.onto_cl.n_leaves return generator, n_samples, n_features, n_labels def _get_dataset( @@ -1155,7 +1257,9 @@ def _get_dataset( cache_full: bool = False, weighted: bool = False, retrieval_batch_size: int = 128, - ): + randomized_batch_access: bool = False, + prefetch: Union[int, None] = 1, + ) -> tf.data.Dataset: """ :param idx: @@ -1163,20 +1267,19 @@ def _get_dataset( :param mode: :param shuffle_buffer_size: :param weighted: Whether to use weights. + :param retrieval_batch_size: Number of observations read from disk in each batched access. + :param randomized_batch_access: Whether to randomize batches during reading (in generator). Lifts necessity of + using a shuffle buffer on generator, however, batch composition stays unchanged over epochs unless there + is overhangs in retrieval_batch_size in the raw data files, which often happens and results in modest + changes in batch composition. :return: """ - # This is a basic cell type prediction model estimator class, the standard generator is fine. 
- def generator_helper(x_sample, y_sample, w_sample): - if mode in ['train', 'train_val', 'eval']: - return x_sample, y_sample, w_sample - else: - return x_sample, - generator, n_samples, n_features, n_labels = self._get_base_generator( - generator_helper=generator_helper, idx=idx, + yield_labels=mode in ['train', 'train_val', 'eval'], weighted=weighted, batch_size=retrieval_batch_size, + randomized_batch_access=randomized_batch_access, ) output_types, output_shapes = self._get_output_dim(n_features=n_features, n_labels=n_labels, mode=mode) dataset = tf.data.Dataset.from_generator( @@ -1188,12 +1291,14 @@ def generator_helper(x_sample, y_sample, w_sample): dataset = dataset.cache() if mode == 'train' or mode == 'train_val': dataset = dataset.repeat() - dataset = dataset.shuffle( - buffer_size=min(n_samples, shuffle_buffer_size), - seed=None, - reshuffle_each_iteration=True - ) - dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE) + if shuffle_buffer_size is not None and shuffle_buffer_size > 0: + dataset = dataset.shuffle( + buffer_size=min(n_samples, shuffle_buffer_size), + seed=None, + reshuffle_each_iteration=True) + if prefetch is None: + prefetch = tf.data.AUTOTUNE + dataset = dataset.batch(batch_size, drop_remainder=False).prefetch(prefetch) return dataset @@ -1210,7 +1315,7 @@ def _metrics(self): CustomTprClasswise(k=self.ntypes) ] - def predict(self, batch_size: int = 64, max_steps: int = np.inf): + def predict(self, batch_size: int = 128, max_steps: int = np.inf): """ Return the prediction of the model @@ -1219,41 +1324,40 @@ def predict(self, batch_size: int = 64, max_steps: int = np.inf): :return: Prediction tensor. 
""" idx = self.idx_test - if idx is None or idx.any(): # true if the array is not empty or if the passed value is None + if idx is None or idx.any(): dataset = self._get_dataset( idx=idx, batch_size=batch_size, mode='predict', retrieval_batch_size=128, + shuffle_buffer_size=0, ) - steps = min(max(len(idx) // batch_size, 1), max_steps) - return self.model.training_model.predict(x=dataset, steps=steps) + return self.model.training_model.predict(x=dataset) else: return np.array([]) - def ytrue(self, batch_size: int = 64, max_steps: int = np.inf): + def ytrue(self, batch_size: int = 128, max_steps: int = np.inf): """ Return the true labels of the test set. :return: true labels """ - if self.idx_test is None or self.idx_test.any(): # true if the array is not empty or if the passed value is None - x, y, w = self._get_dataset( + if self.idx_test is None or self.idx_test.any(): + dataset = self._get_dataset( idx=self.idx_test, batch_size=batch_size, - mode='eval' + mode='eval', + shuffle_buffer_size=0, ) - return y + y_true = [] + for _, y, _ in dataset.as_numpy_iterator(): + y_true.append(y) + y_true = np.concatenate(y_true, axis=0) + return y_true else: return np.array([]) - def evaluate_any( - self, - idx, - batch_size: int = 1, - max_steps: int = np.inf, - weighted: bool = False - ): + def evaluate_any(self, idx, batch_size: int = 128, max_steps: int = np.inf, weighted: bool = False): """ Evaluate the custom model on any local data. 
@@ -1273,14 +1377,14 @@ def evaluate_any( mode='eval', weighted=weighted, retrieval_batch_size=128, + shuffle_buffer_size=0, ) - steps = min(max(len(idx) // batch_size, 1), max_steps) - results = self.model.training_model.evaluate(x=dataset, steps=steps) + results = self.model.training_model.evaluate(x=dataset) return dict(zip(self.model.training_model.metrics_names, results)) else: return {} - def evaluate(self, batch_size: int = 64, max_steps: int = np.inf, weighted: bool = False): + def evaluate(self, batch_size: int = 128, max_steps: int = np.inf, weighted: bool = False): """ Evaluate the custom model on local data. diff --git a/sfaira/interface/model_zoo.py b/sfaira/interface/model_zoo.py index c3f4bd61d..8b0eb0faf 100644 --- a/sfaira/interface/model_zoo.py +++ b/sfaira/interface/model_zoo.py @@ -4,18 +4,20 @@ from typing import List, Union from sfaira.versions.metadata import CelltypeUniverse -from sfaira.consts import OntologyContainerSfaira +from sfaira.consts import OCS from sfaira.versions.topologies import TopologyContainer, TOPOLOGIES class ModelZoo(abc.ABC): """ - Model ontology base class. + Model zoo base class. """ topology_container: TopologyContainer - ontology: dict + zoo: Union[dict, None] _model_id: Union[str, None] celltypes: Union[CelltypeUniverse, None] + available_model_ids: Union[list, None] + topology_container: Union[None, TopologyContainer] def __init__( self, @@ -26,86 +28,48 @@ def __init__( :param model_lookuptable: model_lookuptable. :param model_class: Model class to subset to. 
""" - self._ontology_container_sfaira = OntologyContainerSfaira() - if model_lookuptable is not None: # check if models in repository - self.ontology = self.load_ontology_from_model_ids(model_ids=model_lookuptable['model_id'].values, - model_class=model_class) + self._ontology_container_sfaira = OCS self._model_id = None - self.celltypes = None - - @property - def model_class(self): - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[0] - - @property - def model_name(self): - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1] - - @property - def model_organism(self): - # TODO: this is a custom name ontology - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1].split("-")[0] + self.topology_container = None - @property - def model_organ(self): - # TODO: this is a custom name ontology - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1].split("-")[1] - - @property - def model_type(self): - # TODO: this is a custom name ontology - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1].split("-")[2] - - @property - def model_topology(self): - # TODO: this is a custom name ontology - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1].split("-")[3] - - @property - def model_version(self): - # TODO: this is a custom name ontology - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[1].split("-")[4] - - @property - def organisation(self): - assert self.model_id is not None, "set model_id first" - return self.model_id.split('_')[2] + if model_lookuptable is not None: # check if models in repository + self._load_model_ids(model_ids=model_lookuptable['model_id'].values, model_class=model_class) + self._construct_zoo_from_model_ids() + else: + self.zoo = None + self.available_model_ids = None - def 
load_ontology_from_model_ids( + def _load_model_ids( self, model_ids, model_class: Union[str, None] = None, - ) -> dict: + ): """ - Load model ontology based on models available in model lookup tables. + Load model ids based on models available in model lookup tables. :param model_ids: Table listing all available model_ids. - :param model_class: Model class to subset to. - :return: Dictionary formatted ontology. + :param model_class: Model class to subset to """ + self.available_model_ids = [x for x in model_ids if (x.split('_')[0] == model_class or model_class is None)] - ids = [x for x in model_ids if (x.split('_')[0] == model_class or model_class is None)] + def _construct_zoo_from_model_ids(self): + """ + Load model zoo based on models available model_ids. + """ id_df = pd.DataFrame( - [i.split('_')[1:6] for i in ids], + [i.split('_')[1:3] for i in self.available_model_ids], columns=['name', 'organisation'] ) - model = np.unique(id_df['name']) - ontology = dict.fromkeys(model) - for m in model: - id_df_m = id_df[id_df.model_type == m] - orga = np.unique(id_df_m['organisation']) - ontology[m] = dict.fromkeys(orga) - return ontology - + orgs = np.unique(id_df['organisation']) + zoo = dict.fromkeys(orgs) + for o in orgs: + id_df_o = id_df[id_df['organisation'] == o] + name = np.unique(id_df_o['name']) + zoo[o] = dict.fromkeys(name) + self.zoo = zoo + + @staticmethod def _order_versions( - self, versions: List[str] ): """ @@ -115,50 +79,8 @@ def _order_versions( :return: Ordered list of versions. """ versions.sort(key=lambda s: [int(u) for u in s.split('.')]) - return versions - @property - def model_id(self): - return self._model_id - - @model_id.setter - def model_id(self, x: str): - """ - Set model ID to a manually supplied ID. - - :param x: Model ID to set. 
Format: modelclass_organism-organ-modeltype-topology-version_organisation - """ - assert len(x.split('_')) == 3, f'model_id {x} is invalid' - self._model_id = x - - def save_weights_to_remote(self, path=None): - """ - Saves model weights to repository XY. - Increments 3rd digit of version number. - Adds model_id to the text file, updates model_index - """ - raise NotImplementedError() - - def save_weights_to_public(self): - """ - Saves model weights to cloud under an organization name. - Increments 2nd digit of version number. - Adds model_id to the text file, updates model_index - """ - raise NotImplementedError() - - def call_kipoi(self): - """ - Returns kipoi_experimental model call from remote directly on local data using kipoi_experimental. - - Runs model defined in self.model_id. - For this, the remote server associated with the model_id has to be identified via find_remote(). - - :return: Predictions - """ - raise NotImplementedError() - def topology( self, model_type: str, @@ -171,10 +93,10 @@ def topology( :param organisation: Identifier of organisation to show versions for. :return: List of versions available. """ - assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[model_type].keys(), \ - "organisation requested was not found in ontology" - return self.ontology[model_type][organisation] + assert organisation in self.zoo.keys(), "organisation requested was not found in zoo" + assert model_type in self.zoo[organisation].keys(), \ + "model_type requested was not found in zoo" + return self.zoo[organisation][model_type] def versions( self, @@ -190,23 +112,89 @@ def versions( :param model_topology: Identifier of model_topology to show versions for. :return: List of versions available. 
""" - assert model_type in self.ontology.keys(), "model_type requested was not found in ontology" - assert organisation in self.ontology[model_type].keys(), \ - "organisation requested was not found in ontology" - assert model_topology in self.ontology[model_type][organisation].keys(), \ - "model_topology requested was not found in ontology" - return self.ontology[model_type][organisation][model_topology] + assert organisation in self.zoo.keys(), "organisation requested was not found in zoo" + assert model_type in self.zoo[organisation].keys(), \ + "model_type requested was not found in zoo" + assert model_topology in self.zoo[organisation][model_type].keys(), \ + "model_topology requested was not found in zoo" + return self.zoo[organisation][model_type][model_topology] @property def model_hyperparameters(self) -> dict: - assert self.topology_container is not None + assert self.topology_container is not None, "set model_id first" return self.topology_container.topology["hyper_parameters"] @property - def topology_container(self) -> TopologyContainer: - # TODO: this ID decomposition to organism is custom to the topologies handled in this package. - organism = self.model_name.split("-")[0] - return TopologyContainer( - topology=TOPOLOGIES[organism][self.model_class][self.model_type][self.model_topology], + def celltypes(self): + assert self.topology_container is not None, "set model_id first" + return self.topology_container.topology["output"]["targets"] + + @celltypes.setter + def celltypes(self, x: List): + assert self.topology_container is not None, "set model_id first" + self.topology_container.topology["output"]["targets"] = x + + @property + def model_id(self): + return self._model_id + + @model_id.setter + def model_id(self, x: str): + """ + Set model ID to a manually supplied ID and automatically set topology container. + + :param x: Model ID to set. 
Format: modelclass_organism-organ-modeltype-topology-version_organisation + """ + assert self.available_model_ids is None or x in self.available_model_ids,\ + f"{x} not found in available_model_ids, please check available models using ModelZoo.available_model_ids" + assert len(x.split('_')) == 3, f'model_id {x} is invalid' + self._model_id = x + self.topology_container = TopologyContainer( + topology=TOPOLOGIES[self.model_organism][self.model_class][self.model_type][self.model_topology], topology_id=self.model_version ) + + @property + def model_class(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[0] + + @property + def model_name(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1] + + @property + def model_organism(self): + # TODO: this relies on theislab model_name formatting + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[0] + + @property + def model_organ(self): + # TODO: this relies on theislab model_name formatting + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[1] + + @property + def model_type(self): + # TODO: this relies on theislab model_name formatting + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[2] + + @property + def model_topology(self): + # TODO: this relies on theislab model_name formatting + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[3] + + @property + def model_version(self): + # TODO: this relies on theislab model_name formatting + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[1].split("-")[4] + + @property + def organisation(self): + assert self.model_id is not None, "set model_id first" + return self.model_id.split('_')[2] diff --git a/sfaira/interface/user_interface.py 
b/sfaira/interface/user_interface.py index db4529678..b4a1caed6 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -1,17 +1,18 @@ import anndata -try: - from kipoi.model import BaseModel -except ImportError: - BaseModel = None import numpy as np -import pandas as pd import os +import re +import pandas as pd +import pickle from typing import List, Union import warnings +import time +from sfaira.consts import AdataIdsSfaira, AdataIds from sfaira.data import DatasetInteractive from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype from sfaira.interface.model_zoo import ModelZoo +from sfaira.versions.topologies import TopologyContainer class UserInterface: @@ -26,27 +27,24 @@ class UserInterface: # initialise your sfaira instance with a model lookuptable. # instead of setting `custom_repo` when initialising the UI you can also use `sfaira_repo=True` to use public weights ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) - ui.zoo_embedding.set_latest(organism, organ, model_type, organisation, model_topology) - ui.zoo_celltype.set_latest(organism, organ, model_type, organisation, model_topology) ui.load_data(anndata.read("/path/to/file.h5ad")) # load your dataset into sfaira ui.load_model_embedding() ui.load_model_celltype() - ui.compute_all() - adata = ui.data + ui.predict_all() + adata = ui.data.adata scanpy.pp.neighbors(adata, use_rep="X_sfaira") scanpy.tl.umap(adata) - scanpy.pl.umap(adata, color="celltype_sfaira", show=True, save="UMAP_sfaira.png") + scanpy.pl.umap(adata, color="celltypes_sfaira", show=True, save="UMAP_sfaira.png") ``` """ estimator_embedding: Union[EstimatorKerasEmbedding, None] estimator_celltype: Union[EstimatorKerasCelltype, None] - model_kipoi_embedding: Union[None] - model_kipoi_celltype: Union[BaseModel, None] zoo_embedding: Union[ModelZoo, None] zoo_celltype: Union[ModelZoo, None] - data: Union[anndata.AnnData] + data: 
Union[DatasetInteractive, None] model_lookuptable: Union[pd.DataFrame, None] + adata_ids: AdataIds def __init__( self, @@ -54,12 +52,12 @@ def __init__( sfaira_repo: bool = False, cache_path: str = os.path.join('cache', '') ): - self.model_kipoi_embedding = None - self.model_kipoi_celltype = None self.estimator_embedding = None self.estimator_celltype = None + self.data = None self.use_sfaira_repo = sfaira_repo self.cache_path = os.path.join(cache_path, '') + self.adata_ids = AdataIdsSfaira() if sfaira_repo: # check if public sfaira repository should be accessed self.model_lookuptable = self._load_lookuptable("https://zenodo.org/record/4304660/files/") @@ -84,9 +82,6 @@ def __init__( raise ValueError("please either provide a custom folder/repository with model weights or specify " "`sfaira_repo=True` to access the public weight repository") - # TODO: workaround to deal with model ids bearing file endings in model lookuptable (as is the case in first sfaira model repo upload) - self.model_lookuptable['model_id'] = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in self.model_lookuptable['model_id']] - self.zoo_embedding = ModelZoo(model_lookuptable=self.model_lookuptable, model_class="embedding") self.zoo_celltype = ModelZoo(model_lookuptable=self.model_lookuptable, model_class="celltype") @@ -137,9 +132,8 @@ def write_lookuptable( file_names.append(file) with open(os.path.join(subdir, file), 'rb') as f: md5.append(hashlib.md5(f.read()).hexdigest()) - s = [i.split('_')[0:7] for i in file_names] - ids = ['_'.join(i) for i in s] - ids_cleaned = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in ids] # remove file extensions from ids + ids = ['_'.join(i.split('_')[0:3]) for i in file_names] + ids_cleaned = [i.replace('.h5', '').replace('.data-00000-of-00001', '') for i in ids] # remove file extensions if ids: pd.DataFrame( @@ -162,8 +156,10 @@ def deposit_zenodo( authors: list, description: str, metadata: dict = {}, + 
update_existing_deposition: Union[None, str] = None, publish: bool = False, - sandbox: bool = False + sandbox: bool = False, + deposit_topologies: bool = True ): """ Deposit all models in model lookup table on Zenodo. If publish is set to false, files will be uploaded to a @@ -179,10 +175,14 @@ def deposit_zenodo( :param description: Description of the Zenodo deposition. :param metadata: Dictionary with further metadata attributes of the deposit. See the Zenodo API refenrece for accepted keys: https://developers.zenodo.org/#representation + :param update_existing_deposition: If None, a new deposition will be created. + If an existing deposition ID is provided as a sting, than this deposition will be updated with a new version. :param publish: Set this to True to directly publish the weights on Zenodo. When set to False a draft will be created, which can be edited in the browser before publishing. :param sandbox: If True, use the Zenodo testing platform at https://sandbox.zenodo.org for your deposition. - We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platfowm once created. + We recommend testing your upload with sandbox first as depositions cannot be deleted from the main Zenodo platform once created. + :param deposit_topologies: If true, an associated topology file for every weights file will be uploaded to zenodo. + The naming format for the topology files is _topology.pickle """ import requests @@ -197,29 +197,88 @@ def deposit_zenodo( raise ValueError( "Your Zenodo access token was not accepted by the API. 
Please provide a valid access token.") - # Create empty deposition - r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions', - params=params, - json={}, - headers=headers) - - # Obtain bucket URL and deposition ID - bucket_url = r.json()["links"]["bucket"] - deposition_id = r.json()['id'] + if update_existing_deposition is None: + # Create empty deposition + r = requests.post(f'https://{sandbox}zenodo.org/api/deposit/depositions', + params=params, + json={}, + headers=headers) + # Obtain bucket URL and deposition ID + bucket_url = r.json()["links"]["bucket"] + deposition_id = r.json()['id'] + else: + update_existing_deposition = str(update_existing_deposition) if isinstance(update_existing_deposition, int)\ + else update_existing_deposition + # Create a new version of the existing deposition + r = requests.post( + f'https://{sandbox}zenodo.org/api/deposit/depositions/{update_existing_deposition}/actions/newversion', + params=params) + try: + deposition_id = r.json()["links"]["latest_draft"].split("/")[-1] + except json.decoder.JSONDecodeError: + time.sleep(10) + r = requests.post( + f'https://{sandbox}zenodo.org/api/deposit/depositions/{update_existing_deposition}/actions/newversion', + params=params) + deposition_id = r.json()["links"]["latest_draft"].split("/")[-1] + if r.status_code != 201: + raise ValueError( + f"A new version of deposition {update_existing_deposition} could not be created, " + f"please make sure your API key is associated with the account that owns this deposition.") + r = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}', params=params) + bucket_url = r.json()["links"]["bucket"] + + # Delete all existing files from new version + r_files = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/files', + params=params) + while len(r_files.json()) > 0: + for file_dict in r_files.json(): + requests.delete( + 
f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/files/{file_dict["id"]}', + params=params) + r_files = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/files', + params=params) + while isinstance(r_files.json(), dict): + print("Pausing due to Zenodo API rate limitng") + time.sleep(10) + r_files = requests.get(f'https://{sandbox}zenodo.org/api/deposit/depositions/{deposition_id}/files', + params=params) # Loop over files in model lookup table and upload them one by one for i, weight_path in enumerate(self.model_lookuptable['model_file_path']): - filename = os.path.basename(weight_path) + basepath, filename_weights = os.path.split(weight_path) with open(weight_path, "rb") as fp: r = requests.put( - f"{bucket_url}/{filename}", + f"{bucket_url}/{filename_weights}", data=fp, params=params, ) + while r.status_code != 200: + print(f"Upload of {weight_path} was not successful (status code {r.status_code}), retrying") + time.sleep(10) + with open(weight_path, "rb") as fp: + r = requests.put( + f"{bucket_url}/{filename_weights}", + data=fp, + params=params, + ) # Verify checksum after upload if r.json()['checksum'][4:] != self.model_lookuptable['md5'][i]: warnings.warn(f"The md5 checksum in your model_lookuptable for {self.model_lookuptable['model_id'][i]} " f"does not match the md5 checksum of the uploaded file.") + if deposit_topologies: # Deposit associated topology file + filename_topology = ".".join(filename_weights.split(".")[:-1]) + filename_topology = re.sub(r"_weights$", "", filename_topology) + filename_topology += "_topology.pickle" + topology_path = os.path.join(basepath, filename_topology) + assert os.path.isfile(topology_path), f"topology file {topology_path} not found. " \ + f"consider deactivating the deposition of topology files." 
+ with open(topology_path, "rb") as fp: + r = requests.put( + f"{bucket_url}/{filename_topology}", + data=fp, + params=params, + ) # Add model lookup table to zenodo df = self.model_lookuptable.copy() @@ -277,8 +336,8 @@ def load_data( data: anndata.AnnData, gene_symbol_col: Union[str, None] = None, gene_ens_col: Union[str, None] = None, - remove_gene_version: bool = True, - match_to_reference: Union[str, None] = None, + obs_key_celltypes: Union[str, None] = None, + class_maps: dict = {}, ): """ Loads the provided AnnData object into sfaira. @@ -291,40 +350,57 @@ def load_data( :param data: AnnData object to load :param gene_symbol_col: Var column name (or 'index') which contains gene symbols :param gene_ens_col: ar column name (or 'index') which contains ensembl ids - :param remove_gene_version: Remove gene version string from ENSEMBL ID so that different versions in different - data sets are superimposed. - :param match_to_reference: Reference genomes name. - """ - if self.zoo_embedding.organism is not None: - organism = self.zoo_embedding.organism - organ = self.zoo_embedding.organ - elif self.zoo_celltype.organism is not None: - organism = self.zoo_celltype.organism - organ = self.zoo_celltype.organ + :param obs_key_celltypes: .obs column name which contains cell type labels. + :param class_maps: Cell type class maps. 
+ """ + if self.zoo_embedding.model_organism is not None: + organism = self.zoo_embedding.model_organism + organ = self.zoo_embedding.model_organ + elif self.zoo_celltype.model_organism is not None: + organism = self.zoo_celltype.model_organism + organ = self.zoo_celltype.model_organ else: raise ValueError("Please first set which model_id to use via the model zoo before loading the data") if gene_ens_col is None and gene_symbol_col is None: raise ValueError("Please provide either the gene_ens_col or the gene_symbol_col argument.") - dataset = DatasetInteractive( + self.data = DatasetInteractive( data=data, organism=organism, organ=organ, gene_symbol_col=gene_symbol_col, - gene_ens_col=gene_ens_col + gene_ens_col=gene_ens_col, + obs_key_celltypes=obs_key_celltypes, + class_maps=class_maps, + ) + # Align to correct featurespace + self.data.streamline_features( + match_to_reference=self.zoo_embedding.topology_container.gc.assembly, + subset_genes_to_type=list(set(self.zoo_embedding.topology_container.gc.biotype)) ) - dataset.load(load_raw=False, allow_caching=False, celltype_version=None, data_dir=None) - self.data = dataset.adata - - def filter_cells(self): - """ - Filters cells with a basic pre-defined filter. - :return: - """ - # call cell_filter() - raise NotImplementedError() + def _load_topology_dict(self, model_weights_file) -> dict: + topology_filepath = ".".join(model_weights_file.split(".")[:-1]) + topology_filepath = re.sub(r"_weights$", "", topology_filepath) + topology_filepath += "_topology.pickle" + if topology_filepath.startswith('http'): + # Download into cache if file is on a remote server. 
+ if not os.path.exists(self.cache_path): + os.makedirs(self.cache_path) + import urllib.request + from urllib.error import HTTPError + try: + urllib.request.urlretrieve( + topology_filepath, + os.path.join(self.cache_path, os.path.basename(topology_filepath)) + ) + topology_filepath = os.path.join(self.cache_path, os.path.basename(topology_filepath)) + except HTTPError: + raise FileNotFoundError(f"cannot find remote topology file {topology_filepath}") + with open(topology_filepath, "rb") as f: + topology = pickle.load(f) + return topology def load_model_embedding(self): """ @@ -335,18 +411,22 @@ def load_model_embedding(self): :return: Model ID loaded. """ assert self.zoo_embedding.model_id is not None, "choose embedding model first" - model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] - md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_embedding.model_id].iloc[0] + model_weights_file = self.model_lookuptable["model_file_path"].loc[self.model_lookuptable["model_id"] == + self.zoo_embedding.model_id].iloc[0] + md5 = self.model_lookuptable["md5"].loc[self.model_lookuptable["model_id"] == + self.zoo_embedding.model_id].iloc[0] + tc = TopologyContainer( + topology=self._load_topology_dict(model_weights_file=model_weights_file), + topology_id=self.zoo_embedding.topology_container.topology_id + ) self.estimator_embedding = EstimatorKerasEmbedding( - data=self.data, - model_dir=model_dir, + data=self.data.adata, + model_dir=model_weights_file, model_id=self.zoo_embedding.model_id, - organism=self.zoo_embedding.organism, - organ=self.zoo_embedding.organ, - model_type=self.zoo_embedding.model_type, - model_topology=self.zoo_embedding.model_topology, + model_topology=tc, weights_md5=md5, - cache_path=self.cache_path + cache_path=self.cache_path, + adata_ids=self.adata_ids ) self.estimator_embedding.init_model() self.estimator_embedding.load_pretrained_weights() @@ -360,18 
+440,23 @@ def load_model_celltype(self): :return: Model ID loaded. """ assert self.zoo_celltype.model_id is not None, "choose cell type model first" - model_dir = self.model_lookuptable.model_file_path[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] - md5 = self.model_lookuptable.md5[self.model_lookuptable.model_id == self.zoo_celltype.model_id].iloc[0] + model_weights_file = self.model_lookuptable["model_file_path"].loc[self.model_lookuptable["model_id"] == + self.zoo_celltype.model_id].iloc[0] + md5 = self.model_lookuptable["md5"].loc[self.model_lookuptable["model_id"] == + self.zoo_celltype.model_id].iloc[0] + tc = TopologyContainer( + topology=self._load_topology_dict(model_weights_file=model_weights_file), + topology_id=self.zoo_celltype.topology_container.topology_id + ) self.estimator_celltype = EstimatorKerasCelltype( - data=self.data, - model_dir=model_dir, + data=self.data.adata, + model_dir=model_weights_file, model_id=self.zoo_celltype.model_id, - organism=self.zoo_celltype.organism, - organ=self.zoo_celltype.organ, - model_type=self.zoo_celltype.model_type, - model_topology=self.zoo_celltype.model_topology, + model_topology=tc, weights_md5=md5, - cache_path=self.cache_path + cache_path=self.cache_path, + remove_unlabeled_cells=False, + adata_ids=self.adata_ids ) self.estimator_celltype.init_model() self.estimator_celltype.load_pretrained_weights() @@ -385,7 +470,11 @@ def _adata_write_celltype( Writes a list of cell type labels into the column of adata.obs indicated :return: """ - self.data.obs[key] = [self.zoo_celltype.celltypes[i][0] for i in np.argmax(labels, axis=1)] + key_id = key + "_id" + self.data.adata.obs[key] = [self.estimator_celltype.ontology_names[i] for i in np.argmax(labels, axis=1)] + self.data.adata.obs[key] = self.data.adata.obs[key].astype('category') + self.data.adata.obs[key_id] = [self.estimator_celltype.ontology_ids[i] for i in np.argmax(labels, axis=1)] + self.data.adata.obs[key_id] = 
self.data.adata.obs[key_id].astype('category') def _adata_write_embedding( self, @@ -396,7 +485,7 @@ def _adata_write_embedding( Writes the embedding matrix into adata.obsm with the key indicated. :return: """ - self.data.obsm[key] = embedding + self.data.adata.obsm[key] = embedding def _adata_write_denoised_data( self, @@ -407,9 +496,9 @@ def _adata_write_denoised_data( Writes the denoised expression matrix into adata.obsm with the key indicated. :return: """ - self.data.layers[key] = denoised_data + self.data.adata.layers[key] = denoised_data - def compute_celltype(self): + def predict_celltypes(self): """ Run local cell type prediction model and add predictions to adata.obs. @@ -418,12 +507,12 @@ def compute_celltype(self): if self.zoo_celltype is not None: self._adata_write_celltype( labels=self.estimator_celltype.predict(), - key="celltype_sfaira" + key="celltypes_sfaira" ) else: raise ValueError("celltype zoo has to be set before local model can be run.") - def compute_embedding(self): + def predict_embedding(self): """ Run local embedding prediction model and add embedding to adata.obsm. @@ -437,14 +526,14 @@ def compute_embedding(self): else: raise ValueError("embedding zoo has to be set before local model can be run.") - def compute_all(self): + def predict_all(self): """ Run local cell type prediction and embedding models and add results of both to adata. :return: """ - self.compute_embedding() - self.compute_celltype() + self.predict_embedding() + self.predict_celltypes() def compute_denoised_expression(self): """ @@ -460,74 +549,12 @@ def compute_denoised_expression(self): else: raise ValueError("embedding zoo has to be set before local model can be run.") - def compute_celltype_kipoi(self): - """ - Run executable cell type prediction model from kipoi_experimental and add prediction to adata.obs. 
- - :return: - """ - if self.zoo_celltype is not None: - self.model_kipoi_celltype = self.zoo_celltype.get_kipoi_model() - self._adata_write_celltype( - labels=self.model_kipoi_celltype.pipeline.predict(dict(adata=self.data)), - key="celltype_sfaira" - ) - else: - raise ValueError("celltype zoo has to be set before kipoi_experimental model can be run.") - - def compute_embedding_kipoi(self): - """ - Run executable embedding prediction model from kipoi_experimental and add embedding to adata.obsm. - - :return: - """ - if self.zoo_embedding is not None: - self.model_kipoi_embedding = self.zoo_embedding.get_kipoi_model() - self._adata_write_embedding( - embedding=self.model_kipoi_embedding.pipeline.predict_embedding(dict(adata=self.data)), - key="X_sfaira" - ) - else: - raise ValueError("embedding zoo has to be set before kipoi_experimental model can be run.") - - def compute_all_kipoi(self): - """ - Run executable cell type prediction and embedding models from kipoi_experimental and add results to adata. - - :return: - """ - self.compute_embedding_kipoi() - self.compute_celltype_kipoi() - - def compute_denoised_expression_kipoi(self): - """ - Run executable embedding prediction model from kipoi_experimental and add denoised expression to adata layer. - - :return: - """ - if self.zoo_embedding is not None: - self.model_kipoi_embedding = self.zoo_embedding.get_kipoi_model() - self._adata_write_denoised_data( - denoised_data=self.model_kipoi_embedding.pipeline.predict(dict(adata=self.data)), - key="denoised_sfaira" - ) - else: - raise ValueError("embedding zoo has to be set before local model can be run.") - def celltype_summary(self): """ Return type with frequencies of predicted cell types. :return: """ - return self.data.obs['celltype_sfaira'].value_counts() - - def get_references(self): - """ - Return papers to cite when using the embedding model. - - Collects references from the estimators of each model type. 
- - :return: - """ - return self.estimator_embedding.get_citations() + assert "celltypes_sfaira" in self.data.adata.obs.keys(), \ + "Column celltypes_sfaira not found in the data. Please run UserInterface.predict_celltypes() first." + return self.data.adata.obs['celltypes_sfaira'].value_counts() diff --git a/sfaira/models/celltype/marker.py b/sfaira/models/celltype/marker.py index de3d9292b..2e05e694a 100644 --- a/sfaira/models/celltype/marker.py +++ b/sfaira/models/celltype/marker.py @@ -1,9 +1,8 @@ -import numpy as np try: import tensorflow as tf except ImportError: tf = None -from typing import List, Union +from typing import Union from sfaira.versions.metadata import CelltypeUniverse from sfaira.versions.topologies import TopologyContainer @@ -107,8 +106,7 @@ def __init__( ): """ - :param genome: - :param organ: + :param celltypes_version: :param topology_container: :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. diff --git a/sfaira/models/celltype/mlp.py b/sfaira/models/celltype/mlp.py index 1ca9754f6..92116da6c 100644 --- a/sfaira/models/celltype/mlp.py +++ b/sfaira/models/celltype/mlp.py @@ -82,8 +82,7 @@ def __init__( ): """ - :param genome: - :param organ: + :param celltypes_version: :param topology_container: :param override_hyperpar: Dictionary with hyper-parameters of model to override in preset hyper-parameter dictionary that is queried based on the topology_id. Can contain a subset of all hyperparameters. 
diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 7a36780da..5e4d72bb8 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -7,9 +7,12 @@ from typing import Union, List import os -from sfaira.versions.metadata import CelltypeUniverse +from sfaira.consts import OCS +from sfaira.data import load_store +from sfaira.data.dataloaders import Universe from sfaira.estimators import EstimatorKerasEmbedding -from sfaira.data import Universe +from sfaira.interface import ModelZoo +from sfaira.versions.metadata import CelltypeUniverse, OntologyCl def _tp(yhat, ytrue): @@ -109,12 +112,12 @@ class GridsearchContainer: gs_keys: Union[None, dict] summary_tab: Union[None, pandas.DataFrame] cv: bool - source_path: str + source_path: dict model_id_len: Union[None, int] def __init__( self, - source_path: str, + source_path: dict, cv: bool ): self.histories = None @@ -135,12 +138,15 @@ def load_gs( :param gs_ids: :return: """ - res_dirs = [os.path.join(self.source_path, x, "results", "") for x in gs_ids] + res_dirs = [ + os.path.join(self.source_path[x], x, "results", "") + for x in gs_ids + ] run_ids = [ np.sort(np.unique([ - x.split("_history.pickle")[0] + x.split("_evaluation.pickle")[0] for x in os.listdir(indir) - if "_history.pickle" in x + if "_evaluation.pickle" in x ])) for i, indir in enumerate(res_dirs) ] @@ -150,6 +156,7 @@ def load_gs( model_hyperpars = {} run_ids_proc = [] gs_keys = [] + gs_dirs = [] for i, indir in enumerate(res_dirs): for x in run_ids[i]: fn_history = os.path.join(indir, f"{x}_history.pickle") @@ -182,9 +189,11 @@ def load_gs( run_ids_proc.append(x) gs_keys.append(os.path.normpath(indir).split(os.path.sep)[-2]) + gs_dirs.append(indir) self.run_ids = run_ids_proc self.gs_keys = dict(zip(run_ids_proc, gs_keys)) + self.gs_dirs = dict(zip(run_ids_proc, gs_dirs)) self.evals = evals self.hyperpars = hyperpars self.model_hyperpars = model_hyperpars @@ -195,7 +204,7 @@ def load_y( hat_or_true: str, run_id: str ): - fn 
= os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_y{hat_or_true}.npy") + fn = os.path.join(self.gs_dirs[run_id], f"{run_id}_y{hat_or_true}.npy") return np.load(fn) def best_model_by_partition( @@ -279,23 +288,23 @@ def get_best_model_ids( metric_select = f"{partition_select}_{metric_select}" if cv_mode.lower() == "mean": - best_model = tab.groupby("run", as_index=False)[metric_select].mean().\ + best_model = tab.groupby("run", as_index=False)[metric_select].mean(). \ sort_values([metric_select], ascending=ascending) elif cv_mode.lower() == "median": - best_model = tab.groupby("run", as_index=False)[metric_select].median().\ + best_model = tab.groupby("run", as_index=False)[metric_select].median(). \ sort_values([metric_select], ascending=ascending) elif cv_mode.lower() == "max": - best_model = tab.groupby("run", as_index=False)[metric_select].max().\ + best_model = tab.groupby("run", as_index=False)[metric_select].max(). \ sort_values([metric_select], ascending=ascending) elif cv_mode.lower() == "min": - best_model = tab.groupby("run", as_index=False)[metric_select].min().\ + best_model = tab.groupby("run", as_index=False)[metric_select].min(). \ sort_values([metric_select], ascending=ascending) else: raise ValueError("cv_mode %s not recognized" % cv_mode) best_run_id = best_model['run'].values[0] if best_model.shape[0] > 0 else None - best_cv = tab[tab["run"] == best_run_id].\ + best_cv = tab[tab["run"] == best_run_id]. 
\ sort_values([metric_select], ascending=ascending)['cv'].values[0] if best_run_id is not None \ else None @@ -340,7 +349,7 @@ def save_best_weight( subset=subset, ) shutil.copyfile( - os.path.join(self.source_path, self.gs_keys[model_id], "results", f"{model_id}_weights.h5"), + os.path.join(self.gs_dirs[model_id], self.gs_keys[model_id], "results", f"{model_id}_weights.h5"), os.path.join(path, f"{model_id}_weights.h5") ) @@ -375,8 +384,7 @@ def plot_completions( np.logical_and( sns_tab["model_type"].values == m, sns_tab["organ"].values == o - ), : - ].groupby(groupby).size().values + ), :].groupby(groupby).size().values # Assume that largest number of successful completions is maximum (all completed: hm[j, i] = np.sum(n_by_gridpoint == np.max(n_by_gridpoint)) if len(n_by_gridpoint) > 0 else 0 sns_data_heatmap = pandas.DataFrame( @@ -400,7 +408,7 @@ def plot_best_model_by_hyperparam( partition_select: str = "val", partition_show: str = "test", subset: dict = {}, - param_x=['lr', 'depth', 'width', 'dropout', 'l1', 'l2'], + param_x: Union[tuple, list] = ('lr', 'depth', 'width', 'dropout', 'l1', 'l2'), show_swarm: bool = False, panel_width: float = 4., panel_height: float = 2. 
@@ -504,12 +512,12 @@ def plot_training_history( ) if cv_key is None: sns_data = [] - for run in np.unique( + for run in list(np.unique( self.summary_tab.loc[self.summary_tab["model_gs_id"].values == model_gs_id, "run"].values - ).tolist(): + )): sns_data_temp = pandas.DataFrame(self.histories[run]) sns_data_temp["epoch"] = np.arange(0, sns_data_temp.shape[0]) - sns_data_temp["cv"] = int(run.split("_")[-1]) + sns_data_temp["cv"] = run[-1] sns_data.append(sns_data_temp) sns_data = pandas.concat(sns_data, axis=0) else: @@ -580,16 +588,14 @@ def write_best_hyparam( if best_model_id is not None: if cvs is None: file_path_base = os.path.join( - self.source_path, + self.gs_dirs[best_model_id], self.gs_keys[best_model_id], 'results', best_model_id, ) else: file_path_base = os.path.join( - self.source_path, - self.gs_keys[f"{best_model_id}_cv{cvs[0]}"], - 'results', + self.gs_dirs[f"{best_model_id}_cv{cvs[0]}"], f"{best_model_id}_cv{cvs[0]}", ) @@ -600,7 +606,6 @@ def write_best_hyparam( # Read optimizer hyperparameter with open(f"{file_path_base}_hyperparam.pickle", 'rb') as file: hyparam_optim = pickle.load(file) - # Write both hyperparameter dicts with open(os.path.join(write_path, f"{best_model_id[:-12]}_best_hyperparam.txt"), 'w') as file: file.write(json.dumps({"model": hyparam_model, "optimizer": hyparam_optim})) @@ -613,9 +618,9 @@ class SummarizeGridsearchCelltype(GridsearchContainer): def __init__( self, - source_path: str, + source_path: dict, cv: bool, - model_id_len: int = 7 + model_id_len: int = 3 ): super(SummarizeGridsearchCelltype, self).__init__( source_path=source_path, @@ -633,7 +638,7 @@ def load_ontology_names( :param run_id: :return: """ - fn = os.path.join(self.source_path, self.gs_keys[run_id], "results", f"{run_id}_ontology_names.pickle") + fn = os.path.join(self.gs_dirs[run_id], f"{run_id}_ontology_names.pickle") if not os.path.isfile(fn): raise FileNotFoundError(f"file {run_id}_ontology_names.pickle not found") with open(fn, 'rb') as f: @@ 
-641,51 +646,32 @@ def load_ontology_names( return ids def create_summary_tab(self): - """ metrics = list(self.evals.values())[0]['val'].keys() - hyperpar = list(self.hyperpars.keys()) - model_hyperpar = list(self.hyperpars.keys()) self.summary_tab = pandas.DataFrame(dict( list({ - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], - "run": self.run_ids, - }.items()) + - list(dict([(hp, [self.hyperpars[id_i][hp] for id_i in self.run_ids]) for hp in hyperpar]).items()) + - list(dict([(hp, [self.model_hyperpar[id_i][hp] for id_i in self.run_ids]) for hp in model_hyperpar]).items()) + - list(dict( - [("train_" + m, [self.evals[id_i]["train"][m] for id_i in self.run_ids]) for m in metrics]).items()) + - list(dict( - [("test_" + m, [self.evals[id_i]["test"][m] for id_i in self.run_ids]) for m in metrics]).items()) + - list(dict([("val_" + m, [self.evals[id_i]["val"][m] for id_i in self.run_ids]) for m in metrics]).items()) + - list(dict([("all_" + m, [self.evals[id_i]["all"][m] for id_i in self.run_ids]) for m in metrics]).items()) - )) - :return: - """ - metrics = list(self.evals.values())[0]['val'].keys() - self.summary_tab = pandas.DataFrame(dict( - list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "cv0" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] 
for id_i in self.run_ids], - "model_type": [ - "linear" if (id_i.split("_")[3] == "mlp" and id_i.split("_")[5].split(".")[1] == "0") - else id_i.split("_")[3] + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], # noqa: E241 + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], # noqa: E241 + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], # noqa: E241 + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], # noqa: E241 + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], # noqa: E241 + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], # noqa: E241 + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], # noqa: E241 + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], # noqa: E241 + "organism": [id_i.split("_")[1].split("-")[0] for id_i in self.run_ids], # noqa: E241 + "organ": [id_i.split("_")[1].split("-")[1] for id_i in self.run_ids], # noqa: E241 + "model_type": [ # noqa: E241 + "linear" if (id_i.split("_")[1].split("-")[2] == "mlp" and + id_i.split("_")[1].split("-")[3].split(".")[1] == "0") + else id_i.split("_")[1].split("-")[2] for id_i in self.run_ids ], - "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], - "run": self.run_ids - }.items()) + # noqa: W504 - list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 - list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 - list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + # noqa: W504 + "version": [id_i.split("_")[1].split("-")[3] for id_i in self.run_ids], # noqa: E241 + "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], # noqa: E241 + "run": 
self.run_ids, # noqa: E241 + }.items()) + + list(dict([("train_" + m, [self.evals[x]["train"][m] for x in self.run_ids]) for m in metrics]).items()) + + list(dict([("test_" + m, [self.evals[x]["test"][m] for x in self.run_ids]) for m in metrics]).items()) + + list(dict([("val_" + m, [self.evals[x]["val"][m] for x in self.run_ids]) for m in metrics]).items()) + list(dict([("all_" + m, [self.evals[x]["all"][m] for x in self.run_ids]) for m in metrics]).items()) )) if self.summary_tab.shape[0] == 0: @@ -708,11 +694,11 @@ def best_model_celltype( if model_id is not None: if cvs is not None: fns = [ - os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") + os.path.join(self.gs_dirs[f"{model_id}_cv{x}"], self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") for x in cvs ] else: - fns = [os.path.join(self.source_path, self.gs_keys[model_id], "results", model_id)] + fns = [os.path.join(self.gs_dirs[model_id], self.gs_keys[model_id], "results", model_id)] covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, covar else: @@ -735,17 +721,13 @@ def plot_best( Plot accuracy or other metric heatmap by organ and model type. :param rename_levels: - :param metric: Metric to plot in heatmap. 
- - - acc - - f1 - :param collapse_cv: How to collapse values from cross validation into single scalar: - - - mean - - median - - max - :param ylim: - :param xrot: + :param partition_select: + :param partition_show: + :param metric_select: + :param metric_show: + :param collapse_cv: + :param vmin: + :param vmax: :param height_fig: :param width_fig: :return: @@ -822,6 +804,9 @@ def plot_best_classwise_heatmap( organ: str, organism: str, datapath: str, + store_format: str, + targetpath: str, + configpath: str, partition_select: str = "val", metric_select: str = "custom_cce_agg", metric_show: str = "f1", @@ -834,8 +819,11 @@ def plot_best_classwise_heatmap( Plot evaluation metric heatmap for specified organ by cell classes and model types. :param organ: Organ to plot in heatmap. - :param organism: Organism that the gridsearch was run on + :param organism: Species that the gridsearch was run on :param datapath: Path to the local sfaira data repository + :param store_format: + :param targetpath: + :param configpath: :param partition_select: Based on which partition to select the best model - train - val @@ -858,10 +846,53 @@ def plot_best_classwise_heatmap( :param width_fig: Figure width. :return: fig, axs, sns_data_heatmap """ - import matplotlib.pyplot as plt import seaborn as sns + def f1(yhat, ytrue): + """ + Class wise F1. + + :param yhat: + :param ytrue: + :return: + """ + def _tp(yhat, ytrue): + """ + Class wise true positive count. + + :param yhat: + :param ytrue: + :return: + """ + yhat_true = np.asarray(yhat == np.max(yhat, axis=1, keepdims=True), dtype="float32") + return np.sum(yhat_true * ytrue, axis=0) + + def _fp(yhat, ytrue): + """ + Class wise false positive count. + + :param yhat: + :param ytrue: + :return: + """ + yhat_true = np.asarray(yhat == np.max(yhat, axis=1, keepdims=True), dtype="float32") + return np.sum(yhat_true * (1. - ytrue), axis=0) + + def _fn(yhat, ytrue): + """ + Class wise false negative count. 
+ + :param yhat: + :param ytrue: + :return: + """ + yhat_true = np.asarray(yhat < np.max(yhat, axis=1, keepdims=True), dtype="float32") + return np.sum(yhat_true * ytrue, axis=0) + precision = _tp(yhat, ytrue) / (_tp(yhat, ytrue) + _fp(yhat, ytrue)) + recall = _tp(yhat, ytrue) / (_tp(yhat, ytrue) + _fn(yhat, ytrue)) + return 2 * 1 / (1 / precision + 1 / recall) + if self.summary_tab is None: self.create_summary_tab() @@ -875,26 +906,30 @@ def plot_best_classwise_heatmap( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - dataset = Universe(data_path=datapath) - dataset.subset(key="organism", values=[organism]) - dataset.subset(key="organ", values=[organ]) - if not dataset.flatten().datasets: - raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") - dataset.load() - dataset = dataset.flatten() - - cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() + store = load_store(cache_path=datapath, store_format=store_format) + store.load_config(configpath) + store.subset(attr_key="id", values=[k for k in store.indices.keys() + if 'cell_ontology_class' in store.adata_by_key[k].obs.columns]) + store.subset(attr_key="cellontology_class", excluded_values=[ + store._adata_ids_sfaira.unknown_celltype_identifier, + store._adata_ids_sfaira.not_a_cell_celltype_identifier, + ]) + cu = CelltypeUniverse( + cl=OntologyCl(branch="v2021-02-01"), + uberon=OCS.organ, + ) + cu.load_target_universe(targetpath) + cell_counts = store.obs['cell_ontology_class'].value_counts().to_dict() celltypelist = list(cell_counts.keys()).copy() - cu = CelltypeUniverse(organism=organism) - # TODO set target universe. 
+ leaves = cu.onto_cl.convert_to_name(cu.onto_cl.leaves) for k in celltypelist: - if k not in cu.leaves: - if k not in cu.ontology.node_names: - raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in cu[k]: # TODO get leaves + leafnodes = cu.onto_cl.convert_to_name(cu.onto_cl.map_to_leaves(node=k, return_type="ids", include_self=True)) + # Replace count on intermediate nodes with counts over leaves + if k not in leaves: + for leaf in leaves: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1 / len(cu[k]) # TODO get leaves + cell_counts[leaf] += 1. / len(leafnodes) del cell_counts[k] # Compute class-wise metrics @@ -915,9 +950,6 @@ def plot_best_classwise_heatmap( model_types = sns_tab["model_type"].unique() model_types.sort() classes = self.load_ontology_names(run_id=sns_tab["run"].values[0]) - if 'unknown' not in classes and 'Unknown' not in classes: - classes = classes + ['Unknown'] - cell_counts['Unknown'] = 0 hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): @@ -977,6 +1009,9 @@ def plot_best_classwise_scatter( organ: str, organism: str, datapath: str, + store_format: str, + targetpath: str, + configpath: str, partition_select: str = "val", metric_select: str = "custom_cce_agg", metric_show: str = "f1", @@ -985,7 +1020,7 @@ def plot_best_classwise_scatter( height_fig: int = 7, width_fig: int = 7, annotate_thres_ncells: int = 1000, - annotate_thres_f1: float = 0.5 + annotate_thres_f1: float = 0.5, ): """ Plot evaluation metric scatterplot for specified organ by cell classes and model types. @@ -993,7 +1028,9 @@ def plot_best_classwise_scatter( :param organ: Organ to plot in heatmap. 
:param organism: Organism that the gridsearch was run on :param datapath: Path to the local sfaira data repository - :param celltype_version: Version in sfaira celltype universe + :param store_format: + :param targetpath: + :param configpath: :param partition_select: Based on which partition to select the best model - train - val @@ -1018,7 +1055,6 @@ def plot_best_classwise_scatter( :param annotate_thres_f1: :return: fig, axs, sns_data_scatter """ - import matplotlib.pyplot as plt import seaborn as sns @@ -1035,26 +1071,30 @@ def plot_best_classwise_scatter( ) sns_tab = sns_tab[sns_tab['organ'] == organ] - dataset = Universe(data_path=datapath) - dataset.subset(key="organism", values=[organism]) - dataset.subset(key="organ", values=[organ]) - if not dataset.flatten().datasets: - raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") - dataset.load() - dataset = dataset.flatten() - - cell_counts = dataset.obs_concat(keys=['cell_ontology_class'])['cell_ontology_class'].value_counts().to_dict() + store = load_store(cache_path=datapath, store_format=store_format) + store.load_config(configpath) + store.subset(attr_key="id", values=[k for k in store.indices.keys() + if 'cell_ontology_id' in store.adata_by_key[k].obs.columns]) + store.subset(attr_key="cellontology_class", excluded_values=[ + store._adata_ids_sfaira.unknown_celltype_identifier, + store._adata_ids_sfaira.not_a_cell_celltype_identifier, + ]) + cu = CelltypeUniverse( + cl=OntologyCl(branch="v2021-02-01"), + uberon=OCS.organ, + ) + cu.load_target_universe(targetpath) + cell_counts = store.obs['cell_ontology_class'].value_counts().to_dict() celltypelist = list(cell_counts.keys()).copy() - cu = CelltypeUniverse(organism=organism) - # TODO set target universe. 
+ leaves = cu.onto_cl.convert_to_name(cu.onto_cl.leaves) for k in celltypelist: - if k not in cu.leaves: - if k not in cu.ontology.node_names: - raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in cu[k]: # TODO get leaves + leafnodes = cu.onto_cl.convert_to_name(cu.onto_cl.map_to_leaves(node=k, return_type="ids", include_self=True)) + # Replace count on intermediate nodes with counts over leaves + if k not in leaves: + for leaf in leaves: if leaf not in cell_counts.keys(): cell_counts[leaf] = 0 - cell_counts[leaf] += 1 / len(cu[k]) # TODO get leaves + cell_counts[leaf] += 1. / len(leafnodes) del cell_counts[k] # Compute class-wise metrics @@ -1074,9 +1114,6 @@ def plot_best_classwise_scatter( # Build figure. model_types = sns_tab["model_type"].unique() classes = self.load_ontology_names(run_id=sns_tab["run"].values[0]) - if 'unknown' not in classes and 'Unknown' not in classes: - classes = classes + ['Unknown'] - cell_counts['Unknown'] = 0 hm = np.zeros((len(classes), len(model_types))) + np.nan # mask = np.isnan(hm) for i, m in enumerate(model_types): @@ -1122,6 +1159,7 @@ def plot_best_classwise_scatter( axs = sns.scatterplot(x='Number of cells in whole dataset', y='Classwise f1 score', style='Model type', + alpha=0.8, data=sns_data_scatter, ax=axs ) @@ -1146,11 +1184,11 @@ class SummarizeGridsearchEmbedding(GridsearchContainer): def __init__( self, - source_path: str, + source_path: dict, cv: bool, loss_idx: int = 0, mse_idx: int = 1, - model_id_len: int = 7 + model_id_len: int = 3 ): super(SummarizeGridsearchEmbedding, self).__init__( source_path=source_path, @@ -1164,33 +1202,34 @@ def create_summary_tab(self): metrics = list(self.evals.values())[0]['val'].keys() self.summary_tab = pandas.DataFrame(dict( list({ - "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], - "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], - "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in 
self.run_ids], - "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], - "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], - "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], - "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], - "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], - "organ": [id_i.split("_")[2] for id_i in self.run_ids], - "model_type": [id_i.split("_")[3] for id_i in self.run_ids], - "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], - "run": self.run_ids, + "depth": [id_i.split("_")[self.model_id_len + 0] for id_i in self.run_ids], # noqa: E241 + "width": [id_i.split("_")[self.model_id_len + 1] for id_i in self.run_ids], # noqa: E241 + "lr": [id_i.split("_")[self.model_id_len + 2] for id_i in self.run_ids], # noqa: E241 + "dropout": [id_i.split("_")[self.model_id_len + 3] for id_i in self.run_ids], # noqa: E241 + "l1": [id_i.split("_")[self.model_id_len + 4] for id_i in self.run_ids], # noqa: E241 + "l2": [id_i.split("_")[self.model_id_len + 5] for id_i in self.run_ids], # noqa: E241 + "cv": [id_i.split("_")[-1] if self.cv else "1" for id_i in self.run_ids], # noqa: E241 + "model": ["_".join(id_i.split("_")[:self.model_id_len]) for id_i in self.run_ids], # noqa: E241 + "organism": [id_i.split("_")[1].split("-")[0] for id_i in self.run_ids], # noqa: E241 + "organ": [id_i.split("_")[1].split("-")[1] for id_i in self.run_ids], # noqa: E241 + "model_type": [id_i.split("_")[1].split("-")[2] for id_i in self.run_ids], # noqa: E241 + "version": [id_i.split("_")[1].split("-")[3] for id_i in self.run_ids], # noqa: E241 + "model_gs_id": ["_".join(id_i.split("_")[:(self.model_id_len + 6)]) for id_i in self.run_ids], # noqa: E241 + "run": self.run_ids, # noqa: E241 }.items()) + - # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("train_" + m, 
[self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() - else self.evals[x]["train"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + - # TODO: Hacky solution to make sure metrics are called the same in VAE and other models - list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() - else self.evals[x]["test"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + - # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + list(dict([("train_" + m, [self.evals[x]["train"][m] if m in self.evals[x]["train"].keys() else + self.evals[x]["train"]['neg_ll_' + m] for x in self.run_ids]) + for m in metrics]).items()) + + list(dict([("test_" + m, [self.evals[x]["test"][m] if m in self.evals[x]["test"].keys() else + self.evals[x]["test"]['neg_ll_' + m] for x in self.run_ids]) + for m in metrics]).items()) + list(dict([("val_" + m, [self.evals[x]["val"][m] if m in self.evals[x]["val"].keys() - else self.evals[x]["val"]['neg_ll_' + m] for x in self.run_ids]) for m in metrics]).items()) + + else self.evals[x]["val"]['neg_ll_' + m] for x in self.run_ids]) + for m in metrics]).items()) + list(dict([("all_" + m, [self.evals[x]["all"][m] if m in self.evals[x]["all"].keys() else self.evals[x]["all"]['neg_ll_' + m] for x in self.run_ids]) - for m in metrics]).items()) # TODO: Hacky solution to make sure metrics are called the same in VAE and other models + for m in metrics]).items()) )) - # TODO: Hacky solution to make sure metrics are called the same in VAE and other models rename_dict = { "train_neg_ll_custom_mse": "train_custom_mse", @@ -1224,11 +1263,11 @@ def best_model_embedding( if model_id is not None: if cvs is not None: fns = [ - os.path.join(self.source_path, self.gs_keys[f"{model_id}_cv{x}"], "results", f"{model_id}_cv{x}") + os.path.join(self.gs_dirs[f"{model_id}_cv{x}"], f"{model_id}_cv{x}") for x in cvs ] else: - fns = [os.path.join(self.source_path, 
self.gs_keys[model_id], "results", model_id)] + fns = [os.path.join(self.gs_dirs[model_id], self.gs_keys[model_id], "results", model_id)] embedding = [np.load(f"{x}_embedding.npy") for x in fns] covar = [pandas.read_csv(f"{x}_covar.csv") for x in fns] return model_id, embedding, covar @@ -1251,10 +1290,13 @@ def plot_best( """ :param rename_levels: + :param partition_select: + :param partition_show: + :param metric_select: + :param metric_show: :param collapse_cv: - :param metric: - :param ylim: - :param xrot: + :param vmin: + :param vmax: :param height_fig: :param width_fig: :return: @@ -1316,7 +1358,7 @@ def plot_best( fig, axs = plt.subplots(1, 1, figsize=(height_fig, width_fig)) with sns.axes_style("dark"): axs = sns.heatmap( - sns_data_heatmap, # mask=mask, + sns_data_heatmap, annot=True, fmt=".2f", ax=axs, xticklabels=True, yticklabels=True, @@ -1328,11 +1370,15 @@ def plot_best( def get_gradients_by_celltype( self, - organ: str, - organism: str, + organ: Union[str, None], + organism: Union[str, None], + genome: Union[str, None, dict], model_type: Union[str, List[str]], metric_select: str, + data_source: str, datapath, + configpath: Union[None, str] = None, + store_format: Union[None, str] = None, test_data=True, partition_select: str = "val", ignore_cache=False, @@ -1352,7 +1398,7 @@ def get_gradients_by_celltype( :param min_cells: :return: (cell types, input features) cumulative gradients """ - model_id, _, _ = self.get_best_model_ids( + model_id, run_id, _ = self.get_best_model_ids( tab=self.summary_tab, metric_select=metric_select, partition_select=partition_select, @@ -1361,10 +1407,7 @@ def get_gradients_by_celltype( "model_type": model_type, } ) - # check cached file - - resultspath = os.path.join(self.source_path, self.gs_keys[model_id], 'results', '') - + resultspath = self.gs_dirs[run_id] if os.path.isfile(os.path.join(resultspath, f'{model_id}_grads.pickle')) and not ignore_cache: print('Load gradients from cached file...') with 
open(os.path.join(resultspath, f'{model_id}_grads.pickle'), 'rb') as f: @@ -1372,26 +1415,47 @@ def get_gradients_by_celltype( else: print('Compute gradients (1/3): load data') # load data - dataset = Universe(data_path=datapath) - dataset.subset(key="organism", values=[organism]) - dataset.subset(key="organ", values=[organ]) - dataset.subset(key="annotated", values=[True]) - if not dataset.flatten().datasets: - raise ValueError(f"No datasets matching organism: {organism} and organ: {organ} found") - dataset.load() - dataset = dataset.flatten() + if data_source == "store": + if genome is not None: + warnings.warn("Using data_source='store', the provided genome will be ignored") + store = load_store(cache_path=datapath, store_format=store_format) + store.load_config(configpath) + store.subset(attr_key="id", values=[k for k in store.indices.keys() + if 'cell_ontology_id' in store.adata_by_key[k].obs.columns]) + store.subset(attr_key="cellontology_class", excluded_values=[ + store._adata_ids_sfaira.unknown_celltype_identifier, + store._adata_ids_sfaira.not_a_cell_celltype_identifier, + ]) + adatas = store.adata_sliced + # Load into memory: + for k in adatas.keys(): + adatas[k] = adatas[k].to_memory() + adata = adatas[list(adatas.keys())[0]] + if len(adatas.keys()) > 0: + adata = adata.concatenate(*[adatas[k] for k in list(adatas.keys())[1:]]) + elif data_source == "universe": + if configpath is not None or store_format is not None: + warnings.warn("Using data_source='universe', the provided configpath and store_format will be ignored") + u = Universe(data_path=datapath) + if organism is not None: + u.subset("organism", organism) + if organ is not None: + u.subset("organ", organ) + u.load(allow_caching=False) + u.streamline_features(match_to_reference=genome) + u.streamline_metadata() + adata = u.adata + else: + raise ValueError("data_source has to be 'universe' or 'store'") print('Compute gradients (2/3): load embedding') - # load embedding - adata = dataset.adata + 
zoo = ModelZoo() + zoo.model_id = "_".join(model_id.split("_")[:3]) embedding = EstimatorKerasEmbedding( data=adata, model_dir="", - model_id="", - organism=organism, - organ=organ, - model_type=model_type, - model_topology=model_id.split('_')[5] + model_id=model_id, + model_topology=zoo.topology_container ) embedding.init_model() embedding.model.training_model.load_weights(os.path.join(resultspath, f'{model_id}_weights.h5')) @@ -1419,10 +1483,14 @@ def get_gradients_by_celltype( def plot_gradient_distr( self, organ: str, - organism: str, model_type: Union[str, List[str]], metric_select: str, - datapath, + datapath: str, + data_source: str, + organism: Union[str, None] = None, + genome: Union[str, None] = None, + configpath: Union[None, str] = None, + store_format: Union[None, str] = None, test_data=True, partition_select: str = "val", normalize=True, @@ -1457,7 +1525,11 @@ def plot_gradient_distr( organism=organism, model_type=modelt, metric_select=metric_select, + genome=genome, + data_source=data_source, datapath=datapath, + configpath=configpath, + store_format=store_format, test_data=test_data, partition_select=partition_select, ignore_cache=ignore_cache, @@ -1466,11 +1538,9 @@ def plot_gradient_distr( if normalize: avg_grads[modelt] = np.abs(avg_grads[modelt]) - avg_grads[modelt] = (avg_grads[modelt] - np.min(avg_grads[modelt], axis=1, keepdims=True)) / \ - np.maximum( - np.max(avg_grads[modelt], axis=1, keepdims=True) - np.min(avg_grads[modelt], - axis=1, - keepdims=True), 1e-8) + avg_grads[modelt] = (avg_grads[modelt] - np.min(avg_grads[modelt], axis=1, keepdims=True)) /\ + np.maximum(np.max(avg_grads[modelt], axis=1, keepdims=True) - + np.min(avg_grads[modelt], axis=1, keepdims=True), 1e-8) fig, axs = plt.subplots(1, 1, figsize=(width_fig, height_fig)) @@ -1515,10 +1585,14 @@ def plot_gradient_distr( def plot_gradient_cor( self, organ: str, - organism: str, model_type: Union[str, List[str]], metric_select: str, - datapath, + datapath: str, + data_source: 
str, + organism: Union[str, None] = None, + genome: Union[str, None] = None, + configpath: Union[None, str] = None, + store_format: Union[None, str] = None, test_data=True, partition_select: str = "val", height_fig=7, @@ -1534,10 +1608,11 @@ def plot_gradient_cor( Plot correlation heatmap of gradient vectors accumulated on input features between cell types or models. :param organ: - :param organism: :param model_type: :param metric_select: :param datapath: + :param configpath: + :param store_format: :param test_data: :param partition_select: :param height_fig: @@ -1570,7 +1645,11 @@ def plot_gradient_cor( organism=organism, model_type=modelt, metric_select=metric_select, + genome=genome, + data_source=data_source, datapath=datapath, + configpath=configpath, + store_format=store_format, test_data=test_data, partition_select=partition_select, ignore_cache=ignore_cache, @@ -1596,10 +1675,10 @@ def plot_gradient_cor( plt.show() def plot_npc( - self, - organ, - topology_version, - cvs=None + self, + organ, + topology_version, + cvs=None ): """ Plots the explained variance ration that accumulates explained variation of the latent space’s ordered @@ -1641,10 +1720,10 @@ def plot_npc( plt.show() def plot_active_latent_units( - self, - organ, - topology_version, - cvs=None + self, + organ, + topology_version, + cvs=None ): """ Plots latent unit activity measured by empirical variance of the expected latent space. @@ -1653,7 +1732,8 @@ def plot_active_latent_units( the model will use z, and not z_mean. 
""" - colors = ['red', 'blue', 'green', 'cyan', 'magenta', 'yellow', 'darkgreen', 'lime', 'navy', 'royalblue', 'pink', 'peru'] + colors = ['red', 'blue', 'green', 'cyan', 'magenta', 'yellow', 'darkgreen', 'lime', 'navy', 'royalblue', + 'pink', 'peru'] def active_latent_units_mask(z): var_x = np.diagonal(np.cov(z.T)) @@ -1698,8 +1778,8 @@ def active_latent_units_mask(z): if model == "vaevamp": z1, z2 = np.split(np.log(np.diagonal(np.cov(z.T))), 2) plt.plot(range(1, int(latent_dim / 2) + 1), np.sort(z2)[::-1], color=colors[i], alpha=1.0, - label=r"%s $z_2$ active units: %i" % (model, len(z2[z2 > np.log(0.01)])), linestyle='dashed', - linewidth=3) + label=r"%s $z_2$ active units: %i" % (model, len(z2[z2 > np.log(0.01)])), + linestyle='dashed', linewidth=3) plt.plot(range(1, int(latent_dim / 2) + 1), np.sort(z1)[::-1], color=colors[i], alpha=1.0, label=r"%s $z_1$ active units: %i" % (model, len(z1[z1 > np.log(0.01)])), linestyle='dotted', linewidth=3) diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index b51ff99e1..4d87b28ab 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -6,19 +6,19 @@ from typing import Union from sfaira.consts import AdataIdsSfaira -from sfaira.data import DistributedStore, Universe +from sfaira.data import DistributedStoreBase, Universe from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding from sfaira.interface import ModelZoo class TrainModel: - data: Union[anndata.AnnData, DistributedStore] - estimator: EstimatorKeras + data: Union[anndata.AnnData, DistributedStoreBase] + estimator: Union[EstimatorKeras, None] def __init__( self, - data: Union[str, anndata.AnnData, Universe, DistributedStore], + data: Union[str, anndata.AnnData, Universe, DistributedStoreBase], ): # Check if handling backed anndata or base path to directory of raw files: if isinstance(data, str) and data.split(".")[-1] == "h5ad": @@ -30,30 +30,44 @@ def __init__( self.data = data elif 
isinstance(data, Universe): self.data = data.adata - elif isinstance(data, DistributedStore): + elif isinstance(data, DistributedStoreBase): self.data = data else: raise ValueError(f"did not recongize data of type {type(data)}") self.zoo = ModelZoo() + self._adata_ids = AdataIdsSfaira() def load_into_memory(self): """ - Loads backed objects from DistributedStore into single adata object in memory in .data slot. + Loads backed objects from DistributedStoreBase into single adata object in memory in .data slot. :return: """ - if isinstance(self.data, DistributedStore): - self.data = self.data.adata + if isinstance(self.data, DistributedStoreBase): + adata = None + for k, v in self.data.indices.items(): + x = self.data.adata_by_key[k][v, :].to_memory() + x.obs["dataset_id"] = k + if adata is None: + adata = x + else: + adata = adata.concatenate(x) + self.data = adata + + @property + @abc.abstractmethod + def topology_dict(self) -> dict: + pass @abc.abstractmethod def init_estim(self): pass @abc.abstractmethod - def save_eval(self, fn: str): + def save_eval(self, fn: str, **kwargs): pass @abc.abstractmethod - def _save_specific(self, fn: str): + def _save_specific(self, fn: str, **kwargs): pass def save( @@ -78,6 +92,16 @@ def save( if specific: self._save_specific(fn=fn) + def n_counts(self, idx): + if isinstance(self.estimator.data, anndata.AnnData): + return np.asarray( + self.estimator.data.X[np.sort(idx), :].sum(axis=1)[np.argsort(idx)] + ).flatten() + elif isinstance(self.estimator.data, DistributedStoreBase): + return self.estimator.data.n_counts(idx=idx) + else: + assert False + class TrainModelEmbedding(TrainModel): @@ -86,12 +110,17 @@ class TrainModelEmbedding(TrainModel): def __init__( self, model_path: str, - data: Union[str, anndata.AnnData, Universe, DistributedStore], + data: Union[str, anndata.AnnData, Universe, DistributedStoreBase], ): super(TrainModelEmbedding, self).__init__(data=data) self.estimator = None self.model_dir = model_path + @property + 
def topology_dict(self) -> dict: + topology_dict = self.zoo.topology_container.topology + return topology_dict + def init_estim( self, override_hyperpar: Union[dict, None] = None @@ -104,8 +133,9 @@ def init_estim( model_topology=self.zoo.topology_container ) self.estimator.init_model(override_hyperpar=override_hyperpar) + print(f"TRAINER: initialised model with {self.estimator.topology_container.n_var} features.") - def save_eval(self, fn: str): + def save_eval(self, fn: str, **kwargs): evaluation_train = self.estimator.evaluate_any(idx=self.estimator.idx_train) evaluation_val = self.estimator.evaluate_any(idx=self.estimator.idx_eval) evaluation_test = self.estimator.evaluate_any(idx=self.estimator.idx_test) @@ -119,7 +149,7 @@ def save_eval(self, fn: str): with open(fn + '_evaluation.pickle', 'wb') as f: pickle.dump(obj=evaluation, file=f) - def _save_specific(self, fn: str): + def _save_specific(self, fn: str, **kwargs): """ Save embedding prediction: @@ -127,12 +157,13 @@ def _save_specific(self, fn: str): :return: """ embedding = self.estimator.predict_embedding() - df_summary = self.estimator.obs_test[AdataIdsSfaira.obs_keys] - df_summary["ncounts"] = np.asarray( - self.estimator.data.X[np.sort(self.estimator.idx_test), :].sum(axis=1)[np.argsort(self.estimator.idx_test)] - ).flatten() + df_summary = self.estimator.obs_test + df_summary = df_summary[[k for k in df_summary.columns if k in self._adata_ids.obs_keys]] + df_summary["ncounts"] = self.n_counts(idx=self.estimator.idx_test) np.save(file=fn + "_embedding", arr=embedding) df_summary.to_csv(fn + "_covar.csv") + with open(fn + "_topology.pickle", "wb") as f: + pickle.dump(obj=self.topology_dict, file=f) class TrainModelCelltype(TrainModel): @@ -142,13 +173,20 @@ class TrainModelCelltype(TrainModel): def __init__( self, model_path: str, - data: Union[str, anndata.AnnData, Universe, DistributedStore], + data: Union[str, anndata.AnnData, Universe, DistributedStoreBase], fn_target_universe: str, ): 
super(TrainModelCelltype, self).__init__(data=data) self.estimator = None self.model_dir = model_path - self.data.celltypes_universe.load_target_universe(fn=fn_target_universe) + self.fn_target_universe = fn_target_universe + + @property + def topology_dict(self) -> dict: + topology_dict = self.zoo.topology_container.topology + # Load target universe leaves into topology dict: + topology_dict["output"]["targets"] = self.estimator.celltype_universe.onto_cl.leaves + return topology_dict def init_estim( self, @@ -161,9 +199,12 @@ def init_estim( model_id=self.zoo.model_id, model_topology=self.zoo.topology_container ) + self.estimator.celltype_universe.load_target_universe(self.fn_target_universe) self.estimator.init_model(override_hyperpar=override_hyperpar) + print(f"TRAINER: initialised model with {self.estimator.topology_container.n_var} features and " + f"{self.estimator.ntypes} labels: \n{self.estimator.ontology_names}.") - def save_eval(self, fn: str): + def save_eval(self, fn: str, eval_weighted: bool = False, **kwargs): evaluation = { 'train': self.estimator.evaluate_any(idx=self.estimator.idx_train, weighted=False), 'val': self.estimator.evaluate_any(idx=self.estimator.idx_eval, weighted=False), @@ -172,42 +213,39 @@ def save_eval(self, fn: str): } with open(fn + '_evaluation.pickle', 'wb') as f: pickle.dump(obj=evaluation, file=f) - evaluation_weighted = { - 'train': self.estimator.evaluate_any(idx=self.estimator.idx_train, weighted=True), - 'val': self.estimator.evaluate_any(idx=self.estimator.idx_eval, weighted=True), - 'test': self.estimator.evaluate_any(idx=self.estimator.idx_test, weighted=True), - 'all': self.estimator.evaluate_any(idx=None, weighted=True) - } - with open(fn + '_evaluation_weighted.pickle', 'wb') as f: - pickle.dump(obj=evaluation_weighted, file=f) - - def _save_specific(self, fn: str): + if eval_weighted: + evaluation_weighted = { + 'train': self.estimator.evaluate_any(idx=self.estimator.idx_train, weighted=True), + 'val': 
self.estimator.evaluate_any(idx=self.estimator.idx_eval, weighted=True), + 'test': self.estimator.evaluate_any(idx=self.estimator.idx_test, weighted=True), + 'all': self.estimator.evaluate_any(idx=None, weighted=True) + } + with open(fn + '_evaluation_weighted.pickle', 'wb') as f: + pickle.dump(obj=evaluation_weighted, file=f) + + def _save_specific(self, fn: str, **kwargs): """ Save true and predicted labels on test set: :param fn: :return: """ + obs = self.estimator.data.obs ytrue = self.estimator.ytrue() yhat = self.estimator.predict() - df_summary = self.estimator.obs_test[AdataIdsSfaira.obs_keys] - df_summary["ncounts"] = np.asarray(self.estimator.data.X[self.estimator.idx_test, :].sum(axis=1)).flatten() + df_summary = self.estimator.obs_test + df_summary = df_summary[[k for k in df_summary.columns if k in self._adata_ids.obs_keys]] + df_summary["ncounts"] = self.n_counts(idx=self.estimator.idx_test) np.save(file=fn + "_ytrue", arr=ytrue) np.save(file=fn + "_yhat", arr=yhat) df_summary.to_csv(fn + "_covar.csv") with open(fn + '_ontology_names.pickle', 'wb') as f: - pickle.dump(obj=self.estimator.ids, file=f) - - cell_counts = self.data.obs['cell_ontology_class'].value_counts().to_dict() - cell_counts_leaf = cell_counts.copy() - for k in cell_counts.keys(): - if k not in self.estimator.ids: - if k not in self.estimator.celltype_universe.onto_cl.node_ids: - raise(ValueError(f"Celltype '{k}' not found in celltype universe")) - for leaf in self.estimator.celltype_universe.onto_cl.node_ids: - if leaf not in cell_counts_leaf.keys(): - cell_counts_leaf[leaf] = 0 - cell_counts_leaf[leaf] += 1 / len(self.estimator.celltype_universe.onto_cl.node_ids) - del cell_counts_leaf[k] + pickle.dump(obj=self.estimator.ontology_names, file=f) + with open(fn + '_ontology_ids.pickle', 'wb') as f: + pickle.dump(obj=self.estimator.ontology_ids, file=f) + with open(fn + "_topology.pickle", "wb") as f: + pickle.dump(obj=self.topology_dict, file=f) + + cell_counts = 
obs['cell_ontology_class'].value_counts().to_dict() with open(fn + '_celltypes_valuecounts_wholedata.pickle', 'wb') as f: - pickle.dump(obj=[cell_counts, cell_counts_leaf], file=f) + pickle.dump(obj=[cell_counts], file=f) diff --git a/sfaira/unit_tests/data/test_dataset.py b/sfaira/unit_tests/data/test_dataset.py index 940b22f0a..eff963998 100644 --- a/sfaira/unit_tests/data/test_dataset.py +++ b/sfaira/unit_tests/data/test_dataset.py @@ -75,7 +75,7 @@ def test_dsgs_subset_cell_wise(organ: str, celltype: str): for k, v in x.datasets.items(): assert v.organism == "mouse", v.id assert v.ontology_container_sfaira.organ.is_a(query=v.organ, reference=organ), v.organ - for y in np.unique(v.adata.obs[v._adata_ids.cell_ontology_class].values): + for y in np.unique(v.adata.obs[v._adata_ids.cellontology_class].values): assert v.ontology_container_sfaira.cellontology_class.is_a(query=y, reference=celltype), y @@ -121,7 +121,7 @@ def test_dsg_write_store(store: str, dense: bool, clean_obs: bool): subset_genes_to_type="protein_coding") ds.streamline_metadata(schema="sfaira", uns_to_obs=False, clean_obs=clean_obs, clean_var=True, clean_uns=True, clean_obs_names=True) - ds.write_distributed_store(dir_cache=os.path.join(dir_data, "store"), store=store, dense=dense) + ds.write_distributed_store(dir_cache=os.path.join(dir_data, "store"), store_format=store, dense=dense) def test_dsg_load(): diff --git a/sfaira/unit_tests/data/test_store.py b/sfaira/unit_tests/data/test_store.py index d0befe3c0..f74395f33 100644 --- a/sfaira/unit_tests/data/test_store.py +++ b/sfaira/unit_tests/data/test_store.py @@ -1,75 +1,134 @@ +import anndata +import dask.array import numpy as np import os import pytest +import scipy.sparse import time from typing import List -from sfaira.data import DistributedStore +from sfaira.data import load_store from sfaira.versions.genomes import GenomeContainer from sfaira.unit_tests.utils import cached_store_writing MOUSE_GENOME_ANNOTATION = "Mus_musculus.GRCm38.102" 
+HUMAN_GENOME_ANNOTATION = "Homo_sapiens.GRCh38.102" dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") -dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data", "meta") """ -TODO tests from here on down require cached data for mouse lung +Tests from here on down require cached data for mouse lung """ -def test_config(): +@pytest.mark.parametrize("store_format", ["h5ad", "dao"]) +def test_fatal(store_format: str): + """ + Test if basic methods abort. + """ + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION, + store_format=store_format) + store = load_store(cache_path=store_path, store_format=store_format) + store.subset(attr_key="organism", values=["mouse"]) + store.subset(attr_key="assay_sc", values=["10x sequencing"]) + _ = store.n_obs + _ = store.n_vars + _ = store.var_names + _ = store.shape + _ = store.obs + _ = store.indices + _ = store.genome_container + _ = store.n_counts(idx=[1, 3]) + + +@pytest.mark.parametrize("store_format", ["h5ad", "dao"]) +@pytest.mark.parametrize("dataset", ["mouse_lung_2019_10xsequencing_pisco_022_10.1101/661728"]) +def test_data(store_format: str, dataset: str): + """ + Test if the data exposed by the store are the same as in the original Dataset instance after streamlining. 
+ """ + store_path, ds = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION, + store_format=store_format, return_ds=True) + store = load_store(cache_path=store_path, store_format=store_format) + dataset_key_reduced = dataset.split("_10.")[0] + store.subset(attr_key="id", values=[dataset_key_reduced]) + adata_store = store.adata_by_key[dataset] + adata_ds = ds.datasets[dataset].adata + # Check .X + x_store = adata_store.X + x_ds = adata_ds.X.todense() + if isinstance(x_store, dask.array.Array): + x_store = x_store.compute() + if isinstance(x_store, anndata._core.sparse_dataset.SparseDataset): + # Need to load sparse matrix into memory if it comes from a backed anndata object. + x_store = x_store[:, :] + if isinstance(x_store, scipy.sparse.csr_matrix): + x_store = x_store.todense() + # Check that non-zero elements are the same: + assert np.all(np.where(x_store > 0)[0] == np.where(x_ds > 0)[0]) + assert np.all(np.where(x_store > 0)[1] == np.where(x_ds > 0)[1]) + assert np.all(x_store - x_ds == 0.) + assert x_store.dtype == x_ds.dtype + # Note: Do not run test on sum across entire object if dtype is float32 as this can result in test failures because + # of float overflows. 
+ # Check .obs + obs_store = adata_store.obs + obs_ds = adata_ds.obs + assert np.all(obs_store.columns == obs_ds.columns), (obs_store.columns, obs_ds.columns) + for k, v in obs_store.items(): + assert np.all(np.asarray(v.values.tolist()) == np.asarray(obs_ds[k].values.tolist())) + # Check .var + var_store = adata_store.var + var_ds = adata_ds.var + assert np.all(var_store.columns == var_ds.columns), (var_store.columns, var_ds.columns) + for k, v in var_store.items(): + assert np.all(np.asarray(v.values.tolist()) == np.asarray(var_ds[k].values.tolist())) + # Check .uns + uns_store = adata_store.uns + uns_ds = adata_ds.uns + assert np.all(uns_store.keys() == uns_ds.keys()), (uns_store.keys(), uns_ds.keys()) + for k, v in uns_store.items(): + assert np.all(v == uns_ds[k]) + + +@pytest.mark.parametrize("store_format", ["h5ad", "dao"]) +def test_config(store_format: str): """ Test that data set config files can be set, written and recovered. """ - store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION, + store_format=store_format) config_path = os.path.join(store_path, "config_lung") - store = DistributedStore(cache_path=store_path) + store = load_store(cache_path=store_path, store_format=store_format) + store.subset(attr_key="organism", values=["mouse"]) store.subset(attr_key="assay_sc", values=["10x sequencing"]) store.write_config(fn=config_path) - store2 = DistributedStore(cache_path=store_path) + store2 = load_store(cache_path=store_path, store_format=store_format) store2.load_config(fn=config_path + ".pickle") assert np.all(store.indices.keys() == store2.indices.keys()) assert np.all([np.all(store.indices[k] == store2.indices[k]) for k in store.indices.keys()]) -def test_type_targets(): - """ - Test that target leave nodes can be set, written and recovered. 
- """ - store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) - target_path = os.path.join(store_path, "lung") - store = DistributedStore(cache_path=store_path) - observed_nodes = np.unique(np.concatenate([ - x.obs[store._adata_ids_sfaira.cell_ontology_class] - for x in store.adatas.values() - ])).tolist() - leaves_all = store.celltypes_universe.onto_cl.leaves - effective_leaves = store.celltypes_universe.onto_cl.get_effective_leaves(x=observed_nodes) - store.celltypes_universe.onto_cl.leaves = effective_leaves - leaves1 = store.celltypes_universe.onto_cl.leaves - store.celltypes_universe.write_target_universe(fn=target_path, x=effective_leaves) - store2 = DistributedStore(cache_path=store_path) - store2.celltypes_universe.load_target_universe(fn=target_path) - leaves2 = store2.celltypes_universe.onto_cl.leaves - assert len(leaves_all) > len(leaves1) - assert len(set(leaves1).union(set(leaves2))) == len(leaves1) - assert np.all([x in leaves1 for x in leaves2]) - - -@pytest.mark.parametrize("idx", [None, np.concatenate([np.arange(150, 200), np.array([1, 100, 2003, 33])])]) -@pytest.mark.parametrize("batch_size", [1, 10]) +@pytest.mark.parametrize("store_format", ["h5ad", "dao"]) +@pytest.mark.parametrize("idx", [np.array([2, 1020, 3, 20000, 20100]), + np.concatenate([np.arange(150, 200), np.array([1, 100, 2003, 33])])]) +@pytest.mark.parametrize("batch_size", [1, 7]) @pytest.mark.parametrize("obs_keys", [[], ["cell_ontology_class"]]) @pytest.mark.parametrize("gc", [(None, {}), (MOUSE_GENOME_ANNOTATION, {"biotype": "protein_coding"})]) -def test_generator_shapes(idx, batch_size: int, obs_keys: List[str], gc: tuple): +@pytest.mark.parametrize("randomized_batch_access", [True, False]) +def test_generator_shapes(store_format: str, idx, batch_size: int, obs_keys: List[str], gc: tuple, + randomized_batch_access: bool): """ Test generators queries do not throw errors and that output shapes are correct. 
""" assembly, subset = gc - store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION) - store = DistributedStore(cache_path=store_path) + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=MOUSE_GENOME_ANNOTATION, + store_format=store_format) + store = load_store(cache_path=store_path, store_format=store_format) + store.subset(attr_key="organism", values=["mouse"]) if assembly is not None: gc = GenomeContainer(assembly=assembly) gc.subset(**subset) @@ -78,14 +137,17 @@ def test_generator_shapes(idx, batch_size: int, obs_keys: List[str], gc: tuple): idx=idx, batch_size=batch_size, obs_keys=obs_keys, + randomized_batch_access=randomized_batch_access, ) nobs = len(idx) if idx is not None else store.n_obs batch_sizes = [] t0 = time.time() + x = None + obs = None for i, z in enumerate(g()): x_i, obs_i = z assert x_i.shape[0] == obs_i.shape[0] - if i == 0: # First batch hast correct shape, last batch not necessarily! 
+ if i == 0: x = x_i obs = obs_i batch_sizes.append(x_i.shape[0]) @@ -93,12 +155,8 @@ def test_generator_shapes(idx, batch_size: int, obs_keys: List[str], gc: tuple): print(f"time for iterating over generator:" f" {tdelta}s for {np.sum(batch_sizes)} cells in {len(batch_sizes)} batches," f" {tdelta / len(batch_sizes)}s per batch.") - # Only the last batch in each data set can be of different size: - assert np.sum(batch_sizes != batch_size) <= len(store.adatas.keys()) - assert x.shape[0] == batch_size, (x.shape, batch_size) - assert obs.shape[0] == batch_size, (obs.shape, batch_size) assert x.shape[1] == store.n_vars, (x.shape, store.n_vars) - assert obs.shape[1] == len(obs_keys), (x.shape, obs_keys) - assert np.sum(batch_sizes) == nobs, (x.shape, obs_keys) + assert obs.shape[1] == len(obs_keys), (obs.shape, obs_keys) + assert np.sum(batch_sizes) == nobs, (batch_sizes, nobs) if assembly is not None: assert x.shape[1] == gc.n_var, (x.shape, gc.n_var) diff --git a/sfaira/unit_tests/estimators/custom.obo b/sfaira/unit_tests/estimators/custom.obo new file mode 100644 index 000000000..8bd872bd0 --- /dev/null +++ b/sfaira/unit_tests/estimators/custom.obo @@ -0,0 +1,16 @@ +format-version: 1.2 +ontology: custom + +[Term] +id: MYONTO:01 +name: Name1 + +[Term] +id: MYONTO:02 +name: Name2 +is_a: MYONTO:01 + +[Term] +id: MYONTO:03 +name: Name3 +is_a: MYONTO:01 diff --git a/sfaira/unit_tests/estimators/test_estimator.py b/sfaira/unit_tests/estimators/test_estimator.py index 521efd9d0..b404ef079 100644 --- a/sfaira/unit_tests/estimators/test_estimator.py +++ b/sfaira/unit_tests/estimators/test_estimator.py @@ -7,27 +7,36 @@ import time from typing import Union -from sfaira.data import DistributedStore +from sfaira.data import load_store, DistributedStoreBase from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding +from sfaira.versions.genomes import CustomFeatureContainer +from sfaira.versions.metadata import OntologyOboCustom from 
sfaira.versions.topologies import TopologyContainer from sfaira.unit_tests.utils import cached_store_writing, simulate_anndata dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") -dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") +dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data", "meta") cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), "cache", "genomes") -ASSEMBLY = "Mus_musculus.GRCm38.102" -GENES = ["ENSMUSG00000000003", "ENSMUSG00000000028"] -TARGETS = ["T cell", "stromal cell"] +ASSEMBLY = { + "mouse": "Mus_musculus.GRCm38.102", + "human": "Homo_sapiens.GRCh38.102", +} +GENES = { + "mouse": ["ENSMUSG00000000003", "ENSMUSG00000000028"], + "human": ["ENSG00000000003", "ENSG00000000005"], +} +TARGETS = ["T cell", "CD4-positive helper T cell", "stromal cell", "UNKNOWN"] +TARGET_UNIVERSE = ["CD4-positive helper T cell", "stromal cell"] ASSAYS = ["10x sequencing", "Smart-seq2"] TOPOLOGY_EMBEDDING_MODEL = { "model_type": None, "input": { - "genome": ASSEMBLY, - "genes": ["ensg", GENES], + "genome": None, + "genes": None, }, "output": {}, "hyper_parameters": { @@ -41,15 +50,14 @@ TOPOLOGY_CELLTYPE_MODEL = { "model_type": None, "input": { - "genome": ASSEMBLY, - "genes": ["ensg", GENES], + "genome": None, + "genes": None, }, "output": { "cl": "v2021-02-01", - "targets": TARGETS + "targets": TARGET_UNIVERSE }, "hyper_parameters": { - "latent_dim": None, "l1_coef": 0., "l2_coef": 0., } @@ -58,7 +66,7 @@ class HelperEstimatorBase: - data: Union[anndata.AnnData, DistributedStore] + data: Union[anndata.AnnData, DistributedStoreBase] estimator: Union[EstimatorKeras] model_type: str tc: TopologyContainer @@ -84,24 +92,26 @@ def load_adata(self): """ self.data = self._simulate() - def load_store(self): - store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY) - store = 
DistributedStore(cache_path=store_path) + def load_store(self, organism, organ): + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY[organism], + organism=organism, organ=organ) + store = load_store(cache_path=store_path) + store.subset(attr_key="organism", values=organism) + store.subset(attr_key="organ", values=organ) self.data = store @abc.abstractmethod - def init_topology(self, model_type: str, feature_space: str): + def init_topology(self, model_type: str, feature_space: str, organism: str): pass @abc.abstractmethod - def init_estimator(self): + def init_estimator(self, test_split): """ Initialise target estimator as .estimator attribute. """ pass - def estimator_train(self, test_split): - self.estimator.init_model() + def estimator_train(self, test_split, randomized_batch_access): self.estimator.train( optimizer="adam", lr=0.005, @@ -111,28 +121,29 @@ def estimator_train(self, test_split): test_split=test_split, validation_batch_size=4, max_validation_steps=1, - shuffle_buffer_size=10, + shuffle_buffer_size=None if randomized_batch_access else 10, cache_full=False, + randomized_batch_access=randomized_batch_access, ) @abc.abstractmethod def basic_estimator_test(self, test_split): pass - def load_estimator(self, model_type, data_type, feature_space, test_split): - self.init_topology(model_type=model_type, feature_space=feature_space) + def load_estimator(self, model_type, data_type, feature_space, test_split, organism="mouse", organ="lung"): + self.init_topology(model_type=model_type, feature_space=feature_space, organism=organism) np.random.seed(1) if data_type == "adata": self.load_adata() else: - self.load_store() - self.init_estimator() - self.estimator_train(test_split=test_split) + self.load_store(organism=organism, organ=organ) + self.init_estimator(test_split=test_split) def fatal_estimator_test(self, model_type, data_type, test_split=0.1, feature_space="small"): self.load_estimator(model_type=model_type, 
data_type=data_type, feature_space=feature_space, test_split=test_split) - self.basic_estimator_test() + self.estimator_train(test_split=test_split, randomized_batch_access=False) + self.basic_estimator_test(test_split=test_split) class HelperEstimatorKerasEmbedding(HelperEstimatorBase): @@ -141,14 +152,17 @@ class HelperEstimatorKerasEmbedding(HelperEstimatorBase): model_type: str tc: TopologyContainer - def init_topology(self, model_type: str, feature_space: str): + def init_topology(self, model_type: str, feature_space: str, organism: str): topology = TOPOLOGY_EMBEDDING_MODEL.copy() if feature_space == "full": # Read 500 genes (not full protein coding) to compromise between being able to distinguish observations # and reducing run time of unit tests. - tab = pd.read_csv(os.path.join(cache_dir, ASSEMBLY + ".csv")) + tab = pd.read_csv(os.path.join(cache_dir, ASSEMBLY[organism] + ".csv")) genes_full = tab.loc[tab["gene_biotype"].values == "protein_coding", "gene_id"].values[:500].tolist() topology["input"]["genes"] = ["ensg", genes_full] + else: + topology["input"]["genes"] = ["ensg", GENES[organism]] + topology["input"]["genome"] = ASSEMBLY[organism] topology["model_type"] = model_type if model_type == "linear": topology["hyper_parameters"]["latent_dim"] = 2 @@ -157,13 +171,15 @@ def init_topology(self, model_type: str, feature_space: str): self.model_type = model_type self.tc = TopologyContainer(topology=topology, topology_id="0.1") - def init_estimator(self): + def init_estimator(self, test_split): self.estimator = EstimatorKerasEmbedding( data=self.data, model_dir=None, model_id="testid", model_topology=self.tc ) + self.estimator.init_model() + self.estimator.split_train_val_test(test_split=test_split, val_split=0.1) def basic_estimator_test(self, test_split=0.1): _ = self.estimator.evaluate() @@ -190,25 +206,46 @@ class HelperEstimatorKerasCelltype(HelperEstimatorBase): model_type: str tc: TopologyContainer - def init_topology(self, model_type: str, 
feature_space: str): + def init_topology(self, model_type: str, feature_space: str, organism: str): topology = TOPOLOGY_CELLTYPE_MODEL.copy() topology["model_type"] = model_type - topology["hyper_parameters"]["latent_dim"] = (2,) + topology["input"]["genome"] = ASSEMBLY[organism] + topology["input"]["genes"] = ["ensg", GENES[organism]] + if model_type == "mlp": + topology["hyper_parameters"]["units"] = (2,) self.model_type = model_type - self.tc = TopologyContainer(topology=topology, topology_id="0.1") + self.tc = TopologyContainer(topology=topology, topology_id="0.0.1") - def init_estimator(self): + def init_estimator(self, test_split): + tc = self.tc + if isinstance(self.data, DistributedStoreBase): + # Reset leaves below: + tc.topology["output"]["targets"] = None self.estimator = EstimatorKerasCelltype( data=self.data, model_dir=None, model_id="testid", - model_topology=self.tc + model_topology=tc ) - self.estimator.celltype_universe.leaves = TARGETS + if isinstance(self.data, DistributedStoreBase): + leaves = self.estimator.celltype_universe.onto_cl.get_effective_leaves( + x=[x for x in self.data.obs[self.data._adata_ids_sfaira.cellontology_class].values + if x != self.data._adata_ids_sfaira.unknown_celltype_identifier] + ) + self.nleaves = len(leaves) + self.estimator.celltype_universe.onto_cl.leaves = leaves + else: + self.nleaves = None + self.estimator.init_model() + self.estimator.split_train_val_test(test_split=test_split, val_split=0.1) def basic_estimator_test(self, test_split=0.1): _ = self.estimator.evaluate() prediction_output = self.estimator.predict() + if isinstance(self.estimator.data, anndata.AnnData): + assert prediction_output.shape[1] == len(TARGET_UNIVERSE), prediction_output.shape + else: + assert prediction_output.shape[1] == self.nleaves, prediction_output.shape weights = self.estimator.model.training_model.get_weights() self.estimator.save_weights_to_cache() self.estimator.load_weights_from_cache() @@ -222,6 +259,48 @@ def 
basic_estimator_test(self, test_split=0.1): assert np.allclose(prediction_output, new_prediction_output, rtol=1e-6, atol=1e-6) +class HelperEstimatorKerasCelltypeCustomObo(HelperEstimatorKerasCelltype): + + def init_obo_custom(self) -> OntologyOboCustom: + return OntologyOboCustom(obo=os.path.join(os.path.dirname(__file__), "custom.obo")) + + def init_genome_custom(self, n_features) -> CustomFeatureContainer: + return CustomFeatureContainer(genome_tab=pd.DataFrame({ + "gene_name": ["dim_" + str(i) for i in range(n_features)], + "gene_id": ["dim_" + str(i) for i in range(n_features)], + "gene_biotype": ["embedding" for _ in range(n_features)], + })) + + def init_topology_custom(self, model_type: str, n_features): + topology = TOPOLOGY_CELLTYPE_MODEL.copy() + topology["model_type"] = model_type + topology["input"]["genome"] = "custom" + topology["input"]["genes"] = ["biotype", "embedding"] + topology["output"]["cl"] = "custom" + topology["output"]["targets"] = ["MYONTO:02", "MYONTO:03"] + if model_type == "mlp": + topology["hyper_parameters"]["units"] = (2,) + self.model_type = model_type + gc = self.init_genome_custom(n_features=n_features) + self.tc = TopologyContainer(topology=topology, topology_id="0.0.1", custom_genome_constainer=gc) + + def fatal_estimator_test_custom(self): + self.init_topology_custom(model_type="mlp", n_features=50) + obo = self.init_obo_custom() + np.random.seed(1) + self.data = simulate_anndata(n_obs=100, genes=self.tc.gc.ensembl, + targets=["MYONTO:01", "MYONTO:02", "MYONTO:03"], obo=obo) + self.estimator = EstimatorKerasCelltype( + data=self.data, + model_dir=None, + model_id="testid", + model_topology=self.tc, + celltype_ontology=obo, + ) + self.estimator.init_model() + self.estimator_train(test_split=0.1, randomized_batch_access=False) + self.basic_estimator_test(test_split=0.1) + # Test embedding models: @@ -231,14 +310,16 @@ def test_for_fatal_linear(data_type): test_estim.fatal_estimator_test(model_type="linear", data_type=data_type) 
-def test_for_fatal_ae(): +@pytest.mark.parametrize("data_type", ["adata"]) +def test_for_fatal_ae(data_type): test_estim = HelperEstimatorKerasEmbedding() - test_estim.fatal_estimator_test(model_type="ae", data_type="adata") + test_estim.fatal_estimator_test(model_type="ae", data_type=data_type) -def test_for_fatal_vae(): +@pytest.mark.parametrize("data_type", ["adata"]) +def test_for_fatal_vae(data_type): test_estim = HelperEstimatorKerasEmbedding() - test_estim.fatal_estimator_test(model_type="vae", data_type="adata") + test_estim.fatal_estimator_test(model_type="vae", data_type=data_type) # Test cell type predictor models: @@ -250,17 +331,63 @@ def test_for_fatal_mlp(data_type): test_estim.fatal_estimator_test(model_type="mlp", data_type=data_type) -def test_for_fatal_marker(): +@pytest.mark.parametrize("data_type", ["adata"]) +def test_for_fatal_marker(data_type): test_estim = HelperEstimatorKerasCelltype() - test_estim.fatal_estimator_test(model_type="marker", data_type="adata") + test_estim.fatal_estimator_test(model_type="marker", data_type=data_type) + +def test_for_fatal_mlp_custom(): + test_estim = HelperEstimatorKerasCelltypeCustomObo() + test_estim.fatal_estimator_test_custom() # Test index sets +@pytest.mark.parametrize("organism", ["human"]) +@pytest.mark.parametrize("organ", ["lung"]) +@pytest.mark.parametrize("batch_size", [1024, 2048, 4096]) +@pytest.mark.parametrize("randomized_batch_access", [False, True]) +def test_dataset_size(organism: str, organ: str, batch_size: int, randomized_batch_access: bool): + """ + Test that tf data set from estimator has same size as generator invoked directly from store based on number of + observations in emitted batches. + + Tests for batch sizes smaller, equal to and larger than retrieval batch size and with and without randomized + batch access. 
+ """ + test_estim = HelperEstimatorKerasEmbedding() + retrieval_batch_size = 2048 + # Need full feature space here because observations are not necessarily different in small model testing feature + # space with only two genes: + test_estim.load_estimator(model_type="linear", data_type="store", feature_space="reduced", test_split=0.2, + organism=organism, organ=organ) + idx_train = test_estim.estimator.idx_train + shuffle_buffer_size = None if randomized_batch_access else 2 + ds_train = test_estim.estimator._get_dataset(idx=idx_train, batch_size=batch_size, mode='eval', + shuffle_buffer_size=shuffle_buffer_size, + retrieval_batch_size=retrieval_batch_size, + randomized_batch_access=randomized_batch_access) + x_train_shape = 0 + for x, _ in ds_train.as_numpy_iterator(): + x_train_shape += x[0].shape[0] + # Define raw store generator on train data to compare and check that it has the same size as tf generator exposed + # by estimator: + g_train = test_estim.estimator.data.generator(idx=idx_train, batch_size=retrieval_batch_size, + randomized_batch_access=randomized_batch_access) + x_train2_shape = 0 + for x, _ in g_train(): + x_train2_shape += x.shape[0] + assert x_train_shape == x_train2_shape + assert x_train_shape == len(idx_train) + + +@pytest.mark.parametrize("organism", ["mouse"]) +@pytest.mark.parametrize("organ", ["lung"]) @pytest.mark.parametrize("data_type", ["adata", "store"]) +@pytest.mark.parametrize("randomized_batch_access", [False, True]) @pytest.mark.parametrize("test_split", [0.3, {"assay_sc": "10x sequencing"}]) -def test_split_index_sets(data_type: str, test_split): +def test_split_index_sets(organism: str, organ: str, data_type: str, randomized_batch_access: bool, test_split): """ Test that train, val, test split index sets are correct: @@ -272,31 +399,40 @@ def test_split_index_sets(data_type: str, test_split): test_estim = HelperEstimatorKerasEmbedding() # Need full feature space here because observations are not necessarily different in small 
model testing feature # space with only two genes: - t0 = time.time() - test_estim.load_estimator(model_type="linear", data_type=data_type, test_split=test_split, feature_space="full") - print(f"time for running estimator test: {time.time() - t0}s") + test_estim.load_estimator(model_type="linear", data_type=data_type, test_split=test_split, feature_space="full", + organism=organism, organ=organ) idx_train = test_estim.estimator.idx_train idx_eval = test_estim.estimator.idx_eval idx_test = test_estim.estimator.idx_test - # 1) Assert that index assignments sum up to full data set: + print(idx_train) + print(idx_eval) + print(idx_test) + # 1) Assert that index assignment sets sum up to full data set: + # Make sure that there are no repeated indices in each set. + assert len(idx_train) == len(np.unique(idx_train)) + assert len(idx_eval) == len(np.unique(idx_eval)) + assert len(idx_test) == len(np.unique(idx_test)) assert len(idx_train) + len(idx_eval) + len(idx_test) == test_estim.data.n_obs, \ (len(idx_train), len(idx_eval), len(idx_test), test_estim.data.n_obs) + if isinstance(test_estim.data, DistributedStoreBase): + assert np.sum([v.shape[0] for v in test_estim.data.adata_by_key.values()]) == test_estim.data.n_obs # 2) Assert that index assignments are exclusive to each split: assert len(set(idx_train).intersection(set(idx_eval))) == 0 assert len(set(idx_train).intersection(set(idx_test))) == 0 assert len(set(idx_test).intersection(set(idx_eval))) == 0 # 3) Check partition of index vectors over store data sets matches test split scenario: - if isinstance(test_estim.estimator.data, DistributedStore): + if isinstance(test_estim.estimator.data, DistributedStoreBase): # Prepare data set-wise index vectors that are numbered in the same way as global split index vectors. 
- # See also EstimatorKeras.train and DistributedStore.subset_cells_idx_global - idx_raw = test_estim.estimator.data.indices_global + # See also EstimatorKeras.train and DistributedStoreBase.subset_cells_idx_global + idx_raw = test_estim.estimator.data.indices_global.values() if isinstance(test_split, float): # Make sure that indices from each split are in each data set: - for z in [idx_train, idx_eval, idx_test]: - assert np.all([ # in each data set + for i, z in enumerate([idx_train, idx_eval, idx_test]): + matches = [ # in each data set np.any([y in z for y in x]) # at least one match of data set to split index set for x in idx_raw - ]) + ] + assert np.all(matches), (i, matches) else: # Make sure that indices from (train, val) and test split are exclusive: datasets_train = np.where([ # in each data set @@ -311,70 +447,92 @@ def test_split_index_sets(data_type: str, test_split): np.any([y in idx_test for y in x]) # at least one match of data set to split index set for x in idx_raw ])[0] - assert datasets_train == datasets_eval, (datasets_train, datasets_eval) - assert len(set(datasets_train).intersection(set(datasets_test))) == 0, (datasets_train, datasets_test) + assert np.all(datasets_train == datasets_eval), (datasets_train, datasets_eval, datasets_test) + assert len(set(datasets_train).intersection(set(datasets_test))) == 0, \ + (datasets_train, datasets_eval, datasets_test) # 4) Assert that observations mapped to indices are actually unique based on expression vectors: # Build numpy arrays of expression input data sets from tensorflow data sets directly from estimator. # These data sets are the most processed transformation of the data and stand directly in concat with the model. 
+ shuffle_buffer_size = None if randomized_batch_access else 2 t0 = time.time() - ds_train = test_estim.estimator._get_dataset(idx=idx_train, batch_size=128, mode='eval', shuffle_buffer_size=1, - retrieval_batch_size=128) + ds_train = test_estim.estimator._get_dataset(idx=idx_train, batch_size=1024, mode='eval', + shuffle_buffer_size=shuffle_buffer_size, + retrieval_batch_size=2048, + randomized_batch_access=randomized_batch_access) print(f"time for building training data set: {time.time() - t0}s") t0 = time.time() - ds_eval = test_estim.estimator._get_dataset(idx=idx_eval, batch_size=128, mode='eval', shuffle_buffer_size=1, - retrieval_batch_size=128) + ds_eval = test_estim.estimator._get_dataset(idx=idx_eval, batch_size=1024, mode='eval', + shuffle_buffer_size=shuffle_buffer_size, + retrieval_batch_size=2048, + randomized_batch_access=randomized_batch_access) print(f"time for building validation data set: {time.time() - t0}s") t0 = time.time() - ds_test = test_estim.estimator._get_dataset(idx=idx_test, batch_size=128, mode='eval', shuffle_buffer_size=1, - retrieval_batch_size=128) + ds_test = test_estim.estimator._get_dataset(idx=idx_test, batch_size=1024, mode='eval', + shuffle_buffer_size=shuffle_buffer_size, + retrieval_batch_size=2048, + randomized_batch_access=randomized_batch_access) + print(f"time for building test data set: {time.time() - t0}s") + # Create two copies of test data set to make sure that re-instantiation of a subset does not cause issues. 
+ ds_test2 = test_estim.estimator._get_dataset(idx=idx_test, batch_size=1024, mode='eval', + shuffle_buffer_size=shuffle_buffer_size, + retrieval_batch_size=2048, + randomized_batch_access=randomized_batch_access) print(f"time for building test data set: {time.time() - t0}s") x_train = [] x_eval = [] x_test = [] + x_test2_shape = 0 t0 = time.time() - for x, y in ds_train.as_numpy_iterator(): + for x, _ in ds_train.as_numpy_iterator(): x_train.append(x[0]) x_train = np.concatenate(x_train, axis=0) print(f"time for iterating over training data set: {time.time() - t0}s") t0 = time.time() - for x, y in ds_eval.as_numpy_iterator(): + for x, _ in ds_eval.as_numpy_iterator(): x_eval.append(x[0]) x_eval = np.concatenate(x_eval, axis=0) print(f"time for iterating over validation data set: {time.time() - t0}s") t0 = time.time() - for x, y in ds_test.as_numpy_iterator(): + for x, _ in ds_test.as_numpy_iterator(): x_test.append(x[0]) x_test = np.concatenate(x_test, axis=0) print(f"time for iterating over test data set: {time.time() - t0}s") + # Assert that duplicate of test data has the same shape: + for x, _ in ds_test2: + x_test2_shape += x[0].shape[0] + assert x_test2_shape == x_test.shape[0] # Validate size of recovered numpy data sets: - print(f"shapes received {(x_train.shape[0], x_eval.shape[0], x_test.shape[0])}") + print(test_estim.data.n_obs) print(f"shapes expected {(len(idx_train), len(idx_eval), len(idx_test))}") + print(f"shapes received {(x_train.shape[0], x_eval.shape[0], x_test.shape[0])}") + assert x_train.shape[0] + x_eval.shape[0] + x_test.shape[0] == test_estim.data.n_obs + assert len(idx_train) + len(idx_eval) + len(idx_test) == test_estim.data.n_obs assert x_train.shape[0] == len(idx_train) assert x_eval.shape[0] == len(idx_eval) assert x_test.shape[0] == len(idx_test) # Assert that observations are unique within partition: assert np.all([ - np.sum([np.all(x_train[i] == x_train[j]) for j in range(x_train.shape[0])]) == 1 + np.sum(np.abs(x_train[[i], :] - 
x_train).sum(axis=1) == 0) == 1 for i in range(x_train.shape[0]) ]) assert np.all([ - np.sum([np.all(x_eval[i] == x_eval[j]) for j in range(x_eval.shape[0])]) == 1 + np.sum(np.abs(x_eval[[i], :] - x_eval).sum(axis=1) == 0) == 1 for i in range(x_eval.shape[0]) ]) assert np.all([ - np.sum([np.all(x_test[i] == x_test[j]) for j in range(x_test.shape[0])]) == 1 + np.sum(np.abs(x_test[[i], :] - x_test).sum(axis=1) == 0) == 1 for i in range(x_test.shape[0]) ]) # Assert that observations are not replicated across partitions: assert not np.any([ - np.any([np.all(x_train[i] == x_eval[j]) for j in range(x_eval.shape[0])]) + np.any(np.abs(x_train[[i], :] - x_eval).sum(axis=1) == 0) for i in range(x_train.shape[0]) ]) assert not np.any([ - np.any([np.all(x_train[i] == x_test[j]) for j in range(x_test.shape[0])]) + np.any(np.abs(x_train[[i], :] - x_test).sum(axis=1) == 0) for i in range(x_train.shape[0]) ]) assert not np.any([ - np.any([np.all(x_test[i] == x_eval[j]) for j in range(x_eval.shape[0])]) - for i in range(x_test.shape[0]) + np.any(np.abs(x_eval[[i], :] - x_test).sum(axis=1) == 0) + for i in range(x_eval.shape[0]) ]) diff --git a/sfaira/unit_tests/trainer/test_trainer.py b/sfaira/unit_tests/trainer/test_trainer.py index c9886c5f0..1d4b0115b 100644 --- a/sfaira/unit_tests/trainer/test_trainer.py +++ b/sfaira/unit_tests/trainer/test_trainer.py @@ -1,11 +1,10 @@ import anndata import numpy as np import os -import pytest from typing import Union -from sfaira.data import DistributedStore -from sfaira.interface import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.data import load_store +from sfaira.interface import ModelZoo from sfaira.train import TrainModelCelltype, TrainModelEmbedding from sfaira.unit_tests.utils import cached_store_writing, simulate_anndata @@ -18,9 +17,8 @@ class HelperTrainerBase: - data: Union[anndata.AnnData, DistributedStore] + data: Union[anndata.AnnData, load_store] trainer: Union[TrainModelCelltype, TrainModelEmbedding] - zoo: 
ModelZoo def __init__(self, zoo: ModelZoo): self.model_id = zoo.model_id @@ -41,8 +39,8 @@ def load_adata(self): self.data = self._simulate() def load_store(self): - store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY) - store = DistributedStore(cache_path=store_path) + store_path = cached_store_writing(dir_data=dir_data, dir_meta=dir_meta, assembly=ASSEMBLY, organism="mouse") + store = load_store(cache_path=store_path) self.data = store def load_data(self, data_type): @@ -52,27 +50,34 @@ def load_data(self, data_type): else: self.load_store() - def test_for_fatal(self, cls): + def test_init(self, cls): self.load_data(data_type="adata") - trainer = cls( + self.trainer = cls( data=self.data, model_path=dir_meta, ) - trainer.zoo.set_model_id(model_id=self.model_id) - trainer.init_estim(override_hyperpar={}) + self.trainer.zoo.model_id = self.model_id + self.trainer.init_estim(override_hyperpar={}) + def test_save(self): + self.trainer.estimator.train(epochs=1, max_steps_per_epoch=1, test_split=0.1, validation_split=0.1, + optimizer="adam", lr=0.005) + self.trainer.save(fn=os.path.join(dir_data, "trainer_test"), model=True, specific=True) -def test_for_fatal_embedding(): - model_id = "embedding_human-lung_linear_mylab_0.1_0.1" - zoo = ModelZooEmbedding() - zoo.set_model_id(model_id=model_id) + +def test_save_embedding(): + model_id = "embedding_human-lung-linear-0.1-0.1_mylab" + zoo = ModelZoo() + zoo.model_id = model_id test_trainer = HelperTrainerBase(zoo=zoo) - test_trainer.test_for_fatal(cls=TrainModelEmbedding) + test_trainer.test_init(cls=TrainModelEmbedding) + test_trainer.test_save() -def test_for_fatal(): - model_id = "celltype_human-lung_mlp_mylab_0.0.1_0.1" - zoo = ModelZooCelltype() - zoo.set_model_id(model_id=model_id) +def test_save_celltypes(): + model_id = "celltype_human-lung-mlp-0.0.1-0.1_mylab" + zoo = ModelZoo() + zoo.model_id = model_id test_trainer = HelperTrainerBase(zoo=zoo) - 
test_trainer.test_for_fatal(cls=TrainModelCelltype) + test_trainer.test_init(cls=TrainModelCelltype) + test_trainer.test_save() diff --git a/sfaira/unit_tests/utils.py b/sfaira/unit_tests/utils.py index 32d3d03fb..a2dc5d27b 100644 --- a/sfaira/unit_tests/utils.py +++ b/sfaira/unit_tests/utils.py @@ -1,53 +1,94 @@ import anndata import numpy as np import os +from typing import Tuple, Union +from sfaira.consts import AdataIdsSfaira, OCS from sfaira.data import Universe +from sfaira.versions.metadata import OntologyOboCustom -def simulate_anndata(genes, n_obs, targets=None, assays=None) -> anndata.AnnData: +def simulate_anndata(genes, n_obs, targets=None, assays=None, obo: Union[None, OntologyOboCustom] = None) -> \ + anndata.AnnData: """ Simulate basic data example. :return: AnnData instance. """ + adata_ids_sfaira = AdataIdsSfaira() data = anndata.AnnData( np.random.randint(low=0, high=100, size=(n_obs, len(genes))).astype(np.float32) ) if assays is not None: - data.obs["assay_sc"] = [ - assays[np.random.randint(0, len(targets))] - for i in range(n_obs) + data.obs[adata_ids_sfaira.assay_sc] = [ + assays[np.random.randint(0, len(assays))] + for _ in range(n_obs) ] if targets is not None: - data.obs["cell_ontology_class"] = [ + data.obs[adata_ids_sfaira.cellontology_class] = [ targets[np.random.randint(0, len(targets))] - for i in range(n_obs) + for _ in range(n_obs) ] - data.var["ensembl"] = genes + if obo is None: + data.obs[adata_ids_sfaira.cellontology_id] = [ + OCS.cellontology_class.convert_to_id(x) + if x not in [adata_ids_sfaira.unknown_celltype_identifier, + adata_ids_sfaira.not_a_cell_celltype_identifier] + else x + for x in data.obs[adata_ids_sfaira.cellontology_class].values + ] + else: + data.obs[adata_ids_sfaira.cellontology_id] = [ + obo.convert_to_id(x) + if x not in [adata_ids_sfaira.unknown_celltype_identifier, + adata_ids_sfaira.not_a_cell_celltype_identifier] + else x + for x in data.obs[adata_ids_sfaira.cellontology_class].values + ] + 
data.var[adata_ids_sfaira.gene_id_ensembl] = genes return data -def cached_store_writing(dir_data, dir_meta, assembly) -> os.PathLike: +def cached_store_writing(dir_data, dir_meta, assembly, organism: str = "mouse", organ: str = "lung", + store_format: str = "h5ad", return_ds: bool = False) -> Union[str, Tuple[str, Universe]]: """ Writes a store if it does not already exist. :return: Path to store. """ + adata_ids_sfaira = AdataIdsSfaira() store_path = os.path.join(dir_data, "store") ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) - ds.subset(key="organism", values=["mouse"]) - ds.subset(key="organ", values=["lung"]) + ds.subset(key=adata_ids_sfaira.organism, values=[organism]) + ds.subset(key=adata_ids_sfaira.organ, values=[organ]) # Only load files that are not already in cache. anticipated_files = np.unique([ - v.doi for k, v in ds.datasets.items() - if not os.path.exists(os.path.join(store_path, v.doi_cleaned_id + ".h5ad")) + v.doi[0] if isinstance(v.doi, list) else v.doi for k, v in ds.datasets.items() + if (not os.path.exists(os.path.join(store_path, v.doi_cleaned_id + "." 
+ store_format)) and + store_format == "h5ad") or + (not os.path.exists(os.path.join(store_path, v.doi_cleaned_id)) and store_format == "dao") ]).tolist() - ds.subset(key="doi", values=anticipated_files) + ds.subset(key=adata_ids_sfaira.doi, values=anticipated_files) ds.load(allow_caching=True) - ds.streamline_features(remove_gene_version=True, match_to_reference={"mouse": assembly}, + ds.streamline_features(remove_gene_version=True, match_to_reference={organism: assembly}, subset_genes_to_type="protein_coding") ds.streamline_metadata(schema="sfaira", uns_to_obs=True, clean_obs=True, clean_var=True, clean_uns=True, clean_obs_names=True) - ds.write_distributed_store(dir_cache=store_path, store="h5ad", dense=False) - return store_path + if store_format == "zarr": + compression_kwargs = {"compressor": "default", "overwrite": True, "order": "C"} + else: + compression_kwargs = {} + ds.write_distributed_store(dir_cache=store_path, store_format=store_format, dense=store_format == "dao", + chunks=128, compression_kwargs=compression_kwargs) + if return_ds: + ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) + ds.subset(key=adata_ids_sfaira.organism, values=[organism]) + ds.subset(key=adata_ids_sfaira.organ, values=[organ]) + ds.load(allow_caching=True) + ds.streamline_features(remove_gene_version=True, match_to_reference={organism: assembly}, + subset_genes_to_type="protein_coding") + ds.streamline_metadata(schema="sfaira", uns_to_obs=True, clean_obs=True, clean_var=True, clean_uns=True, + clean_obs_names=True) + return store_path, ds + else: + return store_path diff --git a/sfaira/versions/genomes.py b/sfaira/versions/genomes.py index 04ec9a42c..3e43babd1 100644 --- a/sfaira/versions/genomes.py +++ b/sfaira/versions/genomes.py @@ -68,13 +68,13 @@ def download_gtf_ensembl(self): os.remove(temp_file) # Delete temporary file .gtf.gz. 
tab = tab.loc[tab[KEY_GTF_REGION_TYPE].values == VALUE_GTF_GENE, :] conversion_tab = pandas.DataFrame({ - "gene_id": [ + KEY_ID: [ x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_ID].split(" ")[-1].strip("\"") for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], - "gene_name": [ + KEY_SYMBOL: [ x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_SYMBOL].split(" ")[-1].strip("\"") for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], - "gene_biotype": [ + KEY_TYPE: [ x.split(";")[IDX_GTF_REGION_DETAIL_FIELD_TYPE].split(" ")[-1].strip("\"") for x in tab[KEY_GTF_REGION_DETAIL_FIELD].values], }).sort_values("gene_id") @@ -209,3 +209,25 @@ def id_to_names_dict(self): def strippednames_to_id_dict(self): return dict(zip([i.split(".")[0] for i in self.genome_tab[KEY_SYMBOL]], self.genome_tab[KEY_ID].values.tolist())) + + +class CustomFeatureContainer(GenomeContainer): + + def __init__( + self, + genome_tab: pandas.DataFrame, + ): + """ + + :param genome_tab: Table characterising feature space. Must be a data frame with 3 columns: + + - "gene_name": Name of features. 
+ - "gene_id": ID of features, can be the same as values of "gene_name" + - "gene_biotype": Types of features, can be arbitrary like "embedding" + """ + self.assembly = "custom" + assert len(genome_tab.columns) == 3 + assert KEY_SYMBOL in genome_tab.columns + assert KEY_ID in genome_tab.columns + assert KEY_TYPE in genome_tab.columns + self.genome_tab = genome_tab diff --git a/sfaira/versions/metadata/__init__.py b/sfaira/versions/metadata/__init__.py index e16e54767..098f3dc2f 100644 --- a/sfaira/versions/metadata/__init__.py +++ b/sfaira/versions/metadata/__init__.py @@ -1,4 +1,4 @@ from sfaira.versions.metadata.base import Ontology, OntologyList, OntologyHierarchical, OntologyObo, \ - OntologyCl, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ + OntologyOboCustom, OntologyCl, OntologyUberon, OntologyHsapdv, OntologyMondo, OntologyMmusdv, \ OntologySinglecellLibraryConstruction, OntologyCellosaurus from sfaira.versions.metadata.universe import CelltypeUniverse diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 4a9aab37a..2d35786af 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -3,12 +3,9 @@ import numpy as np import obonet import os +import pickle import requests from typing import Dict, List, Tuple, Union -import warnings - -from sfaira.consts.adata_fields import AdataIdsSfaira -from sfaira.versions.metadata.extensions import ONTOLOGIY_EXTENSION FILE_PATH = __file__ @@ -29,6 +26,52 @@ """ +def get_base_ontology_cache() -> str: + folder = FILE_PATH.split(os.sep)[:-4] + folder.insert(1, os.sep) + return os.path.join(*folder, "cache", "ontologies") + + +def cached_load_obo(url, ontology_cache_dir, ontology_cache_fn): + if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet + obofile = url + else: + ontology_cache_dir = os.path.join(get_base_ontology_cache(), ontology_cache_dir) + obofile = 
os.path.join(ontology_cache_dir, ontology_cache_fn) + # Download if necessary: + if not os.path.isfile(obofile): + os.makedirs(name=ontology_cache_dir, exist_ok=True) + + def download_obo(): + print(f"Downloading: {ontology_cache_fn}") + if not os.path.exists(ontology_cache_dir): + os.makedirs(ontology_cache_dir) + r = requests.get(url, allow_redirects=True) + open(obofile, 'wb').write(r.content) + + download_obo() + return obofile + + +def cached_load_ebi(ontology_cache_dir, ontology_cache_fn) -> (networkx.MultiDiGraph, os.PathLike): + """ + Load pickled graph object if available. + + :param ontology_cache_dir: + :param ontology_cache_fn: + :return: + """ + ontology_cache_dir = os.path.join(get_base_ontology_cache(), ontology_cache_dir) + picklefile = os.path.join(ontology_cache_dir, ontology_cache_fn) + if os.path.isfile(picklefile): + with open(picklefile, 'rb') as f: + graph = pickle.load(f) + else: + os.makedirs(name=ontology_cache_dir, exist_ok=True) + graph = None + return graph, picklefile + + class Ontology: leaves: List[str] @@ -116,7 +159,7 @@ class OntologyHierarchical(Ontology, abc.ABC): def _check_graph(self): if not networkx.is_directed_acyclic_graph(self.graph): - warnings.warn(f"DAG was broken in {type(self)}") + print(f"Ontology {type(self)} is not a DAG, treat child-parent reasoning with care.") def __validate_node_ids(self, x: Union[str, List[str]]): if isinstance(x, str): @@ -290,8 +333,29 @@ def map_to_leaves( leaves = self.convert_to_id(self.leaves) if return_type == "ids": return [x for x in leaves if x in ancestors] - if return_type == "idx": + elif return_type == "idx": return np.sort([i for i, x in enumerate(leaves) if x in ancestors]) + else: + raise ValueError(f"return_type {return_type} not recognized") + + def prepare_maps_to_leaves( + self, + include_self: bool = True + ) -> Dict[str, np.ndarray]: + """ + Precomputes all maps of nodes to their leave nodes. 
+ + :param include_self: whether to include node itself + :return: Dictionary of index vectors of leave node matches for each node (key). + """ + nodes = self.node_ids + maps = {} + import time + t0 = time.time() + for x in nodes: + maps[x] = self.map_to_leaves(node=x, return_type="idx", include_self=include_self) + print(f"time for precomputing ancestors: {time.time()-t0}") + return maps @abc.abstractmethod def synonym_node_properties(self) -> List[str]: @@ -311,6 +375,7 @@ def __init__( root_term: str, additional_terms: dict, additional_edges: List[Tuple[str, str]], + ontology_cache_fn: str, **kwargs ): def get_url_self(iri): @@ -377,15 +442,21 @@ def recursive_search(iri): edges_new.extend([(k_self, k_c) for k_c in direct_children]) return nodes_new, edges_new - self.graph = networkx.MultiDiGraph() - nodes, edges = recursive_search(iri=root_term) - nodes.update(additional_terms) - edges.extend(additional_edges) - for k, v in nodes.items(): - self.graph.add_node(node_for_adding=k, **v) - for x in edges: - parent, child = x - self.graph.add_edge(child, parent) + graph, picklefile = cached_load_ebi(ontology_cache_dir=ontology, ontology_cache_fn=ontology_cache_fn) + if graph is None: + self.graph = networkx.MultiDiGraph() + nodes, edges = recursive_search(iri=root_term) + nodes.update(additional_terms) + edges.extend(additional_edges) + for k, v in nodes.items(): + self.graph.add_node(node_for_adding=k, **v) + for x in edges: + parent, child = x + self.graph.add_edge(child, parent) + with open(picklefile, 'wb') as f: + pickle.dump(obj=self.graph, file=f) + else: + self.graph = graph def map_node_suggestion(self, x: str, include_synonyms: bool = True, n_suggest: int = 10): """ @@ -519,7 +590,12 @@ def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/uberon.obo") + obofile = cached_load_obo( + url="http://purl.obolibrary.org/obo/uberon.obo", + ontology_cache_dir="uberon", + ontology_cache_fn="uberon.obo", + ) + 
super().__init__(obo=obofile) # Clean up nodes: nodes_to_delete = [] @@ -687,26 +763,11 @@ def __init__( :param use_developmental_relationships: Whether to keep developmental relationships. :param kwargs: """ - if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet - obofile = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" - else: - # Identify cache: - folder = FILE_PATH.split(os.sep)[:-4] - folder.insert(1, os.sep) - ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cl") - fn = f"{branch}_cl.obo" - obofile = os.path.join(ontology_cache_dir, fn) - # Download if necessary: - if not os.path.isfile(obofile): - def download_cl(): - url = f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo" - print(f"Downloading: {fn}") - if not os.path.exists(ontology_cache_dir): - os.makedirs(ontology_cache_dir) - r = requests.get(url, allow_redirects=True) - open(obofile, 'wb').write(r.content) - download_cl() - + obofile = cached_load_obo( + url=f"https://raw.github.com/obophenotype/cell-ontology/{branch}/cl.obo", + ontology_cache_dir="cl", + ontology_cache_fn=f"{branch}_cl.obo", + ) super().__init__(obo=obofile) # Clean up nodes: @@ -755,6 +816,16 @@ def synonym_node_properties(self) -> List[str]: return ["synonym"] +class OntologyOboCustom(OntologyExtendedObo): + + def __init__( + self, + obo: str, + **kwargs + ): + super().__init__(obo=obo, **kwargs) + + # use OWL for OntologyHancestro @@ -764,7 +835,12 @@ def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/hsapdv.obo") + obofile = cached_load_obo( + url="http://purl.obolibrary.org/obo/hsapdv.obo", + ontology_cache_dir="hsapdv", + ontology_cache_fn="hsapdv.obo", + ) + super().__init__(obo=obofile) # Clean up nodes: nodes_to_delete = [] @@ -785,7 +861,12 @@ def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/mmusdv.obo") + obofile = cached_load_obo( + 
url="http://purl.obolibrary.org/obo/mmusdv.obo", + ontology_cache_dir="mmusdv", + ontology_cache_fn="mmusdv.obo", + ) + super().__init__(obo=obofile) # Clean up nodes: nodes_to_delete = [] @@ -806,7 +887,12 @@ def __init__( self, **kwargs ): - super().__init__(obo="http://purl.obolibrary.org/obo/mondo.obo") + obofile = cached_load_obo( + url="http://purl.obolibrary.org/obo/mondo.obo", + ontology_cache_dir="mondo", + ontology_cache_fn="mondo.obo", + ) + super().__init__(obo=obofile) # Clean up nodes: nodes_to_delete = [] @@ -836,27 +922,12 @@ def __init__( self, **kwargs ): - download_link = "https://ftp.expasy.org/databases/cellosaurus/cellosaurus.obo" - - if os.name == "nt": # if running on windows, do not download obo file, but rather pass url directly to obonet - super().__init__(obo=download_link) - else: - # Identify cache: - folder = FILE_PATH.split(os.sep)[:-4] - folder.insert(1, os.sep) - ontology_cache_dir = os.path.join(*folder, "cache", "ontologies", "cellosaurus") - fn = "cellosaurus.obo" - obofile = os.path.join(ontology_cache_dir, fn) - # Download if necessary: - if not os.path.isfile(obofile): - def download_cl(): - print(f"Downloading: {fn}") - if not os.path.exists(ontology_cache_dir): - os.makedirs(ontology_cache_dir) - r = requests.get(download_link, allow_redirects=True) - open(obofile, 'wb').write(r.content) - download_cl() - super().__init__(obo=obofile) + obofile = cached_load_obo( + url="https://ftp.expasy.org/databases/cellosaurus/cellosaurus.obo", + ontology_cache_dir="cellosaurus", + ontology_cache_fn="cellosaurus.obo", + ) + super().__init__(obo=obofile) # Clean up nodes: # edge_types = ["derived_from", "originate_from_same_individual_as"] @@ -885,5 +956,6 @@ def __init__(self): additional_edges=[ ("EFO:0010183", "microwell-seq"), ("EFO:0010183", "sci-plex"), - ] + ], + ontology_cache_fn="efo.pickle" ) diff --git a/sfaira/versions/metadata/universe.py b/sfaira/versions/metadata/universe.py index 41e30ce9a..f929c8214 100644 --- 
a/sfaira/versions/metadata/universe.py +++ b/sfaira/versions/metadata/universe.py @@ -41,7 +41,7 @@ def load_target_universe(self, fn): """ tab = pd.read_csv(fn, sep="\t", index_col=None) self.__validate_target_universe_table(tab=tab) - self.leaves = tab["name"].values + self.onto_cl.leaves = tab["name"].values def write_target_universe( self, diff --git a/sfaira/versions/topologies/class_interface.py b/sfaira/versions/topologies/class_interface.py index 7513b01b0..6c3a26489 100644 --- a/sfaira/versions/topologies/class_interface.py +++ b/sfaira/versions/topologies/class_interface.py @@ -1,3 +1,5 @@ +from typing import Union + from sfaira.versions.genomes import GenomeContainer @@ -11,9 +13,14 @@ def __init__( self, topology: dict, topology_id: str, + custom_genome_constainer: Union[GenomeContainer, None] = None, ): self.topology = topology - self.gc = GenomeContainer(assembly=self.topology["input"]["genome"]) + if custom_genome_constainer is None: + self.gc = GenomeContainer(assembly=self.topology["input"]["genome"]) + else: + assert isinstance(custom_genome_constainer, GenomeContainer) + self.gc = custom_genome_constainer self.gc.subset(**dict([tuple(self.topology["input"]["genes"])])) self.topology_id = topology_id diff --git a/sfaira/versions/topologies/human/embedding/ae.py b/sfaira/versions/topologies/human/embedding/ae.py index 59f958c36..6fd5586f3 100644 --- a/sfaira/versions/topologies/human/embedding/ae.py +++ b/sfaira/versions/topologies/human/embedding/ae.py @@ -1,8 +1,11 @@ AE_TOPOLOGIES = { "0.1": { "model_type": "ae", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -18,8 +21,11 @@ "0.2": { "model_type": "ae", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": 
"Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -35,8 +41,11 @@ "0.3": { "model_type": "ae", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/linear.py b/sfaira/versions/topologies/human/embedding/linear.py index 06947132e..5b46c8869 100644 --- a/sfaira/versions/topologies/human/embedding/linear.py +++ b/sfaira/versions/topologies/human/embedding/linear.py @@ -32,8 +32,11 @@ }, "0.3": { - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/nmf.py b/sfaira/versions/topologies/human/embedding/nmf.py index 3c49a8189..1ad46fd0a 100644 --- a/sfaira/versions/topologies/human/embedding/nmf.py +++ b/sfaira/versions/topologies/human/embedding/nmf.py @@ -1,8 +1,11 @@ NMF_TOPOLOGIES = { "0.1": { "model_type": "linear", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 64, "l1_coef": 0., @@ -14,8 +17,11 @@ "0.2": { "model_type": "linear", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": 128, "l1_coef": 0., diff --git a/sfaira/versions/topologies/human/embedding/vae.py 
b/sfaira/versions/topologies/human/embedding/vae.py index 7d6bc28e4..fbccdb79e 100644 --- a/sfaira/versions/topologies/human/embedding/vae.py +++ b/sfaira/versions/topologies/human/embedding/vae.py @@ -20,8 +20,11 @@ "0.2": { "model_type": "vae", - "genome": "Homo_sapiens.GRCh38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Homo_sapiens.GRCh38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., diff --git a/sfaira/versions/topologies/mouse/celltype/celltypemarker.py b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py index 082adbf45..3db24d205 100644 --- a/sfaira/versions/topologies/mouse/celltype/celltypemarker.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemarker.py @@ -2,7 +2,7 @@ "0.0.1": { "model_type": "marker", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": { diff --git a/sfaira/versions/topologies/mouse/celltype/celltypemlp.py b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py index ec302ce46..9ce7e5ec6 100644 --- a/sfaira/versions/topologies/mouse/celltype/celltypemlp.py +++ b/sfaira/versions/topologies/mouse/celltype/celltypemlp.py @@ -2,7 +2,7 @@ "0.0.1": { "model_type": "mlp", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": { @@ -26,7 +26,7 @@ "0.1.1": { "model_type": "mlp", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": { @@ -49,8 +49,14 @@ }, "0.1.2": { "model_type": "mlp", - "genome": "Mus_musculus.GRCm38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": { + "cl": "v2021-02-01", + "targets": None, + }, 
"hyper_parameters": { "units": [256, 128], "activation": "selu", @@ -68,7 +74,7 @@ "0.1.3": { "model_type": "mlp", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": { diff --git a/sfaira/versions/topologies/mouse/embedding/ae.py b/sfaira/versions/topologies/mouse/embedding/ae.py index 3f54884a7..83abd03af 100644 --- a/sfaira/versions/topologies/mouse/embedding/ae.py +++ b/sfaira/versions/topologies/mouse/embedding/ae.py @@ -1,8 +1,11 @@ AE_TOPOLOGIES = { "0.1": { "model_type": "ae", - "genome": "Mus_musculus.GRCm38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 64, 512), "l1_coef": 0., @@ -18,8 +21,11 @@ "0.2": { "model_type": "ae", - "genome": "Mus_musculus.GRCm38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (256, 128, 64, 128, 256), "l1_coef": 0., @@ -35,8 +41,11 @@ "0.3": { "model_type": "ae", - "genome": "Mus_musculus.GRCm38.102", - "genes": ["biotype", "protein_coding"], + "input": { + "genome": "Mus_musculus.GRCm38.102", + "genes": ["biotype", "protein_coding"], + }, + "output": {}, "hyper_parameters": { "latent_dim": (512, 256, 128, 256, 512), "l1_coef": 0., @@ -53,7 +62,7 @@ "0.4": { "model_type": "ae", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, diff --git a/sfaira/versions/topologies/mouse/embedding/linear.py b/sfaira/versions/topologies/mouse/embedding/linear.py index 228e8cd32..bc7c563d2 100644 --- a/sfaira/versions/topologies/mouse/embedding/linear.py +++ b/sfaira/versions/topologies/mouse/embedding/linear.py @@ -2,7 +2,7 @@ "0.1": { "model_type": 
"linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -18,7 +18,7 @@ "0.2": { "model_type": "linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -34,7 +34,7 @@ "0.3": { "model_type": "linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, diff --git a/sfaira/versions/topologies/mouse/embedding/nmf.py b/sfaira/versions/topologies/mouse/embedding/nmf.py index d8777708c..ef006e48a 100644 --- a/sfaira/versions/topologies/mouse/embedding/nmf.py +++ b/sfaira/versions/topologies/mouse/embedding/nmf.py @@ -2,7 +2,7 @@ "0.1": { "model_type": "linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -18,7 +18,7 @@ "0.2": { "model_type": "linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -34,7 +34,7 @@ "0.3": { "model_type": "linear", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, diff --git a/sfaira/versions/topologies/mouse/embedding/vae.py b/sfaira/versions/topologies/mouse/embedding/vae.py index 2650cb2b6..ed48b09c7 100644 --- a/sfaira/versions/topologies/mouse/embedding/vae.py +++ b/sfaira/versions/topologies/mouse/embedding/vae.py @@ -2,7 +2,7 @@ "0.1": { "model_type": "vae", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -21,7 +21,7 @@ "0.2": { "model_type": "vae", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": 
["biotype", "protein_coding"], }, "output": {}, @@ -40,7 +40,7 @@ "0.3": { "model_type": "vae", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -59,7 +59,7 @@ "0.4": { "model_type": "vae", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, diff --git a/sfaira/versions/topologies/mouse/embedding/vaeiaf.py b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py index 0a998af56..0e0621e9e 100644 --- a/sfaira/versions/topologies/mouse/embedding/vaeiaf.py +++ b/sfaira/versions/topologies/mouse/embedding/vaeiaf.py @@ -2,7 +2,7 @@ "0.1": { "model_type": "vaeiaf", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -21,7 +21,7 @@ "0.2": { "model_type": "vaeiaf", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, diff --git a/sfaira/versions/topologies/mouse/embedding/vaevamp.py b/sfaira/versions/topologies/mouse/embedding/vaevamp.py index dbe3c9620..b2b2f522c 100644 --- a/sfaira/versions/topologies/mouse/embedding/vaevamp.py +++ b/sfaira/versions/topologies/mouse/embedding/vaevamp.py @@ -2,7 +2,7 @@ "0.2": { "model_type": "vaevamp", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, @@ -20,7 +20,7 @@ "0.3": { "model_type": "vaevamp", "input": { - "genome": "Homo_sapiens.GRCh38.102", + "genome": "Mus_musculus.GRCm38.102", "genes": ["biotype", "protein_coding"], }, "output": {}, From c3198ed82f8c12c0f0117907f142a0bf7f3aab78 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Fri, 28 May 2021 16:44:00 +0200 Subject: [PATCH 148/161] Dev merge (#278) * removed previously deleted files reintroduced by merge * removed old rst files --- sfaira/data/dataloaders/__init__.py | 1 - .../dataloaders/anatomical_groups/__init__.py | 2 - .../anatomical_groups/human/__init__.py | 43 ------------------- .../anatomical_groups/human/human_adipose.py | 19 -------- .../human/human_adrenalgland.py | 24 ----------- .../anatomical_groups/human/human_artery.py | 19 -------- .../anatomical_groups/human/human_bladder.py | 21 --------- .../anatomical_groups/human/human_blood.py | 27 ------------ .../anatomical_groups/human/human_bone.py | 21 --------- .../anatomical_groups/human/human_brain.py | 25 ----------- .../anatomical_groups/human/human_calvaria.py | 19 -------- .../anatomical_groups/human/human_cervix.py | 19 -------- .../human/human_chorionicvillus.py | 19 -------- .../anatomical_groups/human/human_colon.py | 26 ----------- .../anatomical_groups/human/human_duodenum.py | 19 -------- .../human/human_epityphlon.py | 19 -------- .../human/human_esophagus.py | 21 --------- .../anatomical_groups/human/human_eye.py | 22 ---------- .../human/human_fallopiantube.py | 19 -------- .../human/human_femalegonad.py | 20 --------- .../human/human_gallbladder.py | 19 -------- .../anatomical_groups/human/human_heart.py | 22 ---------- .../anatomical_groups/human/human_hesc.py | 19 -------- .../anatomical_groups/human/human_ileum.py | 21 --------- .../anatomical_groups/human/human_jejunum.py | 19 -------- .../anatomical_groups/human/human_kidney.py | 28 ------------ .../anatomical_groups/human/human_liver.py | 27 ------------ .../anatomical_groups/human/human_lung.py | 33 -------------- .../human/human_malegonad.py | 21 --------- .../anatomical_groups/human/human_muscle.py | 20 --------- .../anatomical_groups/human/human_omentum.py | 21 --------- .../anatomical_groups/human/human_pancreas.py | 25 ----------- .../anatomical_groups/human/human_placenta.py | 
21 --------- .../anatomical_groups/human/human_pleura.py | 19 -------- .../anatomical_groups/human/human_prostate.py | 20 --------- .../anatomical_groups/human/human_rectum.py | 20 --------- .../anatomical_groups/human/human_rib.py | 20 --------- .../anatomical_groups/human/human_skin.py | 20 --------- .../human/human_spinalcord.py | 19 -------- .../anatomical_groups/human/human_spleen.py | 21 --------- .../anatomical_groups/human/human_stomach.py | 28 ------------ .../anatomical_groups/human/human_thymus.py | 21 --------- .../anatomical_groups/human/human_thyroid.py | 20 --------- .../anatomical_groups/human/human_trachea.py | 19 -------- .../anatomical_groups/human/human_ureter.py | 19 -------- .../anatomical_groups/human/human_uterus.py | 19 -------- .../anatomical_groups/mouse/__init__.py | 27 ------------ .../anatomical_groups/mouse/mouse_adipose.py | 23 ---------- .../anatomical_groups/mouse/mouse_bladder.py | 21 --------- .../anatomical_groups/mouse/mouse_blood.py | 23 ---------- .../anatomical_groups/mouse/mouse_bone.py | 21 --------- .../anatomical_groups/mouse/mouse_brain.py | 22 ---------- .../anatomical_groups/mouse/mouse_colon.py | 20 --------- .../mouse/mouse_diaphragm.py | 19 -------- .../mouse/mouse_femalegonad.py | 20 --------- .../anatomical_groups/mouse/mouse_heart.py | 21 --------- .../anatomical_groups/mouse/mouse_ileum.py | 21 --------- .../anatomical_groups/mouse/mouse_kidney.py | 22 ---------- .../anatomical_groups/mouse/mouse_liver.py | 22 ---------- .../anatomical_groups/mouse/mouse_lung.py | 23 ---------- .../mouse/mouse_malegonad.py | 20 --------- .../mouse/mouse_mammarygland.py | 24 ----------- .../anatomical_groups/mouse/mouse_muscle.py | 21 --------- .../anatomical_groups/mouse/mouse_pancreas.py | 29 ------------- .../anatomical_groups/mouse/mouse_placenta.py | 20 --------- .../anatomical_groups/mouse/mouse_prostate.py | 20 --------- .../anatomical_groups/mouse/mouse_rib.py | 21 --------- .../anatomical_groups/mouse/mouse_skin.py | 20 
--------- .../anatomical_groups/mouse/mouse_spleen.py | 21 --------- .../anatomical_groups/mouse/mouse_stomach.py | 19 -------- .../anatomical_groups/mouse/mouse_thymus.py | 21 --------- .../anatomical_groups/mouse/mouse_tongue.py | 20 --------- .../anatomical_groups/mouse/mouse_trachea.py | 20 --------- .../anatomical_groups/mouse/mouse_uterus.py | 20 --------- 74 files changed, 1567 deletions(-) delete mode 100644 sfaira/data/dataloaders/anatomical_groups/__init__.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/__init__.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_artery.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_blood.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_bone.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_brain.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_colon.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_eye.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py 
delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_heart.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_liver.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_lung.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_rib.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_skin.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py delete mode 100644 
sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py delete mode 
100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py delete mode 100644 sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py diff --git a/sfaira/data/dataloaders/__init__.py b/sfaira/data/dataloaders/__init__.py index 92bc3fe4d..3ecbc2fc5 100644 --- a/sfaira/data/dataloaders/__init__.py +++ b/sfaira/data/dataloaders/__init__.py @@ -1,4 +1,3 @@ -from . import anatomical_groups from . import databases from . import loaders from .super_group import Universe diff --git a/sfaira/data/dataloaders/anatomical_groups/__init__.py b/sfaira/data/dataloaders/anatomical_groups/__init__.py deleted file mode 100644 index 739fab642..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from . import human -from . 
import mouse diff --git a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py b/sfaira/data/dataloaders/anatomical_groups/human/__init__.py deleted file mode 100644 index 3565250ac..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -from .human_adipose import DatasetGroupAdipose -from .human_adrenalgland import DatasetGroupAdrenalgland -from .human_artery import DatasetGroupArtery -from .human_bladder import DatasetGroupBladder -from .human_blood import DatasetGroupBlood -from .human_bone import DatasetGroupBone -from .human_brain import DatasetGroupBrain -from .human_calvaria import DatasetGroupCalvaria -from .human_cervix import DatasetGroupCervix -from .human_chorionicvillus import DatasetGroupChorionicvillus -from .human_colon import DatasetGroupColon -from .human_duodenum import DatasetGroupDuodenum -from .human_epityphlon import DatasetGroupEpityphlon -from .human_esophagus import DatasetGroupEsophagus -from .human_eye import DatasetGroupEye -from .human_fallopiantube import DatasetGroupFallopiantube -from .human_femalegonad import DatasetGroupFemalegonad -from .human_gallbladder import DatasetGroupGallbladder -from .human_heart import DatasetGroupHeart -from .human_hesc import DatasetGroupHesc -from .human_ileum import DatasetGroupIleum -from .human_jejunum import DatasetGroupJejunum -from .human_kidney import DatasetGroupKidney -from .human_liver import DatasetGroupLiver -from .human_lung import DatasetGroupLung -from .human_malegonad import DatasetGroupMalegonad -from .human_muscle import DatasetGroupMuscle -from .human_omentum import DatasetGroupOmentum -from .human_pancreas import DatasetGroupPancreas -from .human_placenta import DatasetGroupPlacenta -from .human_pleura import DatasetGroupPleura -from .human_prostate import DatasetGroupProstate -from .human_rectum import DatasetGroupRectum -from .human_rib import DatasetGroupRib -from .human_skin import DatasetGroupSkin -from 
.human_spinalcord import DatasetGroupSpinalcord -from .human_spleen import DatasetGroupSpleen -from .human_stomach import DatasetGroupStomach -from .human_thymus import DatasetGroupThymus -from .human_thyroid import DatasetGroupThyroid -from .human_trachea import DatasetGroupTrachea -from .human_ureter import DatasetGroupUreter -from .human_uterus import DatasetGroupUterus diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py deleted file mode 100644 index ad4534a7e..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adipose.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupAdipose(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_adipose_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py b/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py deleted file mode 100644 index 5b20e9c89..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_adrenalgland.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupAdrenalgland(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - 
"human_adrenalgland_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_adrenalgland_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_adrenalgland_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_adrenalgland_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_adrenalgland_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_adrenalgland_2020_microwell_han_006_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py b/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py deleted file mode 100644 index 0a02eefc6..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_artery.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupArtery(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_artery_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py deleted file mode 100644 index 05330d0f7..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bladder.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBladder(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_bladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_bladder_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_bladder_2020_microwell_han_003_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py b/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py deleted file mode 100644 index 697c84f36..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_blood.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBlood(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_blood_2018_10x_ica_001", - "human_blood_2019_10x_10xGenomics_001", - "human_blood_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_006_10.1038/s41586-020-2157-4", - "human_blood_2020_microwell_han_007_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py b/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py deleted file mode 100644 index e44fe29ae..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_bone.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import 
DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBone(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_bone_2018_10x_ica_001", - "human_bone_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_bone_2020_microwell_han_002_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py b/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py deleted file mode 100644 index bf26299fb..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_brain.py +++ /dev/null @@ -1,25 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBrain(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_brain_2017_DroNcSeq_habib_001", - "human_brain_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_brain_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_brain_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_brain_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_brain_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_brain_2020_microwell_han_006_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py b/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py deleted file mode 100644 index 
b5ae45cc3..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_calvaria.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupCalvaria(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_calvaria_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py b/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py deleted file mode 100644 index c993cf859..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_cervix.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupCervix(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_cervix_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py deleted file mode 100644 index 7cba1c705..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_chorionicvillus.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class 
DatasetGroupChorionicvillus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_chorionicvillus_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py deleted file mode 100644 index ac3065342..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_colon.py +++ /dev/null @@ -1,26 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupColon(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_colon_2019_10x_kinchen_001", - "human_colon_2019_10x_smilie_001", - "human_colon_2019_10x_wang_001", - "human_colon_2020_10x_james_001", - "human_colon_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_colon_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_colon_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_colon_2020_microwell_han_004_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py deleted file mode 100644 index a78ec322d..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_duodenum.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from 
sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupDuodenum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_duodenum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py b/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py deleted file mode 100644 index 08833bd57..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_epityphlon.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupEpityphlon(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_epityphlon_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py deleted file mode 100644 index e69858791..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_esophagus.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupEsophagus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = 
Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_esophagus_2019_10x_madissoon_001", - "human_esophagus_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_esophagus_2020_microwell_han_002_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py b/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py deleted file mode 100644 index 2e6ab6323..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_eye.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupEye(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_eye_2019_10x_lukowski_001", - "human_eye_2019_10x_menon_001", - "human_eye_2019_10x_voigt_001", - "human_eye_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py b/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py deleted file mode 100644 index c3b7dbf0f..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_fallopiantube.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupFallopiantube(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, 
cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_fallopiantube_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py deleted file mode 100644 index f3de9e48d..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_femalegonad.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupFemalegonad(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_femalegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_femalegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py b/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py deleted file mode 100644 index 96c2268d4..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_gallbladder.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupGallbladder(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_gallbladder_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - 
super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py b/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py deleted file mode 100644 index 2f01a4471..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_heart.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupHeart(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_heart_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_heart_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_heart_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_heart_2020_microwell_han_004_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py b/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py deleted file mode 100644 index 1e4ee5ec6..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_hesc.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupHesc(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_hesc_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py deleted file mode 100644 index 94183416a..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ileum.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupIleum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_ileum_2019_10x_martin_001", - "human_ileum_2019_10x_wang_001", - "human_ileum_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py deleted file mode 100644 index c4f92d8c6..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_jejunum.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupJejunum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_jejunum_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py b/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py deleted file mode 100644 index d19b6c928..000000000 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_kidney.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupKidney(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_kidney_2019_10xSn_lake_001", - "human_kidney_2019_10x_stewart_001", - "human_kidney_2020_10x_liao_001", - "human_kidney_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_006_10.1038/s41586-020-2157-4", - "human_kidney_2020_microwell_han_007_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py b/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py deleted file mode 100644 index 7cccc5a31..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_liver.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupLiver(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_liver_2018_10x_macparland_001", - "human_liver_2019_10x_popescu_001", - 
"human_liver_2019_10x_ramachandran_001", - "human_liver_2019_mCELSeq2_aizarani_001", - "human_liver_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_liver_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_liver_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_liver_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_liver_2020_microwell_han_005_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py b/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py deleted file mode 100644 index 49e2ac34e..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_lung.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupLung(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_lung_2019_10x_braga_001", - "human_lung_2019_10x_braga_002", - "human_lung_2019_dropseq_braga_003", - "human_lung_2019_10x_madissoon_001", - "human_lung_2020_10x_habermann_001", - "human_lung_2020_10x_lukassen_001", - "human_lung_2020_10x_lukassen_002", - "human_lung_2020_10x_miller_001", - "human_lung_2020_10x_travaglini_001", - "human_lung_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_lung_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_lung_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_lung_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_lung_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_lung_2020_smartseq2_travaglini_002", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py deleted file mode 100644 index f378d4e9c..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_malegonad.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupMalegonad(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_malegonad_2018_10x_guo_001", - "human_malegonad_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_malegonad_2020_microwell_han_002_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py b/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py deleted file mode 100644 index 4b0e6f012..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_muscle.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupMuscle(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_muscle_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py deleted file mode 100644 index 7dbe75dee..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_omentum.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupOmentum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_omentum_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_omentum_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_omentum_2020_microwell_han_003_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py deleted file mode 100644 index d8361db6a..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pancreas.py +++ /dev/null @@ -1,25 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupPancreas(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_pancreas_2016_indrop_baron_001", - "human_pancreas_2016_smartseq2_segerstolpe_001", - "human_pancreas_2017_smartseq2_enge_001", - "human_pancreas_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_pancreas_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_pancreas_2020_microwell_han_003_10.1038/s41586-020-2157-4", - 
"human_pancreas_2020_microwell_han_004_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py b/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py deleted file mode 100644 index 2265e882d..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_placenta.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupPlacenta(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_placenta_2018_smartseq2_ventotormo_001", - "human_placenta_2018_10x_ventotormo_002", - "human_placenta_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py b/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py deleted file mode 100644 index 848acdc26..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_pleura.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupPleura(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_pleura_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py b/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py deleted file mode 100644 index 1ee625b28..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_prostate.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupProstate(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_prostate_2018_10x_henry_001", - "human_prostate_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py deleted file mode 100644 index 10eba378e..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_rectum.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupRectum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_rectum_2019_10x_wang_001", - "human_rectum_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py b/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py deleted file mode 100644 index 364364bdd..000000000 --- 
a/sfaira/data/dataloaders/anatomical_groups/human/human_rib.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupRib(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_rib_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_rib_2020_microwell_han_002_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py b/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py deleted file mode 100644 index 149439ac3..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_skin.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupSkin(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_skin_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_skin_2020_microwell_han_002_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py deleted file mode 100644 index 9e0f5a3fc..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spinalcord.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from 
sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupSpinalcord(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_spinalcord_2020_microwell_han_001_10.1038/s41586-020-2157-4" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py b/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py deleted file mode 100644 index cc595af74..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_spleen.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupSpleen(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_spleen_2019_10x_madissoon_001", - "human_spleen_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_spleen_2020_microwell_han_002_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py b/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py deleted file mode 100644 index 930285d1d..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_stomach.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupStomach(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, 
None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_stomach_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_002_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_003_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_004_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_005_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_006_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_007_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_008_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_009_10.1038/s41586-020-2157-4", - "human_stomach_2020_microwell_han_010_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py deleted file mode 100644 index a2a447539..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thymus.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupThymus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_thymus_2020_10x_park_001", - "human_thymus_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_thymus_2020_microwell_han_002_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py 
b/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py deleted file mode 100644 index d6771c563..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_thyroid.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupThyroid(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_thyroid_2020_microwell_han_001_10.1038/s41586-020-2157-4", - "human_thyroid_2020_microwell_han_002_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py b/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py deleted file mode 100644 index 0de665fea..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_trachea.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupTrachea(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_trachea_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py b/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py deleted file mode 100644 index 8dcb14570..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_ureter.py +++ /dev/null @@ -1,19 +0,0 @@ -from 
typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupUreter(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_ureter_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py b/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py deleted file mode 100644 index e15f5eca0..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/human/human_uterus.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupUterus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "human_uterus_2020_microwell_han_001_10.1038/s41586-020-2157-4", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py b/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py deleted file mode 100644 index 8d8c1569d..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from .mouse_bladder import DatasetGroupBladder -from .mouse_brain import DatasetGroupBrain -from .mouse_diaphragm import DatasetGroupDiaphragm -from .mouse_adipose import DatasetGroupAdipose -from .mouse_heart import DatasetGroupHeart -from .mouse_kidney import DatasetGroupKidney -from .mouse_colon import 
DatasetGroupColon -from .mouse_muscle import DatasetGroupMuscle -from .mouse_liver import DatasetGroupLiver -from .mouse_lung import DatasetGroupLung -from .mouse_mammarygland import DatasetGroupMammaryGland -from .mouse_bone import DatasetGroupBone -from .mouse_femalegonad import DatasetGroupFemalegonad -from .mouse_pancreas import DatasetGroupPancreas -from .mouse_placenta import DatasetGroupPlacenta -from .mouse_blood import DatasetGroupBlood -from .mouse_prostate import DatasetGroupProstate -from .mouse_rib import DatasetGroupRib -from .mouse_ileum import DatasetGroupIleum -from .mouse_skin import DatasetGroupSkin -from .mouse_spleen import DatasetGroupSpleen -from .mouse_stomach import DatasetGroupStomach -from .mouse_malegonad import DatasetGroupMalegonad -from .mouse_thymus import DatasetGroupThymus -from .mouse_tongue import DatasetGroupTongue -from .mouse_trachea import DatasetGroupTrachea -from .mouse_uterus import DatasetGroupUterus diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py deleted file mode 100644 index 8ebc35dbf..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_adipose.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupAdipose(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_adipose_2019_10x_pisco_001_10.1101/661728", - "mouse_adipose_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_adipose_2019_smartseq2_pisco_002_10.1101/661728", - "mouse_adipose_2019_smartseq2_pisco_003_10.1101/661728", - "mouse_adipose_2019_smartseq2_pisco_004_10.1101/661728", - ]) - 
super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py deleted file mode 100644 index ba15353da..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bladder.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBladder(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_bladder_2019_10x_pisco_001_10.1101/661728", - "mouse_bladder_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_bladder_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py deleted file mode 100644 index 67cd97d08..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_blood.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBlood (DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_blood_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_blood_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - "mouse_blood_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", - 
"mouse_blood_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", - "mouse_blood_2018_microwell-seq_han_005_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py deleted file mode 100644 index a4cea83e7..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_bone.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBone(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_bone_2019_10x_pisco_001_10.1101/661728", - "mouse_bone_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_bone_2018_microwell_001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py deleted file mode 100644 index 881eb02bb..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_brain.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupBrain(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_brain_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_brain_2019_smartseq2_pisco_002_10.1101/661728", - 
"mouse_brain_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_brain_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py deleted file mode 100644 index 2b3888dc7..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_colon.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupColon(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_colon_2019_10x_pisco_001_10.1101/661728", - "mouse_colon_2019_smartseq2_pisco_001_10.1101/661728", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py deleted file mode 100644 index 0df183699..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_diaphragm.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupDiaphragm(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_diaphragm_2019_smartseq2_pisco_001_10.1101/661728", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py deleted file mode 100644 index 2d55d7bb9..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_femalegonad.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupFemalegonad(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_femalegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_femalegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py deleted file mode 100644 index cef991f69..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_heart.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupHeart(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_heart_2019_10x_pisco_001_10.1101/661728", - "mouse_heart_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_heart_2019_smartseq2_pisco_002_10.1101/661728" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py 
b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py deleted file mode 100644 index 752f69c4a..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_ileum.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupIleum(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_ileum_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_ileum_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - "mouse_ileum_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py deleted file mode 100644 index 706b5d948..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_kidney.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupKidney(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_kidney_2019_10x_pisco_001_10.1101/661728", - "mouse_kidney_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_kidney_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_kidney_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git 
a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py deleted file mode 100644 index 62418f9b4..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_liver.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupLiver(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_liver_2019_10x_pisco_001_10.1101/661728", - "mouse_liver_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_liver_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_liver_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py deleted file mode 100644 index 89e4dabd6..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_lung.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupLung(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_lung_2019_10x_pisco_001_10.1101/661728", - "mouse_lung_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_lung_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_lung_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - 
"mouse_lung_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py deleted file mode 100644 index f391cdc54..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_malegonad.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupMalegonad(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_malegonad_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_malegonad_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py deleted file mode 100644 index 540f9d30f..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_mammarygland.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupMammaryGland(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_mammarygland_2019_10x_pisco_001_10.1101/661728", - "mouse_mammarygland_2019_smartseq2_pisco_001_10.1101/661728", - 
"mouse_mammarygland_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_mammarygland_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - "mouse_mammarygland_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", - "mouse_mammarygland_2018_microwell-seq_han_004_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py deleted file mode 100644 index f39a576cf..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_muscle.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupMuscle(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_muscle_2019_10x_pisco_001_10.1101/661728", - "mouse_muscle_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_muscle_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py deleted file mode 100644 index e06876965..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_pancreas.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupPancreas(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, 
meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_pancreas_2019_10x_pisco_001_10.1101/661728", - "mouse_pancreas_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_pancreas_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_pancreas_2019_10x_thompson_001", - "mouse_pancreas_2019_10x_thompson_002", - "mouse_pancreas_2019_10x_thompson_003", - "mouse_pancreas_2019_10x_thompson_004", - "mouse_pancreas_2019_10x_thompson_005", - "mouse_pancreas_2019_10x_thompson_006", - "mouse_pancreas_2019_10x_thompson_007", - "mouse_pancreas_2019_10x_thompson_008", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py deleted file mode 100644 index bf084bd94..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_placenta.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupPlacenta(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_placenta_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_placenta_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py deleted file mode 100644 index f8fb99636..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_prostate.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from 
sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupProstate(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_prostate_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_prostate_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py deleted file mode 100644 index df3f32238..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_rib.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupRib(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_rib_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_rib_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001", - "mouse_rib_2018_microwell-seq_han_003_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py deleted file mode 100644 index 8df3d82b5..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_skin.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupSkin(DatasetGroup): - - 
def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_skin_2019_10x_pisco_001_10.1101/661728", - "mouse_skin_2019_smartseq2_pisco_001_10.1101/661728", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py deleted file mode 100644 index aa0b26a1b..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_spleen.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupSpleen(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_spleen_2019_10x_pisco_001_10.1101/661728", - "mouse_spleen_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_spleen_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py deleted file mode 100644 index 7721f9b25..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_stomach.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupStomach(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - 
dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_stomach_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py deleted file mode 100644 index e5894ecf4..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_thymus.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupThymus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_thymus_2019_10x_pisco_001_10.1101/661728", - "mouse_thymus_2019_smartseq2_pisco_001_10.1101/661728", - "mouse_thymus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py deleted file mode 100644 index 564d0a1cc..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_tongue.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupTongue(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_tongue_2019_10x_pisco_001_10.1101/661728", - 
"mouse_tongue_2019_smartseq2_pisco_001_10.1101/661728" - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py deleted file mode 100644 index c27fdcf3f..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_trachea.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupTrachea(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_trachea_2019_10x_pisco_001_10.1101/661728", - "mouse_trachea_2019_smartseq2_pisco_001_10.1101/661728", - ]) - super().__init__(datasets=dsg.flatten().datasets) diff --git a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py b/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py deleted file mode 100644 index 277a6dcb7..000000000 --- a/sfaira/data/dataloaders/anatomical_groups/mouse/mouse_uterus.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Union - -from sfaira.data import DatasetGroup -from sfaira.data.dataloaders.super_group import Universe - - -class DatasetGroupUterus(DatasetGroup): - - def __init__( - self, - data_path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None - ): - dsg = Universe(data_path=data_path, meta_path=meta_path, cache_path=cache_path) - dsg.subset(key="id", values=[ - "mouse_uterus_2018_microwell-seq_han_001_10.1016/j.cell.2018.02.001", - "mouse_uterus_2018_microwell-seq_han_002_10.1016/j.cell.2018.02.001" - ]) - super().__init__(datasets=dsg.flatten().datasets) From e615869e73497eb1a840d26fae79421718b88d5c Mon Sep 17 00:00:00 
2001 From: Lukas Heumos Date: Fri, 28 May 2021 18:44:27 +0200 Subject: [PATCH 149/161] split template test into annotate and test (#272) * split template test into annotate and test Signed-off-by: zethson * fix annotation Signed-off-by: zethson * fix writing path for annotations Signed-off-by: zethson * reverted to old, correct annotation Signed-off-by: zethson * fix correct assay_sc Signed-off-by: zethson --- sfaira/cli.py | 16 ++ sfaira/commands/annotate_dataloader.py | 179 ++++++++++++++++++ ..._pancreas_2019_10xsequencing_thompson_x.py | 2 +- .../data_contribution/test_data_template.py | 161 +++------------- 4 files changed, 221 insertions(+), 137 deletions(-) create mode 100644 sfaira/commands/annotate_dataloader.py diff --git a/sfaira/cli.py b/sfaira/cli.py index 547768616..b68ee6fc8 100644 --- a/sfaira/cli.py +++ b/sfaira/cli.py @@ -7,6 +7,8 @@ import rich.logging from rich import traceback from rich import print + +from sfaira.commands.annotate_dataloader import DataloaderAnnotater from sfaira.commands.test_dataloader import DataloaderTester from sfaira.commands.clean_dataloader import DataloaderCleaner @@ -102,6 +104,20 @@ def validate_dataloader(path) -> None: dataloader_validator.validate() +@sfaira_cli.command() +@click.argument('path', type=click.Path(exists=True)) +@click.option('--doi', type=str, default=None) +@click.option('--test-data', type=click.Path(exists=True)) +def annotate_dataloader(path, doi, test_data) -> None: + """ + Annotates a dataloader. + + PATH is the absolute path of the root of your sfaira clone. 
+ """ + dataloader_annotater = DataloaderAnnotater() + dataloader_annotater.annotate(path, doi, test_data) + + @sfaira_cli.command() @click.argument('path', type=click.Path(exists=True)) @click.option('--test-data', type=click.Path(exists=True)) diff --git a/sfaira/commands/annotate_dataloader.py b/sfaira/commands/annotate_dataloader.py new file mode 100644 index 000000000..706339707 --- /dev/null +++ b/sfaira/commands/annotate_dataloader.py @@ -0,0 +1,179 @@ +import os +import pydoc +import shutil + +from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup, DatasetBase +from sfaira.data.utils import read_yaml + +try: + import sfaira_extension as sfairae +except ImportError: + sfairae = None + + +class DataloaderAnnotater: + + def __init__(self): + self.WD = os.path.dirname(__file__) + self.file_path = None + self.file_path_sfairae = None + self.meta_path = None + self.cache_path = None + self.dir_loader = None + self.dir_loader_sfairae = None + self.package_source = None + + def annotate(self, path: str, doi: str, test_data: str): + """ + Annotates a provided dataloader. + + Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. + Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be + differentially successful. The proposed IDs groups are separate by ":|||:" strings to give you a visual anchor + when going through these lists. You need to delete all of these division strings and all labels in the second + columns other than the best fit label. 
Do not change the first column, + (Note that columns are separated by ",") + You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl + """ + doi_sfaira_repr = f'd{doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + self._setup_loader(doi_sfaira_repr) + self._annotate(test_data, path, doi) + + def _setup_loader(self, doi_sfaira_repr: str): + """ + Define the file names, loader paths and base paths of loader collections for sfaira and sfaira_extension + """ + dir_loader_sfaira = "sfaira.data.dataloaders.loaders." + file_path_sfaira = "/" + "/".join(pydoc.locate(dir_loader_sfaira + "FILE_PATH").split("/")[:-1]) + if sfairae is not None: + dir_loader_sfairae = "sfaira_extension.data.dataloaders.loaders." + file_path_sfairae = "/" + "/".join(pydoc.locate(dir_loader_sfairae + "FILE_PATH").split("/")[:-1]) + else: + file_path_sfairae = None + # Check if loader name is a directory either in sfaira or sfaira_extension loader collections: + if doi_sfaira_repr in os.listdir(file_path_sfaira): + dir_loader = dir_loader_sfaira + "." + doi_sfaira_repr + package_source = "sfaira" + elif doi_sfaira_repr in os.listdir(file_path_sfairae): + dir_loader = dir_loader_sfairae + "." 
+ doi_sfaira_repr + package_source = "sfairae" + else: + raise ValueError("data loader not found in sfaira and also not in sfaira_extension") + file_path = pydoc.locate(dir_loader + ".FILE_PATH") + meta_path = None + cache_path = None + # Clear dataset cache + shutil.rmtree(cache_path, ignore_errors=True) + + self.file_path = file_path + self.file_path_sfairae = file_path_sfairae + self.meta_path = meta_path + self.cache_path = cache_path + self.dir_loader = dir_loader + self.dir_loader_sfairae = dir_loader_sfairae + self.package_source = package_source + + def _get_ds(self, test_data: str): + ds = DatasetGroupDirectoryOriented( + file_base=self.file_path, + data_path=test_data, + meta_path=None, + cache_path=None + ) + + return ds + + def buffered_load(self, test_data: str): + ds = self._get_ds(test_data=test_data) + # TODO try-except with good error description saying that the data loader is broken here: + ds.load( + remove_gene_version=False, + match_to_reference=None, + load_raw=True, # Force raw load so non confound future tests by data loader bugs in previous versions. + allow_caching=True, + ) + + assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {test_data}" + return ds + + def _annotate(self, test_data: str, path: str, doi: str): + ds = self.buffered_load(test_data=test_data) + # Create cell type conversion table: + cwd = os.path.dirname(self.file_path) + dataset_module = str(cwd.split("/")[-1]) + # Group data sets by file module: + # Note that if we were not grouping the cell type map .tsv files by file module, we could directly call + # write_ontology_class_map on the ds. + for f in os.listdir(cwd): + if os.path.isfile(os.path.join(cwd, f)): # only files + # Narrow down to data set files: + if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: + file_module = ".".join(f.split(".")[:-1]) + + # I) Instantiate Data set group to get all IDs of data sets associated with this .py file. 
+ # Note that all data sets in this directory are already loaded in ds, so we just need the IDs. + DatasetFound = pydoc.locate(self.dir_loader + "." + file_module + ".Dataset") + # Load objects from name space: + # - load(): Loading function that return anndata instance. + # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles + load_func = pydoc.locate(self.dir_loader + "." + file_module + ".load") + load_func_annotation = pydoc.locate(self.dir_loader + "." + file_module + ".LOAD_ANNOTATION") + # Also check sfaira_extension for additional load_func_annotation: + if self.package_source != "sfairae" and sfairae is not None: + load_func_annotation_sfairae = pydoc.locate(self.dir_loader_sfairae + "." + dataset_module + + "." + file_module + ".LOAD_ANNOTATION") + # LOAD_ANNOTATION is a dictionary so we can use update to extend it. + if load_func_annotation_sfairae is not None and load_func_annotation is not None: + load_func_annotation.update(load_func_annotation_sfairae) + elif load_func_annotation_sfairae is not None and load_func_annotation is None: + load_func_annotation = load_func_annotation_sfairae + sample_fns = pydoc.locate(self.dir_loader + "." + file_module + ".SAMPLE_FNS") + fn_yaml = os.path.join(cwd, file_module + ".yaml") + fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None + # Check for sample_fns in yaml: + if fn_yaml is not None: + assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" + yaml_vals = read_yaml(fn=fn_yaml) + if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: + sample_fns = yaml_vals["meta"]["sample_fns"] + if sample_fns is None: + sample_fns = [None] + # Here we distinguish between class that are already defined and those that are not. + # The latter case arises if meta data are defined in YAMLs and _load is given as a function. 
+ if DatasetFound is None: + datasets_f = [ + DatasetBase( + data_path=test_data, + meta_path=self.meta_path, + cache_path=self.cache_path, + load_func=load_func, + dict_load_func_annotation=load_func_annotation, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) for x in sample_fns + ] + else: + datasets_f = [ + DatasetFound( + data_path=test_data, + meta_path=self.meta_path, + cache_path=self.cache_path, + load_func=load_func, + load_func_annotation=load_func_annotation, + sample_fn=x, + sample_fns=sample_fns if sample_fns != [None] else None, + yaml_path=fn_yaml, + ) for x in sample_fns + ] + # II) Build a data set group from the already loaded data sets and use the group ontology writing + # function. + dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) + # III) Write this directly into the sfaira clone so that it can be committed via git. + # TODO any errors not to be caught here? + doi_sfaira_repr = f'd{doi.translate({ord(c): "_" for c in r"!@#$%^&*()[]/{};:,.<>?|`~-=_+"})}' + dsg_f.write_ontology_class_map( + fn=os.path.join(f"{path}/sfaira/data/dataloaders/loaders/{doi_sfaira_repr}/{file_module}.tsv"), + protected_writing=True, + n_suggest=4, + ) diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index cba511374..d06889f2e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -33,7 +33,7 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" - self.assay_sc = "Drop-seq" + self.assay_sc = "10x 3' v2" self.state_exact = "diabetic" self.year = 2019 self.sample_source = 
"primary_tissue" diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index bacf8ca50..e81c007ae 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -2,42 +2,15 @@ import pydoc import shutil -from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup, DatasetBase -from sfaira.data.utils import read_yaml +from sfaira.data import DatasetGroupDirectoryOriented + try: import sfaira_extension as sfairae except ImportError: sfairae = None -def test_load(doi_sfaira_repr: str, test_data: str): - """ - Unit test to assist with data set contribution. - - The workflow for contributing a data set with this data loader is as follows: - - 1. Write a data loader and add it into the loader directory of your local sfaira installation. - 2. Address ToDos below. - 3. Run this unit test until you are not getting errors from your data loader anymore. - - In the process of this unit test, this data loader will have written putative cell type maps from your - annotation to the cell ontology. - - 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. - Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be - differentially successful. The proposed IDs groups are separate by ":|||:" strings to give you a visual anchor - when going through these lists. You need to delete all of these division strings and all labels in the second - columns other than the best fit label. Do not change the first column, - (Note that columns are separated by ",") - You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl - 5. Run this unit test for a last time to check the cell type maps. 
- """ - remove_gene_version = True - match_to_reference = None - - flattened_doi = doi_sfaira_repr - # Define file names and loader paths in sfaira or sfaira_extension: - # Define base paths of loader collections in sfaira and sfaira_extension: +def _get_ds(doi_sfaira_repr: str, test_data: str): dir_loader_sfaira = "sfaira.data.dataloaders.loaders." file_path_sfaira = "/" + "/".join(pydoc.locate(dir_loader_sfaira + "FILE_PATH").split("/")[:-1]) if sfairae is not None: @@ -46,16 +19,13 @@ def test_load(doi_sfaira_repr: str, test_data: str): else: file_path_sfairae = None # Check if loader name is a directory either in sfaira or sfaira_extension loader collections: - if flattened_doi in os.listdir(file_path_sfaira): - dir_loader = dir_loader_sfaira + "." + flattened_doi - package_source = "sfaira" - elif flattened_doi in os.listdir(file_path_sfairae): - dir_loader = dir_loader_sfairae + "." + flattened_doi - package_source = "sfairae" + if doi_sfaira_repr in os.listdir(file_path_sfaira): + dir_loader = dir_loader_sfaira + "." + doi_sfaira_repr + elif doi_sfaira_repr in os.listdir(file_path_sfairae): + dir_loader = dir_loader_sfairae + "." + doi_sfaira_repr else: raise ValueError("data loader not found in sfaira and also not in sfaira_extension") file_path = pydoc.locate(dir_loader + ".FILE_PATH") - meta_path = None cache_path = None # Clear dataset cache shutil.rmtree(cache_path, ignore_errors=True) @@ -66,110 +36,29 @@ def test_load(doi_sfaira_repr: str, test_data: str): meta_path=None, cache_path=None ) - # Test raw loading and caching: - # You can set load_raw to True while debugging when caching works already to speed the test up, - # but be sure to set load_raw to True for final tests. 
- ds.load( - remove_gene_version=False, - match_to_reference=False, - load_raw=True, # tests raw loading - allow_caching=True, # tests caching - ) - assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {test_data}" - # Create cell type conversion table: - cwd = os.path.dirname(file_path) - dataset_module = str(cwd.split("/")[-1]) - # Group data sets by file module: - # Note that if we were not grouping the cell type map .tsv files by file module, we could directly call - # write_ontology_class_map on the ds. - for f in os.listdir(cwd): - if os.path.isfile(os.path.join(cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(f.split(".")[:-1]) + return ds, cache_path - # I) Instantiate Data set group to get all IDs of data sets associated with this .py file. - # Note that all data sets in this directory are already loaded in ds, so we just need the IDs. - DatasetFound = pydoc.locate(dir_loader + "." + file_module + ".Dataset") - # Load objects from name space: - # - load(): Loading function that return anndata instance. - # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles - load_func = pydoc.locate(dir_loader + "." + file_module + ".load") - load_func_annotation = pydoc.locate(dir_loader + "." + file_module + ".LOAD_ANNOTATION") - # Also check sfaira_extension for additional load_func_annotation: - if package_source != "sfairae" and sfairae is not None: - load_func_annotation_sfairae = pydoc.locate(dir_loader_sfairae + "." + dataset_module + - "." + file_module + ".LOAD_ANNOTATION") - # LOAD_ANNOTATION is a dictionary so we can use update to extend it. 
- if load_func_annotation_sfairae is not None and load_func_annotation is not None: - load_func_annotation.update(load_func_annotation_sfairae) - elif load_func_annotation_sfairae is not None and load_func_annotation is None: - load_func_annotation = load_func_annotation_sfairae - sample_fns = pydoc.locate(dir_loader + "." + file_module + ".SAMPLE_FNS") - fn_yaml = os.path.join(cwd, file_module + ".yaml") - fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None - # Check for sample_fns in yaml: - if fn_yaml is not None: - assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" - yaml_vals = read_yaml(fn=fn_yaml) - if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: - sample_fns = yaml_vals["meta"]["sample_fns"] - if sample_fns is None: - sample_fns = [None] - # Here we distinguish between class that are already defined and those that are not. - # The latter case arises if meta data are defined in YAMLs and _load is given as a function. - if DatasetFound is None: - datasets_f = [ - DatasetBase( - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - dict_load_func_annotation=load_func_annotation, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) for x in sample_fns - ] - else: - datasets_f = [ - DatasetFound( - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - load_func_annotation=load_func_annotation, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) for x in sample_fns - ] - # II) Build a data set group from the already loaded data sets and use the group ontology writing - # function. - dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) - # III) Write this directly into sfaira installation so that it can be committed via git. 
- dsg_f.write_ontology_class_map( - fn=os.path.join(cwd, file_module + ".tsv"), - protected_writing=True, - n_suggest=4, - ) - # Test loading from cache: - ds = DatasetGroupDirectoryOriented( - file_base=file_path, - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path +def test_load(doi_sfaira_repr: str, test_data: str): + ds, cache_path = _get_ds(doi_sfaira_repr=doi_sfaira_repr, test_data=test_data) + + ds.clean_ontology_class_map() + + # TODO try-except with good error description saying that the data loader is broken here: + ds.load( + remove_gene_version=True, + # match_to_reference=TODO get organism here, + load_raw=True, + allow_caching=True ) + # Try loading from cache: + ds = _get_ds(doi_sfaira_repr=doi_sfaira_repr, test_data=test_data) + # TODO try-except with good error description saying that the data loader is broken here: ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, + remove_gene_version=True, + # match_to_reference=TODO get organism here, load_raw=False, - allow_caching=False + allow_caching=True ) - ds.clean_ontology_class_map() - # Test concatenation: - _ = ds.adata - # Clear dataset cache shutil.rmtree(cache_path, ignore_errors=True) From a5051d47305049fa5afa14dcea8d8438e87dd2c1 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 31 May 2021 12:19:00 +0200 Subject: [PATCH 150/161] updated portal link (#281) --- README.rst | 4 ++-- docs/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index a116498a7..d22de71ed 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ sfaira - data and model repository for single-cell data :align: center sfaira_ is a model and a data repository in a single python package (preprint_). -We provide an interactive overview of the current state of the zoos on sfaira-site_. +We provide an interactive overview of the current state of the zoos on sfaira-portal_. 
Its data zoo gives users access to streamlined data loaders that allow reproducible use of published and private data sets for model training and exploration. Its model zoo gives user streamlined access to pre-trained models and to common model architectures to ease usage of neural networks in common single-cell analysis workflows: @@ -43,4 +43,4 @@ sfaira integrates into scanpy_ workflows. .. _preprint: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 .. _DCA: https://github.com/theislab/dca .. _scArches: https://github.com/theislab/scarches -.. _sfaira-site: https://theislab.github.io/sfaira-site/index.html +.. _sfaira-portal: https://theislab.github.io/sfaira-portal/ diff --git a/docs/index.rst b/docs/index.rst index 157321b12..ac1929e77 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,10 +15,10 @@ sfaira - data and model repository for single-cell data :align: center sfaira_ is a model and a data repository in a single python package. -We provide an interactive overview of the current state of the zoos on sfaira-site_. +We provide an interactive overview of the current state of the zoos on sfaira-portal_. .. _sfaira: https://www.biorxiv.org/content/10.1101/2020.12.16.419036v1 -.. _sfaira-site: https://theislab.github.io/sfaira-site/index.html +.. _sfaira-portal: https://theislab.github.io/sfaira-portal/ .. 
include:: environment_brief.rst From 5b496682a447386d175c35db4f38532d3a6e1031 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 31 May 2021 13:03:33 +0200 Subject: [PATCH 151/161] merge release into dev --- sfaira/cli.py | 6 - ..._pancreas_2019_10xsequencing_thompson_x.py | 4 - .../data_contribution/test_data_template.py | 160 +----------------- 3 files changed, 1 insertion(+), 169 deletions(-) diff --git a/sfaira/cli.py b/sfaira/cli.py index 39379b65e..b68ee6fc8 100644 --- a/sfaira/cli.py +++ b/sfaira/cli.py @@ -7,11 +7,8 @@ import rich.logging from rich import traceback from rich import print -<<<<<<< HEAD -======= from sfaira.commands.annotate_dataloader import DataloaderAnnotater ->>>>>>> dev from sfaira.commands.test_dataloader import DataloaderTester from sfaira.commands.clean_dataloader import DataloaderCleaner @@ -109,8 +106,6 @@ def validate_dataloader(path) -> None: @sfaira_cli.command() @click.argument('path', type=click.Path(exists=True)) -<<<<<<< HEAD -======= @click.option('--doi', type=str, default=None) @click.option('--test-data', type=click.Path(exists=True)) def annotate_dataloader(path, doi, test_data) -> None: @@ -125,7 +120,6 @@ def annotate_dataloader(path, doi, test_data) -> None: @sfaira_cli.command() @click.argument('path', type=click.Path(exists=True)) ->>>>>>> dev @click.option('--test-data', type=click.Path(exists=True)) @click.option('--doi', type=str, default=None) def test_dataloader(path, test_data, doi) -> None: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py index 9c5800adf..d06889f2e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cmet_2019_01_021/mouse_pancreas_2019_10xsequencing_thompson_x.py @@ -33,11 +33,7 @@ def 
__init__(self, **kwargs): self.normalization = "raw" self.organ = "pancreas" self.organism = "mouse" -<<<<<<< HEAD - self.assay_sc = "Drop-seq" -======= self.assay_sc = "10x 3' v2" ->>>>>>> dev self.state_exact = "diabetic" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index 7fd95b2c1..aa3301a35 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -2,51 +2,15 @@ import pydoc import shutil -<<<<<<< HEAD -from sfaira.data import DatasetGroupDirectoryOriented, DatasetGroup, DatasetBase -from sfaira.data.utils import read_yaml -======= from sfaira.data import DatasetGroupDirectoryOriented ->>>>>>> dev try: import sfaira_extension as sfairae except ImportError: sfairae = None -<<<<<<< HEAD -def test_load(doi_sfaira_repr: str, test_data: str): - """ - Unit test to assist with data set contribution. - - The workflow for contributing a data set with this data loader is as follows: - - 1. Write a data loader and add it into the loader directory of your local sfaira installation. - 2. Address ToDos below. - 3. Run this unit test until you are not getting errors from your data loader anymore. - - In the process of this unit test, this data loader will have written putative cell type maps from your - annotation to the cell ontology. - - 4. Moderate the suggestions made here: Choose the best fit cell ontology label for your cells. - Sfaira uses multiple mechanisms of finding matches, depending on how the free text was generated, these might be - differentially successful. The proposed IDs groups are separate by ":|||:" strings to give you a visual anchor - when going through these lists. You need to delete all of these division strings and all labels in the second - columns other than the best fit label. 
Do not change the first column, - (Note that columns are separated by ",") - You can also manually check maps here: https://www.ebi.ac.uk/ols/ontologies/cl - 5. Run this unit test for a last time to check the cell type maps. - """ - remove_gene_version = True - match_to_reference = None - - flattened_doi = doi_sfaira_repr - # Define file names and loader paths in sfaira or sfaira_extension: - # Define base paths of loader collections in sfaira and sfaira_extension: -======= def _get_ds(doi_sfaira_repr: str, test_data: str): ->>>>>>> dev dir_loader_sfaira = "sfaira.data.dataloaders.loaders." file_path_sfaira = "/" + "/".join(pydoc.locate(dir_loader_sfaira + "FILE_PATH").split("/")[:-1]) if sfairae is not None: @@ -55,18 +19,6 @@ def _get_ds(doi_sfaira_repr: str, test_data: str): else: file_path_sfairae = None # Check if loader name is a directory either in sfaira or sfaira_extension loader collections: -<<<<<<< HEAD - if flattened_doi in os.listdir(file_path_sfaira): - dir_loader = dir_loader_sfaira + "." + flattened_doi - package_source = "sfaira" - elif flattened_doi in os.listdir(file_path_sfairae): - dir_loader = dir_loader_sfairae + "." + flattened_doi - package_source = "sfairae" - else: - raise ValueError("data loader not found in sfaira and also not in sfaira_extension") - file_path = pydoc.locate(dir_loader + ".FILE_PATH") - meta_path = None -======= if doi_sfaira_repr in os.listdir(file_path_sfaira): dir_loader = dir_loader_sfaira + "." 
+ doi_sfaira_repr elif doi_sfaira_repr in os.listdir(file_path_sfairae): @@ -74,7 +26,6 @@ def _get_ds(doi_sfaira_repr: str, test_data: str): else: raise ValueError("data loader not found in sfaira and also not in sfaira_extension") file_path = pydoc.locate(dir_loader + ".FILE_PATH") ->>>>>>> dev cache_path = None # Clear dataset cache shutil.rmtree(cache_path, ignore_errors=True) @@ -85,114 +36,6 @@ def _get_ds(doi_sfaira_repr: str, test_data: str): meta_path=None, cache_path=None ) -<<<<<<< HEAD - # Test raw loading and caching: - # You can set load_raw to True while debugging when caching works already to speed the test up, - # but be sure to set load_raw to True for final tests. - ds.load( - remove_gene_version=False, - match_to_reference=False, - load_raw=True, # tests raw loading - allow_caching=True, # tests caching - ) - - assert len(ds.ids) > 0, f"no data sets loaded, make sure raw data is in {test_data}" - # Create cell type conversion table: - cwd = os.path.dirname(file_path) - dataset_module = str(cwd.split("/")[-1]) - # Group data sets by file module: - # Note that if we were not grouping the cell type map .tsv files by file module, we could directly call - # write_ontology_class_map on the ds. - for f in os.listdir(cwd): - if os.path.isfile(os.path.join(cwd, f)): # only files - # Narrow down to data set files: - if f.split(".")[-1] == "py" and f.split(".")[0] not in ["__init__", "base", "group"]: - file_module = ".".join(f.split(".")[:-1]) - - # I) Instantiate Data set group to get all IDs of data sets associated with this .py file. - # Note that all data sets in this directory are already loaded in ds, so we just need the IDs. - DatasetFound = pydoc.locate(dir_loader + "." + file_module + ".Dataset") - # Load objects from name space: - # - load(): Loading function that return anndata instance. - # - SAMPLE_FNS: File name list for DatasetBaseGroupLoadingManyFiles - load_func = pydoc.locate(dir_loader + "." 
+ file_module + ".load") - load_func_annotation = pydoc.locate(dir_loader + "." + file_module + ".LOAD_ANNOTATION") - # Also check sfaira_extension for additional load_func_annotation: - if package_source != "sfairae" and sfairae is not None: - load_func_annotation_sfairae = pydoc.locate(dir_loader_sfairae + "." + dataset_module + - "." + file_module + ".LOAD_ANNOTATION") - # LOAD_ANNOTATION is a dictionary so we can use update to extend it. - if load_func_annotation_sfairae is not None and load_func_annotation is not None: - load_func_annotation.update(load_func_annotation_sfairae) - elif load_func_annotation_sfairae is not None and load_func_annotation is None: - load_func_annotation = load_func_annotation_sfairae - sample_fns = pydoc.locate(dir_loader + "." + file_module + ".SAMPLE_FNS") - fn_yaml = os.path.join(cwd, file_module + ".yaml") - fn_yaml = fn_yaml if os.path.exists(fn_yaml) else None - # Check for sample_fns in yaml: - if fn_yaml is not None: - assert os.path.exists(fn_yaml), f"did not find yaml {fn_yaml}" - yaml_vals = read_yaml(fn=fn_yaml) - if sample_fns is None and yaml_vals["meta"]["sample_fns"] is not None: - sample_fns = yaml_vals["meta"]["sample_fns"] - if sample_fns is None: - sample_fns = [None] - # Here we distinguish between class that are already defined and those that are not. - # The latter case arises if meta data are defined in YAMLs and _load is given as a function. 
- if DatasetFound is None: - datasets_f = [ - DatasetBase( - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - dict_load_func_annotation=load_func_annotation, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) for x in sample_fns - ] - else: - datasets_f = [ - DatasetFound( - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path, - load_func=load_func, - load_func_annotation=load_func_annotation, - sample_fn=x, - sample_fns=sample_fns if sample_fns != [None] else None, - yaml_path=fn_yaml, - ) for x in sample_fns - ] - # II) Build a data set group from the already loaded data sets and use the group ontology writing - # function. - dsg_f = DatasetGroup(datasets=dict([(x.id, ds.datasets[x.id]) for x in datasets_f])) - # III) Write this directly into sfaira installation so that it can be committed via git. - dsg_f.write_ontology_class_map( - fn=os.path.join(cwd, file_module + ".tsv"), - protected_writing=True, - n_suggest=4, - ) - - # Test loading from cache: - ds = DatasetGroupDirectoryOriented( - file_base=file_path, - data_path=test_data, - meta_path=meta_path, - cache_path=cache_path - ) - ds.load( - remove_gene_version=remove_gene_version, - match_to_reference=match_to_reference, - load_raw=False, - allow_caching=False - ) - ds.clean_ontology_class_map() - # Test concatenation: - _ = ds.adata - # Clear dataset cache -======= return ds, cache_path @@ -218,5 +61,4 @@ def test_load(doi_sfaira_repr: str, test_data: str): load_raw=False, allow_caching=True ) ->>>>>>> dev - shutil.rmtree(cache_path, ignore_errors=True) + shutil.rmtree(cache_path, ignore_errors=True) \ No newline at end of file From 4764d8118152c6804ff2363e09b45f8c7d7f052a Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Mon, 31 May 2021 13:05:06 +0200 Subject: [PATCH 152/161] added new line --- sfaira/unit_tests/data_contribution/test_data_template.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/sfaira/unit_tests/data_contribution/test_data_template.py b/sfaira/unit_tests/data_contribution/test_data_template.py index aa3301a35..e81c007ae 100644 --- a/sfaira/unit_tests/data_contribution/test_data_template.py +++ b/sfaira/unit_tests/data_contribution/test_data_template.py @@ -61,4 +61,4 @@ def test_load(doi_sfaira_repr: str, test_data: str): load_raw=False, allow_caching=True ) - shutil.rmtree(cache_path, ignore_errors=True) \ No newline at end of file + shutil.rmtree(cache_path, ignore_errors=True) From cfe13cc63153b5e2ced612be3ad89a28a5fe4594 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Mon, 31 May 2021 13:18:46 +0200 Subject: [PATCH 153/161] Dev merge (#285) * mitigates merge conflict between dev and release --- sfaira/interface/user_interface.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sfaira/interface/user_interface.py b/sfaira/interface/user_interface.py index b4a1caed6..b9fed1e90 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/interface/user_interface.py @@ -28,6 +28,9 @@ class UserInterface: # instead of setting `custom_repo` when initialising the UI you can also use `sfaira_repo=True` to use public weights ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) ui.load_data(anndata.read("/path/to/file.h5ad")) # load your dataset into sfaira + ui.zoo_embedding.model_id = 'embedding_human-blood-ae-0.2-0.1_theislab' # pick desired model here + ui.zoo_celltype.model_id = 'celltype_human-blood-mlp-0.1.3-0.1_theislab' # pick desired model here + ui.load_data(anndata.read("/path/to/file.h5ad"), gene_symbol_col='index', gene_ens_col='gene_ids') # load your dataset into sfaira ui.load_model_embedding() ui.load_model_celltype() ui.predict_all() @@ -60,7 +63,7 @@ def __init__( self.adata_ids = AdataIdsSfaira() if sfaira_repo: # check if public sfaira repository should be accessed - self.model_lookuptable = 
self._load_lookuptable("https://zenodo.org/record/4304660/files/") + self.model_lookuptable = self._load_lookuptable("https://zenodo.org/record/4836517/files/") if custom_repo: if isinstance(custom_repo, str): From 4305133e0a295467b697bab4d43ea921a5fffb29 Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 8 Jun 2021 20:02:00 +0200 Subject: [PATCH 154/161] Dev release merge (#294) * v0.3.0 --- .gitignore | 10 +- .readthedocs.yml | 2 +- docs/_static/custom_sfaira.css | 137 ------------ docs/_templates/autosummary/base.rst | 40 ++++ docs/_templates/autosummary/class.rst | 74 +++++++ docs/_templates/autosummary/function.rst | 46 ++++ docs/adding_dataset_classes.rst | 112 ---------- docs/adding_datasets.rst | 119 +++++++++- docs/api.rst | 207 ++++++++++++++++++ docs/api/index.rst | 154 ------------- docs/api/sfaira.data.DatasetBase.rst | 106 --------- docs/api/sfaira.data.DatasetInteractive.rst | 106 --------- docs/api/sfaira.data.DatasetSuperGroup.rst | 55 ----- docs/api/sfaira.data.human.rst | 23 -- docs/api/sfaira.data.mouse.rst | 23 -- ...ira.genomes.ExtractFeatureListEnsemble.rst | 26 --- docs/api/sfaira.models.celltype.rst | 23 -- docs/api/sfaira.models.embedding.rst | 23 -- docs/api/sfaira.train.GridsearchContainer.rst | 37 ---- ...aira.train.SummarizeGridsearchCelltype.rst | 43 ---- ...ira.train.SummarizeGridsearchEmbedding.rst | 47 ---- docs/api/sfaira.train.TargetZoos.rst | 24 -- docs/api/sfaira.train.TrainModelCelltype.rst | 26 --- docs/api/sfaira.train.TrainModelEmbedding.rst | 26 --- docs/api/sfaira.ui.UserInterface.rst | 38 ---- docs/commandline_interface.rst | 2 +- docs/consuming_data.rst | 40 ++++ docs/{development.rst => faq.rst} | 36 ++- docs/index.rst | 7 +- docs/installation.rst | 29 ++- docs/tutorials.rst | 26 +-- docs/using_data.rst | 153 ------------- sfaira/__init__.py | 2 +- ...fcolon_2019_10xsequencing_kinchen_001.yaml | 2 +- ...pithelium_2019_10xsequencing_smilie_001.py | 2 +- ...man_ileum_2019_10xsequencing_martin_001.py | 2 +- 
...stategland_2018_10xsequencing_henry_001.py | 2 +- ...uman_lung_2020_10xsequencing_miller_001.py | 2 +- ...human_testis_2018_10xsequencing_guo_001.py | 2 +- ...liver_2018_10xsequencing_macparland_001.py | 2 +- .../human_x_2019_10xsequencing_szabo_001.py | 2 +- ...man_retina_2019_10xsequencing_menon_001.py | 2 +- .../human_placenta_2018_x_ventotormo_001.py | 2 +- ...ver_2019_10xsequencing_ramachandran_001.py | 2 +- ...an_liver_2019_10xsequencing_popescu_001.py | 2 +- ...rain_2019_10x3v2sequencing_kanton_001.yaml | 2 +- .../human_lung_2020_x_travaglini_001.yaml | 2 +- ...uman_colon_2020_10xsequencing_james_001.py | 2 +- .../human_x_2019_10xsequencing_braga_x.py | 2 +- .../mouse_x_2019_10xsequencing_hove_001.py | 2 +- ...uman_kidney_2020_10xsequencing_liao_001.py | 2 +- ...man_retina_2019_10xsequencing_voigt_001.py | 2 +- .../human_x_2019_10xsequencing_wang_001.py | 2 +- ...an_lung_2020_10xsequencing_lukassen_001.py | 2 +- .../human_blood_2020_10x_hao_001.yaml | 2 +- .../d10_1101_661728/mouse_x_2019_x_pisco_x.py | 2 +- ...nchyma_2020_10xsequencing_habermann_001.py | 2 +- ...n_kidney_2019_10xsequencing_stewart_001.py | 2 +- ...uman_thymus_2020_10xsequencing_park_001.py | 2 +- ...uman_x_2019_10xsequencing_madissoon_001.py | 2 +- ..._retina_2019_10xsequencing_lukowski_001.py | 2 +- ...lood_2019_10xsequencing_10xgenomics_001.py | 2 +- .../human_x_2018_10xsequencing_regev_001.py | 2 +- sfaira/interface/__init__.py | 2 - sfaira/train/summaries.py | 2 +- sfaira/train/train_model.py | 2 +- sfaira/ui/__init__.py | 2 + sfaira/{interface => ui}/model_zoo.py | 0 sfaira/{interface => ui}/user_interface.py | 42 ++-- .../interface/test_userinterface.py | 15 +- sfaira/unit_tests/interface/test_zoo.py | 2 +- sfaira/unit_tests/trainer/test_trainer.py | 2 +- sfaira/unit_tests/utils.py | 2 + sfaira/unit_tests/versions/test_zoo.py | 2 +- sfaira/versions/metadata/base.py | 6 +- 75 files changed, 655 insertions(+), 1306 deletions(-) create mode 100644 docs/_templates/autosummary/base.rst 
create mode 100644 docs/_templates/autosummary/class.rst create mode 100644 docs/_templates/autosummary/function.rst delete mode 100644 docs/adding_dataset_classes.rst create mode 100644 docs/api.rst delete mode 100644 docs/api/index.rst delete mode 100644 docs/api/sfaira.data.DatasetBase.rst delete mode 100644 docs/api/sfaira.data.DatasetInteractive.rst delete mode 100644 docs/api/sfaira.data.DatasetSuperGroup.rst delete mode 100644 docs/api/sfaira.data.human.rst delete mode 100644 docs/api/sfaira.data.mouse.rst delete mode 100644 docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst delete mode 100644 docs/api/sfaira.models.celltype.rst delete mode 100644 docs/api/sfaira.models.embedding.rst delete mode 100644 docs/api/sfaira.train.GridsearchContainer.rst delete mode 100644 docs/api/sfaira.train.SummarizeGridsearchCelltype.rst delete mode 100644 docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst delete mode 100644 docs/api/sfaira.train.TargetZoos.rst delete mode 100644 docs/api/sfaira.train.TrainModelCelltype.rst delete mode 100644 docs/api/sfaira.train.TrainModelEmbedding.rst delete mode 100644 docs/api/sfaira.ui.UserInterface.rst create mode 100644 docs/consuming_data.rst rename docs/{development.rst => faq.rst} (55%) delete mode 100644 docs/using_data.rst delete mode 100644 sfaira/interface/__init__.py create mode 100644 sfaira/ui/__init__.py rename sfaira/{interface => ui}/model_zoo.py (100%) rename sfaira/{interface => ui}/user_interface.py (95%) diff --git a/.gitignore b/.gitignore index f2707b8fc..ebbb5cd2d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,15 @@ +# Cache cache/ontologies/cl/* + +# Docs +docs/api/ + +# Unit test temporary data: sfaira/unit_tests/test_data_loaders/* sfaira/unit_tests/test_data/* sfaira/unit_tests/template_data/* -sfaira/unit_tests/data_contribution/test_data_template.py + +# General patterns: git abuild cache sfaira.egg-info @@ -14,7 +21,6 @@ venv/* *.ipynb_checkpoints/ */*.ipynb_checkpoints/ **/.DS_Store 
-docs/_templates/ dist/ !**/.gitignore diff --git a/.readthedocs.yml b/.readthedocs.yml index b5013855b..4bfedc9b0 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -10,6 +10,6 @@ sphinx: formats: all python: - version: 3.9 + version: 3.8 install: - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/_static/custom_sfaira.css b/docs/_static/custom_sfaira.css index ff1006b23..3965aa994 100644 --- a/docs/_static/custom_sfaira.css +++ b/docs/_static/custom_sfaira.css @@ -1,140 +1,3 @@ -@import "basic.css"; - -/*Color main components with a dark theme*/ -div, span, code { - background-color: #181a1b !important; -} - -.wy-side-nav-search { - background-color: #005fff !important; -} - -.wy-side-nav-search div { - background-color: #005fff !important; -} - -/*Font color is mainly white*/ -.rst-content p, li, h1, h2, h3, h4, h5, h6, .highlight-console, .n, .section { - color: #FFFFFF; -} - -/*The side menu is slightly more grey and lighter than the overall theme*/ -.wy-menu, .wy-menu-vertical { - background-color: #2D2E2F !important; -} - -.wy-side-scroll { - background-color: #2D2E2F !important; -} - -.wy-menu .caption-text { - background-color: #2D2E2F !important; -} - -.caption-text { - background-color: #181a1b !important; -} - -.figure p { - background-color: #181a1b !important; -} - -/*Toctree wrapper on index page has a dark background unlike the other menu items*/ -.toctree-wrapper .compound ul .toctree-l1, .toctree-wrapper .compound ul .toctree-l2 { - background-color: #181a1b !important; - color: #005fff !important; -} - -/*The current menu section has a blue background*/ -.wy-menu-vertical li.toctree-l1.current li.toctree-l2 > a:hover { - color: #FFFFFF !important; - background: #005fff !important; -} - -/*Subitems under the current section are displayed in grey again*/ -.wy-menu-vertical li.toctree-l1.current li.toctree-l2 > a { - color: #FFFFFF !important; - background: #2D2E2F !important; -} - -/*Current hovered item has a 
blue background*/ -.wy-menu-vertical li.toctree-l2.current li.toctree-l3 > a:hover { - color: #FFFFFF !important; - background: #005fff !important; -} - -.wy-menu-vertical li.toctree-l2.current > a:hover { - color: #FFFFFF !important; - background: #005fff !important; -} - -.wy-menu-vertical li.toctree-l2.current li.toctree-l3 > a { - color: #FFFFFF !important; - background: #2D2E2F !important; -} - -.wy-menu-vertical li.toctree-l1.current { - color: #FFFFFF !important; - background: #005fff !important; - border-color: #005fff !important; -} - -.wy-menu-vertical li.toctree-l1.current > a { - color: #FFFFFF !important; - background: #005fff !important; - border-color: #005fff !important; -} - -.wy-menu-vertical li.toctree-l1.current > a:hover { - color: #FFFFFF !important; - background: #005fff !important; - border-color: #005fff !important; -} - -/*The expand toctree items have the same background as its corresponding section*/ -.wy-menu-vertical li.toctree-l1 a span.toctree-expand { - background-color: #005fff !important; -} - -.wy-menu-vertical li.toctree-l2 a span.toctree-expand { - background-color: #2D2E2F !important; -} - -.wy-menu-vertical li.toctree-l2 a:hover span.toctree-expand { - background-color: #005fff !important; -} - -.wy-menu-vertical li.toctree-l3 a span.toctree-expand { - background-color: #2D2E2F !important; -} - -.wy-menu-vertical li.toctree-l3 a:hover span.toctree-expand { - background-color: #005fff !important; -} - -.code .docutils .literal .notranslate .pre { - background-color: #181a1b !important; -} - -/*Color footer separately corresponding to overall dark theme*/ -footer { - color: #FFFFFF; -} - -/*Color footer buttons in blue*/ -.rst-footer-buttons a, .rst-footer-buttons a:hover, .rst-footer-buttons span { - background-color: #005fff !important; -} - -.wy-side-nav-search a { - background-color: #005fff !important; -} - -.version { - background-color: #005fff !important; - color: #FFFFFF !important; -} - /*Set max width to none so the 
theme uses all available width*/ .wy-nav-content { max-width: none; diff --git a/docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst new file mode 100644 index 000000000..0ded3e1e3 --- /dev/null +++ b/docs/_templates/autosummary/base.rst @@ -0,0 +1,40 @@ +.. + This code was adapted from https://github.com/theislab/cellrank/tree/master/docs/_templates/autosummary/base.rst + This file is therefore licensed under the license of the cellrank project, + available from https://github.com/theislab/cellrank and copied here at the time of accession. + + BSD 3-Clause License + + Copyright (c) 2019, Theis Lab + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +:github_url: {{ fullname }} + +{% extends "!autosummary/base.rst" %} + +.. http://www.sphinx-doc.org/en/stable/ext/autosummary.html#customizing-templates diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 100644 index 000000000..4370aaedb --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1,74 @@ +.. + This code was adapted from https://github.com/theislab/cellrank/tree/master/docs/_templates/autosummary/class.rst + This file is therefore licensed under the license of the cellrank project, + available from https://github.com/theislab/cellrank and copied here at the time of accession. + + BSD 3-Clause License + + Copyright (c) 2019, Theis Lab + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +:github_url: {{ fullname }} + +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. autosummary:: + :toctree: . + {% for item in attributes %} + ~{{ fullname }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: Methods + + .. autosummary:: + :toctree: . + {% for item in methods %} + {%- if item != '__init__' %} + ~{{ fullname }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} + + .. _sphx_glr_backref_{{fullname}}: + + .. minigallery:: {{fullname}} + :add-heading: Examples + :heading-level: - diff --git a/docs/_templates/autosummary/function.rst b/docs/_templates/autosummary/function.rst new file mode 100644 index 000000000..96d4094fe --- /dev/null +++ b/docs/_templates/autosummary/function.rst @@ -0,0 +1,46 @@ +.. 
+ This code was adapted from https://github.com/theislab/cellrank/tree/master/docs/_templates/autosummary/function.rst + This file is therefore licensed under the license of the cellrank project, + available from https://github.com/theislab/cellrank and copied here at the time of accession. + + BSD 3-Clause License + + Copyright (c) 2019, Theis Lab + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +:github_url: {{ fullname }} + +{{ fullname | escape | underline}} + +.. autofunction:: {{ fullname }} + +.. _sphx_glr_backref_{{fullname}}: + +.. 
minigallery:: {{fullname}} + :add-heading: Examples + :heading-level: - diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst deleted file mode 100644 index cb499949d..000000000 --- a/docs/adding_dataset_classes.rst +++ /dev/null @@ -1,112 +0,0 @@ -The class-based data loader python file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. -In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. - -1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. - -.. code-block:: python - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # Data set meta data: You do not have to include all of these and can simply skip lines corresponding - # to attritbutes that you do not have access to. These are meta data on a sample level. - # The meta data attributes labeled with (*) may als be supplied per cell, see below, - # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - - self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). - - self.author = x # author (list) who sampled / created the data set - self.doi = x # doi of data set accompanying manuscript - - self.download_url_data = x # download website(s) of data files - self.download_url_meta = x # download website(s) of meta data files - - self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - self.cell_line = x # (*, optional) cell line used (for cell culture samples) - self.dev_stage = x # (*, optional) developmental stage of organism - self.ethnicity = x # (*, optional) ethnicity of sample - self.healthy = x # (*, optional) whether sample represents a healthy organism - self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - self.organ = x # (*, optional) organ (anatomical structure) - self.organism = x # (*) species / organism - self.sample_source = x # (*) whether the sample came from primary tissue or cell culture - self.sex = x # (*, optional) sex - self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - self.year = x # year in which sample was acquired - - # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the - # anndata instance (specifically in .obs) after loading. - # You need to make sure this is loaded in the loading script)! - # See above for a description what these meta data attributes mean. - # Again, if these attributes are note available, you can simply leave this out. 
- self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .age is assay_differentiation) - self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cell_types_original = x # (optional) - # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and - # directory as the python file of this data loader (see below). - - -2. A function called to load the data set into memory: -It is important to set an automated path indicating the location of the raw files here. -Our recommendation for this directory set-up is that you define a directory folder in your directory structure -in which all of these raw files will be (self.path) and then add a sub-directory named as -`self.directory_formatted_doi` (ie. 
the doi with all special characters replaced by "_" and place the raw files -directly into this sub directory. - -.. code-block:: python - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad - return adata - -In summary, a python file for a mouse lung data set could look like this: - -.. code-block:: python - - class MyDataset(DatasetBase) - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.author = "me" - self.doi = ["my preprint", "my peer-reviewed publication"] - self.download_url_data = "my GEO upload" - self.normalisation = "raw" # because I uploaded raw counts, which is good practice! - self.organ = "lung" - self.organism = "mouse" - self.assay_sc = "smart-seq2" - self.year = "2020" - self.sample_source = "primary_tissue" - - self.obs_key_cell_types_original = "louvain_named" # i save my cell type names in here - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) - return adata diff --git a/docs/adding_datasets.rst b/docs/adding_datasets.rst index a5099ce39..e2e297aa2 100644 --- a/docs/adding_datasets.rst +++ b/docs/adding_datasets.rst @@ -1,5 +1,5 @@ -Adding data sets -=================== +Contributing data +================== Adding datasets to sfaira is a great way to increase the visibility of your dataset and to make it available to a large audience. This process requires a couple of steps as outlined in the following sections. @@ -646,4 +646,117 @@ The meta data on the meta data file do not have to modified by you are automatic `meta` in the `.yaml` file: - version: [string] - Version identifier of meta data scheme. 
\ No newline at end of file + Version identifier of meta data scheme. + +The class-based data loader python file +---------------------------------------- +As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. +In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. + +1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. + +.. code-block:: python + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). + + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files + + self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description of what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out. 
+ self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contains streamlined cell ontology cell type classes: + self.obs_key_cell_types_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and + # directory as the python file of this data loader (see below). + + +2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. +Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e. 
the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. + +.. code-block:: python + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad + return adata + +In summary, a python file for a mouse lung data set could look like this: + +.. code-block:: python + + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.author = "me" + self.doi = ["my preprint", "my peer-reviewed publication"] + self.download_url_data = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! + self.organ = "lung" + self.organism = "mouse" + self.assay_sc = "smart-seq2" + self.year = "2020" + self.sample_source = "primary_tissue" + + self.obs_key_cell_types_original = "louvain_named" # I save my cell type names in here + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) + return adata diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 000000000..22adef4d3 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,207 @@ +API +==== + +Import sfaira as:: + + import sfaira + + + +Data: `data` +------------- + +.. module:: sfaira.data +.. currentmodule:: sfaira + +The sfaira data zoo API. + +Dataset representing classes used for development: + +.. autosummary:: + :toctree: api + + data.DatasetBase + data.DatasetGroup + data.DatasetGroupDirectoryOriented + data.DatasetSuperGroup + +Interactive data class to use a loaded data object in the context of sfaira tools: + +.. 
autosummary:: + :toctree: api + + data.DatasetInteractive + +Dataset universe to interact with all data loader classes: + +.. autosummary:: + :toctree: api + + data.Universe + +Data store handling: + +.. autosummary:: + :toctree: api + + data.load_store + data.DistributedStoreBase + data.DistributedStoreDao + data.DistributedStoreH5ad + + +Estimator classes: `estimators` +-------------------------------- + +.. module:: sfaira.estimators +.. currentmodule:: sfaira + +Estimator classes from the sfaira model zoo API for advanced use. + +.. autosummary:: + :toctree: api + + estimators.EstimatorKeras + estimators.EstimatorKerasCelltype + estimators.EstimatorKerasEmbedding + +Model classes: `models` +------------------------ + +.. module:: sfaira.models +.. currentmodule:: sfaira + +Model classes from the sfaira model zoo API for advanced use. + +Cell type models +~~~~~~~~~~~~~~~~~ +Classes that wrap tensorflow cell type predictor models. + +.. autosummary:: + :toctree: api + + models.celltype.CellTypeMarker + models.celltype.CellTypeMarker + models.celltype.CellTypeMlp + models.celltype.CellTypeMlpVersioned + +Embedding models +~~~~~~~~~~~~~~~~~ +Classes that wrap tensorflow embedding models. + +.. autosummary:: + :toctree: api + + models.embedding.ModelKerasAe + models.embedding.ModelAeVersioned + models.embedding.ModelKerasVae + models.embedding.ModelVaeVersioned + models.embedding.ModelKerasLinear + models.embedding.ModelLinearVersioned + models.embedding.ModelKerasVaeIAF + models.embedding.ModelVaeIAFVersioned + models.embedding.ModelKerasVaeVamp + models.embedding.ModelVaeVampVersioned + +Train: `train` +--------------- + +.. module:: sfaira.train +.. currentmodule:: sfaira + +The interface for training sfaira compatible models. + +Trainer classes +~~~~~~~~~~~~~~~~ +Classes that wrap estimator classes to use in grid search training. + +.. 
autosummary:: + :toctree: api + + train.TrainModelCelltype + train.TrainModelEmbedding + +Grid search summaries +~~~~~~~~~~~~~~~~~~~~~ +Classes to pool evaluation metrics across fits in a grid search. + +.. autosummary:: + :toctree: api + + train.GridsearchContainer + train.SummarizeGridsearchCelltype + train.SummarizeGridsearchEmbedding + +Versions: `versions` +--------------------- + +.. module:: sfaira.versions +.. currentmodule:: sfaira + +The interface for sfaira metadata management. + +Genomes +~~~~~~~~ +Genome management. + +.. autosummary:: + :toctree: api + + versions.genomes.GenomeContainer + +Metadata +~~~~~~~~~ +Dataset metadata management. +Base classes to manage ontology files: + +.. autosummary:: + :toctree: api + + versions.metadata.Ontology + versions.metadata.OntologyList + versions.metadata.OntologyHierarchical + versions.metadata.OntologyObo + versions.metadata.OntologyOboCustom + +Ontology-specific classes: + +.. autosummary:: + :toctree: api + + versions.metadata.OntologyCellosaurus + versions.metadata.OntologyCl + versions.metadata.OntologyHsapdv + versions.metadata.OntologyMondo + versions.metadata.OntologyMmusdv + versions.metadata.OntologySinglecellLibraryConstruction + versions.metadata.OntologyUberon + +Class wrapping cell type ontology for predictor models: + +.. autosummary:: + :toctree: api + + versions.metadata.CelltypeUniverse + +Topologies +~~~~~~~~~~~ +Model topology management. + +.. autosummary:: + :toctree: api + + versions.topologies.TopologyContainer + +User interface: `ui` +--------------------- + +.. module:: sfaira.ui +.. currentmodule:: sfaira + +This sub-module gives users access to the model zoo, including model query from remote servers. +This API is designed to be used in analysis workflows and does not require any understanding of the way models are defined and stored. + +.. 
autosummary:: + :toctree: api + + ui.UserInterface diff --git a/docs/api/index.rst b/docs/api/index.rst deleted file mode 100644 index 0230f0b1a..000000000 --- a/docs/api/index.rst +++ /dev/null @@ -1,154 +0,0 @@ -.. module:: sfaira -.. automodule:: sfaira - :noindex: - -API -=== - -Import sfaira as:: - - import sfaira - - - -Data: `data` ------------- - -.. module:: sfaira.data -.. currentmodule:: sfaira - -The sfaira data zoo API. - - -Pre-defined data set collections -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This sub-module gives you access to curated subsets of the data zoo, e.g. all data sets from human lungs. - -.. autosummary:: - :toctree: . - - data.human - data.mouse - - -Functionalities for interactive data analysis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This sub-module gives you access to functionalities you need to define your own data set collections based on the sfaira data zoo. - -.. autosummary:: - :toctree: . - - data.DatasetBase - data.DatasetGroupBase - data.DatasetSuperGroup - - -Functionalities for interactive data analysis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This sub-module gives you access to functionalities you need to load new data live into the data zoo to handle a raw data set in the context of zoo data sets. - -.. autosummary:: - :toctree: . - - data.DatasetInteractive - - -Genomes: `genomes` ------------------- - -.. module:: sfaira.genomes -.. currentmodule:: sfaira - -This sub-module gives you access to properties of the genome representations used in sfaira. - -.. autosummary:: - :toctree: . - - genomes.ExtractFeatureListEnsemble - - -Models: `models` ----------------- - -.. module:: sfaira.models -.. currentmodule:: sfaira - -The sfaira model zoo API for advanced use. -This API is structured by streamlined, task-specific APIs for specific analysis problems. -This API is targeted at developers, see also `ui` for a user centric wrapping API for this model zoo. 
- - -Cell-type predictor models -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This sub-module handles models that predict cell types. - -.. autosummary:: - :toctree: . - - models.celltype - - -Embedding models -~~~~~~~~~~~~~~~~ - -This sub-module handles models that embed expression vectors (cells) into a latent space. - -.. autosummary:: - :toctree: . - - models.embedding - - -Train: `train` --------------- - -.. module:: sfaira.train -.. currentmodule:: sfaira - -The interface for training sfaira compatible models. -This is a sub-module dedicated for developers to ease model training and deployment. - -Trainer classes -~~~~~~~~~~~~~~~ - -Trainer class wrap estimator classes (which wrap model classes) and handle grid-search specific tasks centred on model fits, -such as saving evaluation metrics and model weights. - -.. autosummary:: - :toctree: . - - train.TargetZoos - train.TrainModelCelltype - train.TrainModelEmbedding - - -Grid search summary classes -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Grid search summary classes allow a developer to easily interact with a finished grid search by loading and summarising results, -which were saved through Trainer classes. - -.. autosummary:: - :toctree: . - - train.GridsearchContainer - train.SummarizeGridsearchCelltype - train.SummarizeGridsearchEmbedding - -User interface: `ui` --------------------- - -.. module:: sfaira.ui -.. currentmodule:: sfaira - -This sub-module gives users access to the model zoo, including model query from remote servers. -This API is designed to be used in analysis workflows and does not require any understanding of the way models are defined and stored. - -.. autosummary:: - :toctree: . - - ui.UserInterface diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst deleted file mode 100644 index b8b86e9c9..000000000 --- a/docs/api/sfaira.data.DatasetBase.rst +++ /dev/null @@ -1,106 +0,0 @@ -sfaira.data.DatasetBase -======================= - -.. currentmodule:: sfaira.data - -.. 
autoclass:: DatasetBase - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DatasetBase.__init__ - ~DatasetBase.clear - ~DatasetBase.collapse_counts - ~DatasetBase.download - ~DatasetBase.load - ~DatasetBase.load_meta - ~DatasetBase.load_ontology_class_map - ~DatasetBase.project_celltypes_to_ontology - ~DatasetBase.set_dataset_id - ~DatasetBase.show_summary - ~DatasetBase.streamline_features - ~DatasetBase.streamline_metadata - ~DatasetBase.subset_cells - ~DatasetBase.write_backed - ~DatasetBase.write_distributed_store - ~DatasetBase.write_meta - ~DatasetBase.write_ontology_class_map - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DatasetBase.additional_annotation_key - ~DatasetBase.annotated - ~DatasetBase.assay_differentiation - ~DatasetBase.assay_differentiation_obs_key - ~DatasetBase.assay_sc - ~DatasetBase.assay_sc_obs_key - ~DatasetBase.assay_type_differentiation - ~DatasetBase.assay_type_differentiation_obs_key - ~DatasetBase.author - ~DatasetBase.bio_sample - ~DatasetBase.bio_sample_obs_key - ~DatasetBase.cache_fn - ~DatasetBase.cell_line - ~DatasetBase.cell_line_obs_key - ~DatasetBase.cell_ontology_map - ~DatasetBase.cell_types_original_obs_key - ~DatasetBase.cellontology_class_obs_key - ~DatasetBase.cellontology_id_obs_key - ~DatasetBase.celltypes_universe - ~DatasetBase.citation - ~DatasetBase.data_dir - ~DatasetBase.default_embedding - ~DatasetBase.development_stage - ~DatasetBase.development_stage_obs_key - ~DatasetBase.directory_formatted_doi - ~DatasetBase.disease - ~DatasetBase.disease_obs_key - ~DatasetBase.doi - ~DatasetBase.doi_cleaned_id - ~DatasetBase.doi_main - ~DatasetBase.download_url_data - ~DatasetBase.download_url_meta - ~DatasetBase.ethnicity - ~DatasetBase.ethnicity_obs_key - ~DatasetBase.fn_ontology_class_map_tsv - ~DatasetBase.gene_id_ensembl_var_key - ~DatasetBase.gene_id_symbols_var_key - ~DatasetBase.id - ~DatasetBase.individual - ~DatasetBase.individual_obs_key - ~DatasetBase.loaded - 
~DatasetBase.meta - ~DatasetBase.meta_fn - ~DatasetBase.ncells - ~DatasetBase.normalization - ~DatasetBase.ontology_celltypes - ~DatasetBase.ontology_organ - ~DatasetBase.organ - ~DatasetBase.organ_obs_key - ~DatasetBase.organism - ~DatasetBase.organism_obs_key - ~DatasetBase.primary_data - ~DatasetBase.sample_source - ~DatasetBase.sample_source_obs_key - ~DatasetBase.sex - ~DatasetBase.sex_obs_key - ~DatasetBase.source - ~DatasetBase.state_exact - ~DatasetBase.state_exact_obs_key - ~DatasetBase.tech_sample - ~DatasetBase.tech_sample_obs_key - ~DatasetBase.title - ~DatasetBase.year - - \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst deleted file mode 100644 index daf81f984..000000000 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ /dev/null @@ -1,106 +0,0 @@ -sfaira.data.DatasetInteractive -============================== - -.. currentmodule:: sfaira.data - -.. autoclass:: DatasetInteractive - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DatasetInteractive.__init__ - ~DatasetInteractive.clear - ~DatasetInteractive.collapse_counts - ~DatasetInteractive.download - ~DatasetInteractive.load - ~DatasetInteractive.load_meta - ~DatasetInteractive.load_ontology_class_map - ~DatasetInteractive.project_celltypes_to_ontology - ~DatasetInteractive.set_dataset_id - ~DatasetInteractive.show_summary - ~DatasetInteractive.streamline_features - ~DatasetInteractive.streamline_metadata - ~DatasetInteractive.subset_cells - ~DatasetInteractive.write_backed - ~DatasetInteractive.write_distributed_store - ~DatasetInteractive.write_meta - ~DatasetInteractive.write_ontology_class_map - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DatasetInteractive.additional_annotation_key - ~DatasetInteractive.annotated - ~DatasetInteractive.assay_differentiation - ~DatasetInteractive.assay_differentiation_obs_key - ~DatasetInteractive.assay_sc - ~DatasetInteractive.assay_sc_obs_key - ~DatasetInteractive.assay_type_differentiation - ~DatasetInteractive.assay_type_differentiation_obs_key - ~DatasetInteractive.author - ~DatasetInteractive.bio_sample - ~DatasetInteractive.bio_sample_obs_key - ~DatasetInteractive.cache_fn - ~DatasetInteractive.cell_line - ~DatasetInteractive.cell_line_obs_key - ~DatasetInteractive.cell_ontology_map - ~DatasetInteractive.cell_types_original_obs_key - ~DatasetInteractive.cellontology_class_obs_key - ~DatasetInteractive.cellontology_id_obs_key - ~DatasetInteractive.celltypes_universe - ~DatasetInteractive.citation - ~DatasetInteractive.data_dir - ~DatasetInteractive.default_embedding - ~DatasetInteractive.development_stage - ~DatasetInteractive.development_stage_obs_key - ~DatasetInteractive.directory_formatted_doi - ~DatasetInteractive.disease - ~DatasetInteractive.disease_obs_key - ~DatasetInteractive.doi - ~DatasetInteractive.doi_cleaned_id - ~DatasetInteractive.doi_main - ~DatasetInteractive.download_url_data - ~DatasetInteractive.download_url_meta - ~DatasetInteractive.ethnicity - ~DatasetInteractive.ethnicity_obs_key - ~DatasetInteractive.fn_ontology_class_map_tsv - ~DatasetInteractive.gene_id_ensembl_var_key - ~DatasetInteractive.gene_id_symbols_var_key - ~DatasetInteractive.id - ~DatasetInteractive.individual - ~DatasetInteractive.individual_obs_key - ~DatasetInteractive.loaded - ~DatasetInteractive.meta - ~DatasetInteractive.meta_fn - ~DatasetInteractive.ncells - ~DatasetInteractive.normalization - ~DatasetInteractive.ontology_celltypes - ~DatasetInteractive.ontology_organ - ~DatasetInteractive.organ - ~DatasetInteractive.organ_obs_key - ~DatasetInteractive.organism - ~DatasetInteractive.organism_obs_key - ~DatasetInteractive.primary_data - 
~DatasetInteractive.sample_source - ~DatasetInteractive.sample_source_obs_key - ~DatasetInteractive.sex - ~DatasetInteractive.sex_obs_key - ~DatasetInteractive.source - ~DatasetInteractive.state_exact - ~DatasetInteractive.state_exact_obs_key - ~DatasetInteractive.tech_sample - ~DatasetInteractive.tech_sample_obs_key - ~DatasetInteractive.title - ~DatasetInteractive.year - - \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst deleted file mode 100644 index 5bfa255c8..000000000 --- a/docs/api/sfaira.data.DatasetSuperGroup.rst +++ /dev/null @@ -1,55 +0,0 @@ -sfaira.data.DatasetSuperGroup -============================= - -.. currentmodule:: sfaira.data - -.. autoclass:: DatasetSuperGroup - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DatasetSuperGroup.__init__ - ~DatasetSuperGroup.collapse_counts - ~DatasetSuperGroup.delete_backed - ~DatasetSuperGroup.download - ~DatasetSuperGroup.extend_dataset_groups - ~DatasetSuperGroup.flatten - ~DatasetSuperGroup.get_gc - ~DatasetSuperGroup.load - ~DatasetSuperGroup.load_cached_backed - ~DatasetSuperGroup.load_config - ~DatasetSuperGroup.ncells - ~DatasetSuperGroup.ncells_bydataset - ~DatasetSuperGroup.ncells_bydataset_flat - ~DatasetSuperGroup.project_celltypes_to_ontology - ~DatasetSuperGroup.remove_duplicates - ~DatasetSuperGroup.set_dataset_groups - ~DatasetSuperGroup.show_summary - ~DatasetSuperGroup.streamline_features - ~DatasetSuperGroup.streamline_metadata - ~DatasetSuperGroup.subset - ~DatasetSuperGroup.subset_cells - ~DatasetSuperGroup.write_backed - ~DatasetSuperGroup.write_config - ~DatasetSuperGroup.write_distributed_store - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DatasetSuperGroup.adata - ~DatasetSuperGroup.adata_ls - ~DatasetSuperGroup.additional_annotation_key - ~DatasetSuperGroup.datasets - ~DatasetSuperGroup.ids - - \ No newline at end of file diff --git a/docs/api/sfaira.data.human.rst b/docs/api/sfaira.data.human.rst deleted file mode 100644 index bddab197c..000000000 --- a/docs/api/sfaira.data.human.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.data.human -================= - -.. automodule:: sfaira.data.human - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.data.mouse.rst b/docs/api/sfaira.data.mouse.rst deleted file mode 100644 index af3c07453..000000000 --- a/docs/api/sfaira.data.mouse.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.data.mouse -================= - -.. automodule:: sfaira.data.mouse - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst deleted file mode 100644 index 415c01979..000000000 --- a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.genomes.ExtractFeatureListEnsemble -========================================= - -.. currentmodule:: sfaira.genomes - -.. autoclass:: ExtractFeatureListEnsemble - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ExtractFeatureListEnsemble.__init__ - ~ExtractFeatureListEnsemble.from_ensemble_gtf - ~ExtractFeatureListEnsemble.reduce_types - ~ExtractFeatureListEnsemble.reduce_types_protein_coding - ~ExtractFeatureListEnsemble.write_gene_table_to_csv - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.models.celltype.rst b/docs/api/sfaira.models.celltype.rst deleted file mode 100644 index 48b1f74e2..000000000 --- a/docs/api/sfaira.models.celltype.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.models.celltype -====================== - -.. 
automodule:: sfaira.models.celltype - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.models.embedding.rst b/docs/api/sfaira.models.embedding.rst deleted file mode 100644 index 2446b787d..000000000 --- a/docs/api/sfaira.models.embedding.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.models.embedding -======================= - -.. automodule:: sfaira.models.embedding - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.train.GridsearchContainer.rst b/docs/api/sfaira.train.GridsearchContainer.rst deleted file mode 100644 index ae233fecf..000000000 --- a/docs/api/sfaira.train.GridsearchContainer.rst +++ /dev/null @@ -1,37 +0,0 @@ -sfaira.train.GridsearchContainer -================================ - -.. currentmodule:: sfaira.train - -.. autoclass:: GridsearchContainer - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~GridsearchContainer.__init__ - ~GridsearchContainer.best_model_by_partition - ~GridsearchContainer.get_best_model_ids - ~GridsearchContainer.load_gs - ~GridsearchContainer.load_y - ~GridsearchContainer.plot_best_model_by_hyperparam - ~GridsearchContainer.plot_completions - ~GridsearchContainer.plot_training_history - ~GridsearchContainer.save_best_weight - ~GridsearchContainer.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~GridsearchContainer.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst deleted file mode 100644 index eeb1cb5a9..000000000 --- a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst +++ /dev/null @@ -1,43 +0,0 @@ -sfaira.train.SummarizeGridsearchCelltype -======================================== - -.. currentmodule:: sfaira.train - -.. autoclass:: SummarizeGridsearchCelltype - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SummarizeGridsearchCelltype.__init__ - ~SummarizeGridsearchCelltype.best_model_by_partition - ~SummarizeGridsearchCelltype.best_model_celltype - ~SummarizeGridsearchCelltype.create_summary_tab - ~SummarizeGridsearchCelltype.get_best_model_ids - ~SummarizeGridsearchCelltype.load_gs - ~SummarizeGridsearchCelltype.load_ontology_names - ~SummarizeGridsearchCelltype.load_y - ~SummarizeGridsearchCelltype.plot_best - ~SummarizeGridsearchCelltype.plot_best_classwise_heatmap - ~SummarizeGridsearchCelltype.plot_best_classwise_scatter - ~SummarizeGridsearchCelltype.plot_best_model_by_hyperparam - ~SummarizeGridsearchCelltype.plot_completions - ~SummarizeGridsearchCelltype.plot_training_history - ~SummarizeGridsearchCelltype.save_best_weight - ~SummarizeGridsearchCelltype.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~SummarizeGridsearchCelltype.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst deleted file mode 100644 index 5edcfac79..000000000 --- a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst +++ /dev/null @@ -1,47 +0,0 @@ -sfaira.train.SummarizeGridsearchEmbedding -========================================= - -.. currentmodule:: sfaira.train - -.. autoclass:: SummarizeGridsearchEmbedding - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SummarizeGridsearchEmbedding.__init__ - ~SummarizeGridsearchEmbedding.best_model_by_partition - ~SummarizeGridsearchEmbedding.best_model_embedding_latentspace - ~SummarizeGridsearchEmbedding.create_summary_tab - ~SummarizeGridsearchEmbedding.get_best_model_ids - ~SummarizeGridsearchEmbedding.get_gradients_by_celltype - ~SummarizeGridsearchEmbedding.load_gs - ~SummarizeGridsearchEmbedding.load_y - ~SummarizeGridsearchEmbedding.plot_active_latent_units - ~SummarizeGridsearchEmbedding.plot_best - ~SummarizeGridsearchEmbedding.plot_best_model_by_hyperparam - ~SummarizeGridsearchEmbedding.plot_completions - ~SummarizeGridsearchEmbedding.plot_gradient_cor - ~SummarizeGridsearchEmbedding.plot_gradient_distr - ~SummarizeGridsearchEmbedding.plot_npc - ~SummarizeGridsearchEmbedding.plot_training_history - ~SummarizeGridsearchEmbedding.save_best_weight - ~SummarizeGridsearchEmbedding.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~SummarizeGridsearchEmbedding.List - ~SummarizeGridsearchEmbedding.Union - ~SummarizeGridsearchEmbedding.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TargetZoos.rst b/docs/api/sfaira.train.TargetZoos.rst deleted file mode 100644 index bbf18cd74..000000000 --- a/docs/api/sfaira.train.TargetZoos.rst +++ /dev/null @@ -1,24 +0,0 @@ -sfaira.train.TargetZoos -======================= - -.. currentmodule:: sfaira.train - -.. autoclass:: TargetZoos - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TargetZoos.__init__ - ~TargetZoos.write_celltypes_tocsv_human - ~TargetZoos.write_celltypes_tocsv_mouse - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst deleted file mode 100644 index 79cf1acc3..000000000 --- a/docs/api/sfaira.train.TrainModelCelltype.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.train.TrainModelCelltype -=============================== - -.. 
currentmodule:: sfaira.train - -.. autoclass:: TrainModelCelltype - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TrainModelCelltype.__init__ - ~TrainModelCelltype.init_estim - ~TrainModelCelltype.load_into_memory - ~TrainModelCelltype.save - ~TrainModelCelltype.save_eval - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst deleted file mode 100644 index fc3dea8d6..000000000 --- a/docs/api/sfaira.train.TrainModelEmbedding.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.train.TrainModelEmbedding -================================ - -.. currentmodule:: sfaira.train - -.. autoclass:: TrainModelEmbedding - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TrainModelEmbedding.__init__ - ~TrainModelEmbedding.init_estim - ~TrainModelEmbedding.load_into_memory - ~TrainModelEmbedding.save - ~TrainModelEmbedding.save_eval - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst deleted file mode 100644 index 6b8ba31b2..000000000 --- a/docs/api/sfaira.ui.UserInterface.rst +++ /dev/null @@ -1,38 +0,0 @@ -sfaira.ui.UserInterface -======================= - -.. currentmodule:: sfaira - -.. autoclass:: ui.UserInterface - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~ui.UserInterface.__init__ - ~ui.UserInterface.celltype_summary - ~ui.UserInterface.predict_all - ~ui.UserInterface.compute_all_kipoi - ~ui.UserInterface.predict_celltypes - ~ui.UserInterface.compute_celltype_kipoi - ~ui.UserInterface.compute_denoised_expression - ~ui.UserInterface.compute_denoised_expression_kipoi - ~ui.UserInterface.predict_embedding - ~ui.UserInterface.compute_embedding_kipoi - ~ui.UserInterface.deposit_zenodo - ~ui.UserInterface.filter_cells - ~ui.UserInterface.get_references - ~ui.UserInterface.load_data - ~ui.UserInterface.load_model_celltype - ~ui.UserInterface.load_model_embedding - ~ui.UserInterface.write_lookuptable - - - - - - \ No newline at end of file diff --git a/docs/commandline_interface.rst b/docs/commandline_interface.rst index c70724ebd..a995699dc 100644 --- a/docs/commandline_interface.rst +++ b/docs/commandline_interface.rst @@ -1,5 +1,5 @@ Commandline interface ---------------------- +---------------------- .. click:: sfaira.cli:sfaira_cli :prog: sfaira diff --git a/docs/consuming_data.rst b/docs/consuming_data.rst new file mode 100644 index 000000000..8037892fc --- /dev/null +++ b/docs/consuming_data.rst @@ -0,0 +1,40 @@ +Consuming Data +=============== + +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png + :width: 600px + :align: center + +Build data repository locally +------------------------------ + +Build a repository structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 1. Choose a directory to dedicate to the data base, called root in the following. + 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. + +Note that the automated download is a feature of sfaira but not the core purpose of the package: +Sfaira allows you efficiently interact with such a local data repository. 
+Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. + +Use 3rd party repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some organizations provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +One example for such an organization is the cellxgene_ data portal. +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon! +Contact us for support of any other repositories. + +.. _cellxgene: https://cellxgene.cziscience.com/ + +Genome management +----------------- + +We streamline feature spaces used by models by defining standardized gene sets that are used as model input. +Per default, sfaira works with the protein coding genes of a genome assembly right now. +A model topology version includes the genome it was trained for, which also defines the features of this model as genes. +As genome assemblies are updated, model topology versions can be updated and models retrained to reflect these changes. +Note that because protein coding genes do not change drastically between genome assemblies, +samples can be carried over to assemblies they were not aligned against by matching gene identifiers. +Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. diff --git a/docs/development.rst b/docs/faq.rst similarity index 55% rename from docs/development.rst rename to docs/faq.rst index 1d8488c31..9a3a5276f 100644 --- a/docs/development.rst +++ b/docs/faq.rst @@ -1,27 +1,19 @@ -Development -=========== +FAQ +==== -Data zoo FAQ ------------- +Data zoo +--------- -How are the meta data entries that I define in the constructor constrained or protected?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The items that are not free text are documented in the readthedocs data section, often, -these would require entries to be terms in an ontology. -If you make a mistake in defining these fields in a data loader that you contribute, -the template test data loader and any loading operation will throw an error -pointing at this meta data element. - -How is _load() used in data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`_load()` contains all processing steps that load raw data files into a ready to use adata object. -`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. +How is load() function used in data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`load()` contains all processing steps that load raw data files into a ready to use adata object. This adata object can be cached as an h5ad file named after the dataset ID for faster reloading -(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available +(if allow_caching=True), which exactly skips the code in `load()`. +`load()` can be triggered to reload from scratch even if cached data is available (if use_cached=False). How is the feature space (gene names) manipulated during data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sfaira provides both gene names and ENSEMBL IDs. Missing IDs will automatically be inferred from the gene names and vice versa. Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); @@ -29,17 +21,17 @@ in this case, counts are aggregated across these features. Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. Datasets, DatasetGroups, DatasetSuperGroups - what are they? 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dataset: Custom class that loads a specific dataset. DatasetGroup: A dataset group manages collection of data loaders (multiple instances of Dataset). This is useful to group for example all data loaders corresponding to a certain study or a certain tissue. DatasetSuperGroups: A group of DatasetGroups that allow easy addition of multiple instances of DatasetGroup. -Basics of sfaira lazy loading via split into constructor and _load function. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Basics of sfaira lazy loading via split into constructor and load() function. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The constructor of a dataset defines all metadata associated with this data set. The loading of the actual data happens in the `load()` function and not in the constructor. This is useful as it allows initialising the datasets and accessing dataset metadata without loading the actual count data. -DatasetGroups can contain initialised Datasets and can be subsetted based on metadata +DatasetGroups can contain initialised Datasets and can be sub-setted based on metadata before loading is triggered across the entire group. diff --git a/docs/index.rst b/docs/index.rst index ac1929e77..38c4db65c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,14 +35,13 @@ Latest additions :hidden: installation - api/index + api commandline_interface tutorials - using_data adding_datasets - adding_dataset_classes + consuming_data models ecosystem roadmap - development + faq changelog diff --git a/docs/installation.rst b/docs/installation.rst index 66e4b23cf..b446eeac5 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,4 +1,31 @@ Installation ============ -A clone of sfaira can be installed via pip install -e . \ No newline at end of file +sfaira is pip installable. 
+ +PyPI +~~~~ +To install a sfaira release directly from PyPI, run:: + + pip install sfaira + + +Install a development version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +To install a specific branch `target_branch` of sfaira from a clone, run:: + + cd target_directory + git clone https://github.com/theislab/sfaira.git + cd sfaira + git checkout target_branch + git pull + pip install -e . + +In most cases, you would install one of the following: +You may choose the branch `release` if you want to use a relatively stable version +which is similar to the current release but may have additional features already. +You may choose the branch `dev` if you want newer features than available from `release`. +You may choose a specific feature branch if you want to use or improve that feature before it +is reviewed and merged into `dev`. +Note that the `master` branch only contains releases, +so every installation based on the `master` branch can also be performed via PyPI. diff --git a/docs/tutorials.rst b/docs/tutorials.rst index a644fc8bf..eefe98be5 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -1,17 +1,15 @@ Tutorials ========= -Queries to data zoo -------------------- - -We provide a tutorial for queries to the data zoo through our python API (dataloaders_) and for assembling meta data across the zoo (metadata_). - -.. _dataloaders: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/data_loaders.ipynb -.. _metadata: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/meta_data.ipynb - -Queries to model zoo --------------------- - -We provide a tutorial for interacting with our model zoo through a python API in a scanpy workflow through our `user interface`_ - -.. _user interface: https://nbviewer.jupyter.org/github.com/theislab/sfaira_tutorials/blob/master/tutorials/user_interface.ipynb +We provide multiple tutorials in a separate repository_.
+ +* A tutorial for interacting with the data loaders via the `Universe` class (universe_). +* A tutorial for general usage of the user interface (user_interface_). +* A tutorial for zero-shot analysis with the user interface (pbmc3k_). +* A tutorial for creating a meta data-based data zoo overview figure (meta_data_). + +.. _meta_data: https://github.com/theislab/sfaira_tutorials/blob/master/summaries/meta_data.ipynb +.. _pbmc3k: https://github.com/theislab/sfaira_tutorials/blob/master/tutorials/pbmc3k.ipynb +.. _repository: https://github.com/theislab/sfaira_tutorials/ +.. _universe: https://github.com/theislab/sfaira_tutorials/blob/master/tutorials/data_loaders.ipynb +.. _user_interface: https://github.com/theislab/sfaira_tutorials/blob/master/tutorials/user_interface.ipynb diff --git a/docs/using_data.rst b/docs/using_data.rst deleted file mode 100644 index 24f0a1cbb..000000000 --- a/docs/using_data.rst +++ /dev/null @@ -1,153 +0,0 @@ -Using Data -========== - -.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png - :width: 600px - :align: center - -Build data repository locally ------------------------------- - -Build a repository structure -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - 1. Choose a directory to dedicate to the data base, called root in the following. - 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. - -Note that the automated download is a feature of sfaira but not the core purpose of the package: -Sfaira allows you efficiently interact with such a local data repository. -Some data sets cannot be automatically downloaded and need you manual intervention, which we report in the download script output. - -Use 3rd party repositories -~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some organization provide streamlined data objects that can be directly consumed by data zoos such as sfaira.
-One example for such an organization is the cellxgene_ data portal. -Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. -Data loaders for cellxgene structured data objects will be available soon! -Contact us for support of any other repositories. - -.. _cellxgene: https://cellxgene.cziscience.com/ - -Genome management ------------------ - -We streamline feature spaces used by models by defining standardized gene sets that are used as model input. -Per default, sfaira works with the protein coding genes of a genome assembly right now. -A model topology version includes the genome it was trained for, which also defines the feature of this model as genes. -As genome assemblies are updated, model topology version can be updated and models retrained to reflect these changes. -Note that because protein coding genes do not change drastically between genome assemblies, -sample can be carried over to assemblies they were not aligned against by matching gene identifiers. -Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. - -FAQ ---- - -How is the dataset’s ID structured? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi - -How do I assemble the data set ID if some of its element meta data are not unique? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The data set ID is designed to be a unique identifier of a data set. -Therefore, it is not an issue if it does not capture the full complexity of the data. -Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. - -What are cell-wise and sample-wise meta data? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Metadata can be set on a per sample level or, in some cases, per cell. 
-Sample-wise meta data can be directly set in the constructor (e.g self.organism = “human”). -Cell-wise metadata can be provided in `.obs` of the loaded data, here, -a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels -(e.g. self.obs_key_organism). -Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. - -Which meta data objects are mandatory? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): - - - .id: Dataset ID. This is used to identify the data set uniquely. - Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - - .download_url_data: Link to data download website. - Example: self.download = "some URL" - - .download_url_meta: Download link to metadata. Assumes that meta data is defined in .download_url_data if not - specified. - Example: self.download_meta = "some URL" - - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var - (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. - Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = “GeneID” - - .author: First author of publication (or list of all authors). - self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] - - .doi: Doi of publication - Example: self.doi = "10.1016/j.cell.2019.06.029" - - .organism (or .obs_key_organism): Organism sampled. - Example: self.organism = “human” - - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture - Example: self.sample_source = "primary_tissue" - -Highly recommended: - - - .normalization: Normalization of count data: - Example: self.normalization = “raw” - - .organ (or .obs_key_organ): Organ sampled. 
- Example: self.organ = “liver” - - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. - Example: self.assay_sc = “10x” - -Optional (if available): - - - .age (or .obs_key_age): Age of individual sampled. - Example: self.age = 80 # (80 years old for human) - - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. - Example: self.dev_stage = “mature” - - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human). - Example: self.ethnicity = “free text” - - .healthy (or .obs_key_healthy): Is the sampled from a disease individual? (bool) - Example: self.healthy = True - - .sex (or .obs_key_sex): Sex of individual sampled. - Example: self.sex = “male” - - .state_exact (or .obs_key_state_exact): Exact disease state - self.state_exact = free text - - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. - Example: self.obs_key_cell_types_original = 'CellType' - - .year: Year of publication: - Example: self.year = 2019 - - .cell_line: Which cell line was used for the experiment (for cell culture samples) - Example: self.cell_line = "409B2 (CVCL_K092)" - - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) - - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided - (for cell culture samples) - -How do I cache data sets? -~~~~~~~~~~~~~~~~~~~~~~~~~ -When loading a dataset with `Dataset.load(),`you can specify if the adata object -should be cached or not (allow_caching= True). -If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. - -How do I add cell type annotation? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We are simplifying this right now, new instructions will be available second half of January. - -Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Initiation and data set loading are handled separately to allow lazy loading. -All steps that are required to load the count data and -additional metadata should be defined solely in the `_load` section. -Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. - -How do I tell sfaira where the gene names are? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. -If the gene names are in the index of this data frame, you can set “index” as the value of these attributes. - -I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -No, that is not a problem. They will automatically be converted to Ensembl IDs. -You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` -to which the names should be mapped to. - -I have CITE-seq data, where can I put the protein quantification? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We will soon provide a structured interface for loading and accessing CITE-seq data, -for now you can add it into `self.adata.obsm[“CITE”]`. 
diff --git a/sfaira/__init__.py b/sfaira/__init__.py index 48839deaf..6a84da418 100644 --- a/sfaira/__init__.py +++ b/sfaira/__init__.py @@ -4,9 +4,9 @@ import sfaira.consts import sfaira.data import sfaira.genomes -import sfaira.interface as ui import sfaira.models import sfaira.train +import sfaira.ui import sfaira.versions from ._version import get_versions diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml index 826736b3a..3d3f45314 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2018_08_067/human_laminapropriaofmucosaofcolon_2019_10xsequencing_kinchen_001.yaml @@ -19,7 +19,7 @@ dataset_wise: primary_data: year: 2019 dataset_or_observation_wise: - assay_sc: "10x sequencing" + assay_sc: "10x technology" assay_sc_obs_key: assay_differentiation: assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py index 7f978325a..8fcc18add 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_06_029/human_colonicepithelium_2019_10xsequencing_smilie_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/smillie19_epi.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Smilie" self.disease = "healthy" self.doi = 
"10.1016/j.cell.2019.06.029" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py index fde1b559b..84bb3cb0f 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_cell_2019_08_008/human_ileum_2019_10xsequencing_martin_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/martin19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Martin" self.disease = "healthy" self.doi = "10.1016/j.cell.2019.08.008" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py index b16ceb030..a5ed8b6a4 100644 --- a/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_celrep_2018_11_086/human_prostategland_2018_10xsequencing_henry_001.py @@ -18,7 +18,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/henry18_0.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Henry" self.disease = "healthy" self.doi = "10.1016/j.celrep.2018.11.086" diff --git a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py index ccd9bdca3..ea7d78a77 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1016_j_devcel_2020_01_033/human_lung_2020_10xsequencing_miller_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/miller20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Miller" self.disease = "healthy" self.doi = "10.1016/j.devcel.2020.01.033" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py index c3d9af7a9..8fb1f6b59 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41422_018_0099_2/human_testis_2018_10xsequencing_guo_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/guo18_donor.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Guo" self.disease = "healthy" self.doi = "10.1038/s41422-018-0099-2" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py index aa459169b..b581a4338 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_018_06318_7/human_caudatelobeofliver_2018_10xsequencing_macparland_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "private,GSE115469.csv.gz" 
self.download_url_meta = "private,GSE115469_labels.txt" - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "MacParland" self.disease = "healthy" self.doi = "10.1038/s41467-018-06318-7" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py index 60448ed1f..02e672cc9 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12464_3/human_x_2019_10xsequencing_szabo_001.py @@ -58,7 +58,7 @@ def __init__(self, **kwargs): "private,donor2.annotation.txt" ] - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Szabo" self.doi = "10.1038/s41467-019-12464-3" self.individual = SAMPLE_DICT[self.sample_fn][1] diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py index cfb4a1155..d1891ed98 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41467_019_12780_8/human_retina_2019_10xsequencing_menon_001.py @@ -11,7 +11,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/menon19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Menon" self.disease = "healthy" self.doi = "10.1038/s41467-019-12780-8" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py index 84b0d677e..4afa22223 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_018_0698_6/human_placenta_2018_x_ventotormo_001.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs): self.download_url_meta = f"https://www.ebi.ac.uk/arrayexpress/files/{self.sample_fn.split('.')[0]}/" \ f"{self.sample_fn}.2.zip" - self.assay_sc = "10x sequencing" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" + self.assay_sc = "10x technology" if self.sample_fn == "E-MTAB-6678.processed" else "Smart-seq2" self.author = "Ventotormo" self.disease = "healthy" self.doi = "10.1038/s41586-018-0698-6" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py index d4487a62f..1c12db6b3 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1631_3/human_liver_2019_10xsequencing_ramachandran_001.py @@ -14,7 +14,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3433/tissue.rdata" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Ramachandran" self.doi = "10.1038/s41586-019-1631-3" self.normalization = "raw" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py index d36bc6554..9c1e3efd0 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1652_y/human_liver_2019_10xsequencing_popescu_001.py @@ 
-11,7 +11,7 @@ def __init__(self, **kwargs): self.download_url_data = "private,fetal_liver_alladata_.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Popescu" self.disease = "healthy" self.doi = "10.1038/s41586-019-1652-y" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml index dde9db146..ced5c2cd8 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_019_1654_9/human_brain_2019_10x3v2sequencing_kanton_001.yaml @@ -14,7 +14,7 @@ dataset_wise: normalization: "raw" year: 2019 dataset_or_observation_wise: - assay_sc: "10x 3' v2 sequencing" + assay_sc: "10x 3' v2" assay_sc_obs_key: assay_differentiation: "Lancaster, 2014 (doi: 10.1038/nprot.2014.158)" assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml index ab082177f..b84ead614 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41586_020_2922_4/human_lung_2020_x_travaglini_001.yaml @@ -18,7 +18,7 @@ dataset_wise: year: 2020 dataset_or_observation_wise: assay_sc: - droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10x sequencing" + droplet_normal_lung_blood_scanpy.20200205.RC4.h5ad: "10x technology" facs_normal_lung_blood_scanpy.20200205.RC4.h5ad: "Smart-seq2" assay_sc_obs_key: assay_differentiation: diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py 
b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py index 6ae1b3135..5a8d212c6 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41590_020_0602_z/human_colon_2020_10xsequencing_james_001.py @@ -13,7 +13,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/james20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "James" self.disease = "healthy" self.doi = "10.1038/s41590-020-0602-z" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py index ad68bd449..0a644c56b 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41591_019_0468_5/human_x_2019_10xsequencing_braga_x.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Braga" self.disease = "healthy" self.doi = "10.1038/s41591-019-0468-5" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py index f4c7413ba..f2c463bcb 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41593_019_0393_4/mouse_x_2019_10xsequencing_hove_001.py @@ -16,7 +16,7 @@ def __init__(self, **kwargs): self.download_url_meta = \ 
"https://www.brainimmuneatlas.org/data_files/toDownload/annot_fullAggr.csv" - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Hove" self.disease = "healthy" self.doi = "10.1038/s41593-019-0393-4" diff --git a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py index 8256e3e73..e1f35a862 100644 --- a/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1038_s41597_019_0351_8/human_kidney_2020_10xsequencing_liao_001.py @@ -15,7 +15,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE131nnn/GSE131685/suppl/GSE131685_RAW.tar" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Liao" self.disease = "healthy" self.normalization = "raw" diff --git a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py index feaf2d55b..526111fe5 100644 --- a/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1073_pnas_1914143116/human_retina_2019_10xsequencing_voigt_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/voigt19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Voigt" self.disease = "healthy" self.doi = "10.1073/pnas.1914143116" diff --git a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py 
b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py index 84b179723..5d768b35a 100644 --- a/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1084_jem_20191130/human_x_2019_10xsequencing_wang_001.py @@ -21,7 +21,7 @@ def __init__(self, **kwargs): organ = self.sample_fn.split("_")[1].split(".")[0] - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Wang" self.disease = "healthy" self.doi = "10.1084/jem.20191130" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py index 1352418da..c5e946f30 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_03_13_991455/human_lung_2020_10xsequencing_lukassen_001.py @@ -18,7 +18,7 @@ def __init__(self, **kwargs): self.download_url_data = f"https://covid19.cog.sanger.ac.uk/{self.sample_fn}" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Lukassen" self.disease = "healthy" self.doi = "10.1101/2020.03.13.991455" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml index b5c0a6b3f..71ca7fafe 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml +++ b/sfaira/data/dataloaders/loaders/d10_1101_2020_10_12_335331/human_blood_2020_10x_hao_001.yaml @@ -12,7 +12,7 @@ dataset_wise: primary_data: year: 2020 dataset_or_observation_wise: - assay_sc: "10x sequencing" + assay_sc: "10x technology" assay_sc_obs_key: assay_differentiation: 
assay_differentiation_obs_key: diff --git a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py index 10170393a..178ec629d 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_661728/mouse_x_2019_x_pisco_x.py @@ -87,7 +87,7 @@ def __init__(self, **kwargs): self.normalization = "norm" self.organism = "mouse" self.organ = organ - self.assay_sc = "10x sequencing" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" + self.assay_sc = "10x technology" if self.sample_fn.split("-")[3] == "droplet" else "Smart-seq2" self.year = 2019 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py index 4cee92c69..c4a572f59 100644 --- a/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1101_753806/human_lungparenchyma_2020_10xsequencing_habermann_001.py @@ -24,7 +24,7 @@ def __init__(self, **kwargs): self.normalization = "raw" self.organ = "lung parenchyma" self.organism = "human" - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.year = 2020 self.sample_source = "primary_tissue" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py index 97df2730f..8bc35a905 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aat5031/human_kidney_2019_10xsequencing_stewart_001.py @@ -15,7 +15,7 @@ def 
__init__(self, **kwargs): ] self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Stewart" self.disease = "healthy" self.doi = "10.1126/science.aat5031" diff --git a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py index 07de38e1c..7aef677df 100644 --- a/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1126_science_aay3224/human_thymus_2020_10xsequencing_park_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/park20.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Park" self.disease = "healthy" self.doi = "10.1126/science.aay3224" diff --git a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py index d6e7200a9..b97a67c9e 100644 --- a/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py +++ b/sfaira/data/dataloaders/loaders/d10_1186_s13059_019_1906_x/human_x_2019_10xsequencing_madissoon_001.py @@ -33,7 +33,7 @@ def __init__(self, **kwargs): self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Madissoon" self.disease = "healthy" self.doi = "10.1186/s13059-019-1906-x" diff --git a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py index 5385e38fd..7b9501d25 100644 --- 
a/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py +++ b/sfaira/data/dataloaders/loaders/d10_15252_embj_2018100811/human_retina_2019_10xsequencing_lukowski_001.py @@ -14,7 +14,7 @@ def __init__(self, **kwargs): self.download_url_data = "https://covid19.cog.sanger.ac.uk/lukowski19.processed.h5ad" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Lukowski" self.disease = "healthy" self.doi = "10.15252/embj.2018100811" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py index b4bd78bc8..e2aeeb90c 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_10x_genomics/human_blood_2019_10xsequencing_10xgenomics_001.py @@ -15,7 +15,7 @@ def __init__(self, **kwargs): "http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.h5" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "10x Genomics" self.disease = "healthy" self.doi = "no_doi_10x_genomics" diff --git a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py index 2d8ad20a1..242d861f0 100644 --- a/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py +++ b/sfaira/data/dataloaders/loaders/dno_doi_regev/human_x_2018_10xsequencing_regev_001.py @@ -12,7 +12,7 @@ def __init__(self, **kwargs): "cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom" self.download_url_meta = None - self.assay_sc = "10x sequencing" + self.assay_sc = "10x technology" self.author = "Regev" self.disease 
= "healthy" self.doi = "no_doi_regev" diff --git a/sfaira/interface/__init__.py b/sfaira/interface/__init__.py deleted file mode 100644 index 7c96cac34..000000000 --- a/sfaira/interface/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sfaira.interface.model_zoo import ModelZoo -from sfaira.interface.user_interface import UserInterface diff --git a/sfaira/train/summaries.py b/sfaira/train/summaries.py index 5e4d72bb8..933ba3924 100644 --- a/sfaira/train/summaries.py +++ b/sfaira/train/summaries.py @@ -11,7 +11,7 @@ from sfaira.data import load_store from sfaira.data.dataloaders import Universe from sfaira.estimators import EstimatorKerasEmbedding -from sfaira.interface import ModelZoo +from sfaira.ui import ModelZoo from sfaira.versions.metadata import CelltypeUniverse, OntologyCl diff --git a/sfaira/train/train_model.py b/sfaira/train/train_model.py index 4d87b28ab..dc72b8ca9 100644 --- a/sfaira/train/train_model.py +++ b/sfaira/train/train_model.py @@ -8,7 +8,7 @@ from sfaira.consts import AdataIdsSfaira from sfaira.data import DistributedStoreBase, Universe from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding -from sfaira.interface import ModelZoo +from sfaira.ui import ModelZoo class TrainModel: diff --git a/sfaira/ui/__init__.py b/sfaira/ui/__init__.py new file mode 100644 index 000000000..2c6c9d1a2 --- /dev/null +++ b/sfaira/ui/__init__.py @@ -0,0 +1,2 @@ +from sfaira.ui.model_zoo import ModelZoo +from sfaira.ui.user_interface import UserInterface diff --git a/sfaira/interface/model_zoo.py b/sfaira/ui/model_zoo.py similarity index 100% rename from sfaira/interface/model_zoo.py rename to sfaira/ui/model_zoo.py diff --git a/sfaira/interface/user_interface.py b/sfaira/ui/user_interface.py similarity index 95% rename from sfaira/interface/user_interface.py rename to sfaira/ui/user_interface.py index b9fed1e90..96a831c7b 100644 --- a/sfaira/interface/user_interface.py +++ b/sfaira/ui/user_interface.py @@ -11,34 +11,32 @@ from 
sfaira.consts import AdataIdsSfaira, AdataIds from sfaira.data import DatasetInteractive from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype -from sfaira.interface.model_zoo import ModelZoo +from sfaira.ui.model_zoo import ModelZoo from sfaira.versions.topologies import TopologyContainer class UserInterface: """ This class performs data set handling and coordinates estimators for the different model types. - Example code to obtain a UMAP embedding plot of the embedding created from your data with cell-type labels: - ``` - import sfaira - import anndata - import scanpy - - # initialise your sfaira instance with a model lookuptable. - # instead of setting `custom_repo` when initialising the UI you can also use `sfaira_repo=True` to use public weights - ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) - ui.load_data(anndata.read("/path/to/file.h5ad")) # load your dataset into sfaira - ui.zoo_embedding.model_id = 'embedding_human-blood-ae-0.2-0.1_theislab' # pick desired model here - ui.zoo_celltype.model_id = 'celltype_human-blood-mlp-0.1.3-0.1_theislab' # pick desired model here - ui.load_data(anndata.read("/path/to/file.h5ad"), gene_symbol_col='index', gene_ens_col='gene_ids') # load your dataset into sfaira - ui.load_model_embedding() - ui.load_model_celltype() - ui.predict_all() - adata = ui.data.adata - scanpy.pp.neighbors(adata, use_rep="X_sfaira") - scanpy.tl.umap(adata) - scanpy.pl.umap(adata, color="celltypes_sfaira", show=True, save="UMAP_sfaira.png") - ``` + Example code to obtain a UMAP embedding plot of the embedding created from your data with cell-type labels:: + + import sfaira + import anndata + import scanpy + + # initialise your sfaira instance with a model lookuptable. 
+ ui = sfaira.ui.UserInterface(custom_repo="/path/to/local/repo/folder/or/zenodo/repo/URL", sfaira_repo=False) + ui.zoo_embedding.model_id = 'embedding_human-blood-ae-0.2-0.1_theislab' # pick desired model here + ui.zoo_celltype.model_id = 'celltype_human-blood-mlp-0.1.3-0.1_theislab' # pick desired model here + ui.load_data(anndata.read("/path/to/file.h5ad"), gene_symbol_col='index', gene_ens_col='gene_ids') + ui.load_model_embedding() + ui.load_model_celltype() + ui.predict_all() + adata = ui.data.adata + scanpy.pp.neighbors(adata, use_rep="X_sfaira") + scanpy.tl.umap(adata) + scanpy.pl.umap(adata, color="celltypes_sfaira", show=True, save="UMAP_sfaira.png") + """ estimator_embedding: Union[EstimatorKerasEmbedding, None] diff --git a/sfaira/unit_tests/interface/test_userinterface.py b/sfaira/unit_tests/interface/test_userinterface.py index 613d016fe..69c5f1f02 100644 --- a/sfaira/unit_tests/interface/test_userinterface.py +++ b/sfaira/unit_tests/interface/test_userinterface.py @@ -2,7 +2,7 @@ import os from typing import Union -from sfaira.interface import UserInterface +from sfaira.ui import UserInterface class TestUi: @@ -10,7 +10,7 @@ class TestUi: data: np.ndarray """ - Contains functions _test* to test individual functions and attributes of the user interface class. + Contains functions _test* to test individual functions and attributes of the user ui class. TODO for everybody working on this, add one _test* function in here and add it into basic_estimator_test(). See _test_call() for an example. @@ -35,14 +35,3 @@ def _test_basic(self): """ temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), '../test_data') self.ui = UserInterface(custom_repo=temp_fn, sfaira_repo=False) - - def _test_kipoi(self): - """ - Test all kipoi_experimental model methods. 
- - - :return: - """ - temp_fn = os.path.join(str(os.path.dirname(os.path.abspath(__file__))), '../test_data') - self.ui = UserInterface(custom_repo=temp_fn, sfaira_repo=False) - self.ui.compute_embedding_kipoi() diff --git a/sfaira/unit_tests/interface/test_zoo.py b/sfaira/unit_tests/interface/test_zoo.py index 2a8f2bd30..d531bd14e 100644 --- a/sfaira/unit_tests/interface/test_zoo.py +++ b/sfaira/unit_tests/interface/test_zoo.py @@ -1,5 +1,5 @@ import os -from sfaira.interface import ModelZoo +from sfaira.ui import ModelZoo dir_data = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data") dir_meta = os.path.join(os.path.dirname(os.path.dirname(__file__)), "test_data/meta") diff --git a/sfaira/unit_tests/trainer/test_trainer.py b/sfaira/unit_tests/trainer/test_trainer.py index 1d4b0115b..e153fce2f 100644 --- a/sfaira/unit_tests/trainer/test_trainer.py +++ b/sfaira/unit_tests/trainer/test_trainer.py @@ -4,7 +4,7 @@ from typing import Union from sfaira.data import load_store -from sfaira.interface import ModelZoo +from sfaira.ui import ModelZoo from sfaira.train import TrainModelCelltype, TrainModelEmbedding from sfaira.unit_tests.utils import cached_store_writing, simulate_anndata diff --git a/sfaira/unit_tests/utils.py b/sfaira/unit_tests/utils.py index a2dc5d27b..fa040445e 100644 --- a/sfaira/unit_tests/utils.py +++ b/sfaira/unit_tests/utils.py @@ -58,6 +58,8 @@ def cached_store_writing(dir_data, dir_meta, assembly, organism: str = "mouse", """ adata_ids_sfaira = AdataIdsSfaira() store_path = os.path.join(dir_data, "store") + if not os.path.exists(store_path): + os.mkdir(store_path) ds = Universe(data_path=dir_data, meta_path=dir_meta, cache_path=dir_data) ds.subset(key=adata_ids_sfaira.organism, values=[organism]) ds.subset(key=adata_ids_sfaira.organ, values=[organ]) diff --git a/sfaira/unit_tests/versions/test_zoo.py b/sfaira/unit_tests/versions/test_zoo.py index 9204117ae..b8663214f 100644 --- a/sfaira/unit_tests/versions/test_zoo.py +++ 
b/sfaira/unit_tests/versions/test_zoo.py @@ -5,7 +5,7 @@ from typing import Union import unittest -from sfaira.interface.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.ui.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding class _TestZoo: diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py index 2d35786af..cc9a4132c 100644 --- a/sfaira/versions/metadata/base.py +++ b/sfaira/versions/metadata/base.py @@ -950,12 +950,12 @@ def __init__(self): ontology="efo", root_term="EFO_0010183", additional_terms={ - "microwell-seq": {"name": "microwell-seq"}, - "sci-plex": {"name": "sci-plex"} + "sci-plex": {"name": "sci-plex"}, + "sci-RNA-seq": {"name": "sci-RNA-seq"}, }, additional_edges=[ - ("EFO:0010183", "microwell-seq"), ("EFO:0010183", "sci-plex"), + ("EFO:0010183", "sci-RNA-seq"), ], ontology_cache_fn="efo.pickle" ) From 680511cb279cc8f964d9e609878d653f5f8d8eef Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Tue, 8 Jun 2021 20:02:46 +0200 Subject: [PATCH 155/161] Release v0.3.0 (#290) From 6e81f75877f2bb433ff27797e3ffdf49a5e40491 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 9 Jun 2021 12:44:21 +0200 Subject: [PATCH 156/161] Master dev merge (#298) Resolves conflicts from master -> dev --- docs/adding_dataset_classes.rst | 112 +++++++++++++ docs/api/sfaira.data.DatasetBase.rst | 106 ++++++++++++ docs/api/sfaira.data.DatasetInteractive.rst | 106 ++++++++++++ docs/api/sfaira.data.DatasetSuperGroup.rst | 55 +++++++ docs/api/sfaira.data.human.rst | 23 +++ docs/api/sfaira.data.mouse.rst | 23 +++ ...ira.genomes.ExtractFeatureListEnsemble.rst | 26 +++ docs/api/sfaira.models.celltype.rst | 23 +++ docs/api/sfaira.models.embedding.rst | 23 +++ docs/api/sfaira.train.GridsearchContainer.rst | 37 +++++ ...aira.train.SummarizeGridsearchCelltype.rst | 43 +++++ ...ira.train.SummarizeGridsearchEmbedding.rst | 47 ++++++ docs/api/sfaira.train.TargetZoos.rst | 24 +++ docs/api/sfaira.train.TrainModelCelltype.rst | 26 +++ docs/api/sfaira.train.TrainModelEmbedding.rst | 26 +++ docs/api/sfaira.ui.UserInterface.rst | 38 +++++ docs/development.rst | 45 ++++++ docs/using_data.rst | 153 ++++++++++++++++++ sfaira/unit_tests/versions/test_zoo.py | 19 +-- 19 files changed, 941 insertions(+), 14 deletions(-) create mode 100644 docs/adding_dataset_classes.rst create mode 100644 docs/api/sfaira.data.DatasetBase.rst create mode 100644 docs/api/sfaira.data.DatasetInteractive.rst create mode 100644 docs/api/sfaira.data.DatasetSuperGroup.rst create mode 100644 docs/api/sfaira.data.human.rst create mode 100644 docs/api/sfaira.data.mouse.rst create mode 100644 docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst create mode 100644 docs/api/sfaira.models.celltype.rst create mode 100644 docs/api/sfaira.models.embedding.rst create mode 100644 docs/api/sfaira.train.GridsearchContainer.rst create mode 100644 docs/api/sfaira.train.SummarizeGridsearchCelltype.rst create mode 100644 docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst create mode 100644 docs/api/sfaira.train.TargetZoos.rst create mode 100644 docs/api/sfaira.train.TrainModelCelltype.rst create 
mode 100644 docs/api/sfaira.train.TrainModelEmbedding.rst create mode 100644 docs/api/sfaira.ui.UserInterface.rst create mode 100644 docs/development.rst create mode 100644 docs/using_data.rst diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst new file mode 100644 index 000000000..cb499949d --- /dev/null +++ b/docs/adding_dataset_classes.rst @@ -0,0 +1,112 @@ +The class-based data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. +In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. + +1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. + +.. code-block:: python + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). + + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files + + self.assay_sc = x # (*, optional) protocol used to sample data (e.g.
smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description of what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out.
+ self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contains the original free-text cell type labels: + self.obs_key_cell_types_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and + # directory as the python file of this data loader (see below). + + +2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. +Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e.
the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. + +.. code-block:: python + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad + return adata + +In summary, a python file for a mouse lung data set could look like this: + +.. code-block:: python + + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.author = "me" + self.doi = ["my preprint", "my peer-reviewed publication"] + self.download_url_data = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! + self.organ = "lung" + self.organism = "mouse" + self.assay_sc = "smart-seq2" + self.year = "2020" + self.sample_source = "primary_tissue" + + self.obs_key_cell_types_original = "louvain_named" # I save my cell type names in here + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) + return adata diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst new file mode 100644 index 000000000..b8b86e9c9 --- /dev/null +++ b/docs/api/sfaira.data.DatasetBase.rst @@ -0,0 +1,106 @@ +sfaira.data.DatasetBase +======================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetBase + + + .. automethod:: __init__ + + + .. rubric:: Methods + + ..
autosummary:: + + ~DatasetBase.__init__ + ~DatasetBase.clear + ~DatasetBase.collapse_counts + ~DatasetBase.download + ~DatasetBase.load + ~DatasetBase.load_meta + ~DatasetBase.load_ontology_class_map + ~DatasetBase.project_celltypes_to_ontology + ~DatasetBase.set_dataset_id + ~DatasetBase.show_summary + ~DatasetBase.streamline_features + ~DatasetBase.streamline_metadata + ~DatasetBase.subset_cells + ~DatasetBase.write_backed + ~DatasetBase.write_distributed_store + ~DatasetBase.write_meta + ~DatasetBase.write_ontology_class_map + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~DatasetBase.additional_annotation_key + ~DatasetBase.annotated + ~DatasetBase.assay_differentiation + ~DatasetBase.assay_differentiation_obs_key + ~DatasetBase.assay_sc + ~DatasetBase.assay_sc_obs_key + ~DatasetBase.assay_type_differentiation + ~DatasetBase.assay_type_differentiation_obs_key + ~DatasetBase.author + ~DatasetBase.bio_sample + ~DatasetBase.bio_sample_obs_key + ~DatasetBase.cache_fn + ~DatasetBase.cell_line + ~DatasetBase.cell_line_obs_key + ~DatasetBase.cell_ontology_map + ~DatasetBase.cell_types_original_obs_key + ~DatasetBase.cellontology_class_obs_key + ~DatasetBase.cellontology_id_obs_key + ~DatasetBase.celltypes_universe + ~DatasetBase.citation + ~DatasetBase.data_dir + ~DatasetBase.default_embedding + ~DatasetBase.development_stage + ~DatasetBase.development_stage_obs_key + ~DatasetBase.directory_formatted_doi + ~DatasetBase.disease + ~DatasetBase.disease_obs_key + ~DatasetBase.doi + ~DatasetBase.doi_cleaned_id + ~DatasetBase.doi_main + ~DatasetBase.download_url_data + ~DatasetBase.download_url_meta + ~DatasetBase.ethnicity + ~DatasetBase.ethnicity_obs_key + ~DatasetBase.fn_ontology_class_map_tsv + ~DatasetBase.gene_id_ensembl_var_key + ~DatasetBase.gene_id_symbols_var_key + ~DatasetBase.id + ~DatasetBase.individual + ~DatasetBase.individual_obs_key + ~DatasetBase.loaded + ~DatasetBase.meta + ~DatasetBase.meta_fn + ~DatasetBase.ncells + 
~DatasetBase.normalization + ~DatasetBase.ontology_celltypes + ~DatasetBase.ontology_organ + ~DatasetBase.organ + ~DatasetBase.organ_obs_key + ~DatasetBase.organism + ~DatasetBase.organism_obs_key + ~DatasetBase.primary_data + ~DatasetBase.sample_source + ~DatasetBase.sample_source_obs_key + ~DatasetBase.sex + ~DatasetBase.sex_obs_key + ~DatasetBase.source + ~DatasetBase.state_exact + ~DatasetBase.state_exact_obs_key + ~DatasetBase.tech_sample + ~DatasetBase.tech_sample_obs_key + ~DatasetBase.title + ~DatasetBase.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst new file mode 100644 index 000000000..daf81f984 --- /dev/null +++ b/docs/api/sfaira.data.DatasetInteractive.rst @@ -0,0 +1,106 @@ +sfaira.data.DatasetInteractive +============================== + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetInteractive + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetInteractive.__init__ + ~DatasetInteractive.clear + ~DatasetInteractive.collapse_counts + ~DatasetInteractive.download + ~DatasetInteractive.load + ~DatasetInteractive.load_meta + ~DatasetInteractive.load_ontology_class_map + ~DatasetInteractive.project_celltypes_to_ontology + ~DatasetInteractive.set_dataset_id + ~DatasetInteractive.show_summary + ~DatasetInteractive.streamline_features + ~DatasetInteractive.streamline_metadata + ~DatasetInteractive.subset_cells + ~DatasetInteractive.write_backed + ~DatasetInteractive.write_distributed_store + ~DatasetInteractive.write_meta + ~DatasetInteractive.write_ontology_class_map + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~DatasetInteractive.additional_annotation_key + ~DatasetInteractive.annotated + ~DatasetInteractive.assay_differentiation + ~DatasetInteractive.assay_differentiation_obs_key + ~DatasetInteractive.assay_sc + ~DatasetInteractive.assay_sc_obs_key + ~DatasetInteractive.assay_type_differentiation + ~DatasetInteractive.assay_type_differentiation_obs_key + ~DatasetInteractive.author + ~DatasetInteractive.bio_sample + ~DatasetInteractive.bio_sample_obs_key + ~DatasetInteractive.cache_fn + ~DatasetInteractive.cell_line + ~DatasetInteractive.cell_line_obs_key + ~DatasetInteractive.cell_ontology_map + ~DatasetInteractive.cell_types_original_obs_key + ~DatasetInteractive.cellontology_class_obs_key + ~DatasetInteractive.cellontology_id_obs_key + ~DatasetInteractive.celltypes_universe + ~DatasetInteractive.citation + ~DatasetInteractive.data_dir + ~DatasetInteractive.default_embedding + ~DatasetInteractive.development_stage + ~DatasetInteractive.development_stage_obs_key + ~DatasetInteractive.directory_formatted_doi + ~DatasetInteractive.disease + ~DatasetInteractive.disease_obs_key + ~DatasetInteractive.doi + ~DatasetInteractive.doi_cleaned_id + ~DatasetInteractive.doi_main + ~DatasetInteractive.download_url_data + ~DatasetInteractive.download_url_meta + ~DatasetInteractive.ethnicity + ~DatasetInteractive.ethnicity_obs_key + ~DatasetInteractive.fn_ontology_class_map_tsv + ~DatasetInteractive.gene_id_ensembl_var_key + ~DatasetInteractive.gene_id_symbols_var_key + ~DatasetInteractive.id + ~DatasetInteractive.individual + ~DatasetInteractive.individual_obs_key + ~DatasetInteractive.loaded + ~DatasetInteractive.meta + ~DatasetInteractive.meta_fn + ~DatasetInteractive.ncells + ~DatasetInteractive.normalization + ~DatasetInteractive.ontology_celltypes + ~DatasetInteractive.ontology_organ + ~DatasetInteractive.organ + ~DatasetInteractive.organ_obs_key + ~DatasetInteractive.organism + ~DatasetInteractive.organism_obs_key + ~DatasetInteractive.primary_data + 
~DatasetInteractive.sample_source + ~DatasetInteractive.sample_source_obs_key + ~DatasetInteractive.sex + ~DatasetInteractive.sex_obs_key + ~DatasetInteractive.source + ~DatasetInteractive.state_exact + ~DatasetInteractive.state_exact_obs_key + ~DatasetInteractive.tech_sample + ~DatasetInteractive.tech_sample_obs_key + ~DatasetInteractive.title + ~DatasetInteractive.year + + \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst new file mode 100644 index 000000000..5bfa255c8 --- /dev/null +++ b/docs/api/sfaira.data.DatasetSuperGroup.rst @@ -0,0 +1,55 @@ +sfaira.data.DatasetSuperGroup +============================= + +.. currentmodule:: sfaira.data + +.. autoclass:: DatasetSuperGroup + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DatasetSuperGroup.__init__ + ~DatasetSuperGroup.collapse_counts + ~DatasetSuperGroup.delete_backed + ~DatasetSuperGroup.download + ~DatasetSuperGroup.extend_dataset_groups + ~DatasetSuperGroup.flatten + ~DatasetSuperGroup.get_gc + ~DatasetSuperGroup.load + ~DatasetSuperGroup.load_cached_backed + ~DatasetSuperGroup.load_config + ~DatasetSuperGroup.ncells + ~DatasetSuperGroup.ncells_bydataset + ~DatasetSuperGroup.ncells_bydataset_flat + ~DatasetSuperGroup.project_celltypes_to_ontology + ~DatasetSuperGroup.remove_duplicates + ~DatasetSuperGroup.set_dataset_groups + ~DatasetSuperGroup.show_summary + ~DatasetSuperGroup.streamline_features + ~DatasetSuperGroup.streamline_metadata + ~DatasetSuperGroup.subset + ~DatasetSuperGroup.subset_cells + ~DatasetSuperGroup.write_backed + ~DatasetSuperGroup.write_config + ~DatasetSuperGroup.write_distributed_store + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~DatasetSuperGroup.adata + ~DatasetSuperGroup.adata_ls + ~DatasetSuperGroup.additional_annotation_key + ~DatasetSuperGroup.datasets + ~DatasetSuperGroup.ids + + \ No newline at end of file diff --git a/docs/api/sfaira.data.human.rst b/docs/api/sfaira.data.human.rst new file mode 100644 index 000000000..bddab197c --- /dev/null +++ b/docs/api/sfaira.data.human.rst @@ -0,0 +1,23 @@ +sfaira.data.human +================= + +.. automodule:: sfaira.data.human + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.data.mouse.rst b/docs/api/sfaira.data.mouse.rst new file mode 100644 index 000000000..af3c07453 --- /dev/null +++ b/docs/api/sfaira.data.mouse.rst @@ -0,0 +1,23 @@ +sfaira.data.mouse +================= + +.. automodule:: sfaira.data.mouse + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst new file mode 100644 index 000000000..415c01979 --- /dev/null +++ b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst @@ -0,0 +1,26 @@ +sfaira.genomes.ExtractFeatureListEnsemble +========================================= + +.. currentmodule:: sfaira.genomes + +.. autoclass:: ExtractFeatureListEnsemble + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ExtractFeatureListEnsemble.__init__ + ~ExtractFeatureListEnsemble.from_ensemble_gtf + ~ExtractFeatureListEnsemble.reduce_types + ~ExtractFeatureListEnsemble.reduce_types_protein_coding + ~ExtractFeatureListEnsemble.write_gene_table_to_csv + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.models.celltype.rst b/docs/api/sfaira.models.celltype.rst new file mode 100644 index 000000000..48b1f74e2 --- /dev/null +++ b/docs/api/sfaira.models.celltype.rst @@ -0,0 +1,23 @@ +sfaira.models.celltype +====================== + +.. 
automodule:: sfaira.models.celltype + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.models.embedding.rst b/docs/api/sfaira.models.embedding.rst new file mode 100644 index 000000000..2446b787d --- /dev/null +++ b/docs/api/sfaira.models.embedding.rst @@ -0,0 +1,23 @@ +sfaira.models.embedding +======================= + +.. automodule:: sfaira.models.embedding + + + + + + + + + + + + + + + + + + + diff --git a/docs/api/sfaira.train.GridsearchContainer.rst b/docs/api/sfaira.train.GridsearchContainer.rst new file mode 100644 index 000000000..ae233fecf --- /dev/null +++ b/docs/api/sfaira.train.GridsearchContainer.rst @@ -0,0 +1,37 @@ +sfaira.train.GridsearchContainer +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: GridsearchContainer + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~GridsearchContainer.__init__ + ~GridsearchContainer.best_model_by_partition + ~GridsearchContainer.get_best_model_ids + ~GridsearchContainer.load_gs + ~GridsearchContainer.load_y + ~GridsearchContainer.plot_best_model_by_hyperparam + ~GridsearchContainer.plot_completions + ~GridsearchContainer.plot_training_history + ~GridsearchContainer.save_best_weight + ~GridsearchContainer.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~GridsearchContainer.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst new file mode 100644 index 000000000..eeb1cb5a9 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst @@ -0,0 +1,43 @@ +sfaira.train.SummarizeGridsearchCelltype +======================================== + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~SummarizeGridsearchCelltype.__init__ + ~SummarizeGridsearchCelltype.best_model_by_partition + ~SummarizeGridsearchCelltype.best_model_celltype + ~SummarizeGridsearchCelltype.create_summary_tab + ~SummarizeGridsearchCelltype.get_best_model_ids + ~SummarizeGridsearchCelltype.load_gs + ~SummarizeGridsearchCelltype.load_ontology_names + ~SummarizeGridsearchCelltype.load_y + ~SummarizeGridsearchCelltype.plot_best + ~SummarizeGridsearchCelltype.plot_best_classwise_heatmap + ~SummarizeGridsearchCelltype.plot_best_classwise_scatter + ~SummarizeGridsearchCelltype.plot_best_model_by_hyperparam + ~SummarizeGridsearchCelltype.plot_completions + ~SummarizeGridsearchCelltype.plot_training_history + ~SummarizeGridsearchCelltype.save_best_weight + ~SummarizeGridsearchCelltype.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~SummarizeGridsearchCelltype.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst new file mode 100644 index 000000000..5edcfac79 --- /dev/null +++ b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst @@ -0,0 +1,47 @@ +sfaira.train.SummarizeGridsearchEmbedding +========================================= + +.. currentmodule:: sfaira.train + +.. autoclass:: SummarizeGridsearchEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~SummarizeGridsearchEmbedding.__init__ + ~SummarizeGridsearchEmbedding.best_model_by_partition + ~SummarizeGridsearchEmbedding.best_model_embedding_latentspace + ~SummarizeGridsearchEmbedding.create_summary_tab + ~SummarizeGridsearchEmbedding.get_best_model_ids + ~SummarizeGridsearchEmbedding.get_gradients_by_celltype + ~SummarizeGridsearchEmbedding.load_gs + ~SummarizeGridsearchEmbedding.load_y + ~SummarizeGridsearchEmbedding.plot_active_latent_units + ~SummarizeGridsearchEmbedding.plot_best + ~SummarizeGridsearchEmbedding.plot_best_model_by_hyperparam + ~SummarizeGridsearchEmbedding.plot_completions + ~SummarizeGridsearchEmbedding.plot_gradient_cor + ~SummarizeGridsearchEmbedding.plot_gradient_distr + ~SummarizeGridsearchEmbedding.plot_npc + ~SummarizeGridsearchEmbedding.plot_training_history + ~SummarizeGridsearchEmbedding.save_best_weight + ~SummarizeGridsearchEmbedding.write_best_hyparam + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~SummarizeGridsearchEmbedding.List + ~SummarizeGridsearchEmbedding.Union + ~SummarizeGridsearchEmbedding.cv_keys + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TargetZoos.rst b/docs/api/sfaira.train.TargetZoos.rst new file mode 100644 index 000000000..bbf18cd74 --- /dev/null +++ b/docs/api/sfaira.train.TargetZoos.rst @@ -0,0 +1,24 @@ +sfaira.train.TargetZoos +======================= + +.. currentmodule:: sfaira.train + +.. autoclass:: TargetZoos + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TargetZoos.__init__ + ~TargetZoos.write_celltypes_tocsv_human + ~TargetZoos.write_celltypes_tocsv_mouse + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst new file mode 100644 index 000000000..79cf1acc3 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelCelltype.rst @@ -0,0 +1,26 @@ +sfaira.train.TrainModelCelltype +=============================== + +.. 
currentmodule:: sfaira.train + +.. autoclass:: TrainModelCelltype + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelCelltype.__init__ + ~TrainModelCelltype.init_estim + ~TrainModelCelltype.load_into_memory + ~TrainModelCelltype.save + ~TrainModelCelltype.save_eval + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst new file mode 100644 index 000000000..fc3dea8d6 --- /dev/null +++ b/docs/api/sfaira.train.TrainModelEmbedding.rst @@ -0,0 +1,26 @@ +sfaira.train.TrainModelEmbedding +================================ + +.. currentmodule:: sfaira.train + +.. autoclass:: TrainModelEmbedding + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~TrainModelEmbedding.__init__ + ~TrainModelEmbedding.init_estim + ~TrainModelEmbedding.load_into_memory + ~TrainModelEmbedding.save + ~TrainModelEmbedding.save_eval + + + + + + \ No newline at end of file diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst new file mode 100644 index 000000000..6b8ba31b2 --- /dev/null +++ b/docs/api/sfaira.ui.UserInterface.rst @@ -0,0 +1,38 @@ +sfaira.ui.UserInterface +======================= + +.. currentmodule:: sfaira + +.. autoclass:: ui.UserInterface + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~ui.UserInterface.__init__ + ~ui.UserInterface.celltype_summary + ~ui.UserInterface.predict_all + ~ui.UserInterface.compute_all_kipoi + ~ui.UserInterface.predict_celltypes + ~ui.UserInterface.compute_celltype_kipoi + ~ui.UserInterface.compute_denoised_expression + ~ui.UserInterface.compute_denoised_expression_kipoi + ~ui.UserInterface.predict_embedding + ~ui.UserInterface.compute_embedding_kipoi + ~ui.UserInterface.deposit_zenodo + ~ui.UserInterface.filter_cells + ~ui.UserInterface.get_references + ~ui.UserInterface.load_data + ~ui.UserInterface.load_model_celltype + ~ui.UserInterface.load_model_embedding + ~ui.UserInterface.write_lookuptable + + + + + + \ No newline at end of file diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000..1d8488c31 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,45 @@ +Development +=========== + +Data zoo FAQ +------------ + +How are the meta data entries that I define in the constructor constrained or protected? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The items that are not free text are documented in the readthedocs data section, often, +these would require entries to be terms in an ontology. +If you make a mistake in defining these fields in a data loader that you contribute, +the template test data loader and any loading operation will throw an error +pointing at this meta data element. + +How is _load() used in data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`_load()` contains all processing steps that load raw data files into a ready to use adata object. +`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. +This adata object can be cached as an h5ad file named after the dataset ID for faster reloading +(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available +(if use_cached=False). 
+ +How is the feature space (gene names) manipulated during data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sfaira provides both gene names and ENSEMBL IDs. Missing IDs will automatically be inferred from the gene names and +vice versa. +Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); +in this case, counts are aggregated across these features. +Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. + +Datasets, DatasetGroups, DatasetSuperGroups - what are they? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Dataset: Custom class that loads a specific dataset. +DatasetGroup: A dataset group manages a collection of data loaders (multiple instances of Dataset). +This is useful to group for example all data loaders corresponding to a certain study or a certain tissue. +DatasetSuperGroups: A group of DatasetGroups that allows easy addition of multiple instances of DatasetGroup. + +Basics of sfaira lazy loading via split into constructor and _load function. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The constructor of a dataset defines all metadata associated with this data set. +The loading of the actual data happens in the `load()` function and not in the constructor. +This is useful as it allows initialising the datasets and accessing dataset metadata +without loading the actual count data. +DatasetGroups can contain initialised Datasets and can be subsetted based on metadata +before loading is triggered across the entire group. diff --git a/docs/using_data.rst b/docs/using_data.rst new file mode 100644 index 000000000..24f0a1cbb --- /dev/null +++ b/docs/using_data.rst @@ -0,0 +1,153 @@ +Using Data +========== + +..
image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png + :width: 600px + :align: center + +Build data repository locally +------------------------------ + +Build a repository structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 1. Choose a directory to dedicate to the data base, called root in the following. + 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. + +Note that the automated download is a feature of sfaira but not the core purpose of the package: +Sfaira allows you to efficiently interact with such a local data repository. +Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. + +Use 3rd party repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some organizations provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +One example for such an organization is the cellxgene_ data portal. +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon! +Contact us for support of any other repositories. + +.. _cellxgene: https://cellxgene.cziscience.com/ + +Genome management +----------------- + +We streamline feature spaces used by models by defining standardized gene sets that are used as model input. +Per default, sfaira works with the protein coding genes of a genome assembly right now. +A model topology version includes the genome it was trained for, which also defines the feature of this model as genes. +As genome assemblies are updated, model topology version can be updated and models retrained to reflect these changes.
+Note that because protein coding genes do not change drastically between genome assemblies, +samples can be carried over to assemblies they were not aligned against by matching gene identifiers. +Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. + +FAQ +--- + +How is the dataset’s ID structured? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi + +How do I assemble the data set ID if some of its element meta data are not unique? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The data set ID is designed to be a unique identifier of a data set. +Therefore, it is not an issue if it does not capture the full complexity of the data. +Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. + +What are cell-wise and sample-wise meta data? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Metadata can be set on a per sample level or, in some cases, per cell. +Sample-wise meta data can be directly set in the constructor (e.g. self.organism = “human”). +Cell-wise metadata can be provided in `.obs` of the loaded data, here, +a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels +(e.g. self.obs_key_organism). +Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. + +Which meta data objects are mandatory? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): + + - .id: Dataset ID. This is used to identify the data set uniquely. + Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" + - .download_url_data: Link to data download website. + Example: self.download_url_data = "some URL" + - .download_url_meta: Download link to metadata.
Assumes that meta data is defined in .download_url_data if not + specified. + Example: self.download_url_meta = "some URL" + - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var + (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. + Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = “GeneID” + - .author: First author of publication (or list of all authors). + Example: self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] + - .doi: DOI of publication + Example: self.doi = "10.1016/j.cell.2019.06.029" + - .organism (or .obs_key_organism): Organism sampled. + Example: self.organism = “human” + - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture + Example: self.sample_source = "primary_tissue" + +Highly recommended: + + - .normalization: Normalization of count data: + Example: self.normalization = “raw” + - .organ (or .obs_key_organ): Organ sampled. + Example: self.organ = “liver” + - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. + Example: self.assay_sc = “10x” + +Optional (if available): + + - .age (or .obs_key_age): Age of individual sampled. + Example: self.age = 80 # (80 years old for human) + - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. + Example: self.dev_stage = “mature” + - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human). + Example: self.ethnicity = “free text” + - .healthy (or .obs_key_healthy): Is the sample from a healthy individual? (bool) + Example: self.healthy = True + - .sex (or .obs_key_sex): Sex of individual sampled.
+ Example: self.sex = “male” + - .state_exact (or .obs_key_state_exact): Exact disease state + Example: self.state_exact = “free text” + - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. + Example: self.obs_key_cell_types_original = 'CellType' + - .year: Year of publication: + Example: self.year = 2019 + - .cell_line: Which cell line was used for the experiment (for cell culture samples) + Example: self.cell_line = "409B2 (CVCL_K092)" + - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) + - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided + (for cell culture samples) + +How do I cache data sets? +~~~~~~~~~~~~~~~~~~~~~~~~~ +When loading a dataset with `Dataset.load()`, you can specify if the adata object +should be cached or not (allow_caching=True). +If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. + +How do I add cell type annotation? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are simplifying this right now, new instructions will be available second half of January. + +Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Initialisation and data set loading are handled separately to allow lazy loading. +All steps that are required to load the count data and +additional metadata should be defined solely in the `_load` section. +Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. + +How do I tell sfaira where the gene names are? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. +If the gene names are in the index of `adata.var`, you can set “index” as the value of these attributes.
+ +I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +No, that is not a problem. They will automatically be converted to Ensembl IDs. +You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` +to which the names should be mapped. + +I have CITE-seq data, where can I put the protein quantification? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We will soon provide a structured interface for loading and accessing CITE-seq data, +for now you can add it into `self.adata.obsm[“CITE”]`. diff --git a/sfaira/unit_tests/versions/test_zoo.py b/sfaira/unit_tests/versions/test_zoo.py index b8663214f..831db82ec 100644 --- a/sfaira/unit_tests/versions/test_zoo.py +++ b/sfaira/unit_tests/versions/test_zoo.py @@ -5,7 +5,7 @@ from typing import Union import unittest -from sfaira.ui.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding +from sfaira.ui.model_zoo import ModelZoo class _TestZoo: @@ -38,14 +38,6 @@ def simulate(self): """ pass - def _test_kipoi_call(self): - """ - Test whether kipoi_experimental model call works. - - :return: - """ - self.zoo.call_kipoi() - def _test_basic(self, id: str): """ Test all relevant model methods.
@@ -56,7 +48,6 @@ def _test_basic(self, id: str): np.random.seed(1) self.simulate() self.init_zoo() - # self._test_kipoi_call() self.zoo_manual.set_model_id(id) @@ -68,8 +59,8 @@ def init_zoo(self): os.path.join(package_dir, '../test_data', 'model_lookuptable.csv'), header=0, index_col=0 ) - self.zoo = ModelZooEmbedding(model_lookuptable=lookup_table) - self.zoo_manual = ModelZooEmbedding(model_lookuptable=None) + self.zoo = ModelZoo(model_lookuptable=lookup_table) + self.zoo_manual = ModelZoo(model_lookuptable=None) def test_basic(self): self._test_basic(id="embedding_mouse_lung_vae_theislab_0.1_0.1") @@ -86,8 +77,8 @@ def init_zoo(self): os.path.join(package_dir, '../test_data', 'model_lookuptable.csv'), header=0, index_col=0 ) - self.zoo = ModelZooCelltype(model_lookuptable=lookup_table) - self.zoo_manual = ModelZooCelltype(model_lookuptable=None) + self.zoo = ModelZoo(model_lookuptable=lookup_table) + self.zoo_manual = ModelZoo(model_lookuptable=None) def test_basic(self): self._test_basic(id="celltype_mouse_lung_mlp_theislab_0.0.1_0.1") From 4b9fc64358721a4484567617ad4cb276037efea0 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 9 Jun 2021 12:53:06 +0200 Subject: [PATCH 157/161] Dev release merge (#300) --- docs/adding_dataset_classes.rst | 112 ------------- docs/api/sfaira.data.DatasetBase.rst | 106 ------------ docs/api/sfaira.data.DatasetInteractive.rst | 106 ------------ docs/api/sfaira.data.DatasetSuperGroup.rst | 55 ------- docs/api/sfaira.data.human.rst | 23 --- docs/api/sfaira.data.mouse.rst | 23 --- ...ira.genomes.ExtractFeatureListEnsemble.rst | 26 --- docs/api/sfaira.models.celltype.rst | 23 --- docs/api/sfaira.models.embedding.rst | 23 --- docs/api/sfaira.train.GridsearchContainer.rst | 37 ----- ...aira.train.SummarizeGridsearchCelltype.rst | 43 ----- ...ira.train.SummarizeGridsearchEmbedding.rst | 47 ------ docs/api/sfaira.train.TargetZoos.rst | 24 --- docs/api/sfaira.train.TrainModelCelltype.rst | 26 --- docs/api/sfaira.train.TrainModelEmbedding.rst | 26 --- docs/api/sfaira.ui.UserInterface.rst | 38 ----- docs/development.rst | 45 ------ docs/using_data.rst | 153 ------------------ 18 files changed, 936 deletions(-) delete mode 100644 docs/adding_dataset_classes.rst delete mode 100644 docs/api/sfaira.data.DatasetBase.rst delete mode 100644 docs/api/sfaira.data.DatasetInteractive.rst delete mode 100644 docs/api/sfaira.data.DatasetSuperGroup.rst delete mode 100644 docs/api/sfaira.data.human.rst delete mode 100644 docs/api/sfaira.data.mouse.rst delete mode 100644 docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst delete mode 100644 docs/api/sfaira.models.celltype.rst delete mode 100644 docs/api/sfaira.models.embedding.rst delete mode 100644 docs/api/sfaira.train.GridsearchContainer.rst delete mode 100644 docs/api/sfaira.train.SummarizeGridsearchCelltype.rst delete mode 100644 docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst delete mode 100644 docs/api/sfaira.train.TargetZoos.rst delete mode 100644 docs/api/sfaira.train.TrainModelCelltype.rst delete mode 100644 docs/api/sfaira.train.TrainModelEmbedding.rst delete mode 100644 
docs/api/sfaira.ui.UserInterface.rst delete mode 100644 docs/development.rst delete mode 100644 docs/using_data.rst diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst deleted file mode 100644 index cb499949d..000000000 --- a/docs/adding_dataset_classes.rst +++ /dev/null @@ -1,112 +0,0 @@ -The class-based data loader python file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. -In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. - -1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. - -.. code-block:: python - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # Data set meta data: You do not have to include all of these and can simply skip lines corresponding - # to attritbutes that you do not have access to. These are meta data on a sample level. - # The meta data attributes labeled with (*) may als be supplied per cell, see below, - # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - - self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). - - self.author = x # author (list) who sampled / created the data set - self.doi = x # doi of data set accompanying manuscript - - self.download_url_data = x # download website(s) of data files - self.download_url_meta = x # download website(s) of meta data files - - self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. 
Lancaster, 2014) - self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - self.cell_line = x # (*, optional) cell line used (for cell culture samples) - self.dev_stage = x # (*, optional) developmental stage of organism - self.ethnicity = x # (*, optional) ethnicity of sample - self.healthy = x # (*, optional) whether sample represents a healthy organism - self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - self.organ = x # (*, optional) organ (anatomical structure) - self.organism = x # (*) species / organism - self.sample_source = x # (*) whether the sample came from primary tissue or cell culture - self.sex = x # (*, optional) sex - self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - self.year = x # year in which sample was acquired - - # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the - # anndata instance (specifically in .obs) after loading. - # You need to make sure this is loaded in the loading script)! - # See above for a description what these meta data attributes mean. - # Again, if these attributes are note available, you can simply leave this out. 
- self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .age is assay_differentiation) - self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cell_types_original = x # (optional) - # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and - # directory as the python file of this data loader (see below). - - -2. A function called to load the data set into memory: -It is important to set an automated path indicating the location of the raw files here. -Our recommendation for this directory set-up is that you define a directory folder in your directory structure -in which all of these raw files will be (self.path) and then add a sub-directory named as -`self.directory_formatted_doi` (ie. 
the doi with all special characters replaced by "_" and place the raw files -directly into this sub directory. - -.. code-block:: python - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad - return adata - -In summary, a python file for a mouse lung data set could look like this: - -.. code-block:: python - - class MyDataset(DatasetBase) - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.author = "me" - self.doi = ["my preprint", "my peer-reviewed publication"] - self.download_url_data = "my GEO upload" - self.normalisation = "raw" # because I uploaded raw counts, which is good practice! - self.organ = "lung" - self.organism = "mouse" - self.assay_sc = "smart-seq2" - self.year = "2020" - self.sample_source = "primary_tissue" - - self.obs_key_cell_types_original = "louvain_named" # i save my cell type names in here - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) - return adata diff --git a/docs/api/sfaira.data.DatasetBase.rst b/docs/api/sfaira.data.DatasetBase.rst deleted file mode 100644 index b8b86e9c9..000000000 --- a/docs/api/sfaira.data.DatasetBase.rst +++ /dev/null @@ -1,106 +0,0 @@ -sfaira.data.DatasetBase -======================= - -.. currentmodule:: sfaira.data - -.. autoclass:: DatasetBase - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~DatasetBase.__init__ - ~DatasetBase.clear - ~DatasetBase.collapse_counts - ~DatasetBase.download - ~DatasetBase.load - ~DatasetBase.load_meta - ~DatasetBase.load_ontology_class_map - ~DatasetBase.project_celltypes_to_ontology - ~DatasetBase.set_dataset_id - ~DatasetBase.show_summary - ~DatasetBase.streamline_features - ~DatasetBase.streamline_metadata - ~DatasetBase.subset_cells - ~DatasetBase.write_backed - ~DatasetBase.write_distributed_store - ~DatasetBase.write_meta - ~DatasetBase.write_ontology_class_map - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DatasetBase.additional_annotation_key - ~DatasetBase.annotated - ~DatasetBase.assay_differentiation - ~DatasetBase.assay_differentiation_obs_key - ~DatasetBase.assay_sc - ~DatasetBase.assay_sc_obs_key - ~DatasetBase.assay_type_differentiation - ~DatasetBase.assay_type_differentiation_obs_key - ~DatasetBase.author - ~DatasetBase.bio_sample - ~DatasetBase.bio_sample_obs_key - ~DatasetBase.cache_fn - ~DatasetBase.cell_line - ~DatasetBase.cell_line_obs_key - ~DatasetBase.cell_ontology_map - ~DatasetBase.cell_types_original_obs_key - ~DatasetBase.cellontology_class_obs_key - ~DatasetBase.cellontology_id_obs_key - ~DatasetBase.celltypes_universe - ~DatasetBase.citation - ~DatasetBase.data_dir - ~DatasetBase.default_embedding - ~DatasetBase.development_stage - ~DatasetBase.development_stage_obs_key - ~DatasetBase.directory_formatted_doi - ~DatasetBase.disease - ~DatasetBase.disease_obs_key - ~DatasetBase.doi - ~DatasetBase.doi_cleaned_id - ~DatasetBase.doi_main - ~DatasetBase.download_url_data - ~DatasetBase.download_url_meta - ~DatasetBase.ethnicity - ~DatasetBase.ethnicity_obs_key - ~DatasetBase.fn_ontology_class_map_tsv - ~DatasetBase.gene_id_ensembl_var_key - ~DatasetBase.gene_id_symbols_var_key - ~DatasetBase.id - ~DatasetBase.individual - ~DatasetBase.individual_obs_key - ~DatasetBase.loaded - ~DatasetBase.meta - ~DatasetBase.meta_fn - ~DatasetBase.ncells - 
~DatasetBase.normalization - ~DatasetBase.ontology_celltypes - ~DatasetBase.ontology_organ - ~DatasetBase.organ - ~DatasetBase.organ_obs_key - ~DatasetBase.organism - ~DatasetBase.organism_obs_key - ~DatasetBase.primary_data - ~DatasetBase.sample_source - ~DatasetBase.sample_source_obs_key - ~DatasetBase.sex - ~DatasetBase.sex_obs_key - ~DatasetBase.source - ~DatasetBase.state_exact - ~DatasetBase.state_exact_obs_key - ~DatasetBase.tech_sample - ~DatasetBase.tech_sample_obs_key - ~DatasetBase.title - ~DatasetBase.year - - \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetInteractive.rst b/docs/api/sfaira.data.DatasetInteractive.rst deleted file mode 100644 index daf81f984..000000000 --- a/docs/api/sfaira.data.DatasetInteractive.rst +++ /dev/null @@ -1,106 +0,0 @@ -sfaira.data.DatasetInteractive -============================== - -.. currentmodule:: sfaira.data - -.. autoclass:: DatasetInteractive - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DatasetInteractive.__init__ - ~DatasetInteractive.clear - ~DatasetInteractive.collapse_counts - ~DatasetInteractive.download - ~DatasetInteractive.load - ~DatasetInteractive.load_meta - ~DatasetInteractive.load_ontology_class_map - ~DatasetInteractive.project_celltypes_to_ontology - ~DatasetInteractive.set_dataset_id - ~DatasetInteractive.show_summary - ~DatasetInteractive.streamline_features - ~DatasetInteractive.streamline_metadata - ~DatasetInteractive.subset_cells - ~DatasetInteractive.write_backed - ~DatasetInteractive.write_distributed_store - ~DatasetInteractive.write_meta - ~DatasetInteractive.write_ontology_class_map - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DatasetInteractive.additional_annotation_key - ~DatasetInteractive.annotated - ~DatasetInteractive.assay_differentiation - ~DatasetInteractive.assay_differentiation_obs_key - ~DatasetInteractive.assay_sc - ~DatasetInteractive.assay_sc_obs_key - ~DatasetInteractive.assay_type_differentiation - ~DatasetInteractive.assay_type_differentiation_obs_key - ~DatasetInteractive.author - ~DatasetInteractive.bio_sample - ~DatasetInteractive.bio_sample_obs_key - ~DatasetInteractive.cache_fn - ~DatasetInteractive.cell_line - ~DatasetInteractive.cell_line_obs_key - ~DatasetInteractive.cell_ontology_map - ~DatasetInteractive.cell_types_original_obs_key - ~DatasetInteractive.cellontology_class_obs_key - ~DatasetInteractive.cellontology_id_obs_key - ~DatasetInteractive.celltypes_universe - ~DatasetInteractive.citation - ~DatasetInteractive.data_dir - ~DatasetInteractive.default_embedding - ~DatasetInteractive.development_stage - ~DatasetInteractive.development_stage_obs_key - ~DatasetInteractive.directory_formatted_doi - ~DatasetInteractive.disease - ~DatasetInteractive.disease_obs_key - ~DatasetInteractive.doi - ~DatasetInteractive.doi_cleaned_id - ~DatasetInteractive.doi_main - ~DatasetInteractive.download_url_data - ~DatasetInteractive.download_url_meta - ~DatasetInteractive.ethnicity - ~DatasetInteractive.ethnicity_obs_key - ~DatasetInteractive.fn_ontology_class_map_tsv - ~DatasetInteractive.gene_id_ensembl_var_key - ~DatasetInteractive.gene_id_symbols_var_key - ~DatasetInteractive.id - ~DatasetInteractive.individual - ~DatasetInteractive.individual_obs_key - ~DatasetInteractive.loaded - ~DatasetInteractive.meta - ~DatasetInteractive.meta_fn - ~DatasetInteractive.ncells - ~DatasetInteractive.normalization - ~DatasetInteractive.ontology_celltypes - ~DatasetInteractive.ontology_organ - ~DatasetInteractive.organ - ~DatasetInteractive.organ_obs_key - ~DatasetInteractive.organism - ~DatasetInteractive.organism_obs_key - ~DatasetInteractive.primary_data - 
~DatasetInteractive.sample_source - ~DatasetInteractive.sample_source_obs_key - ~DatasetInteractive.sex - ~DatasetInteractive.sex_obs_key - ~DatasetInteractive.source - ~DatasetInteractive.state_exact - ~DatasetInteractive.state_exact_obs_key - ~DatasetInteractive.tech_sample - ~DatasetInteractive.tech_sample_obs_key - ~DatasetInteractive.title - ~DatasetInteractive.year - - \ No newline at end of file diff --git a/docs/api/sfaira.data.DatasetSuperGroup.rst b/docs/api/sfaira.data.DatasetSuperGroup.rst deleted file mode 100644 index 5bfa255c8..000000000 --- a/docs/api/sfaira.data.DatasetSuperGroup.rst +++ /dev/null @@ -1,55 +0,0 @@ -sfaira.data.DatasetSuperGroup -============================= - -.. currentmodule:: sfaira.data - -.. autoclass:: DatasetSuperGroup - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DatasetSuperGroup.__init__ - ~DatasetSuperGroup.collapse_counts - ~DatasetSuperGroup.delete_backed - ~DatasetSuperGroup.download - ~DatasetSuperGroup.extend_dataset_groups - ~DatasetSuperGroup.flatten - ~DatasetSuperGroup.get_gc - ~DatasetSuperGroup.load - ~DatasetSuperGroup.load_cached_backed - ~DatasetSuperGroup.load_config - ~DatasetSuperGroup.ncells - ~DatasetSuperGroup.ncells_bydataset - ~DatasetSuperGroup.ncells_bydataset_flat - ~DatasetSuperGroup.project_celltypes_to_ontology - ~DatasetSuperGroup.remove_duplicates - ~DatasetSuperGroup.set_dataset_groups - ~DatasetSuperGroup.show_summary - ~DatasetSuperGroup.streamline_features - ~DatasetSuperGroup.streamline_metadata - ~DatasetSuperGroup.subset - ~DatasetSuperGroup.subset_cells - ~DatasetSuperGroup.write_backed - ~DatasetSuperGroup.write_config - ~DatasetSuperGroup.write_distributed_store - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DatasetSuperGroup.adata - ~DatasetSuperGroup.adata_ls - ~DatasetSuperGroup.additional_annotation_key - ~DatasetSuperGroup.datasets - ~DatasetSuperGroup.ids - - \ No newline at end of file diff --git a/docs/api/sfaira.data.human.rst b/docs/api/sfaira.data.human.rst deleted file mode 100644 index bddab197c..000000000 --- a/docs/api/sfaira.data.human.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.data.human -================= - -.. automodule:: sfaira.data.human - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.data.mouse.rst b/docs/api/sfaira.data.mouse.rst deleted file mode 100644 index af3c07453..000000000 --- a/docs/api/sfaira.data.mouse.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.data.mouse -================= - -.. automodule:: sfaira.data.mouse - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst b/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst deleted file mode 100644 index 415c01979..000000000 --- a/docs/api/sfaira.genomes.ExtractFeatureListEnsemble.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.genomes.ExtractFeatureListEnsemble -========================================= - -.. currentmodule:: sfaira.genomes - -.. autoclass:: ExtractFeatureListEnsemble - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ExtractFeatureListEnsemble.__init__ - ~ExtractFeatureListEnsemble.from_ensemble_gtf - ~ExtractFeatureListEnsemble.reduce_types - ~ExtractFeatureListEnsemble.reduce_types_protein_coding - ~ExtractFeatureListEnsemble.write_gene_table_to_csv - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.models.celltype.rst b/docs/api/sfaira.models.celltype.rst deleted file mode 100644 index 48b1f74e2..000000000 --- a/docs/api/sfaira.models.celltype.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.models.celltype -====================== - -.. 
automodule:: sfaira.models.celltype - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.models.embedding.rst b/docs/api/sfaira.models.embedding.rst deleted file mode 100644 index 2446b787d..000000000 --- a/docs/api/sfaira.models.embedding.rst +++ /dev/null @@ -1,23 +0,0 @@ -sfaira.models.embedding -======================= - -.. automodule:: sfaira.models.embedding - - - - - - - - - - - - - - - - - - - diff --git a/docs/api/sfaira.train.GridsearchContainer.rst b/docs/api/sfaira.train.GridsearchContainer.rst deleted file mode 100644 index ae233fecf..000000000 --- a/docs/api/sfaira.train.GridsearchContainer.rst +++ /dev/null @@ -1,37 +0,0 @@ -sfaira.train.GridsearchContainer -================================ - -.. currentmodule:: sfaira.train - -.. autoclass:: GridsearchContainer - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~GridsearchContainer.__init__ - ~GridsearchContainer.best_model_by_partition - ~GridsearchContainer.get_best_model_ids - ~GridsearchContainer.load_gs - ~GridsearchContainer.load_y - ~GridsearchContainer.plot_best_model_by_hyperparam - ~GridsearchContainer.plot_completions - ~GridsearchContainer.plot_training_history - ~GridsearchContainer.save_best_weight - ~GridsearchContainer.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~GridsearchContainer.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst b/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst deleted file mode 100644 index eeb1cb5a9..000000000 --- a/docs/api/sfaira.train.SummarizeGridsearchCelltype.rst +++ /dev/null @@ -1,43 +0,0 @@ -sfaira.train.SummarizeGridsearchCelltype -======================================== - -.. currentmodule:: sfaira.train - -.. autoclass:: SummarizeGridsearchCelltype - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SummarizeGridsearchCelltype.__init__ - ~SummarizeGridsearchCelltype.best_model_by_partition - ~SummarizeGridsearchCelltype.best_model_celltype - ~SummarizeGridsearchCelltype.create_summary_tab - ~SummarizeGridsearchCelltype.get_best_model_ids - ~SummarizeGridsearchCelltype.load_gs - ~SummarizeGridsearchCelltype.load_ontology_names - ~SummarizeGridsearchCelltype.load_y - ~SummarizeGridsearchCelltype.plot_best - ~SummarizeGridsearchCelltype.plot_best_classwise_heatmap - ~SummarizeGridsearchCelltype.plot_best_classwise_scatter - ~SummarizeGridsearchCelltype.plot_best_model_by_hyperparam - ~SummarizeGridsearchCelltype.plot_completions - ~SummarizeGridsearchCelltype.plot_training_history - ~SummarizeGridsearchCelltype.save_best_weight - ~SummarizeGridsearchCelltype.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~SummarizeGridsearchCelltype.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst b/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst deleted file mode 100644 index 5edcfac79..000000000 --- a/docs/api/sfaira.train.SummarizeGridsearchEmbedding.rst +++ /dev/null @@ -1,47 +0,0 @@ -sfaira.train.SummarizeGridsearchEmbedding -========================================= - -.. currentmodule:: sfaira.train - -.. autoclass:: SummarizeGridsearchEmbedding - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SummarizeGridsearchEmbedding.__init__ - ~SummarizeGridsearchEmbedding.best_model_by_partition - ~SummarizeGridsearchEmbedding.best_model_embedding_latentspace - ~SummarizeGridsearchEmbedding.create_summary_tab - ~SummarizeGridsearchEmbedding.get_best_model_ids - ~SummarizeGridsearchEmbedding.get_gradients_by_celltype - ~SummarizeGridsearchEmbedding.load_gs - ~SummarizeGridsearchEmbedding.load_y - ~SummarizeGridsearchEmbedding.plot_active_latent_units - ~SummarizeGridsearchEmbedding.plot_best - ~SummarizeGridsearchEmbedding.plot_best_model_by_hyperparam - ~SummarizeGridsearchEmbedding.plot_completions - ~SummarizeGridsearchEmbedding.plot_gradient_cor - ~SummarizeGridsearchEmbedding.plot_gradient_distr - ~SummarizeGridsearchEmbedding.plot_npc - ~SummarizeGridsearchEmbedding.plot_training_history - ~SummarizeGridsearchEmbedding.save_best_weight - ~SummarizeGridsearchEmbedding.write_best_hyparam - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~SummarizeGridsearchEmbedding.List - ~SummarizeGridsearchEmbedding.Union - ~SummarizeGridsearchEmbedding.cv_keys - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TargetZoos.rst b/docs/api/sfaira.train.TargetZoos.rst deleted file mode 100644 index bbf18cd74..000000000 --- a/docs/api/sfaira.train.TargetZoos.rst +++ /dev/null @@ -1,24 +0,0 @@ -sfaira.train.TargetZoos -======================= - -.. currentmodule:: sfaira.train - -.. autoclass:: TargetZoos - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TargetZoos.__init__ - ~TargetZoos.write_celltypes_tocsv_human - ~TargetZoos.write_celltypes_tocsv_mouse - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelCelltype.rst b/docs/api/sfaira.train.TrainModelCelltype.rst deleted file mode 100644 index 79cf1acc3..000000000 --- a/docs/api/sfaira.train.TrainModelCelltype.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.train.TrainModelCelltype -=============================== - -.. 
currentmodule:: sfaira.train - -.. autoclass:: TrainModelCelltype - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TrainModelCelltype.__init__ - ~TrainModelCelltype.init_estim - ~TrainModelCelltype.load_into_memory - ~TrainModelCelltype.save - ~TrainModelCelltype.save_eval - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.train.TrainModelEmbedding.rst b/docs/api/sfaira.train.TrainModelEmbedding.rst deleted file mode 100644 index fc3dea8d6..000000000 --- a/docs/api/sfaira.train.TrainModelEmbedding.rst +++ /dev/null @@ -1,26 +0,0 @@ -sfaira.train.TrainModelEmbedding -================================ - -.. currentmodule:: sfaira.train - -.. autoclass:: TrainModelEmbedding - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~TrainModelEmbedding.__init__ - ~TrainModelEmbedding.init_estim - ~TrainModelEmbedding.load_into_memory - ~TrainModelEmbedding.save - ~TrainModelEmbedding.save_eval - - - - - - \ No newline at end of file diff --git a/docs/api/sfaira.ui.UserInterface.rst b/docs/api/sfaira.ui.UserInterface.rst deleted file mode 100644 index 6b8ba31b2..000000000 --- a/docs/api/sfaira.ui.UserInterface.rst +++ /dev/null @@ -1,38 +0,0 @@ -sfaira.ui.UserInterface -======================= - -.. currentmodule:: sfaira - -.. autoclass:: ui.UserInterface - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~ui.UserInterface.__init__ - ~ui.UserInterface.celltype_summary - ~ui.UserInterface.predict_all - ~ui.UserInterface.compute_all_kipoi - ~ui.UserInterface.predict_celltypes - ~ui.UserInterface.compute_celltype_kipoi - ~ui.UserInterface.compute_denoised_expression - ~ui.UserInterface.compute_denoised_expression_kipoi - ~ui.UserInterface.predict_embedding - ~ui.UserInterface.compute_embedding_kipoi - ~ui.UserInterface.deposit_zenodo - ~ui.UserInterface.filter_cells - ~ui.UserInterface.get_references - ~ui.UserInterface.load_data - ~ui.UserInterface.load_model_celltype - ~ui.UserInterface.load_model_embedding - ~ui.UserInterface.write_lookuptable - - - - - - \ No newline at end of file diff --git a/docs/development.rst b/docs/development.rst deleted file mode 100644 index 1d8488c31..000000000 --- a/docs/development.rst +++ /dev/null @@ -1,45 +0,0 @@ -Development -=========== - -Data zoo FAQ ------------- - -How are the meta data entries that I define in the constructor constrained or protected? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The items that are not free text are documented in the readthedocs data section, often, -these would require entries to be terms in an ontology. -If you make a mistake in defining these fields in a data loader that you contribute, -the template test data loader and any loading operation will throw an error -pointing at this meta data element. - -How is _load() used in data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`_load()` contains all processing steps that load raw data files into a ready to use adata object. -`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. -This adata object can be cached as an h5ad file named after the dataset ID for faster reloading -(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available -(if use_cached=False). 
- -How is the feature space (gene names) manipulated during data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sfaira provides both gene names and ENSEMBL IDs. Missing IDs will automatically be inferred from the gene names and -vice versa. -Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); -in this case, counts are aggregated across these features. -Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. - -Datasets, DatasetGroups, DatasetSuperGroups - what are they? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Dataset: Custom class that loads a specific dataset. -DatasetGroup: A dataset group manages collection of data loaders (multiple instances of Dataset). -This is useful to group for example all data loaders corresponding to a certain study or a certain tissue. -DatasetSuperGroups: A group of DatasetGroups that allow easy addition of multiple instances of DatasetGroup. - -Basics of sfaira lazy loading via split into constructor and _load function. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The constructor of a dataset defines all metadata associated with this data set. -The loading of the actual data happens in the `load()` function and not in the constructor. -This is useful as it allows initialising the datasets and accessing dataset metadata -without loading the actual count data. -DatasetGroups can contain initialised Datasets and can be subsetted based on metadata -before loading is triggered across the entire group. diff --git a/docs/using_data.rst b/docs/using_data.rst deleted file mode 100644 index 24f0a1cbb..000000000 --- a/docs/using_data.rst +++ /dev/null @@ -1,153 +0,0 @@ -Using Data -========== - -.. 
image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png - :width: 600px - :align: center - -Build data repository locally ------------------------------- - -Build a repository structure -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - 1. Choose a directory to dedicate to the data base, called root in the following. - 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. - -Note that the automated download is a feature of sfaira but not the core purpose of the package: -Sfaira allows you efficiently interact with such a local data repository. -Some data sets cannot be automatically downloaded and need you manual intervention, which we report in the download script output. - -Use 3rd party repositories -~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some organization provide streamlined data objects that can be directly consumed by data zoos such as sfaira. -One example for such an organization is the cellxgene_ data portal. -Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. -Data loaders for cellxgene structured data objects will be available soon! -Contact us for support of any other repositories. - -.. _cellxgene: https://cellxgene.cziscience.com/ - -Genome management ------------------ - -We streamline feature spaces used by models by defining standardized gene sets that are used as model input. -Per default, sfaira works with the protein coding genes of a genome assembly right now. -A model topology version includes the genome it was trained for, which also defines the feature of this model as genes. -As genome assemblies are updated, model topology version can be updated and models retrained to reflect these changes. 
-Note that because protein coding genes do not change drastically between genome assemblies, -sample can be carried over to assemblies they were not aligned against by matching gene identifiers. -Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. - -FAQ ---- - -How is the dataset’s ID structured? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi - -How do I assemble the data set ID if some of its element meta data are not unique? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The data set ID is designed to be a unique identifier of a data set. -Therefore, it is not an issue if it does not capture the full complexity of the data. -Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. - -What are cell-wise and sample-wise meta data? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Metadata can be set on a per sample level or, in some cases, per cell. -Sample-wise meta data can be directly set in the constructor (e.g self.organism = “human”). -Cell-wise metadata can be provided in `.obs` of the loaded data, here, -a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels -(e.g. self.obs_key_organism). -Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. - -Which meta data objects are mandatory? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): - - - .id: Dataset ID. This is used to identify the data set uniquely. - Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - - .download_url_data: Link to data download website. - Example: self.download = "some URL" - - .download_url_meta: Download link to metadata. 
Assumes that meta data is defined in .download_url_data if not - specified. - Example: self.download_meta = "some URL" - - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var - (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. - Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = “GeneID” - - .author: First author of publication (or list of all authors). - self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] - - .doi: Doi of publication - Example: self.doi = "10.1016/j.cell.2019.06.029" - - .organism (or .obs_key_organism): Organism sampled. - Example: self.organism = “human” - - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture - Example: self.sample_source = "primary_tissue" - -Highly recommended: - - - .normalization: Normalization of count data: - Example: self.normalization = “raw” - - .organ (or .obs_key_organ): Organ sampled. - Example: self.organ = “liver” - - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. - Example: self.assay_sc = “10x” - -Optional (if available): - - - .age (or .obs_key_age): Age of individual sampled. - Example: self.age = 80 # (80 years old for human) - - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. - Example: self.dev_stage = “mature” - - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human). - Example: self.ethnicity = “free text” - - .healthy (or .obs_key_healthy): Is the sampled from a disease individual? (bool) - Example: self.healthy = True - - .sex (or .obs_key_sex): Sex of individual sampled. 
- Example: self.sex = “male” - - .state_exact (or .obs_key_state_exact): Exact disease state - self.state_exact = free text - - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. - Example: self.obs_key_cell_types_original = 'CellType' - - .year: Year of publication: - Example: self.year = 2019 - - .cell_line: Which cell line was used for the experiment (for cell culture samples) - Example: self.cell_line = "409B2 (CVCL_K092)" - - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) - - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided - (for cell culture samples) - -How do I cache data sets? -~~~~~~~~~~~~~~~~~~~~~~~~~ -When loading a dataset with `Dataset.load(),`you can specify if the adata object -should be cached or not (allow_caching= True). -If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. - -How do I add cell type annotation? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We are simplifying this right now, new instructions will be available second half of January. - -Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Initiation and data set loading are handled separately to allow lazy loading. -All steps that are required to load the count data and -additional metadata should be defined solely in the `_load` section. -Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. - -How do I tell sfaira where the gene names are? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. -If the gene names are in the index of this data frame, you can set “index” as the value of these attributes. 
- -I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -No, that is not a problem. They will automatically be converted to Ensembl IDs. -You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` -to which the names should be mapped to. - -I have CITE-seq data, where can I put the protein quantification? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We will soon provide a structured interface for loading and accessing CITE-seq data, -for now you can add it into `self.adata.obsm[“CITE”]`. From 7a2d677b54e9e2e1b6072f724d5d3e5582ee2c3e Mon Sep 17 00:00:00 2001 From: "David S. Fischer" Date: Wed, 9 Jun 2021 13:03:07 +0200 Subject: [PATCH 158/161] Dev release merge (#301) --- sfaira/unit_tests/versions/test_zoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sfaira/unit_tests/versions/test_zoo.py b/sfaira/unit_tests/versions/test_zoo.py index 831db82ec..7ab6d821d 100644 --- a/sfaira/unit_tests/versions/test_zoo.py +++ b/sfaira/unit_tests/versions/test_zoo.py @@ -5,7 +5,7 @@ from typing import Union import unittest -from sfaira.ui.model_zoo import ModelZoo +from sfaira.ui.model_zoo import ModelZoo, ModelZooCelltype, ModelZooEmbedding class _TestZoo: From 3e2e7976c9e086ed58c90a97c733eda824fb15c7 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 9 Jun 2021 13:11:34 +0200 Subject: [PATCH 159/161] renamed ui test folder (#302) --- sfaira/unit_tests/{interface => ui}/__init__.py | 0 sfaira/unit_tests/{interface => ui}/test_userinterface.py | 0 sfaira/unit_tests/{interface => ui}/test_zoo.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename sfaira/unit_tests/{interface => ui}/__init__.py (100%) rename sfaira/unit_tests/{interface => ui}/test_userinterface.py (100%) rename sfaira/unit_tests/{interface => ui}/test_zoo.py (100%) diff --git a/sfaira/unit_tests/interface/__init__.py b/sfaira/unit_tests/ui/__init__.py similarity index 100% rename from sfaira/unit_tests/interface/__init__.py rename to sfaira/unit_tests/ui/__init__.py diff --git a/sfaira/unit_tests/interface/test_userinterface.py b/sfaira/unit_tests/ui/test_userinterface.py similarity index 100% rename from sfaira/unit_tests/interface/test_userinterface.py rename to sfaira/unit_tests/ui/test_userinterface.py diff --git a/sfaira/unit_tests/interface/test_zoo.py b/sfaira/unit_tests/ui/test_zoo.py similarity index 100% rename from sfaira/unit_tests/interface/test_zoo.py rename to sfaira/unit_tests/ui/test_zoo.py From 2ab8d9bf28faad8d71b3f4272c8f9b6c3aabeed5 Mon Sep 17 00:00:00 2001 From: "David S. 
Fischer" Date: Wed, 9 Jun 2021 13:41:34 +0200 Subject: [PATCH 160/161] release v0.3.1 (#297) --- docs/adding_dataset_classes.rst | 112 +++++++++++++++++++++++ docs/development.rst | 45 ++++++++++ docs/using_data.rst | 153 ++++++++++++++++++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100644 docs/adding_dataset_classes.rst create mode 100644 docs/development.rst create mode 100644 docs/using_data.rst diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst new file mode 100644 index 000000000..cb499949d --- /dev/null +++ b/docs/adding_dataset_classes.rst @@ -0,0 +1,112 @@ +The class-based data loader python file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. +In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. + +1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. + +.. code-block:: python + + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + # Data set meta data: You do not have to include all of these and can simply skip lines corresponding + # to attributes that you do not have access to. These are meta data on a sample level. + # The meta data attributes labeled with (*) may also be supplied per cell, see below, + # in this case, if you supply a .obs_key* attribute, you can leave out the sample-wise attribute. + + self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi).
+ + self.author = x # author (list) who sampled / created the data set + self.doi = x # doi of data set accompanying manuscript + + self.download_url_data = x # download website(s) of data files + self.download_url_meta = x # download website(s) of meta data files + + self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) + self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) + self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) + self.cell_line = x # (*, optional) cell line used (for cell culture samples) + self.dev_stage = x # (*, optional) developmental stage of organism + self.ethnicity = x # (*, optional) ethnicity of sample + self.healthy = x # (*, optional) whether sample represents a healthy organism + self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") + self.organ = x # (*, optional) organ (anatomical structure) + self.organism = x # (*) species / organism + self.sample_source = x # (*) whether the sample came from primary tissue or cell culture + self.sex = x # (*, optional) sex + self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample + self.year = x # year in which sample was acquired + + # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the + # anndata instance (specifically in .obs) after loading. + # You need to make sure this is loaded in the loading script! + # See above for a description of what these meta data attributes mean. + # Again, if these attributes are not available, you can simply leave this out.
+ self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) + self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .assay_differentiation is provided) + self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) + self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) + self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) + self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) + self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) + self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) + self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) + self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) + self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) + self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) + # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. + # name of column which contains the original free text cell type labels: + self.obs_key_cell_types_original = x # (optional) + # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and + # directory as the python file of this data loader (see below). + + +2. A function called to load the data set into memory: +It is important to set an automated path indicating the location of the raw files here. +Our recommendation for this directory set-up is that you define a directory folder in your directory structure +in which all of these raw files will be (self.path) and then add a sub-directory named as +`self.directory_formatted_doi` (i.e.
the doi with all special characters replaced by "_") and place the raw files +directly into this sub directory. + +.. code-block:: python + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad + return adata + +In summary, a python file for a mouse lung data set could look like this: + +.. code-block:: python + + class MyDataset(DatasetBase): + def __init__( + self, + path: Union[str, None] = None, + meta_path: Union[str, None] = None, + cache_path: Union[str, None] = None, + **kwargs + ): + super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) + self.author = "me" + self.doi = ["my preprint", "my peer-reviewed publication"] + self.download_url_data = "my GEO upload" + self.normalisation = "raw" # because I uploaded raw counts, which is good practice! + self.organ = "lung" + self.organism = "mouse" + self.assay_sc = "smart-seq2" + self.year = "2020" + self.sample_source = "primary_tissue" + + self.obs_key_cell_types_original = "louvain_named" # I save my cell type names in here + + def load(data_dir, fn=None) -> anndata.AnnData: + fn = os.path.join(data_dir, "my.h5ad") + adata = anndata.read(fn) + return adata diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000..1d8488c31 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,45 @@ +Development +=========== + +Data zoo FAQ +------------ + +How are the meta data entries that I define in the constructor constrained or protected? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The items that are not free text are documented in the readthedocs data section; often, +these would require entries to be terms in an ontology.
+If you make a mistake in defining these fields in a data loader that you contribute, +the template test data loader and any loading operation will throw an error +pointing at this meta data element. + +How is _load() used in data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`_load()` contains all processing steps that load raw data files into a ready to use adata object. +`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. +This adata object can be cached as an h5ad file named after the dataset ID for faster reloading +(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available +(if use_cached=False). + +How is the feature space (gene names) manipulated during data loading? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sfaira provides both gene names and ENSEMBL IDs. Missing IDs will automatically be inferred from the gene names and +vice versa. +Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); +in this case, counts are aggregated across these features. +Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. + +Datasets, DatasetGroups, DatasetSuperGroups - what are they? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Dataset: Custom class that loads a specific dataset. +DatasetGroup: A dataset group manages collection of data loaders (multiple instances of Dataset). +This is useful to group for example all data loaders corresponding to a certain study or a certain tissue. +DatasetSuperGroups: A group of DatasetGroups that allow easy addition of multiple instances of DatasetGroup. + +Basics of sfaira lazy loading via split into constructor and _load function. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The constructor of a dataset defines all metadata associated with this data set. 
+The loading of the actual data happens in the `load()` function and not in the constructor. +This is useful as it allows initialising the datasets and accessing dataset metadata +without loading the actual count data. +DatasetGroups can contain initialised Datasets and can be subsetted based on metadata +before loading is triggered across the entire group. diff --git a/docs/using_data.rst b/docs/using_data.rst new file mode 100644 index 000000000..24f0a1cbb --- /dev/null +++ b/docs/using_data.rst @@ -0,0 +1,153 @@ +Using Data +========== + +.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png + :width: 600px + :align: center + +Build data repository locally +------------------------------ + +Build a repository structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 1. Choose a directory to dedicate to the data base, called root in the following. + 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. + +Note that the automated download is a feature of sfaira but not the core purpose of the package: +Sfaira allows you to efficiently interact with such a local data repository. +Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. + +Use 3rd party repositories +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some organizations provide streamlined data objects that can be directly consumed by data zoos such as sfaira. +One example for such an organization is the cellxgene_ data portal. +Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. +Data loaders for cellxgene structured data objects will be available soon! +Contact us for support of any other repositories. + +..
_cellxgene: https://cellxgene.cziscience.com/ + +Genome management +----------------- + +We streamline feature spaces used by models by defining standardized gene sets that are used as model input. +Per default, sfaira works with the protein coding genes of a genome assembly right now. +A model topology version includes the genome it was trained for, which also defines the features of this model as genes. +As genome assemblies are updated, model topology versions can be updated and models retrained to reflect these changes. +Note that because protein coding genes do not change drastically between genome assemblies, +samples can be carried over to assemblies they were not aligned against by matching gene identifiers. +Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. + +FAQ +--- + +How is the dataset’s ID structured? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi + +How do I assemble the data set ID if some of its element meta data are not unique? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The data set ID is designed to be a unique identifier of a data set. +Therefore, it is not an issue if it does not capture the full complexity of the data. +Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. + +What are cell-wise and sample-wise meta data? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Metadata can be set on a per sample level or, in some cases, per cell. +Sample-wise meta data can be directly set in the constructor (e.g. self.organism = "human"). +Cell-wise metadata can be provided in `.obs` of the loaded data; here, +a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels +(e.g. self.obs_key_organism).
+Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. + +Which meta data objects are mandatory? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): + + - .id: Dataset ID. This is used to identify the data set uniquely. + Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" + - .download_url_data: Link to data download website. + Example: self.download_url_data = "some URL" + - .download_url_meta: Download link to metadata. Assumes that meta data is defined in .download_url_data if not + specified. + Example: self.download_url_meta = "some URL" + - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var + (if index of adata.var, set to "index", otherwise to column name). One of the two must be provided. + Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = "GeneID" + - .author: First author of publication (or list of all authors). + Example: self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] + - .doi: DOI of publication + Example: self.doi = "10.1016/j.cell.2019.06.029" + - .organism (or .obs_key_organism): Organism sampled. + Example: self.organism = "human" + - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture + Example: self.sample_source = "primary_tissue" + +Highly recommended: + + - .normalization: Normalization of count data: + Example: self.normalization = "raw" + - .organ (or .obs_key_organ): Organ sampled. + Example: self.organ = "liver" + - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. + Example: self.assay_sc = "10x" + +Optional (if available): + + - .age (or .obs_key_age): Age of individual sampled.
+ Example: self.age = 80 # (80 years old for human) + - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. + Example: self.dev_stage = "mature" + - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human). + Example: self.ethnicity = "free text" + - .healthy (or .obs_key_healthy): Whether the sample is from a healthy individual (bool) + Example: self.healthy = True + - .sex (or .obs_key_sex): Sex of individual sampled. + Example: self.sex = "male" + - .state_exact (or .obs_key_state_exact): Exact disease state + Example: self.state_exact = "free text" + - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. + Example: self.obs_key_cell_types_original = 'CellType' + - .year: Year of publication: + Example: self.year = 2019 + - .cell_line: Which cell line was used for the experiment (for cell culture samples) + Example: self.cell_line = "409B2 (CVCL_K092)" + - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) + - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided + (for cell culture samples) + +How do I cache data sets? +~~~~~~~~~~~~~~~~~~~~~~~~~ +When loading a dataset with `Dataset.load()`, you can specify if the adata object +should be cached or not (allow_caching=True). +If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. + +How do I add cell type annotation? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We are simplifying this right now; new instructions will be available in the second half of January. + +Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Initialisation and data set loading are handled separately to allow lazy loading.
+All steps that are required to load the count data and +additional metadata should be defined solely in the `_load` section. +Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. + +How do I tell sfaira where the gene names are? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. +If the gene names are in the index of `adata.var`, you can set "index" as the value of these attributes. + +I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +No, that is not a problem. They will automatically be converted to Ensembl IDs. +You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` +to which the names should be mapped. + +I have CITE-seq data, where can I put the protein quantification? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We will soon provide a structured interface for loading and accessing CITE-seq data, +for now you can add it into `self.adata.obsm["CITE"]`.
From 4bcd88a490841fd7c8a40f6cff1498f5f4142074 Mon Sep 17 00:00:00 2001 From: "david.seb.fischer" Date: Wed, 9 Jun 2021 13:43:58 +0200 Subject: [PATCH 161/161] removed outdated files --- docs/adding_dataset_classes.rst | 112 ----------------------- docs/development.rst | 45 ---------- docs/using_data.rst | 153 -------------------------------- 3 files changed, 310 deletions(-) delete mode 100644 docs/adding_dataset_classes.rst delete mode 100644 docs/development.rst delete mode 100644 docs/using_data.rst diff --git a/docs/adding_dataset_classes.rst b/docs/adding_dataset_classes.rst deleted file mode 100644 index cb499949d..000000000 --- a/docs/adding_dataset_classes.rst +++ /dev/null @@ -1,112 +0,0 @@ -The class-based data loader python file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -As an alternative to the preferred yaml-based dataloaders, users can provide a dataloader class together with the load function. -In this scenario, meta data is described in a constructor of a class in the same python file as the loading function. - -1. A constructor of the following form that contains all the relevant metadata that is available before the actual dataset is loaded to memory. - -.. code-block:: python - - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - # Data set meta data: You do not have to include all of these and can simply skip lines corresponding - # to attritbutes that you do not have access to. These are meta data on a sample level. - # The meta data attributes labeled with (*) may als be supplied per cell, see below, - # in this case, if you supply a .obs_key* attribute, you ccan leave out the sample-wise attribute. - - self.id = x # unique identifier of data set (Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi). 
- - self.author = x # author (list) who sampled / created the data set - self.doi = x # doi of data set accompanying manuscript - - self.download_url_data = x # download website(s) of data files - self.download_url_meta = x # download website(s) of meta data files - - self.assay_sc = x # (*, optional) protocol used to sample data (e.g. smart-seq2) - self.assay_differentiation = x # (*, optional) protocol used to differentiate the cell line (e.g. Lancaster, 2014) - self.assay_type_differentiation = x # (*, optional) type of protocol used to differentiate the cell line (guided/unguided) - self.cell_line = x # (*, optional) cell line used (for cell culture samples) - self.dev_stage = x # (*, optional) developmental stage of organism - self.ethnicity = x # (*, optional) ethnicity of sample - self.healthy = x # (*, optional) whether sample represents a healthy organism - self.normalisation = x # (optional) normalisation applied to raw data loaded (ideally counts, "raw") - self.organ = x # (*, optional) organ (anatomical structure) - self.organism = x # (*) species / organism - self.sample_source = x # (*) whether the sample came from primary tissue or cell culture - self.sex = x # (*, optional) sex - self.state_exact = x # (*, optional) exact disease, treatment or perturbation state of sample - self.year = x # year in which sample was acquired - - # The following meta data may instead also be supplied on a cell level if an appropriate column is present in the - # anndata instance (specifically in .obs) after loading. - # You need to make sure this is loaded in the loading script)! - # See above for a description what these meta data attributes mean. - # Again, if these attributes are note available, you can simply leave this out. 
- self.obs_key_assay_sc = x # (optional, see above, do not provide if .assay_sc is provided) - self.obs_key_assay_differentiation = x # (optional, see above, do not provide if .age is assay_differentiation) - self.obs_key_assay_type_differentiation = x # (optional, see above, do not provide if .assay_type_differentiation is provided) - self.obs_key_cell_line = x # (optional, see above, do not provide if .cell_line is provided) - self.obs_key_dev_stage = x # (optional, see above, do not provide if .dev_stage is provided) - self.obs_key_ethnicity = x # (optional, see above, do not provide if .ethnicity is provided) - self.obs_key_healthy = x # (optional, see above, do not provide if .healthy is provided) - self.obs_key_organ = x # (optional, see above, do not provide if .organ is provided) - self.obs_key_organism = x # (optional, see above, do not provide if .organism is provided) - self.obs_key_sample_source = x # (optional, see above, do not provide if .sample_source is provided) - self.obs_key_sex = x # (optional, see above, do not provide if .sex is provided) - self.obs_key_state_exact = x # (optional, see above, do not provide if .state_exact is provided) - # Additionally, cell type annotation is ALWAYS provided per cell in .obs, this annotation is optional though. - # name of column which contain streamlined cell ontology cell type classes: - self.obs_key_cell_types_original = x # (optional) - # This cell type annotation is free text but is mapped to an ontology via a .tsv file with the same name and - # directory as the python file of this data loader (see below). - - -2. A function called to load the data set into memory: -It is important to set an automated path indicating the location of the raw files here. -Our recommendation for this directory set-up is that you define a directory folder in your directory structure -in which all of these raw files will be (self.path) and then add a sub-directory named as -`self.directory_formatted_doi` (ie. 
the doi with all special characters replaced by "_" and place the raw files -directly into this sub directory. - -.. code-block:: python - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) # loading instruction into adata, use other ones if the data is not h5ad - return adata - -In summary, a python file for a mouse lung data set could look like this: - -.. code-block:: python - - class MyDataset(DatasetBase) - def __init__( - self, - path: Union[str, None] = None, - meta_path: Union[str, None] = None, - cache_path: Union[str, None] = None, - **kwargs - ): - super().__init__(path=path, meta_path=meta_path, cache_path=cache_path, **kwargs) - self.author = "me" - self.doi = ["my preprint", "my peer-reviewed publication"] - self.download_url_data = "my GEO upload" - self.normalisation = "raw" # because I uploaded raw counts, which is good practice! - self.organ = "lung" - self.organism = "mouse" - self.assay_sc = "smart-seq2" - self.year = "2020" - self.sample_source = "primary_tissue" - - self.obs_key_cell_types_original = "louvain_named" # i save my cell type names in here - - def load(data_dir, fn=None) -> anndata.AnnData: - fn = os.path.join(data_dir, "my.h5ad") - adata = anndata.read(fn) - return adata diff --git a/docs/development.rst b/docs/development.rst deleted file mode 100644 index 1d8488c31..000000000 --- a/docs/development.rst +++ /dev/null @@ -1,45 +0,0 @@ -Development -=========== - -Data zoo FAQ ------------- - -How are the meta data entries that I define in the constructor constrained or protected? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The items that are not free text are documented in the readthedocs data section, often, -these would require entries to be terms in an ontology. 
-If you make a mistake in defining these fields in a data loader that you contribute, -the template test data loader and any loading operation will throw an error -pointing at this meta data element. - -How is _load() used in data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`_load()` contains all processing steps that load raw data files into a ready to use adata object. -`_load()` is wrapped in `load()`, the main loading function of a `Dataset` instance. -This adata object can be cached as an h5ad file named after the dataset ID for faster reloading -(if allow_caching=True). `_load()` can be triggered to reload from scratch even if cached data is available -(if use_cached=False). - -How is the feature space (gene names) manipulated during data loading? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sfaira provides both gene names and ENSEMBL IDs. Missing IDs will automatically be inferred from the gene names and -vice versa. -Version tags on ENSEMBL gene IDs will be removed if specified (if remove_gene_version=True); -in this case, counts are aggregated across these features. -Sfaira makes sure that gene IDs in a dataset match IDs of chosen reference genomes. - -Datasets, DatasetGroups, DatasetSuperGroups - what are they? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Dataset: Custom class that loads a specific dataset. -DatasetGroup: A dataset group manages collection of data loaders (multiple instances of Dataset). -This is useful to group for example all data loaders corresponding to a certain study or a certain tissue. -DatasetSuperGroups: A group of DatasetGroups that allow easy addition of multiple instances of DatasetGroup. - -Basics of sfaira lazy loading via split into constructor and _load function. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The constructor of a dataset defines all metadata associated with this data set. 
-The loading of the actual data happens in the `load()` function and not in the constructor. -This is useful as it allows initialising the datasets and accessing dataset metadata -without loading the actual count data. -DatasetGroups can contain initialised Datasets and can be subsetted based on metadata -before loading is triggered across the entire group. diff --git a/docs/using_data.rst b/docs/using_data.rst deleted file mode 100644 index 24f0a1cbb..000000000 --- a/docs/using_data.rst +++ /dev/null @@ -1,153 +0,0 @@ -Using Data -========== - -.. image:: https://raw.githubusercontent.com/theislab/sfaira/master/resources/images/data_zoo.png - :width: 600px - :align: center - -Build data repository locally ------------------------------- - -Build a repository structure -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - 1. Choose a directory to dedicate to the data base, called root in the following. - 2. Run the sfaira download script (sfaira.data.utils.download_all). Alternatively, you can manually set up a data base by making subfolders for each study. - -Note that the automated download is a feature of sfaira but not the core purpose of the package: -Sfaira allows you to efficiently interact with such a local data repository. -Some data sets cannot be automatically downloaded and need your manual intervention, which we report in the download script output. - -Use 3rd party repositories -~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some organizations provide streamlined data objects that can be directly consumed by data zoos such as sfaira. -One example for such an organization is the cellxgene_ data portal. -Through these repositories, one can easily build or extend a collection of data sets that can be easily interfaced with sfaira. -Data loaders for cellxgene structured data objects will be available soon! -Contact us for support of any other repositories. - -.. 
_cellxgene: https://cellxgene.cziscience.com/ - -Genome management ------------------ - -We streamline feature spaces used by models by defining standardized gene sets that are used as model input. -Per default, sfaira works with the protein coding genes of a genome assembly right now. -A model topology version includes the genome it was trained for, which also defines the features of this model as genes. -As genome assemblies are updated, model topology version can be updated and models retrained to reflect these changes. -Note that because protein coding genes do not change drastically between genome assemblies, -samples can be carried over to assemblies they were not aligned against by matching gene identifiers. -Sfaira automatically tries to overlap gene identifiers to the genome assembly selected through the current model. - -FAQ ---- - -How is the dataset’s ID structured? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Organism_Organ_Year_AssaySc_NumberOfDataset_FirstAuthorLastname_doi - -How do I assemble the data set ID if some of its element meta data are not unique? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The data set ID is designed to be a unique identifier of a data set. -Therefore, it is not an issue if it does not capture the full complexity of the data. -Simply choose the meta data value out of the list of corresponding values which comes first in the alphabet. - -What are cell-wise and sample-wise meta data? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Metadata can be set on a per sample level or, in some cases, per cell. -Sample-wise meta data can be directly set in the constructor (e.g. self.organism = “human”). -Cell-wise metadata can be provided in `.obs` of the loaded data, here, -a Dataset attribute contains the name of the `.obs` column that contains these cell-wise labels -(e.g. self.obs_key_organism). 
-Note that sample-wise meta data should be yielded as such and not as a column in `.obs` to simplify loading. - -Which meta data objects are mandatory? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mandatory on sample (self.attribute) or cell level (self.obs_key_attribute): - - - .id: Dataset ID. This is used to identify the data set uniquely. - Example: self.id = "human_colon_2019_10x_smilie_001_10.1016/j.cell.2019.06.029" - - .download_url_data: Link to data download website. - Example: self.download_url_data = "some URL" - - .download_url_meta: Download link to metadata. Assumes that meta data is defined in .download_url_data if not - specified. - Example: self.download_url_meta = "some URL" - - .gene_id_symbols_var_key, .gene_id_ensembl_var_key: Location of gene name as gene symbol and/or ENSEMBL ID in adata.var - (if index of adata.var, set to “index”, otherwise to column name). One of the two must be provided. - Example: self.gene_id_symbols_var_key = 'index', self.gene_id_ensembl_var_key = “GeneID” - - .author: First author of publication (or list of all authors). - self.author = "Last name, first name" # or ["Last name, first name", "Last name, first name"] - - .doi: Doi of publication - Example: self.doi = "10.1016/j.cell.2019.06.029" - - .organism (or .obs_key_organism): Organism sampled. - Example: self.organism = “human” - - .sample_source (or .obs_key_sample_source): Whether data was obtained from primary tissue or cell culture - Example: self.sample_source = "primary_tissue" - -Highly recommended: - - - .normalization: Normalization of count data: - Example: self.normalization = “raw” - - .organ (or .obs_key_organ): Organ sampled. - Example: self.organ = “liver” - - .assay_sc (or .obs_key_assay_sc): Protocol with which data was collected. - Example: self.assay_sc = “10x” - -Optional (if available): - - - .age (or .obs_key_age): Age of individual sampled. 
- Example: self.age = 80 # (80 years old for human) - - .dev_stage (or .obs_key_dev_stage): Developmental stage of individual sampled. - Example: self.dev_stage = “mature” - - .ethnicity (or .obs_key_ethnicity): Ethnicity of individual sampled (only for human). - Example: self.ethnicity = “free text” - - .healthy (or .obs_key_healthy): Is the sample from a healthy individual? (bool) - Example: self.healthy = True - - .sex (or .obs_key_sex): Sex of individual sampled. - Example: self.sex = “male” - - .state_exact (or .obs_key_state_exact): Exact disease state - self.state_exact = free text - - .obs_key_cell_types_original: Column in .obs in which free text cell type names are stored. - Example: self.obs_key_cell_types_original = 'CellType' - - .year: Year of publication: - Example: self.year = 2019 - - .cell_line: Which cell line was used for the experiment (for cell culture samples) - Example: self.cell_line = "409B2 (CVCL_K092)" - - .assay_differentiation: Which protocol was used for the differentiation of the cells (for cell culture samples) - - .assay_type_differentiation: Which protocol-type was used for the differentiation of the cells: guided or unguided - (for cell culture samples) - -How do I cache data sets? -~~~~~~~~~~~~~~~~~~~~~~~~~ -When loading a dataset with `Dataset.load()`, you can specify if the adata object -should be cached or not (allow_caching=True). -If set to True, the loaded adata object will be cached as an h5ad object for faster reloading. - -How do I add cell type annotation? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We are simplifying this right now, new instructions will be available second half of January. - -Why are constructor (`__init__`) and loading function (`_load`) split in the template data loader? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Initiation and data set loading are handled separately to allow lazy loading. 
-All steps that are required to load the count data and -additional metadata should be defined solely in the `_load` section. -Setting of class metadata such as `.doi`, `.id` etc. should be done in the constructor. - -How do I tell sfaira where the gene names are? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By setting the attributes `.gene_id_symbols_var_key` or `.gene_id_ensembl_var_key` in the constructor. -If the gene names are in the index of this data frame, you can set “index” as the value of these attributes. - -I only have gene symbols (human readable names, often abbreviations), such as HGNC or MGI, but not ENSEMBL identifiers, is that a problem? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -No, that is not a problem. They will automatically be converted to Ensembl IDs. -You can, however, specify the reference genome in `Dataset.load(match_to_reference = ReferenceGenomeName)` -to which the names should be mapped. - -I have CITE-seq data, where can I put the protein quantification? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We will soon provide a structured interface for loading and accessing CITE-seq data, -for now you can add it into `self.adata.obsm[“CITE”]`.